Skip to content
This repository has been archived by the owner on Oct 30, 2024. It is now read-only.

feat: add progress indication for embedding generation process #127

Merged
merged 1 commit into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions pkg/client/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,12 @@ func ingestPaths(ctx context.Context, c Client, opts *IngestPathsOpts, datasetID
defer sem.Release(1)

ingestedFilesCount++
currentMetadata := metadataStack[len(metadataStack)-1]
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

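Unrelated bugfix: only read the last entry of metadataStack when the stack is non-empty; otherwise pass a zero-value FileMetadata to ingestionFunc.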

return ingestionFunc(path, currentMetadata.Metadata[filepath.Base(path)]) // FIXME: metadata
var fileMetadata FileMetadata
if len(metadataStack) > 0 {
currentMetadata := metadataStack[len(metadataStack)-1]
fileMetadata = currentMetadata.Metadata[filepath.Base(path)]
}
return ingestionFunc(path, fileMetadata)
})
}

Expand Down
20 changes: 19 additions & 1 deletion pkg/datastore/datastore.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/gptscript-ai/knowledge/pkg/config"
etypes "github.com/gptscript-ai/knowledge/pkg/datastore/embeddings/types"
"github.com/gptscript-ai/knowledge/pkg/datastore/types"
"github.com/gptscript-ai/knowledge/pkg/log"
"github.com/gptscript-ai/knowledge/pkg/output"

"github.com/adrg/xdg"
Expand Down Expand Up @@ -68,6 +69,23 @@ func GetDatastorePaths(dsn, vectordbPath string) (string, string, bool, error) {
return dsn, vectordbPath, isArchive, nil
}

// LogEmbeddingFunc decorates embeddingFunc with progress logging.
//
// The returned function fetches the logger stored in ctx via log.FromCtx,
// tags it with stage=embedding, and emits one record before the wrapped call
// (status=starting) and one after it (status=completed on success, or
// status=failed together with the error on failure). The embedding produced
// by embeddingFunc is passed through unchanged; when embeddingFunc fails, a
// nil embedding is returned alongside the original error.
func LogEmbeddingFunc(embeddingFunc cg.EmbeddingFunc) cg.EmbeddingFunc {
	return func(ctx context.Context, text string) ([]float32, error) {
		stageLogger := log.FromCtx(ctx).With("stage", "embedding")
		stageLogger.With("status", "starting").Info("Creating embedding")

		vector, embedErr := embeddingFunc(ctx, text)
		if embedErr == nil {
			// Happy path: report completion and hand the vector back as-is.
			stageLogger.With("status", "completed").Info("Created embedding")
			return vector, nil
		}

		// Failure path: record the error for progress consumers, then
		// propagate it to the caller without wrapping.
		stageLogger.With("status", "failed").Error("Failed to create embedding", "error", embedErr)
		return nil, embedErr
	}
}

func NewDatastore(dsn string, automigrate bool, vectorDBPath string, embeddingProvider etypes.EmbeddingModelProvider) (*Datastore, error) {
dsn, vectorDBPath, isArchive, err := GetDatastorePaths(dsn, vectorDBPath)
if err != nil {
Expand Down Expand Up @@ -106,7 +124,7 @@ func NewDatastore(dsn string, automigrate bool, vectorDBPath string, embeddingPr

ds := &Datastore{
Index: idx,
Vectorstore: chromem.New(vsdb, embeddingFunc),
Vectorstore: chromem.New(vsdb, LogEmbeddingFunc(embeddingFunc)),
EmbeddingModelProvider: embeddingProvider,
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/datastore/ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ func (s *Datastore) Ingest(ctx context.Context, datasetID string, name string, c
// Add documents to VectorStore -> This generates the embeddings
slog.Debug("Ingesting documents", "count", len(docs))

log.ToCtx(ctx, log.FromCtx(ctx).With("phase", "store").With("num_documents", len(docs)))
ctx = log.ToCtx(ctx, log.FromCtx(ctx).With("phase", "store").With("num_documents", len(docs)))

docIDs, err := s.Vectorstore.AddDocuments(ctx, docs, datasetID)
if err != nil {
Expand Down