Skip to content
This repository has been archived by the owner on Oct 30, 2024. It is now read-only.

Commit

Permalink
change: test pgvector
Browse files (browse the repository at this point in the history)
  • Loading branch information
iwilltry42 committed Oct 15, 2024
1 parent 55b060c commit 7c56b15
Show file tree
Hide file tree
Showing 19 changed files with 69 additions and 38 deletions.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ require (
github.com/google/uuid v1.6.0
github.com/hupe1980/golc v0.0.112
github.com/iwilltry42/bm25-go v0.0.0-20240909111832-a928590cc9da
github.com/jackc/pgx/v5 v5.7.1
github.com/jmcarbo/stopwords v1.1.9
github.com/joho/godotenv v1.5.1
github.com/knadh/koanf/parsers/json v0.1.0
Expand All @@ -38,6 +39,7 @@ require (
github.com/ledongthuc/pdf v0.0.0-20240201131950-da5b75280b06
github.com/lu4p/cat v0.1.5
github.com/mitchellh/mapstructure v1.5.0
github.com/pgvector/pgvector-go v0.2.2
github.com/philippgille/chromem-go v0.6.1-0.20240811154507-a1944285b284
github.com/spf13/cobra v1.8.1
github.com/stretchr/testify v1.9.0
Expand Down Expand Up @@ -111,7 +113,6 @@ require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
github.com/jackc/pgx/v5 v5.7.1 // indirect
github.com/jaytaylor/html2text v0.0.0-20200412013138-3577fbdbcff7 // indirect
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
github.com/jinzhu/inflection v1.0.0 // indirect
Expand All @@ -135,7 +136,6 @@ require (
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/otiai10/gosseract/v2 v2.2.4 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/pgvector/pgvector-go v0.2.2 // indirect
github.com/pjbgf/sha1cd v0.3.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkoukk/tiktoken-go v0.1.6 // indirect
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/askdir.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func (s *ClientAskDir) Customize(cmd *cobra.Command) {
}

func (s *ClientAskDir) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
9 changes: 5 additions & 4 deletions pkg/cmd/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cmd

import (
"archive/zip"
"context"
"fmt"
"io"
"os"
Expand Down Expand Up @@ -89,13 +90,13 @@ func (s *Client) loadArchive() error {
return fmt.Errorf("knowledge archive must contain exactly one .db and one .gob file")
}

s.DSN = types.ArchivePrefix + dbFile
s.VectorDBPath = types.ArchivePrefix + vectorStoreFile
s.DatabaseConfig.DSN = types.ArchivePrefix + dbFile
s.VectorDBConfig.DSN = types.ArchivePrefix + vectorStoreFile

return nil
}

func (s *Client) getClient() (client.Client, error) {
func (s *Client) getClient(ctx context.Context) (client.Client, error) {
if err := s.loadArchive(); err != nil {
return nil, err
}
Expand All @@ -111,7 +112,7 @@ func (s *Client) getClient() (client.Client, error) {
return nil, err
}

ds, err := datastore.NewDatastore(s.DSN, s.AutoMigrate == "true", s.VectorDBConfig.VectorDBPath, provider)
ds, err := datastore.NewDatastore(ctx, s.DatabaseConfig.DSN, s.AutoMigrate == "true", s.VectorDBConfig.DSN, provider)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/create_dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func (s *ClientCreateDataset) Customize(cmd *cobra.Command) {
}

func (s *ClientCreateDataset) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/delete_dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func (s *ClientDeleteDataset) Customize(cmd *cobra.Command) {
}

func (s *ClientDeleteDataset) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/cmd/edit_dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cmd
import (
"encoding/json"
"fmt"

"github.com/gptscript-ai/knowledge/pkg/datastore"
"github.com/gptscript-ai/knowledge/pkg/index"
"github.com/spf13/cobra"
Expand All @@ -23,7 +24,7 @@ func (s *ClientEditDataset) Customize(cmd *cobra.Command) {
}

func (s *ClientEditDataset) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/cmd/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package cmd

import (
"fmt"

"github.com/spf13/cobra"
)

Expand All @@ -17,7 +18,7 @@ func (s *ClientExportDatasets) Customize(cmd *cobra.Command) {
}

func (s *ClientExportDatasets) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/cmd/get_dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cmd
import (
"encoding/json"
"fmt"

"github.com/spf13/cobra"
)

Expand All @@ -19,7 +20,7 @@ func (s *ClientGetDataset) Customize(cmd *cobra.Command) {
}

func (s *ClientGetDataset) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func (s *ClientImportDatasets) Customize(cmd *cobra.Command) {
}

func (s *ClientImportDatasets) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ This is a constraint of the Vector Database and Similarity Search, as different
}

func (s *ClientIngest) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/cmd/list_datasets.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package cmd
import (
"encoding/json"
"fmt"

"github.com/spf13/cobra"
)

Expand All @@ -18,7 +19,7 @@ func (s *ClientListDatasets) Customize(cmd *cobra.Command) {
}

func (s *ClientListDatasets) Run(cmd *cobra.Command, args []string) error {
c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
1 change: 0 additions & 1 deletion pkg/cmd/load.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ func (s *ClientLoad) Run(cmd *cobra.Command, args []string) error {

var texts []string
for _, doc := range docs {

if len(doc.Content) == 0 {
continue
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/cmd/reset.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func (s *ClientResetDatastore) Customize(cmd *cobra.Command) {
}

func (s *ClientResetDatastore) Run(cmd *cobra.Command, args []string) error {
dsn, vectordbPath, _, err := datastore.GetDefaultDSNs(s.DSN, s.VectorDBConfig.VectorDBPath)
dsn, vectordbPath, _, err := datastore.GetDefaultDSNs(s.DatabaseConfig.DSN, s.VectorDBConfig.DSN)
if err != nil {
return err
}
Expand All @@ -34,6 +34,6 @@ func (s *ClientResetDatastore) Run(cmd *cobra.Command, args []string) error {
return fmt.Errorf("failed to remove vector database directory: %w", err)
}

fmt.Printf("Successfully reset datastore (DSN: %q, VectorDBPath: %q)\n", dsn, vectordbPath)
fmt.Printf("Successfully reset datastore (DSN: %q, DSN: %q)\n", dsn, vectordbPath)
return nil
}
2 changes: 1 addition & 1 deletion pkg/cmd/retrieve.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (s *ClientRetrieve) Run(cmd *cobra.Command, args []string) error {
}
slog.Info("Retrieving sources for query", "query", query, "datasets", datasetIDs)

c, err := s.getClient()
c, err := s.getClient(cmd.Context())
if err != nil {
return err
}
Expand Down
7 changes: 4 additions & 3 deletions pkg/cmd/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@ package cmd

import (
"fmt"
"github.com/gptscript-ai/knowledge/pkg/datastore/embeddings"
"github.com/spf13/cobra"
"log/slog"
"os/signal"
"syscall"

"github.com/gptscript-ai/knowledge/pkg/datastore/embeddings"
"github.com/spf13/cobra"

"github.com/gptscript-ai/knowledge/pkg/config"
"github.com/gptscript-ai/knowledge/pkg/datastore"
"github.com/gptscript-ai/knowledge/pkg/server"
Expand Down Expand Up @@ -47,7 +48,7 @@ func (s *Server) Run(cmd *cobra.Command, _ []string) error {
return err
}

ds, err := datastore.NewDatastore(s.DSN, s.AutoMigrate == "true", s.VectorDBConfig.VectorDBPath, provider)
ds, err := datastore.NewDatastore(cmd.Context(), s.DatabaseConfig.DSN, s.AutoMigrate == "true", s.VectorDBConfig.DSN, provider)
if err != nil {
return fmt.Errorf("failed to initialize datastore: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ type DatabaseConfig struct {
}

type VectorDBConfig struct {
VectorDBPath string `usage:"VectorDBPath to the vector database (default \"chromem:$XDG_DATA_HOME/gptscript/knowledge/vector.db\")" default:"" env:"KNOW_VECTOR_DSN"`
DSN string `name:"vector-dsn" usage:"DSN to the vector database (default \"chromem:$XDG_DATA_HOME/gptscript/knowledge/vector.db\")" default:"" env:"KNOW_VECTOR_DSN"`
}

func LoadConfig(configFile string) (*Config, error) {
Expand Down
4 changes: 2 additions & 2 deletions pkg/datastore/datastore.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func LogEmbeddingFunc(embeddingFunc cg.EmbeddingFunc) cg.EmbeddingFunc {
}
}

func NewDatastore(indexDSN string, automigrate bool, vectorDSN string, embeddingProvider etypes.EmbeddingModelProvider) (*Datastore, error) {
func NewDatastore(ctx context.Context, indexDSN string, automigrate bool, vectorDSN string, embeddingProvider etypes.EmbeddingModelProvider) (*Datastore, error) {
indexDSN, vectorDSN, isArchive, err := GetDefaultDSNs(indexDSN, vectorDSN)
if err != nil {
return nil, fmt.Errorf("failed to determine datastore paths: %w", err)
Expand All @@ -98,7 +98,7 @@ func NewDatastore(indexDSN string, automigrate bool, vectorDSN string, embedding

slog.Debug("Using embedding model provider", "provider", embeddingProvider.Name(), "config", output.RedactSensitive(embeddingProvider.Config()))

vsdb, err := vectorstore.New(vectorDSN, embeddingProvider)
vsdb, err := vectorstore.New(ctx, vectorDSN, embeddingProvider)
if err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit 7c56b15

Please sign in to comment.