From 83e6a8409d81e874620ece83666199d220c876b8 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Tue, 5 Sep 2023 02:01:23 -0400 Subject: [PATCH 01/16] WIP on example updates for v1 --- .env.example | 2 +- README.md | 136 ++++++++++++----------- package-lock.json | 92 ++++++--------- package.json | 7 +- src/deleteIndex.ts | 13 +-- src/embeddings.ts | 7 +- src/load.ts | 44 +++++--- src/pinecone.ts | 24 ---- src/query.ts | 27 ++--- src/types.ts | 3 + tests/integration/deleteIndex.test.ts | 10 +- tests/integration/semanticSearch.test.ts | 14 +-- 12 files changed, 178 insertions(+), 201 deletions(-) delete mode 100644 src/pinecone.ts create mode 100644 src/types.ts diff --git a/.env.example b/.env.example index 4cbd9e5..a9cd685 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,3 @@ PINECONE_API_KEY= PINECONE_ENVIRONMENT= -PINECONE_INDEX= \ No newline at end of file +PINECONE_INDEX=semantic-search \ No newline at end of file diff --git a/README.md b/README.md index c1add8c..563716e 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,41 @@ # Semantic Search -In this walkthrough we will see how to use Pinecone for semantic search. To begin we must install the required prerequisite libraries: +In this walkthrough we will see how to use Pinecone for semantic search. ## Setup -Ensure you have `Node.js` version 19.7.0 and `npm` version 9.5.0 installed. Clone the repository and install the dependencies using `npm install`. +Prerequisites: +- `Node.js` version 19.7.0 +- `npm` version 9.5.0 + +Clone the repository and install the dependencies. + +``` +git clone git@github.com:pinecone-io/semantic-search-example.git +cd semantic-search-example +npm install +``` ### Configuration -Create an `.env` file in the root of the project and add your Pinecone API key and environment details: +In order to run this example, you have to supply the Pinecone credentials needed to interact with the Pinecone API. You can find these credentials in the Pinecone web console. 
This project uses `dotenv` to easily load values from the `.env` file into the environment when executing. + +Copy the template file: + +```sh +cp .env.example .env +``` + +And fill in your API key and environment details: ```sh PINECONE_API_KEY= PINECONE_ENVIRONMENT= -PINECONE_INDEX= +PINECONE_INDEX=semantic-search ``` +`PINECONE_INDEX` is the name of the index where this demo will store and query embeddings. You can change `PINECONE_INDEX` to any name you like, but make sure the name is not going to collide with any indexes you are already using. + ### Building To build the project please run the command: @@ -26,7 +46,7 @@ npm run build ## Application structure -There are two main components to this application: the data loader and the search engine. The data loader is responsible for loading the data into Pinecone. The search engine is responsible for querying the index and returning the results. These two components share two common modules: the `embedder` and the `pinecone` utility module. +There are two main components to this application: the data loader (load.ts) and the search engine (query.ts). The data loader is responsible for loading the data into Pinecone. The search engine is responsible for querying the index and returning similar results. These two components share a common module, the `embedder`, which transforms natural language strings into embeddings using the [`sentence-transformers/all-MiniLM-L6-v2`](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) model. ## Data Preprocessing @@ -66,10 +86,11 @@ export default loadCSVFile; The text embedding operation is performed in the `Embedder` class. This class uses a pipeline from the [`@xenova/transformers`](https://github.com/xenova/transformers.js) library to generate embeddings for the input text. We use the [`sentence-transformers/all-MiniLM-L6-v2`](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) model to generate the embeddings. 
The class provides methods to embed a single string or an array of strings in batches​ - which will come in useful a bit later. ```typescript -import { Vector } from "@pinecone-database/pinecone"; +import { PineconeRecord } from "@pinecone-database/pinecone"; import { Pipeline } from "@xenova/transformers"; import { v4 as uuidv4 } from "uuid"; import { sliceIntoChunks } from "./utils/util.js"; +import { TextMetadata } from "./types.js"; class Embedder { private pipe: Pipeline | null = null; @@ -81,7 +102,7 @@ class Embedder { } // Embed a single string - async embed(text: string): Promise { + async embed(text: string): Promise> { const result = this.pipe && (await this.pipe(text)); return { id: uuidv4(), @@ -97,7 +118,7 @@ class Embedder { async embedBatch( texts: string[], batchSize: number, - onDoneBatch: (embeddings: Vector[]) => void + onDoneBatch: (embeddings: PineconeRecord[]) => void ) { const batches = sliceIntoChunks(texts, batchSize); for (const batch of batches) { @@ -112,37 +133,7 @@ class Embedder { const embedder = new Embedder(); export { embedder }; -``` -## Pinecone utility function - -This function ensures that the required environment variables are set, and then initializes the Pinecone client. To save unnecessary instantiations of the Pinecone client, we use a singleton pattern to ensure that only one instance of the client is created. 
- -```typescript -import { PineconeClient } from "@pinecone-database/pinecone"; -import { config } from "dotenv"; -import { getEnv, validateEnvironmentVariables } from "./utils/util.js"; - -config(); - -let pineconeClient: PineconeClient | null = null; - -// Returns a Promise that resolves to a PineconeClient instance -export const getPineconeClient = async (): Promise => { - validateEnvironmentVariables(); - - if (pineconeClient) { - return pineconeClient; - } else { - pineconeClient = new PineconeClient(); - - await pineconeClient.init({ - apiKey: getEnv("PINECONE_API_KEY"), - environment: getEnv("PINECONE_ENVIRONMENT"), - }); - } - return pineconeClient; -}; ``` ## Loading embeddings into Pinecone @@ -150,15 +141,17 @@ export const getPineconeClient = async (): Promise => { Now that we have a way to load data and create embeddings, let put the two together and save the embeddings in Pinecone. In the following section, we get the path of the file we need to process from the command like. We load the CSV file, create the Pinecone index and then start the embedding process. The embedding process is done in batches of 1000. Once we have a batch of embeddings, we insert them into the index. 
```typescript -import { utils } from "@pinecone-database/pinecone"; import cliProgress from "cli-progress"; import { config } from "dotenv"; import loadCSVFile from "./csvLoader.js"; import { embedder } from "./embeddings.js"; -import { getPineconeClient } from "./pinecone.js"; -import { getEnv } from "./utils/util.js"; -const { createIndexIfNotExists, chunkedUpsert } = utils; +import { Pinecone } from '@pinecone-database/pinecone'; +import { getEnv, validateEnvironmentVariables } from "./utils/util.js"; + +import type { TextMetadata } from "./types.js"; + +// Load environment variables from .env config(); const progressBar = new cliProgress.SingleBar( @@ -169,11 +162,10 @@ const progressBar = new cliProgress.SingleBar( let counter = 0; export const load = async (csvPath: string, column: string) => { - // Get index name - const indexName = getEnv("PINECONE_INDEX"); - - // Get a PineconeClient instance - const pineconeClient = await getPineconeClient(); + validateEnvironmentVariables(); + + // Get a Pinecone instance + const pinecone = new Pinecone(); // Create a readable stream from the CSV file const { data, meta } = await loadCSVFile(csvPath); @@ -187,21 +179,32 @@ export const load = async (csvPath: string, column: string) => { // Extract the selected column from the CSV file const documents = data.map((row) => row[column] as string); - // Create a Pinecone index with the name "word-embeddings" and a dimension of 384 - await createIndexIfNotExists(pineconeClient, indexName, 384); + // Get index name + const indexName = getEnv("PINECONE_INDEX"); + + // Check whether the index already exists. If it doesn't, create + // a Pinecone index with a dimension of 384 to hold the outputs + // of our embeddings model. 
+ const indexList = await pinecone.listIndexes(); + if (indexList.indexOf({ name: indexName }) === -1) { + await pinecone.createIndex({ name: indexName, dimension: 384, waitUntilReady: true }) + } - // Select the target Pinecone index - const index = pineconeClient.Index(indexName); + // Select the target Pinecone index. Passing the TextMetadata generic type parameter + // allows typescript to know what shape to expect when interacting with a record's + // metadata field without the need for additional type casting. + const index = pinecone.index(indexName); // Start the progress bar progressBar.start(documents.length, 0); // Start the batch embedding process await embedder.init(); - await embedder.embedBatch(documents, 1, async (embeddings) => { + await embedder.embedBatch(documents, 100, async (embeddings) => { counter += embeddings.length; + console.log(embeddings.length) // Whenever the batch embedding process returns a batch of embeddings, insert them into the index - await chunkedUpsert(index, embeddings, "default"); + await index.upsert(embeddings) progressBar.update(counter); }); @@ -247,31 +250,31 @@ Now that our index is populated we can begin making queries. 
We are performing a ```typescript import { config } from "dotenv"; import { embedder } from "./embeddings.js"; -import { getPineconeClient } from "./pinecone.js"; +import { Pinecone } from "@pinecone-database/pinecone"; import { getEnv, validateEnvironmentVariables } from "./utils/util.js"; +import type { TextMetadata } from "./types.js"; config(); export const query = async (query: string, topK: number) => { - const indexName = getEnv("PINECONE_INDEX"); validateEnvironmentVariables(); - const pineconeClient = await getPineconeClient(); + const pinecone = new Pinecone(); - // Insert the embeddings into the index - const index = pineconeClient.Index(indexName); + // Target the index + const indexName = getEnv("PINECONE_INDEX"); + const index = pinecone.index(indexName); + await embedder.init(); + // Embed the query const queryEmbedding = await embedder.embed(query); - // Query the index + // Query the index using the query embedding const results = await index.query({ - queryRequest: { - vector: queryEmbedding.values, - topK, - includeMetadata: true, - includeValues: false, - namespace: "default", - }, + vector: queryEmbedding.values, + topK, + includeMetadata: true, + includeValues: false }); // Print the results @@ -284,6 +287,7 @@ export const query = async (query: string, topK: number) => { })) ); }; + ``` The querying process is very similar to the indexing process. We create a Pinecone client, select the index we want to query, and then embed the query. We then use the `query` method to search the index for the most similar embeddings. The `query` method returns a list of matches. Each match contains the metadata associated with the embedding, as well as the score of the match. 
diff --git a/package-lock.json b/package-lock.json index 0a21a2a..d2ce245 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "0.0.1", "license": "MIT", "dependencies": { - "@pinecone-database/pinecone": "^0.1.6", + "@pinecone-database/pinecone": "file:../pinecone-ts-client", "@xenova/transformers": "2.0.1", "cli-progress": "^3.12.0", "dotenv": "^16.0.3", @@ -40,12 +40,43 @@ "vitest": "^0.34.1" }, "engines": { - "node": "19.7.0", + "node": "^18.0.0", "npm": "9.5.0", "pnpm": "Please use npm 9.5.1", "yarn": "Please use npm 9.5.1" } }, + "../pinecone-ts-client": { + "name": "@pinecone-database/pinecone", + "version": "0.1.6", + "license": "Apache-2.0", + "dependencies": { + "@sinclair/typebox": "^0.28.15", + "@types/web": "^0.0.99", + "ajv": "^8.12.0", + "cross-fetch": "^3.1.5" + }, + "devDependencies": { + "@jest/globals": "^29.3.1", + "@types/jest": "^29.5.0", + "@types/node": "^18.11.17", + "@typescript-eslint/eslint-plugin": "^5.59.11", + "@typescript-eslint/parser": "^5.59.11", + "dotenv": "^16.0.3", + "eslint": "^8.42.0", + "eslint-plugin-import": "^2.27.5", + "jest": "^29.5.0", + "prettier": "^2.8.8", + "ts-jest": "^29.0.5", + "ts-node": "^10.9.1", + "typedoc": "^0.24.8", + "typescript": "^4.9.4", + "unique-names-generator": "^4.7.1" + }, + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/@aashutoshrathi/word-wrap": { "version": "1.2.6", "resolved": "https://registry.npmjs.org/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz", @@ -642,15 +673,8 @@ } }, "node_modules/@pinecone-database/pinecone": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/@pinecone-database/pinecone/-/pinecone-0.1.6.tgz", - "integrity": "sha512-tCnVc28udecthhgSBTdcMhYEW+xsR++AdZasp+ZE/AvUD1hOR2IR3edjk9m0sDxZyvXbno2KeqUbLIOZr7sCTw==", - "dependencies": { - "cross-fetch": "^3.1.5" - }, - "engines": { - "node": ">=14.0.0" - } + "resolved": "../pinecone-ts-client", + "link": true }, "node_modules/@protobufjs/aspromise": { "version": 
"1.1.2", @@ -1564,14 +1588,6 @@ "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", "dev": true }, - "node_modules/cross-fetch": { - "version": "3.1.8", - "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.8.tgz", - "integrity": "sha512-cvA+JwZoU0Xq+h6WkMvAUqPEYy92Obet6UdKLfW60qn99ftItKjB5T+BkyWOFWe2pUyfQ+IJHmpOTznqk1M6Kg==", - "dependencies": { - "node-fetch": "^2.6.12" - } - }, "node_modules/cross-spawn": { "version": "7.0.3", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", @@ -3395,25 +3411,6 @@ "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-6.1.0.tgz", "integrity": "sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA==" }, - "node_modules/node-fetch": { - "version": "2.6.12", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.12.tgz", - "integrity": "sha512-C/fGU2E8ToujUivIO0H+tpQ6HWo4eEmchoPIoXtxCrVghxdKq+QOHqEZW7tuP3KlV3bC8FRMO5nMCC7Zm1VP6g==", - "dependencies": { - "whatwg-url": "^5.0.0" - }, - "engines": { - "node": "4.x || >=6.0.0" - }, - "peerDependencies": { - "encoding": "^0.1.0" - }, - "peerDependenciesMeta": { - "encoding": { - "optional": true - } - } - }, "node_modules/nodemon": { "version": "2.0.22", "resolved": "https://registry.npmjs.org/nodemon/-/nodemon-2.0.22.tgz", @@ -4947,11 +4944,6 @@ "nodetouch": "bin/nodetouch.js" } }, - "node_modules/tr46": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", - "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" - }, "node_modules/ts-node": { "version": "10.9.1", "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.1.tgz", @@ -5372,20 +5364,6 @@ } } }, - "node_modules/webidl-conversions": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", - 
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" - }, - "node_modules/whatwg-url": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", - "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", - "dependencies": { - "tr46": "~0.0.3", - "webidl-conversions": "^3.0.0" - } - }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index f517e40..433c778 100644 --- a/package.json +++ b/package.json @@ -13,10 +13,11 @@ "dev": "ts-node src", "lint": "eslint src", "format": "npx prettier --write src && npx sort-package-json", - "format:check": "npx prettier --check src" + "format:check": "npx prettier --check src", + "install:dev": "rm -rf node_modules/@pinecone-database/pinecone && mkdir -p node_modules/@pinecone-database/pinecone/dist && cp -r ../pinecone-ts-client/dist node_modules/@pinecone-database/pinecone/dist && cp -r ../pinecone-ts-client/package.json node_modules/@pinecone-database/pinecone" }, "dependencies": { - "@pinecone-database/pinecone": "^0.1.6", + "@pinecone-database/pinecone": "file:../pinecone-ts-client", "@xenova/transformers": "2.0.1", "cli-progress": "^3.12.0", "dotenv": "^16.0.3", @@ -47,7 +48,7 @@ "vitest": "^0.34.1" }, "engines": { - "node": "19.7.0", + "node": "^18.0.0", "npm": "9.5.0", "pnpm": "Please use npm 9.5.1", "yarn": "Please use npm 9.5.1" diff --git a/src/deleteIndex.ts b/src/deleteIndex.ts index 45dbde6..05492db 100644 --- a/src/deleteIndex.ts +++ b/src/deleteIndex.ts @@ -1,20 +1,17 @@ import { config } from "dotenv"; -import { getPineconeClient } from "./pinecone.js"; +import { Pinecone } from "@pinecone-database/pinecone"; import { getEnv, validateEnvironmentVariables } from "./utils/util.js"; config(); +validateEnvironmentVariables(); export const deleteIndex = async () 
=> { const indexName = getEnv("PINECONE_INDEX"); - validateEnvironmentVariables(); - // Initialize the Pinecone client - const pineconeClient = await getPineconeClient(); - try { - await pineconeClient.deleteIndex({ - indexName, - }); + const pinecone = new Pinecone(); + try { + await pinecone.deleteIndex(indexName); console.log(`Index is deleted: ${indexName}`); } catch (e) { console.error(e?.toString()); diff --git a/src/embeddings.ts b/src/embeddings.ts index 43699e3..707a5ab 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -1,7 +1,8 @@ -import { Vector } from "@pinecone-database/pinecone"; +import { PineconeRecord } from "@pinecone-database/pinecone"; import { Pipeline } from "@xenova/transformers"; import { v4 as uuidv4 } from "uuid"; import { sliceIntoChunks } from "./utils/util.js"; +import { TextMetadata } from "./types.js"; class Embedder { private pipe: Pipeline | null = null; @@ -13,7 +14,7 @@ class Embedder { } // Embed a single string - async embed(text: string): Promise { + async embed(text: string): Promise> { const result = this.pipe && (await this.pipe(text)); return { id: uuidv4(), @@ -29,7 +30,7 @@ class Embedder { async embedBatch( texts: string[], batchSize: number, - onDoneBatch: (embeddings: Vector[]) => void + onDoneBatch: (embeddings: PineconeRecord[]) => void ) { const batches = sliceIntoChunks(texts, batchSize); for (const batch of batches) { diff --git a/src/load.ts b/src/load.ts index 0bcb993..b035c36 100644 --- a/src/load.ts +++ b/src/load.ts @@ -1,12 +1,14 @@ -import { utils } from "@pinecone-database/pinecone"; import cliProgress from "cli-progress"; import { config } from "dotenv"; import loadCSVFile from "./csvLoader.js"; import { embedder } from "./embeddings.js"; -import { getPineconeClient } from "./pinecone.js"; -import { getEnv } from "./utils/util.js"; -const { createIndexIfNotExists, chunkedUpsert } = utils; +import { Pinecone } from "@pinecone-database/pinecone"; +import { getEnv, validateEnvironmentVariables } 
from "./utils/util.js"; + +import type { TextMetadata } from "./types.js"; + +// Load environment variables from .env config(); const progressBar = new cliProgress.SingleBar( @@ -17,11 +19,10 @@ const progressBar = new cliProgress.SingleBar( let counter = 0; export const load = async (csvPath: string, column: string) => { - // Get index name - const indexName = getEnv("PINECONE_INDEX"); + validateEnvironmentVariables(); - // Get a PineconeClient instance - const pineconeClient = await getPineconeClient(); + // Get a Pinecone instance + const pinecone = new Pinecone(); // Create a readable stream from the CSV file const { data, meta } = await loadCSVFile(csvPath); @@ -35,21 +36,36 @@ export const load = async (csvPath: string, column: string) => { // Extract the selected column from the CSV file const documents = data.map((row) => row[column] as string); - // Create a Pinecone index with the name "word-embeddings" and a dimension of 384 - await createIndexIfNotExists(pineconeClient, indexName, 384); + // Get index name + const indexName = getEnv("PINECONE_INDEX"); + + // Check whether the index already exists. If it doesn't, create + // a Pinecone index with a dimension of 384 to hold the outputs + // of our embeddings model. + const indexList = await pinecone.listIndexes(); + if (indexList.indexOf({ name: indexName }) === -1) { + await pinecone.createIndex({ + name: indexName, + dimension: 384, + waitUntilReady: true, + }); + } - // Select the target Pinecone index - const index = pineconeClient.Index(indexName); + // Select the target Pinecone index. Passing the TextMetadata generic type parameter + // allows typescript to know what shape to expect when interacting with a record's + // metadata field without the need for additional type casting. 
+ const index = pinecone.index(indexName); // Start the progress bar progressBar.start(documents.length, 0); // Start the batch embedding process await embedder.init(); - await embedder.embedBatch(documents, 1, async (embeddings) => { + await embedder.embedBatch(documents, 100, async (embeddings) => { counter += embeddings.length; + console.log(embeddings.length); // Whenever the batch embedding process returns a batch of embeddings, insert them into the index - await chunkedUpsert(index, embeddings, "default"); + await index.upsert(embeddings); progressBar.update(counter); }); diff --git a/src/pinecone.ts b/src/pinecone.ts deleted file mode 100644 index 9080c7d..0000000 --- a/src/pinecone.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { PineconeClient } from "@pinecone-database/pinecone"; -import { config } from "dotenv"; -import { getEnv, validateEnvironmentVariables } from "./utils/util.js"; - -config(); - -let pineconeClient: PineconeClient | null = null; - -// Returns a Promise that resolves to a PineconeClient instance -export const getPineconeClient = async (): Promise => { - validateEnvironmentVariables(); - - if (pineconeClient) { - return pineconeClient; - } else { - pineconeClient = new PineconeClient(); - - await pineconeClient.init({ - apiKey: getEnv("PINECONE_API_KEY"), - environment: getEnv("PINECONE_ENVIRONMENT"), - }); - } - return pineconeClient; -}; diff --git a/src/query.ts b/src/query.ts index e7fb9da..6da3ffb 100644 --- a/src/query.ts +++ b/src/query.ts @@ -1,30 +1,31 @@ import { config } from "dotenv"; import { embedder } from "./embeddings.js"; -import { getPineconeClient } from "./pinecone.js"; +import { Pinecone } from "@pinecone-database/pinecone"; import { getEnv, validateEnvironmentVariables } from "./utils/util.js"; +import type { TextMetadata } from "./types.js"; config(); +validateEnvironmentVariables(); export const query = async (query: string, topK: number) => { - const indexName = getEnv("PINECONE_INDEX"); 
validateEnvironmentVariables(); - const pineconeClient = await getPineconeClient(); + const pinecone = new Pinecone(); + + // Target the index + const indexName = getEnv("PINECONE_INDEX"); + const index = pinecone.index(indexName); - // Insert the embeddings into the index - const index = pineconeClient.Index(indexName); await embedder.init(); + // Embed the query const queryEmbedding = await embedder.embed(query); - // Query the index + // Query the index using the query embedding const results = await index.query({ - queryRequest: { - vector: queryEmbedding.values, - topK, - includeMetadata: true, - includeValues: false, - namespace: "default", - }, + vector: queryEmbedding.values, + topK, + includeMetadata: true, + includeValues: false, }); // Print the results diff --git a/src/types.ts b/src/types.ts new file mode 100644 index 0000000..8966205 --- /dev/null +++ b/src/types.ts @@ -0,0 +1,3 @@ +export type TextMetadata = { + text: string; +}; diff --git a/tests/integration/deleteIndex.test.ts b/tests/integration/deleteIndex.test.ts index bbf7613..982c3fa 100644 --- a/tests/integration/deleteIndex.test.ts +++ b/tests/integration/deleteIndex.test.ts @@ -1,5 +1,4 @@ -import { utils } from "@pinecone-database/pinecone"; -import { getPineconeClient } from "@src/pinecone.js"; +import { Pinecone } from "@pinecone-database/pinecone"; import { deleteIndex } from "@src/deleteIndex.js"; import { randomizeIndexName } from "../utils/index.js"; @@ -14,9 +13,12 @@ describe("Delete", () => { process.env.PINECONE_INDEX = INDEX_NAME; try { - const pineconeClient = await getPineconeClient(); + const pinecone = new Pinecone(); - await utils.createIndexIfNotExists(pineconeClient, INDEX_NAME, 384); + const indexList = await pinecone.listIndexes(); + if (indexList.indexOf({ name: INDEX_NAME }) === -1) { + await pinecone.createIndex({ name: INDEX_NAME, dimension: 384, waitUntilReady: true }) + } } catch (error) { console.error(error); } diff --git 
a/tests/integration/semanticSearch.test.ts b/tests/integration/semanticSearch.test.ts index e4d77ce..6b603d5 100644 --- a/tests/integration/semanticSearch.test.ts +++ b/tests/integration/semanticSearch.test.ts @@ -1,4 +1,4 @@ -import { getPineconeClient } from "@src/pinecone.js"; +import { Pinecone } from '@pinecone-database/pinecone'; import { run } from "@src/index.js"; import { createMockOnProcessExit, randomizeIndexName } from "../utils/index.js"; @@ -31,8 +31,8 @@ describe( await Promise.all( createdIndexes.map(async (indexName) => { try { - const pineconeClient = await getPineconeClient(); - await pineconeClient.deleteIndex({ indexName: indexName }); + const pinecone = new Pinecone(); + await pinecone.deleteIndex(indexName); } catch (e) { console.error(e); } @@ -57,12 +57,10 @@ describe( await run(); - const client = await getPineconeClient(); - const index = client.Index(indexName); + const pinecone = new Pinecone(); + const index = pinecone.index(indexName); const stats = await index - .describeIndexStats({ - describeIndexStatsRequest: {}, - }) + .describeIndexStats() .catch((e) => console.error(e)); // Ensure that all vectors are added From 4280cdc0e5baac507846765f2ccfe715b4d82962 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 03:39:01 -0400 Subject: [PATCH 02/16] Remove console.log, remove @ts-expect-error --- README.md | 1 - src/embeddings.ts | 2 +- src/query.ts | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/README.md b/README.md index 563716e..4521c84 100644 --- a/README.md +++ b/README.md @@ -202,7 +202,6 @@ export const load = async (csvPath: string, column: string) => { await embedder.init(); await embedder.embedBatch(documents, 100, async (embeddings) => { counter += embeddings.length; - console.log(embeddings.length) // Whenever the batch embedding process returns a batch of embeddings, insert them into the index await index.upsert(embeddings) progressBar.update(counter); diff --git a/src/embeddings.ts 
b/src/embeddings.ts index 707a5ab..0c023f0 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -1,4 +1,4 @@ -import { PineconeRecord } from "@pinecone-database/pinecone"; +import type { PineconeRecord } from "@pinecone-database/pinecone"; import { Pipeline } from "@xenova/transformers"; import { v4 as uuidv4 } from "uuid"; import { sliceIntoChunks } from "./utils/util.js"; diff --git a/src/query.ts b/src/query.ts index 6da3ffb..a3e9e97 100644 --- a/src/query.ts +++ b/src/query.ts @@ -31,8 +31,6 @@ export const query = async (query: string, topK: number) => { // Print the results console.log( results.matches?.map((match) => ({ - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-ignore text: match.metadata?.text, score: match.score, })) From dadfba0192c1604b52c2e61f517a5aa29ac5ad1c Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 03:53:59 -0400 Subject: [PATCH 03/16] Update test with changes to describeIndexStats response type --- tests/integration/semanticSearch.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/semanticSearch.test.ts b/tests/integration/semanticSearch.test.ts index 6b603d5..b2aa4f8 100644 --- a/tests/integration/semanticSearch.test.ts +++ b/tests/integration/semanticSearch.test.ts @@ -64,7 +64,7 @@ describe( .catch((e) => console.error(e)); // Ensure that all vectors are added - expect(stats?.namespaces?.default.vectorCount).toBe(4); + expect(stats?.namespaces?.default.recordCount).toBe(4); // Set environment for querying process.argv = [ From 36efa5ed347529bf8566f8eaba32fa702b92d6cc Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 03:54:16 -0400 Subject: [PATCH 04/16] Bump to dev build --- package-lock.json | 136 +++++++++++++++++++++++++++++++++------------- package.json | 2 +- 2 files changed, 99 insertions(+), 39 deletions(-) diff --git a/package-lock.json b/package-lock.json index d2ce245..099051e 100644 --- a/package-lock.json +++ 
b/package-lock.json @@ -9,7 +9,7 @@ "version": "0.0.1", "license": "MIT", "dependencies": { - "@pinecone-database/pinecone": "file:../pinecone-ts-client", + "@pinecone-database/pinecone": "^0.1.6-dev.20230906074328", "@xenova/transformers": "2.0.1", "cli-progress": "^3.12.0", "dotenv": "^16.0.3", @@ -46,37 +46,6 @@ "yarn": "Please use npm 9.5.1" } }, - "../pinecone-ts-client": { - "name": "@pinecone-database/pinecone", - "version": "0.1.6", - "license": "Apache-2.0", - "dependencies": { - "@sinclair/typebox": "^0.28.15", - "@types/web": "^0.0.99", - "ajv": "^8.12.0", - "cross-fetch": "^3.1.5" - }, - "devDependencies": { - "@jest/globals": "^29.3.1", - "@types/jest": "^29.5.0", - "@types/node": "^18.11.17", - "@typescript-eslint/eslint-plugin": "^5.59.11", - "@typescript-eslint/parser": "^5.59.11", - "dotenv": "^16.0.3", - "eslint": "^8.42.0", - "eslint-plugin-import": "^2.27.5", - "jest": "^29.5.0", - "prettier": "^2.8.8", - "ts-jest": "^29.0.5", - "ts-node": "^10.9.1", - "typedoc": "^0.24.8", - "typescript": "^4.9.4", - "unique-names-generator": "^4.7.1" - }, - "engines": { - "node": ">=14.0.0" - } - }, "node_modules/@aashutoshrathi/word-wrap": { "version": "1.2.6", "resolved": "https://registry.npmjs.org/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz", @@ -673,8 +642,43 @@ } }, "node_modules/@pinecone-database/pinecone": { - "resolved": "../pinecone-ts-client", - "link": true + "version": "0.1.6-dev.20230906074328", + "resolved": "https://registry.npmjs.org/@pinecone-database/pinecone/-/pinecone-0.1.6-dev.20230906074328.tgz", + "integrity": "sha512-BFCN1TbAANcZiLdHT0psrNM533ubvqRdVmlUT9gyZtooJ0lmUL/C30TQ+MBtpIKN6m9vTsdRFDo3jFaW9sCbUw==", + "dependencies": { + "@sinclair/typebox": "^0.28.15", + "@types/web": "^0.0.99", + "ajv": "^8.12.0", + "cross-fetch": "^3.1.5" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@pinecone-database/pinecone/node_modules/@sinclair/typebox": { + "version": "0.28.20", + "resolved": 
"https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.28.20.tgz", + "integrity": "sha512-QCF3BGfacwD+3CKhGsMeixnwOmX4AWgm61nKkNdRStyLVu0mpVFYlDSY8gVBOOED1oSwzbJauIWl/+REj8K5+w==" + }, + "node_modules/@pinecone-database/pinecone/node_modules/ajv": { + "version": "8.12.0", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.12.0.tgz", + "integrity": "sha512-sRu1kpcO9yLtYxBKvqfTeh9KzZEwO3STyX1HT+4CaDzC6HpTGYhIhPIzj9XuKU7KYDwnaeh5hcOwjy1QuJzBPA==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/@pinecone-database/pinecone/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==" }, "node_modules/@protobufjs/aspromise": { "version": "1.1.2", @@ -827,6 +831,11 @@ "integrity": "sha512-kNnC1GFBLuhImSnV7w4njQkUiJi0ZXUycu1rUaouPqiKlXkh77JKgdRnTAp1x5eBwcIwbtI+3otwzuIDEuDoxQ==", "dev": true }, + "node_modules/@types/web": { + "version": "0.0.99", + "resolved": "https://registry.npmjs.org/@types/web/-/web-0.0.99.tgz", + "integrity": "sha512-xMz3tOvtkZzc7RpQrDNiLe5sfMmP+fz8bOxHIZ/U8qXyvzDX4L4Ss1HCjor/O9DSelba+1iXK1VM7lruX28hiQ==" + }, "node_modules/@types/yargs": { "version": "17.0.24", "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.24.tgz", @@ -1588,6 +1597,14 @@ "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", "dev": true }, + "node_modules/cross-fetch": { + "version": "3.1.8", + "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.8.tgz", + "integrity": "sha512-cvA+JwZoU0Xq+h6WkMvAUqPEYy92Obet6UdKLfW60qn99ftItKjB5T+BkyWOFWe2pUyfQ+IJHmpOTznqk1M6Kg==", + 
"dependencies": { + "node-fetch": "^2.6.12" + } + }, "node_modules/cross-spawn": { "version": "7.0.3", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", @@ -2194,8 +2211,7 @@ "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", - "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", - "dev": true + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==" }, "node_modules/fast-fifo": { "version": "1.3.0", @@ -3411,6 +3427,25 @@ "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-6.1.0.tgz", "integrity": "sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA==" }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, "node_modules/nodemon": { "version": "2.0.22", "resolved": "https://registry.npmjs.org/nodemon/-/nodemon-2.0.22.tgz", @@ -4153,7 +4188,6 @@ "version": "2.3.0", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz", "integrity": "sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==", - "dev": true, "engines": { "node": ">=6" } @@ -4287,6 +4321,14 @@ "node": ">=0.10.0" } }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": 
"sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/resolve": { "version": "1.22.4", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.4.tgz", @@ -4944,6 +4986,11 @@ "nodetouch": "bin/nodetouch.js" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, "node_modules/ts-node": { "version": "10.9.1", "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.1.tgz", @@ -5161,7 +5208,6 @@ "version": "4.4.1", "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", - "dev": true, "dependencies": { "punycode": "^2.1.0" } @@ -5364,6 +5410,20 @@ } } }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index 433c778..23479c6 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,7 @@ "install:dev": "rm -rf node_modules/@pinecone-database/pinecone && mkdir -p node_modules/@pinecone-database/pinecone/dist && cp -r ../pinecone-ts-client/dist 
node_modules/@pinecone-database/pinecone/dist && cp -r ../pinecone-ts-client/package.json node_modules/@pinecone-database/pinecone" }, "dependencies": { - "@pinecone-database/pinecone": "file:../pinecone-ts-client", + "@pinecone-database/pinecone": "^0.1.6-dev.20230906074328", "@xenova/transformers": "2.0.1", "cli-progress": "^3.12.0", "dotenv": "^16.0.3", From f029e21e15bc9c3ca8799aeab3e7d7a38b6c74dd Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 03:58:50 -0400 Subject: [PATCH 05/16] Adjust engine --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 23479c6..b10e307 100644 --- a/package.json +++ b/package.json @@ -49,7 +49,7 @@ }, "engines": { "node": "^18.0.0", - "npm": "9.5.0", + "npm": "^9.5.0", "pnpm": "Please use npm 9.5.1", "yarn": "Please use npm 9.5.1" } From 0c9bd1af55e669145e94131c3274930cbba1fee0 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 04:01:59 -0400 Subject: [PATCH 06/16] Adjust engine --- package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index b10e307..3782663 100644 --- a/package.json +++ b/package.json @@ -48,8 +48,8 @@ "vitest": "^0.34.1" }, "engines": { - "node": "^18.0.0", - "npm": "^9.5.0", + "node": ">=18.0.0", + "npm": ">=9.5.0", "pnpm": "Please use npm 9.5.1", "yarn": "Please use npm 9.5.1" } From 43ef5e7e7169900cc83aaa3c441364086f55df63 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 04:11:12 -0400 Subject: [PATCH 07/16] Downgrade node requirement to v18 --- README.md | 3 +-- package.json | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 4521c84..bedf80f 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,7 @@ In this walkthrough we will see how to use Pinecone for semantic search. 
## Setup Prerequisites: -- `Node.js` version 19.7.0 -- `npm` version 9.5.0 +- `Node.js` version >=18.0.0 Clone the repository and install the dependencies. diff --git a/package.json b/package.json index 3782663..ce8aef1 100644 --- a/package.json +++ b/package.json @@ -48,9 +48,6 @@ "vitest": "^0.34.1" }, "engines": { - "node": ">=18.0.0", - "npm": ">=9.5.0", - "pnpm": "Please use npm 9.5.1", - "yarn": "Please use npm 9.5.1" + "node": ">=18.0.0" } } From e5e70f81f0837222e4cd0882cd3c279fa45afb56 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 04:19:11 -0400 Subject: [PATCH 08/16] Test setup: delete all indexes --- tests/integration/semanticSearch.test.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/integration/semanticSearch.test.ts b/tests/integration/semanticSearch.test.ts index b2aa4f8..78db68b 100644 --- a/tests/integration/semanticSearch.test.ts +++ b/tests/integration/semanticSearch.test.ts @@ -22,6 +22,14 @@ describe( return indexName; }; + beforeAll(async () => { + const pinecone = new Pinecone(); + const listIndexes = pinecone.listIndexes(); + for (const indexName in listIndexes) { + await pinecone.deleteIndex(indexName) + } + }); + afterEach(() => { process.argv = originalArgv; }); From 948ef02c4d8517fd2ec9058ddae3b82441223351 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 04:21:14 -0400 Subject: [PATCH 09/16] Test cleanup --- tests/integration/deleteIndex.test.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/deleteIndex.test.ts b/tests/integration/deleteIndex.test.ts index 982c3fa..0d39a01 100644 --- a/tests/integration/deleteIndex.test.ts +++ b/tests/integration/deleteIndex.test.ts @@ -26,8 +26,11 @@ describe("Delete", () => { 5 * 60 * 1000 ); - afterAll(() => { + afterAll(async () => { consoleMock.mockReset(); + + const pinecone = new Pinecone() + await pinecone.deleteIndex(INDEX_NAME); }); it("should delete Pinecone index", async () => { From 
48e27cac0f3607275460e0475394fe86e778f8ec Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 04:24:10 -0400 Subject: [PATCH 10/16] Add defensive cleanup before test runs --- tests/integration/deleteIndex.test.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/integration/deleteIndex.test.ts b/tests/integration/deleteIndex.test.ts index 0d39a01..97116bc 100644 --- a/tests/integration/deleteIndex.test.ts +++ b/tests/integration/deleteIndex.test.ts @@ -8,6 +8,11 @@ describe("Delete", () => { // eslint-disable-next-line @typescript-eslint/no-empty-function const consoleMock = vi.spyOn(console, "log").mockImplementation(() => {}); + beforeAll(async () => { + const pinecone = new Pinecone() + await pinecone.deleteIndex(INDEX_NAME); + }) + beforeEach( async () => { process.env.PINECONE_INDEX = INDEX_NAME; From 180d14bd06ffc0e675154ab307c77151b290fb49 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 07:02:22 -0400 Subject: [PATCH 11/16] Update delete test assertion --- tests/integration/semanticSearch.test.ts | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/integration/semanticSearch.test.ts b/tests/integration/semanticSearch.test.ts index 78db68b..c4e81ff 100644 --- a/tests/integration/semanticSearch.test.ts +++ b/tests/integration/semanticSearch.test.ts @@ -116,14 +116,12 @@ describe( mockExit.mockRestore(); }); - it("should log an error if delte is called without valid index name", async () => { - setIndexName("some-non-exiting-index"); + it("should log an error if delete is called without valid index name", async () => { + setIndexName("some-non-existing-index"); process.argv = ["node", "../../src/index", "delete"]; await run(); - expect(consoleMock).toHaveBeenCalledWith( - "PineconeError: PineconeClient: Error calling deleteIndex: 404: Not Found" - ); + expect(consoleMock).toHaveBeenCalledWith(expect.stringContaining("PineconeNotFoundError")); }); }, // Set timeout to 5 mins, becouse creating index 
can take time From fb7e7ad1965a047fddccf02a1892c055a4ca74de Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 07:23:08 -0400 Subject: [PATCH 12/16] Update tests --- README.md | 4 ++-- src/embeddings.ts | 2 +- tests/integration/load.test.ts | 2 +- tests/integration/semanticSearch.test.ts | 9 ++++++--- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index bedf80f..8bdce00 100644 --- a/README.md +++ b/README.md @@ -85,11 +85,11 @@ export default loadCSVFile; The text embedding operation is performed in the `Embedder` class. This class uses a pipeline from the [`@xenova/transformers`](https://github.com/xenova/transformers.js) library to generate embeddings for the input text. We use the [`sentence-transformers/all-MiniLM-L6-v2`](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) model to generate the embeddings. The class provides methods to embed a single string or an array of strings in batches​ - which will come in useful a bit later. 
```typescript -import { PineconeRecord } from "@pinecone-database/pinecone"; +import type { PineconeRecord } from "@pinecone-database/pinecone"; +import type { TextMetadata } from "./types.js"; import { Pipeline } from "@xenova/transformers"; import { v4 as uuidv4 } from "uuid"; import { sliceIntoChunks } from "./utils/util.js"; -import { TextMetadata } from "./types.js"; class Embedder { private pipe: Pipeline | null = null; diff --git a/src/embeddings.ts b/src/embeddings.ts index 0c023f0..6622230 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -1,8 +1,8 @@ import type { PineconeRecord } from "@pinecone-database/pinecone"; +import type { TextMetadata } from "./types.js"; import { Pipeline } from "@xenova/transformers"; import { v4 as uuidv4 } from "uuid"; import { sliceIntoChunks } from "./utils/util.js"; -import { TextMetadata } from "./types.js"; class Embedder { private pipe: Pipeline | null = null; diff --git a/tests/integration/load.test.ts b/tests/integration/load.test.ts index 234d56d..f04fffc 100644 --- a/tests/integration/load.test.ts +++ b/tests/integration/load.test.ts @@ -30,7 +30,7 @@ describe("Load", () => { process.env = {}; await expect(load("non-existing.csv", "question1")).rejects.toThrow( - "PINECONE_INDEX environment variable not set" + "PINECONE_API_KEY environment variable not set" ); }); }); diff --git a/tests/integration/semanticSearch.test.ts b/tests/integration/semanticSearch.test.ts index c4e81ff..824f431 100644 --- a/tests/integration/semanticSearch.test.ts +++ b/tests/integration/semanticSearch.test.ts @@ -68,11 +68,14 @@ describe( const pinecone = new Pinecone(); const index = pinecone.index(indexName); const stats = await index - .describeIndexStats() - .catch((e) => console.error(e)); + .describeIndexStats(); // Ensure that all vectors are added - expect(stats?.namespaces?.default.recordCount).toBe(4); + if (stats.namespaces) { + const defaultNamespaceStats = stats.namespaces[''] + 
expect(defaultNamespaceStats.recordCount).toEqual(4); + } + expect(stats.totalRecordCount).toEqual(4); // Set environment for querying process.argv = [ From b553632e52e28833218ea8bde726973619d19409 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 07:26:59 -0400 Subject: [PATCH 13/16] Remove dev command added in package.json --- package.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/package.json b/package.json index ce8aef1..d8eaaed 100644 --- a/package.json +++ b/package.json @@ -13,8 +13,7 @@ "dev": "ts-node src", "lint": "eslint src", "format": "npx prettier --write src && npx sort-package-json", - "format:check": "npx prettier --check src", - "install:dev": "rm -rf node_modules/@pinecone-database/pinecone && mkdir -p node_modules/@pinecone-database/pinecone/dist && cp -r ../pinecone-ts-client/dist node_modules/@pinecone-database/pinecone/dist && cp -r ../pinecone-ts-client/package.json node_modules/@pinecone-database/pinecone" + "format:check": "npx prettier --check src" }, "dependencies": { "@pinecone-database/pinecone": "^0.1.6-dev.20230906074328", From f675b495187dc2d847ed2f67b0350969c58f9b19 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Wed, 6 Sep 2023 07:30:55 -0400 Subject: [PATCH 14/16] Test adjustments --- tests/integration/deleteIndex.test.ts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/integration/deleteIndex.test.ts b/tests/integration/deleteIndex.test.ts index 97116bc..46dfcda 100644 --- a/tests/integration/deleteIndex.test.ts +++ b/tests/integration/deleteIndex.test.ts @@ -8,11 +8,6 @@ describe("Delete", () => { // eslint-disable-next-line @typescript-eslint/no-empty-function const consoleMock = vi.spyOn(console, "log").mockImplementation(() => {}); - beforeAll(async () => { - const pinecone = new Pinecone() - await pinecone.deleteIndex(INDEX_NAME); - }) - beforeEach( async () => { process.env.PINECONE_INDEX = INDEX_NAME; @@ -33,9 +28,6 @@ describe("Delete", () => { afterAll(async () => { 
consoleMock.mockReset(); - - const pinecone = new Pinecone() - await pinecone.deleteIndex(INDEX_NAME); }); it("should delete Pinecone index", async () => { From 28150ac144be505d9fc606f670a9e11513b39728 Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 7 Sep 2023 06:10:10 -0400 Subject: [PATCH 15/16] Bump dev build --- package-lock.json | 13 +++++-------- package.json | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/package-lock.json b/package-lock.json index 099051e..eccc82c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "0.0.1", "license": "MIT", "dependencies": { - "@pinecone-database/pinecone": "^0.1.6-dev.20230906074328", + "@pinecone-database/pinecone": "^0.1.6-dev.20230907100643", "@xenova/transformers": "2.0.1", "cli-progress": "^3.12.0", "dotenv": "^16.0.3", @@ -40,10 +40,7 @@ "vitest": "^0.34.1" }, "engines": { - "node": "^18.0.0", - "npm": "9.5.0", - "pnpm": "Please use npm 9.5.1", - "yarn": "Please use npm 9.5.1" + "node": ">=18.0.0" } }, "node_modules/@aashutoshrathi/word-wrap": { @@ -642,9 +639,9 @@ } }, "node_modules/@pinecone-database/pinecone": { - "version": "0.1.6-dev.20230906074328", - "resolved": "https://registry.npmjs.org/@pinecone-database/pinecone/-/pinecone-0.1.6-dev.20230906074328.tgz", - "integrity": "sha512-BFCN1TbAANcZiLdHT0psrNM533ubvqRdVmlUT9gyZtooJ0lmUL/C30TQ+MBtpIKN6m9vTsdRFDo3jFaW9sCbUw==", + "version": "0.1.6-dev.20230907100643", + "resolved": "https://registry.npmjs.org/@pinecone-database/pinecone/-/pinecone-0.1.6-dev.20230907100643.tgz", + "integrity": "sha512-EQvF3xivlSyx7y2XKZ3YWs8iPFTM4ydjSO1paNqnf56yh/1bBShR8vPcbdA/FaCM+KZgStTUFz1pLJHZFz7aQw==", "dependencies": { "@sinclair/typebox": "^0.28.15", "@types/web": "^0.0.99", diff --git a/package.json b/package.json index d8eaaed..c0eef48 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,7 @@ "format:check": "npx prettier --check src" }, "dependencies": { - "@pinecone-database/pinecone": "^0.1.6-dev.20230906074328", 
+ "@pinecone-database/pinecone": "^0.1.6-dev.20230907100643", "@xenova/transformers": "2.0.1", "cli-progress": "^3.12.0", "dotenv": "^16.0.3", From ea9504d821c0ccd6704b4e180e709be7324aadad Mon Sep 17 00:00:00 2001 From: Jen Hamon Date: Thu, 7 Sep 2023 11:06:45 -0400 Subject: [PATCH 16/16] Consume v1 release --- package-lock.json | 8 ++++---- package.json | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/package-lock.json b/package-lock.json index eccc82c..44595ec 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "0.0.1", "license": "MIT", "dependencies": { - "@pinecone-database/pinecone": "^0.1.6-dev.20230907100643", + "@pinecone-database/pinecone": "^1.0.0", "@xenova/transformers": "2.0.1", "cli-progress": "^3.12.0", "dotenv": "^16.0.3", @@ -639,9 +639,9 @@ } }, "node_modules/@pinecone-database/pinecone": { - "version": "0.1.6-dev.20230907100643", - "resolved": "https://registry.npmjs.org/@pinecone-database/pinecone/-/pinecone-0.1.6-dev.20230907100643.tgz", - "integrity": "sha512-EQvF3xivlSyx7y2XKZ3YWs8iPFTM4ydjSO1paNqnf56yh/1bBShR8vPcbdA/FaCM+KZgStTUFz1pLJHZFz7aQw==", + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@pinecone-database/pinecone/-/pinecone-1.0.0.tgz", + "integrity": "sha512-CtsfbK4qTDjnS56FVH64FEWNVnhwOyheBlLe3e9T6o9Gaxc00f/079JWUUiZ1lrkc3K/YkmlYYOXbdGyKP2z3A==", "dependencies": { "@sinclair/typebox": "^0.28.15", "@types/web": "^0.0.99", diff --git a/package.json b/package.json index c0eef48..6e54cc1 100644 --- a/package.json +++ b/package.json @@ -16,7 +16,7 @@ "format:check": "npx prettier --check src" }, "dependencies": { - "@pinecone-database/pinecone": "^0.1.6-dev.20230907100643", + "@pinecone-database/pinecone": "^1.0.0", "@xenova/transformers": "2.0.1", "cli-progress": "^3.12.0", "dotenv": "^16.0.3",