From 7ed92913302535fff36e57dc5950f6b92e7d3bef Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 10:10:14 +0200 Subject: [PATCH 01/31] canopy server documentation --- .gitignore | 3 +- src/canopy/models/api_models.py | 2 +- src/canopy/models/data_models.py | 53 ++++++---- src/canopy_cli/__init__.py | 27 ++++++ src/canopy_cli/cli.py | 24 +++++ src/canopy_server/__init__.py | 13 +++ src/canopy_server/api_models.py | 52 ++++++++-- src/canopy_server/app.py | 161 +++++++++++++++++++++---------- 8 files changed, 257 insertions(+), 78 deletions(-) diff --git a/.gitignore b/.gitignore index 9116245a..29753e14 100644 --- a/.gitignore +++ b/.gitignore @@ -162,4 +162,5 @@ cython_debug/ # Mac OS **/.DS_Store -datafiles/* \ No newline at end of file +datafiles/* +canopy-api-docs.html \ No newline at end of file diff --git a/src/canopy/models/api_models.py b/src/canopy/models/api_models.py index 964aa48d..53a93585 100644 --- a/src/canopy/models/api_models.py +++ b/src/canopy/models/api_models.py @@ -28,7 +28,7 @@ def calc_total_tokens(cls, v, values, **kwargs): class ChatResponse(BaseModel): - id: str + id: str = Field(description="Canopy session Id.") object: str created: int model: str diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 7d0281a2..05a941e5 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -11,35 +11,52 @@ class Query(BaseModel): - text: str - namespace: str = "" - metadata_filter: Optional[dict] = None - top_k: Optional[int] = None - query_params: dict = Field(default_factory=dict) + text: str = Field(description="The query text.") + namespace: str = Field( + default="", + description="The namespace of the query, to learn more about namespaces, see https://docs.pinecone.io/docs/namespaces", # noqa: E501 + ) + metadata_filter: Optional[dict] = Field( + default=None, + description="A pinecone metadata filter, to learn more about metadata filters, see https://docs.pinecone.io/docs/metadata-filtering", # noqa: E501 + ) + top_k: Optional[int] = Field( + default=None, + description="[soon deprecated] The number of results to return." + ) + query_params: dict = Field( + default_factory=dict, + description="Pinecone Client additional query parameters." + ) class Document(BaseModel): - id: str - text: str - source: str = "" - metadata: Metadata = Field(default_factory=dict) + id: str = Field(description="The document id.") + text: str = Field(description="The document text.") + source: str = Field( + default="", + description="The source of the document: a URL, a file path, etc." + ) + metadata: Metadata = Field( + default_factory=dict, + description="The document metadata, to learn more about metadata, see https://docs.pinecone.io/docs/manage-data", # noqa: E501 + ) class Config: extra = Extra.forbid - @validator('metadata') + @validator("metadata") def metadata_reseved_fields(cls, v): - if 'text' in v: + if "text" in v: raise ValueError('Metadata cannot contain reserved field "text"') - if 'document_id' in v: + if "document_id" in v: raise ValueError('Metadata cannot contain reserved field "document_id"') - if 'source' in v: + if "source" in v: raise ValueError('Metadata cannot contain reserved field "source"') return v class ContextContent(BaseModel, ABC): - # Any context should be able to be represented as well formatted text. # In the most minimal case, that could simply be a call to `.json()`. 
@abstractmethod @@ -59,6 +76,8 @@ def to_text(self, **kwargs) -> str: return "\n".join([c.to_text(**kwargs) for c in self.content]) +ContextContentResponse = Union[ContextContent, Sequence[ContextContent]] + # --------------------- LLM models ------------------------ @@ -69,12 +88,12 @@ class Role(Enum): class MessageBase(BaseModel): - role: Role - content: str + role: Role = Field(description="The role of the messages author.") + content: str = Field(description="The contents of the message.") def dict(self, *args, **kwargs): d = super().dict(*args, **kwargs) - d['role'] = d['role'].value + d["role"] = d["role"].value return d diff --git a/src/canopy_cli/__init__.py b/src/canopy_cli/__init__.py index e69de29b..a4791643 100644 --- a/src/canopy_cli/__init__.py +++ b/src/canopy_cli/__init__.py @@ -0,0 +1,27 @@ +HTML_TEMPLATE = """ + + + + Canopy API Spec + + + + + + + +
+ Redoc + + + + +""" # noqa: E501 diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index d0e9000d..d0b5c827 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -551,5 +551,29 @@ def stop(url): raise CLIError(msg) +@cli.command( + help=( + """ + \b + Open the Canopy Server docs + """ + ) +) +def docs(): + import json + from canopy_cli import HTML_TEMPLATE + from canopy_server.app import app + # generate docs + + filename = "canopy-api-docs.html" + + with open(filename, "w") as fd: + print(HTML_TEMPLATE % json.dumps(app.openapi()), file=fd) + + import webbrowser + + webbrowser.open('file://' + os.path.realpath(filename)) + + if __name__ == "__main__": cli() diff --git a/src/canopy_server/__init__.py b/src/canopy_server/__init__.py index e69de29b..85a89cf2 100644 --- a/src/canopy_server/__init__.py +++ b/src/canopy_server/__init__.py @@ -0,0 +1,13 @@ +description = """ +Canopy is an open-source Retrieval Augmented Generation (RAG) framework and context engine built on top of the Pinecone vector database. Canopy enables you to quickly and easily experiment with and build applications using RAG. Start chatting with your documents or text data with a few simple commands. + +Canopy provides a configurable built-in server so you can effortlessly deploy a RAG-powered chat application to your existing chat UI or interface. Or you can build your own, custom RAG application using the Canopy lirbary. + +## Prerequisites + +### Pinecone API key +To get Pinecone free trial API key and environment register or log into your Pinecone account in the console (https://app.pinecone.io/). You can access your API key from the "API Keys" section in the sidebar of your dashboard, and find the environment name next to it. + +### OpenAI API key +You can find your free trial OpenAI API key https://platform.openai.com/account/api-keys. You might need to login or register to OpenAI services. +""" # noqa: E501 diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index dae422f5..c80d542f 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -1,15 +1,26 @@ from typing import Optional, List -from pydantic import BaseModel +from pydantic import BaseModel, Field from canopy.models.data_models import Messages, Query, Document class ChatRequest(BaseModel): - model: str = "" - messages: Messages - stream: bool = False - user: Optional[str] = None + model: str = Field( + default="", + description="ID of the model to use. If empty, the default model will be used.", # noqa: E501 + ) + messages: Messages = Field( + description="A list of messages comprising the conversation so far." + ) + stream: bool = Field( + default=False, + description="""If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.""", # noqa: E501 + ) + user: Optional[str] = Field( + default=None, + description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.", # noqa: E501 + ) class ContextQueryRequest(BaseModel): @@ -19,11 +30,13 @@ class ContextQueryRequest(BaseModel): class ContextUpsertRequest(BaseModel): documents: List[Document] - batch_size: int = 200 + batch_size: int = Field( + default=200, description="Batch size for upserting documents to Pinecone." 
+ ) class ContextDeleteRequest(BaseModel): - document_ids: List[str] + document_ids: List[str] = Field(description="List of document ids to delete.") class HealthStatus(BaseModel): @@ -38,5 +51,28 @@ class ChatDebugInfo(BaseModel): prompt_tokens: Optional[int] = None generated_tokens: Optional[int] = None - def to_text(self,): + def to_text( + self, + ): return self.json() + + +class ShutdownResponse(BaseModel): + message: str = Field( + default="Shutting down", + description="Message indicating the server is shutting down.", + ) + + +class SuccessUpsertResponse(BaseModel): + message: str = Field( + default="Success", + description="Message indicating the upsert was successful.", + ) + + +class SuccessDeleteResponse(BaseModel): + message: str = Field( + default="Success", + description="Message indicating the delete was successful.", + ) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index bf34cb83..97b70881 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -22,19 +22,39 @@ import uvicorn from typing import cast -from canopy.models.api_models import StreamingChatResponse, ChatResponse -from canopy.models.data_models import Context, UserMessage -from .api_models import \ - ChatRequest, ContextQueryRequest, \ - ContextUpsertRequest, HealthStatus, ContextDeleteRequest +from canopy.models.api_models import ( + StreamingChatResponse, + ChatResponse, +) +from canopy.models.data_models import Context, UserMessage, ContextContentResponse +from .api_models import ( + ChatRequest, + ContextQueryRequest, + ContextUpsertRequest, + HealthStatus, + ContextDeleteRequest, + ShutdownResponse, + SuccessUpsertResponse, + SuccessDeleteResponse, +) from canopy.llm.openai import OpenAILLM from canopy_cli.errors import ConfigError +from canopy_server import description +from canopy import __version__ load_dotenv() # load env vars before import of openai openai.api_key = os.getenv("OPENAI_API_KEY") -app = FastAPI() +app = FastAPI( + title="Canopy API", + description=description, + version=__version__, + license_info={ + "name": "Apache 2.0", + "url": "https://www.apache.org/licenses/LICENSE-2.0.html", + }, +) context_engine: ContextEngine chat_engine: ChatEngine @@ -45,19 +65,29 @@ @app.post( "/context/chat/completions", + response_model=ChatResponse, + responses={500: {"description": "Failed to chat with Canopy"}}, # noqa: E501 ) async def chat( request: ChatRequest = Body(...), -): +) -> ChatResponse: + """ + Chat with Canopy, using the LLM and context engine, and return a response. 
+ + The request schema is following OpenAI's chat completion API schema, but removes the need to configure + anything, other than the messages field: for more imformation see: https://platform.openai.com/docs/api-reference/chat/create + + """ # noqa: E501 try: session_id = request.user or "None" # noqa: F841 question_id = str(uuid.uuid4()) logger.debug(f"Received chat request: {request.messages[-1].content}") - answer = await run_in_threadpool(chat_engine.chat, - messages=request.messages, - stream=request.stream) + answer = await run_in_threadpool( + chat_engine.chat, messages=request.messages, stream=request.stream + ) if request.stream: + def stringify_content(response: StreamingChatResponse): for chunk in response.chunks: chunk.id = question_id @@ -65,7 +95,7 @@ def stringify_content(response: StreamingChatResponse): yield data content_stream = stringify_content(cast(StreamingChatResponse, answer)) - return EventSourceResponse(content_stream, media_type='text/event-stream') + return EventSourceResponse(content_stream, media_type="text/event-stream") else: chat_response = cast(ChatResponse, answer) @@ -74,105 +104,134 @@ def stringify_content(response: StreamingChatResponse): except Exception as e: logger.exception(f"Chat with question_id {question_id} failed") - raise HTTPException( - status_code=500, detail=f"Internal Service Error: {str(e)}") + raise HTTPException(status_code=500, detail=f"Internal Service Error: {str(e)}") @app.post( "/context/query", + response_model=ContextContentResponse, + responses={ + 500: {"description": "Failed to query the knowledgebase or Build the context"} + }, ) async def query( request: ContextQueryRequest = Body(...), -): +) -> ContextContentResponse: + """ + Query the knowledgebase and return a context. Context is a collections of text snippets, each with a source. + Query enables tuning the context length (in tokens) such that you can cap the cost of the generation. + This method can be used with or without a LLM. + """ # noqa: E501 try: context: Context = await run_in_threadpool( context_engine.query, queries=request.queries, - max_context_tokens=request.max_tokens) + max_context_tokens=request.max_tokens, + ) return context.content except Exception as e: logger.exception(e) - raise HTTPException( - status_code=500, detail=f"Internal Service Error: {str(e)}") + raise HTTPException(status_code=500, detail=f"Internal Service Error: {str(e)}") @app.post( "/context/upsert", + response_model=SuccessUpsertResponse, + responses={500: {"description": "Failed to upsert documents"}}, ) async def upsert( request: ContextUpsertRequest = Body(...), -): +) -> SuccessUpsertResponse: + """ + Upsert documents into the knowledgebase. Upserting is a way to add new documents or update existing ones. + Each document has a unique ID. If a document with the same ID already exists, it will be updated. + + This method will run the processing, chunking and endocing of the data in parallel, and then send the + encoded data to the Pinecone Index in batches. 
+ """ # noqa: E501 try: logger.info(f"Upserting {len(request.documents)} documents") - upsert_results = await run_in_threadpool( - kb.upsert, - documents=request.documents, - batch_size=request.batch_size) + await run_in_threadpool( + kb.upsert, documents=request.documents, batch_size=request.batch_size + ) - return upsert_results + return SuccessUpsertResponse() except Exception as e: logger.exception(e) - raise HTTPException( - status_code=500, detail=f"Internal Service Error: {str(e)}") + raise HTTPException(status_code=500, detail=f"Internal Service Error: {str(e)}") @app.post( "/context/delete", + response_model=SuccessDeleteResponse, + responses={500: {"description": "Failed to delete documents"}}, ) async def delete( request: ContextDeleteRequest = Body(...), -): +) -> SuccessDeleteResponse: + """ + Delete documents from the knowledgebase. Deleting documents is done by their unique ID. + """ # noqa: E501 try: logger.info(f"Delete {len(request.document_ids)} documents") - await run_in_threadpool( - kb.delete, - document_ids=request.document_ids) - return {"message": "success"} + await run_in_threadpool(kb.delete, document_ids=request.document_ids) + return SuccessDeleteResponse() except Exception as e: logger.exception(e) - raise HTTPException( - status_code=500, detail=f"Internal Service Error: {str(e)}") + raise HTTPException(status_code=500, detail=f"Internal Service Error: {str(e)}") @app.get( "/health", + response_model=HealthStatus, + responses={500: {"description": "Failed to connect to Pinecone or LLM"}}, ) -async def health_check(): +@app.exception_handler(Exception) +async def health_check() -> HealthStatus: + """ + Health check for the Canopy server. This endpoint checks the connection to Pinecone and the LLM. + """ # noqa: E501 try: await run_in_threadpool(kb.verify_index_connection) except Exception as e: err_msg = f"Failed connecting to Pinecone Index {kb._index_name}" logger.exception(err_msg) raise HTTPException( - status_code=500, detail=f"{err_msg}. Error: {str(e)}") from e + status_code=500, detail=f"{err_msg}. Error: {str(e)}" + ) from e try: msg = UserMessage(content="This is a health check. Are you alive? Be concise") - await run_in_threadpool(llm.chat_completion, - messages=[msg], - max_tokens=50) + await run_in_threadpool(llm.chat_completion, messages=[msg], max_tokens=50) except Exception as e: err_msg = f"Failed to communicate with {llm.__class__.__name__}" logger.exception(err_msg) raise HTTPException( - status_code=500, detail=f"{err_msg}. Error: {str(e)}") from e + status_code=500, detail=f"{err_msg}. Error: {str(e)}" + ) from e return HealthStatus(pinecone_status="OK", llm_status="OK") -@app.get( - "/shutdown" -) -async def shutdown(): +@app.get("/shutdown") +async def shutdown() -> ShutdownResponse: + """ + __WARNING__: Experimental method. + + + This method will shutdown the server. It is used for testing purposes, and not recommended to be used + in production. + This method will locate the parent process and send a SIGINT signal to it. 
+ """ # noqa: E501 logger.info("Shutting down") proc = current_process() pid = proc._parent_pid if "SpawnProcess" in proc.name else proc.pid os.kill(pid, signal.SIGINT) - return {"message": "Shutting down"} + return ShutdownResponse() @app.on_event("startup") @@ -190,11 +249,11 @@ def _init_logging(): stdout_handler = logging.StreamHandler(stream=sys.stdout) handlers = [file_handler, stdout_handler] logging.basicConfig( - format='%(asctime)s - %(processName)s - %(name)-10s [%(levelname)-8s]: ' - '%(message)s', + format="%(asctime)s - %(processName)s - %(name)-10s [%(levelname)-8s]: " + "%(message)s", level=os.getenv("CE_LOG_LEVEL", "INFO").upper(), handlers=handlers, - force=True + force=True, ) logger = logging.getLogger(__name__) @@ -211,8 +270,10 @@ def _init_engines(): _load_config(config_file) else: - logger.info("Did not find config file. Initializing engines with default " - "configuration") + logger.info( + "Did not find config file. Initializing engines with default " + "configuration" + ) Tokenizer.initialize() kb = KnowledgeBase(index_name=index_name) context_engine = ContextEngine(knowledge_base=kb) @@ -230,9 +291,7 @@ def _load_config(config_file): config = yaml.safe_load(f) except Exception as e: logger.exception(f"Failed to load config file {config_file}") - raise ConfigError( - f"Failed to load config file {config_file}. Error: {str(e)}" - ) + raise ConfigError(f"Failed to load config file {config_file}. Error: {str(e)}") tokenizer_config = config.get("tokenizer", {}) Tokenizer.initialize_from_config(tokenizer_config) if "chat_engine" not in config: From bdea5a60a07b8adc9abc5ab780ee94e4623e26d7 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 10:39:13 +0200 Subject: [PATCH 02/31] fix response type in Chat --- src/canopy_cli/cli.py | 2 +- src/canopy_server/app.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index d0b5c827..bc8d005e 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -559,7 +559,7 @@ def stop(url): """ ) ) -def docs(): +def api_docs(): import json from canopy_cli import HTML_TEMPLATE from canopy_server.app import app diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 97b70881..7e7a9651 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -20,7 +20,7 @@ from fastapi import FastAPI, HTTPException, Body import uvicorn -from typing import cast +from typing import cast, Union from canopy.models.api_models import ( StreamingChatResponse, @@ -43,6 +43,9 @@ from canopy_server import description from canopy import __version__ + +APIChatResponse = Union[ChatResponse, EventSourceResponse] + load_dotenv() # load env vars before import of openai openai.api_key = os.getenv("OPENAI_API_KEY") @@ -65,12 +68,12 @@ @app.post( "/context/chat/completions", - response_model=ChatResponse, + response_model=APIChatResponse, responses={500: {"description": "Failed to chat with Canopy"}}, # noqa: E501 ) async def chat( request: ChatRequest = Body(...), -) -> ChatResponse: +) -> APIChatResponse: """ Chat with Canopy, using the LLM and context engine, and return a response. 
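[Reviewer note, not part of the patch series] A quick way to exercise the chat endpoint introduced in the two patches above is to call it over HTTP. The sketch below is illustrative only: it assumes a server already listening on http://0.0.0.0:8000 (the CLI default), that the `Role` enum serializes to lowercase strings such as "user", and that streamed chunks follow OpenAI's chat.completion.chunk schema, as the `stream` field's description suggests. Only the `messages` and `stream` fields of `ChatRequest` are used.

    import json
    import requests

    CHAT_URL = "http://0.0.0.0:8000/context/chat/completions"

    def chat_once(question: str) -> str:
        # Non-streaming call: the server returns a single ChatResponse JSON body.
        payload = {"messages": [{"role": "user", "content": question}], "stream": False}
        res = requests.post(CHAT_URL, json=payload)
        res.raise_for_status()
        # Assumes the OpenAI-style "choices" layout of ChatResponse.
        return res.json()["choices"][0]["message"]["content"]

    def chat_stream(question: str) -> None:
        # Streaming call: the body is server-sent events, one "data: ..." line per
        # chunk, terminated by "data: [DONE]" per the ChatRequest.stream description.
        payload = {"messages": [{"role": "user", "content": question}], "stream": True}
        with requests.post(CHAT_URL, json=payload, stream=True) as res:
            res.raise_for_status()
            for raw in res.iter_lines():
                if not raw:
                    continue
                data = raw.decode("utf-8")
                if data.startswith("data: "):
                    data = data[len("data: "):]
                if data == "[DONE]":
                    break
                chunk = json.loads(data)  # assumed chat.completion.chunk payload
                print(chunk["choices"][0]["delta"].get("content", ""), end="", flush=True)

    if __name__ == "__main__":
        print(chat_once("What is Canopy?"))
        chat_stream("And what is RAG?")

Note that `stream` appears twice on purpose: the flag in the request body tells Canopy to emit server-sent events, while `stream=True` on `requests.post` tells the client not to buffer the whole response.
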
From 001bac82c246528875d016795fa024cf5bcc3738 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 10:59:51 +0200 Subject: [PATCH 03/31] ignore line mypy --- src/canopy_server/app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 7e7a9651..6c8b9562 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -232,7 +232,8 @@ async def shutdown() -> ShutdownResponse: """ # noqa: E501 logger.info("Shutting down") proc = current_process() - pid = proc._parent_pid if "SpawnProcess" in proc.name else proc.pid + # TODO: fix this, BaseProcess does have _parent_pid but it is not in the stubs + pid = proc._parent_pid if "SpawnProcess" in proc.name else proc.pid # type: ignore os.kill(pid, signal.SIGINT) return ShutdownResponse() From 78a0a3b339d882955b7e67b841b617885d1a5b0a Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 12:45:18 +0200 Subject: [PATCH 04/31] remove soon dep. flag --- src/canopy/models/data_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 05a941e5..eab29a3f 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -22,7 +22,7 @@ class Query(BaseModel): ) top_k: Optional[int] = Field( default=None, - description="[soon deprecated] The number of results to return." + description="The number of results to return." ) query_params: dict = Field( default_factory=dict, From 2ad53851a0c1e8578554287bf39b9829f32903ba Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 12:50:35 +0200 Subject: [PATCH 05/31] Add 'ignored' for model param --- src/canopy_server/api_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index c80d542f..cb6c6049 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -8,7 +8,7 @@ class ChatRequest(BaseModel): model: str = Field( default="", - description="ID of the model to use. If empty, the default model will be used.", # noqa: E501 + description="ID of the model to use. Currecntly this field is ignored and this should be configured on Canopy config.", # noqa: E501 ) messages: Messages = Field( description="A list of messages comprising the conversation so far." From 575301cf6abc6bc36557d76e9741a63ed209517f Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 12:51:27 +0200 Subject: [PATCH 06/31] stream descp --- src/canopy_server/api_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index cb6c6049..82c1299b 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -15,7 +15,7 @@ class ChatRequest(BaseModel): ) stream: bool = Field( default=False, - description="""If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.""", # noqa: E501 + description="""Whether or not to stream the chatbot's response. 
If set, the response will be server-sent events containing [chat.completion.chunk](https://platform.openai.com/docs/api-reference/chat/streaming) objects""", # noqa: E501 ) user: Optional[str] = Field( default=None, From 257130c0129ccb3670c4ac5073b938d7bf573380 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 12:52:09 +0200 Subject: [PATCH 07/31] user descp --- src/canopy_server/api_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index 82c1299b..868fd4e3 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -19,7 +19,7 @@ class ChatRequest(BaseModel): ) user: Optional[str] = Field( default=None, - description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.", # noqa: E501 + description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Unused, reserved for future extensions", # noqa: E501 ) From 553661cc292ed9ac30a8cc8a511ae5c249849735 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 12:52:57 +0200 Subject: [PATCH 08/31] batch size --- src/canopy_server/api_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index 868fd4e3..b6406a6b 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -31,7 +31,7 @@ class ContextQueryRequest(BaseModel): class ContextUpsertRequest(BaseModel): documents: List[Document] batch_size: int = Field( - default=200, description="Batch size for upserting documents to Pinecone." + default=200, description="The batch size to use when uploading documents chunks to the Pinecone Index." # noqa: E501 ) From d4a725ea77dbc98d2b059148faa9887ed3f57f35 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 13:00:11 +0200 Subject: [PATCH 09/31] app.py --- src/canopy_server/app.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 6c8b9562..9c375fa5 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -77,8 +77,8 @@ async def chat( """ Chat with Canopy, using the LLM and context engine, and return a response. - The request schema is following OpenAI's chat completion API schema, but removes the need to configure - anything, other than the messages field: for more imformation see: https://platform.openai.com/docs/api-reference/chat/create + The request schema is following OpenAI's chat completion API schema: https://platform.openai.com/docs/api-reference/chat/create. + Note that all fields other than `messages` and `stream` are currently ignored. The Canopy server uses the model parameters defined in the `ChatEngine` config for all underlying LLM calls. """ # noqa: E501 try: @@ -121,9 +121,10 @@ async def query( request: ContextQueryRequest = Body(...), ) -> ContextContentResponse: """ - Query the knowledgebase and return a context. Context is a collections of text snippets, each with a source. - Query enables tuning the context length (in tokens) such that you can cap the cost of the generation. - This method can be used with or without a LLM. + Query the knowledge base for relevant context. + The returned text might be structured or unstructured, depending on the ContextEngine's configuration. + Query allows limiting the context length (in tokens), to control LLM costs. 
+ This method does not pass through the LLM and uses only retieval and construction from Pinecone DB. """ # noqa: E501 try: context: Context = await run_in_threadpool( @@ -151,8 +152,7 @@ async def upsert( Upsert documents into the knowledgebase. Upserting is a way to add new documents or update existing ones. Each document has a unique ID. If a document with the same ID already exists, it will be updated. - This method will run the processing, chunking and endocing of the data in parallel, and then send the - encoded data to the Pinecone Index in batches. + The documents will be chunked and encoded, then the resulting encoded chunks will be sent to the Pinecone index in batches """ # noqa: E501 try: logger.info(f"Upserting {len(request.documents)} documents") From c7a014cc3285bb485ed62ac330b4c269b93670a7 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 13:11:19 +0200 Subject: [PATCH 10/31] fix trailing whitespace --- src/canopy_server/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 9c375fa5..406fbefc 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -77,7 +77,7 @@ async def chat( """ Chat with Canopy, using the LLM and context engine, and return a response. - The request schema is following OpenAI's chat completion API schema: https://platform.openai.com/docs/api-reference/chat/create. + The request schema is following OpenAI's chat completion API schema: https://platform.openai.com/docs/api-reference/chat/create. Note that all fields other than `messages` and `stream` are currently ignored. The Canopy server uses the model parameters defined in the `ChatEngine` config for all underlying LLM calls. """ # noqa: E501 @@ -121,7 +121,7 @@ async def query( request: ContextQueryRequest = Body(...), ) -> ContextContentResponse: """ - Query the knowledge base for relevant context. + Query the knowledge base for relevant context. The returned text might be structured or unstructured, depending on the ContextEngine's configuration. Query allows limiting the context length (in tokens), to control LLM costs. This method does not pass through the LLM and uses only retieval and construction from Pinecone DB. 
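[Reviewer note, not part of the patch series] Because the docstrings revised above describe /context/upsert and /context/query only in prose, here is a minimal end-to-end sketch. It assumes a server running on the CLI's default address and builds the request bodies strictly from the fields visible in the request models: `documents` and `batch_size` for upsert, `queries` and `max_tokens` for query, with each query carrying the `text` field of the `Query` model. The document id, source, and metadata values are made up for illustration.

    import requests

    BASE_URL = "http://0.0.0.0:8000"

    # Upsert: documents are chunked and encoded server-side, then the resulting
    # chunks are written to the Pinecone index in batches of `batch_size`.
    documents = [
        {
            "id": "doc1",
            "text": "Photosynthesis converts light energy into chemical energy.",
            "source": "https://example.com/photosynthesis",
            # Must avoid the reserved keys "text", "document_id" and "source"
            # enforced by the Document validator.
            "metadata": {"topic": "biology"},
        },
    ]
    res = requests.post(f"{BASE_URL}/context/upsert",
                        json={"documents": documents, "batch_size": 200})
    res.raise_for_status()  # body is a SuccessUpsertResponse on success

    # Query: retrieval and context construction only, no LLM call.
    # `max_tokens` caps the size (in tokens) of the returned context.
    res = requests.post(f"{BASE_URL}/context/query",
                        json={"queries": [{"text": "How does photosynthesis work?"}],
                              "max_tokens": 512})
    res.raise_for_status()
    print(res.json())  # shape depends on the configured ContextEngine
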
From 1d9d566050dccf62b8cc2a4ab6f2794909059486 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 22:23:26 +0200 Subject: [PATCH 11/31] fix --- src/canopy_server/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 406fbefc..9e016d35 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -68,7 +68,7 @@ @app.post( "/context/chat/completions", - response_model=APIChatResponse, + response_model=None, responses={500: {"description": "Failed to chat with Canopy"}}, # noqa: E501 ) async def chat( From acc42c3aea0ea9d8c26608a18d1c2bc6e88570da Mon Sep 17 00:00:00 2001 From: miararoy Date: Thu, 2 Nov 2023 22:57:03 +0200 Subject: [PATCH 12/31] Apply suggestions from code review Co-authored-by: byronnlandry <104170519+byronnlandry@users.noreply.github.com> --- src/canopy_server/api_models.py | 2 +- src/canopy_server/app.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index b6406a6b..530a2cb1 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -36,7 +36,7 @@ class ContextUpsertRequest(BaseModel): class ContextDeleteRequest(BaseModel): - document_ids: List[str] = Field(description="List of document ids to delete.") + document_ids: List[str] = Field(description="List of document IDs to delete.") class HealthStatus(BaseModel): diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 9e016d35..1870cef6 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -114,7 +114,7 @@ def stringify_content(response: StreamingChatResponse): "/context/query", response_model=ContextContentResponse, responses={ - 500: {"description": "Failed to query the knowledgebase or Build the context"} + 500: {"description": "Failed to query the knowledge base or build the context"} }, ) async def query( @@ -149,8 +149,8 @@ async def upsert( request: ContextUpsertRequest = Body(...), ) -> SuccessUpsertResponse: """ - Upsert documents into the knowledgebase. Upserting is a way to add new documents or update existing ones. - Each document has a unique ID. If a document with the same ID already exists, it will be updated. + Upsert documents into the knowledge base. Upserting is a way to add new documents or update existing ones. + Each document has a unique ID. If a document with the same ID already exists, it is updated. 
The documents will be chunked and encoded, then the resulting encoded chunks will be sent to the Pinecone index in batches """ # noqa: E501 From 8227d848033c6bc40a059d6f85aecf9fa694ce6b Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 10:55:36 +0200 Subject: [PATCH 13/31] [app] Don't use private attribute Instead of using process._parent_id which is not garuranteed, use `os.getppid()` --- src/canopy_server/app.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 1870cef6..d961084a 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -232,8 +232,7 @@ async def shutdown() -> ShutdownResponse: """ # noqa: E501 logger.info("Shutting down") proc = current_process() - # TODO: fix this, BaseProcess does have _parent_pid but it is not in the stubs - pid = proc._parent_pid if "SpawnProcess" in proc.name else proc.pid # type: ignore + pid = os.getppid() if "SpawnProcess" in proc.name else proc.pid os.kill(pid, signal.SIGINT) return ShutdownResponse() From 6f4b0a13d39a066adf3116620e08c817ebb06cad Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 11:00:14 +0200 Subject: [PATCH 14/31] [app] Parent process - use an even better solution This is the official method by the mp module, which should work across all OSes (hopefully..) --- src/canopy_server/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index d961084a..5c90f573 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -5,7 +5,7 @@ import uuid import openai -from multiprocessing import current_process +from multiprocessing import current_process, parent_process import yaml from dotenv import load_dotenv @@ -232,7 +232,7 @@ async def shutdown() -> ShutdownResponse: """ # noqa: E501 logger.info("Shutting down") proc = current_process() - pid = os.getppid() if "SpawnProcess" in proc.name else proc.pid + pid = parent_process().pid if parent_process() else proc.pid os.kill(pid, signal.SIGINT) return ShutdownResponse() From 9db8c0f3ccf7dc641778da144958c375bc2ddd7a Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 11:50:30 +0200 Subject: [PATCH 15/31] [app] Properly handle None case It shouldn't theotically happen, but who knows... --- src/canopy_server/app.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 5c90f573..ca109a74 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -26,7 +26,7 @@ StreamingChatResponse, ChatResponse, ) -from canopy.models.data_models import Context, UserMessage, ContextContentResponse +from canopy.models.data_models import Context, UserMessage, ContextContents from .api_models import ( ChatRequest, ContextQueryRequest, @@ -112,14 +112,14 @@ def stringify_content(response: StreamingChatResponse): @app.post( "/context/query", - response_model=ContextContentResponse, + response_model=ContextContents, responses={ 500: {"description": "Failed to query the knowledge base or build the context"} }, ) async def query( request: ContextQueryRequest = Body(...), -) -> ContextContentResponse: +) -> ContextContents: """ Query the knowledge base for relevant context. The returned text might be structured or unstructured, depending on the ContextEngine's configuration. 
@@ -232,7 +232,13 @@ async def shutdown() -> ShutdownResponse: """ # noqa: E501 logger.info("Shutting down") proc = current_process() - pid = parent_process().pid if parent_process() else proc.pid + p_process = parent_process() + pid = p_process.pid if p_process is not None else proc.pid + if not pid: + raise HTTPException( + status_code=500, + detail="Failed to locate parent process. Cannot shutdown server.", + ) os.kill(pid, signal.SIGINT) return ShutdownResponse() From 8f019b6a4d324b7c7191a1a540c37cc5ec4d5789 Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 11:56:27 +0200 Subject: [PATCH 16/31] [models] Rename ContextContentResponse This was a horrible name that doesn't represent the true meaning of this class --- src/canopy/context_engine/models.py | 4 ++-- src/canopy/models/data_models.py | 15 ++++++++------- src/canopy_server/app.py | 6 +++--- tests/unit/context_engine/test_context_engine.py | 10 +++++----- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/canopy/context_engine/models.py b/src/canopy/context_engine/models.py index bb42946f..5f273425 100644 --- a/src/canopy/context_engine/models.py +++ b/src/canopy/context_engine/models.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from canopy.models.data_models import ContextContent +from canopy.models.data_models import _ContextContent class ContextSnippet(BaseModel): @@ -10,7 +10,7 @@ class ContextSnippet(BaseModel): text: str -class ContextQueryResult(ContextContent): +class ContextQueryResult(_ContextContent): query: str snippets: List[ContextSnippet] diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index eab29a3f..639100f6 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -56,7 +56,7 @@ def metadata_reseved_fields(cls, v): return v -class ContextContent(BaseModel, ABC): +class _ContextContent(BaseModel, ABC): # Any context should be able to be represented as well formatted text. # In the most minimal case, that could simply be a call to `.json()`. 
@abstractmethod @@ -64,20 +64,21 @@ def to_text(self, **kwargs) -> str: pass +ContextContent = Union[_ContextContent, Sequence[_ContextContent]] + + class Context(BaseModel): - content: Union[ContextContent, Sequence[ContextContent]] + content: ContextContent num_tokens: int = Field(exclude=True) debug_info: dict = Field(default_factory=dict, exclude=True) def to_text(self, **kwargs) -> str: - if isinstance(self.content, ContextContent): - return self.content.to_text(**kwargs) - else: + if isinstance(self.content, Sequence): return "\n".join([c.to_text(**kwargs) for c in self.content]) + else: + return self.content.to_text(**kwargs) -ContextContentResponse = Union[ContextContent, Sequence[ContextContent]] - # --------------------- LLM models ------------------------ diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index ca109a74..f9957952 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -26,7 +26,7 @@ StreamingChatResponse, ChatResponse, ) -from canopy.models.data_models import Context, UserMessage, ContextContents +from canopy.models.data_models import Context, UserMessage, ContextContent from .api_models import ( ChatRequest, ContextQueryRequest, @@ -112,14 +112,14 @@ def stringify_content(response: StreamingChatResponse): @app.post( "/context/query", - response_model=ContextContents, + response_model=ContextContent, responses={ 500: {"description": "Failed to query the knowledge base or build the context"} }, ) async def query( request: ContextQueryRequest = Body(...), -) -> ContextContents: +) -> ContextContent: """ Query the knowledge base for relevant context. The returned text might be structured or unstructured, depending on the ContextEngine's configuration. diff --git a/tests/unit/context_engine/test_context_engine.py b/tests/unit/context_engine/test_context_engine.py index 6eeefedb..a102db36 100644 --- a/tests/unit/context_engine/test_context_engine.py +++ b/tests/unit/context_engine/test_context_engine.py @@ -8,7 +8,7 @@ from canopy.context_engine.models import ContextQueryResult, ContextSnippet from canopy.knowledge_base.base import BaseKnowledgeBase from canopy.knowledge_base.models import QueryResult, DocumentWithScore -from canopy.models.data_models import Query, Context, ContextContent +from canopy.models.data_models import Query, Context, _ContextContent class TestContextEngine: @@ -68,7 +68,7 @@ def test_query(context_engine, queries = [Query(text="How does photosynthesis work?")] max_context_tokens = 100 - mock_context_content = create_autospec(ContextContent) + mock_context_content = create_autospec(_ContextContent) mock_context_content.to_text.return_value = sample_context_text mock_context = Context(content=mock_context_content, num_tokens=21) @@ -93,7 +93,7 @@ def test_query_with_metadata_filter(context_engine, queries = [Query(text="How does photosynthesis work?")] max_context_tokens = 100 - mock_context_content = create_autospec(ContextContent) + mock_context_content = create_autospec(_ContextContent) mock_context_content.to_text.return_value = sample_context_text mock_context = Context(content=mock_context_content, num_tokens=21) @@ -149,7 +149,7 @@ def test_multiple_queries(context_engine, mock_knowledge_base.query.return_value = extended_mock_query_result combined_text = sample_context_text + "\n" + text - mock_context_content = create_autospec(ContextContent) + mock_context_content = create_autospec(_ContextContent) mock_context_content.to_text.return_value = combined_text mock_context = 
Context(content=mock_context_content, num_tokens=40) @@ -168,7 +168,7 @@ def test_empty_query_results(context_engine, mock_knowledge_base.query.return_value = [] - mock_context_content = create_autospec(ContextContent) + mock_context_content = create_autospec(_ContextContent) mock_context_content.to_text.return_value = "" mock_context = Context(content=mock_context_content, num_tokens=0) From 08a7f2343da539bcf149fc8f4c5fe5387c833924 Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 12:12:00 +0200 Subject: [PATCH 17/31] [app] Fix description Per Nathan and Byron's feedback --- src/canopy_server/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/canopy_server/__init__.py b/src/canopy_server/__init__.py index 85a89cf2..208f7cfc 100644 --- a/src/canopy_server/__init__.py +++ b/src/canopy_server/__init__.py @@ -6,8 +6,9 @@ ## Prerequisites ### Pinecone API key -To get Pinecone free trial API key and environment register or log into your Pinecone account in the console (https://app.pinecone.io/). You can access your API key from the "API Keys" section in the sidebar of your dashboard, and find the environment name next to it. +If you don't have a Pinecone account, you can sign up for a free Starter plan at https://www.pinecone.io/. +To find your Pinecone API key and environment log into Pinecone console (https://app.pinecone.io/). You can access your API key from the "API Keys" section in the sidebar of your dashboard, and find the environment name next to it. ### OpenAI API key -You can find your free trial OpenAI API key https://platform.openai.com/account/api-keys. You might need to login or register to OpenAI services. +You can find your free trial OpenAI API key https://platform.openai.com/account/api-keys. You might need to log in or register for OpenAI services. """ # noqa: E501 From 3d166000538846269412f94b3967861926fd0505 Mon Sep 17 00:00:00 2001 From: igiloh-pinecone <118673156+igiloh-pinecone@users.noreply.github.com> Date: Sun, 5 Nov 2023 12:19:53 +0200 Subject: [PATCH 18/31] Apply Byron's and Nathan's suggestions from code review Co-authored-by: byronnlandry <104170519+byronnlandry@users.noreply.github.com> --- src/canopy/models/data_models.py | 8 ++++---- src/canopy_server/__init__.py | 2 +- src/canopy_server/api_models.py | 4 ++-- src/canopy_server/app.py | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 639100f6..44f1ee92 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -14,11 +14,11 @@ class Query(BaseModel): text: str = Field(description="The query text.") namespace: str = Field( default="", - description="The namespace of the query, to learn more about namespaces, see https://docs.pinecone.io/docs/namespaces", # noqa: E501 + description="The namespace of the query. 
To learn more about namespaces, see https://docs.pinecone.io/docs/namespaces", # noqa: E501 ) metadata_filter: Optional[dict] = Field( default=None, - description="A pinecone metadata filter, to learn more about metadata filters, see https://docs.pinecone.io/docs/metadata-filtering", # noqa: E501 + description="A Pinecone metadata filter, to learn more about metadata filters, see https://docs.pinecone.io/docs/metadata-filtering", # noqa: E501 ) top_k: Optional[int] = Field( default=None, @@ -39,7 +39,7 @@ class Document(BaseModel): ) metadata: Metadata = Field( default_factory=dict, - description="The document metadata, to learn more about metadata, see https://docs.pinecone.io/docs/manage-data", # noqa: E501 + description="The document metadata. To learn more about metadata, see https://docs.pinecone.io/docs/manage-data", # noqa: E501 ) class Config: @@ -89,7 +89,7 @@ class Role(Enum): class MessageBase(BaseModel): - role: Role = Field(description="The role of the messages author.") + role: Role = Field(description="The role of the message's author. Can be one of ['User', 'Assistant', 'System']") content: str = Field(description="The contents of the message.") def dict(self, *args, **kwargs): diff --git a/src/canopy_server/__init__.py b/src/canopy_server/__init__.py index 208f7cfc..9ca26cbd 100644 --- a/src/canopy_server/__init__.py +++ b/src/canopy_server/__init__.py @@ -1,7 +1,7 @@ description = """ Canopy is an open-source Retrieval Augmented Generation (RAG) framework and context engine built on top of the Pinecone vector database. Canopy enables you to quickly and easily experiment with and build applications using RAG. Start chatting with your documents or text data with a few simple commands. -Canopy provides a configurable built-in server so you can effortlessly deploy a RAG-powered chat application to your existing chat UI or interface. Or you can build your own, custom RAG application using the Canopy lirbary. +Canopy provides a configurable built-in server, so you can effortlessly deploy a RAG-powered chat application to your existing chat UI or interface. Or you can build your own custom RAG application using the Canopy library. ## Prerequisites diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index 530a2cb1..4541290e 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -8,14 +8,14 @@ class ChatRequest(BaseModel): model: str = Field( default="", - description="ID of the model to use. Currecntly this field is ignored and this should be configured on Canopy config.", # noqa: E501 + description="The ID of the model to use. This field is ignored; instead, configure this field in the Canopy config.", # noqa: E501 ) messages: Messages = Field( description="A list of messages comprising the conversation so far." ) stream: bool = Field( default=False, - description="""Whether or not to stream the chatbot's response. If set, the response will be server-sent events containing [chat.completion.chunk](https://platform.openai.com/docs/api-reference/chat/streaming) objects""", # noqa: E501 + description="""Whether or not to stream the chatbot's response. 
If set, the response is server-sent events containing [chat.completion.chunk](https://platform.openai.com/docs/api-reference/chat/streaming) objects""", # noqa: E501 ) user: Optional[str] = Field( default=None, diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index f9957952..26367591 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -77,7 +77,7 @@ async def chat( """ Chat with Canopy, using the LLM and context engine, and return a response. - The request schema is following OpenAI's chat completion API schema: https://platform.openai.com/docs/api-reference/chat/create. + The request schema follows OpenAI's chat completion API schema: https://platform.openai.com/docs/api-reference/chat/create. Note that all fields other than `messages` and `stream` are currently ignored. The Canopy server uses the model parameters defined in the `ChatEngine` config for all underlying LLM calls. """ # noqa: E501 @@ -122,9 +122,9 @@ async def query( ) -> ContextContent: """ Query the knowledge base for relevant context. - The returned text might be structured or unstructured, depending on the ContextEngine's configuration. - Query allows limiting the context length (in tokens), to control LLM costs. - This method does not pass through the LLM and uses only retieval and construction from Pinecone DB. + The returned text may be structured or unstructured, depending on the Canopy configuration. + Query allows limiting the context length in tokens to control LLM costs. + This method does not pass through the LLM and uses only retrieval and construction from Pinecone DB. """ # noqa: E501 try: context: Context = await run_in_threadpool( @@ -152,7 +152,7 @@ async def upsert( Upsert documents into the knowledge base. Upserting is a way to add new documents or update existing ones. Each document has a unique ID. If a document with the same ID already exists, it is updated. - The documents will be chunked and encoded, then the resulting encoded chunks will be sent to the Pinecone index in batches + The documents are chunked and encoded, then the resulting encoded chunks are sent to the Pinecone index in batches. """ # noqa: E501 try: logger.info(f"Upserting {len(request.documents)} documents") From 2bbba59d4c258b63372d49593663942b321a74ad Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 12:35:49 +0200 Subject: [PATCH 19/31] [app] Made generating the docs optional This way they are only generated if the user want to generate them locally --- src/canopy_cli/cli.py | 38 ++++++++++++++++++++++++----------- src/canopy_server/__init__.py | 13 ------------ src/canopy_server/app.py | 19 ++++++++++++++++-- 3 files changed, 43 insertions(+), 27 deletions(-) diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index af1c6b1f..ecd691c7 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -577,20 +577,34 @@ def stop(url): """ ) ) -def api_docs(): - import json - from canopy_cli import HTML_TEMPLATE - from canopy_server.app import app - # generate docs - - filename = "canopy-api-docs.html" - - with open(filename, "w") as fd: - print(HTML_TEMPLATE % json.dumps(app.openapi()), file=fd) - +@click.option("--url", default="http://0.0.0.0:8000", + help="Canopy's service url. Defaults to http://0.0.0.0:8000") +def api_docs(url): import webbrowser - webbrowser.open('file://' + os.path.realpath(filename)) + generated_docs = False + try: + check_service_health(url) + except CLIError: + msg = (f"Canopy server is not running. 
Would you like to generate the docs " + f"to a local HTML file?") + click.confirm(click.style(msg, fg="red"), abort=True) + generated_docs = True + + if generated_docs: + import json + from canopy_cli import HTML_TEMPLATE + from canopy_server.app import app + # generate docs + + filename = "canopy-api-docs.html" + msg = f"Generating docs to {filename}" + click.echo(click.style(msg, fg="green")) + with open(filename, "w") as fd: + print(HTML_TEMPLATE % json.dumps(app.openapi()), file=fd) + webbrowser.open('file://' + os.path.realpath(filename)) + else: + webbrowser.open('http://localhost:8000/redoc') if __name__ == "__main__": diff --git a/src/canopy_server/__init__.py b/src/canopy_server/__init__.py index 9ca26cbd..8b137891 100644 --- a/src/canopy_server/__init__.py +++ b/src/canopy_server/__init__.py @@ -1,14 +1 @@ -description = """ -Canopy is an open-source Retrieval Augmented Generation (RAG) framework and context engine built on top of the Pinecone vector database. Canopy enables you to quickly and easily experiment with and build applications using RAG. Start chatting with your documents or text data with a few simple commands. -Canopy provides a configurable built-in server, so you can effortlessly deploy a RAG-powered chat application to your existing chat UI or interface. Or you can build your own custom RAG application using the Canopy library. - -## Prerequisites - -### Pinecone API key -If you don't have a Pinecone account, you can sign up for a free Starter plan at https://www.pinecone.io/. -To find your Pinecone API key and environment log into Pinecone console (https://app.pinecone.io/). You can access your API key from the "API Keys" section in the sidebar of your dashboard, and find the environment name next to it. - -### OpenAI API key -You can find your free trial OpenAI API key https://platform.openai.com/account/api-keys. You might need to log in or register for OpenAI services. -""" # noqa: E501 diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 26367591..3341c7bb 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -40,7 +40,6 @@ from canopy.llm.openai import OpenAILLM from canopy_cli.errors import ConfigError -from canopy_server import description from canopy import __version__ @@ -49,9 +48,25 @@ load_dotenv() # load env vars before import of openai openai.api_key = os.getenv("OPENAI_API_KEY") +APP_DESCRIPTION = """ +Canopy is an open-source Retrieval Augmented Generation (RAG) framework and context engine built on top of the Pinecone vector database. Canopy enables you to quickly and easily experiment with and build applications using RAG. Start chatting with your documents or text data with a few simple commands. + +Canopy provides a configurable built-in server, so you can effortlessly deploy a RAG-powered chat application to your existing chat UI or interface. Or you can build your own custom RAG application using the Canopy library. + +## Prerequisites + +### Pinecone API key +If you don't have a Pinecone account, you can sign up for a free Starter plan at https://www.pinecone.io/. +To find your Pinecone API key and environment log into Pinecone console (https://app.pinecone.io/). You can access your API key from the "API Keys" section in the sidebar of your dashboard, and find the environment name next to it. + +### OpenAI API key +You can find your free trial OpenAI API key https://platform.openai.com/account/api-keys. You might need to log in or register for OpenAI services. 
+""" # noqa: E501 + + app = FastAPI( title="Canopy API", - description=description, + description=APP_DESCRIPTION, version=__version__, license_info={ "name": "Apache 2.0", From 4ac9ff9d218219e4dd8c9f605d53c3abd1a5d945 Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 12:40:29 +0200 Subject: [PATCH 20/31] [app] Moved docs template to their own dedicated file Much more readable --- src/canopy_cli/__init__.py | 28 +-------------------------- src/canopy_cli/cli.py | 2 +- src/canopy_server/_redocs_template.py | 27 ++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 28 deletions(-) create mode 100644 src/canopy_server/_redocs_template.py diff --git a/src/canopy_cli/__init__.py b/src/canopy_cli/__init__.py index a4791643..8b137891 100644 --- a/src/canopy_cli/__init__.py +++ b/src/canopy_cli/__init__.py @@ -1,27 +1 @@ -HTML_TEMPLATE = """ - - - - Canopy API Spec - - - - - - - -
- Redoc - - - - -""" # noqa: E501 + diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index ecd691c7..5a181a5c 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -593,7 +593,7 @@ def api_docs(url): if generated_docs: import json - from canopy_cli import HTML_TEMPLATE + from canopy_server._redocs_template import HTML_TEMPLATE from canopy_server.app import app # generate docs diff --git a/src/canopy_server/_redocs_template.py b/src/canopy_server/_redocs_template.py new file mode 100644 index 00000000..a4791643 --- /dev/null +++ b/src/canopy_server/_redocs_template.py @@ -0,0 +1,27 @@ +HTML_TEMPLATE = """ + + + + Canopy API Spec + + + + + + + +
+ Redoc + + + + +""" # noqa: E501 From 71c7fc6a8a1cc7d1e5e82658e9848168f222d13f Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 12:42:38 +0200 Subject: [PATCH 21/31] fix linter issues --- src/canopy/models/data_models.py | 3 ++- src/canopy_cli/__init__.py | 1 - src/canopy_cli/cli.py | 4 ++-- src/canopy_server/__init__.py | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 44f1ee92..3f57ccb6 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -89,7 +89,8 @@ class Role(Enum): class MessageBase(BaseModel): - role: Role = Field(description="The role of the message's author. Can be one of ['User', 'Assistant', 'System']") + role: Role = Field(description="The role of the message's author. " + "Can be one of ['User', 'Assistant', 'System']") content: str = Field(description="The contents of the message.") def dict(self, *args, **kwargs): diff --git a/src/canopy_cli/__init__.py b/src/canopy_cli/__init__.py index 8b137891..e69de29b 100644 --- a/src/canopy_cli/__init__.py +++ b/src/canopy_cli/__init__.py @@ -1 +0,0 @@ - diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index 5a181a5c..6bcd9fe3 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -586,8 +586,8 @@ def api_docs(url): try: check_service_health(url) except CLIError: - msg = (f"Canopy server is not running. Would you like to generate the docs " - f"to a local HTML file?") + msg = ("Canopy server is not running. Would you like to generate the docs " + "to a local HTML file?") click.confirm(click.style(msg, fg="red"), abort=True) generated_docs = True diff --git a/src/canopy_server/__init__.py b/src/canopy_server/__init__.py index 8b137891..e69de29b 100644 --- a/src/canopy_server/__init__.py +++ b/src/canopy_server/__init__.py @@ -1 +0,0 @@ - From 3e63f95de258e2f600874267dabc4a30a9d06dad Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 13:09:00 +0200 Subject: [PATCH 22/31] [CLI] Control commands order in help message This way, running 'canopy --help' prints the errors in the order matching the quick start --- src/canopy_cli/cli.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index 6bcd9fe3..716ec1a3 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -131,7 +131,29 @@ def _load_kb_config(config_file: Optional[str]) -> Dict[str, Any]: return kb_config -@click.group(invoke_without_command=True, context_settings=CONTEXT_SETTINGS) +class CanopyCommandGroup(click.Group): + """ + A custom click Group that lets us control the order of commands in the help menu. 
+ """ + def __init__(self, name=None, commands=None, **attrs): + super().__init__(name, commands, **attrs) + self._commands_order = { + "new": 0, + "upsert": 1, + "start": 2, + "chat": 3, + "health": 4, + "stop": 5, + "api-docs": 6, + + } + + def list_commands(self, ctx): + return sorted(self.commands, key=lambda x: self._commands_order.get(x, 1000)) + + +@click.group(invoke_without_command=True, context_settings=CONTEXT_SETTINGS, + cls=CanopyCommandGroup) @click.version_option(__version__, "-v", "--version", prog_name="Canopy") @click.pass_context def cli(ctx): @@ -608,4 +630,4 @@ def api_docs(url): if __name__ == "__main__": - cli() + cli() \ No newline at end of file From 3f8acd299be7b8f6211d244444b2a34e13ee653d Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 13:13:51 +0200 Subject: [PATCH 23/31] [cli] Rename 'service' to 'server' This conforms with the naming we use in the documentation --- src/canopy_cli/cli.py | 56 +++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index 716ec1a3..a70f2e5b 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -30,7 +30,7 @@ from canopy import __version__ -from canopy_server.app import start as start_service +from canopy_server.app import start as start_server from .cli_spinner import Spinner from canopy_server.api_models import ChatDebugInfo @@ -43,14 +43,14 @@ CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) -def check_service_health(url: str): +def check_server_health(url: str): try: res = requests.get(urljoin(url, "/health")) res.raise_for_status() return res.ok except requests.exceptions.ConnectionError: msg = f""" - Canopy service is not running on {url}. + Canopy server is not running on {url}. please run `canopy start` """ raise CLIError(msg) @@ -61,14 +61,14 @@ def check_service_health(url: str): else: error = str(e) msg = ( - f"Canopy service on {url} is not healthy, failed with error: {error}" + f"Canopy server on {url} is not healthy, failed with error: {error}" ) raise CLIError(msg) @retry(reraise=True, wait=wait_fixed(5), stop=stop_after_attempt(6)) -def wait_for_service(chat_service_url: str): - check_service_health(chat_service_url) +def wait_for_server(chat_server_url: str): + check_server_health(chat_server_url) def validate_connection(): @@ -169,12 +169,12 @@ def cli(ctx): # click.echo(command.get_help(ctx)) -@cli.command(help="Check if canopy service is running and healthy.") +@cli.command(help="Check if canopy server is running and healthy.") @click.option("--url", default="http://0.0.0.0:8000", - help="Canopy's service url. Defaults to http://0.0.0.0:8000") + help="Canopy's server url. Defaults to http://0.0.0.0:8000") def health(url): - check_service_health(url) - click.echo(click.style("Canopy service is healthy!", fg="green")) + check_server_health(url) + click.echo(click.style("Canopy server is healthy!", fg="green")) return @@ -430,10 +430,10 @@ def _chat( help="Print additional debugging information") @click.option("--rag/--no-rag", default=True, help="Compare RAG-infused Chatbot with vanilla LLM",) -@click.option("--chat-service-url", default="http://0.0.0.0:8000", - help="URL of the Canopy service to use. Defaults to http://0.0.0.0:8000") -def chat(chat_service_url, rag, debug, stream): - check_service_health(chat_service_url) +@click.option("--chat-server-url", default="http://0.0.0.0:8000", + help="URL of the Canopy server to use. 
Defaults to http://0.0.0.0:8000") +def chat(chat_server_url, rag, debug, stream): + check_server_health(chat_server_url) note_msg = ( "🚨 Note 🚨\n" "Chat is a debugging tool, it is not meant to be used for production!" @@ -445,7 +445,7 @@ def chat(chat_service_url, rag, debug, stream): note_white_message = ( "This method should be used by developers to test the RAG data and model" "during development. " - "When you are ready to deploy, run the Canopy service as a REST API " + "When you are ready to deploy, run the Canopy server as a REST API " "backend for your chatbot UI. \n\n" "Let's Chat!" ) @@ -468,7 +468,7 @@ def chat(chat_service_url, rag, debug, stream): history=history_with_pinecone, message=message, stream=stream, - api_base=os.path.join(chat_service_url, "context"), + api_base=os.path.join(chat_server_url, "context"), print_debug_info=debug, ) @@ -498,7 +498,7 @@ def chat(chat_service_url, rag, debug, stream): help=( """ \b - Start the Canopy service. + Start the Canopy server. This command will launch a uvicorn server that will serve the Canopy API. If you like to try out the chatbot, run `canopy chat` in a separate terminal @@ -521,7 +521,7 @@ def start(host: str, port: str, reload: bool, config: Optional[str], index_name: Optional[str]): note_msg = ( "🚨 Note 🚨\n" - "For debugging only. To run the Canopy service in production, run the command:" + "For debugging only. To run the Canopy server in production, run the command:" "\n" "gunicorn canopy_server.app:app --worker-class uvicorn.workers.UvicornWorker " f"--bind {host}:{port} --workers " @@ -541,30 +541,30 @@ def start(host: str, port: str, reload: bool, ) os.environ["INDEX_NAME"] = index_name - click.echo(f"Starting Canopy service on {host}:{port}") - start_service(host, port=port, reload=reload, config_file=config) + click.echo(f"Starting Canopy server on {host}:{port}") + start_server(host, port=port, reload=reload, config_file=config) @cli.command( help=( """ \b - Stop the Canopy service. - This command will send a shutdown request to the Canopy service. + Stop the Canopy server. + This command will send a shutdown request to the Canopy server. """ ) ) @click.option("url", "--url", default="http://0.0.0.0:8000", - help="URL of the Canopy service to use. Defaults to http://0.0.0.0:8000") + help="URL of the Canopy server to use. Defaults to http://0.0.0.0:8000") def stop(url): - # Check if the service was started using Gunicorn + # Check if the server was started using Gunicorn res = subprocess.run(["pgrep", "-f", "gunicorn canopy_server.app:app"], capture_output=True) output = res.stdout.decode("utf-8").split() # If Gunicorn was used, kill all Gunicorn processes if output: - msg = ("It seems that Canopy service was launched using Gunicorn.\n" + msg = ("It seems that Canopy server was launched using Gunicorn.\n" "Do you want to kill all Gunicorn processes?") click.confirm(click.style(msg, fg="red"), abort=True) try: @@ -586,7 +586,7 @@ def stop(url): return res.ok except requests.exceptions.ConnectionError: msg = f""" - Could not find Canopy service on {url}. + Could not find Canopy server on {url}. """ raise CLIError(msg) @@ -600,13 +600,13 @@ def stop(url): ) ) @click.option("--url", default="http://0.0.0.0:8000", - help="Canopy's service url. Defaults to http://0.0.0.0:8000") + help="Canopy's server url. Defaults to http://0.0.0.0:8000") def api_docs(url): import webbrowser generated_docs = False try: - check_service_health(url) + check_server_health(url) except CLIError: msg = ("Canopy server is not running. 
Would you like to generate the docs "
               "to a local HTML file?")
         click.confirm(click.style(msg, fg="red"), abort=True)
         generated_docs = True

From 1a29f3a73a33c79a03663fd69571f2a4f9fbf00a Mon Sep 17 00:00:00 2001
From: ilai
Date: Sun, 5 Nov 2023 14:30:08 +0200
Subject: [PATCH 24/31] linter

---
 examples/canopy-lib-quickstart.ipynb | 4 ++--
 src/canopy_cli/cli.py                | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb
index 11764922..3b52e57e 100644
--- a/examples/canopy-lib-quickstart.ipynb
+++ b/examples/canopy-lib-quickstart.ipynb
@@ -32,8 +32,8 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
+      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip available: \u001B[0m\u001B[31;49m22.2.2\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.3.1\u001B[0m\n",
+      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n"
     ]
    }
  ],
diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py
index a70f2e5b..222839c9 100644
--- a/src/canopy_cli/cli.py
+++ b/src/canopy_cli/cli.py
@@ -630,4 +630,4 @@ def api_docs(url):
 
 
 if __name__ == "__main__":
-    cli()
\ No newline at end of file
+    cli()

From 48c8d1bb17e4fda210e08cd7013ddf3ea7acb759 Mon Sep 17 00:00:00 2001
From: ilai
Date: Sun, 5 Nov 2023 23:16:03 +0200
Subject: [PATCH 25/31] Context must contain a ContextContent that implements
 to_text()

In order to support our current StuffingContextBuilder, I added a new
StuffingContextContent which inherits from ContextContent and implements
to_text() correctly.

The app's `/query` path returns a `str`, which is the only guaranteed format
of Context.
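For illustration only (this sketch is not part of the diffs below, and it
simplifies the real models; it assumes pydantic v1 root-model semantics,
which this codebase uses), the new contract boils down to:

    from abc import ABC, abstractmethod
    from typing import List

    from pydantic import BaseModel

    class ContextContent(BaseModel, ABC):
        # Any context content must be representable as well-formatted text.
        @abstractmethod
        def to_text(self, **kwargs) -> str:
            ...

    class StuffingContextContent(ContextContent):
        # pydantic v1 "custom root" model - the content *is* the list itself
        __root__: List[dict]  # simplified; the real model holds ContextQueryResult objects

        def to_text(self, **kwargs) -> str:
            # Rendering to text is plain JSON serialization of the root list
            return self.json(**kwargs)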
The returned content can be any structured or unstructured data -
depending on the ContextBuilder in use.
---
 .../context_builder/stuffing.py               | 19 +++---
 src/canopy/context_engine/models.py           | 22 ++++++-
 src/canopy/models/data_models.py              | 10 +---
 src/canopy_server/app.py                      |  6 +-
 tests/e2e/test_app.py                         |  9 +--
 tests/unit/chat_engine/test_chat_engine.py    | 19 +++---
 .../test_stuffing_context_builder.py          | 58 ++++++++---------
 .../context_engine/test_context_engine.py     | 16 ++---
 8 files changed, 91 insertions(+), 68 deletions(-)

diff --git a/src/canopy/context_engine/context_builder/stuffing.py b/src/canopy/context_engine/context_builder/stuffing.py
index 18ebc56a..8024d547 100644
--- a/src/canopy/context_engine/context_builder/stuffing.py
+++ b/src/canopy/context_engine/context_builder/stuffing.py
@@ -2,7 +2,8 @@
 from typing import List, Tuple
 
 from canopy.context_engine.context_builder.base import ContextBuilder
-from canopy.context_engine.models import ContextQueryResult, ContextSnippet
+from canopy.context_engine.models import (ContextQueryResult, ContextSnippet,
+                                          StuffingContextContent, )
 from canopy.knowledge_base.models import QueryResult, DocumentWithScore
 from canopy.tokenizer import Tokenizer
 from canopy.models.data_models import Context
@@ -24,12 +25,15 @@ def build(self,
             ContextQueryResult(query=qr.query, snippets=[])
             for qr in query_results]
         debug_info = {"num_docs": len(sorted_docs_with_origin)}
 
-        context = Context(content=context_query_results,
-                          num_tokens=0,
-                          debug_info=debug_info)
+        context = Context(
+            content=StuffingContextContent(__root__=context_query_results),
+            num_tokens=0,
+            debug_info=debug_info
+        )
 
         if self._tokenizer.token_count(context.to_text()) > max_context_tokens:
-            return Context(content=[], num_tokens=0, debug_info=debug_info)
+            return Context(content=StuffingContextContent(__root__=[]),
+                           num_tokens=1, debug_info=debug_info)
 
         seen_doc_ids = set()
         for doc, origin_query_idx in sorted_docs_with_origin:
@@ -45,8 +49,9 @@
             context_query_results[origin_query_idx].snippets.pop()
 
         # remove queries with no snippets
-        context.content = [qr for qr in context_query_results
-                           if len(qr.snippets) > 0]
+        context.content = StuffingContextContent(
+            __root__=[qr for qr in context_query_results if len(qr.snippets) > 0]
+        )
 
         context.num_tokens = self._tokenizer.token_count(context.to_text())
         return context

diff --git a/src/canopy/context_engine/models.py b/src/canopy/context_engine/models.py
index 5f273425..1d0266fe 100644
--- a/src/canopy/context_engine/models.py
+++ b/src/canopy/context_engine/models.py
@@ -1,8 +1,8 @@
-from typing import List
+from typing import List, Union
 
 from pydantic import BaseModel
 
-from canopy.models.data_models import _ContextContent
+from canopy.models.data_models import ContextContent
 
 
 class ContextSnippet(BaseModel):
@@ -10,9 +10,25 @@
     text: str
 
 
-class ContextQueryResult(_ContextContent):
+class ContextQueryResult(BaseModel):
     query: str
     snippets: List[ContextSnippet]
 
+
+class StuffingContextContent(ContextContent):
+    __root__: Union[ContextQueryResult, List[ContextQueryResult]]
+
+    def dict(self, **kwargs):
+        return super().dict(**kwargs)['__root__']
+
+    def __iter__(self):
+        return iter(self.__root__)
+
+    def __getitem__(self, item):
+        return self.__root__[item]
+
+    def __len__(self):
+        return len(self.__root__)
+
     def to_text(self, **kwargs):
         return self.json(**kwargs)
 
diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py
index 3f57ccb6..1fb365d1 100644
--- a/src/canopy/models/data_models.py
+++ 
b/src/canopy/models/data_models.py @@ -56,7 +56,7 @@ def metadata_reseved_fields(cls, v): return v -class _ContextContent(BaseModel, ABC): +class ContextContent(BaseModel, ABC): # Any context should be able to be represented as well formatted text. # In the most minimal case, that could simply be a call to `.json()`. @abstractmethod @@ -64,19 +64,13 @@ def to_text(self, **kwargs) -> str: pass -ContextContent = Union[_ContextContent, Sequence[_ContextContent]] - - class Context(BaseModel): content: ContextContent num_tokens: int = Field(exclude=True) debug_info: dict = Field(default_factory=dict, exclude=True) def to_text(self, **kwargs) -> str: - if isinstance(self.content, Sequence): - return "\n".join([c.to_text(**kwargs) for c in self.content]) - else: - return self.content.to_text(**kwargs) + return self.content.to_text(**kwargs) # --------------------- LLM models ------------------------ diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 3341c7bb..84aaf530 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -127,14 +127,13 @@ def stringify_content(response: StreamingChatResponse): @app.post( "/context/query", - response_model=ContextContent, responses={ 500: {"description": "Failed to query the knowledge base or build the context"} }, ) async def query( request: ContextQueryRequest = Body(...), -) -> ContextContent: +) -> str: """ Query the knowledge base for relevant context. The returned text may be structured or unstructured, depending on the Canopy configuration. @@ -147,8 +146,7 @@ async def query( queries=request.queries, max_context_tokens=request.max_tokens, ) - - return context.content + return context.to_text() except Exception as e: logger.exception(e) diff --git a/tests/e2e/test_app.py b/tests/e2e/test_app.py index 41357f4a..fdd8c148 100644 --- a/tests/e2e/test_app.py +++ b/tests/e2e/test_app.py @@ -1,3 +1,4 @@ +import json import os from typing import List @@ -27,14 +28,14 @@ ) -@retry(stop=stop_after_attempt(60), wait=wait_fixed(1)) +@retry(reraise=True, stop=stop_after_attempt(60), wait=wait_fixed(1)) def assert_vector_ids_exist(vector_ids: List[str], knowledge_base: KnowledgeBase): fetch_response = knowledge_base._index.fetch(ids=vector_ids) assert all([v_id in fetch_response["vectors"] for v_id in vector_ids]) -@retry(stop=stop_after_attempt(60), wait=wait_fixed(1)) +@retry(reraise=True, stop=stop_after_attempt(60), wait=wait_fixed(1)) def assert_vector_ids_not_exist(vector_ids: List[str], knowledge_base: KnowledgeBase): fetch_response = knowledge_base._index.fetch(ids=vector_ids) @@ -98,7 +99,7 @@ def test_upsert(client): assert upsert_response.is_success -@retry(stop=stop_after_attempt(60), wait=wait_fixed(1)) +@retry(reraise=True, stop=stop_after_attempt(60), wait=wait_fixed(1)) def test_query(client): # fetch the context with all the right filters query_payload = ContextQueryRequest( @@ -116,7 +117,7 @@ def test_query(client): assert query_response.is_success # test response is as expected on /query - response_as_json = query_response.json() + response_as_json = json.loads(query_response.json()) assert ( response_as_json[0]["query"] diff --git a/tests/unit/chat_engine/test_chat_engine.py b/tests/unit/chat_engine/test_chat_engine.py index 9841424a..330fb616 100644 --- a/tests/unit/chat_engine/test_chat_engine.py +++ b/tests/unit/chat_engine/test_chat_engine.py @@ -6,7 +6,8 @@ from canopy.chat_engine import ChatEngine from canopy.chat_engine.query_generator import QueryGenerator from canopy.context_engine import ContextEngine 
-from canopy.context_engine.models import ContextQueryResult, ContextSnippet +from canopy.context_engine.models import (ContextQueryResult, ContextSnippet, + StuffingContextContent, ) from canopy.llm import BaseLLM from canopy.models.data_models import SystemMessage from canopy.models.api_models import ChatResponse, _Choice, TokenCounts @@ -58,13 +59,15 @@ def _get_inputs_and_expected(self, ] mock_queries = [Query(text="How does photosynthesis work?")] mock_context = Context( - content=ContextQueryResult( - query="How does photosynthesis work?", - - snippets=[ContextSnippet(source="ref 1", - text=self._generate_text(snippet_length)), - ContextSnippet(source="ref 2", - text=self._generate_text(12))] + content=StuffingContextContent( + __root__=ContextQueryResult( + query="How does photosynthesis work?", + + snippets=[ContextSnippet(source="ref 1", + text=self._generate_text(snippet_length)), + ContextSnippet(source="ref 2", + text=self._generate_text(12))] + ) ), num_tokens=1 # TODO: This is a dummy value. Need to improve. ) diff --git a/tests/unit/context_builder/test_stuffing_context_builder.py b/tests/unit/context_builder/test_stuffing_context_builder.py index bfd9a899..3d487712 100644 --- a/tests/unit/context_builder/test_stuffing_context_builder.py +++ b/tests/unit/context_builder/test_stuffing_context_builder.py @@ -1,6 +1,6 @@ from canopy.context_engine.models import \ - ContextSnippet, ContextQueryResult -from canopy.models.data_models import Context + (ContextSnippet, ContextQueryResult, StuffingContextContent, ) +from canopy.models.data_models import Context, ContextContent from ..stubs.stub_tokenizer import StubTokenizer from canopy.knowledge_base.models import \ QueryResult, DocumentWithScore @@ -46,22 +46,25 @@ def setup_method(self): score=1.0) ]) ] - self.full_context = Context(content=[ - ContextQueryResult(query="test query 1", - snippets=[ - ContextSnippet( - text=self.text1, source="test_source1"), - ContextSnippet( - text=self.text2, source="test_source2") - ]), - ContextQueryResult(query="test query 2", - snippets=[ - ContextSnippet( - text=self.text3, source="test_source3"), - ContextSnippet( - text=self.text4, source="test_source4") - ]) - ], num_tokens=0) + self.full_context = Context( + content=StuffingContextContent(__root__=[ + ContextQueryResult(query="test query 1", + snippets=[ + ContextSnippet( + text=self.text1, source="test_source1"), + ContextSnippet( + text=self.text2, source="test_source2") + ]), + ContextQueryResult(query="test query 2", + snippets=[ + ContextSnippet( + text=self.text3, source="test_source3"), + ContextSnippet( + text=self.text4, source="test_source4") + ]) + ]), + num_tokens=0 + ) self.full_context.num_tokens = self.tokenizer.token_count( self.full_context.to_text()) @@ -74,7 +77,7 @@ def test_context_fits_within_max_tokens(self): def test_context_exceeds_max_tokens(self): context = self.builder.build(self.query_results, max_context_tokens=30) - expected_context = Context(content=[ + expected_context = Context(content=StuffingContextContent(__root__=[ ContextQueryResult(query="test query 1", snippets=[ ContextSnippet( @@ -85,7 +88,7 @@ def test_context_exceeds_max_tokens(self): ContextSnippet( text=self.text3, source="test_source3"), ]) - ], num_tokens=0) + ]), num_tokens=0) expected_context.num_tokens = self.tokenizer.token_count( expected_context.to_text()) @@ -96,13 +99,13 @@ def test_context_exceeds_max_tokens_unordered(self): self.query_results[0].documents[0].text = self.text1 * 100 context = self.builder.build(self.query_results, 
max_context_tokens=20) - expected_context = Context(content=[ + expected_context = Context(content=StuffingContextContent(__root__=[ ContextQueryResult(query="test query 2", snippets=[ ContextSnippet( text=self.text3, source="test_source3"), ]) - ], num_tokens=0) + ]), num_tokens=0) expected_context.num_tokens = self.tokenizer.token_count( expected_context.to_text()) @@ -111,18 +114,18 @@ def test_context_exceeds_max_tokens_unordered(self): def test_whole_query_results_not_fit(self): context = self.builder.build(self.query_results, max_context_tokens=10) - assert context.num_tokens == 0 + assert context.num_tokens == 1 assert context.content == [] def test_max_tokens_zero(self): context = self.builder.build(self.query_results, max_context_tokens=0) - self.assert_num_tokens(context, 0) + self.assert_num_tokens(context, 1) assert context.content == [] def test_empty_query_results(self): context = self.builder.build([], max_context_tokens=100) - self.assert_num_tokens(context, 0) - assert len(context.content) == 0 + self.assert_num_tokens(context, 1) + assert context.content == [] def test_documents_with_duplicates(self): duplicate_query_results = self.query_results + [ @@ -165,7 +168,7 @@ def test_empty_documents(self): ] context = self.builder.build( empty_query_results, max_context_tokens=100) - self.assert_num_tokens(context, 0) + self.assert_num_tokens(context, 1) assert context.content == [] def assert_num_tokens(self, context: Context, max_tokens: int): @@ -175,6 +178,7 @@ def assert_num_tokens(self, context: Context, max_tokens: int): @staticmethod def assert_contexts_equal(actual: Context, expected: Context): + assert isinstance(actual.content, ContextContent) assert actual.num_tokens == expected.num_tokens assert len(actual.content) == len(expected.content) for actual_qr, expected_qr in zip(actual.content, expected.content): diff --git a/tests/unit/context_engine/test_context_engine.py b/tests/unit/context_engine/test_context_engine.py index a102db36..17c85ee8 100644 --- a/tests/unit/context_engine/test_context_engine.py +++ b/tests/unit/context_engine/test_context_engine.py @@ -5,10 +5,11 @@ from canopy.context_engine import ContextEngine from canopy.context_engine.context_builder.base import ContextBuilder -from canopy.context_engine.models import ContextQueryResult, ContextSnippet +from canopy.context_engine.models import (ContextQueryResult, ContextSnippet, + StuffingContextContent, ) from canopy.knowledge_base.base import BaseKnowledgeBase from canopy.knowledge_base.models import QueryResult, DocumentWithScore -from canopy.models.data_models import Query, Context, _ContextContent +from canopy.models.data_models import Query, Context, ContextContent class TestContextEngine: @@ -68,7 +69,7 @@ def test_query(context_engine, queries = [Query(text="How does photosynthesis work?")] max_context_tokens = 100 - mock_context_content = create_autospec(_ContextContent) + mock_context_content = create_autospec(ContextContent) mock_context_content.to_text.return_value = sample_context_text mock_context = Context(content=mock_context_content, num_tokens=21) @@ -93,7 +94,7 @@ def test_query_with_metadata_filter(context_engine, queries = [Query(text="How does photosynthesis work?")] max_context_tokens = 100 - mock_context_content = create_autospec(_ContextContent) + mock_context_content = create_autospec(ContextContent) mock_context_content.to_text.return_value = sample_context_text mock_context = Context(content=mock_context_content, num_tokens=21) @@ -149,7 +150,7 @@ def 
test_multiple_queries(context_engine, mock_knowledge_base.query.return_value = extended_mock_query_result combined_text = sample_context_text + "\n" + text - mock_context_content = create_autospec(_ContextContent) + mock_context_content = create_autospec(ContextContent) mock_context_content.to_text.return_value = combined_text mock_context = Context(content=mock_context_content, num_tokens=40) @@ -168,7 +169,7 @@ def test_empty_query_results(context_engine, mock_knowledge_base.query.return_value = [] - mock_context_content = create_autospec(_ContextContent) + mock_context_content = create_autospec(ContextContent) mock_context_content.to_text.return_value = "" mock_context = Context(content=mock_context_content, num_tokens=0) @@ -183,7 +184,8 @@ def test_context_query_result_to_text(): query_result = ContextQueryResult(query="How does photosynthesis work?", snippets=[ContextSnippet(text="42", source="ref")]) - context = Context(content=query_result, num_tokens=1) + context = Context(content=StuffingContextContent(__root__=query_result), + num_tokens=1) assert context.to_text() == json.dumps(query_result.dict()) assert context.to_text(indent=2) == json.dumps(query_result.dict(), indent=2) From 457b61bda55cc74d2c288db5e281cc6eefb34bc1 Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 23:44:14 +0200 Subject: [PATCH 26/31] linters --- examples/canopy-lib-quickstart.ipynb | 4 ++-- src/canopy/models/data_models.py | 2 +- src/canopy_server/app.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index ac4981bf..a3e805cc 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -32,8 +32,8 @@ "output_type": "stream", "text": [ "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip available: \u001B[0m\u001B[31;49m22.2.2\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.3.1\u001B[0m\n", + "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n" ] } ], diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 1fb365d1..b56ef185 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import Optional, List, Union, Dict, Sequence, Literal +from typing import Optional, List, Union, Dict, Literal from pydantic import BaseModel, Field, validator, Extra diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 84aaf530..a1415e04 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -26,7 +26,7 @@ StreamingChatResponse, ChatResponse, ) -from canopy.models.data_models import Context, UserMessage, ContextContent +from canopy.models.data_models import Context, UserMessage from .api_models import ( ChatRequest, ContextQueryRequest, From 50efc687f689f62fa94f0cd4f08613b17d91e0fd Mon Sep 17 00:00:00 2001 From: ilai Date: Mon, 6 Nov 2023 10:56:08 +0200 Subject: 
[PATCH 27/31] [context] Simplify ContextContent - Made StuffingContextContent always a List - Slightly improved readability of `StuffingContextBuilder` --- .../context_engine/context_builder/stuffing.py | 17 +++++++---------- src/canopy/context_engine/models.py | 4 ++-- tests/unit/chat_engine/test_chat_engine.py | 4 ++-- .../unit/context_engine/test_context_engine.py | 6 +++--- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/canopy/context_engine/context_builder/stuffing.py b/src/canopy/context_engine/context_builder/stuffing.py index 8024d547..4b8402c6 100644 --- a/src/canopy/context_engine/context_builder/stuffing.py +++ b/src/canopy/context_engine/context_builder/stuffing.py @@ -25,13 +25,9 @@ def build(self, ContextQueryResult(query=qr.query, snippets=[]) for qr in query_results] debug_info = {"num_docs": len(sorted_docs_with_origin)} - context = Context( - content=StuffingContextContent(__root__=context_query_results), - num_tokens=0, - debug_info=debug_info - ) + content = StuffingContextContent(__root__=context_query_results) - if self._tokenizer.token_count(context.to_text()) > max_context_tokens: + if self._tokenizer.token_count(content.to_text()) > max_context_tokens: return Context(content=StuffingContextContent(__root__=[]), num_tokens=1, debug_info=debug_info) @@ -45,16 +41,17 @@ def build(self, snippet) seen_doc_ids.add(doc.id) # if the context is too long, remove the snippet - if self._tokenizer.token_count(context.to_text()) > max_context_tokens: + if self._tokenizer.token_count(content.to_text()) > max_context_tokens: context_query_results[origin_query_idx].snippets.pop() # remove queries with no snippets - context.content = StuffingContextContent( + content = StuffingContextContent( __root__=[qr for qr in context_query_results if len(qr.snippets) > 0] ) - context.num_tokens = self._tokenizer.token_count(context.to_text()) - return context + return Context(content=content, + num_tokens=self._tokenizer.token_count(content.to_text()), + debug_info=debug_info) @staticmethod def _round_robin_sort( diff --git a/src/canopy/context_engine/models.py b/src/canopy/context_engine/models.py index 1d0266fe..2b21e20a 100644 --- a/src/canopy/context_engine/models.py +++ b/src/canopy/context_engine/models.py @@ -1,4 +1,4 @@ -from typing import List, Union +from typing import List from pydantic import BaseModel @@ -16,7 +16,7 @@ class ContextQueryResult(BaseModel): class StuffingContextContent(ContextContent): - __root__: Union[ContextQueryResult, List[ContextQueryResult]] + __root__: List[ContextQueryResult] def dict(self, **kwargs): return super().dict(**kwargs)['__root__'] diff --git a/tests/unit/chat_engine/test_chat_engine.py b/tests/unit/chat_engine/test_chat_engine.py index 330fb616..d6381a0c 100644 --- a/tests/unit/chat_engine/test_chat_engine.py +++ b/tests/unit/chat_engine/test_chat_engine.py @@ -60,14 +60,14 @@ def _get_inputs_and_expected(self, mock_queries = [Query(text="How does photosynthesis work?")] mock_context = Context( content=StuffingContextContent( - __root__=ContextQueryResult( + __root__=[ContextQueryResult( query="How does photosynthesis work?", snippets=[ContextSnippet(source="ref 1", text=self._generate_text(snippet_length)), ContextSnippet(source="ref 2", text=self._generate_text(12))] - ) + )] ), num_tokens=1 # TODO: This is a dummy value. Need to improve. 
) diff --git a/tests/unit/context_engine/test_context_engine.py b/tests/unit/context_engine/test_context_engine.py index 17c85ee8..61977f2a 100644 --- a/tests/unit/context_engine/test_context_engine.py +++ b/tests/unit/context_engine/test_context_engine.py @@ -184,11 +184,11 @@ def test_context_query_result_to_text(): query_result = ContextQueryResult(query="How does photosynthesis work?", snippets=[ContextSnippet(text="42", source="ref")]) - context = Context(content=StuffingContextContent(__root__=query_result), + context = Context(content=StuffingContextContent(__root__=[query_result]), num_tokens=1) - assert context.to_text() == json.dumps(query_result.dict()) - assert context.to_text(indent=2) == json.dumps(query_result.dict(), indent=2) + assert context.to_text() == json.dumps([query_result.dict()]) + assert context.to_text(indent=2) == json.dumps([query_result.dict()], indent=2) @staticmethod @pytest.mark.asyncio From f0b40e970ac23f44545848b98f3d3235d34a8b92 Mon Sep 17 00:00:00 2001 From: ilai Date: Mon, 6 Nov 2023 11:03:01 +0200 Subject: [PATCH 28/31] [context] StuffingContextContent - Removed special iterator functions I changed the tests to use explicit json.loads() --- src/canopy/context_engine/models.py | 9 -------- .../test_stuffing_context_builder.py | 23 +++++++++++-------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/src/canopy/context_engine/models.py b/src/canopy/context_engine/models.py index 2b21e20a..89618451 100644 --- a/src/canopy/context_engine/models.py +++ b/src/canopy/context_engine/models.py @@ -21,14 +21,5 @@ class StuffingContextContent(ContextContent): def dict(self, **kwargs): return super().dict(**kwargs)['__root__'] - def __iter__(self): - return iter(self.__root__) - - def __getitem__(self, item): - return self.__root__[item] - - def __len__(self): - return len(self.__root__) - def to_text(self, **kwargs): return self.json(**kwargs) diff --git a/tests/unit/context_builder/test_stuffing_context_builder.py b/tests/unit/context_builder/test_stuffing_context_builder.py index 3d487712..e362d8e5 100644 --- a/tests/unit/context_builder/test_stuffing_context_builder.py +++ b/tests/unit/context_builder/test_stuffing_context_builder.py @@ -1,3 +1,5 @@ +import json + from canopy.context_engine.models import \ (ContextSnippet, ContextQueryResult, StuffingContextContent, ) from canopy.models.data_models import Context, ContextContent @@ -153,7 +155,8 @@ def test_source_metadata_missing(self): context = self.builder.build( missing_metadata_query_results, max_context_tokens=100) self.assert_num_tokens(context, 100) - assert context.content[0].snippets[0].source == "" + content = json.loads(context.to_text()) + assert content[0]["snippets"][0]["source"] == "" def test_empty_documents(self): empty_query_results = [ @@ -180,11 +183,13 @@ def assert_num_tokens(self, context: Context, max_tokens: int): def assert_contexts_equal(actual: Context, expected: Context): assert isinstance(actual.content, ContextContent) assert actual.num_tokens == expected.num_tokens - assert len(actual.content) == len(expected.content) - for actual_qr, expected_qr in zip(actual.content, expected.content): - assert actual_qr.query == expected_qr.query - assert len(actual_qr.snippets) == len(expected_qr.snippets) - for actual_snippet, expected_snippet in zip(actual_qr.snippets, - expected_qr.snippets): - assert actual_snippet.text == expected_snippet.text - assert actual_snippet.source == expected_snippet.source + actual_content = json.loads(actual.to_text()) + expected_content = 
json.loads(expected.to_text())
+        assert len(actual_content) == len(expected_content)
+        for actual_qr, expected_qr in zip(actual_content, expected_content):
+            assert actual_qr["query"] == expected_qr["query"]
+            assert len(actual_qr["snippets"]) == len(expected_qr["snippets"])
+            for actual_snippet, expected_snippet in zip(actual_qr["snippets"],
+                                                        expected_qr["snippets"]):
+                assert actual_snippet["text"] == expected_snippet["text"]
+                assert actual_snippet["source"] == expected_snippet["source"]

From 02637da6819dc18d74fa92cbfe4e2ef6ff70e03f Mon Sep 17 00:00:00 2001
From: ilai
Date: Mon, 6 Nov 2023 13:43:53 +0200
Subject: [PATCH 29/31] [context] Moved StuffingContextBuilder's data models
 into the same file

Makes the code more readable and explicit
---
 .../context_builder/stuffing.py               | 34 +++++++++++++++++--
 src/canopy/context_engine/models.py           | 25 --------------
 src/canopy/models/data_models.py              | 15 ++++++--
 src/canopy_server/app.py                      |  5 +--
 tests/unit/chat_engine/test_chat_engine.py    |  5 +--
 .../test_stuffing_context_builder.py          |  5 +--
 .../context_engine/test_context_engine.py     |  5 +--
 7 files changed, 56 insertions(+), 38 deletions(-)
 delete mode 100644 src/canopy/context_engine/models.py

diff --git a/src/canopy/context_engine/context_builder/stuffing.py b/src/canopy/context_engine/context_builder/stuffing.py
index 4b8402c6..5aa9b21b 100644
--- a/src/canopy/context_engine/context_builder/stuffing.py
+++ b/src/canopy/context_engine/context_builder/stuffing.py
@@ -1,13 +1,41 @@
 from itertools import zip_longest
 from typing import List, Tuple
 
+from pydantic import BaseModel
+
 from canopy.context_engine.context_builder.base import ContextBuilder
-from canopy.context_engine.models import (ContextQueryResult, ContextSnippet,
-                                          StuffingContextContent, )
 from canopy.knowledge_base.models import QueryResult, DocumentWithScore
 from canopy.tokenizer import Tokenizer
-from canopy.models.data_models import Context
+from canopy.models.data_models import Context, ContextContent
+
+
+# ------------- DATA MODELS -------------
+
+class ContextSnippet(BaseModel):
+    source: str
+    text: str
+
+
+class ContextQueryResult(BaseModel):
+    query: str
+    snippets: List[ContextSnippet]
+
+
+class StuffingContextContent(ContextContent):
+    __root__: List[ContextQueryResult]
+
+    def dict(self, **kwargs):
+        return super().dict(**kwargs)['__root__']
+
+    # In the case of StuffingContextBuilder, we simply want the text representation to
+    # be a json. Other ContextContent subclasses may render into text differently
+    def to_text(self, **kwargs):
+        # We can't use self.json() since this is mapped back to self.to_text() in the
+        # base class, which would cause infinite recursion. 
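+        # (super(ContextContent, self) starts the method lookup *above*
+        # ContextContent, so this reaches pydantic's original BaseModel.json()
+        # and serializes __root__ without re-entering to_text().)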
+ return super(ContextContent, self).json(**kwargs) + +# ------------- CONTEXT BUILDER ------------- class StuffingContextBuilder(ContextBuilder): diff --git a/src/canopy/context_engine/models.py b/src/canopy/context_engine/models.py deleted file mode 100644 index 89618451..00000000 --- a/src/canopy/context_engine/models.py +++ /dev/null @@ -1,25 +0,0 @@ -from typing import List - -from pydantic import BaseModel - -from canopy.models.data_models import ContextContent - - -class ContextSnippet(BaseModel): - source: str - text: str - - -class ContextQueryResult(BaseModel): - query: str - snippets: List[ContextSnippet] - - -class StuffingContextContent(ContextContent): - __root__: List[ContextQueryResult] - - def dict(self, **kwargs): - return super().dict(**kwargs)['__root__'] - - def to_text(self, **kwargs): - return self.json(**kwargs) diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index b56ef185..78a07dd8 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import Optional, List, Union, Dict, Literal +from typing import Optional, List, Union, Dict, Literal, Any from pydantic import BaseModel, Field, validator, Extra @@ -63,15 +63,26 @@ class ContextContent(BaseModel, ABC): def to_text(self, **kwargs) -> str: pass + def __str__(self): + return self.to_text() + + def json(self, **kwargs): + return self.to_text(**kwargs) + class Context(BaseModel): content: ContextContent - num_tokens: int = Field(exclude=True) + num_tokens: int debug_info: dict = Field(default_factory=dict, exclude=True) def to_text(self, **kwargs) -> str: return self.content.to_text(**kwargs) + class Config: + @staticmethod + # Override the JSON schema, to show `content` as a string in the docs + def schema_extra(schema: dict[str, Any]) -> None: + schema['properties']['content'] = {'type': 'String', 'title': 'content'} # --------------------- LLM models ------------------------ diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index a1415e04..79af0868 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -127,13 +127,14 @@ def stringify_content(response: StreamingChatResponse): @app.post( "/context/query", + response_model=Context, responses={ 500: {"description": "Failed to query the knowledge base or build the context"} }, ) async def query( request: ContextQueryRequest = Body(...), -) -> str: +) -> Context: """ Query the knowledge base for relevant context. The returned text may be structured or unstructured, depending on the Canopy configuration. 
@@ -146,7 +147,7 @@ async def query( queries=request.queries, max_context_tokens=request.max_tokens, ) - return context.to_text() + return context except Exception as e: logger.exception(e) diff --git a/tests/unit/chat_engine/test_chat_engine.py b/tests/unit/chat_engine/test_chat_engine.py index d6381a0c..ea8d6415 100644 --- a/tests/unit/chat_engine/test_chat_engine.py +++ b/tests/unit/chat_engine/test_chat_engine.py @@ -6,8 +6,9 @@ from canopy.chat_engine import ChatEngine from canopy.chat_engine.query_generator import QueryGenerator from canopy.context_engine import ContextEngine -from canopy.context_engine.models import (ContextQueryResult, ContextSnippet, - StuffingContextContent, ) +from canopy.context_engine.context_builder.stuffing import (ContextSnippet, + ContextQueryResult, + StuffingContextContent, ) from canopy.llm import BaseLLM from canopy.models.data_models import SystemMessage from canopy.models.api_models import ChatResponse, _Choice, TokenCounts diff --git a/tests/unit/context_builder/test_stuffing_context_builder.py b/tests/unit/context_builder/test_stuffing_context_builder.py index e362d8e5..4881926b 100644 --- a/tests/unit/context_builder/test_stuffing_context_builder.py +++ b/tests/unit/context_builder/test_stuffing_context_builder.py @@ -1,7 +1,8 @@ import json -from canopy.context_engine.models import \ - (ContextSnippet, ContextQueryResult, StuffingContextContent, ) +from canopy.context_engine.context_builder.stuffing import (ContextSnippet, + ContextQueryResult, + StuffingContextContent, ) from canopy.models.data_models import Context, ContextContent from ..stubs.stub_tokenizer import StubTokenizer from canopy.knowledge_base.models import \ diff --git a/tests/unit/context_engine/test_context_engine.py b/tests/unit/context_engine/test_context_engine.py index 61977f2a..ec17c55c 100644 --- a/tests/unit/context_engine/test_context_engine.py +++ b/tests/unit/context_engine/test_context_engine.py @@ -5,8 +5,9 @@ from canopy.context_engine import ContextEngine from canopy.context_engine.context_builder.base import ContextBuilder -from canopy.context_engine.models import (ContextQueryResult, ContextSnippet, - StuffingContextContent, ) +from canopy.context_engine.context_builder.stuffing import (ContextSnippet, + ContextQueryResult, + StuffingContextContent, ) from canopy.knowledge_base.base import BaseKnowledgeBase from canopy.knowledge_base.models import QueryResult, DocumentWithScore from canopy.models.data_models import Query, Context, ContextContent From db9ca51ce36c2285aebe78be8fbb11fd43eaec9c Mon Sep 17 00:00:00 2001 From: ilai Date: Mon, 6 Nov 2023 16:41:25 +0200 Subject: [PATCH 30/31] [app] `/query` return type - added ContextResponse model KISS solution - simply return a different model than the actual internal `Context` --- .../context_engine/context_builder/stuffing.py | 6 ++---- src/canopy/models/data_models.py | 8 -------- src/canopy_server/api_models.py | 5 +++++ src/canopy_server/app.py | 6 ++++-- tests/e2e/test_app.py | 15 +++++++++------ 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/canopy/context_engine/context_builder/stuffing.py b/src/canopy/context_engine/context_builder/stuffing.py index 5aa9b21b..e1cd7c8d 100644 --- a/src/canopy/context_engine/context_builder/stuffing.py +++ b/src/canopy/context_engine/context_builder/stuffing.py @@ -30,12 +30,10 @@ def dict(self, **kwargs): # In the case of StuffingContextBuilder, we simply want the text representation to # be a json. 
Other ContextContent subclasses may render into text differently def to_text(self, **kwargs): - # We can't use self.json() since this is mapped back to self.to_text() in the - # base class, which would cause infinite recursion. - return super(ContextContent, self).json(**kwargs) + return self.json(**kwargs) -# ------------- CONTEXT BUILDER ------------- +# ------------- CONTEXT BUILDER ------------- class StuffingContextBuilder(ContextBuilder): diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 78a07dd8..58c702b2 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -66,9 +66,6 @@ def to_text(self, **kwargs) -> str: def __str__(self): return self.to_text() - def json(self, **kwargs): - return self.to_text(**kwargs) - class Context(BaseModel): content: ContextContent @@ -78,11 +75,6 @@ class Context(BaseModel): def to_text(self, **kwargs) -> str: return self.content.to_text(**kwargs) - class Config: - @staticmethod - # Override the JSON schema, to show `content` as a string in the docs - def schema_extra(schema: dict[str, Any]) -> None: - schema['properties']['content'] = {'type': 'String', 'title': 'content'} # --------------------- LLM models ------------------------ diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index e965b8cb..49a7872a 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -31,6 +31,11 @@ class ContextQueryRequest(BaseModel): max_tokens: int +class ContextResponse(BaseModel): + content: str + num_tokens: int + + class ContextUpsertRequest(BaseModel): documents: List[Document] batch_size: int = Field( diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 79af0868..8e18be1c 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -36,6 +36,7 @@ ShutdownResponse, SuccessUpsertResponse, SuccessDeleteResponse, + ContextResponse, ) from canopy.llm.openai import OpenAILLM @@ -127,7 +128,7 @@ def stringify_content(response: StreamingChatResponse): @app.post( "/context/query", - response_model=Context, + response_model=ContextResponse, responses={ 500: {"description": "Failed to query the knowledge base or build the context"} }, @@ -147,7 +148,8 @@ async def query( queries=request.queries, max_context_tokens=request.max_tokens, ) - return context + return ContextResponse(content=context.content.to_text(), + num_tokens=context.num_tokens) except Exception as e: logger.exception(e) diff --git a/tests/e2e/test_app.py b/tests/e2e/test_app.py index fdd8c148..141c3b4a 100644 --- a/tests/e2e/test_app.py +++ b/tests/e2e/test_app.py @@ -13,7 +13,7 @@ from canopy_server.app import app from canopy_server.api_models import (HealthStatus, ContextUpsertRequest, - ContextQueryRequest) + ContextQueryRequest, ContextResponse, ) from .. 
import Tokenizer upsert_payload = ContextUpsertRequest( @@ -102,6 +102,7 @@ def test_upsert(client): @retry(reraise=True, stop=stop_after_attempt(60), wait=wait_fixed(1)) def test_query(client): # fetch the context with all the right filters + tokenizer = Tokenizer() query_payload = ContextQueryRequest( queries=[ { @@ -116,16 +117,18 @@ def test_query(client): query_response = client.post("/context/query", json=query_payload.dict()) assert query_response.is_success - # test response is as expected on /query - response_as_json = json.loads(query_response.json()) + query_response = query_response.json() + assert (query_response["num_tokens"] == + len(tokenizer.tokenize(query_response["content"]))) + stuffing_content = json.loads(query_response["content"]) assert ( - response_as_json[0]["query"] + stuffing_content[0]["query"] == query_payload.dict()["queries"][0]["text"] - and response_as_json[0]["snippets"][0]["text"] + and stuffing_content[0]["snippets"][0]["text"] == upsert_payload.dict()["documents"][0]["text"] ) - assert (response_as_json[0]["snippets"][0]["source"] == + assert (stuffing_content[0]["snippets"][0]["source"] == upsert_payload.dict()["documents"][0]["source"]) From 3c828c405badf61f11a2e047dc169a15b11ad1b4 Mon Sep 17 00:00:00 2001 From: ilai Date: Mon, 6 Nov 2023 16:49:49 +0200 Subject: [PATCH 31/31] Linter fixes + wrong return type --- src/canopy/models/data_models.py | 2 +- src/canopy_server/app.py | 2 +- tests/e2e/test_app.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 58c702b2..dbaa8096 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import Optional, List, Union, Dict, Literal, Any +from typing import Optional, List, Union, Dict, Literal from pydantic import BaseModel, Field, validator, Extra diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 8e18be1c..6c67494a 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -135,7 +135,7 @@ def stringify_content(response: StreamingChatResponse): ) async def query( request: ContextQueryRequest = Body(...), -) -> Context: +) -> ContextResponse: """ Query the knowledge base for relevant context. The returned text may be structured or unstructured, depending on the Canopy configuration. diff --git a/tests/e2e/test_app.py b/tests/e2e/test_app.py index 141c3b4a..70e5adca 100644 --- a/tests/e2e/test_app.py +++ b/tests/e2e/test_app.py @@ -13,7 +13,7 @@ from canopy_server.app import app from canopy_server.api_models import (HealthStatus, ContextUpsertRequest, - ContextQueryRequest, ContextResponse, ) + ContextQueryRequest, ) from .. import Tokenizer upsert_payload = ContextUpsertRequest(