From 7ed92913302535fff36e57dc5950f6b92e7d3bef Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 10:10:14 +0200 Subject: [PATCH 01/31] canopy server documentation --- .gitignore | 3 +- src/canopy/models/api_models.py | 2 +- src/canopy/models/data_models.py | 53 ++++++---- src/canopy_cli/__init__.py | 27 ++++++ src/canopy_cli/cli.py | 24 +++++ src/canopy_server/__init__.py | 13 +++ src/canopy_server/api_models.py | 52 ++++++++-- src/canopy_server/app.py | 161 +++++++++++++++++++++---------- 8 files changed, 257 insertions(+), 78 deletions(-) diff --git a/.gitignore b/.gitignore index 9116245a..29753e14 100644 --- a/.gitignore +++ b/.gitignore @@ -162,4 +162,5 @@ cython_debug/ # Mac OS **/.DS_Store -datafiles/* \ No newline at end of file +datafiles/* +canopy-api-docs.html \ No newline at end of file diff --git a/src/canopy/models/api_models.py b/src/canopy/models/api_models.py index 964aa48d..53a93585 100644 --- a/src/canopy/models/api_models.py +++ b/src/canopy/models/api_models.py @@ -28,7 +28,7 @@ def calc_total_tokens(cls, v, values, **kwargs): class ChatResponse(BaseModel): - id: str + id: str = Field(description="Canopy session Id.") object: str created: int model: str diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 7d0281a2..05a941e5 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -11,35 +11,52 @@ class Query(BaseModel): - text: str - namespace: str = "" - metadata_filter: Optional[dict] = None - top_k: Optional[int] = None - query_params: dict = Field(default_factory=dict) + text: str = Field(description="The query text.") + namespace: str = Field( + default="", + description="The namespace of the query, to learn more about namespaces, see https://docs.pinecone.io/docs/namespaces", # noqa: E501 + ) + metadata_filter: Optional[dict] = Field( + default=None, + description="A pinecone metadata filter, to learn more about metadata filters, see https://docs.pinecone.io/docs/metadata-filtering", # noqa: E501 + ) + top_k: Optional[int] = Field( + default=None, + description="[soon deprecated] The number of results to return." + ) + query_params: dict = Field( + default_factory=dict, + description="Pinecone Client additional query parameters." + ) class Document(BaseModel): - id: str - text: str - source: str = "" - metadata: Metadata = Field(default_factory=dict) + id: str = Field(description="The document id.") + text: str = Field(description="The document text.") + source: str = Field( + default="", + description="The source of the document: a URL, a file path, etc." + ) + metadata: Metadata = Field( + default_factory=dict, + description="The document metadata, to learn more about metadata, see https://docs.pinecone.io/docs/manage-data", # noqa: E501 + ) class Config: extra = Extra.forbid - @validator('metadata') + @validator("metadata") def metadata_reseved_fields(cls, v): - if 'text' in v: + if "text" in v: raise ValueError('Metadata cannot contain reserved field "text"') - if 'document_id' in v: + if "document_id" in v: raise ValueError('Metadata cannot contain reserved field "document_id"') - if 'source' in v: + if "source" in v: raise ValueError('Metadata cannot contain reserved field "source"') return v class ContextContent(BaseModel, ABC): - # Any context should be able to be represented as well formatted text. # In the most minimal case, that could simply be a call to `.json()`. 
@abstractmethod @@ -59,6 +76,8 @@ def to_text(self, **kwargs) -> str: return "\n".join([c.to_text(**kwargs) for c in self.content]) +ContextContentResponse = Union[ContextContent, Sequence[ContextContent]] + # --------------------- LLM models ------------------------ @@ -69,12 +88,12 @@ class Role(Enum): class MessageBase(BaseModel): - role: Role - content: str + role: Role = Field(description="The role of the messages author.") + content: str = Field(description="The contents of the message.") def dict(self, *args, **kwargs): d = super().dict(*args, **kwargs) - d['role'] = d['role'].value + d["role"] = d["role"].value return d diff --git a/src/canopy_cli/__init__.py b/src/canopy_cli/__init__.py index e69de29b..a4791643 100644 --- a/src/canopy_cli/__init__.py +++ b/src/canopy_cli/__init__.py @@ -0,0 +1,27 @@ +HTML_TEMPLATE = """ + + + + Canopy API Spec + + + + + + + +
+ Redoc + + + + +""" # noqa: E501 diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index d0e9000d..d0b5c827 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -551,5 +551,29 @@ def stop(url): raise CLIError(msg) +@cli.command( + help=( + """ + \b + Open the Canopy Server docs + """ + ) +) +def docs(): + import json + from canopy_cli import HTML_TEMPLATE + from canopy_server.app import app + # generate docs + + filename = "canopy-api-docs.html" + + with open(filename, "w") as fd: + print(HTML_TEMPLATE % json.dumps(app.openapi()), file=fd) + + import webbrowser + + webbrowser.open('file://' + os.path.realpath(filename)) + + if __name__ == "__main__": cli() diff --git a/src/canopy_server/__init__.py b/src/canopy_server/__init__.py index e69de29b..85a89cf2 100644 --- a/src/canopy_server/__init__.py +++ b/src/canopy_server/__init__.py @@ -0,0 +1,13 @@ +description = """ +Canopy is an open-source Retrieval Augmented Generation (RAG) framework and context engine built on top of the Pinecone vector database. Canopy enables you to quickly and easily experiment with and build applications using RAG. Start chatting with your documents or text data with a few simple commands. + +Canopy provides a configurable built-in server so you can effortlessly deploy a RAG-powered chat application to your existing chat UI or interface. Or you can build your own, custom RAG application using the Canopy lirbary. + +## Prerequisites + +### Pinecone API key +To get Pinecone free trial API key and environment register or log into your Pinecone account in the console (https://app.pinecone.io/). You can access your API key from the "API Keys" section in the sidebar of your dashboard, and find the environment name next to it. + +### OpenAI API key +You can find your free trial OpenAI API key https://platform.openai.com/account/api-keys. You might need to login or register to OpenAI services. +""" # noqa: E501 diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index dae422f5..c80d542f 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -1,15 +1,26 @@ from typing import Optional, List -from pydantic import BaseModel +from pydantic import BaseModel, Field from canopy.models.data_models import Messages, Query, Document class ChatRequest(BaseModel): - model: str = "" - messages: Messages - stream: bool = False - user: Optional[str] = None + model: str = Field( + default="", + description="ID of the model to use. If empty, the default model will be used.", # noqa: E501 + ) + messages: Messages = Field( + description="A list of messages comprising the conversation so far." + ) + stream: bool = Field( + default=False, + description="""If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.""", # noqa: E501 + ) + user: Optional[str] = Field( + default=None, + description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.", # noqa: E501 + ) class ContextQueryRequest(BaseModel): @@ -19,11 +30,13 @@ class ContextQueryRequest(BaseModel): class ContextUpsertRequest(BaseModel): documents: List[Document] - batch_size: int = 200 + batch_size: int = Field( + default=200, description="Batch size for upserting documents to Pinecone." 
+ ) class ContextDeleteRequest(BaseModel): - document_ids: List[str] + document_ids: List[str] = Field(description="List of document ids to delete.") class HealthStatus(BaseModel): @@ -38,5 +51,28 @@ class ChatDebugInfo(BaseModel): prompt_tokens: Optional[int] = None generated_tokens: Optional[int] = None - def to_text(self,): + def to_text( + self, + ): return self.json() + + +class ShutdownResponse(BaseModel): + message: str = Field( + default="Shutting down", + description="Message indicating the server is shutting down.", + ) + + +class SuccessUpsertResponse(BaseModel): + message: str = Field( + default="Success", + description="Message indicating the upsert was successful.", + ) + + +class SuccessDeleteResponse(BaseModel): + message: str = Field( + default="Success", + description="Message indicating the delete was successful.", + ) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index bf34cb83..97b70881 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -22,19 +22,39 @@ import uvicorn from typing import cast -from canopy.models.api_models import StreamingChatResponse, ChatResponse -from canopy.models.data_models import Context, UserMessage -from .api_models import \ - ChatRequest, ContextQueryRequest, \ - ContextUpsertRequest, HealthStatus, ContextDeleteRequest +from canopy.models.api_models import ( + StreamingChatResponse, + ChatResponse, +) +from canopy.models.data_models import Context, UserMessage, ContextContentResponse +from .api_models import ( + ChatRequest, + ContextQueryRequest, + ContextUpsertRequest, + HealthStatus, + ContextDeleteRequest, + ShutdownResponse, + SuccessUpsertResponse, + SuccessDeleteResponse, +) from canopy.llm.openai import OpenAILLM from canopy_cli.errors import ConfigError +from canopy_server import description +from canopy import __version__ load_dotenv() # load env vars before import of openai openai.api_key = os.getenv("OPENAI_API_KEY") -app = FastAPI() +app = FastAPI( + title="Canopy API", + description=description, + version=__version__, + license_info={ + "name": "Apache 2.0", + "url": "https://www.apache.org/licenses/LICENSE-2.0.html", + }, +) context_engine: ContextEngine chat_engine: ChatEngine @@ -45,19 +65,29 @@ @app.post( "/context/chat/completions", + response_model=ChatResponse, + responses={500: {"description": "Failed to chat with Canopy"}}, # noqa: E501 ) async def chat( request: ChatRequest = Body(...), -): +) -> ChatResponse: + """ + Chat with Canopy, using the LLM and context engine, and return a response. 
+ + The request schema is following OpenAI's chat completion API schema, but removes the need to configure + anything, other than the messages field: for more imformation see: https://platform.openai.com/docs/api-reference/chat/create + + """ # noqa: E501 try: session_id = request.user or "None" # noqa: F841 question_id = str(uuid.uuid4()) logger.debug(f"Received chat request: {request.messages[-1].content}") - answer = await run_in_threadpool(chat_engine.chat, - messages=request.messages, - stream=request.stream) + answer = await run_in_threadpool( + chat_engine.chat, messages=request.messages, stream=request.stream + ) if request.stream: + def stringify_content(response: StreamingChatResponse): for chunk in response.chunks: chunk.id = question_id @@ -65,7 +95,7 @@ def stringify_content(response: StreamingChatResponse): yield data content_stream = stringify_content(cast(StreamingChatResponse, answer)) - return EventSourceResponse(content_stream, media_type='text/event-stream') + return EventSourceResponse(content_stream, media_type="text/event-stream") else: chat_response = cast(ChatResponse, answer) @@ -74,105 +104,134 @@ def stringify_content(response: StreamingChatResponse): except Exception as e: logger.exception(f"Chat with question_id {question_id} failed") - raise HTTPException( - status_code=500, detail=f"Internal Service Error: {str(e)}") + raise HTTPException(status_code=500, detail=f"Internal Service Error: {str(e)}") @app.post( "/context/query", + response_model=ContextContentResponse, + responses={ + 500: {"description": "Failed to query the knowledgebase or Build the context"} + }, ) async def query( request: ContextQueryRequest = Body(...), -): +) -> ContextContentResponse: + """ + Query the knowledgebase and return a context. Context is a collections of text snippets, each with a source. + Query enables tuning the context length (in tokens) such that you can cap the cost of the generation. + This method can be used with or without a LLM. + """ # noqa: E501 try: context: Context = await run_in_threadpool( context_engine.query, queries=request.queries, - max_context_tokens=request.max_tokens) + max_context_tokens=request.max_tokens, + ) return context.content except Exception as e: logger.exception(e) - raise HTTPException( - status_code=500, detail=f"Internal Service Error: {str(e)}") + raise HTTPException(status_code=500, detail=f"Internal Service Error: {str(e)}") @app.post( "/context/upsert", + response_model=SuccessUpsertResponse, + responses={500: {"description": "Failed to upsert documents"}}, ) async def upsert( request: ContextUpsertRequest = Body(...), -): +) -> SuccessUpsertResponse: + """ + Upsert documents into the knowledgebase. Upserting is a way to add new documents or update existing ones. + Each document has a unique ID. If a document with the same ID already exists, it will be updated. + + This method will run the processing, chunking and endocing of the data in parallel, and then send the + encoded data to the Pinecone Index in batches. 
+ """ # noqa: E501 try: logger.info(f"Upserting {len(request.documents)} documents") - upsert_results = await run_in_threadpool( - kb.upsert, - documents=request.documents, - batch_size=request.batch_size) + await run_in_threadpool( + kb.upsert, documents=request.documents, batch_size=request.batch_size + ) - return upsert_results + return SuccessUpsertResponse() except Exception as e: logger.exception(e) - raise HTTPException( - status_code=500, detail=f"Internal Service Error: {str(e)}") + raise HTTPException(status_code=500, detail=f"Internal Service Error: {str(e)}") @app.post( "/context/delete", + response_model=SuccessDeleteResponse, + responses={500: {"description": "Failed to delete documents"}}, ) async def delete( request: ContextDeleteRequest = Body(...), -): +) -> SuccessDeleteResponse: + """ + Delete documents from the knowledgebase. Deleting documents is done by their unique ID. + """ # noqa: E501 try: logger.info(f"Delete {len(request.document_ids)} documents") - await run_in_threadpool( - kb.delete, - document_ids=request.document_ids) - return {"message": "success"} + await run_in_threadpool(kb.delete, document_ids=request.document_ids) + return SuccessDeleteResponse() except Exception as e: logger.exception(e) - raise HTTPException( - status_code=500, detail=f"Internal Service Error: {str(e)}") + raise HTTPException(status_code=500, detail=f"Internal Service Error: {str(e)}") @app.get( "/health", + response_model=HealthStatus, + responses={500: {"description": "Failed to connect to Pinecone or LLM"}}, ) -async def health_check(): +@app.exception_handler(Exception) +async def health_check() -> HealthStatus: + """ + Health check for the Canopy server. This endpoint checks the connection to Pinecone and the LLM. + """ # noqa: E501 try: await run_in_threadpool(kb.verify_index_connection) except Exception as e: err_msg = f"Failed connecting to Pinecone Index {kb._index_name}" logger.exception(err_msg) raise HTTPException( - status_code=500, detail=f"{err_msg}. Error: {str(e)}") from e + status_code=500, detail=f"{err_msg}. Error: {str(e)}" + ) from e try: msg = UserMessage(content="This is a health check. Are you alive? Be concise") - await run_in_threadpool(llm.chat_completion, - messages=[msg], - max_tokens=50) + await run_in_threadpool(llm.chat_completion, messages=[msg], max_tokens=50) except Exception as e: err_msg = f"Failed to communicate with {llm.__class__.__name__}" logger.exception(err_msg) raise HTTPException( - status_code=500, detail=f"{err_msg}. Error: {str(e)}") from e + status_code=500, detail=f"{err_msg}. Error: {str(e)}" + ) from e return HealthStatus(pinecone_status="OK", llm_status="OK") -@app.get( - "/shutdown" -) -async def shutdown(): +@app.get("/shutdown") +async def shutdown() -> ShutdownResponse: + """ + __WARNING__: Experimental method. + + + This method will shutdown the server. It is used for testing purposes, and not recommended to be used + in production. + This method will locate the parent process and send a SIGINT signal to it. 
+ """ # noqa: E501 logger.info("Shutting down") proc = current_process() pid = proc._parent_pid if "SpawnProcess" in proc.name else proc.pid os.kill(pid, signal.SIGINT) - return {"message": "Shutting down"} + return ShutdownResponse() @app.on_event("startup") @@ -190,11 +249,11 @@ def _init_logging(): stdout_handler = logging.StreamHandler(stream=sys.stdout) handlers = [file_handler, stdout_handler] logging.basicConfig( - format='%(asctime)s - %(processName)s - %(name)-10s [%(levelname)-8s]: ' - '%(message)s', + format="%(asctime)s - %(processName)s - %(name)-10s [%(levelname)-8s]: " + "%(message)s", level=os.getenv("CE_LOG_LEVEL", "INFO").upper(), handlers=handlers, - force=True + force=True, ) logger = logging.getLogger(__name__) @@ -211,8 +270,10 @@ def _init_engines(): _load_config(config_file) else: - logger.info("Did not find config file. Initializing engines with default " - "configuration") + logger.info( + "Did not find config file. Initializing engines with default " + "configuration" + ) Tokenizer.initialize() kb = KnowledgeBase(index_name=index_name) context_engine = ContextEngine(knowledge_base=kb) @@ -230,9 +291,7 @@ def _load_config(config_file): config = yaml.safe_load(f) except Exception as e: logger.exception(f"Failed to load config file {config_file}") - raise ConfigError( - f"Failed to load config file {config_file}. Error: {str(e)}" - ) + raise ConfigError(f"Failed to load config file {config_file}. Error: {str(e)}") tokenizer_config = config.get("tokenizer", {}) Tokenizer.initialize_from_config(tokenizer_config) if "chat_engine" not in config: From bdea5a60a07b8adc9abc5ab780ee94e4623e26d7 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 10:39:13 +0200 Subject: [PATCH 02/31] fix response type in Chat --- src/canopy_cli/cli.py | 2 +- src/canopy_server/app.py | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index d0b5c827..bc8d005e 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -559,7 +559,7 @@ def stop(url): """ ) ) -def docs(): +def api_docs(): import json from canopy_cli import HTML_TEMPLATE from canopy_server.app import app diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 97b70881..7e7a9651 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -20,7 +20,7 @@ from fastapi import FastAPI, HTTPException, Body import uvicorn -from typing import cast +from typing import cast, Union from canopy.models.api_models import ( StreamingChatResponse, @@ -43,6 +43,9 @@ from canopy_server import description from canopy import __version__ + +APIChatResponse = Union[ChatResponse, EventSourceResponse] + load_dotenv() # load env vars before import of openai openai.api_key = os.getenv("OPENAI_API_KEY") @@ -65,12 +68,12 @@ @app.post( "/context/chat/completions", - response_model=ChatResponse, + response_model=APIChatResponse, responses={500: {"description": "Failed to chat with Canopy"}}, # noqa: E501 ) async def chat( request: ChatRequest = Body(...), -) -> ChatResponse: +) -> APIChatResponse: """ Chat with Canopy, using the LLM and context engine, and return a response. 
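[Reviewer note, not part of the patch series] A quick way to exercise the chat endpoint introduced in the two patches above is to call it over HTTP. The sketch below is illustrative only: it assumes a server already listening on http://0.0.0.0:8000 (the CLI default), that the `Role` enum serializes to lowercase strings such as "user", and that streamed chunks follow OpenAI's chat.completion.chunk schema, as the `stream` field's description suggests. Only the `messages` and `stream` fields of `ChatRequest` are used.

    import json
    import requests

    CHAT_URL = "http://0.0.0.0:8000/context/chat/completions"

    def chat_once(question: str) -> str:
        # Non-streaming call: the server returns a single ChatResponse JSON body.
        payload = {"messages": [{"role": "user", "content": question}], "stream": False}
        res = requests.post(CHAT_URL, json=payload)
        res.raise_for_status()
        # Assumes the OpenAI-style "choices" layout of ChatResponse.
        return res.json()["choices"][0]["message"]["content"]

    def chat_stream(question: str) -> None:
        # Streaming call: the body is server-sent events, one "data: ..." line per
        # chunk, terminated by "data: [DONE]" per the ChatRequest.stream description.
        payload = {"messages": [{"role": "user", "content": question}], "stream": True}
        with requests.post(CHAT_URL, json=payload, stream=True) as res:
            res.raise_for_status()
            for raw in res.iter_lines():
                if not raw:
                    continue
                data = raw.decode("utf-8")
                if data.startswith("data: "):
                    data = data[len("data: "):]
                if data == "[DONE]":
                    break
                chunk = json.loads(data)  # assumed chat.completion.chunk payload
                print(chunk["choices"][0]["delta"].get("content", ""), end="", flush=True)

    if __name__ == "__main__":
        print(chat_once("What is Canopy?"))
        chat_stream("And what is RAG?")

Note that `stream` appears twice on purpose: the flag in the request body tells Canopy to emit server-sent events, while `stream=True` on `requests.post` tells the client not to buffer the whole response.
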
From 001bac82c246528875d016795fa024cf5bcc3738 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 10:59:51 +0200 Subject: [PATCH 03/31] ignore line mypy --- src/canopy_server/app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 7e7a9651..6c8b9562 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -232,7 +232,8 @@ async def shutdown() -> ShutdownResponse: """ # noqa: E501 logger.info("Shutting down") proc = current_process() - pid = proc._parent_pid if "SpawnProcess" in proc.name else proc.pid + # TODO: fix this, BaseProcess does have _parent_pid but it is not in the stubs + pid = proc._parent_pid if "SpawnProcess" in proc.name else proc.pid # type: ignore os.kill(pid, signal.SIGINT) return ShutdownResponse() From 78a0a3b339d882955b7e67b841b617885d1a5b0a Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 12:45:18 +0200 Subject: [PATCH 04/31] remove soon dep. flag --- src/canopy/models/data_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 05a941e5..eab29a3f 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -22,7 +22,7 @@ class Query(BaseModel): ) top_k: Optional[int] = Field( default=None, - description="[soon deprecated] The number of results to return." + description="The number of results to return." ) query_params: dict = Field( default_factory=dict, From 2ad53851a0c1e8578554287bf39b9829f32903ba Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 12:50:35 +0200 Subject: [PATCH 05/31] Add 'ignored' for model param --- src/canopy_server/api_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index c80d542f..cb6c6049 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -8,7 +8,7 @@ class ChatRequest(BaseModel): model: str = Field( default="", - description="ID of the model to use. If empty, the default model will be used.", # noqa: E501 + description="ID of the model to use. Currecntly this field is ignored and this should be configured on Canopy config.", # noqa: E501 ) messages: Messages = Field( description="A list of messages comprising the conversation so far." From 575301cf6abc6bc36557d76e9741a63ed209517f Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 12:51:27 +0200 Subject: [PATCH 06/31] stream descp --- src/canopy_server/api_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index cb6c6049..82c1299b 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -15,7 +15,7 @@ class ChatRequest(BaseModel): ) stream: bool = Field( default=False, - description="""If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.""", # noqa: E501 + description="""Whether or not to stream the chatbot's response. 
If set, the response will be server-sent events containing [chat.completion.chunk](https://platform.openai.com/docs/api-reference/chat/streaming) objects""", # noqa: E501 ) user: Optional[str] = Field( default=None, From 257130c0129ccb3670c4ac5073b938d7bf573380 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 12:52:09 +0200 Subject: [PATCH 07/31] user descp --- src/canopy_server/api_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index 82c1299b..868fd4e3 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -19,7 +19,7 @@ class ChatRequest(BaseModel): ) user: Optional[str] = Field( default=None, - description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.", # noqa: E501 + description="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. Unused, reserved for future extensions", # noqa: E501 ) From 553661cc292ed9ac30a8cc8a511ae5c249849735 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 12:52:57 +0200 Subject: [PATCH 08/31] batch size --- src/canopy_server/api_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index 868fd4e3..b6406a6b 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -31,7 +31,7 @@ class ContextQueryRequest(BaseModel): class ContextUpsertRequest(BaseModel): documents: List[Document] batch_size: int = Field( - default=200, description="Batch size for upserting documents to Pinecone." + default=200, description="The batch size to use when uploading documents chunks to the Pinecone Index." # noqa: E501 ) From d4a725ea77dbc98d2b059148faa9887ed3f57f35 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 13:00:11 +0200 Subject: [PATCH 09/31] app.py --- src/canopy_server/app.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 6c8b9562..9c375fa5 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -77,8 +77,8 @@ async def chat( """ Chat with Canopy, using the LLM and context engine, and return a response. - The request schema is following OpenAI's chat completion API schema, but removes the need to configure - anything, other than the messages field: for more imformation see: https://platform.openai.com/docs/api-reference/chat/create + The request schema is following OpenAI's chat completion API schema: https://platform.openai.com/docs/api-reference/chat/create. + Note that all fields other than `messages` and `stream` are currently ignored. The Canopy server uses the model parameters defined in the `ChatEngine` config for all underlying LLM calls. """ # noqa: E501 try: @@ -121,9 +121,10 @@ async def query( request: ContextQueryRequest = Body(...), ) -> ContextContentResponse: """ - Query the knowledgebase and return a context. Context is a collections of text snippets, each with a source. - Query enables tuning the context length (in tokens) such that you can cap the cost of the generation. - This method can be used with or without a LLM. + Query the knowledge base for relevant context. + The returned text might be structured or unstructured, depending on the ContextEngine's configuration. + Query allows limiting the context length (in tokens), to control LLM costs. 
+ This method does not pass through the LLM and uses only retieval and construction from Pinecone DB. """ # noqa: E501 try: context: Context = await run_in_threadpool( @@ -151,8 +152,7 @@ async def upsert( Upsert documents into the knowledgebase. Upserting is a way to add new documents or update existing ones. Each document has a unique ID. If a document with the same ID already exists, it will be updated. - This method will run the processing, chunking and endocing of the data in parallel, and then send the - encoded data to the Pinecone Index in batches. + The documents will be chunked and encoded, then the resulting encoded chunks will be sent to the Pinecone index in batches """ # noqa: E501 try: logger.info(f"Upserting {len(request.documents)} documents") From c7a014cc3285bb485ed62ac330b4c269b93670a7 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 13:11:19 +0200 Subject: [PATCH 10/31] fix trailing whitespace --- src/canopy_server/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 9c375fa5..406fbefc 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -77,7 +77,7 @@ async def chat( """ Chat with Canopy, using the LLM and context engine, and return a response. - The request schema is following OpenAI's chat completion API schema: https://platform.openai.com/docs/api-reference/chat/create. + The request schema is following OpenAI's chat completion API schema: https://platform.openai.com/docs/api-reference/chat/create. Note that all fields other than `messages` and `stream` are currently ignored. The Canopy server uses the model parameters defined in the `ChatEngine` config for all underlying LLM calls. """ # noqa: E501 @@ -121,7 +121,7 @@ async def query( request: ContextQueryRequest = Body(...), ) -> ContextContentResponse: """ - Query the knowledge base for relevant context. + Query the knowledge base for relevant context. The returned text might be structured or unstructured, depending on the ContextEngine's configuration. Query allows limiting the context length (in tokens), to control LLM costs. This method does not pass through the LLM and uses only retieval and construction from Pinecone DB. 
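[Reviewer note, not part of the patch series] Because the docstrings revised above describe /context/upsert and /context/query only in prose, here is a minimal end-to-end sketch. It assumes a server running on the CLI's default address and builds the request bodies strictly from the fields visible in the request models: `documents` and `batch_size` for upsert, `queries` and `max_tokens` for query, with each query carrying the `text` field of the `Query` model. The document id, source, and metadata values are made up for illustration.

    import requests

    BASE_URL = "http://0.0.0.0:8000"

    # Upsert: documents are chunked and encoded server-side, then the resulting
    # chunks are written to the Pinecone index in batches of `batch_size`.
    documents = [
        {
            "id": "doc1",
            "text": "Photosynthesis converts light energy into chemical energy.",
            "source": "https://example.com/photosynthesis",
            # Must avoid the reserved keys "text", "document_id" and "source"
            # enforced by the Document validator.
            "metadata": {"topic": "biology"},
        },
    ]
    res = requests.post(f"{BASE_URL}/context/upsert",
                        json={"documents": documents, "batch_size": 200})
    res.raise_for_status()  # body is a SuccessUpsertResponse on success

    # Query: retrieval and context construction only, no LLM call.
    # `max_tokens` caps the size (in tokens) of the returned context.
    res = requests.post(f"{BASE_URL}/context/query",
                        json={"queries": [{"text": "How does photosynthesis work?"}],
                              "max_tokens": 512})
    res.raise_for_status()
    print(res.json())  # shape depends on the configured ContextEngine
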
From 1d9d566050dccf62b8cc2a4ab6f2794909059486 Mon Sep 17 00:00:00 2001 From: Roy Miara Date: Thu, 2 Nov 2023 22:23:26 +0200 Subject: [PATCH 11/31] fix --- src/canopy_server/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 406fbefc..9e016d35 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -68,7 +68,7 @@ @app.post( "/context/chat/completions", - response_model=APIChatResponse, + response_model=None, responses={500: {"description": "Failed to chat with Canopy"}}, # noqa: E501 ) async def chat( From acc42c3aea0ea9d8c26608a18d1c2bc6e88570da Mon Sep 17 00:00:00 2001 From: miararoy Date: Thu, 2 Nov 2023 22:57:03 +0200 Subject: [PATCH 12/31] Apply suggestions from code review Co-authored-by: byronnlandry <104170519+byronnlandry@users.noreply.github.com> --- src/canopy_server/api_models.py | 2 +- src/canopy_server/app.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index b6406a6b..530a2cb1 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -36,7 +36,7 @@ class ContextUpsertRequest(BaseModel): class ContextDeleteRequest(BaseModel): - document_ids: List[str] = Field(description="List of document ids to delete.") + document_ids: List[str] = Field(description="List of document IDs to delete.") class HealthStatus(BaseModel): diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 9e016d35..1870cef6 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -114,7 +114,7 @@ def stringify_content(response: StreamingChatResponse): "/context/query", response_model=ContextContentResponse, responses={ - 500: {"description": "Failed to query the knowledgebase or Build the context"} + 500: {"description": "Failed to query the knowledge base or build the context"} }, ) async def query( @@ -149,8 +149,8 @@ async def upsert( request: ContextUpsertRequest = Body(...), ) -> SuccessUpsertResponse: """ - Upsert documents into the knowledgebase. Upserting is a way to add new documents or update existing ones. - Each document has a unique ID. If a document with the same ID already exists, it will be updated. + Upsert documents into the knowledge base. Upserting is a way to add new documents or update existing ones. + Each document has a unique ID. If a document with the same ID already exists, it is updated. 
The documents will be chunked and encoded, then the resulting encoded chunks will be sent to the Pinecone index in batches """ # noqa: E501 From 8227d848033c6bc40a059d6f85aecf9fa694ce6b Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 10:55:36 +0200 Subject: [PATCH 13/31] [app] Don't use private attribute Instead of using process._parent_id which is not garuranteed, use `os.getppid()` --- src/canopy_server/app.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 1870cef6..d961084a 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -232,8 +232,7 @@ async def shutdown() -> ShutdownResponse: """ # noqa: E501 logger.info("Shutting down") proc = current_process() - # TODO: fix this, BaseProcess does have _parent_pid but it is not in the stubs - pid = proc._parent_pid if "SpawnProcess" in proc.name else proc.pid # type: ignore + pid = os.getppid() if "SpawnProcess" in proc.name else proc.pid os.kill(pid, signal.SIGINT) return ShutdownResponse() From 6f4b0a13d39a066adf3116620e08c817ebb06cad Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 11:00:14 +0200 Subject: [PATCH 14/31] [app] Parent process - use an even better solution This is the official method by the mp module, which should work across all OSes (hopefully..) --- src/canopy_server/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index d961084a..5c90f573 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -5,7 +5,7 @@ import uuid import openai -from multiprocessing import current_process +from multiprocessing import current_process, parent_process import yaml from dotenv import load_dotenv @@ -232,7 +232,7 @@ async def shutdown() -> ShutdownResponse: """ # noqa: E501 logger.info("Shutting down") proc = current_process() - pid = os.getppid() if "SpawnProcess" in proc.name else proc.pid + pid = parent_process().pid if parent_process() else proc.pid os.kill(pid, signal.SIGINT) return ShutdownResponse() From 9db8c0f3ccf7dc641778da144958c375bc2ddd7a Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 11:50:30 +0200 Subject: [PATCH 15/31] [app] Properly handle None case It shouldn't theotically happen, but who knows... --- src/canopy_server/app.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 5c90f573..ca109a74 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -26,7 +26,7 @@ StreamingChatResponse, ChatResponse, ) -from canopy.models.data_models import Context, UserMessage, ContextContentResponse +from canopy.models.data_models import Context, UserMessage, ContextContents from .api_models import ( ChatRequest, ContextQueryRequest, @@ -112,14 +112,14 @@ def stringify_content(response: StreamingChatResponse): @app.post( "/context/query", - response_model=ContextContentResponse, + response_model=ContextContents, responses={ 500: {"description": "Failed to query the knowledge base or build the context"} }, ) async def query( request: ContextQueryRequest = Body(...), -) -> ContextContentResponse: +) -> ContextContents: """ Query the knowledge base for relevant context. The returned text might be structured or unstructured, depending on the ContextEngine's configuration. 
@@ -232,7 +232,13 @@ async def shutdown() -> ShutdownResponse: """ # noqa: E501 logger.info("Shutting down") proc = current_process() - pid = parent_process().pid if parent_process() else proc.pid + p_process = parent_process() + pid = p_process.pid if p_process is not None else proc.pid + if not pid: + raise HTTPException( + status_code=500, + detail="Failed to locate parent process. Cannot shutdown server.", + ) os.kill(pid, signal.SIGINT) return ShutdownResponse() From 8f019b6a4d324b7c7191a1a540c37cc5ec4d5789 Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 11:56:27 +0200 Subject: [PATCH 16/31] [models] Rename ContextContentResponse This was a horrible name that doesn't represent the true meaning of this class --- src/canopy/context_engine/models.py | 4 ++-- src/canopy/models/data_models.py | 15 ++++++++------- src/canopy_server/app.py | 6 +++--- tests/unit/context_engine/test_context_engine.py | 10 +++++----- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/canopy/context_engine/models.py b/src/canopy/context_engine/models.py index bb42946f..5f273425 100644 --- a/src/canopy/context_engine/models.py +++ b/src/canopy/context_engine/models.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from canopy.models.data_models import ContextContent +from canopy.models.data_models import _ContextContent class ContextSnippet(BaseModel): @@ -10,7 +10,7 @@ class ContextSnippet(BaseModel): text: str -class ContextQueryResult(ContextContent): +class ContextQueryResult(_ContextContent): query: str snippets: List[ContextSnippet] diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index eab29a3f..639100f6 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -56,7 +56,7 @@ def metadata_reseved_fields(cls, v): return v -class ContextContent(BaseModel, ABC): +class _ContextContent(BaseModel, ABC): # Any context should be able to be represented as well formatted text. # In the most minimal case, that could simply be a call to `.json()`. 
@abstractmethod @@ -64,20 +64,21 @@ def to_text(self, **kwargs) -> str: pass +ContextContent = Union[_ContextContent, Sequence[_ContextContent]] + + class Context(BaseModel): - content: Union[ContextContent, Sequence[ContextContent]] + content: ContextContent num_tokens: int = Field(exclude=True) debug_info: dict = Field(default_factory=dict, exclude=True) def to_text(self, **kwargs) -> str: - if isinstance(self.content, ContextContent): - return self.content.to_text(**kwargs) - else: + if isinstance(self.content, Sequence): return "\n".join([c.to_text(**kwargs) for c in self.content]) + else: + return self.content.to_text(**kwargs) -ContextContentResponse = Union[ContextContent, Sequence[ContextContent]] - # --------------------- LLM models ------------------------ diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index ca109a74..f9957952 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -26,7 +26,7 @@ StreamingChatResponse, ChatResponse, ) -from canopy.models.data_models import Context, UserMessage, ContextContents +from canopy.models.data_models import Context, UserMessage, ContextContent from .api_models import ( ChatRequest, ContextQueryRequest, @@ -112,14 +112,14 @@ def stringify_content(response: StreamingChatResponse): @app.post( "/context/query", - response_model=ContextContents, + response_model=ContextContent, responses={ 500: {"description": "Failed to query the knowledge base or build the context"} }, ) async def query( request: ContextQueryRequest = Body(...), -) -> ContextContents: +) -> ContextContent: """ Query the knowledge base for relevant context. The returned text might be structured or unstructured, depending on the ContextEngine's configuration. diff --git a/tests/unit/context_engine/test_context_engine.py b/tests/unit/context_engine/test_context_engine.py index 6eeefedb..a102db36 100644 --- a/tests/unit/context_engine/test_context_engine.py +++ b/tests/unit/context_engine/test_context_engine.py @@ -8,7 +8,7 @@ from canopy.context_engine.models import ContextQueryResult, ContextSnippet from canopy.knowledge_base.base import BaseKnowledgeBase from canopy.knowledge_base.models import QueryResult, DocumentWithScore -from canopy.models.data_models import Query, Context, ContextContent +from canopy.models.data_models import Query, Context, _ContextContent class TestContextEngine: @@ -68,7 +68,7 @@ def test_query(context_engine, queries = [Query(text="How does photosynthesis work?")] max_context_tokens = 100 - mock_context_content = create_autospec(ContextContent) + mock_context_content = create_autospec(_ContextContent) mock_context_content.to_text.return_value = sample_context_text mock_context = Context(content=mock_context_content, num_tokens=21) @@ -93,7 +93,7 @@ def test_query_with_metadata_filter(context_engine, queries = [Query(text="How does photosynthesis work?")] max_context_tokens = 100 - mock_context_content = create_autospec(ContextContent) + mock_context_content = create_autospec(_ContextContent) mock_context_content.to_text.return_value = sample_context_text mock_context = Context(content=mock_context_content, num_tokens=21) @@ -149,7 +149,7 @@ def test_multiple_queries(context_engine, mock_knowledge_base.query.return_value = extended_mock_query_result combined_text = sample_context_text + "\n" + text - mock_context_content = create_autospec(ContextContent) + mock_context_content = create_autospec(_ContextContent) mock_context_content.to_text.return_value = combined_text mock_context = 
Context(content=mock_context_content, num_tokens=40) @@ -168,7 +168,7 @@ def test_empty_query_results(context_engine, mock_knowledge_base.query.return_value = [] - mock_context_content = create_autospec(ContextContent) + mock_context_content = create_autospec(_ContextContent) mock_context_content.to_text.return_value = "" mock_context = Context(content=mock_context_content, num_tokens=0) From 08a7f2343da539bcf149fc8f4c5fe5387c833924 Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 12:12:00 +0200 Subject: [PATCH 17/31] [app] Fix description Per Nathan and Byron's feedback --- src/canopy_server/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/canopy_server/__init__.py b/src/canopy_server/__init__.py index 85a89cf2..208f7cfc 100644 --- a/src/canopy_server/__init__.py +++ b/src/canopy_server/__init__.py @@ -6,8 +6,9 @@ ## Prerequisites ### Pinecone API key -To get Pinecone free trial API key and environment register or log into your Pinecone account in the console (https://app.pinecone.io/). You can access your API key from the "API Keys" section in the sidebar of your dashboard, and find the environment name next to it. +If you don't have a Pinecone account, you can sign up for a free Starter plan at https://www.pinecone.io/. +To find your Pinecone API key and environment log into Pinecone console (https://app.pinecone.io/). You can access your API key from the "API Keys" section in the sidebar of your dashboard, and find the environment name next to it. ### OpenAI API key -You can find your free trial OpenAI API key https://platform.openai.com/account/api-keys. You might need to login or register to OpenAI services. +You can find your free trial OpenAI API key https://platform.openai.com/account/api-keys. You might need to log in or register for OpenAI services. """ # noqa: E501 From 3d166000538846269412f94b3967861926fd0505 Mon Sep 17 00:00:00 2001 From: igiloh-pinecone <118673156+igiloh-pinecone@users.noreply.github.com> Date: Sun, 5 Nov 2023 12:19:53 +0200 Subject: [PATCH 18/31] Apply Byron's and Nathan's suggestions from code review Co-authored-by: byronnlandry <104170519+byronnlandry@users.noreply.github.com> --- src/canopy/models/data_models.py | 8 ++++---- src/canopy_server/__init__.py | 2 +- src/canopy_server/api_models.py | 4 ++-- src/canopy_server/app.py | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 639100f6..44f1ee92 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -14,11 +14,11 @@ class Query(BaseModel): text: str = Field(description="The query text.") namespace: str = Field( default="", - description="The namespace of the query, to learn more about namespaces, see https://docs.pinecone.io/docs/namespaces", # noqa: E501 + description="The namespace of the query. 
To learn more about namespaces, see https://docs.pinecone.io/docs/namespaces", # noqa: E501 ) metadata_filter: Optional[dict] = Field( default=None, - description="A pinecone metadata filter, to learn more about metadata filters, see https://docs.pinecone.io/docs/metadata-filtering", # noqa: E501 + description="A Pinecone metadata filter, to learn more about metadata filters, see https://docs.pinecone.io/docs/metadata-filtering", # noqa: E501 ) top_k: Optional[int] = Field( default=None, @@ -39,7 +39,7 @@ class Document(BaseModel): ) metadata: Metadata = Field( default_factory=dict, - description="The document metadata, to learn more about metadata, see https://docs.pinecone.io/docs/manage-data", # noqa: E501 + description="The document metadata. To learn more about metadata, see https://docs.pinecone.io/docs/manage-data", # noqa: E501 ) class Config: @@ -89,7 +89,7 @@ class Role(Enum): class MessageBase(BaseModel): - role: Role = Field(description="The role of the messages author.") + role: Role = Field(description="The role of the message's author. Can be one of ['User', 'Assistant', 'System']") content: str = Field(description="The contents of the message.") def dict(self, *args, **kwargs): diff --git a/src/canopy_server/__init__.py b/src/canopy_server/__init__.py index 208f7cfc..9ca26cbd 100644 --- a/src/canopy_server/__init__.py +++ b/src/canopy_server/__init__.py @@ -1,7 +1,7 @@ description = """ Canopy is an open-source Retrieval Augmented Generation (RAG) framework and context engine built on top of the Pinecone vector database. Canopy enables you to quickly and easily experiment with and build applications using RAG. Start chatting with your documents or text data with a few simple commands. -Canopy provides a configurable built-in server so you can effortlessly deploy a RAG-powered chat application to your existing chat UI or interface. Or you can build your own, custom RAG application using the Canopy lirbary. +Canopy provides a configurable built-in server, so you can effortlessly deploy a RAG-powered chat application to your existing chat UI or interface. Or you can build your own custom RAG application using the Canopy library. ## Prerequisites diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index 530a2cb1..4541290e 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -8,14 +8,14 @@ class ChatRequest(BaseModel): model: str = Field( default="", - description="ID of the model to use. Currecntly this field is ignored and this should be configured on Canopy config.", # noqa: E501 + description="The ID of the model to use. This field is ignored; instead, configure this field in the Canopy config.", # noqa: E501 ) messages: Messages = Field( description="A list of messages comprising the conversation so far." ) stream: bool = Field( default=False, - description="""Whether or not to stream the chatbot's response. If set, the response will be server-sent events containing [chat.completion.chunk](https://platform.openai.com/docs/api-reference/chat/streaming) objects""", # noqa: E501 + description="""Whether or not to stream the chatbot's response. 
If set, the response is server-sent events containing [chat.completion.chunk](https://platform.openai.com/docs/api-reference/chat/streaming) objects""", # noqa: E501 ) user: Optional[str] = Field( default=None, diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index f9957952..26367591 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -77,7 +77,7 @@ async def chat( """ Chat with Canopy, using the LLM and context engine, and return a response. - The request schema is following OpenAI's chat completion API schema: https://platform.openai.com/docs/api-reference/chat/create. + The request schema follows OpenAI's chat completion API schema: https://platform.openai.com/docs/api-reference/chat/create. Note that all fields other than `messages` and `stream` are currently ignored. The Canopy server uses the model parameters defined in the `ChatEngine` config for all underlying LLM calls. """ # noqa: E501 @@ -122,9 +122,9 @@ async def query( ) -> ContextContent: """ Query the knowledge base for relevant context. - The returned text might be structured or unstructured, depending on the ContextEngine's configuration. - Query allows limiting the context length (in tokens), to control LLM costs. - This method does not pass through the LLM and uses only retieval and construction from Pinecone DB. + The returned text may be structured or unstructured, depending on the Canopy configuration. + Query allows limiting the context length in tokens to control LLM costs. + This method does not pass through the LLM and uses only retrieval and construction from Pinecone DB. """ # noqa: E501 try: context: Context = await run_in_threadpool( @@ -152,7 +152,7 @@ async def upsert( Upsert documents into the knowledge base. Upserting is a way to add new documents or update existing ones. Each document has a unique ID. If a document with the same ID already exists, it is updated. - The documents will be chunked and encoded, then the resulting encoded chunks will be sent to the Pinecone index in batches + The documents are chunked and encoded, then the resulting encoded chunks are sent to the Pinecone index in batches. """ # noqa: E501 try: logger.info(f"Upserting {len(request.documents)} documents") From 2bbba59d4c258b63372d49593663942b321a74ad Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 12:35:49 +0200 Subject: [PATCH 19/31] [app] Made generating the docs optional This way they are only generated if the user want to generate them locally --- src/canopy_cli/cli.py | 38 ++++++++++++++++++++++++----------- src/canopy_server/__init__.py | 13 ------------ src/canopy_server/app.py | 19 ++++++++++++++++-- 3 files changed, 43 insertions(+), 27 deletions(-) diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index af1c6b1f..ecd691c7 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -577,20 +577,34 @@ def stop(url): """ ) ) -def api_docs(): - import json - from canopy_cli import HTML_TEMPLATE - from canopy_server.app import app - # generate docs - - filename = "canopy-api-docs.html" - - with open(filename, "w") as fd: - print(HTML_TEMPLATE % json.dumps(app.openapi()), file=fd) - +@click.option("--url", default="http://0.0.0.0:8000", + help="Canopy's service url. Defaults to http://0.0.0.0:8000") +def api_docs(url): import webbrowser - webbrowser.open('file://' + os.path.realpath(filename)) + generated_docs = False + try: + check_service_health(url) + except CLIError: + msg = (f"Canopy server is not running. 
Would you like to generate the docs " + f"to a local HTML file?") + click.confirm(click.style(msg, fg="red"), abort=True) + generated_docs = True + + if generated_docs: + import json + from canopy_cli import HTML_TEMPLATE + from canopy_server.app import app + # generate docs + + filename = "canopy-api-docs.html" + msg = f"Generating docs to {filename}" + click.echo(click.style(msg, fg="green")) + with open(filename, "w") as fd: + print(HTML_TEMPLATE % json.dumps(app.openapi()), file=fd) + webbrowser.open('file://' + os.path.realpath(filename)) + else: + webbrowser.open('http://localhost:8000/redoc') if __name__ == "__main__": diff --git a/src/canopy_server/__init__.py b/src/canopy_server/__init__.py index 9ca26cbd..8b137891 100644 --- a/src/canopy_server/__init__.py +++ b/src/canopy_server/__init__.py @@ -1,14 +1 @@ -description = """ -Canopy is an open-source Retrieval Augmented Generation (RAG) framework and context engine built on top of the Pinecone vector database. Canopy enables you to quickly and easily experiment with and build applications using RAG. Start chatting with your documents or text data with a few simple commands. -Canopy provides a configurable built-in server, so you can effortlessly deploy a RAG-powered chat application to your existing chat UI or interface. Or you can build your own custom RAG application using the Canopy library. - -## Prerequisites - -### Pinecone API key -If you don't have a Pinecone account, you can sign up for a free Starter plan at https://www.pinecone.io/. -To find your Pinecone API key and environment log into Pinecone console (https://app.pinecone.io/). You can access your API key from the "API Keys" section in the sidebar of your dashboard, and find the environment name next to it. - -### OpenAI API key -You can find your free trial OpenAI API key https://platform.openai.com/account/api-keys. You might need to log in or register for OpenAI services. -""" # noqa: E501 diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 26367591..3341c7bb 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -40,7 +40,6 @@ from canopy.llm.openai import OpenAILLM from canopy_cli.errors import ConfigError -from canopy_server import description from canopy import __version__ @@ -49,9 +48,25 @@ load_dotenv() # load env vars before import of openai openai.api_key = os.getenv("OPENAI_API_KEY") +APP_DESCRIPTION = """ +Canopy is an open-source Retrieval Augmented Generation (RAG) framework and context engine built on top of the Pinecone vector database. Canopy enables you to quickly and easily experiment with and build applications using RAG. Start chatting with your documents or text data with a few simple commands. + +Canopy provides a configurable built-in server, so you can effortlessly deploy a RAG-powered chat application to your existing chat UI or interface. Or you can build your own custom RAG application using the Canopy library. + +## Prerequisites + +### Pinecone API key +If you don't have a Pinecone account, you can sign up for a free Starter plan at https://www.pinecone.io/. +To find your Pinecone API key and environment log into Pinecone console (https://app.pinecone.io/). You can access your API key from the "API Keys" section in the sidebar of your dashboard, and find the environment name next to it. + +### OpenAI API key +You can find your free trial OpenAI API key https://platform.openai.com/account/api-keys. You might need to log in or register for OpenAI services. 
+""" # noqa: E501 + + app = FastAPI( title="Canopy API", - description=description, + description=APP_DESCRIPTION, version=__version__, license_info={ "name": "Apache 2.0", From 4ac9ff9d218219e4dd8c9f605d53c3abd1a5d945 Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 12:40:29 +0200 Subject: [PATCH 20/31] [app] Moved docs template to their own dedicated file Much more readable --- src/canopy_cli/__init__.py | 28 +-------------------------- src/canopy_cli/cli.py | 2 +- src/canopy_server/_redocs_template.py | 27 ++++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 28 deletions(-) create mode 100644 src/canopy_server/_redocs_template.py diff --git a/src/canopy_cli/__init__.py b/src/canopy_cli/__init__.py index a4791643..8b137891 100644 --- a/src/canopy_cli/__init__.py +++ b/src/canopy_cli/__init__.py @@ -1,27 +1 @@ -HTML_TEMPLATE = """ - - - - Canopy API Spec - - - - - - - -
- Redoc - - - - -""" # noqa: E501 + diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index ecd691c7..5a181a5c 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -593,7 +593,7 @@ def api_docs(url): if generated_docs: import json - from canopy_cli import HTML_TEMPLATE + from canopy_server._redocs_template import HTML_TEMPLATE from canopy_server.app import app # generate docs diff --git a/src/canopy_server/_redocs_template.py b/src/canopy_server/_redocs_template.py new file mode 100644 index 00000000..a4791643 --- /dev/null +++ b/src/canopy_server/_redocs_template.py @@ -0,0 +1,27 @@ +HTML_TEMPLATE = """ + + + + Canopy API Spec + + + + + + + +
+ Redoc + + + + +""" # noqa: E501 From 71c7fc6a8a1cc7d1e5e82658e9848168f222d13f Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 12:42:38 +0200 Subject: [PATCH 21/31] fix linter issues --- src/canopy/models/data_models.py | 3 ++- src/canopy_cli/__init__.py | 1 - src/canopy_cli/cli.py | 4 ++-- src/canopy_server/__init__.py | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 44f1ee92..3f57ccb6 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -89,7 +89,8 @@ class Role(Enum): class MessageBase(BaseModel): - role: Role = Field(description="The role of the message's author. Can be one of ['User', 'Assistant', 'System']") + role: Role = Field(description="The role of the message's author. " + "Can be one of ['User', 'Assistant', 'System']") content: str = Field(description="The contents of the message.") def dict(self, *args, **kwargs): diff --git a/src/canopy_cli/__init__.py b/src/canopy_cli/__init__.py index 8b137891..e69de29b 100644 --- a/src/canopy_cli/__init__.py +++ b/src/canopy_cli/__init__.py @@ -1 +0,0 @@ - diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index 5a181a5c..6bcd9fe3 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -586,8 +586,8 @@ def api_docs(url): try: check_service_health(url) except CLIError: - msg = (f"Canopy server is not running. Would you like to generate the docs " - f"to a local HTML file?") + msg = ("Canopy server is not running. Would you like to generate the docs " + "to a local HTML file?") click.confirm(click.style(msg, fg="red"), abort=True) generated_docs = True diff --git a/src/canopy_server/__init__.py b/src/canopy_server/__init__.py index 8b137891..e69de29b 100644 --- a/src/canopy_server/__init__.py +++ b/src/canopy_server/__init__.py @@ -1 +0,0 @@ - From 3e63f95de258e2f600874267dabc4a30a9d06dad Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 13:09:00 +0200 Subject: [PATCH 22/31] [CLI] Control commands order in help message This way, running 'canopy --help' prints the errors in the order matching the quick start --- src/canopy_cli/cli.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index 6bcd9fe3..716ec1a3 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -131,7 +131,29 @@ def _load_kb_config(config_file: Optional[str]) -> Dict[str, Any]: return kb_config -@click.group(invoke_without_command=True, context_settings=CONTEXT_SETTINGS) +class CanopyCommandGroup(click.Group): + """ + A custom click Group that lets us control the order of commands in the help menu. 
+ """ + def __init__(self, name=None, commands=None, **attrs): + super().__init__(name, commands, **attrs) + self._commands_order = { + "new": 0, + "upsert": 1, + "start": 2, + "chat": 3, + "health": 4, + "stop": 5, + "api-docs": 6, + + } + + def list_commands(self, ctx): + return sorted(self.commands, key=lambda x: self._commands_order.get(x, 1000)) + + +@click.group(invoke_without_command=True, context_settings=CONTEXT_SETTINGS, + cls=CanopyCommandGroup) @click.version_option(__version__, "-v", "--version", prog_name="Canopy") @click.pass_context def cli(ctx): @@ -608,4 +630,4 @@ def api_docs(url): if __name__ == "__main__": - cli() + cli() \ No newline at end of file From 3f8acd299be7b8f6211d244444b2a34e13ee653d Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 13:13:51 +0200 Subject: [PATCH 23/31] [cli] Rename 'service' to 'server' This conforms with the naming we use in the documentation --- src/canopy_cli/cli.py | 56 +++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index 716ec1a3..a70f2e5b 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -30,7 +30,7 @@ from canopy import __version__ -from canopy_server.app import start as start_service +from canopy_server.app import start as start_server from .cli_spinner import Spinner from canopy_server.api_models import ChatDebugInfo @@ -43,14 +43,14 @@ CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) -def check_service_health(url: str): +def check_server_health(url: str): try: res = requests.get(urljoin(url, "/health")) res.raise_for_status() return res.ok except requests.exceptions.ConnectionError: msg = f""" - Canopy service is not running on {url}. + Canopy server is not running on {url}. please run `canopy start` """ raise CLIError(msg) @@ -61,14 +61,14 @@ def check_service_health(url: str): else: error = str(e) msg = ( - f"Canopy service on {url} is not healthy, failed with error: {error}" + f"Canopy server on {url} is not healthy, failed with error: {error}" ) raise CLIError(msg) @retry(reraise=True, wait=wait_fixed(5), stop=stop_after_attempt(6)) -def wait_for_service(chat_service_url: str): - check_service_health(chat_service_url) +def wait_for_server(chat_server_url: str): + check_server_health(chat_server_url) def validate_connection(): @@ -169,12 +169,12 @@ def cli(ctx): # click.echo(command.get_help(ctx)) -@cli.command(help="Check if canopy service is running and healthy.") +@cli.command(help="Check if canopy server is running and healthy.") @click.option("--url", default="http://0.0.0.0:8000", - help="Canopy's service url. Defaults to http://0.0.0.0:8000") + help="Canopy's server url. Defaults to http://0.0.0.0:8000") def health(url): - check_service_health(url) - click.echo(click.style("Canopy service is healthy!", fg="green")) + check_server_health(url) + click.echo(click.style("Canopy server is healthy!", fg="green")) return @@ -430,10 +430,10 @@ def _chat( help="Print additional debugging information") @click.option("--rag/--no-rag", default=True, help="Compare RAG-infused Chatbot with vanilla LLM",) -@click.option("--chat-service-url", default="http://0.0.0.0:8000", - help="URL of the Canopy service to use. Defaults to http://0.0.0.0:8000") -def chat(chat_service_url, rag, debug, stream): - check_service_health(chat_service_url) +@click.option("--chat-server-url", default="http://0.0.0.0:8000", + help="URL of the Canopy server to use. 
Defaults to http://0.0.0.0:8000") +def chat(chat_server_url, rag, debug, stream): + check_server_health(chat_server_url) note_msg = ( "🚨 Note 🚨\n" "Chat is a debugging tool, it is not meant to be used for production!" @@ -445,7 +445,7 @@ def chat(chat_service_url, rag, debug, stream): note_white_message = ( "This method should be used by developers to test the RAG data and model" "during development. " - "When you are ready to deploy, run the Canopy service as a REST API " + "When you are ready to deploy, run the Canopy server as a REST API " "backend for your chatbot UI. \n\n" "Let's Chat!" ) @@ -468,7 +468,7 @@ def chat(chat_service_url, rag, debug, stream): history=history_with_pinecone, message=message, stream=stream, - api_base=os.path.join(chat_service_url, "context"), + api_base=os.path.join(chat_server_url, "context"), print_debug_info=debug, ) @@ -498,7 +498,7 @@ def chat(chat_service_url, rag, debug, stream): help=( """ \b - Start the Canopy service. + Start the Canopy server. This command will launch a uvicorn server that will serve the Canopy API. If you like to try out the chatbot, run `canopy chat` in a separate terminal @@ -521,7 +521,7 @@ def start(host: str, port: str, reload: bool, config: Optional[str], index_name: Optional[str]): note_msg = ( "🚨 Note 🚨\n" - "For debugging only. To run the Canopy service in production, run the command:" + "For debugging only. To run the Canopy server in production, run the command:" "\n" "gunicorn canopy_server.app:app --worker-class uvicorn.workers.UvicornWorker " f"--bind {host}:{port} --workers " @@ -541,30 +541,30 @@ def start(host: str, port: str, reload: bool, ) os.environ["INDEX_NAME"] = index_name - click.echo(f"Starting Canopy service on {host}:{port}") - start_service(host, port=port, reload=reload, config_file=config) + click.echo(f"Starting Canopy server on {host}:{port}") + start_server(host, port=port, reload=reload, config_file=config) @cli.command( help=( """ \b - Stop the Canopy service. - This command will send a shutdown request to the Canopy service. + Stop the Canopy server. + This command will send a shutdown request to the Canopy server. """ ) ) @click.option("url", "--url", default="http://0.0.0.0:8000", - help="URL of the Canopy service to use. Defaults to http://0.0.0.0:8000") + help="URL of the Canopy server to use. Defaults to http://0.0.0.0:8000") def stop(url): - # Check if the service was started using Gunicorn + # Check if the server was started using Gunicorn res = subprocess.run(["pgrep", "-f", "gunicorn canopy_server.app:app"], capture_output=True) output = res.stdout.decode("utf-8").split() # If Gunicorn was used, kill all Gunicorn processes if output: - msg = ("It seems that Canopy service was launched using Gunicorn.\n" + msg = ("It seems that Canopy server was launched using Gunicorn.\n" "Do you want to kill all Gunicorn processes?") click.confirm(click.style(msg, fg="red"), abort=True) try: @@ -586,7 +586,7 @@ def stop(url): return res.ok except requests.exceptions.ConnectionError: msg = f""" - Could not find Canopy service on {url}. + Could not find Canopy server on {url}. """ raise CLIError(msg) @@ -600,13 +600,13 @@ def stop(url): ) ) @click.option("--url", default="http://0.0.0.0:8000", - help="Canopy's service url. Defaults to http://0.0.0.0:8000") + help="Canopy's server url. Defaults to http://0.0.0.0:8000") def api_docs(url): import webbrowser generated_docs = False try: - check_service_health(url) + check_server_health(url) except CLIError: msg = ("Canopy server is not running. 
Would you like to generate the docs "
               "to a local HTML file?")
         click.confirm(click.style(msg, fg="red"), abort=True)
         generated_docs = True

From 1a29f3a73a33c79a03663fd69571f2a4f9fbf00a Mon Sep 17 00:00:00 2001
From: ilai
Date: Sun, 5 Nov 2023 14:30:08 +0200
Subject: [PATCH 24/31] linter

---
 examples/canopy-lib-quickstart.ipynb | 4 ++--
 src/canopy_cli/cli.py                | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb
index 11764922..3b52e57e 100644
--- a/examples/canopy-lib-quickstart.ipynb
+++ b/examples/canopy-lib-quickstart.ipynb
@@ -32,8 +32,8 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
+      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip available: \u001B[0m\u001B[31;49m22.2.2\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.3.1\u001B[0m\n",
+      "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n"
     ]
    }
  ],
diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py
index a70f2e5b..222839c9 100644
--- a/src/canopy_cli/cli.py
+++ b/src/canopy_cli/cli.py
@@ -630,4 +630,4 @@ def api_docs(url):
 
 
 if __name__ == "__main__":
-    cli()
\ No newline at end of file
+    cli()

From 48c8d1bb17e4fda210e08cd7013ddf3ea7acb759 Mon Sep 17 00:00:00 2001
From: ilai
Date: Sun, 5 Nov 2023 23:16:03 +0200
Subject: [PATCH 25/31] Context must contain a ContextContent that implements
 to_text()

In order to support our current StuffingContextBuilder, I added a new
StuffingContextContent which inherits from ContextContent and implements
to_text() correctly.

The app's `/query` path returns a `str`, which is the only guaranteed format
of Context.
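For illustration only (this sketch is not part of the diffs below, and it
simplifies the real models; it assumes pydantic v1 root-model semantics,
which this codebase uses), the new contract boils down to:

    from abc import ABC, abstractmethod
    from typing import List

    from pydantic import BaseModel

    class ContextContent(BaseModel, ABC):
        # Any context content must be representable as well-formatted text.
        @abstractmethod
        def to_text(self, **kwargs) -> str:
            ...

    class StuffingContextContent(ContextContent):
        # pydantic v1 "custom root" model - the content *is* the list itself
        __root__: List[dict]  # simplified; the real model holds ContextQueryResult objects

        def to_text(self, **kwargs) -> str:
            # Rendering to text is plain JSON serialization of the root list
            return self.json(**kwargs)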
The returned content can be any structured or unstructured data -
depending on the ContextBuilder in use.
---
 .../context_builder/stuffing.py               | 19 +++---
 src/canopy/context_engine/models.py           | 22 ++++++-
 src/canopy/models/data_models.py              | 10 +---
 src/canopy_server/app.py                      |  6 +-
 tests/e2e/test_app.py                         |  9 +--
 tests/unit/chat_engine/test_chat_engine.py    | 19 +++---
 .../test_stuffing_context_builder.py          | 58 ++++++++---------
 .../context_engine/test_context_engine.py     | 16 ++---
 8 files changed, 91 insertions(+), 68 deletions(-)

diff --git a/src/canopy/context_engine/context_builder/stuffing.py b/src/canopy/context_engine/context_builder/stuffing.py
index 18ebc56a..8024d547 100644
--- a/src/canopy/context_engine/context_builder/stuffing.py
+++ b/src/canopy/context_engine/context_builder/stuffing.py
@@ -2,7 +2,8 @@
 from typing import List, Tuple
 
 from canopy.context_engine.context_builder.base import ContextBuilder
-from canopy.context_engine.models import ContextQueryResult, ContextSnippet
+from canopy.context_engine.models import (ContextQueryResult, ContextSnippet,
+                                          StuffingContextContent, )
 from canopy.knowledge_base.models import QueryResult, DocumentWithScore
 from canopy.tokenizer import Tokenizer
 from canopy.models.data_models import Context
@@ -24,12 +25,15 @@ def build(self,
             ContextQueryResult(query=qr.query, snippets=[])
             for qr in query_results]
         debug_info = {"num_docs": len(sorted_docs_with_origin)}
 
-        context = Context(content=context_query_results,
-                          num_tokens=0,
-                          debug_info=debug_info)
+        context = Context(
+            content=StuffingContextContent(__root__=context_query_results),
+            num_tokens=0,
+            debug_info=debug_info
+        )
 
         if self._tokenizer.token_count(context.to_text()) > max_context_tokens:
-            return Context(content=[], num_tokens=0, debug_info=debug_info)
+            return Context(content=StuffingContextContent(__root__=[]),
+                           num_tokens=1, debug_info=debug_info)
 
         seen_doc_ids = set()
         for doc, origin_query_idx in sorted_docs_with_origin:
@@ -45,8 +49,9 @@
             context_query_results[origin_query_idx].snippets.pop()
 
         # remove queries with no snippets
-        context.content = [qr for qr in context_query_results
-                           if len(qr.snippets) > 0]
+        context.content = StuffingContextContent(
+            __root__=[qr for qr in context_query_results if len(qr.snippets) > 0]
+        )
 
         context.num_tokens = self._tokenizer.token_count(context.to_text())
         return context

diff --git a/src/canopy/context_engine/models.py b/src/canopy/context_engine/models.py
index 5f273425..1d0266fe 100644
--- a/src/canopy/context_engine/models.py
+++ b/src/canopy/context_engine/models.py
@@ -1,8 +1,8 @@
-from typing import List
+from typing import List, Union
 
 from pydantic import BaseModel
 
-from canopy.models.data_models import _ContextContent
+from canopy.models.data_models import ContextContent
 
 
 class ContextSnippet(BaseModel):
@@ -10,9 +10,25 @@
     text: str
 
 
-class ContextQueryResult(_ContextContent):
+class ContextQueryResult(BaseModel):
     query: str
     snippets: List[ContextSnippet]
 
+
+class StuffingContextContent(ContextContent):
+    __root__: Union[ContextQueryResult, List[ContextQueryResult]]
+
+    def dict(self, **kwargs):
+        return super().dict(**kwargs)['__root__']
+
+    def __iter__(self):
+        return iter(self.__root__)
+
+    def __getitem__(self, item):
+        return self.__root__[item]
+
+    def __len__(self):
+        return len(self.__root__)
+
     def to_text(self, **kwargs):
         return self.json(**kwargs)
 
diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py
index 3f57ccb6..1fb365d1 100644
--- a/src/canopy/models/data_models.py
+++ 
b/src/canopy/models/data_models.py @@ -56,7 +56,7 @@ def metadata_reseved_fields(cls, v): return v -class _ContextContent(BaseModel, ABC): +class ContextContent(BaseModel, ABC): # Any context should be able to be represented as well formatted text. # In the most minimal case, that could simply be a call to `.json()`. @abstractmethod @@ -64,19 +64,13 @@ def to_text(self, **kwargs) -> str: pass -ContextContent = Union[_ContextContent, Sequence[_ContextContent]] - - class Context(BaseModel): content: ContextContent num_tokens: int = Field(exclude=True) debug_info: dict = Field(default_factory=dict, exclude=True) def to_text(self, **kwargs) -> str: - if isinstance(self.content, Sequence): - return "\n".join([c.to_text(**kwargs) for c in self.content]) - else: - return self.content.to_text(**kwargs) + return self.content.to_text(**kwargs) # --------------------- LLM models ------------------------ diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 3341c7bb..84aaf530 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -127,14 +127,13 @@ def stringify_content(response: StreamingChatResponse): @app.post( "/context/query", - response_model=ContextContent, responses={ 500: {"description": "Failed to query the knowledge base or build the context"} }, ) async def query( request: ContextQueryRequest = Body(...), -) -> ContextContent: +) -> str: """ Query the knowledge base for relevant context. The returned text may be structured or unstructured, depending on the Canopy configuration. @@ -147,8 +146,7 @@ async def query( queries=request.queries, max_context_tokens=request.max_tokens, ) - - return context.content + return context.to_text() except Exception as e: logger.exception(e) diff --git a/tests/e2e/test_app.py b/tests/e2e/test_app.py index 41357f4a..fdd8c148 100644 --- a/tests/e2e/test_app.py +++ b/tests/e2e/test_app.py @@ -1,3 +1,4 @@ +import json import os from typing import List @@ -27,14 +28,14 @@ ) -@retry(stop=stop_after_attempt(60), wait=wait_fixed(1)) +@retry(reraise=True, stop=stop_after_attempt(60), wait=wait_fixed(1)) def assert_vector_ids_exist(vector_ids: List[str], knowledge_base: KnowledgeBase): fetch_response = knowledge_base._index.fetch(ids=vector_ids) assert all([v_id in fetch_response["vectors"] for v_id in vector_ids]) -@retry(stop=stop_after_attempt(60), wait=wait_fixed(1)) +@retry(reraise=True, stop=stop_after_attempt(60), wait=wait_fixed(1)) def assert_vector_ids_not_exist(vector_ids: List[str], knowledge_base: KnowledgeBase): fetch_response = knowledge_base._index.fetch(ids=vector_ids) @@ -98,7 +99,7 @@ def test_upsert(client): assert upsert_response.is_success -@retry(stop=stop_after_attempt(60), wait=wait_fixed(1)) +@retry(reraise=True, stop=stop_after_attempt(60), wait=wait_fixed(1)) def test_query(client): # fetch the context with all the right filters query_payload = ContextQueryRequest( @@ -116,7 +117,7 @@ def test_query(client): assert query_response.is_success # test response is as expected on /query - response_as_json = query_response.json() + response_as_json = json.loads(query_response.json()) assert ( response_as_json[0]["query"] diff --git a/tests/unit/chat_engine/test_chat_engine.py b/tests/unit/chat_engine/test_chat_engine.py index 9841424a..330fb616 100644 --- a/tests/unit/chat_engine/test_chat_engine.py +++ b/tests/unit/chat_engine/test_chat_engine.py @@ -6,7 +6,8 @@ from canopy.chat_engine import ChatEngine from canopy.chat_engine.query_generator import QueryGenerator from canopy.context_engine import ContextEngine 
-from canopy.context_engine.models import ContextQueryResult, ContextSnippet +from canopy.context_engine.models import (ContextQueryResult, ContextSnippet, + StuffingContextContent, ) from canopy.llm import BaseLLM from canopy.models.data_models import SystemMessage from canopy.models.api_models import ChatResponse, _Choice, TokenCounts @@ -58,13 +59,15 @@ def _get_inputs_and_expected(self, ] mock_queries = [Query(text="How does photosynthesis work?")] mock_context = Context( - content=ContextQueryResult( - query="How does photosynthesis work?", - - snippets=[ContextSnippet(source="ref 1", - text=self._generate_text(snippet_length)), - ContextSnippet(source="ref 2", - text=self._generate_text(12))] + content=StuffingContextContent( + __root__=ContextQueryResult( + query="How does photosynthesis work?", + + snippets=[ContextSnippet(source="ref 1", + text=self._generate_text(snippet_length)), + ContextSnippet(source="ref 2", + text=self._generate_text(12))] + ) ), num_tokens=1 # TODO: This is a dummy value. Need to improve. ) diff --git a/tests/unit/context_builder/test_stuffing_context_builder.py b/tests/unit/context_builder/test_stuffing_context_builder.py index bfd9a899..3d487712 100644 --- a/tests/unit/context_builder/test_stuffing_context_builder.py +++ b/tests/unit/context_builder/test_stuffing_context_builder.py @@ -1,6 +1,6 @@ from canopy.context_engine.models import \ - ContextSnippet, ContextQueryResult -from canopy.models.data_models import Context + (ContextSnippet, ContextQueryResult, StuffingContextContent, ) +from canopy.models.data_models import Context, ContextContent from ..stubs.stub_tokenizer import StubTokenizer from canopy.knowledge_base.models import \ QueryResult, DocumentWithScore @@ -46,22 +46,25 @@ def setup_method(self): score=1.0) ]) ] - self.full_context = Context(content=[ - ContextQueryResult(query="test query 1", - snippets=[ - ContextSnippet( - text=self.text1, source="test_source1"), - ContextSnippet( - text=self.text2, source="test_source2") - ]), - ContextQueryResult(query="test query 2", - snippets=[ - ContextSnippet( - text=self.text3, source="test_source3"), - ContextSnippet( - text=self.text4, source="test_source4") - ]) - ], num_tokens=0) + self.full_context = Context( + content=StuffingContextContent(__root__=[ + ContextQueryResult(query="test query 1", + snippets=[ + ContextSnippet( + text=self.text1, source="test_source1"), + ContextSnippet( + text=self.text2, source="test_source2") + ]), + ContextQueryResult(query="test query 2", + snippets=[ + ContextSnippet( + text=self.text3, source="test_source3"), + ContextSnippet( + text=self.text4, source="test_source4") + ]) + ]), + num_tokens=0 + ) self.full_context.num_tokens = self.tokenizer.token_count( self.full_context.to_text()) @@ -74,7 +77,7 @@ def test_context_fits_within_max_tokens(self): def test_context_exceeds_max_tokens(self): context = self.builder.build(self.query_results, max_context_tokens=30) - expected_context = Context(content=[ + expected_context = Context(content=StuffingContextContent(__root__=[ ContextQueryResult(query="test query 1", snippets=[ ContextSnippet( @@ -85,7 +88,7 @@ def test_context_exceeds_max_tokens(self): ContextSnippet( text=self.text3, source="test_source3"), ]) - ], num_tokens=0) + ]), num_tokens=0) expected_context.num_tokens = self.tokenizer.token_count( expected_context.to_text()) @@ -96,13 +99,13 @@ def test_context_exceeds_max_tokens_unordered(self): self.query_results[0].documents[0].text = self.text1 * 100 context = self.builder.build(self.query_results, 
max_context_tokens=20) - expected_context = Context(content=[ + expected_context = Context(content=StuffingContextContent(__root__=[ ContextQueryResult(query="test query 2", snippets=[ ContextSnippet( text=self.text3, source="test_source3"), ]) - ], num_tokens=0) + ]), num_tokens=0) expected_context.num_tokens = self.tokenizer.token_count( expected_context.to_text()) @@ -111,18 +114,18 @@ def test_context_exceeds_max_tokens_unordered(self): def test_whole_query_results_not_fit(self): context = self.builder.build(self.query_results, max_context_tokens=10) - assert context.num_tokens == 0 + assert context.num_tokens == 1 assert context.content == [] def test_max_tokens_zero(self): context = self.builder.build(self.query_results, max_context_tokens=0) - self.assert_num_tokens(context, 0) + self.assert_num_tokens(context, 1) assert context.content == [] def test_empty_query_results(self): context = self.builder.build([], max_context_tokens=100) - self.assert_num_tokens(context, 0) - assert len(context.content) == 0 + self.assert_num_tokens(context, 1) + assert context.content == [] def test_documents_with_duplicates(self): duplicate_query_results = self.query_results + [ @@ -165,7 +168,7 @@ def test_empty_documents(self): ] context = self.builder.build( empty_query_results, max_context_tokens=100) - self.assert_num_tokens(context, 0) + self.assert_num_tokens(context, 1) assert context.content == [] def assert_num_tokens(self, context: Context, max_tokens: int): @@ -175,6 +178,7 @@ def assert_num_tokens(self, context: Context, max_tokens: int): @staticmethod def assert_contexts_equal(actual: Context, expected: Context): + assert isinstance(actual.content, ContextContent) assert actual.num_tokens == expected.num_tokens assert len(actual.content) == len(expected.content) for actual_qr, expected_qr in zip(actual.content, expected.content): diff --git a/tests/unit/context_engine/test_context_engine.py b/tests/unit/context_engine/test_context_engine.py index a102db36..17c85ee8 100644 --- a/tests/unit/context_engine/test_context_engine.py +++ b/tests/unit/context_engine/test_context_engine.py @@ -5,10 +5,11 @@ from canopy.context_engine import ContextEngine from canopy.context_engine.context_builder.base import ContextBuilder -from canopy.context_engine.models import ContextQueryResult, ContextSnippet +from canopy.context_engine.models import (ContextQueryResult, ContextSnippet, + StuffingContextContent, ) from canopy.knowledge_base.base import BaseKnowledgeBase from canopy.knowledge_base.models import QueryResult, DocumentWithScore -from canopy.models.data_models import Query, Context, _ContextContent +from canopy.models.data_models import Query, Context, ContextContent class TestContextEngine: @@ -68,7 +69,7 @@ def test_query(context_engine, queries = [Query(text="How does photosynthesis work?")] max_context_tokens = 100 - mock_context_content = create_autospec(_ContextContent) + mock_context_content = create_autospec(ContextContent) mock_context_content.to_text.return_value = sample_context_text mock_context = Context(content=mock_context_content, num_tokens=21) @@ -93,7 +94,7 @@ def test_query_with_metadata_filter(context_engine, queries = [Query(text="How does photosynthesis work?")] max_context_tokens = 100 - mock_context_content = create_autospec(_ContextContent) + mock_context_content = create_autospec(ContextContent) mock_context_content.to_text.return_value = sample_context_text mock_context = Context(content=mock_context_content, num_tokens=21) @@ -149,7 +150,7 @@ def 
test_multiple_queries(context_engine, mock_knowledge_base.query.return_value = extended_mock_query_result combined_text = sample_context_text + "\n" + text - mock_context_content = create_autospec(_ContextContent) + mock_context_content = create_autospec(ContextContent) mock_context_content.to_text.return_value = combined_text mock_context = Context(content=mock_context_content, num_tokens=40) @@ -168,7 +169,7 @@ def test_empty_query_results(context_engine, mock_knowledge_base.query.return_value = [] - mock_context_content = create_autospec(_ContextContent) + mock_context_content = create_autospec(ContextContent) mock_context_content.to_text.return_value = "" mock_context = Context(content=mock_context_content, num_tokens=0) @@ -183,7 +184,8 @@ def test_context_query_result_to_text(): query_result = ContextQueryResult(query="How does photosynthesis work?", snippets=[ContextSnippet(text="42", source="ref")]) - context = Context(content=query_result, num_tokens=1) + context = Context(content=StuffingContextContent(__root__=query_result), + num_tokens=1) assert context.to_text() == json.dumps(query_result.dict()) assert context.to_text(indent=2) == json.dumps(query_result.dict(), indent=2) From 457b61bda55cc74d2c288db5e281cc6eefb34bc1 Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 5 Nov 2023 23:44:14 +0200 Subject: [PATCH 26/31] linters --- examples/canopy-lib-quickstart.ipynb | 4 ++-- src/canopy/models/data_models.py | 2 +- src/canopy_server/app.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/canopy-lib-quickstart.ipynb b/examples/canopy-lib-quickstart.ipynb index ac4981bf..a3e805cc 100644 --- a/examples/canopy-lib-quickstart.ipynb +++ b/examples/canopy-lib-quickstart.ipynb @@ -32,8 +32,8 @@ "output_type": "stream", "text": [ "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip available: \u001B[0m\u001B[31;49m22.2.2\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.3.1\u001B[0m\n", + "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n" ] } ], diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 1fb365d1..b56ef185 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import Optional, List, Union, Dict, Sequence, Literal +from typing import Optional, List, Union, Dict, Literal from pydantic import BaseModel, Field, validator, Extra diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 84aaf530..a1415e04 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -26,7 +26,7 @@ StreamingChatResponse, ChatResponse, ) -from canopy.models.data_models import Context, UserMessage, ContextContent +from canopy.models.data_models import Context, UserMessage from .api_models import ( ChatRequest, ContextQueryRequest, From 50efc687f689f62fa94f0cd4f08613b17d91e0fd Mon Sep 17 00:00:00 2001 From: ilai Date: Mon, 6 Nov 2023 10:56:08 +0200 Subject: 
[PATCH 27/31] [context] Simplify ContextContent - Made StuffingContextContent always a List - Slightly improved readability of `StuffingContextBuilder` --- .../context_engine/context_builder/stuffing.py | 17 +++++++---------- src/canopy/context_engine/models.py | 4 ++-- tests/unit/chat_engine/test_chat_engine.py | 4 ++-- .../unit/context_engine/test_context_engine.py | 6 +++--- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/canopy/context_engine/context_builder/stuffing.py b/src/canopy/context_engine/context_builder/stuffing.py index 8024d547..4b8402c6 100644 --- a/src/canopy/context_engine/context_builder/stuffing.py +++ b/src/canopy/context_engine/context_builder/stuffing.py @@ -25,13 +25,9 @@ def build(self, ContextQueryResult(query=qr.query, snippets=[]) for qr in query_results] debug_info = {"num_docs": len(sorted_docs_with_origin)} - context = Context( - content=StuffingContextContent(__root__=context_query_results), - num_tokens=0, - debug_info=debug_info - ) + content = StuffingContextContent(__root__=context_query_results) - if self._tokenizer.token_count(context.to_text()) > max_context_tokens: + if self._tokenizer.token_count(content.to_text()) > max_context_tokens: return Context(content=StuffingContextContent(__root__=[]), num_tokens=1, debug_info=debug_info) @@ -45,16 +41,17 @@ def build(self, snippet) seen_doc_ids.add(doc.id) # if the context is too long, remove the snippet - if self._tokenizer.token_count(context.to_text()) > max_context_tokens: + if self._tokenizer.token_count(content.to_text()) > max_context_tokens: context_query_results[origin_query_idx].snippets.pop() # remove queries with no snippets - context.content = StuffingContextContent( + content = StuffingContextContent( __root__=[qr for qr in context_query_results if len(qr.snippets) > 0] ) - context.num_tokens = self._tokenizer.token_count(context.to_text()) - return context + return Context(content=content, + num_tokens=self._tokenizer.token_count(content.to_text()), + debug_info=debug_info) @staticmethod def _round_robin_sort( diff --git a/src/canopy/context_engine/models.py b/src/canopy/context_engine/models.py index 1d0266fe..2b21e20a 100644 --- a/src/canopy/context_engine/models.py +++ b/src/canopy/context_engine/models.py @@ -1,4 +1,4 @@ -from typing import List, Union +from typing import List from pydantic import BaseModel @@ -16,7 +16,7 @@ class ContextQueryResult(BaseModel): class StuffingContextContent(ContextContent): - __root__: Union[ContextQueryResult, List[ContextQueryResult]] + __root__: List[ContextQueryResult] def dict(self, **kwargs): return super().dict(**kwargs)['__root__'] diff --git a/tests/unit/chat_engine/test_chat_engine.py b/tests/unit/chat_engine/test_chat_engine.py index 330fb616..d6381a0c 100644 --- a/tests/unit/chat_engine/test_chat_engine.py +++ b/tests/unit/chat_engine/test_chat_engine.py @@ -60,14 +60,14 @@ def _get_inputs_and_expected(self, mock_queries = [Query(text="How does photosynthesis work?")] mock_context = Context( content=StuffingContextContent( - __root__=ContextQueryResult( + __root__=[ContextQueryResult( query="How does photosynthesis work?", snippets=[ContextSnippet(source="ref 1", text=self._generate_text(snippet_length)), ContextSnippet(source="ref 2", text=self._generate_text(12))] - ) + )] ), num_tokens=1 # TODO: This is a dummy value. Need to improve. 
) diff --git a/tests/unit/context_engine/test_context_engine.py b/tests/unit/context_engine/test_context_engine.py index 17c85ee8..61977f2a 100644 --- a/tests/unit/context_engine/test_context_engine.py +++ b/tests/unit/context_engine/test_context_engine.py @@ -184,11 +184,11 @@ def test_context_query_result_to_text(): query_result = ContextQueryResult(query="How does photosynthesis work?", snippets=[ContextSnippet(text="42", source="ref")]) - context = Context(content=StuffingContextContent(__root__=query_result), + context = Context(content=StuffingContextContent(__root__=[query_result]), num_tokens=1) - assert context.to_text() == json.dumps(query_result.dict()) - assert context.to_text(indent=2) == json.dumps(query_result.dict(), indent=2) + assert context.to_text() == json.dumps([query_result.dict()]) + assert context.to_text(indent=2) == json.dumps([query_result.dict()], indent=2) @staticmethod @pytest.mark.asyncio From f0b40e970ac23f44545848b98f3d3235d34a8b92 Mon Sep 17 00:00:00 2001 From: ilai Date: Mon, 6 Nov 2023 11:03:01 +0200 Subject: [PATCH 28/31] [context] StuffingContextContent - Removed special iterator functions I changed the tests to use explicit json.loads() --- src/canopy/context_engine/models.py | 9 -------- .../test_stuffing_context_builder.py | 23 +++++++++++-------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/src/canopy/context_engine/models.py b/src/canopy/context_engine/models.py index 2b21e20a..89618451 100644 --- a/src/canopy/context_engine/models.py +++ b/src/canopy/context_engine/models.py @@ -21,14 +21,5 @@ class StuffingContextContent(ContextContent): def dict(self, **kwargs): return super().dict(**kwargs)['__root__'] - def __iter__(self): - return iter(self.__root__) - - def __getitem__(self, item): - return self.__root__[item] - - def __len__(self): - return len(self.__root__) - def to_text(self, **kwargs): return self.json(**kwargs) diff --git a/tests/unit/context_builder/test_stuffing_context_builder.py b/tests/unit/context_builder/test_stuffing_context_builder.py index 3d487712..e362d8e5 100644 --- a/tests/unit/context_builder/test_stuffing_context_builder.py +++ b/tests/unit/context_builder/test_stuffing_context_builder.py @@ -1,3 +1,5 @@ +import json + from canopy.context_engine.models import \ (ContextSnippet, ContextQueryResult, StuffingContextContent, ) from canopy.models.data_models import Context, ContextContent @@ -153,7 +155,8 @@ def test_source_metadata_missing(self): context = self.builder.build( missing_metadata_query_results, max_context_tokens=100) self.assert_num_tokens(context, 100) - assert context.content[0].snippets[0].source == "" + content = json.loads(context.to_text()) + assert content[0]["snippets"][0]["source"] == "" def test_empty_documents(self): empty_query_results = [ @@ -180,11 +183,13 @@ def assert_num_tokens(self, context: Context, max_tokens: int): def assert_contexts_equal(actual: Context, expected: Context): assert isinstance(actual.content, ContextContent) assert actual.num_tokens == expected.num_tokens - assert len(actual.content) == len(expected.content) - for actual_qr, expected_qr in zip(actual.content, expected.content): - assert actual_qr.query == expected_qr.query - assert len(actual_qr.snippets) == len(expected_qr.snippets) - for actual_snippet, expected_snippet in zip(actual_qr.snippets, - expected_qr.snippets): - assert actual_snippet.text == expected_snippet.text - assert actual_snippet.source == expected_snippet.source + actual_content = json.loads(actual.to_text()) + expected_content = 
json.loads(expected.to_text())
+        assert len(actual_content) == len(expected_content)
+        for actual_qr, expected_qr in zip(actual_content, expected_content):
+            assert actual_qr["query"] == expected_qr["query"]
+            assert len(actual_qr["snippets"]) == len(expected_qr["snippets"])
+            for actual_snippet, expected_snippet in zip(actual_qr["snippets"],
+                                                        expected_qr["snippets"]):
+                assert actual_snippet["text"] == expected_snippet["text"]
+                assert actual_snippet["source"] == expected_snippet["source"]

From 02637da6819dc18d74fa92cbfe4e2ef6ff70e03f Mon Sep 17 00:00:00 2001
From: ilai
Date: Mon, 6 Nov 2023 13:43:53 +0200
Subject: [PATCH 29/31] [context] Moved StuffingContextBuilder's data models
 into the same file

Makes the code more readable and explicit
---
 .../context_builder/stuffing.py               | 34 +++++++++++++++++--
 src/canopy/context_engine/models.py           | 25 --------------
 src/canopy/models/data_models.py              | 15 ++++++--
 src/canopy_server/app.py                      |  5 +--
 tests/unit/chat_engine/test_chat_engine.py    |  5 +--
 .../test_stuffing_context_builder.py          |  5 +--
 .../context_engine/test_context_engine.py     |  5 +--
 7 files changed, 56 insertions(+), 38 deletions(-)
 delete mode 100644 src/canopy/context_engine/models.py

diff --git a/src/canopy/context_engine/context_builder/stuffing.py b/src/canopy/context_engine/context_builder/stuffing.py
index 4b8402c6..5aa9b21b 100644
--- a/src/canopy/context_engine/context_builder/stuffing.py
+++ b/src/canopy/context_engine/context_builder/stuffing.py
@@ -1,13 +1,41 @@
 from itertools import zip_longest
 from typing import List, Tuple
 
+from pydantic import BaseModel
+
 from canopy.context_engine.context_builder.base import ContextBuilder
-from canopy.context_engine.models import (ContextQueryResult, ContextSnippet,
-                                          StuffingContextContent, )
 from canopy.knowledge_base.models import QueryResult, DocumentWithScore
 from canopy.tokenizer import Tokenizer
-from canopy.models.data_models import Context
+from canopy.models.data_models import Context, ContextContent
+
+
+# ------------- DATA MODELS -------------
+
+class ContextSnippet(BaseModel):
+    source: str
+    text: str
+
+
+class ContextQueryResult(BaseModel):
+    query: str
+    snippets: List[ContextSnippet]
+
+
+class StuffingContextContent(ContextContent):
+    __root__: List[ContextQueryResult]
+
+    def dict(self, **kwargs):
+        return super().dict(**kwargs)['__root__']
+
+    # In the case of StuffingContextBuilder, we simply want the text representation to
+    # be a json. Other ContextContent subclasses may render into text differently
+    def to_text(self, **kwargs):
+        # We can't use self.json() since this is mapped back to self.to_text() in the
+        # base class, which would cause infinite recursion. 
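+        # (super(ContextContent, self) starts the method lookup *above*
+        # ContextContent, so this reaches pydantic's original BaseModel.json()
+        # and serializes __root__ without re-entering to_text().)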
+ return super(ContextContent, self).json(**kwargs) + +# ------------- CONTEXT BUILDER ------------- class StuffingContextBuilder(ContextBuilder): diff --git a/src/canopy/context_engine/models.py b/src/canopy/context_engine/models.py deleted file mode 100644 index 89618451..00000000 --- a/src/canopy/context_engine/models.py +++ /dev/null @@ -1,25 +0,0 @@ -from typing import List - -from pydantic import BaseModel - -from canopy.models.data_models import ContextContent - - -class ContextSnippet(BaseModel): - source: str - text: str - - -class ContextQueryResult(BaseModel): - query: str - snippets: List[ContextSnippet] - - -class StuffingContextContent(ContextContent): - __root__: List[ContextQueryResult] - - def dict(self, **kwargs): - return super().dict(**kwargs)['__root__'] - - def to_text(self, **kwargs): - return self.json(**kwargs) diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index b56ef185..78a07dd8 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import Optional, List, Union, Dict, Literal +from typing import Optional, List, Union, Dict, Literal, Any from pydantic import BaseModel, Field, validator, Extra @@ -63,15 +63,26 @@ class ContextContent(BaseModel, ABC): def to_text(self, **kwargs) -> str: pass + def __str__(self): + return self.to_text() + + def json(self, **kwargs): + return self.to_text(**kwargs) + class Context(BaseModel): content: ContextContent - num_tokens: int = Field(exclude=True) + num_tokens: int debug_info: dict = Field(default_factory=dict, exclude=True) def to_text(self, **kwargs) -> str: return self.content.to_text(**kwargs) + class Config: + @staticmethod + # Override the JSON schema, to show `content` as a string in the docs + def schema_extra(schema: dict[str, Any]) -> None: + schema['properties']['content'] = {'type': 'String', 'title': 'content'} # --------------------- LLM models ------------------------ diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index a1415e04..79af0868 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -127,13 +127,14 @@ def stringify_content(response: StreamingChatResponse): @app.post( "/context/query", + response_model=Context, responses={ 500: {"description": "Failed to query the knowledge base or build the context"} }, ) async def query( request: ContextQueryRequest = Body(...), -) -> str: +) -> Context: """ Query the knowledge base for relevant context. The returned text may be structured or unstructured, depending on the Canopy configuration. 
@@ -146,7 +147,7 @@ async def query( queries=request.queries, max_context_tokens=request.max_tokens, ) - return context.to_text() + return context except Exception as e: logger.exception(e) diff --git a/tests/unit/chat_engine/test_chat_engine.py b/tests/unit/chat_engine/test_chat_engine.py index d6381a0c..ea8d6415 100644 --- a/tests/unit/chat_engine/test_chat_engine.py +++ b/tests/unit/chat_engine/test_chat_engine.py @@ -6,8 +6,9 @@ from canopy.chat_engine import ChatEngine from canopy.chat_engine.query_generator import QueryGenerator from canopy.context_engine import ContextEngine -from canopy.context_engine.models import (ContextQueryResult, ContextSnippet, - StuffingContextContent, ) +from canopy.context_engine.context_builder.stuffing import (ContextSnippet, + ContextQueryResult, + StuffingContextContent, ) from canopy.llm import BaseLLM from canopy.models.data_models import SystemMessage from canopy.models.api_models import ChatResponse, _Choice, TokenCounts diff --git a/tests/unit/context_builder/test_stuffing_context_builder.py b/tests/unit/context_builder/test_stuffing_context_builder.py index e362d8e5..4881926b 100644 --- a/tests/unit/context_builder/test_stuffing_context_builder.py +++ b/tests/unit/context_builder/test_stuffing_context_builder.py @@ -1,7 +1,8 @@ import json -from canopy.context_engine.models import \ - (ContextSnippet, ContextQueryResult, StuffingContextContent, ) +from canopy.context_engine.context_builder.stuffing import (ContextSnippet, + ContextQueryResult, + StuffingContextContent, ) from canopy.models.data_models import Context, ContextContent from ..stubs.stub_tokenizer import StubTokenizer from canopy.knowledge_base.models import \ diff --git a/tests/unit/context_engine/test_context_engine.py b/tests/unit/context_engine/test_context_engine.py index 61977f2a..ec17c55c 100644 --- a/tests/unit/context_engine/test_context_engine.py +++ b/tests/unit/context_engine/test_context_engine.py @@ -5,8 +5,9 @@ from canopy.context_engine import ContextEngine from canopy.context_engine.context_builder.base import ContextBuilder -from canopy.context_engine.models import (ContextQueryResult, ContextSnippet, - StuffingContextContent, ) +from canopy.context_engine.context_builder.stuffing import (ContextSnippet, + ContextQueryResult, + StuffingContextContent, ) from canopy.knowledge_base.base import BaseKnowledgeBase from canopy.knowledge_base.models import QueryResult, DocumentWithScore from canopy.models.data_models import Query, Context, ContextContent From db9ca51ce36c2285aebe78be8fbb11fd43eaec9c Mon Sep 17 00:00:00 2001 From: ilai Date: Mon, 6 Nov 2023 16:41:25 +0200 Subject: [PATCH 30/31] [app] `/query` return type - added ContextResponse model KISS solution - simply return a different model than the actual internal `Context` --- .../context_engine/context_builder/stuffing.py | 6 ++---- src/canopy/models/data_models.py | 8 -------- src/canopy_server/api_models.py | 5 +++++ src/canopy_server/app.py | 6 ++++-- tests/e2e/test_app.py | 15 +++++++++------ 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/canopy/context_engine/context_builder/stuffing.py b/src/canopy/context_engine/context_builder/stuffing.py index 5aa9b21b..e1cd7c8d 100644 --- a/src/canopy/context_engine/context_builder/stuffing.py +++ b/src/canopy/context_engine/context_builder/stuffing.py @@ -30,12 +30,10 @@ def dict(self, **kwargs): # In the case of StuffingContextBuilder, we simply want the text representation to # be a json. 
Other ContextContent subclasses may render into text differently def to_text(self, **kwargs): - # We can't use self.json() since this is mapped back to self.to_text() in the - # base class, which would cause infinite recursion. - return super(ContextContent, self).json(**kwargs) + return self.json(**kwargs) -# ------------- CONTEXT BUILDER ------------- +# ------------- CONTEXT BUILDER ------------- class StuffingContextBuilder(ContextBuilder): diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 78a07dd8..58c702b2 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -66,9 +66,6 @@ def to_text(self, **kwargs) -> str: def __str__(self): return self.to_text() - def json(self, **kwargs): - return self.to_text(**kwargs) - class Context(BaseModel): content: ContextContent @@ -78,11 +75,6 @@ class Context(BaseModel): def to_text(self, **kwargs) -> str: return self.content.to_text(**kwargs) - class Config: - @staticmethod - # Override the JSON schema, to show `content` as a string in the docs - def schema_extra(schema: dict[str, Any]) -> None: - schema['properties']['content'] = {'type': 'String', 'title': 'content'} # --------------------- LLM models ------------------------ diff --git a/src/canopy_server/api_models.py b/src/canopy_server/api_models.py index e965b8cb..49a7872a 100644 --- a/src/canopy_server/api_models.py +++ b/src/canopy_server/api_models.py @@ -31,6 +31,11 @@ class ContextQueryRequest(BaseModel): max_tokens: int +class ContextResponse(BaseModel): + content: str + num_tokens: int + + class ContextUpsertRequest(BaseModel): documents: List[Document] batch_size: int = Field( diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 79af0868..8e18be1c 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -36,6 +36,7 @@ ShutdownResponse, SuccessUpsertResponse, SuccessDeleteResponse, + ContextResponse, ) from canopy.llm.openai import OpenAILLM @@ -127,7 +128,7 @@ def stringify_content(response: StreamingChatResponse): @app.post( "/context/query", - response_model=Context, + response_model=ContextResponse, responses={ 500: {"description": "Failed to query the knowledge base or build the context"} }, @@ -147,7 +148,8 @@ async def query( queries=request.queries, max_context_tokens=request.max_tokens, ) - return context + return ContextResponse(content=context.content.to_text(), + num_tokens=context.num_tokens) except Exception as e: logger.exception(e) diff --git a/tests/e2e/test_app.py b/tests/e2e/test_app.py index fdd8c148..141c3b4a 100644 --- a/tests/e2e/test_app.py +++ b/tests/e2e/test_app.py @@ -13,7 +13,7 @@ from canopy_server.app import app from canopy_server.api_models import (HealthStatus, ContextUpsertRequest, - ContextQueryRequest) + ContextQueryRequest, ContextResponse, ) from .. 
import Tokenizer upsert_payload = ContextUpsertRequest( @@ -102,6 +102,7 @@ def test_upsert(client): @retry(reraise=True, stop=stop_after_attempt(60), wait=wait_fixed(1)) def test_query(client): # fetch the context with all the right filters + tokenizer = Tokenizer() query_payload = ContextQueryRequest( queries=[ { @@ -116,16 +117,18 @@ def test_query(client): query_response = client.post("/context/query", json=query_payload.dict()) assert query_response.is_success - # test response is as expected on /query - response_as_json = json.loads(query_response.json()) + query_response = query_response.json() + assert (query_response["num_tokens"] == + len(tokenizer.tokenize(query_response["content"]))) + stuffing_content = json.loads(query_response["content"]) assert ( - response_as_json[0]["query"] + stuffing_content[0]["query"] == query_payload.dict()["queries"][0]["text"] - and response_as_json[0]["snippets"][0]["text"] + and stuffing_content[0]["snippets"][0]["text"] == upsert_payload.dict()["documents"][0]["text"] ) - assert (response_as_json[0]["snippets"][0]["source"] == + assert (stuffing_content[0]["snippets"][0]["source"] == upsert_payload.dict()["documents"][0]["source"]) From 3c828c405badf61f11a2e047dc169a15b11ad1b4 Mon Sep 17 00:00:00 2001 From: ilai Date: Mon, 6 Nov 2023 16:49:49 +0200 Subject: [PATCH 31/31] Linter fixes + wrong return type --- src/canopy/models/data_models.py | 2 +- src/canopy_server/app.py | 2 +- tests/e2e/test_app.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/canopy/models/data_models.py b/src/canopy/models/data_models.py index 58c702b2..dbaa8096 100644 --- a/src/canopy/models/data_models.py +++ b/src/canopy/models/data_models.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import Optional, List, Union, Dict, Literal, Any +from typing import Optional, List, Union, Dict, Literal from pydantic import BaseModel, Field, validator, Extra diff --git a/src/canopy_server/app.py b/src/canopy_server/app.py index 8e18be1c..6c67494a 100644 --- a/src/canopy_server/app.py +++ b/src/canopy_server/app.py @@ -135,7 +135,7 @@ def stringify_content(response: StreamingChatResponse): ) async def query( request: ContextQueryRequest = Body(...), -) -> Context: +) -> ContextResponse: """ Query the knowledge base for relevant context. The returned text may be structured or unstructured, depending on the Canopy configuration. diff --git a/tests/e2e/test_app.py b/tests/e2e/test_app.py index 141c3b4a..70e5adca 100644 --- a/tests/e2e/test_app.py +++ b/tests/e2e/test_app.py @@ -13,7 +13,7 @@ from canopy_server.app import app from canopy_server.api_models import (HealthStatus, ContextUpsertRequest, - ContextQueryRequest, ContextResponse, ) + ContextQueryRequest, ) from .. import Tokenizer upsert_payload = ContextUpsertRequest(