diff --git a/docs/library.md b/docs/library.md index d916f44f..24d907cd 100644 --- a/docs/library.md +++ b/docs/library.md @@ -19,7 +19,41 @@ The idea behind Canopy library is to provide a framework to build AI application ## Setup -To setup canopy, please follow the instructions [here](../README.md#setup). +0. Set up a virtual environment (optional)
+```bash
+python3 -m venv canopy-env
+source canopy-env/bin/activate
+```
+More about virtual environments [here](https://docs.python.org/3/tutorial/venv.html).
+
+1. Install the package
+```bash
+pip install pinecone-canopy
+```
+
+2. Set up the environment variables
+
+```python
+import os
+
+os.environ["PINECONE_API_KEY"] = "<PINECONE_API_KEY>"
+os.environ["PINECONE_ENVIRONMENT"] = "<PINECONE_ENVIRONMENT>"
+os.environ["OPENAI_API_KEY"] = "<OPENAI_API_KEY>"
+```
+
+<details>
+<summary><b>CLICK HERE</b> for more information about the environment variables</summary>
+
+| Name | Description | How to get it? |
+|-----------------------|-----------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `PINECONE_API_KEY` | The API key for Pinecone. Used to authenticate to Pinecone services to create indexes and to insert, delete and search data | Register or log in to your Pinecone account in the [console](https://app.pinecone.io/). You can access your API key from the "API Keys" section in the sidebar of your dashboard |
+| `PINECONE_ENVIRONMENT`| Determines the Pinecone service cloud environment of your index, e.g. `west1-gcp`, `us-east-1-aws`, etc. | You can find the Pinecone environment next to the API key in the [console](https://app.pinecone.io/) |
+| `OPENAI_API_KEY` | API key for OpenAI. Used to authenticate to OpenAI's services for the embedding and chat APIs | You can find your OpenAI API key [here](https://platform.openai.com/account/api-keys). You may need to log in or register for OpenAI's services |
+</details>
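+
+As a quick sanity check (a minimal sketch, not part of the Canopy API), you can verify that all three variables are set before moving on:
+
+```python
+import os
+
+for var in ("PINECONE_API_KEY", "PINECONE_ENVIRONMENT", "OPENAI_API_KEY"):
+    assert os.environ.get(var), f"{var} is not set"
+```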
+ ## Quickstart @@ -118,22 +152,30 @@ To insert data into the knowledge base, you can create a list of documents and u ```python from canopy.models.data_models import Document -documents = [Document(id="1", text="U2 are an Irish rock band from Dublin, formed in 1976.", source="https://url.com"), - Document(id="2", text="Arctic Monkeys are an English rock band formed in Sheffield in 2002.", source="https://another-url.com", metadata={"my-key": "my-value"})] +documents = [Document(id="1", + text="U2 are an Irish rock band from Dublin, formed in 1976.", + source="https://en.wikipedia.org/wiki/U2"), + Document(id="2", + text="Arctic Monkeys are an English rock band formed in Sheffield in 2002.", + source="https://en.wikipedia.org/wiki/Arctic_Monkeys", + metadata={"my-key": "my-value"})] kb.upsert(documents) ``` Now you can query the knowledge base with the `query` method to find the most similar documents to a given text: ```python -from canopy.models.query_models import Query -results = kb.query([Query("Arctic Monkeys music genre"), +from canopy.models.data_models import Query +results = kb.query([Query(text="Arctic Monkeys music genre"), Query(text="U2 music genre", top_k=10, metadata_filter={"my-key": "my-value"})]) print(results[0].documents[0].text) # output: Arctic Monkeys are an English rock band formed in Sheffield in 2002. + +print(f"score - {results[0].documents[0].score:.4f}") +# output: score - 0.8942 ``` ### Step 4: Create a context engine @@ -153,14 +195,32 @@ context_engine = ContextEngine(kb) Then, you can use the `query` method to retrieve the most relevant context for a given query and token budget: ```python -result = context_engine.query([Query("Arctic Monkeys music genre")], token_budget=100) +import json -print(result.content) -# output: Arctic Monkeys are an English rock band formed in Sheffield in 2002. +result = context_engine.query([Query(text="Arctic Monkeys music genre")], max_context_tokens=100) -print(result.token_count) -# output: 17 +print(json.dumps(json.loads(result.to_text()), indent=2, ensure_ascii=False)) +print(f"\n# tokens in context returned: {result.num_tokens}") ``` +output: +```json +{ + "query": "Arctic Monkeys music genre", + "snippets": [ + { + "source": "https://en.wikipedia.org/wiki/Arctic_Monkeys", + "text": "Arctic Monkeys are an English rock band formed in Sheffield in 2002." + }, + { + "source": "https://en.wikipedia.org/wiki/U2", + "text": "U2 are an Irish rock band from Dublin, formed in 1976." + } + ] +} + +# tokens in context returned: 89 +``` + By default, to handle the token budget constraint, the context engine will use the `StuffingContextBuilder` that will stuff as many documents as possible into the context without exceeding the token budget, by the order they have been retrieved from the knowledge base. @@ -190,8 +250,13 @@ chat_engine = ChatEngine(context_engine) Then, you can start chatting! ```python -chat_engine.chat("what is the genre of Arctic Monkeys band?") -# output: Arctic Monkeys is a rock band. +from canopy.models.data_models import MessageBase + +response = chat_engine.chat(messages=[MessageBase(role="user", content="what is the genre of Arctic Monkeys band?")], stream=False) + +print(response.choices[0].message.content) + +# output: The genre of the Arctic Monkeys band is rock. 
Source: [Wikipedia](https://en.wikipedia.org/wiki/Arctic_Monkeys) ``` diff --git a/src/canopy/knowledge_base/knowledge_base.py b/src/canopy/knowledge_base/knowledge_base.py index 4d0c5ae2..5ca38085 100644 --- a/src/canopy/knowledge_base/knowledge_base.py +++ b/src/canopy/knowledge_base/knowledge_base.py @@ -446,7 +446,7 @@ def _query_index(self, sparse_vector=query.sparse_values, top_k=top_k, namespace=query.namespace, - metadata_filter=metadata_filter, + filter=metadata_filter, include_metadata=True, _check_return_type=_check_return_type, **query_params) diff --git a/src/canopy/knowledge_base/record_encoder/openai.py b/src/canopy/knowledge_base/record_encoder/openai.py index 3a1ec728..ce18a3f8 100644 --- a/src/canopy/knowledge_base/record_encoder/openai.py +++ b/src/canopy/knowledge_base/record_encoder/openai.py @@ -23,6 +23,7 @@ def __init__(self, super().__init__(dense_encoder=encoder, batch_size=batch_size, **kwargs) @retry( + reraise=True, wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(3), retry=retry_if_exception_type(OPEN_AI_TRANSIENT_EXCEPTIONS), diff --git a/src/canopy/llm/openai.py b/src/canopy/llm/openai.py index 812fc5e0..4cb3d43f 100644 --- a/src/canopy/llm/openai.py +++ b/src/canopy/llm/openai.py @@ -30,6 +30,7 @@ def available_models(self): return [k["id"] for k in openai.Model.list().data] @retry( + reraise=True, wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(3), retry=retry_if_exception_type(OPEN_AI_TRANSIENT_EXCEPTIONS), @@ -66,6 +67,7 @@ def streaming_iterator(response): return ChatResponse(**response) @retry( + reraise=True, wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(3), retry=retry_if_exception_type( diff --git a/src/canopy_cli/cli.py b/src/canopy_cli/cli.py index b3ba714d..7fcf8757 100644 --- a/src/canopy_cli/cli.py +++ b/src/canopy_cli/cli.py @@ -64,7 +64,7 @@ def check_service_health(url: str): raise CLIError(msg) -@retry(wait=wait_fixed(5), stop=stop_after_attempt(6)) +@retry(reraise=True, wait=wait_fixed(5), stop=stop_after_attempt(6)) def wait_for_service(chat_service_url: str): check_service_health(chat_service_url) @@ -193,7 +193,7 @@ def new(index_name: str, config: Optional[str]): help=( """ \b - Upload local data files containing documents to the Canopy service. + Upload local data files to the Canopy service. Load all the documents from data file or a directory containing multiple data files. The allowed formats are .jsonl and .parquet. 
@@ -297,6 +297,7 @@ def upsert(index_name: str,
             raise CLIError(msg)
         pbar.update(len(batch))
+    pbar.close()
 
     if failed_docs:
         msg = (
diff --git a/tests/system/knowledge_base/test_knowledge_base.py b/tests/system/knowledge_base/test_knowledge_base.py
index 8095239e..643b81b7 100644
--- a/tests/system/knowledge_base/test_knowledge_base.py
+++ b/tests/system/knowledge_base/test_knowledge_base.py
@@ -71,7 +71,7 @@ def knowledge_base(index_full_name, index_name, chunker, encoder):
     kb = KnowledgeBase(index_name=index_name,
                        record_encoder=encoder,
                        chunker=chunker)
-    kb.create_canopy_index()
+    kb.create_canopy_index(indexed_fields=["my-key"])
 
     return kb
 
@@ -139,6 +139,18 @@ def execute_and_assert_queries(knowledge_base, chunks_to_query):
                                           f"actual: {q_res.documents}"
 
 
+def assert_query_metadata_filter(knowledge_base: KnowledgeBase,
+                                 metadata_filter: dict,
+                                 num_vectors_expected: int,
+                                 top_k: int = 100):
+    assert top_k > num_vectors_expected, \
+        "the test might return a false positive if top_k is not > num_vectors_expected"
+    query = Query(text="test", top_k=top_k, metadata_filter=metadata_filter)
+    query_results = knowledge_base.query([query])
+    assert len(query_results) == 1
+    assert len(query_results[0].documents) == num_vectors_expected
+
+
 @pytest.fixture(scope="module", autouse=True)
 def teardown_knowledge_base(index_full_name, knowledge_base):
     yield
@@ -162,7 +174,7 @@ def documents(random_texts):
     return [Document(id=f"doc_{i}",
                      text=random_texts[i],
                      source=f"source_{i}",
-                     metadata={"test": i})
+                     metadata={"my-key": f"value-{i}"})
             for i in range(5)]
 
 
@@ -170,7 +182,7 @@ def documents_large():
     return [Document(id=f"doc_{i}_large",
                      text=f"Sample document {i}",
-                     metadata={"test": i})
+                     metadata={"my-key-large": f"value-{i}"})
             for i in range(1000)]
 
 
@@ -249,6 +261,10 @@ def test_query(knowledge_base, encoded_chunks):
     execute_and_assert_queries(knowledge_base, encoded_chunks)
 
 
+def test_query_with_metadata_filter(knowledge_base, encoded_chunks):
+    assert_query_metadata_filter(knowledge_base, {"my-key": "value-1"}, 2)
+
+
 def test_delete_documents(knowledge_base, encoded_chunks):
     chunk_ids = [chunk.id for chunk in encoded_chunks[-4:]]
     doc_ids = set(doc.document_id for doc in encoded_chunks[-4:])
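For reference, `reraise=True` in the tenacity `@retry` decorators above controls which exception surfaces once the retries are exhausted. A minimal, self-contained sketch of this tenacity behavior (illustrative only, not Canopy code):

```python
from tenacity import retry, stop_after_attempt, wait_fixed

@retry(reraise=True, wait=wait_fixed(0.1), stop=stop_after_attempt(3))
def flaky():
    # Always fails, so all three attempts are exhausted.
    raise ValueError("transient failure")

try:
    flaky()
except ValueError:
    # With reraise=True, the original exception from the last attempt is
    # re-raised; without it, tenacity raises its own RetryError wrapper instead.
    print("caught the original ValueError")
```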