This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Configured pydoclint + fixed all issues
Should be working from now on
igiloh-pinecone committed Oct 26, 2023
1 parent c677fe4 commit 62865a8
Showing 4 changed files with 64 additions and 29 deletions.
8 changes: 8 additions & 0 deletions pyproject.toml
@@ -68,5 +68,13 @@ per-file-ignores = [
exclude = ['.venv']
max-line-length = 88

# PyDocLint configuration
style = 'google'
arg-type-hints-in-docstring = false
require-return-section-when-returning-nothing = false
allow-init-docstring = true
check-return-types = false
skip-checking-raises = true

[tool.poetry.scripts]
canopy = "canopy_cli.cli:cli"
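Since this block sits inside the flake8 settings above, pydoclint presumably runs here as a flake8 plugin. As a rough, hypothetical illustration (not taken from the repo) of what the chosen options accept: Google-style sections, argument names without type hints in the docstring (`arg-type-hints-in-docstring = false`), and no `Returns:` section for a function that returns nothing (`require-return-section-when-returning-nothing = false`):

```python
# Hypothetical example, not part of this commit: a Google-style docstring
# that the pydoclint settings above would accept.
def upsert_documents(documents: list, batch_size: int = 100) -> None:
    """Upsert a batch of documents into the index.

    Args:
        documents: The documents to upsert.
        batch_size: Number of documents to send per request.
            Defaults to 100.
    """
    ...
```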
23 changes: 12 additions & 11 deletions src/canopy/knowledge_base/chunker/langchain_text_splitter.py
@@ -38,17 +38,7 @@ def _split_text_with_regex(


class TextSplitter(ABC):
"""Interface for splitting text into chunks.
Args:
chunk_size: Maximum size of chunks to return
chunk_overlap: Overlap in characters between chunks
length_function: Function that measures the length of given chunks
keep_separator: Whether to keep the separator in the chunks
add_start_index: If `True`, includes chunk's start index in metadata
strip_whitespace: If `True`, strips whitespace from the start and end of
every document
"""
"""Interface for splitting text into chunks."""

def __init__(
self,
@@ -59,6 +49,17 @@ def __init__(
add_start_index: bool = False,
strip_whitespace: bool = True,
) -> None:
"""Create a new TextSplitter.
Args:
chunk_size: Maximum size of chunks to return
chunk_overlap: Overlap in characters between chunks
length_function: Function that measures the length of given chunks
keep_separator: Whether to keep the separator in the chunks
add_start_index: If `True`, includes chunk's start index in metadata
strip_whitespace: If `True`, strips whitespace from the start and end of
every document
"""
if chunk_overlap > chunk_size:
raise ValueError(
f"Got a larger chunk overlap ({chunk_overlap}) than chunk size "
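A hedged usage sketch of the constructor documented above; `RecursiveCharacterTextSplitter` is assumed to be the concrete subclass vendored in this module (only the abstract `TextSplitter` appears in this diff):

```python
# Hypothetical usage sketch; the concrete subclass name is an assumption.
from canopy.knowledge_base.chunker.langchain_text_splitter import (
    RecursiveCharacterTextSplitter,
)

splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=32)
chunks = splitter.split_text("A long markdown document that needs chunking ...")

# Per the __init__ shown above, an overlap larger than the chunk size is rejected:
# RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=200)  # -> ValueError
```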
52 changes: 38 additions & 14 deletions src/canopy/knowledge_base/knowledge_base.py
@@ -52,16 +52,6 @@ class KnowledgeBase(BaseKnowledgeBase):
When creating a new Canopy service, the user must first create the underlying Pinecone index.
This is a one-time setup process - the index will exist on Pinecone's managed service until it is deleted.
Args:
index_name: The name of the underlying Pinecone index.
record_encoder: An instance of RecordEncoder to use for encoding documents and queries.
Defaults to OpenAIRecordEncoder.
chunker: An instance of Chunker to use for chunking documents. Defaults to MarkdownChunker.
reranker: An instance of Reranker to use for reranking query results. Defaults to TransparentReranker.
default_top_k: The default number of document chunks to return per query. Defaults to 5.
index_params: A dictionary of parameters to pass to the index creation API. Defaults to None.
see https://docs.pinecone.io/docs/python-client#create_index
Example:
>>> from canopy.knowledge_base.knowledge_base import KnowledgeBase
>>> from tokenizer import Tokenizer
@@ -89,6 +79,44 @@ def __init__(self,
reranker: Optional[Reranker] = None,
default_top_k: int = 5,
):
"""
Initialize the knowledge base object.
If the index does not exist, the user must first create it by calling `create_canopy_index()` or the CLI command `canopy new`.
Note: Canopy will add the prefix --canopy to your selected index name.
You can retrieve the full index name knowledge_base.index_name at any time, or find it in the Pinecone console at https://app.pinecone.io/
Example:
create a new index:
>>> from canopy.knowledge_base.knowledge_base import KnowledgeBase
>>> from tokenizer import Tokenizer
>>> Tokenizer.initialize()
>>> kb = KnowledgeBase(index_name="my_index")
>>> kb.create_canopy_index()
In any future interactions,
the user simply needs to connect to the existing index:
>>> kb = KnowledgeBase(index_name="my_index")
>>> kb.connect()
Args:
index_name: The name of the underlying Pinecone index.
record_encoder: An instance of RecordEncoder to use for encoding documents and queries.
Defaults to OpenAIRecordEncoder.
chunker: An instance of Chunker to use for chunking documents. Defaults to MarkdownChunker.
reranker: An instance of Reranker to use for reranking query results. Defaults to TransparentReranker.
default_top_k: The default number of document chunks to return per query. Defaults to 5.
Raises:
ValueError: If default_top_k is not a positive integer.
TypeError: If record_encoder is not an instance of RecordEncoder.
TypeError: If chunker is not an instance of Chunker.
TypeError: If reranker is not an instance of Reranker.
""" # noqa: E501
if default_top_k < 1:
raise ValueError("default_top_k must be greater than 0")

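To make the lifecycle in the docstring concrete, here is a hedged end-to-end sketch; the `canopy.models.data_models.Document` and `canopy.tokenizer.Tokenizer` import paths are assumptions, not part of this diff:

```python
# Hypothetical end-to-end sketch based on the docstring example above.
from canopy.knowledge_base.knowledge_base import KnowledgeBase
from canopy.models.data_models import Document   # assumed import path
from canopy.tokenizer import Tokenizer           # assumed import path

Tokenizer.initialize()

kb = KnowledgeBase(index_name="my_index")
kb.create_canopy_index()   # one-time setup; on later runs, kb.connect() suffices

kb.upsert([Document(id="doc1", text="Pinecone is a fully managed vector database.")])
```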
@@ -243,8 +271,6 @@ def create_canopy_index(self,
For example, you can set the index's number of replicas by passing {"replicas": 2}.
see https://docs.pinecone.io/docs/python-client#create_index
Returns:
None
""" # noqa: E501
# validate inputs
if indexed_fields is None:
@@ -464,8 +490,6 @@ def upsert(self,
Defaults to 100.
show_progress_bar: Whether to show a progress bar while upserting the documents.
Returns:
None
Example:
>>> from canopy.knowledge_base.knowledge_base import KnowledgeBase
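A hedged usage sketch tied to the upsert parameters documented above, reusing the `kb` object from the earlier sketch; the `Document` import path is an assumption:

```python
# Hypothetical sketch: upserting with an explicit batch size and a progress bar.
from canopy.models.data_models import Document  # assumed import path

docs = [Document(id=f"doc-{i}", text=f"Document number {i}") for i in range(250)]

# Documents are sent in batches of `batch_size` (default 100);
# `show_progress_bar=True` reports progress while upserting.
kb.upsert(docs, batch_size=100, show_progress_bar=True)
```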
10 changes: 6 additions & 4 deletions src/canopy/knowledge_base/record_encoder/base.py
@@ -10,14 +10,16 @@ class RecordEncoder(ABC, ConfigurableMixin):
"""
Base class for all encoders. Encoders are used to encode documents' and queries'
text into vectors.
Args:
batch_size: The number of documents or queries to encode at once.
Defaults to 1.
"""

def __init__(self, batch_size: int = 1):
"""
Initialize the encoder.
Args:
batch_size: The number of documents or queries to encode at once.
Defaults to 1.
"""
self.batch_size = batch_size

@abstractmethod
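As a rough illustration of what the `batch_size` parameter controls (a generic batching sketch, not the class's actual implementation):

```python
from typing import List, Sequence, TypeVar

T = TypeVar("T")

def batched(items: Sequence[T], batch_size: int) -> List[Sequence[T]]:
    """Split a sequence into consecutive batches of at most `batch_size` items."""
    return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

# With the default batch_size=1 each text is encoded on its own; a larger
# batch_size sends several texts to the underlying embedding model per call.
print(batched(["doc a", "doc b", "doc c"], batch_size=2))
# [['doc a', 'doc b'], ['doc c']]
```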
