diff --git a/llama_hub/docugami/docugami.ipynb b/llama_hub/docugami/docugami.ipynb index aec2bcff0d..b390c3f643 100644 --- a/llama_hub/docugami/docugami.ipynb +++ b/llama_hub/docugami/docugami.ipynb @@ -81,16 +81,16 @@ "source": [ "from base import DocugamiReader\n", "\n", - "docset_id=\"tjwrr2ekqkc3\"\n", - "docset_name=\"SEC 10-Q reports\"\n", - "document_ids=[\"ui7pkriyckwi\", \"1be3o7ch10iy\"]\n", + "docset_id = \"tjwrr2ekqkc3\"\n", + "docset_name = \"SEC 10-Q reports\"\n", + "document_ids = [\"ui7pkriyckwi\", \"1be3o7ch10iy\"]\n", "\n", "reader = DocugamiReader()\n", "chunks = reader.load_data(docset_id=docset_id, document_ids=document_ids)\n", "\n", "for chunk in chunks[:5]:\n", " print(chunk)\n", - " print(\"*\"*32)" + " print(\"*\" * 32)" ] }, { @@ -164,7 +164,7 @@ } ], "source": [ - "reader.min_text_length = 1024 * 4 # ~1k tokens\n", + "reader.min_text_length = 1024 * 4 # ~1k tokens\n", "reader.max_text_length = 1024 * 24 # ~6k tokens\n", "reader.include_xml_tags = True\n", "chunks = reader.load_data(docset_id=docset_id)\n", @@ -236,7 +236,9 @@ ], "source": [ "# Try out the query engine with example query\n", - "response = query_engine.query(\"How much did Microsoft spend for opex in the latest quarter?\")\n", + "response = query_engine.query(\n", + " \"How much did Microsoft spend for opex in the latest quarter?\"\n", + ")\n", "print(response.response)" ] }, @@ -317,7 +319,9 @@ "response = query_engine.query(\n", " \"What was Microsoft's weighted average discount rate for operating leases as of March 2023?\"\n", ")\n", - "print(response.response) # the correct answer should be 2.7%, listed on page 24 of \"2023 Q2 MSFT.pdf\"" + "print(\n", + " response.response\n", + ") # the correct answer should be 2.7%, listed on page 24 of \"2023 Q2 MSFT.pdf\"" ] }, { @@ -428,7 +432,11 @@ "outputs": [], "source": [ "from llama_index.indices.vector_store.retrievers import VectorIndexAutoRetriever\n", - "from llama_index.vector_stores.types import MetadataInfo, VectorStoreInfo, VectorStoreQueryMode\n", + "from llama_index.vector_stores.types import (\n", + " MetadataInfo,\n", + " VectorStoreInfo,\n", + " VectorStoreQueryMode,\n", + ")\n", "from llama_index.query_engine import RetrieverQueryEngine\n", "\n", "EXCLUDE_KEYS = [\"id\", \"xpath\", \"structure\", \"name\", \"tag\"]\n", diff --git a/llama_hub/microsoft_sharepoint/base.py b/llama_hub/microsoft_sharepoint/base.py index c37c1ce07d..318dae08fc 100644 --- a/llama_hub/microsoft_sharepoint/base.py +++ b/llama_hub/microsoft_sharepoint/base.py @@ -29,7 +29,7 @@ def __init__( client_secret: str, tenant_id: str, filename_as_id: bool = False, - file_extractor: Optional[Dict[str, Union[str, BaseReader]]] = None + file_extractor: Optional[Dict[str, Union[str, BaseReader]]] = None, ) -> None: """ Initializes an instance of SharePoint reader. @@ -350,9 +350,11 @@ def get_metadata(filename: str) -> Any: simple_directory_reader = download_loader("SimpleDirectoryReader") simple_loader = simple_directory_reader( - download_dir, file_metadata=get_metadata, recursive=recursive, + download_dir, + file_metadata=get_metadata, + recursive=recursive, filename_as_id=self.filename_as_id, - file_extractor=self.file_extractor + file_extractor=self.file_extractor, ) documents = simple_loader.load_data() return documents