Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Commit

Permalink
after make lint
Browse files: browse the repository at this point in the history
  • Loading branch information
Ferdinando Simonetti committed Feb 9, 2024
1 parent e2353ee commit 01f048a
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 11 deletions.
24 changes: 16 additions & 8 deletions llama_hub/docugami/docugami.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -81,16 +81,16 @@
"source": [
"from base import DocugamiReader\n",
"\n",
"docset_id=\"tjwrr2ekqkc3\"\n",
"docset_name=\"SEC 10-Q reports\"\n",
"document_ids=[\"ui7pkriyckwi\", \"1be3o7ch10iy\"]\n",
"docset_id = \"tjwrr2ekqkc3\"\n",
"docset_name = \"SEC 10-Q reports\"\n",
"document_ids = [\"ui7pkriyckwi\", \"1be3o7ch10iy\"]\n",
"\n",
"reader = DocugamiReader()\n",
"chunks = reader.load_data(docset_id=docset_id, document_ids=document_ids)\n",
"\n",
"for chunk in chunks[:5]:\n",
" print(chunk)\n",
" print(\"*\"*32)"
" print(\"*\" * 32)"
]
},
{
Expand Down Expand Up @@ -164,7 +164,7 @@
}
],
"source": [
"reader.min_text_length = 1024 * 4 # ~1k tokens\n",
"reader.min_text_length = 1024 * 4 # ~1k tokens\n",
"reader.max_text_length = 1024 * 24 # ~6k tokens\n",
"reader.include_xml_tags = True\n",
"chunks = reader.load_data(docset_id=docset_id)\n",
Expand Down Expand Up @@ -236,7 +236,9 @@
],
"source": [
"# Try out the query engine with example query\n",
"response = query_engine.query(\"How much did Microsoft spend for opex in the latest quarter?\")\n",
"response = query_engine.query(\n",
" \"How much did Microsoft spend for opex in the latest quarter?\"\n",
")\n",
"print(response.response)"
]
},
Expand Down Expand Up @@ -317,7 +319,9 @@
"response = query_engine.query(\n",
" \"What was Microsoft's weighted average discount rate for operating leases as of March 2023?\"\n",
")\n",
"print(response.response) # the correct answer should be 2.7%, listed on page 24 of \"2023 Q2 MSFT.pdf\""
"print(\n",
" response.response\n",
") # the correct answer should be 2.7%, listed on page 24 of \"2023 Q2 MSFT.pdf\""
]
},
{
Expand Down Expand Up @@ -428,7 +432,11 @@
"outputs": [],
"source": [
"from llama_index.indices.vector_store.retrievers import VectorIndexAutoRetriever\n",
"from llama_index.vector_stores.types import MetadataInfo, VectorStoreInfo, VectorStoreQueryMode\n",
"from llama_index.vector_stores.types import (\n",
" MetadataInfo,\n",
" VectorStoreInfo,\n",
" VectorStoreQueryMode,\n",
")\n",
"from llama_index.query_engine import RetrieverQueryEngine\n",
"\n",
"EXCLUDE_KEYS = [\"id\", \"xpath\", \"structure\", \"name\", \"tag\"]\n",
Expand Down
8 changes: 5 additions & 3 deletions llama_hub/microsoft_sharepoint/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def __init__(
client_secret: str,
tenant_id: str,
filename_as_id: bool = False,
file_extractor: Optional[Dict[str, Union[str, BaseReader]]] = None
file_extractor: Optional[Dict[str, Union[str, BaseReader]]] = None,
) -> None:
"""
Initializes an instance of SharePoint reader.
Expand Down Expand Up @@ -350,9 +350,11 @@ def get_metadata(filename: str) -> Any:
simple_directory_reader = download_loader("SimpleDirectoryReader")

simple_loader = simple_directory_reader(
download_dir, file_metadata=get_metadata, recursive=recursive,
download_dir,
file_metadata=get_metadata,
recursive=recursive,
filename_as_id=self.filename_as_id,
file_extractor=self.file_extractor
file_extractor=self.file_extractor,
)
documents = simple_loader.load_data()
return documents
Expand Down

0 comments on commit 01f048a

Please sign in to comment.