Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Commit

Permalink
feat: allow use of self-signed cert for MinIO server with MinioReader (
Browse files Browse the repository at this point in the history
…#935)

* feat: add the ability to use a self-signed cert for the MinIO server with MinioReader

* moved 'import urllib3' away from the if block in MinioReader's base.py

* moved urllib3 import inside load_data() for MinioReader

* fix minio-client base.py; reformat docugami.ipynb

---------

Co-authored-by: Ferdinando Simonetti <[email protected]>
  • Loading branch information
ferdinandosimonetti and Ferdinando Simonetti authored Feb 9, 2024
1 parent 4e1b743 commit ec27ff1
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 8 deletions.
24 changes: 16 additions & 8 deletions llama_hub/docugami/docugami.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -81,16 +81,16 @@
"source": [
"from base import DocugamiReader\n",
"\n",
"docset_id=\"tjwrr2ekqkc3\"\n",
"docset_name=\"SEC 10-Q reports\"\n",
"document_ids=[\"ui7pkriyckwi\", \"1be3o7ch10iy\"]\n",
"docset_id = \"tjwrr2ekqkc3\"\n",
"docset_name = \"SEC 10-Q reports\"\n",
"document_ids = [\"ui7pkriyckwi\", \"1be3o7ch10iy\"]\n",
"\n",
"reader = DocugamiReader()\n",
"chunks = reader.load_data(docset_id=docset_id, document_ids=document_ids)\n",
"\n",
"for chunk in chunks[:5]:\n",
" print(chunk)\n",
" print(\"*\"*32)"
" print(\"*\" * 32)"
]
},
{
Expand Down Expand Up @@ -164,7 +164,7 @@
}
],
"source": [
"reader.min_text_length = 1024 * 4 # ~1k tokens\n",
"reader.min_text_length = 1024 * 4 # ~1k tokens\n",
"reader.max_text_length = 1024 * 24 # ~6k tokens\n",
"reader.include_xml_tags = True\n",
"chunks = reader.load_data(docset_id=docset_id)\n",
Expand Down Expand Up @@ -236,7 +236,9 @@
],
"source": [
"# Try out the query engine with example query\n",
"response = query_engine.query(\"How much did Microsoft spend for opex in the latest quarter?\")\n",
"response = query_engine.query(\n",
" \"How much did Microsoft spend for opex in the latest quarter?\"\n",
")\n",
"print(response.response)"
]
},
Expand Down Expand Up @@ -317,7 +319,9 @@
"response = query_engine.query(\n",
" \"What was Microsoft's weighted average discount rate for operating leases as of March 2023?\"\n",
")\n",
"print(response.response) # the correct answer should be 2.7%, listed on page 24 of \"2023 Q2 MSFT.pdf\""
"print(\n",
" response.response\n",
") # the correct answer should be 2.7%, listed on page 24 of \"2023 Q2 MSFT.pdf\""
]
},
{
Expand Down Expand Up @@ -428,7 +432,11 @@
"outputs": [],
"source": [
"from llama_index.indices.vector_store.retrievers import VectorIndexAutoRetriever\n",
"from llama_index.vector_stores.types import MetadataInfo, VectorStoreInfo, VectorStoreQueryMode\n",
"from llama_index.vector_stores.types import (\n",
" MetadataInfo,\n",
" VectorStoreInfo,\n",
" VectorStoreQueryMode,\n",
")\n",
"from llama_index.query_engine import RetrieverQueryEngine\n",
"\n",
"EXCLUDE_KEYS = [\"id\", \"xpath\", \"structure\", \"name\", \"tag\"]\n",
Expand Down
9 changes: 9 additions & 0 deletions llama_hub/minio/minio-client/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(
file_metadata: Optional[Callable[[str], Dict]] = None,
minio_endpoint: Optional[str] = None,
minio_secure: bool = False,
minio_cert_check: bool = False,
minio_access_key: Optional[str] = None,
minio_secret_key: Optional[str] = None,
minio_session_token: Optional[str] = None,
Expand Down Expand Up @@ -59,6 +60,8 @@ def __init__(
minio_access_key (Optional[str]): The Minio access key. Default is None.
minio_secret_key (Optional[str]): The Minio secret key. Default is None.
minio_session_token (Optional[str]): The Minio session token.
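minio_secure (bool): Whether the MinIO server runs in TLS mode. Default is False.
minio_cert_check (bool): Whether to verify the MinIO server's TLS certificate.
Leave as False to allow a self-signed certificate. Default is False.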
"""
super().__init__(*args, **kwargs)

Expand All @@ -74,22 +77,28 @@ def __init__(

self.minio_endpoint = minio_endpoint
self.minio_secure = minio_secure
self.minio_cert_check = minio_cert_check
self.minio_access_key = minio_access_key
self.minio_secret_key = minio_secret_key
self.minio_session_token = minio_session_token

def load_data(self) -> List[Document]:
"""Load file(s) from Minio."""
from minio import Minio
import urllib3

minio_client = Minio(
self.minio_endpoint,
secure=self.minio_secure,
cert_check=self.minio_cert_check,
access_key=self.minio_access_key,
secret_key=self.minio_secret_key,
session_token=self.minio_session_token,
)

if not self.minio_cert_check:
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

with tempfile.TemporaryDirectory() as temp_dir:
if self.key:
suffix = Path(self.key).suffix
Expand Down

0 comments on commit ec27ff1

Please sign in to comment.