From c7ba76cd2ad7d2f4432eee9adacf0b1c0978181e Mon Sep 17 00:00:00 2001
From: Joshua Ching
Date: Mon, 23 Oct 2023 18:32:17 +0800
Subject: [PATCH] added tests and fixed datatype error arising from passing dict to Document

---
 llama_hub/pdb/base.py      |  5 ++++-
 tests/test_pdb/test_pdb.py | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_pdb/test_pdb.py

diff --git a/llama_hub/pdb/base.py b/llama_hub/pdb/base.py
index 2fff159266..130849bec8 100644
--- a/llama_hub/pdb/base.py
+++ b/llama_hub/pdb/base.py
@@ -25,9 +25,12 @@ def load_data(self, pdb_ids: List[str]) -> List[Document]:
             title, abstracts = get_pdb_abstract(pdb_id)
             primary_citation = abstracts[title]
             abstract = primary_citation["abstract"]
+            abstract_text = "\n".join(
+                ["\n".join([str(k), str(v)]) for k, v in abstract.items()]
+            )
             results.append(
                 Document(
-                    text=abstract,
+                    text=abstract_text,
                     extra_info={"pdb_id": pdb_id, "primary_citation": primary_citation},
                 )
             )
diff --git a/tests/test_pdb/test_pdb.py b/tests/test_pdb/test_pdb.py
new file mode 100644
index 0000000000..84fca8a423
--- /dev/null
+++ b/tests/test_pdb/test_pdb.py
@@ -0,0 +1,20 @@
+import pytest
+from llama_hub.pdb.base import PdbAbstractReader
+from llama_index.readers.schema.base import Document
+
+
+@pytest.mark.parametrize("pdb_ids", [["1cbs", "125L"]])  # Example PDB ids to test
+def test_load_data(pdb_ids):
+    # Create an instance of the PdbAbstractReader class
+    reader = PdbAbstractReader()
+
+    # Call the load_data method with the test PDB ids
+    documents = reader.load_data(pdb_ids)
+
+    # Assert that the returned documents have the expected structure
+    assert isinstance(documents, list)
+    assert all(isinstance(doc, Document) for doc in documents)
+    assert all(doc.text is not None for doc in documents)
+    assert all(isinstance(doc.extra_info, dict) for doc in documents)
+    assert all("pdb_id" in doc.extra_info for doc in documents)
+    assert all("primary_citation" in doc.extra_info for doc in documents)