run-llama · EmanuelCampos · Oct 25, 2023 · Oct 25, 2023 · Oct 25, 2023
diff --git a/llama_hub/library.json b/llama_hub/library.json
@@ -1074,5 +1074,14 @@
       "microsoft 365",
       "microsoft365"
     ]
+  },
+  "TrafilaturaWebReader": {
+    "id": "web/trafilatura_web",
+    "author": "NA",
+    "keywords":[
+      "trafilatura",
+      "web",
+      "web reader"
+    ]
   }
 }
diff --git a/llama_hub/web/trafilatura_web/base.py b/llama_hub/web/trafilatura_web/base.py
@@ -1,4 +1,5 @@
 from typing import List
+from importlib.util import find_spec
 
 from llama_index.readers.base import BaseReader
 from llama_index.readers.schema.base import Document
@@ -12,6 +13,14 @@ class TrafilaturaWebReader(BaseReader):
 
     """
 
+    def __init__(self) -> None:
+        """Raise ImportError early if the `trafilatura` package cannot be found."""
+        if find_spec("trafilatura") is None:
+            raise ImportError(
+                "Missing package: trafilatura.\n"
+                "Please `pip install trafilatura` to use this Reader"
+            )
+
     def load_data(self, urls: List[str]) -> List[Document]:
         """Load data from the urls.