[pre-commit.ci] pre-commit autoupdate (#52)

* [pre-commit.ci] pre-commit autoupdate

  updates:
  - [github.com/psf/black: 24.4.2 → 24.10.0](psf/black@24.4.2...24.10.0)
  - [github.com/pycqa/flake8: 7.1.0 → 7.1.1](PyCQA/flake8@7.1.0...7.1.1)

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* correct linter issues

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Inga Ulusoy <[email protected]>
ssciwr · Jan 7, 2025 · 873c0a7 · 873c0a7
1 parent b9d14f8
commit 873c0a7
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 41 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,9 +1,9 @@
 repos:
   - repo: https://github.com/psf/black
-    rev: 24.4.2
+    rev: 24.10.0
     hooks:
     - id: black
   -   repo: https://github.com/pycqa/flake8
-      rev: 7.1.0 
+      rev: 7.1.1 
       hooks:
       -   id: flake8
diff --git a/mailcom/inout.py b/mailcom/inout.py
@@ -4,51 +4,62 @@
 from bs4 import BeautifulSoup
 from dicttoxml import dicttoxml
 
+
 class InoutHandler:
     def __init__(self, directory_name: str):
         """Constructor for the InoutHandler class.
-        
-        Args: 
+
+        Args:
             directory_name (str): The directory where the files are located.
-        """        
+        """
         self.directory_name = directory_name
         # presets
         self.pattern = [".eml", ".html"]
 
     def list_of_files(self):
-        """Method to create a list of Path objects (files) that are present 
+        """Method to create a list of Path objects (files) that are present
         in a directory."""
-        if not os.path.exists(self.directory_name):  # check if given dir exists raises error otherwise
+        if not os.path.exists(
+            self.directory_name
+        ):  # check if given dir exists raises error otherwise
             raise OSError("Path {} does not exist".format(self.directory_name))
         mypath = Path(self.directory_name)
-        self.email_list = [mp.resolve() for mp in mypath.glob("**/*") if mp.suffix in self.pattern]
+        self.email_list = [
+            mp.resolve() for mp in mypath.glob("**/*") if mp.suffix in self.pattern
+        ]
         if len(self.email_list) == 0:
-            raise ValueError("The directory {} does not contain .eml or .html files. Please check that the directory is containing the email data files".format(mypath))
+            raise ValueError(
+                """The directory {} does not contain .eml or .html files.
+                Please check that the directory is containing the email
+                data files""".format(
+                    mypath
+                )
+            )
 
     def get_html_text(self, text_check: str) -> str:
         """Clean up a string if it contains html content.
         Args:
             text_check (str): The string that may contain html content.
-            
+
         Returns:
             str: The (potentially) cleaned up string."""
-        soup = BeautifulSoup(text_check , 'html.parser')
+        soup = BeautifulSoup(text_check, "html.parser")
         if soup.find():
             text_check = soup.get_text()
         return text_check
 
     def get_text(self, file: Path) -> str:
         """Function to extract the textual content and other metadata from an email file.
-        
+
         Args:
             file (Path): The path to the email file.
-            
+
         Returns:
-            str: The textual content of the email. In the future, this will return the 
+            str: The textual content of the email. In the future, this will return the
             complete dictionary with the metadata."""
-        if not file.is_file(): # check if given file exists raises error otherwise
+        if not file.is_file():  # check if given file exists raises error otherwise
             raise OSError("File {} does not exist".format(file))
-        with open(file, 'rb') as fhdl:
+        with open(file, "rb") as fhdl:
             raw_email = fhdl.read()
         ep = eml_parser.EmlParser(include_raw_body=True)
         parsed_eml = ep.decode_email_bytes(raw_email)
@@ -57,28 +68,32 @@ def get_text(self, file: Path) -> str:
         attachments = len(parsed_eml["attachment"]) if "attachment" in parsed_eml else 0
         # find the types of attachements
         if attachments > 0:
-            attachmenttypes = [parsed_eml["attachment"][i]["extension"] for i in range(attachments)]
-        self.email_content = {"content": parsed_eml["body"][0]["content"], 
-                    "date": parsed_eml["header"]["date"], 
-                    "attachment": attachments, 
-                    "attachement type": attachmenttypes
-                    }
-        return(self.email_content["content"])
+            attachmenttypes = [
+                parsed_eml["attachment"][i]["extension"] for i in range(attachments)
+            ]
+        self.email_content = {
+            "content": parsed_eml["body"][0]["content"],
+            "date": parsed_eml["header"]["date"],
+            "attachment": attachments,
+            "attachement type": attachmenttypes,
+        }
+        return self.email_content["content"]
 
     def validate_data(self):
         pass
-    
+
     def data_to_xml(self, text):
-        my_item_func = lambda x: 'content'
-        xml = dicttoxml(text, custom_root='email', item_func = my_item_func)
+        def my_item_func(x):
+            return "content"
+
+        xml = dicttoxml(text, custom_root="email", item_func=my_item_func)
         return xml.decode()
 
-    def write_file(self, text: str, name: str)-> None:
+    def write_file(self, text: str, name: str) -> None:
         """Write the extracted string to a text file.
 
         Args:
             text (str): The string to be written to the file.
             name (str): The name of the file to be written."""
         with open("{}.out".format(name), "w") as file:
             file.write(text)
-
diff --git a/mailcom/test/test_inout.py b/mailcom/test/test_inout.py
@@ -11,12 +11,14 @@
 XML_PATH = Path(pkg / "test" / "data" / "test.out")
 
 TEXT_REF = "J'espère que tu vas bien!"
-XML_REF = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?><email><content type=\"str\">"
+XML_REF = '<?xml version="1.0" encoding="UTF-8" ?><email><content type="str">'
+
 
 @pytest.fixture()
 def get_instant(tmp_path):
     return inout.InoutHandler(tmp_path)
 
+
 def test_list_of_files(get_instant):
     with pytest.raises(ValueError):
         get_instant.list_of_files()
@@ -34,31 +36,37 @@ def test_list_of_files(get_instant):
     get_instant.list_of_files()
     assert get_instant.directory_name / "test3.xml" not in get_instant.email_list
 
+
 def test_get_text(get_instant):
     p = get_instant.directory_name / "test.eml"
     p.write_text("test")
     extracted_text = get_instant.get_text(p)
-    assert extracted_text == 'test'
+    assert extracted_text == "test"
     text = get_instant.get_text(FILE_PATH)
     assert text[0:25] == TEXT_REF
-    assert get_instant.email_content["date"] == datetime.datetime(2024, 4, 17, 15, 13, 56, tzinfo=datetime.timezone.utc)
+    assert get_instant.email_content["date"] == datetime.datetime(
+        2024, 4, 17, 15, 13, 56, tzinfo=datetime.timezone.utc
+    )
     assert get_instant.email_content["attachment"] == 2
-    assert get_instant.email_content["attachement type"] == ['jpg', 'jpg']
+    assert get_instant.email_content["attachement type"] == ["jpg", "jpg"]
     with pytest.raises(OSError):
         get_instant.get_text(get_instant.directory_name / "nonexisting.eml")
 
+
 def test_get_html_text(get_instant):
     html = """<html><head><title>Test</title></head></html>"""
-    assert get_instant.get_html_text(html) == 'Test'
+    assert get_instant.get_html_text(html) == "Test"
     noHtml = """Test"""
-    assert get_instant.get_html_text(noHtml) == 'Test'
-
-def test_data_to_xml(get_instant,tmp_path):
-    xml_content = {"content": "This is nothing more than a test", 
-                    "date": "2024-04-17T15:13:56+00:00", 
-                    "attachment": 2, 
-                    "attachement type": {'jpg', 'jpg'}
-                    }
+    assert get_instant.get_html_text(noHtml) == "Test"
+
+
+def test_data_to_xml(get_instant, tmp_path):
+    xml_content = {
+        "content": "This is nothing more than a test",
+        "date": "2024-04-17T15:13:56+00:00",
+        "attachment": 2,
+        "attachement type": {"jpg", "jpg"},
+    }
     xml = get_instant.data_to_xml(xml_content)
     get_instant.write_file(xml, tmp_path / "test")
     assert filecmp.cmp(XML_PATH, tmp_path / "test.out")