Merge pull request #3 from MinuraPunchihewa/development

[PR] dev to main: added a descriptive README, supported more tasks and the use of DataFrames
MinuraPunchihewa · Apr 18, 2023 · fd1684b · fd1684b
2 parents 6c327e8 + 567257c
commit fd1684b
Show file tree

Hide file tree

Showing 19 changed files with 484 additions and 56 deletions.
diff --git a/README.md b/README.md
@@ -1 +1,103 @@
 # Hugging-Py-Face
+Hugging-Py-Face is a powerful Python package that provides seamless integration with the Hugging Face Inference API, allowing you to easily perform inference on your machine learning models hosted on the Hugging Face Model Hub. 
+
+One of the key benefits of using the Hugging Face Inference API is that it provides a scalable and efficient way to perform inference on your models, by allowing you to easily deploy and serve your models in the cloud. Additionally, the Inference API provides a simple and standardized API that can be used across different programming languages, making it easy to integrate your models with other services and tools.
+
+With Hugging-Py-Face, you can take advantage of these benefits while also enjoying the simplicity and flexibility of using Python.
+
+It allows you to easily customize your API requests, adjust request parameters, handle authentication and access tokens, and interact with a wide range of machine learning models hosted on the Hugging Face Model Hub.
+
+Overall, Hugging-Py-Face is an awesome tool for any machine learning developer or data scientist who wants to perform efficient and scalable inference on their models, while also enjoying the simplicity and flexibility of using Python. Whether you're working on a personal project or a large-scale enterprise application, Hugging-Py-Face can help you achieve your machine learning goals with ease.
+
+## Installation
+### With pip
+```
+pip install hugging_py_face
+```
+
+## Components
+ - NLP (Natural Language Processing): This component deals with processing and analyzing human language. It includes various techniques such as text classification, text generation, summarization and many more.
+ - Computer Vision: This component deals with the analysis of visual data from the real world. It includes techniques such as image classification and object detection.
+ - Audio Processing: This component deals with the analysis of audio signals. It includes techniques such as audio classification and speech recognition.
+
+## Usage
+The library will first need to be configured with a User Access Token from the Hugging Face website.
+
+### NLP (Natural Language Processing)
+```
+from hugging_py_face import NLP
+
+# initialize the NLP class with the user access token
+nlp = NLP('hf_...')
+
+# perform text classification
+nlp.text_classification("I like you. I love you.")
+
+# perform text generation
+nlp.text_generation("The answer to the universe is")
+```
+
+The inputs to these methods can also be a list of strings. For example:
+```
+nlp.text_classification(["I like you. I love you.", "I hate you. I despise you."])
+```
+
+Additionally, the fill mask, summarization, text classification and text generation tasks can also be performed on a pandas DataFrame. For example:
+```
+nlp.text_classification_in_df(df, 'text')
+# where df is a pandas DataFrame and 'text' is the column name containing the text
+```
+
+### Computer Vision
+```
+from hugging_py_face import ComputerVision
+
+# initialize the ComputerVision class with the user access token
+cp = ComputerVision('hf_...')
+
+# perform image classification
+# the image can be a local file or a URL
+cp.image_classification("cats.jpg")
+
+# perform object detection
+# the image can be a local file or a URL
+cp.object_detection("cats.jpg")
+```
+
+The inputs to these methods can also be a list of images. For example:
+```
+cp.image_classification(["cats.jpg", "dogs.jpg"])
+```
+
+Additionally, the image classification task can also be performed on a pandas DataFrame. For example:
+```
+cp.image_classification_in_df(df, 'images')
+# where df is a pandas DataFrame and 'images' is the column name containing the image file paths or URLs
+```
+
+### Audio Processing
+```
+from hugging_py_face import AudioProcessing
+
+# initialize the AudioProcessing class with the user access token
+ap = AudioProcessing('hf_...')
+
+# perform audio classification
+# the audio file can be a local file or a URL
+ap.audio_classification("dogs.wav")
+
+# perform speech recognition
+# the audio file can be a local file or a URL
+ap.speech_recognition("dogs.wav")
+```
+
+The inputs to these methods can also be a list of audio files. For example:
+```
+ap.audio_classification(["dogs.wav", "cats.wav"])
+```
+
+Additionally, both of the above tasks can also be performed on a pandas DataFrame. For example:
+```
+ap.audio_classification_in_df(df, 'audio')
+# where df is a pandas DataFrame and 'audio' is the column name containing the audio file paths or URLs
+```
diff --git a/hugging_py_face/__about__.py b/hugging_py_face/__about__.py
@@ -1,10 +1,10 @@
 __title__ = 'hugging_py_face'
 __package_name__ = 'hugging_py_face'
-__version__ = '0.0.1'
-__description__ = "Hugging-Py-face, the Python client for the Hugging Face Inference API."
+__version__ = '0.1.0'
+__description__ = "Hugging-Py-Face, the Python client for the Hugging Face Inference API."
 __email__ = "[email protected]"
 __author__ = 'Minura Punchihewa'
-__github__ = 'https://github.com/MinuraPunchihewa/mindsdb-text-to-sql'
-__pypi__ = ''
+__github__ = 'https://github.com/MinuraPunchihewa/hugging_py_face'
+__pypi__ = 'https://pypi.org/project/hugging-py-face/'
 __license__ = ''
 __copyright__ = ''
diff --git a/hugging_py_face/__init__.py b/hugging_py_face/__init__.py
@@ -0,0 +1,3 @@
+from .nlp import NLP
+from .computer_vision import ComputerVision
+from .audio_processing import AudioProcessing
diff --git a/hugging_py_face/audio_processing.py b/hugging_py_face/audio_processing.py
@@ -1,27 +1,61 @@
-from typing import Text, List, Dict, Optional
+from pandas import DataFrame
+from typing import Text, List, Dict, Optional, Union
+
 from .multimedia_processing import MultimediaProcessing
 
 
 class AudioProcessing(MultimediaProcessing):
     def __init__(self, api_token):
         super().__init__(api_token)
 
-    def speech_recognition(self, input: Text, model: Optional[Text] = None) -> Dict:
+    def speech_recognition(self, inputs: Union[Text, List], model: Optional[Text] = None) -> Union[Dict, List]:
         """
         Perform speech recognition on an audio file from a file path or an url.
 
-        :param input: the file path or url to the audio file to perform speech recognition on
+        :param inputs: a string or a list of strings of the file paths or urls of the audio files to perform speech recognition on.
+        :param model: the model to use for the speech recognition task. If not provided, the recommended model from Hugging Face will be used.
+        :return: a dictionary or a list of dictionaries containing the text recognized from the audio file(s).
+        """
+        if type(inputs) == list:
+            return self._query_in_list(inputs, model=model, task="speech-recognition")
+        elif type(inputs) == str:
+            return self._query(inputs, model=model, task="speech-recognition")
+
+    def speech_recognition_in_df(self, df: DataFrame, column: Text, model: Optional[Text] = None) -> DataFrame:
+        """
+        Perform speech recognition on audio files from a DataFrame.
+
+        :param df: a pandas DataFrame containing the audio files to perform speech recognition on.
+        :param column: the name of the column containing the file paths or urls of the audio files to perform speech recognition on.
         :param model: the model to use for the speech recognition task. If not provided, the recommended model from Hugging Face will be used.
-        :return: the text transcription of the audio file
+        :return: a pandas DataFrame with the text recognized from the audio files. The text will be added as a new column called 'predictions' to the original DataFrame.
         """
-        return self._query(input, model=model, task="speech-recognition")
+        predictions = self._query_in_df(df, column, model=model, task="speech-recognition")
+        df["predictions"] = [prediction['text'] for prediction in predictions]
+        return df
 
-    def audio_classification(self, input: Text, model: Optional[Text] = None) -> List:
+    def audio_classification(self, inputs: Text, model: Optional[Text] = None) -> List:
         """
         Classify an audio file from a file path or an url.
 
-        :param input: the file path or url to the audio file to classify
+        :param inputs: a string or a list of strings of the file paths or urls of the audio files to classify.
+        :param model: the model to use for the audio classification task. If not provided, the recommended model from Hugging Face will be used.
+        :return: a list of dictionaries or a list of lists of dictionaries each containing the label and the confidence score for that label.
+        """
+        if type(inputs) == list:
+            return self._query_in_list(inputs, model=model, task="audio-classification")
+        elif type(inputs) == str:
+            return self._query(inputs, model=model, task="audio-classification")
+
+    def audio_classification_in_df(self, df: DataFrame, column: Text, model: Optional[Text] = None) -> DataFrame:
+        """
+        Classify audio files from a DataFrame.
+
+        :param df: a pandas DataFrame containing the audio files to classify.
+        :param column: the name of the column containing the file paths or urls of the audio files to classify.
         :param model: the model to use for the audio classification task. If not provided, the recommended model from Hugging Face will be used.
-        :return: a list containing the labels and the confidence score for each label
+        :return: a pandas DataFrame with the label for the audio files. Each label added will be the one with the highest confidence score for that particular audio file. The label will be added as a new column called 'predictions' to the original DataFrame.
         """
-        return self._query(input, model=model, task="audio-classification")
+        predictions = self._query_in_df(df, column, model=model, task="audio-classification")
+        df["predictions"] = [prediction[0]['label'] for prediction in predictions]
+        return df
diff --git a/hugging_py_face/computer_vision.py b/hugging_py_face/computer_vision.py
@@ -1,27 +1,48 @@
-from typing import Text, List, Optional
+from pandas import DataFrame
+from typing import Text, List, Optional, Union
+
 from .multimedia_processing import MultimediaProcessing
 
 
 class ComputerVision(MultimediaProcessing):
     def __init__(self, api_token):
         super().__init__(api_token)
 
-    def image_classification(self, input: Text, model: Optional[Text] = None) -> List:
+    def image_classification(self, inputs: Union[Text, List], model: Optional[Text] = None) -> List:
         """
         Classify an image from a file path or an url.
 
-        :param input: the file path or url to the image to classify
+        :param inputs: a string or a list of strings of the file paths or urls of the images to classify.
+        :param model: the model to use for the image classification task. If not provided, the recommended model from Hugging Face will be used.
+        :return: a list of dictionaries each containing the label and the confidence score for that label.
+        """
+        if type(inputs) == list:
+            return self._query_in_list(inputs, model=model, task="image-classification")
+        elif type(inputs) == str:
+            return self._query(inputs, model=model, task="image-classification")
+
+    def image_classification_in_df(self, df: DataFrame, column: Text, model: Optional[Text] = None) -> DataFrame:
+        """
+        Classify images from a dataframe.
+
+        :param df: a pandas DataFrame containing the images to classify.
+        :param column: the name of the column containing the file paths or urls of the images to classify.
         :param model: the model to use for the image classification task. If not provided, the recommended model from Hugging Face will be used.
-        :return: a list of dictionaries each containing the label and the confidence score for that label
+        :return: a pandas DataFrame with the label for the images. Each label added will be the one with the highest confidence score for that particular image. The label will be added as a new column called 'predictions' to the original DataFrame.
         """
-        return self._query(input, model=model, task="image-classification")
+        predictions = self._query_in_df(df, column, model=model, task="image-classification")
+        df["predictions"] = [prediction[0]['label'] for prediction in predictions]
+        return df
 
-    def object_detection(self, input: Text, model: Optional[Text] = None) -> List:
+    def object_detection(self, inputs: Union[Text, List], model: Optional[Text] = None) -> List:
         """
         Perform object detection on an image from a file path or an url.
 
-        :param input: the file path or url to the image to perform object detection on
+        :param inputs: a string or a list of strings of the file paths or urls of the images to perform object detection on.
         :param model: the model to use for the object detection task. If not provided, the recommended model from Hugging Face will be used.
-        :return: a list of dictionaries each containing the label, the confidence score for that label, and the bounding box coordinates
+        :return: a list of dictionaries each containing the label, the confidence score for that label, and the bounding box coordinates.
         """
-        return self._query(input, model=model, task="object-detection")
+        if type(inputs) == list:
+            return self._query(inputs, model=model, task="object-detection")
+        elif type(inputs) == str:
+            return self._query(inputs, model=model, task="object-detection")
diff --git a/hugging_py_face/config/config.yaml b/hugging_py_face/config/config.yaml
@@ -8,7 +8,7 @@ TASK_MODEL_MAP:
   text-generation: gpt2
   zero-shot-classification: facebook/bart-large-mnli
   conversational: microsoft/DialoGPT-large
-  feature-extraction: sentence-transformers/paraphrase-xlm-r-multilingual-v1
+  feature-extraction: julien-c/distilbert-feature-extraction
   image-classification: google/vit-base-patch16-224
   object-detection: facebook/detr-resnet-50
   speech-recognition: facebook/wav2vec2-base-960h

diff --git a/hugging_py_face/multimedia_processing.py b/hugging_py_face/multimedia_processing.py
@@ -29,4 +29,10 @@ def _query(self, input: Text, model: Optional[Text] = None, task: Optional[Text]
                 data = f.read()
 
         response = requests.request("POST", api_url, headers=headers, data=data)
-        return json.loads(response.content.decode("utf-8"))
+        return json.loads(response.content.decode("utf-8"))
+
+    def _query_in_list(self, inputs: List[Text], model: Optional[Text] = None, task: Optional[Text] = None) -> List[Union[Dict, List]]:
+        return [self._query(input, model, task) for input in inputs]
+
+    def _query_in_df(self, df, input_column: Text, model: Optional[Text] = None, task: Optional[Text] = None) -> List[Union[Dict, List]]:
+        return self._query_in_list(df[input_column].tolist(), model, task)
diff --git a/hugging_py_face/nlp.py b/hugging_py_face/nlp.py
@@ -98,11 +98,9 @@ def question_answering(self, question: Text, context: Text, model: Optional[Text
         Answer a question using the provided context.
 
         :param question: a string of the question to be answered.
-        :param context: a string of context.
+        :param context: a string of context. This field is required for the question answering task and cannot be left empty.
         :param model: the model to use for the question answering task. If not provided, the recommended model from Hugging Face will be used.
         :return: a dict of the answer.
-
-        # TODO: check if questions can be answered without context
         """
         return self._query(
             {
@@ -170,6 +168,21 @@ def text_generation(self, text: Union[Text, List], parameters: Optional[Dict] =
         """
         return self._query(text, parameters=parameters, options=options, model=model, task='text-generation')
 
+    def text_generation_in_df(self, df: DataFrame, column: Text, parameters: Optional[Dict] = None, options: Optional[Dict] = None, model: Optional[Text] = None) -> DataFrame:
+        """
+        Continue text from a prompt in the column of a DataFrame.
+
+        :param df: a pandas DataFrame containing the strings to be generated from.
+        :param column: the column containing the strings to be generated from.
+        :param parameters: a dict of parameters. For more information, see the `detailed parameters for the text generation task <https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task>`_.
+        :param options: a dict of options. For more information, see the `detailed parameters for the text generation task <https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task>`_.
+        :param model: the model to use for the text generation task. If not provided, the recommended model from Hugging Face will be used.
+        :return: a pandas DataFrame with the generated text. The generated text will be added as a new column called 'predictions' to the original DataFrame.
+        """
+        predictions = self._query_in_df(df, column, parameters=parameters, options=options, model=model, task='text-generation')
+        df['predictions'] = [prediction[0]['generated_text'] for prediction in predictions]
+        return df
+
     def zero_shot_classification(self, text: Union[Text, List], candidate_labels: List, parameters: Optional[Dict] = {}, options: Optional[Dict] = None, model: Optional[Text] = None) -> Union[Dict, List]:
         """
         Classify a sentence/paragraph to one of the candidate labels provided.
@@ -191,7 +204,7 @@ def zero_shot_classification(self, text: Union[Text, List], candidate_labels: Li
             task='zero-shot-classification'
         )
 
-    def conversational(self, text: Union[Text, List], past_user_inputs: Optional[Text] = None, generated_responses: Optional[Text] = None, parameters: Optional[Dict] = None, options: Optional[Dict] = None, model: Optional[Text] = None) -> Union[Dict, List]:
+    def conversational(self, text: Union[Text, List], past_user_inputs: Optional[List] = None, generated_responses: Optional[List] = None, parameters: Optional[Dict] = None, options: Optional[Dict] = None, model: Optional[Text] = None) -> Union[Dict, List]:
         """
         Corresponds to any chatbot like structure: pass in some text along with the past_user_inputs and generated_responses to receive a response.
 

diff --git a/tests/test_audio_processing.py → tests/audio_processing/__init__.py b/tests/test_audio_processing.py → tests/audio_processing/__init__.py
diff --git a/tests/audio_processing/test_audio_processing.py b/tests/audio_processing/test_audio_processing.py
@@ -0,0 +1,45 @@
+import os
+import unittest
+from dotenv import load_dotenv
+
+from hugging_py_face.audio_processing import AudioProcessing
+
+load_dotenv()
+
+
+class TestAudioProcessing(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.ap = AudioProcessing(os.environ.get("API_KEY"))
+        cls.inputs = os.path.join(os.path.dirname(__file__), '..', 'resources', 'amused.wav')
+
+    def test_speech_recognition(self):
+        self.assertEqual(
+            self.ap.speech_recognition(self.inputs),
+            {
+                'text': 'I AM PLAYING A SINGLE HAND IN IT LOOKS LIKE A LOSING GAME'
+            },
+        )
+
+    def test_audio_classification(self):
+        self.assertEqual(
+            self.ap.audio_classification(self.inputs),
+            [
+                {
+                    'score': 0.996896505355835,
+                    'label': 'hap'
+                },
+                {
+                    'score': 0.0029580998234450817,
+                    'label': 'sad'
+                },
+                {
+                    'score': 9.905469050863758e-05,
+                    'label': 'neu'
+                },
+                {
+                    'score': 4.624614666681737e-05,
+                    'label': 'ang'
+                }
+            ],
+        )