From bba3ebe1bcb14254ed11b6e9860bba9ce1ec8329 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 17:03:21 +0530 Subject: [PATCH 01/27] added imports to __init__.py to facilitate easier use --- hugging_py_face/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hugging_py_face/__init__.py b/hugging_py_face/__init__.py index e69de29..8c44002 100644 --- a/hugging_py_face/__init__.py +++ b/hugging_py_face/__init__.py @@ -0,0 +1,3 @@ +from .nlp import NLP +from .computer_vision import ComputerVision +from .audio_processing import AudioProcessing \ No newline at end of file From 2cf4d250a291c04a71b1b82640be3f64d3ecbb66 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 17:06:17 +0530 Subject: [PATCH 02/27] added some initial content to the README --- README.md | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/README.md b/README.md index 929db69..c8bc600 100644 --- a/README.md +++ b/README.md @@ -1 +1,71 @@ # Hugging-Py-Face +Hugging-Py-Face is a powerful Python package that provides seamless integration with the Hugging Face Inference API, allowing you to easily perform inference on your machine learning models hosted on the Hugging Face Model Hub. + +One of the key benefits of using the Hugging Face Inference API is that it provides a scalable and efficient way to perform inference on your models, by allowing you to easily deploy and serve your models in the cloud. Additionally, the Inference API provides a simple and standardized API that can be used across different programming languages, making it easy to integrate your models with other services and tools. + +With Hugging-Py-Face, you can take advantage of these benefits while also enjoying the simplicity and flexibility of using Python. 
+ +It allows you to easily customize your API requests, adjust request parameters, handle authentication and access tokens, and interact with a wide range of machine learning models hosted on the Hugging Face Model Hub. + +Overall, Hugging-Py-Face is an awesome tool for any machine learning developer or data scientist who wants to perform efficient and scalable inference on their models, while also enjoying the simplicity and flexibility of using Python. Whether you're working on a personal project or a large-scale enterprise application, Hugging-Py-Face can help you achieve your machine learning goals with ease. + +## Installation +### With pip +``` +pip install hugging_py_face +``` + +## Components + - NLP (Natural Language Processing): This component deals with processing and analyzing human language. It includes various techniques such as text classification, text generation, summarization and many more. + - Computer Vision: This component deals with the analysis of visual data from the real world. It includes the image classification and object detection techniques. + - Audio Processing: This component deals with the analysis of audio signals. It includes the audio classification and speech recognition techniques. + +## Usage +The library will first need to be configured with a User Access Tokens from the Hugging Face website. + +### NLP (Natural Language Processing) +``` +from hugging_py_face import NLP + +# initialize the NLP class with the user access token +nlp = NLP('hf_...') + +# perform text classification +nlp.text_classification("I like you. 
I love you.") + +# perform object detection +nlp.text_generation("The answer to the universe is") +``` + +### Computer Vision +``` +from hugging_py_face import ComputerVision + +# initialize the ComputerVision class with the user access token +cp = ComputerVision('hf_...') + +# perform image classification +# the image can be a local file or a URL +cp.image_classification("cats.jpg") + +# perform object detection +# the image can be a local file or a URL +cp.object_detection("cats.jpg") +``` + +### Audio Processing +``` +from hugging_py_face import AudioProcessing + +# initialize the AudioProcessing class with the user access token +ap = AudioProcessing('hf_...') + +# perform audio classification +# the audio file can be a local file or a URL +ap.audio_classification("dogs.wav") + +# perform speech recognition +# the audio file can be a local file or a URL +ap.speech_recognition("dogs.wav") + +``` \ No newline at end of file From ce112802873e9b35c9747a24fbc69d4f48f0f153 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 17:23:44 +0530 Subject: [PATCH 03/27] renamed image file used for testing to dogs.jpeg --- tests/resources/{image.jpeg => dogs.jpeg} | Bin 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/resources/{image.jpeg => dogs.jpeg} (100%) diff --git a/tests/resources/image.jpeg b/tests/resources/dogs.jpeg similarity index 100% rename from tests/resources/image.jpeg rename to tests/resources/dogs.jpeg From 87eb34acd3ceffcd17152f28d09cb8dd3f2c6e29 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 17:24:05 +0530 Subject: [PATCH 04/27] added unit tests for ComputerVision tasks --- tests/test_computer_vision.py | 61 +++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/test_computer_vision.py b/tests/test_computer_vision.py index e69de29..89d8b74 100644 --- a/tests/test_computer_vision.py +++ b/tests/test_computer_vision.py @@ -0,0 +1,61 @@ +import os +import unittest +from 
dotenv import load_dotenv + +from hugging_py_face.computer_vision import ComputerVision + +load_dotenv() + + +class TestComputerVision(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.cp = ComputerVision(os.environ.get("API_KEY")) + + def test_image_classification(self): + input = "resources/dogs.jpeg" + + self.assertEqual( + self.cp.image_classification(input), + [ + { + 'score': 0.9061778783798218, + 'label': 'golden retriever' + }, + { + 'score': 0.06364733725786209, + 'label': 'Labrador retriever' + }, + { + 'score': 0.005189706105738878, + 'label': 'Sussex spaniel' + }, + { + 'score': 0.0026904833503067493, + 'label': 'clumber, clumber spaniel' + }, + { + 'score': 0.0026738110464066267, + 'label': 'cocker spaniel, English cocker spaniel, cocker' + } + ], + ) + + def test_object_detection(self): + input = "resources/dogs.jpeg" + + self.assertEqual( + self.cp.object_detection(input), + [ + { + 'score': 0.9990463852882385, + 'label': 'dog', + 'box': {'xmin': 1329, 'ymin': 961, 'xmax': 2668, 'ymax': 3149} + }, + { + 'score': 0.9985553622245789, + 'label': 'dog', + 'box': {'xmin': 2598, 'ymin': 827, 'xmax': 3902, 'ymax': 3190} + } + ], + ) \ No newline at end of file From 2322cf96eb33dffda2b4dc762a6e2e2f73152388 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 17:25:11 +0530 Subject: [PATCH 05/27] updated version of initial release to be 0.1.0 --- hugging_py_face/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hugging_py_face/__about__.py b/hugging_py_face/__about__.py index e6d0792..e4fca6f 100644 --- a/hugging_py_face/__about__.py +++ b/hugging_py_face/__about__.py @@ -1,6 +1,6 @@ __title__ = 'hugging_py_face' __package_name__ = 'hugging_py_face' -__version__ = '0.0.1' +__version__ = '0.1.0' __description__ = "Hugging-Py-face, the Python client for the Hugging Face Inference API." 
__email__ = "minurapunchihewa17@gmail.com" __author__ = 'Minura Punchihewa' From 11e4ad6bc90df97ee5042d2e621c14abb2dc0c9c Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 17:28:43 +0530 Subject: [PATCH 06/27] updated some information about the package in __about__.py --- hugging_py_face/__about__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hugging_py_face/__about__.py b/hugging_py_face/__about__.py index e4fca6f..c1cfbca 100644 --- a/hugging_py_face/__about__.py +++ b/hugging_py_face/__about__.py @@ -1,10 +1,10 @@ __title__ = 'hugging_py_face' __package_name__ = 'hugging_py_face' __version__ = '0.1.0' -__description__ = "Hugging-Py-face, the Python client for the Hugging Face Inference API." +__description__ = "Hugging-Py-Face, the Python client for the Hugging Face Inference API." __email__ = "minurapunchihewa17@gmail.com" __author__ = 'Minura Punchihewa' -__github__ = 'https://github.com/MinuraPunchihewa/mindsdb-text-to-sql' -__pypi__ = '' +__github__ = 'https://github.com/MinuraPunchihewa/hugging_py_face' +__pypi__ = 'https://pypi.org/project/hugging-py-face/' __license__ = '' __copyright__ = '' \ No newline at end of file From 0f1cda31c019e776c29f2cce0feaf0f30eb54492 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 20:36:12 +0530 Subject: [PATCH 07/27] added support for the text generation task to be applied on DataFrames --- hugging_py_face/nlp.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/hugging_py_face/nlp.py b/hugging_py_face/nlp.py index 2338324..bd741fe 100644 --- a/hugging_py_face/nlp.py +++ b/hugging_py_face/nlp.py @@ -170,6 +170,21 @@ def text_generation(self, text: Union[Text, List], parameters: Optional[Dict] = """ return self._query(text, parameters=parameters, options=options, model=model, task='text-generation') + def text_generation_in_df(self, df: DataFrame, column: Text, parameters: Optional[Dict] = None, options: Optional[Dict] = None, model: 
Optional[Text] = None) -> DataFrame: + """ + Continue text from a prompt in the column of a DataFrame. + + :param df: a pandas DataFrame containing the strings to be generated from. + :param column: the column containing the strings to be generated from. + :param parameters: a dict of parameters. For more information, see the `detailed parameters for the text generation task `_. + :param options: a dict of options. For more information, see the `detailed parameters for the text generation task `_. + :param model: the model to use for the text generation task. If not provided, the recommended model from Hugging Face will be used. + :return: a pandas DataFrame with the generated text. The generated text will be added as a new column called 'predictions' to the original DataFrame. + """ + predictions = self._query_in_df(df, column, parameters=parameters, options=options, model=model, task='text-generation') + df['predictions'] = [prediction[0]['generated_text'] for prediction in predictions] + return df + def zero_shot_classification(self, text: Union[Text, List], candidate_labels: List, parameters: Optional[Dict] = {}, options: Optional[Dict] = None, model: Optional[Text] = None) -> Union[Dict, List]: """ Classify a sentence/paragraph to one of the candidate labels provided. 
From a514842370d69e087b1c700717f660e3d9594b4d Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 20:38:53 +0530 Subject: [PATCH 08/27] renamed the audio file used for testing to amused.wav --- tests/resources/{audio.wav => amused.wav} | Bin 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/resources/{audio.wav => amused.wav} (100%) diff --git a/tests/resources/audio.wav b/tests/resources/amused.wav similarity index 100% rename from tests/resources/audio.wav rename to tests/resources/amused.wav From 375bd862dfef42dcc569b7d424325d05d666dffe Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 20:46:34 +0530 Subject: [PATCH 09/27] added unit tests for AudioProcessing tasks --- tests/test_audio_processing.py | 48 ++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/test_audio_processing.py b/tests/test_audio_processing.py index e69de29..ca0cf9a 100644 --- a/tests/test_audio_processing.py +++ b/tests/test_audio_processing.py @@ -0,0 +1,48 @@ +import os +import unittest +from dotenv import load_dotenv + +from hugging_py_face.audio_processing import AudioProcessing + +load_dotenv() + + +class TestAudioProcessing(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.ap = AudioProcessing(os.environ.get("API_KEY")) + + def test_speech_recognition(self): + input = "resources/amused.wav" + + self.assertEqual( + self.ap.speech_recognition(input), + { + 'text': 'I AM PLAYING A SINGLE HAND IN IT LOOKS LIKE A LOSING GAME' + }, + ) + + def test_audio_classification(self): + input = "resources/amused.wav" + + self.assertEqual( + self.ap.audio_classification(input), + [ + { + 'score': 0.996896505355835, + 'label': 'hap' + }, + { + 'score': 0.0029580998234450817, + 'label': 'sad' + }, + { + 'score': 9.905469050863758e-05, + 'label': 'neu' + }, + { + 'score': 4.624614666681737e-05, + 'label': 'ang' + } + ], + ) \ No newline at end of file From ace022ad4233e56aac32aef8e506983ca6cd7440 Mon Sep 
17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 21:50:40 +0530 Subject: [PATCH 10/27] added two methods to MultimediaProcessing to support operations on lists and DataFrames --- hugging_py_face/multimedia_processing.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hugging_py_face/multimedia_processing.py b/hugging_py_face/multimedia_processing.py index 305ba8d..6e11d61 100644 --- a/hugging_py_face/multimedia_processing.py +++ b/hugging_py_face/multimedia_processing.py @@ -29,4 +29,10 @@ def _query(self, input: Text, model: Optional[Text] = None, task: Optional[Text] data = f.read() response = requests.request("POST", api_url, headers=headers, data=data) - return json.loads(response.content.decode("utf-8")) \ No newline at end of file + return json.loads(response.content.decode("utf-8")) + + def _query_in_list(self, inputs: List[Text], model: Optional[Text] = None, task: Optional[Text] = None) -> List[Union[Dict, List]]: + return [self._query(input, model, task) for input in inputs] + + def _query_in_df(self, df, input_column: Text, model: Optional[Text] = None, task: Optional[Text] = None) -> List[Union[Dict, List]]: + return self._query_in_list(df[input_column].tolist(), model, task) \ No newline at end of file From 2b124c4daec24d5961bc292c17d15d1ac31f8eee Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 22:02:20 +0530 Subject: [PATCH 11/27] added support for list type inputs for the ComputerVision tasks --- hugging_py_face/computer_vision.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/hugging_py_face/computer_vision.py b/hugging_py_face/computer_vision.py index 58e0c64..e2bf100 100644 --- a/hugging_py_face/computer_vision.py +++ b/hugging_py_face/computer_vision.py @@ -1,4 +1,4 @@ -from typing import Text, List, Optional +from typing import Text, List, Optional, Union from .multimedia_processing import MultimediaProcessing @@ -6,22 +6,28 @@ class 
ComputerVision(MultimediaProcessing): def __init__(self, api_token): super().__init__(api_token) - def image_classification(self, input: Text, model: Optional[Text] = None) -> List: + def image_classification(self, inputs: Union[Text, List], model: Optional[Text] = None) -> List: """ Classify an image from a file path or an url. - :param input: the file path or url to the image to classify + :param inputs: a string or a list of strings of the file paths or urls of the images to classify :param model: the model to use for the image classification task. If not provided, the recommended model from Hugging Face will be used. :return: a list of dictionaries each containing the label and the confidence score for that label """ - return self._query(input, model=model, task="image-classification") + if type(input) == list: + return self._query_in_list(inputs, model=model, task="image-classification") + elif type(input) == str: + return self._query(inputs, model=model, task="image-classification") - def object_detection(self, input: Text, model: Optional[Text] = None) -> List: + def object_detection(self, inputs: Union[Text, List], model: Optional[Text] = None) -> List: """ Perform object detection on an image from a file path or an url. - :param input: the file path or url to the image to perform object detection on + :param inputs: a string or a list of strings of the file paths or urls of the images to perform object detection on :param model: the model to use for the object detection task. If not provided, the recommended model from Hugging Face will be used. 
:return: a list of dictionaries each containing the label, the confidence score for that label, and the bounding box coordinates """ - return self._query(input, model=model, task="object-detection") \ No newline at end of file + if type(input) == list: + return self._query(inputs, model=model, task="object-detection") + elif type(input) == str: + return self._query(inputs, model=model, task="object-detection") \ No newline at end of file From 28e86ddc5f4bd6e3603e08752bc132f412821edd Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 22:12:14 +0530 Subject: [PATCH 12/27] added support for the image classification task to be applied on DataFrames --- hugging_py_face/computer_vision.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/hugging_py_face/computer_vision.py b/hugging_py_face/computer_vision.py index e2bf100..7fbc513 100644 --- a/hugging_py_face/computer_vision.py +++ b/hugging_py_face/computer_vision.py @@ -1,4 +1,6 @@ +from pandas import DataFrame from typing import Text, List, Optional, Union + from .multimedia_processing import MultimediaProcessing @@ -10,22 +12,35 @@ def image_classification(self, inputs: Union[Text, List], model: Optional[Text] """ Classify an image from a file path or an url. - :param inputs: a string or a list of strings of the file paths or urls of the images to classify + :param inputs: a string or a list of strings of the file paths or urls of the images to classify. :param model: the model to use for the image classification task. If not provided, the recommended model from Hugging Face will be used. - :return: a list of dictionaries each containing the label and the confidence score for that label + :return: a list of dictionaries each containing the label and the confidence score for that label. 
""" if type(input) == list: return self._query_in_list(inputs, model=model, task="image-classification") elif type(input) == str: return self._query(inputs, model=model, task="image-classification") + def image_classification_in_df(self, df: DataFrame, column: Text, model: Optional[Text] = None) -> DataFrame: + """ + Classify images from a dataframe. + + :param df: a pandas DataFrame containing the images to classify. + :param column: the name of the column containing the file paths or urls of the images to classify. + :param model: the model to use for the image classification task. If not provided, the recommended model from Hugging Face will be used. + :return: a pandas DataFrame with the label for the images. Each label added will be the one with the highest confidence score for that particular image. The label will be added as a new column called 'predictions' to the original DataFrame. + """ + predictions = self._query_in_df(df, column, model=model, task="image-classification") + df["predictions"] = [prediction[0]['label'] for prediction in predictions] + return df + def object_detection(self, inputs: Union[Text, List], model: Optional[Text] = None) -> List: """ Perform object detection on an image from a file path or an url. - :param inputs: a string or a list of strings of the file paths or urls of the images to perform object detection on + :param inputs: a string or a list of strings of the file paths or urls of the images to perform object detection on. :param model: the model to use for the object detection task. If not provided, the recommended model from Hugging Face will be used. - :return: a list of dictionaries each containing the label, the confidence score for that label, and the bounding box coordinates + :return: a list of dictionaries each containing the label, the confidence score for that label, and the bounding box coordinates. 
""" if type(input) == list: return self._query(inputs, model=model, task="object-detection") From a7cbac0b5ac658c25f965b08ddf183478610afd3 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 23:54:09 +0530 Subject: [PATCH 13/27] added support for list type inputs for the AudioProcessing tasks --- hugging_py_face/audio_processing.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/hugging_py_face/audio_processing.py b/hugging_py_face/audio_processing.py index b124741..cd69ff5 100644 --- a/hugging_py_face/audio_processing.py +++ b/hugging_py_face/audio_processing.py @@ -1,4 +1,4 @@ -from typing import Text, List, Dict, Optional +from typing import Text, List, Dict, Optional, Union from .multimedia_processing import MultimediaProcessing @@ -6,22 +6,28 @@ class AudioProcessing(MultimediaProcessing): def __init__(self, api_token): super().__init__(api_token) - def speech_recognition(self, input: Text, model: Optional[Text] = None) -> Dict: + def speech_recognition(self, inputs: Union[Text, List], model: Optional[Text] = None) -> Union[Dict, List]: """ Perform speech recognition on an audio file from a file path or an url. - :param input: the file path or url to the audio file to perform speech recognition on + :param inputs: a string or a list of strings of the file paths or urls of the audio files to perform speech recognition on. :param model: the model to use for the speech recognition task. If not provided, the recommended model from Hugging Face will be used. - :return: the text transcription of the audio file + :return: a dictionary or a list of dictionaries containing the text recognized from the audio file(s). 
""" - return self._query(input, model=model, task="speech-recognition") + if type(input) == list: + return self._query_in_list(inputs, model=model, task="speech-recognition") + elif type(input) == str: + return self._query(input, model=model, task="speech-recognition") - def audio_classification(self, input: Text, model: Optional[Text] = None) -> List: + def audio_classification(self, inputs: Text, model: Optional[Text] = None) -> List: """ Classify an audio file from a file path or an url. - :param input: the file path or url to the audio file to classify + :param inputs: a string or a list of strings of the file paths or urls of the audio files to classify. :param model: the model to use for the audio classification task. If not provided, the recommended model from Hugging Face will be used. - :return: a list containing the labels and the confidence score for each label + :return: a list of dictionaries or a list of lists of dictionaries each containing the label and the confidence score for that label. 
""" - return self._query(input, model=model, task="audio-classification") \ No newline at end of file + if type(input) == list: + return self._query_in_list(inputs, model=model, task="audio-classification") + elif type(input) == str: + return self._query(input, model=model, task="audio-classification") \ No newline at end of file From 2ac145f4f483774aaaed840d0b3ce255a531c1da Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Mon, 17 Apr 2023 23:57:05 +0530 Subject: [PATCH 14/27] added support for the speech recognition and audio classification tasks to be applied on DataFrames --- hugging_py_face/audio_processing.py | 30 ++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/hugging_py_face/audio_processing.py b/hugging_py_face/audio_processing.py index cd69ff5..2724b12 100644 --- a/hugging_py_face/audio_processing.py +++ b/hugging_py_face/audio_processing.py @@ -1,4 +1,6 @@ +from pandas import DataFrame from typing import Text, List, Dict, Optional, Union + from .multimedia_processing import MultimediaProcessing @@ -19,6 +21,19 @@ def speech_recognition(self, inputs: Union[Text, List], model: Optional[Text] = elif type(input) == str: return self._query(input, model=model, task="speech-recognition") + def speech_recognition_in_df(self, df: DataFrame, column: Text, model: Optional[Text] = None) -> DataFrame: + """ + Perform speech recognition on audio files from a DataFrame. + + :param df: a pandas DataFrame containing the audio files to perform speech recognition on. + :param column: the name of the column containing the file paths or urls of the audio files to perform speech recognition on. + :param model: the model to use for the speech recognition task. If not provided, the recommended model from Hugging Face will be used. + :return: a pandas DataFrame with the text recognized from the audio files. The text will be added as a new column called 'predictions' to the original DataFrame. 
+ """ + predictions = self._query_in_df(df, column, model=model, task="speech-recognition") + df["predictions"] = [prediction['text'] for prediction in predictions] + return df + def audio_classification(self, inputs: Text, model: Optional[Text] = None) -> List: """ Classify an audio file from a file path or an url. @@ -30,4 +45,17 @@ def audio_classification(self, inputs: Text, model: Optional[Text] = None) -> Li if type(input) == list: return self._query_in_list(inputs, model=model, task="audio-classification") elif type(input) == str: - return self._query(input, model=model, task="audio-classification") \ No newline at end of file + return self._query(input, model=model, task="audio-classification") + + def audio_classification_in_df(self, df: DataFrame, column: Text, model: Optional[Text] = None) -> DataFrame: + """ + Classify audio files from a DataFrame. + + :param df: a pandas DataFrame containing the audio files to classify. + :param column: the name of the column containing the file paths or urls of the audio files to classify. + :param model: the model to use for the audio classification task. If not provided, the recommended model from Hugging Face will be used. + :return: a pandas DataFrame with the label for the audio files. Each label added will be the one with the highest confidence score for that particular audio file. The label will be added as a new column called 'predictions' to the original DataFrame. 
+ """ + predictions = self._query_in_df(df, column, model=model, task="audio-classification") + df["predictions"] = [prediction[0]['label'] for prediction in predictions] + return df \ No newline at end of file From deb51409588bbd9197befbb9196eedb1e8cdab30 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 00:04:18 +0530 Subject: [PATCH 15/27] bug fix in the AudioClassification tasks --- hugging_py_face/audio_processing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hugging_py_face/audio_processing.py b/hugging_py_face/audio_processing.py index 2724b12..6097b19 100644 --- a/hugging_py_face/audio_processing.py +++ b/hugging_py_face/audio_processing.py @@ -19,7 +19,7 @@ def speech_recognition(self, inputs: Union[Text, List], model: Optional[Text] = if type(input) == list: return self._query_in_list(inputs, model=model, task="speech-recognition") elif type(input) == str: - return self._query(input, model=model, task="speech-recognition") + return self._query(inputs, model=model, task="speech-recognition") def speech_recognition_in_df(self, df: DataFrame, column: Text, model: Optional[Text] = None) -> DataFrame: """ @@ -45,7 +45,7 @@ def audio_classification(self, inputs: Text, model: Optional[Text] = None) -> Li if type(input) == list: return self._query_in_list(inputs, model=model, task="audio-classification") elif type(input) == str: - return self._query(input, model=model, task="audio-classification") + return self._query(inputs, model=model, task="audio-classification") def audio_classification_in_df(self, df: DataFrame, column: Text, model: Optional[Text] = None) -> DataFrame: """ From 6d23cc38a0fc5c93581ab8674d8f27d651570de4 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 10:52:24 +0530 Subject: [PATCH 16/27] added the unit test module for testing NLP tasks applied on DataFrames --- tests/test_nlp_in_df.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 
tests/test_nlp_in_df.py diff --git a/tests/test_nlp_in_df.py b/tests/test_nlp_in_df.py new file mode 100644 index 0000000..bb68f6c --- /dev/null +++ b/tests/test_nlp_in_df.py @@ -0,0 +1,29 @@ +import os +import unittest +import pandas as pd +from dotenv import load_dotenv +from pandas.testing import assert_frame_equal + +from hugging_py_face.nlp import NLP + +load_dotenv() + + +class TestNLPInDF(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.nlp = NLP(os.environ.get("API_KEY")) + + def test_text_classification_in_df(self): + texts = ["I like you. I love you", "I don't like you. I hate you"] + df = pd.DataFrame(texts, columns=['texts']) + + assert_frame_equal( + self.nlp.text_classification_in_df(df, 'texts'), + pd.DataFrame( + { + "texts": ["I like you. I love you", "I don't like you. I hate you"], + "predictions": ["POSITIVE", "NEGATIVE"], + } + ), + ) \ No newline at end of file From bddd514dacf8aadd136a871d465db725ab9e534d Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 10:56:49 +0530 Subject: [PATCH 17/27] added the unit test module for testing ComputerVision tasks applied on DataFrames --- tests/test_computer_vision_in_df.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 tests/test_computer_vision_in_df.py diff --git a/tests/test_computer_vision_in_df.py b/tests/test_computer_vision_in_df.py new file mode 100644 index 0000000..4dd729c --- /dev/null +++ b/tests/test_computer_vision_in_df.py @@ -0,0 +1,29 @@ +import os +import unittest +import pandas as pd +from dotenv import load_dotenv +from pandas.testing import assert_frame_equal + +from hugging_py_face.computer_vision import ComputerVision + +load_dotenv() + + +class TestComputerVisionInDF(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.cp = ComputerVision(os.environ.get("API_KEY")) + + def test_image_classification_in_df(self): + paths = ["resources/dogs.jpeg"] + df = pd.DataFrame(paths, columns=['inputs']) + + 
assert_frame_equal( + self.cp.image_classification_in_df(df, 'inputs'), + pd.DataFrame( + { + "inputs": ["resources/dogs.jpeg"], + "predictions": ["golden retriever"], + } + ), + ) \ No newline at end of file From c7a9715a07f59534a54da877133a9d5f519c70b2 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 11:06:08 +0530 Subject: [PATCH 18/27] added the unit test module for testing AudioProcessing tasks applied on DataFrames --- tests/test_audio_processing_in_df.py | 43 ++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tests/test_audio_processing_in_df.py diff --git a/tests/test_audio_processing_in_df.py b/tests/test_audio_processing_in_df.py new file mode 100644 index 0000000..8b2871c --- /dev/null +++ b/tests/test_audio_processing_in_df.py @@ -0,0 +1,43 @@ +import os +import unittest +import pandas as pd +from dotenv import load_dotenv +from pandas.testing import assert_frame_equal + +from hugging_py_face.audio_processing import AudioProcessing + +load_dotenv() + + +class TestAudioProcessingInDF(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.ap = AudioProcessing(os.environ.get("API_KEY")) + + def test_speech_recognition_in_df(self): + paths = ["resources/amused.wav"] + df = pd.DataFrame(paths, columns=['inputs']) + + assert_frame_equal( + self.ap.speech_recognition_in_df(df, 'inputs'), + pd.DataFrame( + { + "inputs": ["resources/amused.wav"], + "predictions": ["I AM PLAYING A SINGLE HAND IN IT LOOKS LIKE A LOSING GAME"], + } + ), + ) + + def test_audio_classification_in_df(self): + paths = ["resources/amused.wav"] + df = pd.DataFrame(paths, columns=['inputs']) + + assert_frame_equal( + self.ap.audio_classification_in_df(df, 'inputs'), + pd.DataFrame( + { + "inputs": ["resources/amused.wav"], + "predictions": ["hap"], + } + ), + ) \ No newline at end of file From 4ed49ce722a0047e40c4abc0492834a25ec62459 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 12:06:25 +0530 Subject: 
[PATCH 19/27] fixed bug in both ComputerVision and AudioProcessing --- hugging_py_face/audio_processing.py | 8 ++++---- hugging_py_face/computer_vision.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hugging_py_face/audio_processing.py b/hugging_py_face/audio_processing.py index 6097b19..532cab2 100644 --- a/hugging_py_face/audio_processing.py +++ b/hugging_py_face/audio_processing.py @@ -16,9 +16,9 @@ def speech_recognition(self, inputs: Union[Text, List], model: Optional[Text] = :param model: the model to use for the speech recognition task. If not provided, the recommended model from Hugging Face will be used. :return: a dictionary or a list of dictionaries containing the text recognized from the audio file(s). """ - if type(input) == list: + if type(inputs) == list: return self._query_in_list(inputs, model=model, task="speech-recognition") - elif type(input) == str: + elif type(inputs) == str: return self._query(inputs, model=model, task="speech-recognition") def speech_recognition_in_df(self, df: DataFrame, column: Text, model: Optional[Text] = None) -> DataFrame: @@ -42,9 +42,9 @@ def audio_classification(self, inputs: Text, model: Optional[Text] = None) -> Li :param model: the model to use for the audio classification task. If not provided, the recommended model from Hugging Face will be used. :return: a list of dictionaries or a list of lists of dictionaries each containing the label and the confidence score for that label. 
""" - if type(input) == list: + if type(inputs) == list: return self._query_in_list(inputs, model=model, task="audio-classification") - elif type(input) == str: + elif type(inputs) == str: return self._query(inputs, model=model, task="audio-classification") def audio_classification_in_df(self, df: DataFrame, column: Text, model: Optional[Text] = None) -> DataFrame: diff --git a/hugging_py_face/computer_vision.py b/hugging_py_face/computer_vision.py index 7fbc513..88d9c22 100644 --- a/hugging_py_face/computer_vision.py +++ b/hugging_py_face/computer_vision.py @@ -16,9 +16,9 @@ def image_classification(self, inputs: Union[Text, List], model: Optional[Text] :param model: the model to use for the image classification task. If not provided, the recommended model from Hugging Face will be used. :return: a list of dictionaries each containing the label and the confidence score for that label. """ - if type(input) == list: + if type(inputs) == list: return self._query_in_list(inputs, model=model, task="image-classification") - elif type(input) == str: + elif type(inputs) == str: return self._query(inputs, model=model, task="image-classification") def image_classification_in_df(self, df: DataFrame, column: Text, model: Optional[Text] = None) -> DataFrame: @@ -42,7 +42,7 @@ def object_detection(self, inputs: Union[Text, List], model: Optional[Text] = No :param model: the model to use for the object detection task. If not provided, the recommended model from Hugging Face will be used. :return: a list of dictionaries each containing the label, the confidence score for that label, and the bounding box coordinates. 
""" - if type(input) == list: + if type(inputs) == list: return self._query(inputs, model=model, task="object-detection") - elif type(input) == str: + elif type(inputs) == str: return self._query(inputs, model=model, task="object-detection") \ No newline at end of file From 12037c602a8e49c1e654a05191998168793dc3b5 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 12:42:45 +0530 Subject: [PATCH 20/27] refactored the tests by adding seperated packages for each component --- tests/audio_processing/__init__.py | 0 .../test_audio_processing.py | 9 +++------ .../test_audio_processing_in_df.py | 11 +++++------ tests/computer_vision/__init__.py | 0 .../{ => computer_vision}/test_computer_vision.py | 9 +++------ .../test_computer_vision_in_df.py | 6 +++--- tests/nlp/__init__.py | 0 tests/{ => nlp}/test_nlp.py | 15 ++++++++------- tests/{ => nlp}/test_nlp_in_df.py | 0 9 files changed, 22 insertions(+), 28 deletions(-) create mode 100644 tests/audio_processing/__init__.py rename tests/{ => audio_processing}/test_audio_processing.py (84%) rename tests/{ => audio_processing}/test_audio_processing_in_df.py (75%) create mode 100644 tests/computer_vision/__init__.py rename tests/{ => computer_vision}/test_computer_vision.py (89%) rename tests/{ => computer_vision}/test_computer_vision_in_df.py (76%) create mode 100644 tests/nlp/__init__.py rename tests/{ => nlp}/test_nlp.py (92%) rename tests/{ => nlp}/test_nlp_in_df.py (100%) diff --git a/tests/audio_processing/__init__.py b/tests/audio_processing/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_audio_processing.py b/tests/audio_processing/test_audio_processing.py similarity index 84% rename from tests/test_audio_processing.py rename to tests/audio_processing/test_audio_processing.py index ca0cf9a..ab4ceac 100644 --- a/tests/test_audio_processing.py +++ b/tests/audio_processing/test_audio_processing.py @@ -11,22 +11,19 @@ class TestAudioProcessing(unittest.TestCase): 
@classmethod def setUpClass(cls): cls.ap = AudioProcessing(os.environ.get("API_KEY")) + cls.inputs = os.path.join(os.path.dirname(__file__), '..', 'resources', 'amused.wav') def test_speech_recognition(self): - input = "resources/amused.wav" - self.assertEqual( - self.ap.speech_recognition(input), + self.ap.speech_recognition(self.inputs), { 'text': 'I AM PLAYING A SINGLE HAND IN IT LOOKS LIKE A LOSING GAME' }, ) def test_audio_classification(self): - input = "resources/amused.wav" - self.assertEqual( - self.ap.audio_classification(input), + self.ap.audio_classification(self.inputs), [ { 'score': 0.996896505355835, diff --git a/tests/test_audio_processing_in_df.py b/tests/audio_processing/test_audio_processing_in_df.py similarity index 75% rename from tests/test_audio_processing_in_df.py rename to tests/audio_processing/test_audio_processing_in_df.py index 8b2871c..bb50744 100644 --- a/tests/test_audio_processing_in_df.py +++ b/tests/audio_processing/test_audio_processing_in_df.py @@ -13,30 +13,29 @@ class TestAudioProcessingInDF(unittest.TestCase): @classmethod def setUpClass(cls): cls.ap = AudioProcessing(os.environ.get("API_KEY")) + cls.inputs = [os.path.join(os.path.dirname(__file__), '..', 'resources', 'amused.wav')] def test_speech_recognition_in_df(self): - paths = ["resources/amused.wav"] - df = pd.DataFrame(paths, columns=['inputs']) + df = pd.DataFrame(self.inputs, columns=['inputs']) assert_frame_equal( self.ap.speech_recognition_in_df(df, 'inputs'), pd.DataFrame( { - "inputs": ["resources/amused.wav"], + "inputs": self.inputs, "predictions": ["I AM PLAYING A SINGLE HAND IN IT LOOKS LIKE A LOSING GAME"], } ), ) def test_audio_classification_in_df(self): - paths = ["resources/amused.wav"] - df = pd.DataFrame(paths, columns=['inputs']) + df = pd.DataFrame(self.inputs, columns=['inputs']) assert_frame_equal( self.ap.audio_classification_in_df(df, 'inputs'), pd.DataFrame( { - "inputs": ["resources/amused.wav"], + "inputs": self.inputs, "predictions": 
["hap"], } ), diff --git a/tests/computer_vision/__init__.py b/tests/computer_vision/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_computer_vision.py b/tests/computer_vision/test_computer_vision.py similarity index 89% rename from tests/test_computer_vision.py rename to tests/computer_vision/test_computer_vision.py index 89d8b74..6c79155 100644 --- a/tests/test_computer_vision.py +++ b/tests/computer_vision/test_computer_vision.py @@ -11,12 +11,11 @@ class TestComputerVision(unittest.TestCase): @classmethod def setUpClass(cls): cls.cp = ComputerVision(os.environ.get("API_KEY")) + cls.inputs = os.path.join(os.path.dirname(__file__), '..', 'resources', 'dogs.jpeg') def test_image_classification(self): - input = "resources/dogs.jpeg" - self.assertEqual( - self.cp.image_classification(input), + self.cp.image_classification(self.inputs), [ { 'score': 0.9061778783798218, @@ -42,10 +41,8 @@ def test_image_classification(self): ) def test_object_detection(self): - input = "resources/dogs.jpeg" - self.assertEqual( - self.cp.object_detection(input), + self.cp.object_detection(self.inputs), [ { 'score': 0.9990463852882385, diff --git a/tests/test_computer_vision_in_df.py b/tests/computer_vision/test_computer_vision_in_df.py similarity index 76% rename from tests/test_computer_vision_in_df.py rename to tests/computer_vision/test_computer_vision_in_df.py index 4dd729c..61f21b2 100644 --- a/tests/test_computer_vision_in_df.py +++ b/tests/computer_vision/test_computer_vision_in_df.py @@ -13,16 +13,16 @@ class TestComputerVisionInDF(unittest.TestCase): @classmethod def setUpClass(cls): cls.cp = ComputerVision(os.environ.get("API_KEY")) + cls.inputs = [os.path.join(os.path.dirname(__file__), '..', 'resources', 'dogs.jpeg')] def test_image_classification_in_df(self): - paths = ["resources/dogs.jpeg"] - df = pd.DataFrame(paths, columns=['inputs']) + df = pd.DataFrame(self.inputs, columns=['inputs']) assert_frame_equal( 
self.cp.image_classification_in_df(df, 'inputs'), pd.DataFrame( { - "inputs": ["resources/dogs.jpeg"], + "inputs": self.inputs, "predictions": ["golden retriever"], } ), diff --git a/tests/nlp/__init__.py b/tests/nlp/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_nlp.py b/tests/nlp/test_nlp.py similarity index 92% rename from tests/test_nlp.py rename to tests/nlp/test_nlp.py index 3d72d9a..c2c5491 100644 --- a/tests/test_nlp.py +++ b/tests/nlp/test_nlp.py @@ -105,13 +105,14 @@ def test_text_generation(self): self.assertEqual( self.nlp.text_generation(text), [ - { - 'generated_text': 'The answer to the universe is that it starts with a small ' - 'group of people. If you ask 10 people what they want on ' - "their computer, you'll get 10 replies that the internet " - 'or a Facebook page. So what about the universe in ' - 'general?\n' - } + {'generated_text': "The answer to the universe is in one's own minds and your " + 'thoughts.\n' + '\n' + 'When the universe is created to bring forth new creation, ' + 'our minds are born anew. 
The universe is a process ' + 'wherein all things are possible and the universe is one ' + 'created' + } ] ) diff --git a/tests/test_nlp_in_df.py b/tests/nlp/test_nlp_in_df.py similarity index 100% rename from tests/test_nlp_in_df.py rename to tests/nlp/test_nlp_in_df.py From bf207f35d626249b76dc95fac0efddbfcd3ad60a Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 12:57:42 +0530 Subject: [PATCH 21/27] fixed the type hints of the conversational task --- hugging_py_face/nlp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hugging_py_face/nlp.py b/hugging_py_face/nlp.py index bd741fe..8f2e3cf 100644 --- a/hugging_py_face/nlp.py +++ b/hugging_py_face/nlp.py @@ -206,7 +206,7 @@ def zero_shot_classification(self, text: Union[Text, List], candidate_labels: Li task='zero-shot-classification' ) - def conversational(self, text: Union[Text, List], past_user_inputs: Optional[Text] = None, generated_responses: Optional[Text] = None, parameters: Optional[Dict] = None, options: Optional[Dict] = None, model: Optional[Text] = None) -> Union[Dict, List]: + def conversational(self, text: Union[Text, List], past_user_inputs: Optional[Union[Text, List]] = None, generated_responses: Optional[Union[Text, List]] = None, parameters: Optional[Dict] = None, options: Optional[Dict] = None, model: Optional[Text] = None) -> Union[Dict, List]: """ Corresponds to any chatbot like structure: pass in some text along with the past_user_inputs and generated_responses to receive a response. 
From ffe00d92bd28cd3c65e60e18eefed9a75fa0dfc2 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 13:00:54 +0530 Subject: [PATCH 22/27] updated the recommended model for the feature extraction task --- hugging_py_face/config/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hugging_py_face/config/config.yaml b/hugging_py_face/config/config.yaml index 35c5604..8d21cb3 100644 --- a/hugging_py_face/config/config.yaml +++ b/hugging_py_face/config/config.yaml @@ -8,7 +8,7 @@ TASK_MODEL_MAP: text-generation: gpt2 zero-shot-classification: facebook/bart-large-mnli conversational: microsoft/DialoGPT-large - feature-extraction: sentence-transformers/paraphrase-xlm-r-multilingual-v1 + feature-extraction: julien-c/distilbert-feature-extraction image-classification: google/vit-base-patch16-224 object-detection: facebook/detr-resnet-50 speech-recognition: facebook/wav2vec2-base-960h From c0f9e83e6e2d3969f89ce5188fdc8a310a63057c Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 13:03:50 +0530 Subject: [PATCH 23/27] fixed the type hints of the conversational task again to the correct one --- hugging_py_face/nlp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hugging_py_face/nlp.py b/hugging_py_face/nlp.py index 8f2e3cf..5f861c3 100644 --- a/hugging_py_face/nlp.py +++ b/hugging_py_face/nlp.py @@ -206,7 +206,7 @@ def zero_shot_classification(self, text: Union[Text, List], candidate_labels: Li task='zero-shot-classification' ) - def conversational(self, text: Union[Text, List], past_user_inputs: Optional[Union[Text, List]] = None, generated_responses: Optional[Union[Text, List]] = None, parameters: Optional[Dict] = None, options: Optional[Dict] = None, model: Optional[Text] = None) -> Union[Dict, List]: + def conversational(self, text: Union[Text, List], past_user_inputs: Optional[List] = None, generated_responses: Optional[List] = None, parameters: Optional[Dict] = None, options: Optional[Dict] = 
None, model: Optional[Text] = None) -> Union[Dict, List]: """ Corresponds to any chatbot like structure: pass in some text along with the past_user_inputs and generated_responses to receive a response. From 081e4745f06c96a67b8b3c0eb1f20f0e00410ec2 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 13:04:22 +0530 Subject: [PATCH 24/27] updated the unit tests for the conversational and feature extraction tasks --- tests/nlp/test_nlp.py | 45 +++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/tests/nlp/test_nlp.py b/tests/nlp/test_nlp.py index c2c5491..afb8034 100644 --- a/tests/nlp/test_nlp.py +++ b/tests/nlp/test_nlp.py @@ -134,28 +134,31 @@ def test_zero_shot_classification(self): }, ) - def test_conversational(self): - past_user_inputs = ["Which movie is the best ?"], - generated_responses = ["It's Die Hard for sure."], - text = "Can you explain why ?" + def test_conversational(self): + past_user_inputs = ["Which movie is the best ?"] + generated_responses = ["It's Die Hard for sure."] + text = "Can you explain why ?" 
- self.assertEqual( - self.nlp.conversational(text, past_user_inputs, generated_responses), - { - "generated_text": "It's the best movie ever.", - "conversation": { - "past_user_inputs": [ - "Which movie is the best ?", - "Can you explain why ?", - ], - "generated_responses": [ - "It's Die Hard for sure.", - "It's the best movie ever.", - ], - }, - "warnings": ["Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation."], + self.assertEqual( + self.nlp.conversational(text, past_user_inputs, generated_responses), + { + "generated_text": "It's the best movie ever.", + "conversation": { + "past_user_inputs": [ + "Which movie is the best ?", + "Can you explain why ?", + ], + "generated_responses": [ + "It's Die Hard for sure.", + "It's the best movie ever.", + ], }, - ) + "warnings": ["Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation."], + }, + ) def test_feature_extraction(self): - pass + text = "Transformers is an awesome library!" + + self.assertEqual(type(self.nlp.feature_extraction(text)), list) + From 5c867fcc1782bff0a973077b4018e514ba52e440 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 13:47:42 +0530 Subject: [PATCH 25/27] updated the docstring for the question answering task --- hugging_py_face/nlp.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hugging_py_face/nlp.py b/hugging_py_face/nlp.py index 5f861c3..cff0a3c 100644 --- a/hugging_py_face/nlp.py +++ b/hugging_py_face/nlp.py @@ -98,11 +98,9 @@ def question_answering(self, question: Text, context: Text, model: Optional[Text Answer a question using the provided context. :param question: a string of the question to be answered. - :param context: a string of context. + :param context: a string of context. This field is required for the question answering task and cannot be left empty. :param model: the model to use for the question answering task. If not provided, the recommended model from Hugging Face will be used. 
:return: a dict of the answer. - - # TODO: check if questions can be answered without context """ return self._query( { From d5dd8147349700ccb2a2a4ad1f7453dd40f9c7d0 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 14:02:52 +0530 Subject: [PATCH 26/27] added unit tests for all other NLP tasks that support DataFrames --- tests/nlp/test_nlp_in_df.py | 44 ++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/tests/nlp/test_nlp_in_df.py b/tests/nlp/test_nlp_in_df.py index bb68f6c..528b4d0 100644 --- a/tests/nlp/test_nlp_in_df.py +++ b/tests/nlp/test_nlp_in_df.py @@ -14,6 +14,34 @@ class TestNLPInDF(unittest.TestCase): def setUpClass(cls): cls.nlp = NLP(os.environ.get("API_KEY")) + def test_fill_mask_in_df(self): + texts = ["The answer to the universe is [MASK]."] + df = pd.DataFrame(texts, columns=['texts']) + + assert_frame_equal( + self.nlp.fill_mask_in_df(df, 'texts'), + pd.DataFrame( + { + "texts": texts, + "predictions": ["the answer to the universe is no."], + } + ), + ) + + def test_summarize_in_df(self): + texts = ["The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). 
Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct."] + df = pd.DataFrame(texts, columns=['texts']) + + assert_frame_equal( + self.nlp.summarization_in_df(df, 'texts'), + pd.DataFrame( + { + "texts": texts, + "predictions": ["The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world."], + } + ), + ) + def test_text_classification_in_df(self): texts = ["I like you. I love you", "I don't like you. I hate you"] df = pd.DataFrame(texts, columns=['texts']) @@ -22,8 +50,22 @@ def test_text_classification_in_df(self): self.nlp.text_classification_in_df(df, 'texts'), pd.DataFrame( { - "texts": ["I like you. I love you", "I don't like you. I hate you"], + "texts": texts, "predictions": ["POSITIVE", "NEGATIVE"], } ), + ) + + def test_text_generation_in_df(self): + texts = ["The answer to the universe is"] + df = pd.DataFrame(texts, columns=['texts']) + + assert_frame_equal( + self.nlp.text_generation_in_df(df, 'texts'), + pd.DataFrame( + { + "texts": texts, + "predictions": ["The answer to the universe is that we find the Universe, a very large, unchanging, infinitely intricate, incredibly complex place that could not have been created by God in the first place. 
We'll explore this in more detail at the end of this"], + } + ), ) \ No newline at end of file From 567257c536e194cbd136df690f5c5c23296064f7 Mon Sep 17 00:00:00 2001 From: Minura Punchihewa Date: Tue, 18 Apr 2023 16:00:47 +0530 Subject: [PATCH 27/27] added the possibility of applying ML operations on DataFrames to the README --- README.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/README.md b/README.md index c8bc600..e4cfd15 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,17 @@ nlp.text_classification("I like you. I love you.") nlp.text_generation("The answer to the universe is") ``` +The inputs to these methods can also be a list of strings. For example: +``` +nlp.text_classification(["I like you. I love you.", "I hate you. I despise you."]) +``` + +Additionally, the fill mask, summarization, text classification and text generation tasks can also be performed on a pandas DataFrame. For example: +``` +nlp.text_classification_in_df(df, 'text') +# where df is a pandas DataFrame and 'text' is the column name containing the text +``` + ### Computer Vision ``` from hugging_py_face import ComputerVision @@ -53,6 +64,17 @@ cp.image_classification("cats.jpg") cp.object_detection("cats.jpg") ``` +The inputs to these methods can also be a list of images. For example: +``` +cp.image_classification(["cats.jpg", "dogs.jpg"]) +``` + +Additionally, the image classification task can also be performed on a pandas DataFrame. For example: +``` +cp.image_classification_in_df(df, 'images') +# where df is a pandas DataFrame and 'images' is the column name containing the image file paths or URLs +``` + ### Audio Processing ``` from hugging_py_face import AudioProcessing @@ -67,5 +89,15 @@ ap.audio_classification("dogs.wav") # perform speech recognition # the audio file can be a local file or a URL ap.speech_recognition("dogs.wav") +``` +The inputs to these methods can also be a list of audio files. 
For example: +``` +ap.audio_classification(["dogs.wav", "cats.wav"]) +``` + +Additionally, both of the above tasks can also be performed on a pandas DataFrame. For example: +``` +ap.audio_classification_in_df(df, 'audio') +# where df is a pandas DataFrame and 'audio' is the column name containing the audio file paths or URLs ``` \ No newline at end of file