Merge branch 'main' into main-public
ashahba committed Aug 2, 2024
2 parents 00ff3ad + 2962dbf commit 8133a7c
Showing 53 changed files with 7,665 additions and 376 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/stylecheck-lint.yaml
@@ -0,0 +1,29 @@
name: Style Check and Lint
on:
  pull_request:
    types: [opened, synchronize, reopened]
  # run the workflow when changes are pushed to any branch
  push:
    branches: '**'
    paths: '**'

jobs:
  stylecheck-lint:
    name: Check codestyle
    runs-on: [ xai-tlt ]
    container:
      image: ${{ vars.GHA_IMAGE }}
      env:
        http_proxy: ${{ secrets.HTTP_PROXY }}
        https_proxy: ${{ secrets.HTTPS_PROXY }}
        no_proxy: ${{ secrets.NO_PROXY }}
      # credentials:
      #   username: ${{ secrets.REGISTRY_USER }}
      #   password: ${{ secrets.REGISTRY_TOKEN }}
      volumes:
        - /tf_dataset/dataset/transfer_learning:/tmp/data
    steps:
      - name: Check out repository code
        uses: actions/[email protected]
      - name: Run stylecheck
        run: make stylecheck
2 changes: 1 addition & 1 deletion CODEOWNERS
@@ -5,7 +5,7 @@
# the repo. Unless a later match takes precedence,
# @global-owner1 and @global-owner2 will be requested for
# review when someone opens a pull request.
* @ashahba @daniel-de-leon-user293 @tybrs
* @ashahba @daniel-de-leon-user293 @tybrs @mitalipo

# Order is important; the last matching pattern takes the most
# precedence. When someone opens a pull request that only
18 changes: 18 additions & 0 deletions Makefile
@@ -17,7 +17,9 @@
#

VENV_DIR = ".venv"
VENV_LINT = ".venv/lint"
ACTIVATE_TEST = "$(VENV_DIR)/bin/activate"
ACTIVATE_LINT = "$(VENV_LINT)/bin/activate"
ACTIVATE_DOCS = $(ACTIVATE_TEST)
ACTIVATE_NOTEBOOK = $(ACTIVATE_TEST)

@@ -36,6 +38,14 @@ venv-test: poetry-lock
	pure-eval==0.2.2 \
	stack-data==0.6.3

venv-lint:
	@echo "Creating a virtual environment for linting $(VENV_LINT)..."
	@test -d $(VENV_LINT) || python -m virtualenv $(VENV_LINT) || python3 -m virtualenv $(VENV_LINT)
	@echo "Installing lint dependencies..."
	@. $(ACTIVATE_LINT) && pip install --no-cache-dir --no-deps \
		flake8==7.0.0 \
		black==24.4.2

test-mcg: venv-test
	@echo "Testing the Model Card Gen API..."
	@. $(ACTIVATE_TEST) && pytest model_card_gen/tests
@@ -77,6 +87,14 @@ test-notebook: venv-test
	@. $(ACTIVATE_NOTEBOOK) && \
		bash run_notebooks.sh $(CURDIR)/notebooks/explainer/imagenet_with_cam/ExplainingImageClassification.ipynb

stylecheck: venv-lint
	@echo "Checking code style..."
	@. $(ACTIVATE_LINT) && flake8 . --config=tox.ini && echo "Code style is compatible with PEP 8 guidelines" || echo "Code style check failed. Please fix the above code style errors."

fix-codestyle: venv-lint
	@echo "Fixing code style..."
	@. $(ACTIVATE_LINT) && black . --config=pyproject.toml

dist: build-whl
	@echo "Create binary wheel..."
2 changes: 1 addition & 1 deletion explainer/pyproject.toml
@@ -24,7 +24,7 @@ datasets = '2.14.4'
deepdiff = '6.7.1'
intel-tensorflow = '2.14.0'
pytest = '8.1.1'
scikit-learn = '1.4.0'
scikit-learn = '1.5.0'
tensorflow-hub = '0.15.0'
torch = {version = "2.2.0", source = "pytorch-cpu"}
torchvision = {version = "0.17.0", source = "pytorch-cpu"}
2 changes: 1 addition & 1 deletion explainer/tests/conftest.py
@@ -93,7 +93,7 @@ def custom_tf_CNN():
    Creates and trains a simple TF CNN on the mnist dataset.
    Returns the model, a subset of the test dataset and the class names.
    Taken from https://shap-lrjball.readthedocs.io/en/latest/example_notebooks/deep_explainer/Front%20Page%20DeepExplainer%20MNIST%20Example.html
    Taken from https://shap-lrjball.readthedocs.io/en/latest/example_notebooks/deep_explainer/Front%20Page%20DeepExplainer%20MNIST%20Example.html  # noqa
    """
    import tensorflow as tf
    from tensorflow.keras.datasets import mnist
91 changes: 91 additions & 0 deletions fuzz/README.md
@@ -0,0 +1,91 @@
## Fuzz Testing in Intel Explainable AI Tools
Fuzz testing is an automated software testing technique that involves providing invalid, unexpected, or random data as inputs to a computer program. The program is then monitored for exceptions such as crashes, failing built-in code assertions, or potential memory leaks. This README details the use of Google's Atheris, a coverage-guided Python fuzzing engine, to conduct fuzz testing in our project.
All of the fuzz-testing programs live in this fuzz folder.
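For orientation, every harness in this folder follows the same basic Atheris pattern. The sketch below is a minimal illustration only, fuzzing `json.loads` as a stand-in target rather than any of this project's code:

```python
import json
import sys

import atheris


def TestOneInput(data):
    """Entry point that Atheris calls with each fuzzed byte string."""
    try:
        json.loads(data)  # stand-in for the code under test
    except (json.JSONDecodeError, UnicodeDecodeError):
        return  # malformed input is expected; only unhandled errors matter


if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()
```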

### Requirements
* Python: Version 3.9 or newer
* Atheris: Google's fuzzing engine for Python
* Coverage: Code coverage measurement for Python

### Setup
To prepare your environment for fuzz testing with Atheris, follow these steps:

#### Install Dependencies
```
pip install -r requirements.txt
```
### Running Fuzz Tests
Example 1 (runs with a starting corpus and stops when interrupted by the user):
```
python3 -m coverage run fuzz_test.py -atheris_runs=0 ../model_card_gen/intel_ai_safety/model_card_gen/docs/examples/json/
```

Example 2 (runs for 10000 iterations and adds to the existing coverage report instead of overwriting it):
```
python3 -m coverage run -a fuzz_dataset.py -atheris_runs=10000
```
### Interpreting Results
When running fuzz tests with Atheris, it is important to understand the output so that potential issues can be identified effectively.

#### Crashes and Exceptions
Atheris reports when a fuzzed input causes the program to crash or raise an unhandled exception. These inputs are crucial for identifying vulnerabilities.

```
ERROR: atheris detected an error in fuzz_test.py.
CRASH: Test input caused an unhandled IndexError exception.
```

In this example, the fuzzer has discovered an input that causes an IndexError in fuzz_test.py. This indicates that the code may not properly handle cases where list or array access is out of bounds. The developer should examine the stack trace provided by Atheris, identify whether there is problematic code, and implement proper bounds checking or error handling. If throwing the exception is the correct and expected behavior, the crash can be silently handled in fuzz_test.py using a try/except block.
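For example, if an `IndexError` is the documented behavior for out-of-range input, the harness itself can absorb it so the fuzzer keeps searching for genuinely unexpected failures (a sketch only; `parse_record` is a hypothetical stand-in for the code under test):

```python
def TestOneInput(data):
    try:
        parse_record(data)  # hypothetical function under test
    except IndexError:
        # Documented, expected behavior for out-of-range input: swallow it
        # so that only genuinely unexpected exceptions surface as crashes.
        return
```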

#### Coverage Metrics
Atheris provides information about code coverage, which helps in understanding which parts of your code were exercised by the fuzz tests. Low coverage might indicate that additional fuzzing targets or more diverse inputs are needed.

To generate the coverage report, run the following command inside the fuzz folder:

`python3 -m coverage report`

The output will look similar to this:

| Name | Stmts | Miss | Cover |
|------|-------|------|-------|
| fuzz_test.py | 25 | 6 | 76% |
| intel_ai_safety/model_card_gen/__init__.py | 0 | 0 | 100% |
| intel_ai_safety/model_card_gen/analyze/__init__.py | 4 | 0 | 100% |
| intel_ai_safety/model_card_gen/analyze/analyzer.py | 26 | 15 | 42% |
| ... | ... | ... | ... |
| intel_ai_safety/model_card_gen/validation.py | 26 | 12 | 54% |
| TOTAL | 835 | 416 | 50% |

Remember that a test may not be designed to exercise all of the instrumented code, only certain parts of it, so individual file coverage is often more informative than the total.
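
To focus on specific files, coverage's standard `--include` filter can restrict the report to the modules under test, for example:

```
python3 -m coverage report --include="*/model_card_gen/*"
```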

The coverage report can also be viewed interactively in a browser, to inspect which files and functions were executed.
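If the htmlcov folder has not been generated yet, create it first with coverage's standard `html` command:

```
python3 -m coverage html
```

Then serve it: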

`cd ../fuzz/htmlcov && python3 -m http.server`

Open http://localhost:8000/index.html in a web browser.

#### Leak Detection

If you're using a Python extension module that interfaces with C code, you might encounter memory leaks due to improper memory management in the C layer. Here's an example of how a memory leak might be reported:

```plaintext
Leak detected: an object of type 'MyCExtension.Object' with a size of 1024 bytes was not freed.
Call stack of the allocation:
File "my_c_extension.py", line 58, in create_object
obj = MyCExtension.Object()
```

Developers should review the create_object function to ensure proper memory management.

#### Reproducing Issues
For every failure detected, Atheris outputs a test case that can reproduce the issue. These test cases help with debugging and fixing the vulnerabilities in your code. When Atheris encounters an issue such as an unhandled exception, it provides a serialized input that caused the problem, so the issue can be reproduced for debugging purposes. Here's an example of the output you might see:

```plaintext
EXCEPTION: Test input caused a KeyError in your Python code.
Reproducing input written to: exception-abcdef1234567890.pickle
To reproduce, run: python3 -m atheris reproduce exception-abcdef1234567890.pickle
```

In this example, the fuzzer has discovered an input that causes a KeyError in the Python code. The input has been saved to a file named exception-abcdef1234567890.pickle. To reproduce the issue, the developer can run the provided command, which will execute the fuzzer with the exact same input that caused the exception, allowing for consistent reproduction and easier debugging.
32 changes: 32 additions & 0 deletions fuzz/fuzz_dataset.py
@@ -0,0 +1,32 @@
import atheris
import numpy
import random
import sys

MIN_DATA_LENGTH = 1  # Minimum length of dataset
MAX_DATA_LENGTH = 1000  # Maximum length of dataset

default_path = "../plugins/model_card_gen/generators/tfma/"
sys.path.append(default_path)

with atheris.instrument_imports(include=["intel_ai_safety.*"]):
    from intel_ai_safety.model_card_gen.datasets.torch_datasets import PytorchNumpyDataset


def TestOneInput(data):
    """The entry point for the fuzzer."""
    fdp = atheris.FuzzedDataProvider(data)

    # Create input and target numpy arrays of random but equal length
    # Label values will be integers between [0, 10]
    dataset_length = random.randint(MIN_DATA_LENGTH, MAX_DATA_LENGTH)
    input_array = numpy.array(fdp.ConsumeRegularFloatList(dataset_length))
    target_array = numpy.array(fdp.ConsumeIntListInRange(dataset_length, 0, 10))

    dataset = PytorchNumpyDataset(input_array=input_array, target_array=target_array)
    assert len(dataset.dataset) == dataset_length


if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()
117 changes: 117 additions & 0 deletions fuzz/fuzz_deep_explainer.py
@@ -0,0 +1,117 @@
#!/usr/bin/python3
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Usage:
python3 -m coverage run fuzz_deep_explainer.py -atheris_runs=10
coverage report -m --omit=../fuzz/config*
"""

import atheris
import numpy as np
import sys
import itertools

default_path = "../plugins"
sys.path.append(default_path)

# This tells Atheris to instrument all functions in the library
with atheris.instrument_imports(include=["intel_ai_safety.explainer.attributions.attributions"]):
    from intel_ai_safety.explainer.attributions.attributions import deep_explainer

import torch
from torchvision import datasets, transforms
from torch import nn, optim
from torch.nn import functional as F
torch.manual_seed(0)

batch_size = 128
num_epochs = 1
device = torch.device('cpu')


# MockNet class to replace the actual Net class for faster testing
class MockNet(nn.Module):
    def __init__(self):
        super(MockNet, self).__init__()
        self.fc_layers = nn.Sequential(
            nn.Linear(784, 10),  # Assuming input is a flattened MNIST image (28x28)
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = x.view(-1, 784)  # Flatten the image
        x = self.fc_layers(x)
        return x


train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('mnist_data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor()
                   ])),
    batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('mnist_data', train=False, transform=transforms.Compose([
        transforms.ToTensor()])), batch_size=batch_size, shuffle=True)


@atheris.instrument_func
def test_deep_explainer(input_bytes):
    fdp = atheris.FuzzedDataProvider(input_bytes)
    # Generate random data based on the fuzzed input
    num_background = fdp.ConsumeIntInRange(1, 5)
    num_targets = fdp.ConsumeIntInRange(1, 5)
    num_classes = fdp.ConsumeIntInRange(2, 10)
    # The model expects images of shape (batch_size, channels, height, width)
    # For MNIST, this is typically (batch_size, 1, 28, 28)
    # Generate random images with the same shape
    background_images = np.random.rand(num_background, 1, 28, 28).astype(np.float32)
    target_images = np.random.rand(num_targets, 1, 28, 28).astype(np.float32)
    # The labels should be a list of strings, one for each class
    labels = [f"Class {i}" for i in range(num_classes)]
    # Use the mocked model instead of the actual Net to speed up the test
    model = MockNet().to(device)
    # Evaluate the model with a smaller subset of the test data to speed up the test
    model.eval()
    test_loss = 0
    correct = 0
    y_true = torch.empty(0)
    y_pred = torch.empty((0, 10))
    X_test = torch.empty((0, 1, 28, 28))

    with torch.no_grad():
        for data, target in itertools.islice(test_loader, 10):  # Limit the number of batches
            data, target = data.to(device), target.to(device)
            output = model(data)
            X_test = torch.cat((X_test, data))
            y_true, y_pred = torch.cat((y_true, target)), torch.cat((y_pred, output))

            test_loss += F.nll_loss(output.log(), target).item()
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    classes = np.array(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
    # Use the generated background and target images for the deep explainer
    deep_explainer(model, torch.tensor(background_images), torch.tensor(target_images), classes)

    return


atheris.Setup(sys.argv, test_deep_explainer)
atheris.Fuzz()
52 changes: 52 additions & 0 deletions fuzz/fuzz_test.py
@@ -0,0 +1,52 @@
import atheris
import json
import jsonschema
import sys

STR_BYTE_COUNT = 10000 # Desired byte count for fuzzed strings

default_path = "../model_card_gen"
sys.path.append(default_path)

with atheris.instrument_imports(include=["intel_ai_safety.*"]):
    from intel_ai_safety.model_card_gen.model_card_gen import ModelCardGen


def mutate_schema(fdp, json_data):
    """Recurse through a JSON object, leaving keys and structure intact while
    replacing string values with fuzzed unicode; any other non-container
    value is replaced with None."""
    if isinstance(json_data, str):
        return fdp.ConsumeUnicode(STR_BYTE_COUNT)
    elif isinstance(json_data, list):
        return [mutate_schema(fdp, item) for item in json_data]
    elif isinstance(json_data, dict):
        return {k: mutate_schema(fdp, v) for k, v in json_data.items()}
    else:
        return None


def TestOneInput(data):
    """The entry point for the fuzzer."""
    try:
        json_data = json.loads(data)
    except json.decoder.JSONDecodeError:
        print("Not valid json")
        return
    except UnicodeDecodeError:
        print("Not valid unicode")
        return

    fdp = atheris.FuzzedDataProvider(data)
    model_card_data = mutate_schema(fdp, json_data)
    try:
        mcg = ModelCardGen(data_sets={"test": ""}, model_card=model_card_data)
        if mcg.model_card:
            mcg.build_model_card()  # Includes scaffold_assets() and export_format()
    except (ValueError, jsonschema.ValidationError):
        print("Doesn't match MC schema")
        return


if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()