Merge branch 'main' into main-public
ashahba committed Aug 2, 2024
2 parents 00ff3ad + 2962dbf commit 8133a7c
Showing 53 changed files with 7,665 additions and 376 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/stylecheck-lint.yaml
@@ -0,0 +1,29 @@
name: Style Check and Lint
on:
  pull_request:
    types: [opened, synchronize, reopened]
  # run the workflow when changes are pushed to any branch
  push:
    branches: '**'
    paths: '**'

jobs:
  stylecheck-lint:
    name: Check codestyle
    runs-on: [ xai-tlt ]
    container:
      image: ${{ vars.GHA_IMAGE }}
      env:
        http_proxy: ${{ secrets.HTTP_PROXY }}
        https_proxy: ${{ secrets.HTTPS_PROXY }}
        no_proxy: ${{ secrets.NO_PROXY }}
      # credentials:
      #   username: ${{ secrets.REGISTRY_USER }}
      #   password: ${{ secrets.REGISTRY_TOKEN }}
      volumes:
        - /tf_dataset/dataset/transfer_learning:/tmp/data
    steps:
      - name: Check out repository code
        uses: actions/[email protected]
      - name: Run stylecheck
        run: make stylecheck
2 changes: 1 addition & 1 deletion CODEOWNERS
@@ -5,7 +5,7 @@
# the repo. Unless a later match takes precedence,
# @global-owner1 and @global-owner2 will be requested for
# review when someone opens a pull request.
* @ashahba @daniel-de-leon-user293 @tybrs
* @ashahba @daniel-de-leon-user293 @tybrs @mitalipo

# Order is important; the last matching pattern takes the most
# precedence. When someone opens a pull request that only
18 changes: 18 additions & 0 deletions Makefile
@@ -17,7 +17,9 @@
#

VENV_DIR = ".venv"
VENV_LINT = ".venv/lint"
ACTIVATE_TEST = "$(VENV_DIR)/bin/activate"
ACTIVATE_LINT = "$(VENV_LINT)/bin/activate"
ACTIVATE_DOCS = $(ACTIVATE_TEST)
ACTIVATE_NOTEBOOK = $(ACTIVATE_TEST)

@@ -36,6 +38,14 @@ venv-test: poetry-lock
	pure-eval==0.2.2 \
	stack-data==0.6.3

venv-lint:
	@echo "Creating a virtual environment for linting $(VENV_LINT)..."
	@test -d $(VENV_LINT) || python -m virtualenv $(VENV_LINT) || python3 -m virtualenv $(VENV_LINT)
	@echo "Installing lint dependencies..."
	@. $(ACTIVATE_LINT) && pip install --no-cache-dir --no-deps \
		flake8==7.0.0 \
		black==24.4.2

test-mcg: venv-test
	@echo "Testing the Model Card Gen API..."
	@. $(ACTIVATE_TEST) && pytest model_card_gen/tests
@@ -77,6 +87,14 @@ test-notebook: venv-test
	@. $(ACTIVATE_NOTEBOOK) && \
		bash run_notebooks.sh $(CURDIR)/notebooks/explainer/imagenet_with_cam/ExplainingImageClassification.ipynb

stylecheck: venv-lint
	@echo "Checking code style..."
	@. $(ACTIVATE_LINT) && flake8 . --config=tox.ini && echo "Code style is compatible with PEP 8 guidelines" || echo "Code style check failed. Please fix the above code style errors."

fix-codestyle: venv-lint
	@echo "Fixing code style..."
	@. $(ACTIVATE_LINT) && black . --config=pyproject.toml

dist: build-whl
	@echo "Create binary wheel..."
2 changes: 1 addition & 1 deletion explainer/pyproject.toml
@@ -24,7 +24,7 @@ datasets = '2.14.4'
deepdiff = '6.7.1'
intel-tensorflow = '2.14.0'
pytest = '8.1.1'
scikit-learn = '1.4.0'
scikit-learn = '1.5.0'
tensorflow-hub = '0.15.0'
torch = {version = "2.2.0", source = "pytorch-cpu"}
torchvision = {version = "0.17.0", source = "pytorch-cpu"}
2 changes: 1 addition & 1 deletion explainer/tests/conftest.py
@@ -93,7 +93,7 @@ def custom_tf_CNN():
    Creates and trains a simple TF CNN on the mnist dataset.
    Returns the model, a subset of the test dataset and the class names.
    Taken from https://shap-lrjball.readthedocs.io/en/latest/example_notebooks/deep_explainer/Front%20Page%20DeepExplainer%20MNIST%20Example.html
    Taken from https://shap-lrjball.readthedocs.io/en/latest/example_notebooks/deep_explainer/Front%20Page%20DeepExplainer%20MNIST%20Example.html  # noqa
    """
    import tensorflow as tf
    from tensorflow.keras.datasets import mnist
91 changes: 91 additions & 0 deletions fuzz/README.md
@@ -0,0 +1,91 @@
## Fuzz Testing in Intel Explainable AI Tools
Fuzz testing is an automated software testing technique that involves providing invalid, unexpected, or random data as inputs to a computer program. The program is then monitored for exceptions such as crashes, failing built-in code assertions, or potential memory leaks. This README details the use of Google's Atheris, a coverage-guided Python fuzzing engine, to conduct fuzz testing in our project.
All of the fuzz-testing programs live in this fuzz folder.
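For orientation, every harness in this folder follows the same basic Atheris pattern. The sketch below is a minimal illustration only, fuzzing `json.loads` as a stand-in target rather than any of this project's code:

```python
import json
import sys

import atheris


def TestOneInput(data):
    """Entry point that Atheris calls with each fuzzed byte string."""
    try:
        json.loads(data)  # stand-in for the code under test
    except (json.JSONDecodeError, UnicodeDecodeError):
        return  # malformed input is expected; only unhandled errors matter


if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()
```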

### Requirements
* Python: Version 3.9 or newer
* Atheris: Google's fuzzing engine for Python
* Coverage: Code coverage measurement for Python

### Setup
To prepare your environment for fuzz testing with Atheris, follow these steps:

#### Install Dependencies
```
pip install -r requirements.txt
```
### Running Fuzz Tests
Example 1 (runs with a starting corpus and stops when interrupted by the user):
```
python3 -m coverage run fuzz_test.py -atheris_runs=0 ../model_card_gen/intel_ai_safety/model_card_gen/docs/examples/json/
```

Example 2 (runs for 10000 iterations and adds to the existing coverage report instead of overwriting it):
```
python3 -m coverage run -a fuzz_dataset.py -atheris_runs=10000
```
### Interpreting Results
When running fuzz tests with Atheris, it is important to understand the output so that potential issues can be identified effectively.

#### Crashes and Exceptions
Atheris reports when a fuzzed input causes the program to crash or raise an unhandled exception. These inputs are crucial for identifying vulnerabilities.

```
ERROR: atheris detected an error in fuzz_test.py.
CRASH: Test input caused an unhandled IndexError exception.
```

In this example, the fuzzer has discovered an input that causes an IndexError in fuzz_test.py. This indicates that the code may not properly handle cases where list or array access is out of bounds. The developer should examine the stack trace provided by Atheris, identify whether there is problematic code, and implement proper bounds checking or error handling. If throwing the exception is the correct and expected behavior, the crash can be silently handled in fuzz_test.py using a try/except block.
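For example, if an `IndexError` is the documented behavior for out-of-range input, the harness itself can absorb it so the fuzzer keeps searching for genuinely unexpected failures (a sketch only; `parse_record` is a hypothetical stand-in for the code under test):

```python
def TestOneInput(data):
    try:
        parse_record(data)  # hypothetical function under test
    except IndexError:
        # Documented, expected behavior for out-of-range input: swallow it
        # so that only genuinely unexpected exceptions surface as crashes.
        return
```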

#### Coverage Metrics
Atheris provides information about code coverage, which helps in understanding which parts of your code were exercised by the fuzz tests. Low coverage might indicate that additional fuzzing targets or more diverse inputs are needed.

To generate the coverage report, run the following command inside the fuzz folder:

`python3 -m coverage report`

The output will look similar to this:

| Name | Stmts | Miss | Cover |
|------|-------|------|-------|
| fuzz_test.py | 25 | 6 | 76% |
| intel_ai_safety/model_card_gen/__init__.py | 0 | 0 | 100% |
| intel_ai_safety/model_card_gen/analyze/__init__.py | 4 | 0 | 100% |
| intel_ai_safety/model_card_gen/analyze/analyzer.py | 26 | 15 | 42% |
| ... | ... | ... | ... |
| intel_ai_safety/model_card_gen/validation.py | 26 | 12 | 54% |
| TOTAL | 835 | 416 | 50% |

Remember that a test may not be designed to exercise all of the instrumented code, only certain parts of it, so individual file coverage is often more informative than the total.
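
To focus on specific files, coverage's standard `--include` filter can restrict the report to the modules under test, for example:

```
python3 -m coverage report --include="*/model_card_gen/*"
```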

The coverage report can also be viewed interactively in a browser, to inspect which files and functions were executed.
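If the htmlcov folder has not been generated yet, create it first with coverage's standard `html` command:

```
python3 -m coverage html
```

Then serve it: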

`cd ../fuzz/htmlcov && python3 -m http.server`

Open http://localhost:8000/index.html in a web browser.

#### Leak Detection

If you're using a Python extension module that interfaces with C code, you might encounter memory leaks due to improper memory management in the C layer. Here's an example of how a memory leak might be reported:

```plaintext
Leak detected: an object of type 'MyCExtension.Object' with a size of 1024 bytes was not freed.
Call stack of the allocation:
File "my_c_extension.py", line 58, in create_object
obj = MyCExtension.Object()
```

Developers should review the create_object function to ensure proper memory management.

#### Reproducing Issues
For every failure detected, Atheris outputs a test case that can reproduce the issue. These test cases help with debugging and fixing the vulnerabilities in your code. When Atheris encounters an issue such as an unhandled exception, it provides a serialized input that caused the problem, so the issue can be reproduced for debugging purposes. Here's an example of the output you might see:

```plaintext
EXCEPTION: Test input caused a KeyError in your Python code.
Reproducing input written to: exception-abcdef1234567890.pickle
To reproduce, run: python3 -m atheris reproduce exception-abcdef1234567890.pickle
```

In this example, the fuzzer has discovered an input that causes a KeyError in the Python code. The input has been saved to a file named exception-abcdef1234567890.pickle. To reproduce the issue, the developer can run the provided command, which will execute the fuzzer with the exact same input that caused the exception, allowing for consistent reproduction and easier debugging.
32 changes: 32 additions & 0 deletions fuzz/fuzz_dataset.py
@@ -0,0 +1,32 @@
import atheris
import numpy
import random
import sys

MIN_DATA_LENGTH = 1  # Minimum length of dataset
MAX_DATA_LENGTH = 1000  # Maximum length of dataset

default_path = "../plugins/model_card_gen/generators/tfma/"
sys.path.append(default_path)

with atheris.instrument_imports(include=["intel_ai_safety.*"]):
    from intel_ai_safety.model_card_gen.datasets.torch_datasets import PytorchNumpyDataset


def TestOneInput(data):
    """The entry point for the fuzzer."""
    fdp = atheris.FuzzedDataProvider(data)

    # Create input and target numpy arrays of random but equal length
    # Label values will be integers between [0, 10]
    dataset_length = random.randint(MIN_DATA_LENGTH, MAX_DATA_LENGTH)
    input_array = numpy.array(fdp.ConsumeRegularFloatList(dataset_length))
    target_array = numpy.array(fdp.ConsumeIntListInRange(dataset_length, 0, 10))

    dataset = PytorchNumpyDataset(input_array=input_array, target_array=target_array)
    assert len(dataset.dataset) == dataset_length


if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()
117 changes: 117 additions & 0 deletions fuzz/fuzz_deep_explainer.py
@@ -0,0 +1,117 @@
#!/usr/bin/python3
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Usage:
python3 -m coverage run fuzz_deep_explainer.py -atheris_runs=10
coverage report -m --omit=../fuzz/config*
"""

import atheris
import numpy as np
import sys
import itertools

default_path = "../plugins"
sys.path.append(default_path)

# This tells Atheris to instrument all functions in the library
with atheris.instrument_imports(include=["intel_ai_safety.explainer.attributions.attributions"]):
    from intel_ai_safety.explainer.attributions.attributions import deep_explainer

import torch
from torchvision import datasets, transforms
from torch import nn, optim
from torch.nn import functional as F
torch.manual_seed(0)

batch_size = 128
num_epochs = 1
device = torch.device('cpu')


# MockNet class to replace the actual Net class for faster testing
class MockNet(nn.Module):
    def __init__(self):
        super(MockNet, self).__init__()
        self.fc_layers = nn.Sequential(
            nn.Linear(784, 10),  # Assuming input is a flattened MNIST image (28x28)
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = x.view(-1, 784)  # Flatten the image
        x = self.fc_layers(x)
        return x


train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('mnist_data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor()
                   ])),
    batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('mnist_data', train=False, transform=transforms.Compose([
        transforms.ToTensor()])), batch_size=batch_size, shuffle=True)


@atheris.instrument_func
def test_deep_explainer(input_bytes):
    fdp = atheris.FuzzedDataProvider(input_bytes)
    # Generate random data based on the fuzzed input
    num_background = fdp.ConsumeIntInRange(1, 5)
    num_targets = fdp.ConsumeIntInRange(1, 5)
    num_classes = fdp.ConsumeIntInRange(2, 10)
    # The model expects images of shape (batch_size, channels, height, width)
    # For MNIST, this is typically (batch_size, 1, 28, 28)
    # Generate random images with the same shape
    background_images = np.random.rand(num_background, 1, 28, 28).astype(np.float32)
    target_images = np.random.rand(num_targets, 1, 28, 28).astype(np.float32)
    # The labels should be a list of strings, one for each class
    labels = [f"Class {i}" for i in range(num_classes)]
    # Use the mocked model instead of the actual Net to speed up the test
    model = MockNet().to(device)
    # Evaluate the model with a smaller subset of the test data to speed up the test
    model.eval()
    test_loss = 0
    correct = 0
    y_true = torch.empty(0)
    y_pred = torch.empty((0, 10))
    X_test = torch.empty((0, 1, 28, 28))

    with torch.no_grad():
        for data, target in itertools.islice(test_loader, 10):  # Limit the number of batches
            data, target = data.to(device), target.to(device)
            output = model(data)
            X_test = torch.cat((X_test, data))
            y_true, y_pred = torch.cat((y_true, target)), torch.cat((y_pred, output))

            test_loss += F.nll_loss(output.log(), target).item()
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    classes = np.array(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
    # Use the generated background and target images for the deep explainer
    deep_explainer(model, torch.tensor(background_images), torch.tensor(target_images), classes)

    return


atheris.Setup(sys.argv, test_deep_explainer)
atheris.Fuzz()
52 changes: 52 additions & 0 deletions fuzz/fuzz_test.py
@@ -0,0 +1,52 @@
import atheris
import json
import jsonschema
import sys

STR_BYTE_COUNT = 10000 # Desired byte count for fuzzed strings

default_path = "../model_card_gen"
sys.path.append(default_path)

with atheris.instrument_imports(include=["intel_ai_safety.*"]):
    from intel_ai_safety.model_card_gen.model_card_gen import ModelCardGen


def mutate_schema(fdp, json_data):
    """Recurse through a JSON object, leaving keys and structure intact while
    replacing string values with fuzzed unicode; any other non-container
    value is replaced with None."""
    if isinstance(json_data, str):
        return fdp.ConsumeUnicode(STR_BYTE_COUNT)
    elif isinstance(json_data, list):
        return [mutate_schema(fdp, item) for item in json_data]
    elif isinstance(json_data, dict):
        return {k: mutate_schema(fdp, v) for k, v in json_data.items()}
    else:
        return None


def TestOneInput(data):
    """The entry point for the fuzzer."""
    try:
        json_data = json.loads(data)
    except json.decoder.JSONDecodeError:
        print("Not valid json")
        return
    except UnicodeDecodeError:
        print("Not valid unicode")
        return

    fdp = atheris.FuzzedDataProvider(data)
    model_card_data = mutate_schema(fdp, json_data)
    try:
        mcg = ModelCardGen(data_sets={"test": ""}, model_card=model_card_data)
        if mcg.model_card:
            mcg.build_model_card()  # Includes scaffold_assets() and export_format()
    except (ValueError, jsonschema.ValidationError):
        print("Doesn't match MC schema")
        return


if __name__ == "__main__":
    atheris.Setup(sys.argv, TestOneInput)
    atheris.Fuzz()