diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index a53b6f3..d6d42d2 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -1,4 +1,4 @@
-name: build
+name: Build GAUCHE and run unit tests.
 
 on:
   push:
@@ -15,31 +15,30 @@ on:
       - "CONTRIBUTING.md"
       - "imgs/"
 
+permissions:
+  contents: read
+
 jobs:
-  build_cpu:
-    runs-on: ubuntu-18.04
+  build:
+
+    runs-on: ubuntu-latest
+
     strategy:
+      fail-fast: true
       matrix:
-        python-version: [3.7, 3.8, 3.9, 3.11]
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v2
-        # See: https://github.com/marketplace/actions/setup-conda
-      - name: Setup anaconda
-        uses: s-weigand/setup-conda@v1
-        with:
-          python-version: ${{ matrix.python-version }}
-          conda-channels: "conda-forge"
+        python-version: ["3.9", "3.10", "3.11"]
 
-      - name: Install GAUCHE
-        run: pip install -e .
-      - name: Install rnxfp
-        run: pip install --no-deps rxnfp
-      - name: Install drfp
-        run: pip install --no-deps drfp
-      - name: Install transformers
-        run: pip install transformers
-      - name: Run unit tests and generate coverage report
-        run: pytest .
-      - name: Test notebook execution
-        run: pytest --nbval-lax notebooks/ --current-env
\ No newline at end of file
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python.
+      uses: actions/setup-python@v4
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Set up GAUCHE and install all dependencies.
+      run: |
+        python -m pip install --upgrade pip
+        pip install pytest
+        pip install -e .[all]
+    - name: Run unit tests with pytest.
+      run: |
+        pytest tests/
diff --git a/.github/workflows/code-style.yaml b/.github/workflows/code-style.yaml
index 4893d97..790eb2f 100644
--- a/.github/workflows/code-style.yaml
+++ b/.github/workflows/code-style.yaml
@@ -5,16 +5,16 @@ on: [push]
 jobs:
   black:
     name: "Ensure black compliance"
-    runs-on: "ubuntu-18.04"
+    runs-on: ubuntu-latest
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
 
       - name: Install Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
         with:
-          python-version: 3.9
+          python-version: "3.10"
 
       - name: Install code style checking tools
         run: python -m pip install black
@@ -22,36 +22,17 @@ jobs:
       - name: Run black
         run: black --check .
 
-  interrogate:
-    name: "Ensure docstring coverage"
-    runs-on: "ubuntu-18.04"
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v2
-
-      - name: Install Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.9
-
-      - name: Install code style checking tools
-        run: python -m pip install interrogate
-
-      - name: Run interrogate
-        run: interrogate .
-
-
   isort:
     name: "Ensure imports are correctly sorted"
-    runs-on: "ubuntu-18.04"
+    runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
 
       - name: Install Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
         with:
-          python-version: 3.9
+          python-version: "3.10"
 
       - name: Install code style checking tools
         run: python -m pip install isort
diff --git a/benchmarks/benchmark_models.py b/benchmarks/benchmark_models.py
index 927f50c..80caea2 100644
--- a/benchmarks/benchmark_models.py
+++ b/benchmarks/benchmark_models.py
@@ -2,14 +2,13 @@
 GP Model definitions to be used in the benchmarks
 """
 
-from gauche.kernels.fingerprint_kernels.tanimoto_kernel import (
-    TanimotoKernel,
-)
 from gpytorch.distributions import MultivariateNormal
 from gpytorch.kernels import LinearKernel, ScaleKernel
 from gpytorch.means import ConstantMean
 from gpytorch.models import ExactGP
 
+from gauche.kernels.fingerprint_kernels.tanimoto_kernel import TanimotoKernel
+
 
 class TanimotoGP(ExactGP):
     def __init__(self, train_x, train_y, likelihood):
diff --git a/benchmarks/run_benchmark.py b/benchmarks/run_benchmark.py
index 5835a07..da67318 100644
--- a/benchmarks/run_benchmark.py
+++ b/benchmarks/run_benchmark.py
@@ -11,8 +11,6 @@
 import torch
 from benchmark_models import ScalarProductGP, TanimotoGP
 from botorch import fit_gpytorch_model
-from gauche.dataloader import MolPropLoader
-from gauche.dataloader.data_utils import transform_data
 from gpytorch.likelihoods import GaussianLikelihood
 from gpytorch.mlls import ExactMarginalLogLikelihood
 from gpytorch_metrics import (
@@ -23,6 +21,9 @@
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
 from sklearn.model_selection import train_test_split
 
+from gauche.dataloader import MolPropLoader
+from gauche.dataloader.data_utils import transform_data
+
 # Remove Graphein warnings
 logging.getLogger("graphein").setLevel("ERROR")
 
diff --git a/notebooks/pretrained_kernel.py b/notebooks/pretrained_kernel.py
index 1a39b05..cb0a85f 100644
--- a/notebooks/pretrained_kernel.py
+++ b/notebooks/pretrained_kernel.py
@@ -1,35 +1,41 @@
-import sys, time, warnings, numpy as np, torch, random
-sys.path.append('..')
-from matplotlib import pyplot as plt
-from rdkit.Chem import MolFromSmiles
-from sklearn.model_selection import train_test_split
+import random
+import sys
+import time
+import warnings
 
+import numpy as np
+import torch
 from botorch import fit_gpytorch_model
 from botorch.acquisition import ExpectedImprovement
 from botorch.exceptions import BadInitialCandidatesWarning
 from botorch.models.gp_regression import SingleTaskGP
-
 from gpytorch.distributions import MultivariateNormal
-from gpytorch.kernels import ScaleKernel, RBFKernel
+from gpytorch.kernels import RBFKernel, ScaleKernel
 from gpytorch.likelihoods import GaussianLikelihood
 from gpytorch.means import ConstantMean
 from gpytorch.mlls import ExactMarginalLogLikelihood
+from matplotlib import pyplot as plt
+from rdkit.Chem import MolFromSmiles
+from sklearn.model_selection import train_test_split
 
 from gauche.dataloader import MolPropLoader
 from gauche.kernels.gnn_kernels.pretrained_kernel import GNN, mol_to_pyg
 
+sys.path.append("..")
+
+
 def set_seed(seed):
     np.random.seed(seed)
     torch.manual_seed(seed)
     torch.cuda.manual_seed_all(seed)
     random.seed(seed)
-    torch.backends.cudnn.deterministic=True
+    torch.backends.cudnn.deterministic = True
     torch.backends.cudnn.benchmark = False
     torch.cuda.manual_seed_all(seed)
 
+
 # We define our custom GP surrogate model using the RBF kernel
 class GP_PretrainedKernel(SingleTaskGP):
-
     def __init__(self, train_X, train_Y):
         super().__init__(train_X, train_Y, GaussianLikelihood())
         self.mean_module = ConstantMean()
@@ -41,8 +47,10 @@ def forward(self, x):
         covar_x = self.covar_module(x)
         return MultivariateNormal(mean_x, covar_x)
 
+
 """We define helper functions for the Bayesian optimisation loop. In particular the acquisition function optimisation procedure is framed so as to take the maximum over a discrete set of heldout molecules."""
 
+
 def initialize_model(train_x, train_obj, state_dict=None):
     """
     Initialise model and loss function.
@@ -65,7 +73,9 @@ def initialize_model(train_x, train_obj, state_dict=None):
     return mll, model
 
 
-def optimize_acqf_and_get_observation(acq_func, heldout_inputs, heldout_outputs):
+def optimize_acqf_and_get_observation(
+    acq_func, heldout_inputs, heldout_outputs
+):
     """
     Optimizes the acquisition function, and returns a new candidate and an observation.
 
@@ -79,7 +89,9 @@ def optimize_acqf_and_get_observation(acq_func, heldout_inputs, heldout_outputs)
     # Loop over the discrete set of points to evaluate the acquisition function at.
     acq_vals = []
     for i in range(len(heldout_outputs)):
-        acq_vals.append(acq_func(heldout_inputs[i].unsqueeze(-2)))  # use unsqueeze to append batch dimension
+        acq_vals.append(
+            acq_func(heldout_inputs[i].unsqueeze(-2))
+        )  # use unsqueeze to append batch dimension
 
     # observe new values
     acq_vals = torch.tensor(acq_vals)
@@ -88,8 +100,12 @@ def optimize_acqf_and_get_observation(acq_func, heldout_inputs, heldout_outputs)
     new_obj = heldout_outputs[best_idx].unsqueeze(-1)  # add output dimension
 
     # Delete the selected input and value from the heldout set.
-    heldout_inputs = torch.cat((heldout_inputs[:best_idx], heldout_inputs[best_idx+1:]), axis=0)
-    heldout_outputs = torch.cat((heldout_outputs[:best_idx], heldout_outputs[best_idx+1:]), axis=0)
+    heldout_inputs = torch.cat(
+        (heldout_inputs[:best_idx], heldout_inputs[best_idx + 1 :]), axis=0
+    )
+    heldout_outputs = torch.cat(
+        (heldout_outputs[:best_idx], heldout_outputs[best_idx + 1 :]), axis=0
+    )
 
     return new_x, new_obj, heldout_inputs, heldout_outputs
 
@@ -113,11 +129,16 @@ def update_random_observations(best_random, heldout_inputs, heldout_outputs):
     best_random.append(max(best_random[-1], next_random_best))
 
     # Delete the selected input and value from the heldout set.
-    heldout_inputs = torch.cat((heldout_inputs[:index], heldout_inputs[index+1:]), axis=0)
-    heldout_outputs = torch.cat((heldout_outputs[:index], heldout_outputs[index+1:]), axis=0)
+    heldout_inputs = torch.cat(
+        (heldout_inputs[:index], heldout_inputs[index + 1 :]), axis=0
+    )
+    heldout_outputs = torch.cat(
+        (heldout_outputs[:index], heldout_outputs[index + 1 :]), axis=0
+    )
 
     return best_random, heldout_inputs, heldout_outputs
 
+
 """Run the Bayesian optimisation loop, comparing the analytic (sequential) expected improvement acquisition funciton with a random policy."""
 
 # Bayesian optimisation experiment parameters, number of random trials, split size, batch size
@@ -129,27 +150,27 @@ def update_random_observations(best_random, heldout_inputs, heldout_outputs):
 verbose = False
 set_seed(12)
 
-warnings.filterwarnings('ignore', category=BadInitialCandidatesWarning)
-warnings.filterwarnings('ignore', category=RuntimeWarning)
+warnings.filterwarnings("ignore", category=BadInitialCandidatesWarning)
+warnings.filterwarnings("ignore", category=RuntimeWarning)
 
 best_observed_all_ei, best_random_all = [], []
 
 # Load the Photoswitch dataset
 loader = MolPropLoader()
-loader.load_benchmark("Photoswitch", "../data/property_prediction/Photoswitch.csv")
+loader.load_benchmark(
+    "Photoswitch", "../data/property_prediction/Photoswitch.csv"
+)
 
 # We use the fragprints representations (a concatenation of Morgan fingerprints and RDKit fragment features)
 y = loader.labels
 
 # get PyTorch Geometric featurisation of molecules
-graphs = [
-    mol_to_pyg(MolFromSmiles(smiles)) for smiles in loader.features
-]
+graphs = [mol_to_pyg(MolFromSmiles(smiles)) for smiles in loader.features]
 
 # load pretrained model
 with torch.no_grad():
-    model = GNN(gnn_type='gin')
-    model.load_pretrained('contextpred', device=torch.device("cpu"))
+    model = GNN(gnn_type="gin")
+    model.load_pretrained("contextpred", device=torch.device("cpu"))
 
     X = torch.stack(
         [
@@ -157,18 +178,20 @@ def update_random_observations(best_random, heldout_inputs, heldout_outputs):
                 x=graphs[i].x,
                 edge_index=graphs[i].edge_index,
                 edge_attr=graphs[i].edge_attr,
-            ).mean(0) for i in range(len(graphs))
+            ).mean(0)
+            for i in range(len(graphs))
         ]
     )
 
 # average over multiple random trials (each trial splits the initial training set for the GP in a random manner)
 for trial in range(1, N_TRIALS + 1):
-
     print(f"\nTrial {trial:>2} of {N_TRIALS} ", end="")
     best_observed_ei, best_random = [], []
 
     # Generate initial training data and initialize model
-    train_x_ei, heldout_x_ei, train_y_ei, heldout_y_ei = train_test_split(X, y, test_size=holdout_set_size, random_state=trial)
+    train_x_ei, heldout_x_ei, train_y_ei, heldout_y_ei = train_test_split(
+        X, y, test_size=holdout_set_size, random_state=trial
+    )
     best_observed_value_ei = torch.tensor(np.max(train_y_ei))
 
     # Convert numpy arrays to PyTorch tensors and flatten the label vectors
@@ -186,27 +209,37 @@ def update_random_observations(best_random, heldout_inputs, heldout_outputs):
 
     # run N_ITERS rounds of BayesOpt after the initial random batch
     for iteration in range(1, N_ITERS + 1):
-
         t0 = time.time()
 
         # fit the model
         fit_gpytorch_model(mll_ei)
 
         # Use analytic acquisition function for batch size of 1.
-        EI = ExpectedImprovement(model=model_ei, best_f=(train_y_ei.to(train_y_ei)).max())
+        EI = ExpectedImprovement(
+            model=model_ei, best_f=(train_y_ei.to(train_y_ei)).max()
+        )
 
-        new_x_ei, new_obj_ei, heldout_x_ei, heldout_y_ei = optimize_acqf_and_get_observation(EI,
-                                                                                             heldout_x_ei,
-                                                                                             heldout_y_ei)
+        (
+            new_x_ei,
+            new_obj_ei,
+            heldout_x_ei,
+            heldout_y_ei,
+        ) = optimize_acqf_and_get_observation(EI, heldout_x_ei, heldout_y_ei)
 
         # update training points
         train_x_ei = torch.cat([train_x_ei, new_x_ei])
         train_y_ei = torch.cat([train_y_ei, new_obj_ei])
 
         # update random search progress
-        best_random, heldout_x_random, heldout_y_random = update_random_observations(best_random,
-                                                                                     heldout_inputs=heldout_x_random,
-                                                                                     heldout_outputs=heldout_y_random)
+        (
+            best_random,
+            heldout_x_random,
+            heldout_y_random,
+        ) = update_random_observations(
+            best_random,
+            heldout_inputs=heldout_x_random,
+            heldout_outputs=heldout_y_random,
+        )
         best_value_ei = torch.max(new_obj_ei, best_observed_ei[-1])
         best_observed_ei.append(best_value_ei)
 
@@ -224,7 +257,8 @@ def update_random_observations(best_random, heldout_inputs, heldout_outputs):
             print(
                 f"\nBatch {iteration:>2}: best_value (random, qEI) = "
                 f"({max(best_random):>4.2f}, {best_value_ei:>4.2f}), "
-                f"time = {t1 - t0:>4.2f}.", end=""
+                f"time = {t1 - t0:>4.2f}.",
+                end="",
             )
         else:
             print(".", end="")
@@ -232,10 +266,12 @@ def update_random_observations(best_random, heldout_inputs, heldout_outputs):
     best_observed_all_ei.append(best_observed_ei)
     best_random_all.append(best_random)
 
+
 # Define a confience interval function for plotting.
 def ci(y):
     return 1.96 * y.std(axis=0) / np.sqrt(N_TRIALS)
 
+
 iters = np.arange(N_ITERS + 1)
 
 y_ei = []
@@ -264,14 +300,14 @@ def ci(y):
 lower_ei = y_ei_mean - y_ei_std
 upper_ei = y_ei_mean + y_ei_std
 
-plt.plot(iters, y_rnd_mean, label='Random')
+plt.plot(iters, y_rnd_mean, label="Random")
 plt.fill_between(iters, lower_rnd, upper_rnd, alpha=0.2)
-plt.plot(iters, y_ei_mean, label='EI')
+plt.plot(iters, y_ei_mean, label="EI")
 plt.fill_between(iters, lower_ei, upper_ei, alpha=0.2)
-plt.xlabel('Number of Iterations')
-plt.ylabel('Best Objective Value')
+plt.xlabel("Number of Iterations")
+plt.ylabel("Best Objective Value")
 plt.legend(loc="lower right")
 plt.xticks(list(np.arange(1, 21)))
 plt.show()
 
-"""EI outperforms random search in terms of selecting molecules with high E isomer pi-pi* transition wavelength! It should be noted that the true objective for photoswitch optimisation would consider all transition wavelengths as well as the thermal half-life and this will hopefully be included in a future notebook!"""
\ No newline at end of file
+"""EI outperforms random search in terms of selecting molecules with high E isomer pi-pi* transition wavelength! It should be noted that the true objective for photoswitch optimisation would consider all transition wavelengths as well as the thermal half-life and this will hopefully be included in a future notebook!"""
diff --git a/setup.py b/setup.py
index 05d1bca..c12d661 100644
--- a/setup.py
+++ b/setup.py
@@ -2,11 +2,12 @@
 package setup
 """
 
+import codecs
 import io
 import os
 import re
-import codecs
 from typing import Dict, List
+
 from setuptools import find_packages, setup
 
 HERE = os.path.abspath(os.path.dirname(__file__))