From fa410cd800de2518b12e5d8b5904f341feb36420 Mon Sep 17 00:00:00 2001
From: Leo Klarner <leo.klarner@bnc.ox.ac.uk>
Date: Sat, 9 Dec 2023 20:47:00 +0000
Subject: [PATCH 1/6] Fix documentation generation.

---
 .github/workflows/build_documentation.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_documentation.yaml b/.github/workflows/build_documentation.yaml
index ccd6ae8..30de342 100644
--- a/.github/workflows/build_documentation.yaml
+++ b/.github/workflows/build_documentation.yaml
@@ -16,7 +16,7 @@ jobs:
           pip install sphinx furo myst_parser
       - name: Sphinx build
         run: |
-          sphinx-build docs _build
+          sphinx-build docs/source _build
       - name: Deploy to GitHub Pages
         uses: peaceiris/actions-gh-pages@v3
         if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}

From cac0bfefab7cbefe6d09b2f9809bc59be6592ba5 Mon Sep 17 00:00:00 2001
From: Leo Klarner <leo.klarner@bnc.ox.ac.uk>
Date: Sat, 9 Dec 2023 21:00:01 +0000
Subject: [PATCH 2/6] Fix docs.

---
 .github/workflows/build_documentation.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_documentation.yaml b/.github/workflows/build_documentation.yaml
index 30de342..f98fbb4 100644
--- a/.github/workflows/build_documentation.yaml
+++ b/.github/workflows/build_documentation.yaml
@@ -13,7 +13,7 @@ jobs:
       - uses: actions/setup-python@v3
       - name: Install dependencies
         run: |
-          pip install sphinx furo myst_parser
+          pip install .[all,docs]
       - name: Sphinx build
         run: |
           sphinx-build docs/source _build

From 8e5379f095f628b50abf8f29a99775804a454f34 Mon Sep 17 00:00:00 2001
From: Leo Klarner <leo.klarner@bnc.ox.ac.uk>
Date: Sat, 9 Dec 2023 21:06:47 +0000
Subject: [PATCH 3/6] Updated Python version in doc workflow.

---
 .github/workflows/build_documentation.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/build_documentation.yaml b/.github/workflows/build_documentation.yaml
index f98fbb4..99012a8 100644
--- a/.github/workflows/build_documentation.yaml
+++ b/.github/workflows/build_documentation.yaml
@@ -11,6 +11,8 @@ jobs:
     steps:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v3
+        with:
+          python-version: "3.11"
       - name: Install dependencies
         run: |
           pip install .[all,docs]

From cbc74e35840713b38f9a61ca9b8d27cb0d8f369d Mon Sep 17 00:00:00 2001
From: Leo Klarner <leo.klarner@bnc.ox.ac.uk>
Date: Sat, 9 Dec 2023 23:23:33 +0000
Subject: [PATCH 4/6] Misc fixes to docstrings and doc generation.

---
 .requirements/docs.in                         | 15 +++-
 README.md                                     |  2 +-
 docs/source/conf.py                           | 28 +++---
 docs/source/index.rst                         | 86 +++++++++++++++++--
 docs/source/modules/dataloader.rst            |  2 +-
 docs/source/readme.rst                        |  1 -
 gauche/dataloader/reaction_loader.py          | 10 +++
 .../braun_blanquet_kernel.py                  |  2 +-
 .../fingerprint_kernels/dice_kernel.py        | 10 +--
 .../fingerprint_kernels/faith_kernel.py       |  2 +-
 .../fingerprint_kernels/forbes_kernel.py      |  4 +-
 .../inner_product_kernel.py                   | 10 +--
 .../intersection_kernel.py                    |  2 +-
 .../fingerprint_kernels/minmax_kernel.py      | 10 +--
 .../fingerprint_kernels/otsuka_kernel.py      |  2 +-
 .../fingerprint_kernels/rand_kernel.py        |  2 +-
 .../rogers_tanimoto_kernel.py                 |  2 +-
 .../fingerprint_kernels/russell_rao_kernel.py |  2 +-
 .../fingerprint_kernels/sogenfrei_kernel.py   |  2 +-
 .../sokal_sneath_kernel.py                    |  2 +-
 .../fingerprint_kernels/tanimoto_kernel.py    | 15 ++--
 gauche/representations/fingerprints.py        |  6 +-
 22 files changed, 158 insertions(+), 59 deletions(-)

diff --git a/.requirements/docs.in b/.requirements/docs.in
index d6bd3be..7368fe6 100644
--- a/.requirements/docs.in
+++ b/.requirements/docs.in
@@ -1,2 +1,15 @@
+furo
+nbsphinx
+nbsphinx-link
+sphinx-copybutton
+m2r2
+nbstripout
+pandoc
+pydocstyle
 sphinx
-furo
\ No newline at end of file
+sphinx-inline-tabs
+sphinxext-opengraph
+sphinxcontrib-gtagjs
+ipython
+watermark
+sphinx_codeautolink
\ No newline at end of file
diff --git a/README.md b/README.md
index 2053b4d..bcc66f0 100644
--- a/README.md
+++ b/README.md
@@ -86,7 +86,7 @@ The easiest way to get started with GAUCHE is to check out our tutorial notebook
 | [GP Regression on Molecules](https://leojklarner.github.io/gauche/notebooks/gp_regression_on_molecules.html)  |  [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/leojklarner/gauche/blob/main/notebooks/GP%20Regression%20on%20Molecules.ipynb)   |
 | [Bayesian Optimisation Over Molecules](https://leojklarner.github.io/gauche/notebooks/bayesian_optimisation_over_molecules.html)  |  [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/leojklarner/gauche/blob/main/notebooks/Bayesian%20Optimisation%20Over%20Molecules.ipynb)   |
 | [Multioutput Gaussian Processes for Multitask Learning](https://leojklarner.github.io/gauche/notebooks/multitask_gp_regression_on_molecules.html)  |  [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/leojklarner/gauche/blob/main/notebooks/Multitask%20GP%20Regression%20on%20Molecules.ipynb)   |
-| [Training GPs on Graphs](https://leojklarner.github.io/gauche/notebooks/Training%20GPs%20on%20Graphs.html)  |  [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/leojklarner/gauche/blob/main/notebooks/Training%20GPs%20on%20Graphs.ipynb)   |
+| [Training GPs on Graphs](https://leojklarner.github.io/gauche/notebooks/training_gps_on_graphs.html)  |  [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/leojklarner/gauche/blob/main/notebooks/Training%20GPs%20on%20Graphs.ipynb)   |
 | [Sparse GP Regression for Big Molecular Data](https://leojklarner.github.io/gauche/notebooks/sparse_gp_regression_for_big_molecular_data.html)  |  [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/leojklarner/gauche/blob/main/notebooks/Sparse%20GP%20Regression%20for%20Big%20Molecular%20Data.ipynb)   |
 |[Molecular Preference Learning](https://github.com/leojklarner/gauche/blob/main/notebooks/Molecular%20Preference%20Learning.ipynb)|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/leojklarner/gauche/blob/main/notebooks/Molecular%20Preference%20Learning.ipynb) |
 |[Preferential Bayesian Optimisation](https://github.com/leojklarner/gauche/blob/main/notebooks/Preferential%20Bayesian%20Optimisation.ipynb)|[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/leojklarner/gauche/blob/main/notebooks/Preferential%20Bayesian%20Optimisation.ipynb) |
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 67ee1c9..8127d37 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -10,10 +10,12 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-# import os
-# import sys
 
-# sys.path.insert(0, os.path.abspath("../../../gauche"))
+import os
+import sys
+
+print(sys.executable)
+sys.path.insert(0, os.path.abspath("."))
 
 
 # -- Project information -----------------------------------------------------
@@ -23,7 +25,7 @@
 author = "Ryan Rhys-Griffiths"
 
 # The full version, including alpha/beta/rc tags
-release = "0.1.0"
+# release = "1.0.0"
 
 
 # -- General configuration ---------------------------------------------------
@@ -33,15 +35,19 @@
 # ones.
 extensions = [
     "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
     "sphinx.ext.intersphinx",
     "sphinx.ext.viewcode",
-    # "sphinx_copybutton",
-    # "sphinx_inline_tabs",
-    # "sphinxcontrib.gtagjs",
-    # "sphinxext.opengraph",
-    # "m2r2",
-    # "nbsphinx",
-    # "nbsphinx_link",
+    "sphinx_copybutton",
+    "sphinx_inline_tabs",
+    "sphinxcontrib.gtagjs",
+    "sphinxext.opengraph",
+    "m2r2",
+    "nbsphinx",
+    "nbsphinx_link",
+    "sphinx.ext.napoleon",
+    "sphinx_codeautolink",
+    # "sphinx_autorun",
 ]
 
 # Add any paths that contain templates here, relative to this directory.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 973e1c6..e256e54 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -3,16 +3,63 @@
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
 
-Welcome to GAUCHE's documentation!
-==================================
+Documentation
+==================
+
+.. image:: ../../imgs/gauche_banner_1.png
+    :width: 100%
+    :alt: GAUCHE Logo
+    :align: left
+
+**GAUCHE** is a collaborative, open-source software library that aims to make state-of-the-art probabilistic modelling and black-box optimisation techniques more easily accessible to scientific experts in chemistry, materials science and beyond. We provide 30+ bespoke kernels for molecules, chemical reactions and proteins and illustrate how they can be used for Gaussian processes and Bayesian optimisation in 10+ easy-to-adapt tutorial notebooks.
+
+`Paper (NeurIPS 2023) <https://arxiv.org/abs/2212.04450>`_
+
+Overview
+==========
+
+General-purpose Gaussian process (GP) and Bayesian optimisation (BO) libraries do not cater for molecular representations. Likewise, general-purpose molecular machine learning libraries do not consider GPs and BO. To bridge this gap, GAUCHE provides a modular, robust and easy-to-use framework of 30+ parallelisable and batch-GP-compatible implementations of string, fingerprint and graph kernels that operate on a range of widely-used molecular representations.
+
+.. image:: ../../imgs/gauche_overview.png
+    :width: 100 %
+    :alt: GAUCHE Overview
+    :align: left
+
 
+Kernels
+---------
 
-.. include:: readme.rst
+Standard GP packages typically assume continuous input spaces of low and fixed dimensionality. This makes it difficult to apply them to common molecular representations: molecular graphs are discrete objects, SMILES strings vary in length and topological fingerprints tend to be high-dimensional and sparse. To bridge this gap, GAUCHE provides:
+
+* **Fingerprint Kernels** that measure the similarity between bit/count vectors of descriptors by examining the degree to which their elements overlap.
+* **String Kernels** that measure the similarity between strings by examining the degree to which their sub-strings overlap.
+* **Graph Kernels** that measure the similarity between graphs by examining the degree to which certain substructural motifs overlap.
+
+Representations
+-----------------
+
+GAUCHE supports any representation that is based on bit/count vectors, strings or graphs. For rapid prototyping and benchmarking, we also provide a range of standard featurisation techniques for molecules, chemical reactions and proteins:
+
+.. list-table::
+   :header-rows: 1
+
+   * - Domain
+     - Representation
+   * - Molecules
+     - ECFP Fingerprints [1], rdkit Fragments, Fragprints, Graphs [2], SMILES [3], SELFIES [4]   
+   * - Chemical Reactions
+     - One-Hot Encoding, Data-Driven Reaction Fingerprints [5], Differential Reaction Fingerprints [6], Reaction SMARTS
+   * - Proteins  
+     - Sequences, Graphs [2]
+
+Getting Started
+-----------------
+
+The easiest way to get started with GAUCHE is to check out our tutorial notebooks:
 
 
 .. toctree::
-   :maxdepth: 2
-   :caption: Tutorials
+   :maxdepth: 1
 
    notebooks/gp_regression_on_molecules.nblink
    notebooks/bayesian_optimisation_over_molecules.nblink
@@ -26,6 +73,16 @@ Welcome to GAUCHE's documentation!
    notebooks/external_graph_kernels.nblink
 
 
+
+Extensions
+-----------------
+
+If there are any specific kernels or representations that you would like to see included in GAUCHE, please reach out or submit an issue/pull request.
+
+
+GAUCHE's API
+================
+
 .. toctree::
    :maxdepth: 3
    :caption: API Reference
@@ -34,11 +91,26 @@ Welcome to GAUCHE's documentation!
    modules/representations
    modules/dataloader
 
-
-
 Indices and tables
 ==================
 
 * :ref:`genindex`
 * :ref:`modindex`
 * :ref:`search`
+
+References
+==================
+
+.. _bibliography:
+
+[1] Rogers, D. and Hahn, M., 2010. `Extended-connectivity fingerprints. <https://pubs.acs.org/doi/abs/10.1021/ci100050t>`_ Journal of Chemical Information and Modeling, 50(5), pp.742-754.
+
+[2] Jamasb, A., Viñas Torné, R., Ma, E., Du, Y., Harris, C., Huang, K., Hall, D., Lió, P. and Blundell, T., 2022. `Graphein-a Python library for geometric deep learning and network analysis on biomolecular structures and interaction networks <https://proceedings.neurips.cc/paper_files/paper/2022/hash/ade039c1db0391106a3375bd2feb310a-Abstract-Conference.html>`_. Advances in Neural Information Processing Systems, 35, pp.27153-27167.
+
+[3] Weininger, D., 1988. `SMILES, a chemical language and information system. 1. Introduction to methodology and encoding rules. <https://pubs.acs.org/doi/pdf/10.1021/ci00057a005>`_ Journal of Chemical Information and Computer Sciences, 28(1), pp.31-36.
+
+[4] Krenn, M., Häse, F., Nigam, A., Friederich, P. and Aspuru-Guzik, A., 2020. `Self-referencing embedded strings (SELFIES): A 100% robust molecular string representation <https://iopscience.iop.org/article/10.1088/2632-2153/aba947/meta>`_. Machine Learning: Science and Technology, 1(4), p.045024.
+
+[5] Probst, D., Schwaller, P. and Reymond, J.L., 2022. `Reaction classification and yield prediction using the differential reaction fingerprint DRFP <https://pubs.rsc.org/en/content/articlehtml/2022/dd/d1dd00006c>`_. Digital Discovery, 1(2), pp.91-97.
+
+[6] Schwaller, P., Probst, D., Vaucher, A.C., Nair, V.H., Kreutter, D., Laino, T. and Reymond, J.L., 2021. `Mapping the space of chemical reactions using attention-based neural networks <https://www.nature.com/articles/s42256-020-00284-w>`_. Nature Machine Intelligence, 3(2), pp.144-152.
\ No newline at end of file
diff --git a/docs/source/modules/dataloader.rst b/docs/source/modules/dataloader.rst
index 57d6452..0c6cfa1 100644
--- a/docs/source/modules/dataloader.rst
+++ b/docs/source/modules/dataloader.rst
@@ -8,7 +8,7 @@ Dataloader
 
 Molecular Properties
 ----------------------
-.. automodule:: gauche.dataloader.mol_prop_loader
+.. automodule:: gauche.dataloader.molprop_loader
     :members:
 
 Reaction Loader
diff --git a/docs/source/readme.rst b/docs/source/readme.rst
index 57de865..e69de29 100644
--- a/docs/source/readme.rst
+++ b/docs/source/readme.rst
@@ -1 +0,0 @@
-.. mdinclude:: ../../README.md
\ No newline at end of file
diff --git a/gauche/dataloader/reaction_loader.py b/gauche/dataloader/reaction_loader.py
index 82b75e3..eb0cb98 100644
--- a/gauche/dataloader/reaction_loader.py
+++ b/gauche/dataloader/reaction_loader.py
@@ -19,6 +19,16 @@
 
 
 class ReactionLoader(DataLoader):
+    """
+    Data loader class for reaction yield prediction
+    datasets with a single regression target.
+    Expects input to be a csv file with either multiple SMILES 
+    columns or a single reaction SMARTS column.
+    Contains methods to validate the dataset and to
+    transform the SMILES/SMARTS strings into different
+    molecular representations.
+    """
+
     def __init__(self):
         super(ReactionLoader, self).__init__()
         self.task = "reaction_yield_prediction"
diff --git a/gauche/kernels/fingerprint_kernels/braun_blanquet_kernel.py b/gauche/kernels/fingerprint_kernels/braun_blanquet_kernel.py
index d4a3f87..959a8f8 100644
--- a/gauche/kernels/fingerprint_kernels/braun_blanquet_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/braun_blanquet_kernel.py
@@ -16,7 +16,7 @@ def batch_braun_blanquet_sim(
     Braun-Blanquet similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added.
 
-    <x1, x2> / max(|x1|, |x2|)
+    :math:`<x1, x2> / max(|x1|, |x2|)`
 
     Where || is the L1 norm and <.> is the inner product
 
diff --git a/gauche/kernels/fingerprint_kernels/dice_kernel.py b/gauche/kernels/fingerprint_kernels/dice_kernel.py
index abbd9d2..6a56f87 100644
--- a/gauche/kernels/fingerprint_kernels/dice_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/dice_kernel.py
@@ -14,7 +14,7 @@ def batch_dice_sim(
     Dice similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added.
 
-    (2 * <x1, x2>) / (|x1| + |x2|)
+    :math:`(2 * <x1, x2>) / (|x1| + |x2|)`
 
     Where || is the L1 norm and <.> is the inner product
 
@@ -50,10 +50,10 @@ class DiceKernel(Kernel):
 
      .. math::
 
-    \begin{equation*}
-     k_{\text{Dice}}(\mathbf{x}, \mathbf{x'}) = \frac{2\langle\mathbf{x},
-     \mathbf{x'}\rangle}{\left\lVert\mathbf{x}\right\rVert + \left\lVert\mathbf{x'}\right\rVert}
-    \end{equation*}
+        \begin{equation*}
+        k_{\text{Dice}}(\mathbf{x}, \mathbf{x'}) = \frac{2\langle\mathbf{x},
+        \mathbf{x'}\rangle}{\left\lVert\mathbf{x}\right\rVert + \left\lVert\mathbf{x'}\right\rVert}
+        \end{equation*}
 
     .. note::
 
diff --git a/gauche/kernels/fingerprint_kernels/faith_kernel.py b/gauche/kernels/fingerprint_kernels/faith_kernel.py
index 343bc4e..ee3ae99 100644
--- a/gauche/kernels/fingerprint_kernels/faith_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/faith_kernel.py
@@ -16,7 +16,7 @@ def batch_faith_sim(
     Faith similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added.
 
-    (2 * <x1, x2>) + d / 2n
+    :math:`(2 * <x1, x2>) + d / 2n`
 
     Where <.> is the inner product, d is the number of common zeros and n is the dimension of the input vectors
 
diff --git a/gauche/kernels/fingerprint_kernels/forbes_kernel.py b/gauche/kernels/fingerprint_kernels/forbes_kernel.py
index 431171a..c4b43d2 100644
--- a/gauche/kernels/fingerprint_kernels/forbes_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/forbes_kernel.py
@@ -16,9 +16,9 @@ def batch_forbes_sim(
     Forbes similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added.
 
-    n * <x1, x2> / (|x1| + |x2|)
+    :math:`n * <x1, x2> / (|x1| + |x2|)`
 
-    Where <.> is the inner product, || is the L1 norm, and n is the dimension of the input vectors
+    Where <.> is the inner product, :math:`||` is the L1 norm, and n is the dimension of the input vectors
 
     Args:
         x1: `[b x n x d]` Tensor where b is the batch dimension
diff --git a/gauche/kernels/fingerprint_kernels/inner_product_kernel.py b/gauche/kernels/fingerprint_kernels/inner_product_kernel.py
index db784df..8c71814 100644
--- a/gauche/kernels/fingerprint_kernels/inner_product_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/inner_product_kernel.py
@@ -16,7 +16,7 @@ def batch_inner_product_sim(
     Inner product similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added.
 
-    <x1, x2>
+    :math:`<x1, x2>`
 
     Where <.> is the inner product
 
@@ -47,10 +47,10 @@ class InnerProductKernel(Kernel):
 
      .. math::
 
-    \begin{equation*}
-     k_{\text{Inner Product}}(\mathbf{x}, \mathbf{x'}) = \langle\mathbf{x},
-     \mathbf{x'}\rangle
-    \end{equation*}
+        \begin{equation*}
+        k_{\text{Inner Product}}(\mathbf{x}, \mathbf{x'}) = \langle\mathbf{x},
+        \mathbf{x'}\rangle
+        \end{equation*}
 
     .. note::
 
diff --git a/gauche/kernels/fingerprint_kernels/intersection_kernel.py b/gauche/kernels/fingerprint_kernels/intersection_kernel.py
index b4ea165..dc0a612 100644
--- a/gauche/kernels/fingerprint_kernels/intersection_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/intersection_kernel.py
@@ -17,7 +17,7 @@ def batch_intersection_sim(
     eps argument ensures numerical stability if all zero tensors are added. Must be
     used with binary-valued vectors only
 
-    <x1, x2> + <x1', x2'>
+    :math:`<x1, x2> + <x1', x2'>`
 
     Where <.> is the inner product and x1' and x2' denote the bit flipped vectors such
     that ones and zeros are interchanged
diff --git a/gauche/kernels/fingerprint_kernels/minmax_kernel.py b/gauche/kernels/fingerprint_kernels/minmax_kernel.py
index 290647b..8dd532b 100644
--- a/gauche/kernels/fingerprint_kernels/minmax_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/minmax_kernel.py
@@ -14,9 +14,9 @@ def batch_minmax_sim(
     MinMax similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added.
 
-    (|x1| + |x2| - |x1 - x2|) / (|x1| + |x2| + |x1 - x2|)
+    :math:`(|x1| + |x2| - |x1 - x2|) / (|x1| + |x2| + |x1 - x2|)`
 
-    Where || is the L1 norm
+    Where :math:`||` is the L1 norm
 
     Args:
         x1: `[b x n x d]` Tensor where b is the batch dimension
@@ -51,9 +51,9 @@ class MinMaxKernel(Kernel):
 
      .. math::
 
-    \begin{equation*}
-     k_{\text{MinMax}}(\mathbf{x}, \mathbf{x'}) = \frac{\sum_i \min(x_i, x'_i)}
-    \end{equation*}
+        \begin{equation*}
+        k_{\text{MinMax}}(\mathbf{x}, \mathbf{x'}) = \frac{\sum_i \min(x_i, x'_i)}{\sum_i \max(x_i, x'_i)}
+        \end{equation*}
 
     .. note::
 
diff --git a/gauche/kernels/fingerprint_kernels/otsuka_kernel.py b/gauche/kernels/fingerprint_kernels/otsuka_kernel.py
index 2c82d07..c4a868a 100644
--- a/gauche/kernels/fingerprint_kernels/otsuka_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/otsuka_kernel.py
@@ -16,7 +16,7 @@ def batch_otsuka_sim(
     Otsuka similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added.
 
-    <x1, x2> / sqrt(|x1| + |x2|)
+    :math:`<x1, x2> / sqrt(|x1| + |x2|)`
 
     Where || is the L1 norm and <.> is the inner product
 
diff --git a/gauche/kernels/fingerprint_kernels/rand_kernel.py b/gauche/kernels/fingerprint_kernels/rand_kernel.py
index ad5cd12..6561296 100644
--- a/gauche/kernels/fingerprint_kernels/rand_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/rand_kernel.py
@@ -16,7 +16,7 @@ def batch_rand_sim(
     Rand similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added.
 
-    <x1, x2> + d / n
+    :math:`<x1, x2> + d / n`
 
     Where <.> is the inner product, d is the number of common zeros and n is the dimensionality
 
diff --git a/gauche/kernels/fingerprint_kernels/rogers_tanimoto_kernel.py b/gauche/kernels/fingerprint_kernels/rogers_tanimoto_kernel.py
index 8707b3d..6c78e17 100644
--- a/gauche/kernels/fingerprint_kernels/rogers_tanimoto_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/rogers_tanimoto_kernel.py
@@ -16,7 +16,7 @@ def batch_rogers_tanimoto_sim(
     Rogers-Tanimoto similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added.
 
-    <x1, x2> + d / 2|x1| + 2|x2| - 3*<x1, x2> + d
+    :math:`<x1, x2> + d / 2|x1| + 2|x2| - 3*<x1, x2> + d`
 
     Where || is the L1 norm and <.> is the inner product and d is the number of common zeros
 
diff --git a/gauche/kernels/fingerprint_kernels/russell_rao_kernel.py b/gauche/kernels/fingerprint_kernels/russell_rao_kernel.py
index 0c9459e..820b4a3 100644
--- a/gauche/kernels/fingerprint_kernels/russell_rao_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/russell_rao_kernel.py
@@ -16,7 +16,7 @@ def batch_russell_rao_sim(
     Russell-Rao similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added.
 
-    <x1, x2> / n
+    :math:`<x1, x2> / n`
 
     Where <.> is the inner product and n is the dimension of the vectors x1/x2
 
diff --git a/gauche/kernels/fingerprint_kernels/sogenfrei_kernel.py b/gauche/kernels/fingerprint_kernels/sogenfrei_kernel.py
index 02b506b..b6c6ef3 100644
--- a/gauche/kernels/fingerprint_kernels/sogenfrei_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/sogenfrei_kernel.py
@@ -16,7 +16,7 @@ def batch_sogenfrei_sim(
     Sogenfrei similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added.
 
-    <x1, x2>**2 / (|x1| + |x2|)
+    :math:`<x1, x2>**2 / (|x1| + |x2|)`
 
     Where <.> is the inner product and || is the L1 norm
 
diff --git a/gauche/kernels/fingerprint_kernels/sokal_sneath_kernel.py b/gauche/kernels/fingerprint_kernels/sokal_sneath_kernel.py
index da91e23..cb4bc57 100644
--- a/gauche/kernels/fingerprint_kernels/sokal_sneath_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/sokal_sneath_kernel.py
@@ -16,7 +16,7 @@ def batch_sokal_sneath_sim(
     Sokal-Sneath similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added.
 
-    <x1, x2> / 2|x1| + 2|x2| - 3*<x1, x2>
+    :math:`<x1, x2> / 2|x1| + 2|x2| - 3*<x1, x2>`
 
     Where <.> is the inner product and || is the L1 norm
 
diff --git a/gauche/kernels/fingerprint_kernels/tanimoto_kernel.py b/gauche/kernels/fingerprint_kernels/tanimoto_kernel.py
index 1dddf98..c289016 100644
--- a/gauche/kernels/fingerprint_kernels/tanimoto_kernel.py
+++ b/gauche/kernels/fingerprint_kernels/tanimoto_kernel.py
@@ -15,16 +15,17 @@ def batch_tanimoto_sim(
     Tanimoto similarity between two batched tensors, across last 2 dimensions.
     eps argument ensures numerical stability if all zero tensors are added. Tanimoto similarity is proportional to:
 
-    (<x, y>) / (||x||^2 + ||y||^2 - <x, y>)
+    :math:`(<x, y>) / (||x||^2 + ||y||^2 - <x, y>)`
 
     where x and y may be bit or count vectors or in set notation:
 
-    |A \cap B | / |A| + |B| - |A \cap B |
+    :math:`|A \\cap B| / |A| + |B| - |A \\cap B|`
 
     Args:
         x1: `[b x n x d]` Tensor where b is the batch dimension
         x2: `[b x m x d]` Tensor
         eps: Float for numerical stability. Default value is 1e-6
+
     Returns:
         Tensor denoting the Tanimoto similarity.
     """
@@ -52,11 +53,11 @@ class TanimotoKernel(Kernel):
 
      .. math::
 
-    \begin{equation*}
-     k_{\text{Tanimoto}}(\mathbf{x}, \mathbf{x'}) = \frac{\langle\mathbf{x},
-     \mathbf{x'}\rangle}{\left\lVert\mathbf{x}\right\rVert^2 + \left\lVert\mathbf{x'}\right\rVert^2 -
-     \langle\mathbf{x}, \mathbf{x'}\rangle}
-    \end{equation*}
+        \begin{equation*}
+        k_{\text{Tanimoto}}(\mathbf{x}, \mathbf{x'}) = \frac{\langle\mathbf{x},
+        \mathbf{x'}\rangle}{\left\lVert\mathbf{x}\right\rVert^2 + \left\lVert\mathbf{x'}\right\rVert^2 -
+        \langle\mathbf{x}, \mathbf{x'}\rangle}
+        \end{equation*}
 
     .. note::
 
diff --git a/gauche/representations/fingerprints.py b/gauche/representations/fingerprints.py
index cb49443..7f5a142 100644
--- a/gauche/representations/fingerprints.py
+++ b/gauche/representations/fingerprints.py
@@ -17,11 +17,9 @@ def one_hot(df: pd.DataFrame) -> np.ndarray:
     Builds reaction representation as a bit vector which indicates whether
     a certain condition, reagent, reactant etc. is present in the reaction.
 
-    :param df: pandas DataFrame with columns representing different
-    parameters of the reaction (e.g. reactants, reagents, conditions).
+    :param df: pandas DataFrame with columns representing different parameters of the reaction (e.g. reactants, reagents, conditions).
     :type df: pandas DataFrame
-    :return: array of shape [len(reaction_smiles), sum(unique values for different columns in df)]
-     with one-hot encoding of reactions
+    :return: array of shape [len(reaction_smiles), sum(unique values for different columns in df)] with one-hot encoding of reactions
     """
     df_ohe = pd.get_dummies(df)
     return df_ohe.to_numpy(dtype=np.float64)

From 7f45c8048f8c911ec49f609aab05a51b022c9560 Mon Sep 17 00:00:00 2001
From: Leo Klarner <leo.klarner@bnc.ox.ac.uk>
Date: Sat, 9 Dec 2023 23:30:39 +0000
Subject: [PATCH 5/6] Added Pandoc install.

---
 .github/workflows/build_documentation.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.github/workflows/build_documentation.yaml b/.github/workflows/build_documentation.yaml
index 99012a8..13cf6bb 100644
--- a/.github/workflows/build_documentation.yaml
+++ b/.github/workflows/build_documentation.yaml
@@ -13,6 +13,12 @@ jobs:
       - uses: actions/setup-python@v3
         with:
           python-version: "3.11"
+
+      - name: Install Pandoc
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y pandoc
+  
       - name: Install dependencies
         run: |
           pip install .[all,docs]

From 7fdaadd01b845a05ed800b1f24c33a761e91b12f Mon Sep 17 00:00:00 2001
From: Leo Klarner <leo.klarner@bnc.ox.ac.uk>
Date: Sat, 9 Dec 2023 23:36:17 +0000
Subject: [PATCH 6/6] Black formatting.

---
 gauche/dataloader/reaction_loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gauche/dataloader/reaction_loader.py b/gauche/dataloader/reaction_loader.py
index eb0cb98..40ea64d 100644
--- a/gauche/dataloader/reaction_loader.py
+++ b/gauche/dataloader/reaction_loader.py
@@ -22,7 +22,7 @@ class ReactionLoader(DataLoader):
     """
     Data loader class for reaction yield prediction
     datasets with a single regression target.
-    Expects input to be a csv file with either multiple SMILES 
+    Expects input to be a csv file with either multiple SMILES
     columns or a single reaction SMARTS column.
     Contains methods to validate the dataset and to
     transform the SMILES/SMARTS strings into different