Skip to content

Commit

Permalink
Update documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
folmos-at-orange committed Jul 5, 2024
1 parent cc51d63 commit 94d2bfc
Show file tree
Hide file tree
Showing 15 changed files with 5,272 additions and 4,813 deletions.
10 changes: 10 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ repos:
hooks:
- id: isort
language_version: python3
exclude: khiops/samples/samples.py|khiops/samples/samples_sklearn.py
- id: isort
alias: isort-samples
name: isort-samples
language_version: python3
files: khiops/samples/samples.py|khiops/samples/samples_sklearn.py
args: [--no-sections]
- repo: https://github.com/lyz-code/yamlfix/
rev: 1.16.0
hooks:
Expand All @@ -25,13 +32,16 @@ repos:
rev: 0.28.1
hooks:
- id: check-github-workflows
name: gh-workflows
args: [--verbose]
- id: check-github-actions
name: gh-actions
args: [--verbose]
- repo: https://github.com/jumanjihouse/pre-commit-hooks
rev: 3.0.0
hooks:
- id: shellcheck
name: shellcheck
- repo: local
hooks:
- id: samples-generation
Expand Down
11 changes: 9 additions & 2 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,21 @@
# List of patterns, relative to source directory, that match files and directories to
# ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_templates", "_build", "Thumbs.db", ".DS_Store"]

exclude_patterns = [
"_templates",
"_build",
"Thumbs.db",
".DS_Store",
"**.ipynb_checkpoints",
]
# HTML Theme
# Theme colors and fonts come from https://brand.orange.com
html_theme = "furo"
html_theme_options = {
"light_css_variables": {
"color-brand-primary": "#FF7900",
"color-brand-content": "#F16E00",
"color-brand-visited": "#FF7900",
"color-sidebar-background": "#FFFFFF",
"color-highlighted-background": "#FFD200",
"color-admonition-title--note": "#FF7900",
Expand All @@ -79,6 +85,7 @@
"dark_css_variables": {
"color-brand-primary": "#FF7900",
"color-brand-content": "#F16E00",
"color-brand-visited": "#FF7900",
"color-sidebar-background": "#000000",
"color-highlighted-background": "#FFD200",
"color-admonition-title--note": "#FF7900",
Expand Down
122 changes: 42 additions & 80 deletions doc/convert_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,51 +8,11 @@
import sys
import textwrap


def create_boilerplate_code(script_name):
    """Return the import preamble, as a list of source lines, for a samples script.

    `script_name` must be either "samples" or "samples_sklearn"; any other value
    raises a ValueError. A new list is built on every call.
    """
    # Reject unknown script names up front
    if script_name not in ("samples", "samples_sklearn"):
        raise ValueError(f"Invalid samples script name '{script_name}'")

    # Preamble of the core API samples
    if script_name == "samples":
        return [
            "import os\n",
            "from math import sqrt\n",
            "from os import path\n",
            "\n",
            "from khiops import core as kh\n",
            "\n",
        ]

    # Preamble of the sklearn API samples
    return [
        "import os\n",
        "import pickle\n",
        "from os import path\n",
        "\n",
        "import pandas as pd\n",
        "from sklearn import metrics\n",
        "from sklearn.compose import ColumnTransformer\n",
        "from sklearn.experimental import enable_hist_gradient_boosting\n",
        "from sklearn.ensemble import HistGradientBoostingClassifier\n",
        "from sklearn.datasets import fetch_20newsgroups\n",
        "from sklearn.feature_extraction.text import HashingVectorizer\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.pipeline import Pipeline\n",
        "from sklearn.preprocessing import OneHotEncoder\n",
        "\n",
        "from khiops import core as kh\n",
        "from khiops.sklearn import (\n",
        " KhiopsClassifier,\n",
        " KhiopsCoclustering,\n",
        " KhiopsEncoder,\n",
        " KhiopsRegressor,\n",
        ")\n",
    ]
import black


def create_header_cells(script_name):
"""Creates the header cells for the notebook"""
boilerplate_code = create_boilerplate_code(script_name)

# Create the boilerplate cells
cells = [
{
Expand All @@ -66,39 +26,41 @@ def create_header_cells(script_name):
"[Khiops](https://khiops.org) before using this this notebook",
],
},
{
"cell_type": "code",
"execution_count": None,
"metadata": {"collapsed": True},
"outputs": [],
"source": boilerplate_code,
},
]
return cells


# NOTE(review): this span is a rendered diff hunk without its +/- markers. The lines
# of `create_sample_cell` (presumably the pre-commit version) and of
# `create_sample_cells` (presumably the post-commit version) are interleaved, so the
# span is not runnable as shown. Confirm against the repository before editing.
def create_sample_cell(sample_method):
def create_sample_cells(sample_method):
"""Creates a code cell and an execution cell for the specified method"""

# Create the code block
# `create_sample_cells` path: split the docstring off the method source, then dedent
# the remaining code and format it with black
code, docstring = split_docstring(inspect.getsource(sample_method))
code = textwrap.dedent(code)
code = black.format_str(code, mode=black.Mode())

# Create the cell source as a list of lines
# `create_sample_cell` path: full method source, one entry per line, followed by a
# "#Run sample" comment and a call to the method
sample_method_source = inspect.getsource(sample_method)
sample_source_list = [line + "\n" for line in sample_method_source.split("\n")]
sample_source_list += ["#Run sample\n", sample_method.__name__ + "()"]
# `create_sample_cells` path: formatted code, one entry per line; the last entry is
# stripped so the cell source does not end with a newline
code_list = [line + "\n" for line in code.rstrip().split("\n")]
code_list[-1] = code_list[-1].rstrip()

# `create_sample_cell` path: a single code cell
sample_execution_cell = {
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": sample_source_list,
}
# `create_sample_cells` path: a markdown cell (method name as title plus its
# docstring) followed by a code cell
sample_execution_cells = [
{
"cell_type": "markdown",
"metadata": {},
"source": [f"### `{sample_method.__name__}()`\n\n", f"{docstring}\n"],
},
{
"cell_type": "code",
"execution_count": None,
"metadata": {},
"outputs": [],
"source": code_list,
},
]

# `create_sample_cell` returns one cell dict; `create_sample_cells` returns a list
return sample_execution_cell
return sample_execution_cells


def create_rest_page_header(script_name):
boilerplate_code = "".join(create_boilerplate_code(script_name))
indented_boilerplate_code = textwrap.indent(boilerplate_code, " ")
subtitle = "The code snippets on this page demonstrate the basic use of the "
if script_name == "samples":
title = "Samples core"
Expand Down Expand Up @@ -139,38 +101,37 @@ def create_rest_page_header(script_name):
" from khiops.tools import download_datasets\n"
" download_datasets()\n"
"\n"
"Before copying any code snippet make sure to precede it with following\n"
"preamble:\n"
"\n"
".. code-block:: python\n"
"\n"
f"{indented_boilerplate_code}"
"\n"
"Samples\n"
"-------\n"
)


def split_docstring(source):
    """Splits a function source at its first triple-double-quoted docstring

    Returns the pair `(source_without_docstring, docstring)`:

    - `docstring` is the text between the first pair of triple double quotes.
    - `source_without_docstring` is what follows the closing quotes and the single
      character right after them (the end of line, for a docstring that closes at the
      end of its line). Everything before the docstring, including the `def` line, is
      dropped.

    If `source` contains no complete docstring it is returned unchanged together
    with an empty docstring.
    """
    quote = '"""'

    # No docstring at all: nothing to split
    # (this branch used to reference the undefined name `sample_source`)
    docstring_open_quote = source.find(quote)
    if docstring_open_quote == -1:
        return source, ""

    # Unterminated docstring: do not slice with a "not found" index
    docstring_start = docstring_open_quote + len(quote)
    docstring_close_quote = source.find(quote, docstring_start)
    if docstring_close_quote == -1:
        return source, ""

    # The "+ 1" skips the character that follows the closing quotes
    source_without_docstring = source[docstring_close_quote + len(quote) + 1 :]
    docstring = source[docstring_start:docstring_close_quote]
    return source_without_docstring, docstring


def remove_docstring(source):
    """Returns only the code part of `split_docstring` (kept for existing callers)"""
    return split_docstring(source)[0]


def create_rest_page_section(sample_function):
"""Creates the reST section of a sample: an autofunction directive and its code"""
# NOTE(review): this span is a rendered diff hunk without its +/- markers; two
# variants of the body are interleaved below, so it is not runnable as shown.
# Confirm against the repository before editing.

# Variant building `indented_code` (presumably pre-commit): a synthetic `def` line
# followed by the source without its docstring, indented as a whole
code = f"def {sample_function.__name__}():\n" + remove_docstring(
inspect.getsource(sample_function)
)
indented_code = textwrap.indent(code, " ")
# Variant building `code` (presumably post-commit): the docstring is discarded, the
# remaining code is dedented, formatted with black, indented and right-stripped
code, _ = split_docstring(inspect.getsource(sample_function))
code = textwrap.dedent(code)
code = black.format_str(code, mode=black.Mode())
code = textwrap.indent(code, " ")
code = code.rstrip()
# The last two f-strings are alternatives, one per variant
return (
f".. autofunction:: {sample_function.__name__}\n"
".. code-block:: python\n"
"\n"
f"{indented_code}"
f"{code}"
)


Expand All @@ -184,6 +145,7 @@ def main(args):

# Sanity check
script_path = os.path.join(args.samples_dir, f"{script_name}.py")
print(f"Converting to format '{args.format}' samples script at {script_path}")
if os.path.abspath(script_path) == os.path.abspath(args.output_path):
print("error: input and output paths are the same")
sys.exit(1)
Expand All @@ -210,7 +172,7 @@ def main(args):
notebook_objects = {}
notebook_objects["cells"] = create_header_cells(script_name)
for sample_method in samples.exported_samples:
notebook_objects["cells"].append(create_sample_cell(sample_method))
notebook_objects["cells"].extend(create_sample_cells(sample_method))
notebook_objects["metadata"] = {}
notebook_objects["nbformat"] = 4
notebook_objects["nbformat_minor"] = 2
Expand Down
11 changes: 4 additions & 7 deletions doc/create-doc
Original file line number Diff line number Diff line change
Expand Up @@ -90,21 +90,18 @@ fi

# Create the coursework materials
echo "Creating ZIP files"
(cd "$KHIOPS_TUTORIAL_REPO_DIR" && cp -r data helper_functions.py "../$tutorials_dir")
cd "$tutorials_dir"
mkdir -p exercises
touch exercises/.dummy # Create a dummy so the "exercises" directory is created on unzip
zip "core_tutorials_solutions.zip" Core*.ipynb helper_functions.py data/*/* exercises/.dummy
zip "sklearn_tutorials_solutions.zip" Sklearn*.ipynb helper_functions.py data/*/* exercises/.dummy
zip "core_tutorials_solutions.zip" Core*.ipynb data/*/* exercises/.dummy
zip "sklearn_tutorials_solutions.zip" Sklearn*.ipynb data/*/* exercises/.dummy
cd "$KHIOPS_TUTORIAL_REPO_DIR"
python create-coursework.py
cd coursework
mkdir -p exercises
touch exercises/.dummy # Create a dummy so the "exercises" directory is created on unzip
zip "../../$tutorials_dir/core_tutorials.zip" \
Core*.ipynb helper_functions.py data/*/* exercises/.dummy
zip "../../$tutorials_dir/sklearn_tutorials.zip" \
Sklearn*.ipynb helper_functions.py data/*/* exercises/.dummy
zip "../../$tutorials_dir/core_tutorials.zip" Core*.ipynb data/*/* exercises/.dummy
zip "../../$tutorials_dir/sklearn_tutorials.zip" Sklearn*.ipynb data/*/* exercises/.dummy
cd "../.."

# Create the documentation with Sphinx
Expand Down
Loading

0 comments on commit 94d2bfc

Please sign in to comment.