
build: setup excludes for code coverage #45

Draft · wants to merge 1 commit into base: main
6 changes: 1 addition & 5 deletions .github/workflows/pytest.yaml
@@ -37,12 +37,8 @@ jobs:
run: poetry install --no-interaction --no-root
- name: Install library
run: poetry install --no-interaction
- name: Check Docs run
run: |
source .venv/bin/activate
pytest --nbmake docs/*.ipynb
- name: Test with pytest
run: |
source .venv/bin/activate
pytest
pytest --nbmake docs/ tests/
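Reviewer note: this folds the separate "Check Docs run" step into the main pytest step, so the notebooks under docs/ and the unit tests now run in one pass. To reproduce the CI check locally, a sketch along these lines should work from an activated virtualenv (assuming the nbmake plugin is installed as a dev dependency):

```python
# Mirrors the CI invocation `pytest --nbmake docs/ tests/`: nbmake
# executes the notebooks under docs/ alongside the regular test suite.
import sys

import pytest

sys.exit(pytest.main(["--nbmake", "docs/", "tests/"]))
```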

55 changes: 42 additions & 13 deletions docs/example.ipynb
@@ -19,7 +19,7 @@
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"doenut.set_log_level('WARNING')"
"doenut.set_log_level(\"WARNING\")"
]
},
{
@@ -36,7 +36,7 @@
"outputs": [],
"source": [
"inputs = pd.read_csv(\"data/inputs.csv\")\n",
"responses = pd.read_csv(\"data/responses.csv\")['Profit']\n"
"responses = pd.read_csv(\"data/responses.csv\")[\"Profit\"]"
]
},
{
@@ -320,8 +320,13 @@
],
"source": [
"dataset = doenut.data.ModifiableDataSet(inputs, responses)\n",
"basic_scaled_model, basic_unscaled_model = doenut.models.AveragedModel.tune_model(dataset)\n",
"print(f\"Model results: R2 {round(basic_scaled_model.r2, 2)}, Q2 {round(basic_scaled_model.q2, 2)}\")"
"(\n",
" basic_scaled_model,\n",
" basic_unscaled_model,\n",
") = doenut.models.AveragedModel.tune_model(dataset)\n",
"print(\n",
" f\"Model results: R2 {round(basic_scaled_model.r2, 2)}, Q2 {round(basic_scaled_model.q2, 2)}\"\n",
")"
]
},
{
@@ -380,8 +385,13 @@
"saturated_columns = list(saturated_inputs.columns)\n",
"print(saturated_columns)\n",
"saturated_dataset = doenut.data.ModifiableDataSet(saturated_inputs, responses)\n",
"saturated_scaled_model, saturated_unscaled_model = doenut.models.AveragedModel.tune_model(saturated_dataset)\n",
"print(f\"Model results: R2 {round(saturated_scaled_model.r2, 2)}, Q2 {round(saturated_scaled_model.q2, 2)}\")"
"(\n",
" saturated_scaled_model,\n",
" saturated_unscaled_model,\n",
") = doenut.models.AveragedModel.tune_model(saturated_dataset)\n",
"print(\n",
" f\"Model results: R2 {round(saturated_scaled_model.r2, 2)}, Q2 {round(saturated_scaled_model.q2, 2)}\"\n",
")"
]
},
{
@@ -421,7 +431,12 @@
}
],
"source": [
"doenut.plot.coeff_plot(saturated_scaled_model.coeffs, labels=saturated_columns, errors='p95', normalise=True)"
"doenut.plot.coeff_plot(\n",
" saturated_scaled_model.coeffs,\n",
" labels=saturated_columns,\n",
" errors=\"p95\",\n",
" normalise=True,\n",
")"
]
},
{
@@ -481,11 +496,23 @@
}
],
"source": [
"column_selector = [x for x in saturated_columns if x != 'P*Temp.']\n",
"filtered_dataset = doenut.data.ModifiableDataSet(saturated_inputs, responses).filter(column_selector)\n",
"filtered_scaled_model, filtered_unscaled_model = doenut.models.AveragedModel.tune_model(filtered_dataset)\n",
"doenut.plot.coeff_plot(filtered_scaled_model.coeffs, labels=column_selector, errors='p95', normalise=True)\n",
"print(f\"Model results: R2 {round(filtered_scaled_model.r2, 2)}, Q2 {round(filtered_scaled_model.q2, 2)}\")"
"column_selector = [x for x in saturated_columns if x != \"P*Temp.\"]\n",
"filtered_dataset = doenut.data.ModifiableDataSet(\n",
" saturated_inputs, responses\n",
").filter(column_selector)\n",
"(\n",
" filtered_scaled_model,\n",
" filtered_unscaled_model,\n",
") = doenut.models.AveragedModel.tune_model(filtered_dataset)\n",
"doenut.plot.coeff_plot(\n",
" filtered_scaled_model.coeffs,\n",
" labels=column_selector,\n",
" errors=\"p95\",\n",
" normalise=True,\n",
")\n",
"print(\n",
" f\"Model results: R2 {round(filtered_scaled_model.r2, 2)}, Q2 {round(filtered_scaled_model.q2, 2)}\"\n",
")"
]
},
{
@@ -512,7 +539,9 @@
}
],
"source": [
"doenut.plot.plot_observed_vs_predicted(responses, filtered_unscaled_model.predictions)"
"doenut.plot.plot_observed_vs_predicted(\n",
" responses, filtered_unscaled_model.predictions\n",
")"
]
},
{
49 changes: 34 additions & 15 deletions docs/solar_cells.ipynb
@@ -80,6 +80,7 @@
"\n",
"# set the log level\n",
"import logging\n",
"\n",
"doenut.set_log_level(logging.WARNING)"
]
},
@@ -784,7 +785,9 @@
],
"source": [
"# Now lets look at the averaged coefficients for the fitted model.\n",
"doenut.plot.coeff_plot(model.coeffs, labels=inputs.columns, errors='p95', normalise=True)"
"doenut.plot.coeff_plot(\n",
" model.coeffs, labels=inputs.columns, errors=\"p95\", normalise=True\n",
")"
]
},
{
@@ -913,7 +916,7 @@
"outputs": [],
"source": [
"# make some empty lists - this is necessary if you rerun the notebook out of order\n",
"sat_source_list, source_list = [],[]\n",
"sat_source_list, source_list = [], []\n",
"# autogenerate the higher order terms.\n",
"sat_inputs_orig, sat_source_list = doenut.add_higher_order_terms(\n",
" inputs, add_squares=True, add_interactions=True, column_list=[]\n",
@@ -1240,9 +1243,18 @@
],
"source": [
"## this is the full interactioin mode.\n",
"input_selector = ['Donor %', 'Conc.', 'Spin', 'Add.',\n",
" 'Donor %*Conc.', 'Donor %*Spin', 'Donor %*Add.',\n",
" 'Conc.*Spin', 'Conc.*Add.', 'Spin*Add.']\n",
"input_selector = [\n",
" \"Donor %\",\n",
" \"Conc.\",\n",
" \"Spin\",\n",
" \"Add.\",\n",
" \"Donor %*Conc.\",\n",
" \"Donor %*Spin\",\n",
" \"Donor %*Add.\",\n",
" \"Conc.*Spin\",\n",
" \"Conc.*Add.\",\n",
" \"Spin*Add.\",\n",
"]\n",
"\n",
"dataset = (\n",
" doenut.data.ModifiableDataSet(sat_inputs_orig, responses)\n",
@@ -2473,6 +2485,7 @@
"\n",
" return df_1\n",
"\n",
"\n",
"# slices\n",
"c_key = \"Spin\"\n",
"# y axis\n",
@@ -2481,24 +2494,27 @@
"x_key = \"Donor %\"\n",
"\n",
"doenut.plot.four_D_contour_plot(\n",
" unscaled_model=model.model, # model chosen for predictions (should be the best model)\n",
" unscaled_model=model.model, # model chosen for predictions (should be the best model)\n",
" x_key=x_key,\n",
" y_key=y_key,\n",
" c_key=c_key,\n",
" x_limits=[inputs[x_key].min(), inputs[x_key].max()], # auto-ccalculate the limits\n",
" x_limits=[\n",
" inputs[x_key].min(),\n",
" inputs[x_key].max(),\n",
" ], # auto-ccalculate the limits\n",
" y_limits=[inputs[y_key].min(), inputs[y_key].max()],\n",
" constants=[500, 1500, 2500], # values for slices in the c dimension\n",
" n_points=60, # number of points to calculate per axis (i.e. 3600 points per graph)\n",
" my_function=my_function, # function defined abve\n",
" input_selector=[], # all inputs\n",
" fig_label=\"Solar Cells\", # label\n",
" constants=[500, 1500, 2500], # values for slices in the c dimension\n",
" n_points=60, # number of points to calculate per axis (i.e. 3600 points per graph)\n",
" my_function=my_function, # function defined abve\n",
" input_selector=[], # all inputs\n",
" fig_label=\"Solar Cells\", # label\n",
" x_label=x_key,\n",
" y_label=y_key,\n",
" constant_label=c_key,\n",
" z_label=\"PCE %\",\n",
" cmap=\"jet\",\n",
" num_of_z_levels=16, # number of contours needed + 1\n",
" z_limits=[0, 12], # min and max range for z (response)\n",
" num_of_z_levels=16, # number of contours needed + 1\n",
" z_limits=[0, 12], # min and max range for z (response)\n",
")"
]
},
@@ -2673,7 +2689,10 @@
" model.model, sat_inputs_q5, input_selector\n",
")\n",
"letters = [x for x in question_5p2.index]\n",
"[print(f\"{letters[i]}:\\t{predictions[i][0]:.2}% PCE\") for i in range(len(letters))];"
"[\n",
" print(f\"{letters[i]}:\\t{predictions[i][0]:.2}% PCE\")\n",
" for i in range(len(letters))\n",
"];"
]
},
{
9 changes: 9 additions & 0 deletions pyproject.toml
@@ -32,3 +32,12 @@ build-backend = "poetry.core.masonry.api"

[tool.semantic_release]
version_variable = "pyproject.toml:version"

[tool.coverage.report]
exclude_also = [
"if TYPE_CHECKING:",
"raise AssertionError",
"raise NotImplementedError",
"@(abc\\.)?abstractmethod"
]
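Reviewer note: `exclude_also` appends to coverage.py's default `exclude_lines` patterns rather than replacing them, and needs coverage.py 7.2 or newer. As a minimal sketch of what the new patterns exclude (the class and method names here are hypothetical), every marked line below would otherwise show up as a missed line:

```python
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

if TYPE_CHECKING:  # excluded: only evaluated by static type checkers
    import pandas as pd


class Modifier(ABC):  # hypothetical class, for illustration only
    @abstractmethod  # excluded: the abstract stub body never runs
    def apply(self, data: "pd.DataFrame") -> "pd.DataFrame":
        raise NotImplementedError  # excluded: unreachable in concrete subclasses
```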

4 changes: 2 additions & 2 deletions src/doenut/data/data_set.py
@@ -18,9 +18,9 @@ def __init__(self, inputs: pd.DataFrame, responses: pd.DataFrame):
self.responses = responses
# pd has a nasty habit of converting single column df into series
if isinstance(inputs, pd.Series):
self.inputs = inputs.to_frame(name='inputs')
self.inputs = inputs.to_frame(name="inputs")
if isinstance(responses, pd.Series):
self.responses = responses.to_frame(name='responses')
self.responses = responses.to_frame(name="responses")

def get_inputs(self) -> pd.DataFrame:
return self.inputs
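For context, a quick illustration of the pandas behaviour this guard works around (column names are arbitrary): selecting a single column yields a Series, and `to_frame` restores a one-column DataFrame.

```python
import pandas as pd

df = pd.DataFrame({"Profit": [1.0, 2.0], "Cost": [3.0, 4.0]})
single = df["Profit"]         # single-column selection returns a pd.Series
print(type(single).__name__)  # Series
restored = single.to_frame(name="responses")
print(type(restored).__name__, list(restored.columns))  # DataFrame ['responses']
```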
2 changes: 1 addition & 1 deletion src/doenut/data/modifiers/data_set_modifier.py
@@ -3,7 +3,7 @@


class DataSetModifier(ABC):
"""Parent class for all types of modifier.
r"""Parent class for all types of modifier.
They take a dataset in, perform some form of operation on it and then
pass it along

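The `r"""` prefix matters once a docstring contains backslashes (regex fragments in parameter docs, for instance): a raw string keeps them verbatim, while a plain string makes recent Pythons warn about invalid escape sequences. A minimal sketch with made-up docstrings:

```python
def plain():
    """Matches \d+ digits."""  # SyntaxWarning on Python 3.12+: invalid escape '\d'


def raw():
    r"""Matches \d+ digits."""  # raw docstring: backslash preserved, no warning
```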
8 changes: 5 additions & 3 deletions src/doenut/designer.py
@@ -21,7 +21,7 @@


def _check_is_input_dict(data: Dict[Any, Iterable]) -> None:
""" Validate an input dictionary's type.
"""Validate an input dictionary's type.
Most of these functions require a dictionary of lists as their input data.
This is a helper function that will throw an appropriate assert if needed.

@@ -48,7 +48,7 @@ def _check_is_input_dict(data: Dict[Any, Iterable]) -> None:


def get_ranges(data: Dict[Any, Iterable[float]]) -> Dict[Any, List[float]]:
""" Find the ranges of data in an input dictionary
"""Find the ranges of data in an input dictionary

Go through a dictionary of value lists, and return the same, but with
only the min / max value from each list.
@@ -133,7 +133,9 @@ def full_fact(data: Dict[Any, List[float]]) -> pd.DataFrame:
return result


def frac_fact(data: Dict[Any, List[float]], resolution: int = None) -> pd.DataFrame:
def frac_fact(
data: Dict[Any, List[float]], resolution: int = None
) -> pd.DataFrame:
"""build a 2-level fractional factorial design

Parameters
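As a usage sketch (factor names and levels are invented for illustration, and the expected shapes are an assumption based on the docstrings), the designer functions take a dict mapping factor names to level lists and return a DataFrame with one run per row:

```python
from doenut.designer import frac_fact, full_fact

# Hypothetical two-level factors.
factors = {"Temp": [30.0, 70.0], "Conc": [0.1, 0.5], "Time": [5.0, 15.0]}

full = full_fact(factors)                # full factorial: 2**3 = 8 runs
frac = frac_fact(factors, resolution=3)  # reduced, resolution-III fraction
print(full.shape, frac.shape)
```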
16 changes: 11 additions & 5 deletions src/doenut/doenut.py
@@ -14,6 +14,7 @@
from doenut.data import ModifiableDataSet
from doenut.models import AveragedModel


if TYPE_CHECKING:
import sklearn

@@ -29,7 +30,11 @@ def set_log_level(level: "str|int") -> None:
level : "str|int"
logging module value representing the desired log level
"""
loggers = [logger for name, logger in logging.root.manager.loggerDict.items() if name.startswith('doenut')]
loggers = [
logger
for name, logger in logging.root.manager.loggerDict.items()
if name.startswith("doenut")
]

for l in loggers:
if isinstance(l, logging.PlaceHolder):
@@ -551,11 +556,12 @@ def autotune_model(
if i in selected_input_indices:
try:
dependency_dict[x].add(i)
except: # TODO:: fix blank except. I _think_ KeyError
except KeyError as e:
if do_hierarchical:
print(
"Error: Heirarchical model missing lower level terms!!!!"
logger.error(
"Error: Hierarchical model missing lower level terms."
)
raise e
logger.info(f"Dependencies: {dependency_dict}")
# Handy shortcut - since the empty set is considered false,
# we can just test dependency_dict[some_term] to see if there
@@ -605,7 +611,7 @@ def autotune_model(
have_removed = False

for idx, error_value in insignificant_terms:
# If it has dependents, and you're doing an heirarchical model skip it
# If it has dependents, and you're doing a hierarchical model, skip it
if do_hierarchical:
if dependency_dict[idx]:
continue
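For readers following the autotune change: the "empty set is considered false" shortcut mentioned in the hunk above works as below (term names are hypothetical). Only terms whose dependent set is empty may be dropped when fitting hierarchically.

```python
# Each term maps to the set of higher-order terms that depend on it.
dependency_dict = {
    "P": {"P*Temp."},     # truthy: P still has a dependent interaction term
    "Temp.": {"P*Temp."},
    "P*Temp.": set(),     # falsy: a leaf term, safe to drop hierarchically
}

droppable = [term for term, deps in dependency_dict.items() if not deps]
print(droppable)  # ['P*Temp.']
```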