diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 8be89c0..0db005c 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -37,12 +37,8 @@ jobs: run: poetry install --no-interaction --no-root - name: Install library run: poetry install --no-interaction - - name: Check Docs run - run: | - source .venv/bin/activate - pytest --nbmake docs/*.ipynb - name: Test with pytest run: | source .venv/bin/activate - pytest + pytest --nbmake docs/ tests/ diff --git a/docs/example.ipynb b/docs/example.ipynb index cfe72e1..9c5b317 100644 --- a/docs/example.ipynb +++ b/docs/example.ipynb @@ -19,7 +19,7 @@ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", - "doenut.set_log_level('WARNING')" + "doenut.set_log_level(\"WARNING\")" ] }, { @@ -36,7 +36,7 @@ "outputs": [], "source": [ "inputs = pd.read_csv(\"data/inputs.csv\")\n", - "responses = pd.read_csv(\"data/responses.csv\")['Profit']\n" + "responses = pd.read_csv(\"data/responses.csv\")[\"Profit\"]" ] }, { @@ -320,8 +320,13 @@ ], "source": [ "dataset = doenut.data.ModifiableDataSet(inputs, responses)\n", - "basic_scaled_model, basic_unscaled_model = doenut.models.AveragedModel.tune_model(dataset)\n", - "print(f\"Model results: R2 {round(basic_scaled_model.r2, 2)}, Q2 {round(basic_scaled_model.q2, 2)}\")" + "(\n", + " basic_scaled_model,\n", + " basic_unscaled_model,\n", + ") = doenut.models.AveragedModel.tune_model(dataset)\n", + "print(\n", + " f\"Model results: R2 {round(basic_scaled_model.r2, 2)}, Q2 {round(basic_scaled_model.q2, 2)}\"\n", + ")" ] }, { @@ -380,8 +385,13 @@ "saturated_columns = list(saturated_inputs.columns)\n", "print(saturated_columns)\n", "saturated_dataset = doenut.data.ModifiableDataSet(saturated_inputs, responses)\n", - "saturated_scaled_model, saturated_unscaled_model = doenut.models.AveragedModel.tune_model(saturated_dataset)\n", - "print(f\"Model results: R2 {round(saturated_scaled_model.r2, 2)}, Q2 {round(saturated_scaled_model.q2, 2)}\")" + "(\n", + " saturated_scaled_model,\n", + " saturated_unscaled_model,\n", + ") = doenut.models.AveragedModel.tune_model(saturated_dataset)\n", + "print(\n", + " f\"Model results: R2 {round(saturated_scaled_model.r2, 2)}, Q2 {round(saturated_scaled_model.q2, 2)}\"\n", + ")" ] }, { @@ -421,7 +431,12 @@ } ], "source": [ - "doenut.plot.coeff_plot(saturated_scaled_model.coeffs, labels=saturated_columns, errors='p95', normalise=True)" + "doenut.plot.coeff_plot(\n", + " saturated_scaled_model.coeffs,\n", + " labels=saturated_columns,\n", + " errors=\"p95\",\n", + " normalise=True,\n", + ")" ] }, { @@ -481,11 +496,23 @@ } ], "source": [ - "column_selector = [x for x in saturated_columns if x != 'P*Temp.']\n", - "filtered_dataset = doenut.data.ModifiableDataSet(saturated_inputs, responses).filter(column_selector)\n", - "filtered_scaled_model, filtered_unscaled_model = doenut.models.AveragedModel.tune_model(filtered_dataset)\n", - "doenut.plot.coeff_plot(filtered_scaled_model.coeffs, labels=column_selector, errors='p95', normalise=True)\n", - "print(f\"Model results: R2 {round(filtered_scaled_model.r2, 2)}, Q2 {round(filtered_scaled_model.q2, 2)}\")" + "column_selector = [x for x in saturated_columns if x != \"P*Temp.\"]\n", + "filtered_dataset = doenut.data.ModifiableDataSet(\n", + " saturated_inputs, responses\n", + ").filter(column_selector)\n", + "(\n", + " filtered_scaled_model,\n", + " filtered_unscaled_model,\n", + ") = doenut.models.AveragedModel.tune_model(filtered_dataset)\n", + "doenut.plot.coeff_plot(\n", + " 
filtered_scaled_model.coeffs,\n",
+    "    labels=column_selector,\n",
+    "    errors=\"p95\",\n",
+    "    normalise=True,\n",
+    ")\n",
+    "print(\n",
+    "    f\"Model results: R2 {round(filtered_scaled_model.r2, 2)}, Q2 {round(filtered_scaled_model.q2, 2)}\"\n",
+    ")"
   ]
  },
  {
@@ -512,7 +539,9 @@
    }
   ],
   "source": [
-    "doenut.plot.plot_observed_vs_predicted(responses, filtered_unscaled_model.predictions)"
+    "doenut.plot.plot_observed_vs_predicted(\n",
+    "    responses, filtered_unscaled_model.predictions\n",
+    ")"
   ]
  },
  {
diff --git a/docs/solar_cells.ipynb b/docs/solar_cells.ipynb
index b59be78..023ac02 100644
--- a/docs/solar_cells.ipynb
+++ b/docs/solar_cells.ipynb
@@ -80,6 +80,7 @@
     "\n",
     "# set the log level\n",
     "import logging\n",
+    "\n",
     "doenut.set_log_level(logging.WARNING)"
   ]
  },
@@ -784,7 +785,9 @@
   ],
   "source": [
     "# Now lets look at the averaged coefficients for the fitted model.\n",
-    "doenut.plot.coeff_plot(model.coeffs, labels=inputs.columns, errors='p95', normalise=True)"
+    "doenut.plot.coeff_plot(\n",
+    "    model.coeffs, labels=inputs.columns, errors=\"p95\", normalise=True\n",
+    ")"
   ]
  },
  {
@@ -913,7 +916,7 @@
   "outputs": [],
   "source": [
     "# make some empty lists - this is necessary if you rerun the notebook out of order\n",
-    "sat_source_list, source_list = [],[]\n",
+    "sat_source_list, source_list = [], []\n",
     "# autogenerate the higher order terms.\n",
     "sat_inputs_orig, sat_source_list = doenut.add_higher_order_terms(\n",
     "    inputs, add_squares=True, add_interactions=True, column_list=[]\n",
     ")\n",
@@ -1240,9 +1243,18 @@
   ],
   "source": [
     "## this is the full interactioin mode.\n",
-    "input_selector = ['Donor %', 'Conc.', 'Spin', 'Add.',\n",
-    "                  'Donor %*Conc.', 'Donor %*Spin', 'Donor %*Add.',\n",
-    "                  'Conc.*Spin', 'Conc.*Add.', 'Spin*Add.']\n",
+    "input_selector = [\n",
+    "    \"Donor %\",\n",
+    "    \"Conc.\",\n",
+    "    \"Spin\",\n",
+    "    \"Add.\",\n",
+    "    \"Donor %*Conc.\",\n",
+    "    \"Donor %*Spin\",\n",
+    "    \"Donor %*Add.\",\n",
+    "    \"Conc.*Spin\",\n",
+    "    \"Conc.*Add.\",\n",
+    "    \"Spin*Add.\",\n",
+    "]\n",
     "\n",
     "dataset = (\n",
     "    doenut.data.ModifiableDataSet(sat_inputs_orig, responses)\n",
@@ -2473,6 +2485,7 @@
     "\n",
     "    return df_1\n",
     "\n",
+    "\n",
     "# slices\n",
     "c_key = \"Spin\"\n",
     "# y axis\n",
@@ -2481,24 +2494,27 @@
     "x_key = \"Donor %\"\n",
     "\n",
     "doenut.plot.four_D_contour_plot(\n",
-    "    unscaled_model=model.model, # model chosen for predictions (should be the best model)\n",
+    "    unscaled_model=model.model,  # model chosen for predictions (should be the best model)\n",
     "    x_key=x_key,\n",
     "    y_key=y_key,\n",
     "    c_key=c_key,\n",
-    "    x_limits=[inputs[x_key].min(), inputs[x_key].max()], # auto-ccalculate the limits\n",
+    "    x_limits=[\n",
+    "        inputs[x_key].min(),\n",
+    "        inputs[x_key].max(),\n",
+    "    ],  # auto-calculate the limits\n",
     "    y_limits=[inputs[y_key].min(), inputs[y_key].max()],\n",
-    "    constants=[500, 1500, 2500], # values for slices in the c dimension\n",
-    "    n_points=60, # number of points to calculate per axis (i.e. 3600 points per graph)\n",
-    "    my_function=my_function, # function defined abve\n",
-    "    input_selector=[], # all inputs\n",
-    "    fig_label=\"Solar Cells\", # label\n",
+    "    constants=[500, 1500, 2500],  # values for slices in the c dimension\n",
+    "    n_points=60,  # number of points to calculate per axis (i.e. 3600 points per graph)\n",
+    "    my_function=my_function,  # function defined above\n",
+    "    input_selector=[],  # all inputs\n",
+    "    fig_label=\"Solar Cells\",  # label\n",
     "    x_label=x_key,\n",
     "    y_label=y_key,\n",
     "    constant_label=c_key,\n",
     "    z_label=\"PCE %\",\n",
     "    cmap=\"jet\",\n",
-    "    num_of_z_levels=16, # number of contours needed + 1\n",
-    "    z_limits=[0, 12], # min and max range for z (response)\n",
+    "    num_of_z_levels=16,  # number of contours needed + 1\n",
+    "    z_limits=[0, 12],  # min and max range for z (response)\n",
     ")"
   ]
  },
@@ -2673,7 +2689,10 @@
     "    model.model, sat_inputs_q5, input_selector\n",
     ")\n",
     "letters = [x for x in question_5p2.index]\n",
-    "[print(f\"{letters[i]}:\\t{predictions[i][0]:.2}% PCE\") for i in range(len(letters))];"
+    "[\n",
+    "    print(f\"{letters[i]}:\\t{predictions[i][0]:.2}% PCE\")\n",
+    "    for i in range(len(letters))\n",
+    "];"
   ]
  },
  {
diff --git a/pyproject.toml b/pyproject.toml
index c89fade..9e6d103 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,3 +32,12 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.semantic_release]
 version_variable = "pyproject.toml:version"
+
+[tool.coverage.report]
+exclude_also = [
+    "if TYPE_CHECKING:",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "@(abc\\.)?abstractmethod"
+]
+
diff --git a/src/doenut/data/data_set.py b/src/doenut/data/data_set.py
index 78ae146..c326772 100644
--- a/src/doenut/data/data_set.py
+++ b/src/doenut/data/data_set.py
@@ -18,9 +18,9 @@ def __init__(self, inputs: pd.DataFrame, responses: pd.DataFrame):
         self.responses = responses
         # pd has a nasty habit of converting single column df into series
         if isinstance(inputs, pd.Series):
-            self.inputs = inputs.to_frame(name='inputs')
+            self.inputs = inputs.to_frame(name="inputs")
         if isinstance(responses, pd.Series):
-            self.responses = responses.to_frame(name='responses')
+            self.responses = responses.to_frame(name="responses")
 
     def get_inputs(self) -> pd.DataFrame:
         return self.inputs
diff --git a/src/doenut/data/modifiers/data_set_modifier.py b/src/doenut/data/modifiers/data_set_modifier.py
index 01727c9..f6a7b1d 100644
--- a/src/doenut/data/modifiers/data_set_modifier.py
+++ b/src/doenut/data/modifiers/data_set_modifier.py
@@ -3,7 +3,7 @@
 class DataSetModifier(ABC):
-    """Parent class for all types of modifier.
+    r"""Parent class for all types of modifier.
 
     They take a dataset in, perform some form of operation on it
     and then pass it along
diff --git a/src/doenut/designer.py b/src/doenut/designer.py
index 3efe0fb..166e1a2 100644
--- a/src/doenut/designer.py
+++ b/src/doenut/designer.py
@@ -21,7 +21,7 @@
 def _check_is_input_dict(data: Dict[Any, Iterable]) -> None:
-    """ Validate an input dictionary's type.
+    """Validate an input dictionary's type.
 
     Most of these functions require a dictionary of lists as their input data
     This is a helper function that will throw an appropriate assert if needed.
@@ -48,7 +48,7 @@ def _check_is_input_dict(data: Dict[Any, Iterable]) -> None:
 def get_ranges(data: Dict[Any, Iterable[float]]) -> Dict[Any, List[float]]:
-    """ Find the ranges of data in an input dictionary
+    """Find the ranges of data in an input dictionary
 
     Go through a dictionary of value lists, and return the same, but with
     only the min / max value from each in each.
@@ -133,7 +133,9 @@ def full_fact(data: Dict[Any, List[float]]) -> pd.DataFrame:
     return result
 
 
-def frac_fact(data: Dict[Any, List[float]], resolution: int = None) -> pd.DataFrame:
+def frac_fact(
+    data: Dict[Any, List[float]], resolution: int = None
+) -> pd.DataFrame:
     """build a 2-level fractional factorial design
 
     Parameters
diff --git a/src/doenut/doenut.py b/src/doenut/doenut.py
index c0a8b57..3003aa3 100644
--- a/src/doenut/doenut.py
+++ b/src/doenut/doenut.py
@@ -14,6 +14,7 @@
 from doenut.data import ModifiableDataSet
 from doenut.models import AveragedModel
 
+
 if TYPE_CHECKING:
     import sklearn
 
@@ -29,7 +30,11 @@ def set_log_level(level: "str|int") -> None:
     level : "str|int"
         logging module value representing the desired log level
     """
-    loggers = [logger for name, logger in logging.root.manager.loggerDict.items() if name.startswith('doenut')]
+    loggers = [
+        logger
+        for name, logger in logging.root.manager.loggerDict.items()
+        if name.startswith("doenut")
+    ]
 
     for l in loggers:
         if isinstance(l, logging.PlaceHolder):
@@ -551,11 +556,12 @@
             if i in selected_input_indices:
                 try:
                     dependency_dict[x].add(i)
-                except:  # TODO:: fix blank except. I _think_ KeyError
+                except KeyError:
                     if do_hierarchical:
-                        print(
-                            "Error: Heirarchical model missing lower level terms!!!!"
+                        logger.error(
+                            "Hierarchical model missing lower level terms."
                         )
+                        raise
     logger.info(f"Dependencies: {dependency_dict}")
     # Handy shortcut - since the empty set is considered false,
     # we can just test dependency_dict[some_term] to see if there
@@ -605,7 +611,7 @@
         have_removed = False
 
         for idx, error_value in insignificant_terms:
-            # If it has dependents, and you're doing an heirarchical model skip it
+            # If it has dependents, and you're doing a hierarchical model, skip it
             if do_hierarchical:
                 if dependency_dict[idx]:
                     continue
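
For reference, a minimal usage sketch of the reworked set_log_level. This is a hypothetical session, assuming only what the diff above shows: the function accepts a level as "str|int" and adjusts every logger registered under the "doenut" namespace.

    import logging

    import doenut

    # Both call forms appear in the notebooks touched by this patch:
    # the level name as a string, or the logging-module constant.
    doenut.set_log_level("WARNING")
    doenut.set_log_level(logging.WARNING)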
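
Likewise, a sketch of the model-fitting pattern the reformatted docs/example.ipynb cells exercise. The CSV paths and the "Profit" column come from that notebook; treat this as illustrative rather than canonical API documentation.

    import pandas as pd

    import doenut

    inputs = pd.read_csv("data/inputs.csv")
    # Selecting a single column yields a pandas Series; per the data_set.py
    # change above, the dataset layer re-frames it as a one-column DataFrame.
    responses = pd.read_csv("data/responses.csv")["Profit"]

    dataset = doenut.data.ModifiableDataSet(inputs, responses)
    # tune_model returns a (scaled, unscaled) pair of fitted models.
    scaled, unscaled = doenut.models.AveragedModel.tune_model(dataset)
    print(f"R2 {round(scaled.r2, 2)}, Q2 {round(scaled.q2, 2)}")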