From f48b1b0482d3ce205a0abc52bc8d46795f40d515 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Sat, 7 Dec 2024 19:25:09 +0100 Subject: [PATCH 1/8] Show constructor in inheritance tutorial --- docs/model_usage.ipynb | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/docs/model_usage.ipynb b/docs/model_usage.ipynb index 4340d4d..ff798e9 100644 --- a/docs/model_usage.ipynb +++ b/docs/model_usage.ipynb @@ -15,7 +15,7 @@ "for the use with `formulaic-contrasts`. The aim is to build a model that takes a pandas DataFrame and a formulaic formula as input\n", "allows to fit the model to a continuous variable from the dataframe and perform a statistical test for a given contrast. \n", "\n", - "This can be achived with the following class definition. The constructor, the {func}`~formulaic_contrasts.FormulaicContrasts.contrast` and {func}`~formulaic_contrasts.FormulaicContrasts.cond` methods are inherited from the {class}`~formulaic_contrasts.FormulaicContrasts`\n", + "This can be achieved with the following class definition. 
The constructor, the {func}`~formulaic_contrasts.FormulaicContrasts.contrast` and {func}`~formulaic_contrasts.FormulaicContrasts.cond` methods are inherited from the {class}`~formulaic_contrasts.FormulaicContrasts`\n", "base class:" ] }, @@ -28,9 +28,13 @@ "import formulaic_contrasts\n", "import numpy as np\n", "import statsmodels.api as sm\n", + "import pandas as pd\n", "\n", "\n", "class StatsmodelsOLS(formulaic_contrasts.FormulaicContrasts):\n", + " def __init__(self, data: pd.DataFrame, design: str):\n", + " super().__init__(data, design)\n", + "\n", " def fit(self, variable: str):\n", " self.mod = sm.OLS(self.data[variable], self.design_matrix)\n", " self.mod = self.mod.fit()\n", @@ -371,7 +375,7 @@ " drop_field='non_responder',\n", " column_names=('non_responder',\n", " 'responder'),\n", - " colname_format='{name}[T.{field}]')],\n", + " colname_format='{name}[{field}]')],\n", " 'treatment': [FactorMetadata(name='treatment',\n", " reduced_rank=True,\n", " custom_encoder=False,\n", @@ -379,7 +383,7 @@ " kind=,\n", " drop_field='drugA',\n", " column_names=('drugA', 'drugB'),\n", - " colname_format='{name}[T.{field}]')]})\n" + " colname_format='{name}[{field}]')]})\n" ] } ], @@ -418,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -569,12 +573,12 @@ "data": { "text/plain": [ "defaultdict(set,\n", - " {'np.log': {'np.log(biomarker)'},\n", - " 'biomarker': {'np.log(biomarker)'},\n", - " 'C': {'C(response)',\n", - " \"C(treatment, contr.treatment(base='drugB'))\"},\n", + " {'biomarker': {'np.log(biomarker)'},\n", + " 'np.log': {'np.log(biomarker)'},\n", " 'treatment': {\"C(treatment, contr.treatment(base='drugB'))\"},\n", " 'contr.treatment': {\"C(treatment, contr.treatment(base='drugB'))\"},\n", + " 'C': {'C(response)',\n", + " \"C(treatment, contr.treatment(base='drugB'))\"},\n", " 'response': {'C(response)'}})" ] }, From 029739111aaddd41b250fbe0f5c5a445e069fb4a Mon Sep 17 00:00:00 2001 
From: Gregor Sturm Date: Sat, 7 Dec 2024 19:31:54 +0100 Subject: [PATCH 2/8] Improve docstrings --- src/formulaic_contrasts/_contrasts.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/formulaic_contrasts/_contrasts.py b/src/formulaic_contrasts/_contrasts.py index 2924fc1..85ff208 100644 --- a/src/formulaic_contrasts/_contrasts.py +++ b/src/formulaic_contrasts/_contrasts.py @@ -33,6 +33,15 @@ def cond(self, **kwargs): """ Get a contrast vector representing a specific condition. + The `kwargs` are key/value pairs where the key refers to a variable used in the + design and the value represents a category of that variable. Variables not specified + will be filled with their default/baseline value. + + The vectors generated by `.cond` can be combined using standard arithmetic operations + to obtain the desired contrast, e.g. + + >>> contrast = model.cond(treatment="drugA") - model.cond(treatment="placebo") + Parameters ---------- **kwargs @@ -40,7 +49,8 @@ def cond(self, **kwargs): Returns ------- - A contrast vector that aligns to the columns of the design matrix. + A vector with one element per column in the design matrix, + where the kwargs arguments are coded as in the design matrix. """ cond_dict = kwargs if not set(cond_dict.keys()).issubset(self.variables): @@ -60,6 +70,9 @@ def contrast(self, column, baseline, group_to_compare): """ Build a simple contrast for pairwise comparisons. + For more complex contrasts, please construct a contrast vector using + :func:`~formulaic_contrasts.FormulaicContrasts.cond`. 
+ Parameters ---------- column From 02be0f880f22ac8a78d8b0116a81a81cda9a4a38 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Sat, 14 Dec 2024 20:40:26 +0100 Subject: [PATCH 3/8] Don't use interaction term in model usage --- docs/model_usage.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/model_usage.ipynb b/docs/model_usage.ipynb index ff798e9..d8479cc 100644 --- a/docs/model_usage.ipynb +++ b/docs/model_usage.ipynb @@ -212,7 +212,7 @@ } ], "source": [ - "model = StatsmodelsOLS(df, \"~ treatment * response\")\n", + "model = StatsmodelsOLS(df, \"~ treatment + response\")\n", "model.fit(\"biomarker\")\n", "model.t_test(\n", " model.contrast(\"response\", baseline=\"non_responder\", group_to_compare=\"responder\")\n", @@ -287,7 +287,7 @@ } ], "source": [ - "model = StatsmodelsOLS(df, \"~ treatment * response\")\n", + "model = StatsmodelsOLS(df, \"~ treatment + response\")\n", "model.fit(\"biomarker\")\n", "model.t_test(\n", " model.contrast(\"response\", baseline=\"non_responder\", group_to_compare=\"responder\")\n", @@ -342,7 +342,7 @@ "outputs": [], "source": [ "design_mat = materializer_class(df, record_factor_metadata=True).get_model_matrix(\n", - " \"~ treatment * response\"\n", + " \"~ treatment + response\"\n", ")" ] }, From 48e201d2a954ffc427241072eba8860fff2051e9 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Sat, 14 Dec 2024 20:56:20 +0100 Subject: [PATCH 4/8] Improve .cond and .contrast docstrings --- src/formulaic_contrasts/_contrasts.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/formulaic_contrasts/_contrasts.py b/src/formulaic_contrasts/_contrasts.py index 85ff208..62a7db0 100644 --- a/src/formulaic_contrasts/_contrasts.py +++ b/src/formulaic_contrasts/_contrasts.py @@ -42,6 +42,8 @@ def cond(self, **kwargs): >>> contrast = model.cond(treatment="drugA") - model.cond(treatment="placebo") + For more information on how to build contrasts, see :doc:`/contrasts`. 
+ Parameters ---------- **kwargs @@ -68,7 +70,7 @@ def cond(self, **kwargs): def contrast(self, column, baseline, group_to_compare): """ - Build a simple contrast for pairwise comparisons. + Build a simple contrast for pairwise comparisons of a single variable. For more complex contrasts, please construct a contrast vector using :func:`~formulaic_contrasts.FormulaicContrasts.cond`. From f126708f1fa83c751999fc7b3a3a8ab8e55dc420 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 14 Dec 2024 19:56:48 +0000 Subject: [PATCH 5/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CHANGELOG.md | 2 +- docs/contributing.md | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6338823..8acd77a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning][]. ## v0.2.0 -- Rename `FormulaicContrasts.design` to `FormulaicContrasts.design_matrix` +- Rename `FormulaicContrasts.design` to `FormulaicContrasts.design_matrix` ## v0.1.0 diff --git a/docs/contributing.md b/docs/contributing.md index d54236f..da4a605 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -155,11 +155,11 @@ This will automatically create a git tag and trigger a Github workflow that crea Please write documentation for new or changed features and use-cases. This project uses [sphinx][] with the following features: -- The [myst][] extension allows to write documentation in markdown/Markedly Structured Text -- [Numpy-style docstrings][numpydoc] (through the [napoloen][numpydoc-napoleon] extension). 
-- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks)) -- [sphinx-autodoc-typehints][], to automatically reference annotated input and output types -- Citations (like {cite:p}`Virshup_2023`) can be included with [sphinxcontrib-bibtex](https://sphinxcontrib-bibtex.readthedocs.io/) +- The [myst][] extension allows to write documentation in markdown/Markedly Structured Text +- [Numpy-style docstrings][numpydoc] (through the [napoloen][numpydoc-napoleon] extension). +- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks)) +- [sphinx-autodoc-typehints][], to automatically reference annotated input and output types +- Citations (like {cite:p}`Virshup_2023`) can be included with [sphinxcontrib-bibtex](https://sphinxcontrib-bibtex.readthedocs.io/) See scanpy’s {doc}`scanpy:dev/documentation` for more information on how to write your own. @@ -183,10 +183,10 @@ please check out [this feature request][issue-render-notebooks] in the `cookiecu #### Hints -- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. - Only if you do so can sphinx automatically create a link to the external documentation. -- If building the documentation fails because of a missing link that is outside your control, - you can add an entry to the `nitpick_ignore` list in `docs/conf.py` +- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. + Only if you do so can sphinx automatically create a link to the external documentation. 
+- If building the documentation fails because of a missing link that is outside your control, + you can add an entry to the `nitpick_ignore` list in `docs/conf.py` (docs-building)= From 965515108fd2719f808bbc558113202561342957 Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Sun, 15 Dec 2024 14:36:46 +0100 Subject: [PATCH 6/8] Update contrasts notebook --- docs/contrasts.ipynb | 154 +++++++++++++++++++++++++++++++++++++---- docs/model_usage.ipynb | 10 +-- 2 files changed, 144 insertions(+), 20 deletions(-) diff --git a/docs/contrasts.ipynb b/docs/contrasts.ipynb index d4ec4e4..f80801a 100644 --- a/docs/contrasts.ipynb +++ b/docs/contrasts.ipynb @@ -251,13 +251,135 @@ "For instance, we could \n", "investigate differences between responders and non-responders, independent of treatment by fitting the model \n", "`~ response + treatment` and then comparing the category `\"responder\"` in the column `response` with the category `\"non_responder\"`.\n", - "This can be achieved using the {func}`~formulaic_contrasts.FormulaicContrasts.contrast` method. " + "\n", + "Given the data frame from above and the model `~ response + treatment`, the design matrix contains the following distinct\n", + "entries, encoding the different combinations of response and drug. " ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Interceptresponse[T.responder]treatment[T.drugB]
01.000
101.010
401.001
701.011
\n", + "
" + ], + "text/plain": [ + " Intercept response[T.responder] treatment[T.drugB]\n", + "0 1.0 0 0\n", + "10 1.0 1 0\n", + "40 1.0 0 1\n", + "70 1.0 1 1" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from formulaic import model_matrix\n", + "\n", + "model_matrix(\"~ response + treatment\", df).drop_duplicates()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `response[T.responder]` column encodes `\"responder\"` as 1 and `\"non_responder\"` as 0. The \n", + "intercept is always 1 and the other column is irrelevant for our desired comparison. The entries of a contrast vector \n", + "always correspond to the columns of the design matrix. We therefore need a contrast vector\n", + "that compares `(1, 1, 0)` vs. `(1, 0, 0)`:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 0])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "\n", + "contrast = np.array((1, 1, 0)) - np.array((1, 0, 0))\n", + "contrast" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using formulaic-contrasts' {func}`~formulaic_contrasts.FormulaicContrasts.cond` function, we can build the same\n", + "contrast vector by specifying the categories of interest:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, "outputs": [ { "data": { @@ -268,7 +390,7 @@ "Name: 0, dtype: float64" ] }, - "execution_count": 3, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -278,11 +400,7 @@ "\n", "mod = FormulaicContrasts(df, \"~ response + treatment\")\n", "\n", - "contrast = mod.contrast(\n", - " column=\"response\",\n", - " baseline=\"non_responder\",\n", - " group_to_compare=\"responder\",\n", - ")\n", + "contrast = mod.cond(response=\"responder\") - 
mod.cond(response=\"non_responder\")\n", "contrast" ] }, @@ -290,12 +408,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This is equivalent to the following {func}`~formulaic_contrasts.FormulaicContrasts.cond` call:" + "For this very common case of comparing two categories of the same variable, {func}`~formulaic_contrasts.FormulaicContrasts.contrast` \n", + "provides a convenient shortcut for building the same contrast:" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -307,13 +426,18 @@ "Name: 0, dtype: float64" ] }, - "execution_count": 4, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "mod.cond(response=\"responder\") - mod.cond(response=\"non_responder\")" + "contrast = mod.contrast(\n", + " column=\"response\",\n", + " baseline=\"non_responder\",\n", + " group_to_compare=\"responder\",\n", + ")\n", + "contrast" ] }, { @@ -328,7 +452,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -341,7 +465,7 @@ "Name: 0, dtype: float64" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -368,7 +492,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -381,7 +505,7 @@ "Name: 0, dtype: float64" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } diff --git a/docs/model_usage.ipynb b/docs/model_usage.ipynb index d8479cc..bf5bdf9 100644 --- a/docs/model_usage.ipynb +++ b/docs/model_usage.ipynb @@ -202,7 +202,7 @@ "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", - "c0 -1.6492 0.935 -1.764 0.082 -3.512 0.213\n", + "c0 1.9563 0.775 2.525 0.014 0.413 3.499\n", 
"==============================================================================" ] }, @@ -277,7 +277,7 @@ "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", - "c0 -1.6492 0.935 -1.764 0.082 -3.512 0.213\n", + "c0 1.9563 0.775 2.525 0.014 0.413 3.499\n", "==============================================================================" ] }, @@ -422,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -573,8 +573,8 @@ "data": { "text/plain": [ "defaultdict(set,\n", - " {'biomarker': {'np.log(biomarker)'},\n", - " 'np.log': {'np.log(biomarker)'},\n", + " {'np.log': {'np.log(biomarker)'},\n", + " 'biomarker': {'np.log(biomarker)'},\n", " 'treatment': {\"C(treatment, contr.treatment(base='drugB'))\"},\n", " 'contr.treatment': {\"C(treatment, contr.treatment(base='drugB'))\"},\n", " 'C': {'C(response)',\n", From 45fb03baa4da66e48986269be05b94e2f155d1fb Mon Sep 17 00:00:00 2001 From: Gregor Sturm Date: Sun, 15 Dec 2024 14:41:24 +0100 Subject: [PATCH 7/8] Update CHANGELOG --- CHANGELOG.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8acd77a..319644e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,14 @@ and this project adheres to [Semantic Versioning][]. [keep a changelog]: https://keepachangelog.com/en/1.0.0/ [semantic versioning]: https://semver.org/spec/v2.0.0.html +## v1.0.0 + +- Update tutorials and docstrings of `.cond()` and `.contrast()` ([#11](https://github.com/scverse/formulaic-contrasts/pull/11)) +- No other changes, but the API is considered stable now. 
+ ## v0.2.0 -- Rename `FormulaicContrasts.design` to `FormulaicContrasts.design_matrix` +- Rename `FormulaicContrasts.design` to `FormulaicContrasts.design_matrix` ## v0.1.0 From ad40ebffc2505a91d3bfbf4663d3e8b503bf7ce8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 15 Dec 2024 13:41:34 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 319644e..b5f7c83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,12 +10,12 @@ and this project adheres to [Semantic Versioning][]. ## v1.0.0 -- Update tutorials and docstrings of `.cond()` and `.contrast()` ([#11](https://github.com/scverse/formulaic-contrasts/pull/11)) -- No other changes, but the API is considered stable now. +- Update tutorials and docstrings of `.cond()` and `.contrast()` ([#11](https://github.com/scverse/formulaic-contrasts/pull/11)) +- No other changes, but the API is considered stable now. ## v0.2.0 -- Rename `FormulaicContrasts.design` to `FormulaicContrasts.design_matrix` +- Rename `FormulaicContrasts.design` to `FormulaicContrasts.design_matrix` ## v0.1.0