Skip to content

Commit

Permalink
add contributing
Browse files Browse the repository at this point in the history
  • Loading branch information
caiodallaqua committed Feb 6, 2024
1 parent 655edf2 commit 9d65f24
Show file tree
Hide file tree
Showing 14 changed files with 313 additions and 177 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Builds the package and publishes it to PyPI whenever a GitHub release is created.
name: Release
on:
  release:
    types:
      - created

jobs:
  publish:
    strategy:
      fail-fast: false
      matrix:
        # Versions are quoted so YAML keeps them as strings
        # (an unquoted 3.10 would be parsed as the number 3.1).
        python-version: ["3.11"]
        poetry-version: ["1.4.2"]
        os: [ubuntu-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout the repository
        uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
      - name: Install Poetry ${{ matrix.poetry-version }}
        # NOTE(review): the originally pinned tag was lost in extraction
        # ("[email protected]"); v2 is the action's major tag. Confirm the intended pin.
        uses: abatilo/actions-poetry@v2
        with:
          poetry-version: ${{ matrix.poetry-version }}
      - name: Publish
        env:
          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
        run: |
          # Quote the token so the shell never word-splits or globs it.
          poetry config pypi-token.pypi "$PYPI_TOKEN"
          poetry publish --build
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
__pycache__
.pytest_cache

.ruff_cache
.vscode
pytest-coverage.txt
48 changes: 48 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Contributing

## Development

### Setting up a development environment

If you don't have a local development environment, you can follow these steps to set one up.

Install [poetry](https://python-poetry.org/) and [task](https://taskfile.dev/).

Now, initialize the project:

```bash
task init
```

### Running tests

You can run the tests with:

```bash
task tests
```

This will run the tests with [pytest](https://docs.pytest.org/en/latest/) and show information about the coverage.

### Formatting the code

To look for formatting issues:

```bash
task check-formatting
```

To format the code, you can use the command:

```bash
task formatting
```

### Releasing a new version

To release a new version, you need to follow these steps:

1. Update the version with `poetry version <version>` and commit the change. This project follows [Semantic Versioning](https://semver.org/), so the version number must have the form `<major>.<minor>.<patch>`.

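2. Create a GitHub release with the new version number. Creating the release triggers the `Release` workflow, which builds the package and publishes it to PyPI.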

3. (Optional) If the automated publish did not run or failed, publish the new version to PyPI manually with `poetry publish --build`.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,7 @@ pip install git+https://github.com/pier-digital/pier-ds-utils@_version_
```

Replace `_version_` with the specific version you want to use. The available versions are listed on the [tags page](https://github.com/pier-digital/pier-ds-utils/tags).

## Contributing

Contributions are welcome! Please read the [contributing guidelines](CONTRIBUTING.md) first.
22 changes: 22 additions & 0 deletions Taskfile.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Development tasks for the project; run them with `task <name>`.
version: 3

vars:
  # NOTE(review): not referenced by any task in this file; confirm it is still needed.
  DECISIONS_DIR: docs/decisions

tasks:
  init:
    desc: initializes the project
    cmds:
      - poetry install -n
  tests:
    desc: run automated tests
    # Without pipefail the pipeline's exit status is tee's, so failing tests
    # would still make this task succeed.
    set: [pipefail]
    cmds:
      - poetry run pytest tests | tee pytest-coverage.txt
  check-formatting:
    desc: checks formatting
    cmds:
      - poetry run ruff check .
      # `ruff check` only lints; `ruff format --check` reports files the
      # formatter would change, without modifying them.
      - poetry run ruff format --check .
  formatting:
    desc: formats the code
    cmds:
      - poetry run ruff format .
1 change: 1 addition & 0 deletions pier_ds_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
# flake8: noqa
from pier_ds_utils import estimator, transformer
14 changes: 7 additions & 7 deletions pier_ds_utils/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,21 @@ def __init__(
def get_params(self, deep=True):
return {
**self.init_params,
**{'add_constant': self._add_constant, 'os_factor': self.os_factor},
**{"add_constant": self._add_constant, "os_factor": self.os_factor},
}

def fit(self, X, y, **fit_params):
    """Build a ``sm.GLM`` on ``X``/``y``, fit it, and store model and results.

    Parameters
    ----------
    X
        Exogenous data. When ``self._add_constant`` is truthy, a ``"const"``
        column equal to 1 is added to a copy of it, so the caller's object
        is left untouched.
    y
        Endogenous data, passed to ``sm.GLM`` as ``endog``.
    **fit_params
        ``fit_method`` (default ``"fit"``) names the model method to call;
        every remaining keyword is forwarded to that method.

    Returns
    -------
    self, with ``model_`` and ``results_`` set.
    """
    if self._add_constant:
        # Copy first: assigning the column in place would silently add
        # "const" to the caller's data.
        X = X.copy()
        X["const"] = 1

    self.model_ = sm.GLM(endog=y, exog=X, **self.init_params)
    fit_method = fit_params.pop("fit_method", "fit")
    self.results_ = getattr(self.model_, fit_method)(**fit_params)
    return self

def predict(self, X, **predict_params):
    """Predict with the fitted results and scale the output by ``os_factor``.

    Parameters
    ----------
    X
        Exogenous data. When ``self._add_constant`` is truthy, a ``"const"``
        column equal to 1 is added to a copy of it, so the caller's object
        is left untouched.
    **predict_params
        Forwarded to ``self.results_.predict``.

    Returns
    -------
    The value of ``self.results_.predict(exog=X, ...)`` multiplied by
    ``self.os_factor``.
    """
    if self._add_constant:
        # Copy first: assigning the column in place would silently add
        # "const" to the caller's data.
        X = X.copy()
        X["const"] = 1

    return self.results_.predict(exog=X, **predict_params) * self.os_factor

Expand All @@ -55,6 +55,6 @@ def __getattr__(self, __name: str) -> Any:

def get_params(self, deep: bool = True) -> dict:
return {
'model': self.model.get_params(deep=deep) if deep else self.model,
'column': self.column,
}
"model": self.model.get_params(deep=deep) if deep else self.model,
"column": self.column,
}
90 changes: 45 additions & 45 deletions pier_ds_utils/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


class BaseCustomTransformer(BaseEstimator, TransformerMixin):
def set_output(self, transform: str = 'pandas') -> BaseEstimator:
def set_output(self, transform: str = "pandas") -> BaseEstimator:
return self


Expand All @@ -19,7 +19,7 @@ def __init__(
default_value: typing.Any = None,
output_column: typing.Optional[str] = None,
):
'''
"""
Transformer to categorize a column into custom categories.
Parameters
Expand All @@ -34,15 +34,15 @@ def __init__(
Value to be used for missing values. If None, missing values will be kept as NaN.
output_column: str
Name of the output column. If None, the original column will be overwritten.
'''
"""
if len(categories) != len(labels):
raise ValueError(
'Number of categories must be the same as number of labels'
"Number of categories must be the same as number of labels"
)

for category in categories:
if not isinstance(category, list):
raise TypeError('Each category must be a list')
raise TypeError("Each category must be a list")

self._column = column
self._categories = categories
Expand All @@ -64,18 +64,18 @@ def from_dict(cls, categories: typing.Dict, **kwargs):

def get_params(self, deep: bool = True) -> dict:
return {
'categories': self.categories_,
'labels': self.labels_,
'default_value': self._default_value,
'output_column': self._output_column,
"categories": self.categories_,
"labels": self.labels_,
"default_value": self._default_value,
"output_column": self._output_column,
}

def fit(self, X, y=None):
return self

def transform(self, X):
values = X[self._column].copy()
output = pd.Series(np.nan, index=X.index, dtype='object')
output = pd.Series(np.nan, index=X.index, dtype="object")

for category, label in zip(self._categories, self._labels):
output.loc[values.isin(category)] = label
Expand All @@ -98,7 +98,7 @@ def __init__(
default_value: typing.Any = None,
output_column: typing.Optional[str] = None,
):
'''
"""
Custom transformer to categorize a numeric column into intervals.
Parameters
Expand All @@ -115,25 +115,25 @@ def __init__(
Value to be used for missing values. If None, missing values will be kept as NaN.
output_column: str
Name of the output column. If None, the original column will be overwritten.
'''
"""
if len(intervals) != len(labels):
raise ValueError('Number of intervals must be the same as number of labels')
raise ValueError("Number of intervals must be the same as number of labels")

for interval in intervals:
if not isinstance(interval, tuple):
raise TypeError('Each interval must be a tuple')
raise TypeError("Each interval must be a tuple")

if len(interval) != 2:
raise ValueError('Each interval must have two elements')
raise ValueError("Each interval must have two elements")

if not isinstance(interval[0], (int, float)) or not isinstance(
interval[1], (int, float)
):
raise TypeError('Each interval element must be a number')
raise TypeError("Each interval element must be a number")

if interval[0] >= interval[1]:
raise ValueError(
'Each interval must have the first element smaller than the second'
"Each interval must have the first element smaller than the second"
)

self._column = column
Expand Down Expand Up @@ -171,19 +171,19 @@ def from_dict(cls, intervals: typing.Dict, **kwargs):

def get_params(self, deep: bool = True) -> dict:
return {
'intervals': self.intervals_,
'labels': self.labels_,
'default_value': self.default_value_,
'output_column': self.output_column_,
'column': self.column_,
"intervals": self.intervals_,
"labels": self.labels_,
"default_value": self.default_value_,
"output_column": self.output_column_,
"column": self.column_,
}

def fit(self, X, y=None):
return self

def transform(self, X):
values = X[self.column_].astype(float).copy()
output = pd.Series(np.nan, index=X.index, dtype='object')
output = pd.Series(np.nan, index=X.index, dtype="object")

for interval, label in zip(self.intervals_, self.labels_):
output.loc[(values >= interval[0]) & (values < interval[1]),] = label
Expand All @@ -205,7 +205,7 @@ def __init__(
default_value: typing.Any = None,
output_column: typing.Optional[str] = None,
):
'''
"""
Custom transformer to categorize a numeric column into intervals given a categorical column.
Parameters
Expand All @@ -221,19 +221,19 @@ def __init__(
Value to be used for missing values. If None, missing values will be kept as NaN.
output_column: str
Name of the output column. If None, the original column will be overwritten.
'''
"""
if not isinstance(interval_categorizers, dict):
raise TypeError('interval_categorizers must be a dict')
raise TypeError("interval_categorizers must be a dict")

for key, value in interval_categorizers.items():
if not isinstance(key, str):
raise TypeError('Keys of interval_categorizers must be strings')
raise TypeError("Keys of interval_categorizers must be strings")

if not isinstance(value, CustomIntervalCategorizer):
raise TypeError(
'Values of interval_categorizers must be CustomIntervalCategorizer'
"Values of interval_categorizers must be CustomIntervalCategorizer"
)

self._category_column = category_column
self._interval_categorizers = interval_categorizers
self._default_categorizer = default_categorizer
Expand All @@ -247,7 +247,7 @@ def category_column_(self) -> str:
@property
def interval_categorizers_(self) -> typing.Dict[str, CustomIntervalCategorizer]:
return self._interval_categorizers

@property
def default_categorizer_(self) -> typing.Optional[CustomIntervalCategorizer]:
return self._default_categorizer
Expand All @@ -258,39 +258,39 @@ def from_dict(cls, **kwargs):

def get_params(self, deep: bool = True) -> dict:
return {
'category_column': self.category_column_,
'interval_categorizers': self.interval_categorizers_,
'default_categorizer': self.default_categorizer_,
'default_value': self._default_value,
'output_column': self._output_column,
"category_column": self.category_column_,
"interval_categorizers": self.interval_categorizers_,
"default_categorizer": self.default_categorizer_,
"default_value": self._default_value,
"output_column": self._output_column,
}

def fit(self, X, y=None):
return self

def transform(self, X):
output = pd.Series(np.nan, index=X.index, dtype='object')
output = pd.Series(np.nan, index=X.index, dtype="object")

for category, interval_categorizer in self.interval_categorizers_.items():
output.loc[
X[self._category_column] == category
] = interval_categorizer.transform(
X.loc[X[self._category_column] == category]
)[
interval_categorizer.get_output_column()
]
)[interval_categorizer.get_output_column()]

if self._default_categorizer is not None:
output.loc[~(X[self.category_column_].isin(self.interval_categorizers_.keys()))] = self._default_categorizer.transform(
X.loc[~(X[self.category_column_].isin(self.interval_categorizers_.keys()))]
)[
self._default_categorizer.get_output_column()
]
output.loc[
~(X[self.category_column_].isin(self.interval_categorizers_.keys()))
] = self._default_categorizer.transform(
X.loc[
~(X[self.category_column_].isin(self.interval_categorizers_.keys()))
]
)[self._default_categorizer.get_output_column()]

if self._default_value is not None:
output.fillna(self._default_value, inplace=True)

output_column = self._output_column or self._category_column
X.loc[:, output_column] = output

return X
return X
Loading

0 comments on commit 9d65f24

Please sign in to comment.