Skip to content

Commit

Permalink
add transformers get params tests
Browse files Browse the repository at this point in the history
  • Loading branch information
caiodallaqua committed Feb 1, 2024
1 parent 240c998 commit 66bace1
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 3 deletions.
4 changes: 2 additions & 2 deletions pier_ds_utils/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,8 @@ def from_dict(cls, intervals: typing.Dict, **kwargs):

def get_params(self, deep: bool = True) -> dict:
return {
'intervals': self.labels_,
'labels': self.intervals_,
'intervals': self.intervals_,
'labels': self.labels_,
'default_value': self.default_value_,
'output_column': self.output_column_,
'column': self.column_,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pier-ds-utils"
version = "0.2.1"
version = "0.2.2"
description = "The pier_ds_utils is an internal library used by data science teams to avoid code duplication in common tasks."
authors = ["caiodallaqua <[email protected]>"]
readme = "README.md"
Expand Down
94 changes: 94 additions & 0 deletions tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,28 @@ def test_custom_discrete_categorizer():
]


def test_custom_discrete_categorizer_get_params():
    """Check that `get_params` reports the categories, labels and default value
    that were passed to `CustomDiscreteCategorizer` at construction time."""
    # Constructor arguments that are expected to be echoed back by get_params().
    expected = {
        'categories': [
            ['M', 'm', 'Masculino', 'masculino'],
            ['F', 'f', 'Feminino', 'feminino'],
        ],
        'labels': ['M', 'F'],
        'default_value': 'M',
    }

    reported = ds.transformer.CustomDiscreteCategorizer(
        column='gender',
        **expected,
    ).get_params()

    # Compare key by key, in the same order the arguments were supplied.
    for name, value in expected.items():
        assert reported[name] == value


def test_custom_interval_categorizer():
categorizer = ds.transformer.CustomIntervalCategorizer(
column='price',
Expand Down Expand Up @@ -98,6 +120,36 @@ def test_custom_interval_categorizer():
'fx_outras_marcas',
]


def test_custom_interval_categorizer_get_params():
    """Check that `get_params` returns every constructor argument of
    `CustomIntervalCategorizer` under its own key (`intervals` under
    'intervals', `labels` under 'labels', and so on)."""
    init_kwargs = dict(
        column='price',
        intervals=[
            (498, 2700),
            (2700, 3447.6),
            (3447.6, 5592),
            (5592, 13950),
        ],
        labels=['fx1_apple', 'fx2_apple', 'fx3_apple', 'fx4_apple'],
        default_value='fx_outras_marcas',
        output_column='price_fx',
    )

    interval_categorizer = ds.transformer.CustomIntervalCategorizer(**init_kwargs)
    reported = interval_categorizer.get_params()

    # Each argument must come back under the key it was passed as.
    for key in ('column', 'intervals', 'labels', 'default_value', 'output_column'):
        assert reported[key] == init_kwargs[key]


def test_custom_interval_categorizer_by_category():
categorizer = ds.transformer.CustomIntervalCategorizerByCategory(
category_column='brand',
Expand Down Expand Up @@ -184,3 +236,45 @@ def test_custom_interval_categorizer_by_category():
'fx_outras_marcas',
'fx_outras_marcas',
]



# get_params check for CustomIntervalCategorizerByCategory.
#
# NOTE(review): in this view the statements below have no enclosing
# `def test_...():` header. If that is also true in the real file, pytest will
# not collect them as a test and they will run at import time instead --
# confirm against tests/test_transformer.py.
category_column = 'brand'

# Price intervals and their labels for the 'apple' categorizer.
intervals = [
(498, 2700),
(2700, 3447.6),
(3447.6, 5592),
(5592, 13950),
]

labels = ['fx1_apple', 'fx2_apple', 'fx3_apple', 'fx4_apple']

# One interval categorizer per brand ('apple', 'samsung'), plus a default
# categorizer passed separately as `default_categorizer`.
categorizer = ds.transformer.CustomIntervalCategorizerByCategory(
category_column=category_column,
interval_categorizers={
'apple': ds.transformer.CustomIntervalCategorizer(
column='price',
intervals=intervals,
labels=labels,
),
'samsung': ds.transformer.CustomIntervalCategorizer(
column='price',
intervals=[
(189, 1500),
(1500, 11340),
],
labels=['fx1_samsung', 'fx2_samsung'],
)
},
default_categorizer=ds.transformer.CustomIntervalCategorizer(
column='price',
intervals=[(240, 5260)],
labels=['fx_outras_marcas'],
),
output_column='price_fx',
)

params = categorizer.get_params()
# NOTE(review): this print looks like leftover debug output -- consider removing.
print(params)
# Only `category_column` is verified here; the nested categorizers, the default
# categorizer and `output_column` are not asserted.
assert params['category_column'] == category_column

0 comments on commit 66bace1

Please sign in to comment.