Skip to content

Commit

Permalink
Comments
Browse files Browse the repository at this point in the history
  • Loading branch information
frances-h committed Jan 23, 2025
1 parent e06247d commit fc272bf
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 30 deletions.
20 changes: 12 additions & 8 deletions rdt/transformers/numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,10 @@ def __init__(
max_value=1.0,
learn_rounding_scheme=False,
):
if min_value == max_value:
error_msg = 'The min_value and max_value for the logit function cannot be equal.'
raise TransformerInputError(error_msg)

Check warning on line 675 in rdt/transformers/numerical.py

View check run for this annotation

Codecov / codecov/patch

rdt/transformers/numerical.py#L674-L675

Added lines #L674 - L675 were not covered by tests

super().__init__(
missing_value_replacement=missing_value_replacement,
missing_value_generation=missing_value_generation,
Expand Down Expand Up @@ -707,18 +711,18 @@ def _transform(self, data):
logit_vals = logit(transformed_vals, self.min_value, self.max_value)
if transformed.ndim == 1:
return logit_vals
else:
transformed[:, 0] = logit_vals
return transformed

transformed[:, 0] = logit_vals
return transformed

def _reverse_transform(self, data):
if not isinstance(data, np.ndarray):
data = data.to_numpy()

sampled_vals = data if data.ndim == 1 else data[:, 0]
reversed = sigmoid(sampled_vals, self.min_value, self.max_value)
reversed_values = sigmoid(sampled_vals, self.min_value, self.max_value)
if data.ndim == 1:
return super()._reverse_transform(reversed)
else:
data[:, 0] = reversed
return super()._reverse_transform(data)
return super()._reverse_transform(reversed_values)

data[:, 0] = reversed_values
return super()._reverse_transform(data)
37 changes: 26 additions & 11 deletions tests/integration/test_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,28 @@
}


def _create_transformer_args_from_data(transformer_args, data):
"""Helper to extract transformer arguments that are data-dependent.
Args:
transformer_args (dict):
The transformer arguments.
data (pd.Series):
The data for the transformer.
Returns:
dict:
The transformer arguments with data-specific arguments added.
"""
if 'FROM_DATA' in transformer_args:
transformer_args = {**transformer_args}
args = transformer_args.pop('FROM_DATA')
for arg, arg_func in args.items():
transformer_args[arg] = arg_func(data)

return transformer_args


def _validate_helper(validator_function, args, steps):
"""Wrap around validation functions to either return a boolean or assert.
Expand Down Expand Up @@ -157,11 +179,7 @@ def _test_transformer_with_dataset(transformer_class, input_data, steps):
"""

transformer_args = TRANSFORMER_ARGS.get(transformer_class.__name__, {})
if 'FROM_DATA' in transformer_args:
transformer_args = {**transformer_args}
args = transformer_args.pop('FROM_DATA')
for arg, arg_func in args.items():
transformer_args[arg] = arg_func(input_data[TEST_COL])
transformer_args = _create_transformer_args_from_data(transformer_args, input_data[TEST_COL])

transformer = transformer_class(**transformer_args)
# Fit
Expand Down Expand Up @@ -217,12 +235,9 @@ def _test_transformer_with_hypertransformer(transformer_class, input_data, steps
transformer_args = TRANSFORMER_ARGS.get(transformer_class.__name__, {})
hypertransformer = HyperTransformer()
if transformer_args:
if 'FROM_DATA' in transformer_args:
transformer_args = {**transformer_args}
args = transformer_args.pop('FROM_DATA')
for arg, arg_func in args.items():
transformer_args[arg] = arg_func(input_data[TEST_COL])

transformer_args = _create_transformer_args_from_data(
transformer_args, input_data[TEST_COL]
)
field_transformers = {TEST_COL: transformer_class(**transformer_args)}

else:
Expand Down
42 changes: 31 additions & 11 deletions tests/unit/transformers/test_numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1889,7 +1889,18 @@ def test___init__(self):
assert ls.max_value == 100.0
assert ls.min_value == 2.0

def test__validate_logit_inputs(self):
def test___init___invalid_inputs(self):
"""Test that equal ``min_value`` and ``max_value`` raise a ``TransformerInputError``."""
# Setup
min_value = 10.0
max_value = 10.0

# Run / Assert
expected_msg = 'The min_value and max_value for the logit function cannot be equal.'
with pytest.raises(TransformerInputError, match=re.escape(expected_msg)):
LogitScaler(max_value=max_value, min_value=min_value)

def test__validate_logit_inputs_with_default_settings(self):
"""Test validating data against input arguments."""
# Setup
ls = LogitScaler()
Expand All @@ -1898,6 +1909,15 @@ def test__validate_logit_inputs(self):
# Run and Assert
ls._validate_logit_inputs(data)

def test__validate_logit_inputs_with_custom_inputs(self):
"""Test validating data against custom ``min_value`` and ``max_value`` arguments.

The data includes both boundary values (0.0 and 100); the call is expected
to complete without raising.
"""
# Setup
ls = LogitScaler(min_value=0, max_value=100)
data = pd.Series([0.0, 10.1, 20.2, 30.3, 100])

# Run and Assert
ls._validate_logit_inputs(data)

def test__validate_logit_inputs_errors_invalid_value(self):
"""Test error message contains invalid values."""
# Setup
Expand Down Expand Up @@ -1944,7 +1964,7 @@ def test__fit(self):
def test__transform(self, mock_logit):
"""Test the ``transform`` method."""
# Setup
min_value = (1.0,)
min_value = 1.0
max_value = 50.0
ls = LogitScaler(min_value=min_value, max_value=max_value)
ls._validate_logit_inputs = Mock()
Expand All @@ -1965,7 +1985,7 @@ def test__transform(self, mock_logit):
def test__transform_multi_column(self, mock_logit):
"""Test the ``transform`` method with multiple columns."""
# Setup
min_value = (1.0,)
min_value = 1.0
max_value = 50.0
ls = LogitScaler(min_value=min_value, max_value=max_value)
ls._validate_logit_inputs = Mock()
Expand All @@ -1988,7 +2008,7 @@ def test__transform_multi_column(self, mock_logit):
def test__reverse_transform(self, mock_sigmoid, ff_reverse_transform_mock):
"""Test the ``transform`` method."""
# Setup
min_value = (1.0,)
min_value = 1.0
max_value = 50.0
ls = LogitScaler(min_value=min_value, max_value=max_value)
data = pd.Series([1.0, 1.1, 1.2, 1.3, 2.0, 3.0, 4.0])
Expand All @@ -1997,40 +2017,40 @@ def test__reverse_transform(self, mock_sigmoid, ff_reverse_transform_mock):
ls.null_transformer = null_transformer_mock

# Run
reversed = ls._reverse_transform(data)
reversed_values = ls._reverse_transform(data)

# Assert
mock_sigmoid_args = mock_sigmoid.call_args[0]
np.testing.assert_array_equal(mock_sigmoid_args[0], data.to_numpy())
assert mock_sigmoid_args[1] == ls.min_value
assert mock_sigmoid_args[2] == ls.max_value
ff_reverse_transform_mock.assert_called_once_with(mock_sigmoid.return_value)
assert reversed == ff_reverse_transform_mock.return_value
assert reversed_values == ff_reverse_transform_mock.return_value

@patch('rdt.transformers.numerical.FloatFormatter._reverse_transform')
@patch('rdt.transformers.numerical.sigmoid')
def test__reverse_transform_multi_column(self, mock_sigmoid, ff_reverse_transform_mock):
"""Test the ``transform`` method with multiple columns."""
# Setup
min_value = (1.0,)
min_value = 1.0
max_value = 50.0
ls = LogitScaler(min_value=min_value, max_value=max_value)
sampled_data = np.array([1.0, 1.1, 1.2, 1.3, 2.0, 3.0, 4.0])
is_null = np.array([0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0])
data = pd.DataFrame({'column': sampled_data, 'column.is_null': is_null})
null_transformer_mock = Mock()
reversed = np.array([1.0, 1.1, np.nan, np.nan, 2.0, np.nan, np.nan])
null_transformer_mock.reverse_transform.return_value = reversed
reversed_values = np.array([1.0, 1.1, np.nan, np.nan, 2.0, np.nan, np.nan])
null_transformer_mock.reverse_transform.return_value = reversed_values
ls.null_transformer = null_transformer_mock
sigmoid_vals = np.array([3.0, 3.1, 3.3, 3.4, 2.1, 4.0, 4.6])
mock_sigmoid.return_value = sigmoid_vals

# Run
reversed = ls._reverse_transform(data)
reversed_values = ls._reverse_transform(data)

# Assert
ff_reverse_transform_args = ff_reverse_transform_mock.call_args[0]
np.testing.assert_array_equal(
ff_reverse_transform_args[0], np.array([sigmoid_vals, is_null]).T
)
assert reversed == ff_reverse_transform_mock.return_value
assert reversed_values == ff_reverse_transform_mock.return_value

0 comments on commit fc272bf

Please sign in to comment.