Commit: difference with random date from col

dev-rinchin committed Dec 16, 2024
1 parent b5a2516 commit 1ddfbf7
Showing 2 changed files with 21 additions and 30 deletions.
11 changes: 5 additions & 6 deletions lightautoml/transformers/datetime.py
@@ -53,12 +53,10 @@ def datetime_check(dataset: LAMLDataset):
 class TimeToNum(LAMLTransformer):
     """Basic conversion strategy, used in selection one-to-one transformers.

-    Datetime converted to difference
-    with basic_date (``basic_date == '2020-01-01'``).
+    Datetime converted to difference with random date from the corresponding column.
     """

-    basic_time = "2020-01-01"
     basic_interval = "D"

     _fname_prefix = "dtdiff"
@@ -84,9 +82,10 @@ def transform(self, dataset: DatetimeCompatible) -> NumpyDataset:
         # transform
         roles = NumericRole(np.float32)

-        new_arr = ((data - np.datetime64(self.basic_time)) / np.timedelta64(1, self.basic_interval)).values.astype(
-            np.float32
-        )
+        new_arr = ((data - data.iloc[0]) / np.timedelta64(1, self.basic_interval)).values.astype(np.float32)
+        # new_arr = data.apply(lambda x: ((x - x[0]) / np.timedelta64(1, self.basic_interval)), axis=1).values.astype(
+        #     np.float32
+        # )

         # create resulted
         output = dataset.empty().to_numpy()
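For context, here is a minimal standalone sketch of what the new transform body computes (the toy frame and column names are invented for illustration). Note that the anchor is each column's first row, `data.iloc[0]`, so every datetime column is offset against a date taken from that same column rather than the shared `basic_time`:

import numpy as np
import pandas as pd

# Toy stand-in for `data` inside TimeToNum.transform (column names are hypothetical)
data = pd.DataFrame(
    {
        "signup": pd.to_datetime(["2024-01-01", "2024-01-15", "2024-02-01"]),
        "last_login": pd.to_datetime(["2024-03-01", "2024-03-02", "2024-03-10"]),
    }
)
basic_interval = "D"  # same default as TimeToNum.basic_interval

# `data - data.iloc[0]` broadcasts row 0 across the frame column-wise, so each
# column is measured against its own first date; dividing by a one-day
# timedelta converts the differences to float day counts
new_arr = ((data - data.iloc[0]) / np.timedelta64(1, basic_interval)).values.astype(np.float32)
print(new_arr)
# [[ 0.  0.]
#  [14.  1.]
#  [31.  9.]]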
40 changes: 16 additions & 24 deletions scripts/experiments/run_tabular.py
@@ -22,11 +22,9 @@
 RANDOM_STATE = 1234


-def map_to_corect_order_of_classes(values, targets_order):  # noqa D103
-    target_mapping = {n: x for (x, n) in enumerate(targets_order)}
-    mapped = list(map(target_mapping.get, values))
-
-    return mapped
+def fix_labels(values, renamed_labels):  # noqa D103
+    target_mapping = {n: x for (x, n) in enumerate(renamed_labels)}
+    return list(map(target_mapping.get, values))


 def main(dataset_name: str, cpu_limit: int, memory_limit: int, save_model: bool):  # noqa D103
@@ -64,8 +62,8 @@ def main(dataset_name: str, cpu_limit: int, memory_limit: int, save_model: bool)
     else:
         (is_test_unique_ok).all(), "Only one class present in test target."

-    assert train.isnull().values.any() is False, "train has nans in target."
-    assert test.isnull().values.any() is False, "test has nans in target."
+    assert train[target_name].isnull().values.any() is np.False_, "train has nans in target."
+    assert test[target_name].isnull().values.any() is np.False_, "test has nans in target."

     task = Task(task_type)

@@ -75,10 +73,10 @@ def main(dataset_name: str, cpu_limit: int, memory_limit: int, save_model: bool)
         task=task,
         cpu_limit=cpu_limit,
         memory_limit=memory_limit,
-        timeout=15 * 60,
-        # general_params={
-        #     "use_algos": [["mlp"]]
-        # },  # ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint', 'fttransformer'] or custom torch model
+        timeout=2 * 60 * 60,
+        general_params={
+            "use_algos": [["nn", "mlp", "dense", "denselight", "resnet", "snn", "node", "autoint", "fttransformer"]]
+        },  # ['nn', 'mlp', 'dense', 'denselight', 'resnet', 'snn', 'node', 'autoint', 'fttransformer'] or custom torch model
         # nn_params={"n_epochs": 10, "bs": 512, "num_workers": 0, "path_to_save": None, "freeze_defaults": True},
         # nn_pipeline_params={"use_qnt": True, "use_te": False},
         reader_params={
@@ -114,24 +112,18 @@ def main(dataset_name: str, cpu_limit: int, memory_limit: int, save_model: bool)
         metric_ho = roc_auc_score(test[target_name].values, test_predictions.data[:, 0])

     elif task_type == "multiclass":
-        not_nan = np.any(~np.isnan(oof_predictions.data), axis=1)
         try:
-            metric_oof = log_loss(train[target_name].values[not_nan], oof_predictions.data[not_nan, :])
+            metric_oof = log_loss(train[target_name].values, oof_predictions.data)
             metric_ho = log_loss(test[target_name], test_predictions.data)
         except:
-            if np.unique(train[target_name].values[not_nan]).shape != np.unique(oof_predictions.data[not_nan, :]).shape:
-                raise ValueError("Vectors have different number of classes")
             # Some datasets can have dtype=float of target,
-            # so we must map this target for correct log_loss calculating (if we didn't calсulate it in the try block)
-            # and this mapping must be in the correct order so we extract automl.targets_order and map values
-            y_true = map_to_corect_order_of_classes(
-                values=train[target_name].values[not_nan], targets_order=automl.targets_order
-            )
-            metric_oof = log_loss(y_true, oof_predictions.data[not_nan, :])
-
-            y_true = map_to_corect_order_of_classes(values=test[target_name], targets_order=automl.targets_order)
-
-            metric_ho = log_loss(y_true, test_predictions.data)
+            # so we must map labels for correct log_loss calculating (if we didn't calсulate it in the try block)
+            metric_oof = log_loss(
+                fix_labels(values=train[target_name].values, renamed_labels=automl.targets_order), oof_predictions.data
+            )
+            metric_ho = log_loss(
+                fix_labels(values=test[target_name], renamed_labels=automl.targets_order), test_predictions.data
+            )

     elif task_type == "reg":
         metric_oof = task.metric_func(train[target_name].values, oof_predictions.data[:, 0])
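For context, a minimal sketch of the fallback branch above (toy labels and probabilities are invented; `targets_order` stands in for the class order the fitted automl reports as `automl.targets_order`). `log_loss` expects targets encoded as the class ids that index the prediction columns, so when the raw target has, say, float dtype, `fix_labels` maps each value to its positional class id first:

import numpy as np
from sklearn.metrics import log_loss


def fix_labels(values, renamed_labels):
    # map each raw label to its position in the model's class order
    target_mapping = {n: x for (x, n) in enumerate(renamed_labels)}
    return list(map(target_mapping.get, values))


targets_order = [10.0, 20.0, 30.0]  # column i of the predictions corresponds to class targets_order[i]
y_raw = np.array([10.0, 30.0, 20.0, 10.0])  # float-typed targets as stored in the dataset
proba = np.array(  # model probabilities, columns ordered like targets_order
    [
        [0.8, 0.1, 0.1],
        [0.1, 0.2, 0.7],
        [0.2, 0.6, 0.2],
        [0.7, 0.2, 0.1],
    ]
)

y_true = fix_labels(values=y_raw, renamed_labels=targets_order)  # -> [0, 2, 1, 0]
print(log_loss(y_true, proba))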
