diff --git a/causalml/metrics/visualize.py b/causalml/metrics/visualize.py index e6b9b226..12534adf 100644 --- a/causalml/metrics/visualize.py +++ b/causalml/metrics/visualize.py @@ -84,6 +84,10 @@ def get_cumlift( or treatment_effect_col in df.columns ) + assert not ( + (df[[outcome_col, treatment_col, treatment_effect_col]].isnull().values.any()) + ) + df = df.copy() np.random.seed(random_seed) random_cols = [] @@ -219,6 +223,10 @@ def get_qini( or treatment_effect_col in df.columns ) + assert not ( + (df[[outcome_col, treatment_col, treatment_effect_col]].isnull().values.any()) + ) + df = df.copy() np.random.seed(random_seed) random_cols = [] @@ -315,6 +323,8 @@ def get_tmlegain( or p_col in df.columns ) + assert not ((df[[outcome_col, treatment_col, p_col]].isnull().values.any())) + inference_col = [x for x in inference_col if x in df.columns] # Initialize TMLE @@ -421,6 +431,8 @@ def get_tmleqini( or p_col in df.columns ) + assert not ((df[[outcome_col, treatment_col, p_col]].isnull().values.any())) + inference_col = [x for x in inference_col if x in df.columns] # Initialize TMLE diff --git a/tests/test_cevae.py b/tests/test_cevae.py index 82762433..84a032f4 100644 --- a/tests/test_cevae.py +++ b/tests/test_cevae.py @@ -38,9 +38,7 @@ def test_CEVAE(): # check the accuracy of the ite accuracy ite = cevae.predict(X).flatten() - auuc_metrics = pd.DataFrame( - {"ite": ite, "W": treatment, "y": y, "treatment_effect_col": tau} - ) + auuc_metrics = pd.DataFrame({"ite": ite, "W": treatment, "y": y, "tau": tau}) cumgain = get_cumgain( auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau" diff --git a/tests/test_ivlearner.py b/tests/test_ivlearner.py index cbf25d43..dc9ad2f4 100644 --- a/tests/test_ivlearner.py +++ b/tests/test_ivlearner.py @@ -71,7 +71,7 @@ def test_drivlearner(): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) diff --git a/tests/test_meta_learners.py 
b/tests/test_meta_learners.py index c07622ff..46a32eb5 100644 --- a/tests/test_meta_learners.py +++ b/tests/test_meta_learners.py @@ -123,7 +123,7 @@ def test_BaseSRegressor(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) @@ -179,7 +179,7 @@ def test_BaseTLearner(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) @@ -229,7 +229,7 @@ def test_BaseTRegressor(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) @@ -268,7 +268,7 @@ def test_MLPTRegressor(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) @@ -307,7 +307,7 @@ def test_XGBTRegressor(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) @@ -346,7 +346,7 @@ def test_BaseXLearner(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) @@ -398,7 +398,7 @@ def test_BaseXRegressor(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) @@ -439,7 +439,7 @@ def test_BaseXLearner_without_p(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) @@ -478,7 +478,7 @@ def test_BaseXRegressor_without_p(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) @@ -517,7 +517,7 @@ def test_BaseRLearner(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) @@ -568,7 +568,7 @@ def test_BaseRRegressor(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - 
"treatment_effect_col": tau, + "tau": tau, } ) @@ -607,7 +607,7 @@ def test_BaseRLearner_without_p(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) @@ -646,7 +646,7 @@ def test_BaseRRegressor_without_p(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } ) @@ -698,7 +698,7 @@ def test_BaseSClassifier(generate_classification_data): "tau_pred": tau_pred.flatten(), "W": df_test["treatment_group_key"].values, CONVERSION: df_test[CONVERSION].values, - "treatment_effect_col": df_test["treatment_effect"].values, + "tau": df_test["treatment_effect"].values, } ) @@ -706,7 +706,7 @@ def test_BaseSClassifier(generate_classification_data): auuc_metrics, outcome_col=CONVERSION, treatment_col="W", - treatment_effect_col="treatment_effect_col", + treatment_effect_col="tau", ) # Check if the cumulative gain when using the model's prediction is @@ -742,7 +742,7 @@ def test_BaseTClassifier(generate_classification_data): "tau_pred": tau_pred.flatten(), "W": df_test["treatment_group_key"].values, CONVERSION: df_test[CONVERSION].values, - "treatment_effect_col": df_test["treatment_effect"].values, + "tau": df_test["treatment_effect"].values, } ) @@ -750,7 +750,7 @@ def test_BaseTClassifier(generate_classification_data): auuc_metrics, outcome_col=CONVERSION, treatment_col="W", - treatment_effect_col="treatment_effect_col", + treatment_effect_col="tau", ) # Check if the cumulative gain when using the model's prediction is @@ -812,7 +812,7 @@ def test_BaseXClassifier(generate_classification_data): "tau_pred": tau_pred.flatten(), "W": df_test["treatment_group_key"].values, CONVERSION: df_test[CONVERSION].values, - "treatment_effect_col": df_test["treatment_effect"].values, + "tau": df_test["treatment_effect"].values, } ) @@ -820,7 +820,7 @@ def test_BaseXClassifier(generate_classification_data): auuc_metrics, outcome_col=CONVERSION, 
treatment_col="W", - treatment_effect_col="treatment_effect_col", + treatment_effect_col="tau", ) # Check if the cumulative gain when using the model's prediction is @@ -861,7 +861,7 @@ def test_BaseRClassifier(generate_classification_data): "tau_pred": tau_pred.flatten(), "W": df_test["treatment_group_key"].values, CONVERSION: df_test[CONVERSION].values, - "treatment_effect_col": df_test["treatment_effect"].values, + "tau": df_test["treatment_effect"].values, } ) @@ -869,7 +869,7 @@ def test_BaseRClassifier(generate_classification_data): auuc_metrics, outcome_col=CONVERSION, treatment_col="W", - treatment_effect_col="treatment_effect_col", + treatment_effect_col="tau", ) # Check if the cumulative gain when using the model's prediction is @@ -912,7 +912,7 @@ def test_BaseRClassifier_with_sample_weights(generate_classification_data): "tau_pred": tau_pred.flatten(), "W": df_test["treatment_group_key"].values, CONVERSION: df_test[CONVERSION].values, - "treatment_effect_col": df_test["treatment_effect"].values, + "tau": df_test["treatment_effect"].values, } ) @@ -920,7 +920,7 @@ def test_BaseRClassifier_with_sample_weights(generate_classification_data): auuc_metrics, outcome_col=CONVERSION, treatment_col="W", - treatment_effect_col="treatment_effect_col", + treatment_effect_col="tau", ) # Check if the cumulative gain when using the model's prediction is @@ -1005,7 +1005,7 @@ def test_BaseDRLearner(generate_regression_data): "cate_p": cate_p.flatten(), "W": treatment, "y": y, - "treatment_effect_col": tau, + "tau": tau, } )
diff --git a/tests/test_visualize.py b/tests/test_visualize.py
new file mode 100644
index 00000000..f3bc0669
--- /dev/null
+++ b/tests/test_visualize.py
@@ -0,0 +1,28 @@
+import pandas as pd
+import numpy as np
+import pytest
+from causalml.metrics.visualize import get_cumlift
+
+
+def test_visualize_get_cumlift_errors_on_nan():
+    """Check that get_cumlift rejects a NaN in the outcome column."""
+    # The null guard in get_cumlift indexes the outcome, treatment and
+    # treatment-effect columns together, so all three must exist in the frame;
+    # otherwise the call would fail on column lookup before any NaN is checked.
+    df = pd.DataFrame(
+        [
+            [0, np.nan, 0.5, 0.1],
+            [1, np.nan, 0.1, 0.2],
+            [1, 1, 0.4, 0.3],
+            [0, 1, 0.3, 0.1],
+            [1, 1, 0.2, 0.2],
+        ],
+        columns=["w", "y", "pred", "tau"],
+    )
+
+    # The guard is a bare `assert`, so the failure must be an AssertionError;
+    # catching Exception would also accept unrelated errors such as KeyError.
+    with pytest.raises(AssertionError):
+        get_cumlift(
+            df, outcome_col="y", treatment_col="w", treatment_effect_col="tau"
+        )