Skip to content

Commit

Permalink
Improvements for symbolic explanations plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
Sarah Krebs committed Dec 22, 2023
1 parent 6a47a7f commit 60580c5
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 73 deletions.
199 changes: 134 additions & 65 deletions deepcave/plugins/hyperparameter/symbolic_explanations.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from deepcave.utils.styled_plotty import get_color, get_hyperparameter_ticks, save_image
from deepcave.utils.symbolic_regression import convert_symb, get_function_set

GRID_POINTS_PER_AXIS = 20
SR_TRAIN_POINTS_PER_AXIS = 20
SAMPLES_PER_HP = 10
MAX_SAMPLES = 10000
MAX_SHOWN_SAMPLES = 100
Expand Down Expand Up @@ -88,63 +88,111 @@ def get_input_layout(register):
md=6,
),
],
className="mb-3",
),
dbc.Row(
[
dbc.Col(
[
dbc.Label("Parsimony Hyperparameter"),
help_button("Controls the complexity of the resulting formulas."),
dbc.Input(id=register("parsimony", type=float), type="float"),
html.Div(
[
dbc.Label("Parsimony coefficient"),
help_button(
"Penalizes the complexity of the resulting formulas."
),
dcc.Slider(
id=register("parsimony", "value", type=int),
marks=dict([i, str(10**i)] for i in range(-8, 1)),
min=-8,
max=0,
step=1,
updatemode="drag",
),
],
)
],
md=6,
),
)
],
className="mb-3",
),
]

@staticmethod
def get_filter_layout(register):
return [
dbc.Row(
html.Details(
[
dbc.Col(
html.Summary("Additional options for symbolic regression configuration"),
dbc.Row(
[
html.Div(
dbc.Col(
[
dbc.Label("Show confidence"),
help_button("Displays the confidence bands."),
dbc.Select(
id=register("show_confidence", ["value", "options"])
dbc.Label("Generations"),
help_button("The number of generations to evolve."),
dbc.Input(
id=register("generations", type=int),
type="number",
min=5,
step=5,
),
]
)
],
md=6,
),
dbc.Col(
[
dbc.Label("Population Size"),
help_button(
"The number of formulas competing in each generation."
),
dbc.Input(
id=register("population_size", type=int),
type="number",
min=1000,
step=1000,
),
],
md=6,
),
],
md=6,
className="mb-3",
style={"marginTop": "0.8em"},
),
dbc.Col(
dbc.Row(
[
html.Div(
dbc.Col(
[
dbc.Label("Show ICE curves"),
dbc.Label("Random seed"),
help_button(
"Displays the ICE curves from which the PDP curve is "
"derivied."
"The random seed to be used in the symbolic regression."
),
dbc.Select(id=register("show_ice", ["value", "options"])),
]
)
dbc.Input(
id=register("random_seed", type=int), type="number", min=0
),
],
md=6,
),
dbc.Col(
[
dbc.Label("Metric"),
help_button(
"The metric to evaluate the fitness of the formulas."
),
dbc.Select(id=register("metric", ["value", "options"])),
],
md=6,
),
],
md=6,
className="mb-3",
),
],
]
),
]

def load_inputs(self):
return {
"parsimony": {"value": "0.0001"},
# "show_ice": {"options": get_select_options(binary=True), "value": "true"},
"parsimony": {"value": "-4"},
"generations": {"value": "10"},
"population_size": {"value": "5000"},
"random_seed": {"value": "0"},
"metric": {
"options": get_select_options(values=["rmse", "mse", "mean absolute error"]),
"value": "rmse",
},
}

def load_dependency_inputs(self, run, previous_inputs, inputs):
Expand All @@ -167,7 +215,6 @@ def load_dependency_inputs(self, run, previous_inputs, inputs):
objective_value = inputs["objective_id"]["value"]
budget_value = inputs["budget_id"]["value"]
hp1_value = inputs["hyperparameter_name_1"]["value"]
parsimony = inputs["parsimony"]["value"]

if objective_value is None:
objective_value = objective_ids[0]
Expand All @@ -184,7 +231,6 @@ def load_dependency_inputs(self, run, previous_inputs, inputs):
"hyperparameter_name_2": {
"options": get_checklist_options([None] + hp_names),
},
"parsimony": {"value": inputs["parsimony"]["value"]},
}

@staticmethod
Expand All @@ -195,7 +241,11 @@ def process(run, inputs):
budget = run.get_budget(inputs["budget_id"])
hp1 = inputs["hyperparameter_name_1"]
hp2 = inputs["hyperparameter_name_2"]
parsimony = inputs["parsimony"]
parsimony = 10 ** inputs["parsimony"]
generations = inputs["generations"]
population_size = inputs["population_size"]
random_seed = inputs["random_seed"]
metric = inputs["metric"]

if objective is None:
raise RuntimeError("Objective not found.")
Expand All @@ -217,37 +267,56 @@ def process(run, inputs):

# Prepare the hyperparameters
selected_hyperparameters = [hp1]
idx1 = run.configspace.get_idx_by_hyperparameter_name(hp1)
idxs = [idx1]
if hp2 is not None and hp2 != "":
selected_hyperparameters += [hp2]
idx2 = run.configspace.get_idx_by_hyperparameter_name(hp2)
idxs += [idx2]

if len(selected_hyperparameters) < len(hp_names):
num_samples = SAMPLES_PER_HP * len(X)
# We limit the samples to max 10k
if num_samples > MAX_SAMPLES:
num_samples = MAX_SAMPLES

# And finally call PDP
pdp = PDP.from_random_points(
surrogate_model,
selected_hyperparameter=selected_hyperparameters,
seed=0,
num_grid_points_per_axis=SR_TRAIN_POINTS_PER_AXIS,
num_samples=num_samples,
)

num_samples = SAMPLES_PER_HP * len(X)
# We limit the samples to max 10k
if num_samples > MAX_SAMPLES:
num_samples = MAX_SAMPLES

# And finally call PDP
pdp = PDP.from_random_points(
surrogate_model,
selected_hyperparameter=selected_hyperparameters,
seed=0,
num_grid_points_per_axis=GRID_POINTS_PER_AXIS,
num_samples=num_samples,
)
x = pdp.x_pdp[:, idxs].tolist()
y = pdp.y_pdp.tolist()

x = pdp.x_pdp.tolist()
y = pdp.y_pdp.tolist()
else:
cs = surrogate_model.config_space
random_samples = np.asarray(
[
config.get_array()
for config in cs.sample_configuration(
SR_TRAIN_POINTS_PER_AXIS ** len(selected_hyperparameters)
)
]
)
x = random_samples.tolist()
y = surrogate_model.predict(random_samples)[0]

symb_params = dict(
population_size=5000,
generations=20,
population_size=population_size,
generations=generations,
function_set=get_function_set(),
metric="rmse",
metric=metric,
parsimony_coefficient=parsimony,
random_state=random_seed,
verbose=1,
)

# run SR on samples
symb_model = SymbolicRegressor(**symb_params, random_state=0)
symb_model = SymbolicRegressor(**symb_params)
symb_model.fit(x, y)

def handler(signo, frame):
Expand All @@ -257,7 +326,9 @@ def handler(signo, frame):
signal.alarm(6) # seconds
while True:
try:
conv_expr = convert_symb(symb_model, n_dim=len(X), n_decimals=3)
conv_expr = convert_symb(
symb_model, n_decimals=3, hp_names=selected_hyperparameters
)
except:
conv_expr = (
"The conversion of the expression failed. Please try another seed or increase "
Expand All @@ -276,14 +347,11 @@ def get_output_layout(register):
def load_outputs(run, inputs, outputs):
# Parse inputs
hp1_name = inputs["hyperparameter_name_1"]
hp1_idx = run.configspace.get_idx_by_hyperparameter_name(hp1_name)
hp1 = run.configspace.get_hyperparameter(hp1_name)

hp2_name = inputs["hyperparameter_name_2"]
hp2_idx = None
hp2 = None
if hp2_name is not None and hp2_name != "":
hp2_idx = run.configspace.get_idx_by_hyperparameter_name(hp2_name)
hp2 = run.configspace.get_hyperparameter(hp2_name)

objective = run.get_objective(inputs["objective_id"])
Expand All @@ -295,10 +363,10 @@ def load_outputs(run, inputs, outputs):
expr = outputs["expr"]

traces = []
if hp2_idx is None: # 1D
if hp2 is None: # 1D
traces += [
go.Scatter(
x=x[:, hp1_idx],
x=x[:, 0],
y=y,
line=dict(color=get_color(0, 1)),
hoverinfo="skip",
Expand All @@ -317,15 +385,16 @@ def load_outputs(run, inputs, outputs):
"yaxis": {
"title": objective_name,
},
"title": f"{objective.name} = {expr}",
}
)
else:
z = y
traces += [
go.Contour(
z=z,
x=x[:, hp1_idx],
y=x[:, hp2_idx],
x=x[:, 0],
y=x[:, 1],
colorbar=dict(
title=objective_name,
),
Expand All @@ -341,11 +410,11 @@ def load_outputs(run, inputs, outputs):
xaxis=dict(tickvals=x_tickvals, ticktext=x_ticktext, title=hp1_name),
yaxis=dict(tickvals=y_tickvals, ticktext=y_ticktext, title=hp2_name),
margin=config.FIGURE_MARGIN,
title=expr,
title=f"{objective.name} = {expr}",
)
)

figure = go.Figure(data=traces, layout=layout)
save_image(figure, "pdp.pdf")
save_image(figure, "symbolic_explanation.pdf")

return figure
23 changes: 15 additions & 8 deletions deepcave/utils/symbolic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def get_function_set():
return function_set


def convert_symb(symb, n_dim: int = None, n_decimals: int = None) -> sympy.core.expr:
def convert_symb(symb, n_decimals: int = None, hp_names: list = None) -> sympy.core.expr:
"""
Convert a fitted symbolic regressor to a simplified and, optionally, rounded mathematical expression.
Warning: eval is used in this function, thus it should not be used on unsanitized input (see
Expand All @@ -32,8 +32,8 @@ def convert_symb(symb, n_dim: int = None, n_decimals: int = None) -> sympy.core.
Parameters
----------
symb: Fitted symbolic regressor to find a simplified expression for.
n_dim: Number of input dimensions. If input has only a single dimension, X0 in expression is exchanged by x.
n_decimals: If set, round floats in the expression to this number of decimals.
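hp_names: If set, replace X0 (and X1, when two names are given) in the expression by the given hyperparameter names. Must contain one or two names; otherwise a ValueError is raised.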
Returns
-------
Expand Down Expand Up @@ -78,12 +78,19 @@ def convert_symb(symb, n_dim: int = None, n_decimals: int = None) -> sympy.core.
return symb_str

symb_conv = sympy.sympify(symb_str.replace("[", "").replace("]", ""), locals=converter)
if n_dim == 1:
x, X0 = sympy.symbols("x X0")
symb_conv = symb_conv.subs(X0, x)
if n_dim == 2:
X0, X1 = sympy.symbols("X0 X1", real=True)
symb_conv = symb_conv.subs(X0, X1)
if hp_names is not None:
if len(hp_names) == 1:
X0, hp0 = sympy.symbols(f"X0 {hp_names[0]}")
symb_conv = symb_conv.subs(X0, hp0)
elif len(hp_names) == 2:
X0, hp0, X1, hp1 = sympy.symbols(f"X0 {hp_names[0]} X1 {hp_names[1]}")
symb_conv = symb_conv.subs(X0, hp0)
symb_conv = symb_conv.subs(X1, hp1)
else:
raise ValueError(
f"Numer of hyperparameters to be explained by symbolic explanations must not "
f"be larger than 2"
)

logger.debug("Start to simplify the expression with Sympy.")
symb_simpl = sympy.simplify(symb_conv)
Expand Down

0 comments on commit 60580c5

Please sign in to comment.