Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

199 updated smac3 dataframe converter #200

Merged
merged 2 commits into from
Dec 13, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 143 additions & 69 deletions deepcave/runs/converters/smac3v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
- SMAC3v2Run: Define a SMAC3v2 run object.
"""

from typing import Union
from typing import Dict, List, Optional, Union

import json
import os
Expand Down Expand Up @@ -145,79 +145,153 @@ def from_path(cls, path: Union[Path, str]) -> "SMAC3v2Run":
config_origins = all_data["config_origins"]
configs = all_data["configs"]

instance_ids = []
instance_ids: List[int] = []

first_starttime = None
for (
config_id,
instance_id,
seed,
budget,
cost,
time,
status,
starttime,
endtime,
additional_info,
) in data:
if instance_id not in instance_ids:
instance_ids += [instance_id]

if len(instance_ids) > 1:
raise RuntimeError("Instances are not supported.")

config_id = str(config_id)
config = configs[config_id]

if first_starttime is None:
first_starttime = starttime

starttime = starttime - first_starttime
endtime = endtime - first_starttime

if status == 0:
# still running
continue
elif status == 1:
status = Status.SUCCESS
elif status == 3:
status = Status.TIMEOUT
elif status == 4:
status = Status.MEMORYOUT
else:
status = Status.CRASHED

if status != Status.SUCCESS:
# Costs which failed, should not be included
cost = [None] * len(cost) if isinstance(cost, list) else None
time = None
else:
time = endtime - starttime

# Round budget
if budget:
budget = np.round(budget, 2)
else:
budget = 0.0

origin = None
if config_id in config_origins:
origin = config_origins[config_id]

run.add(
costs=cost + [time] if isinstance(cost, list) else [cost, time],
config=config,
budget=budget,
seed=seed,
start_time=starttime,
end_time=endtime,
status=status,
origin=origin,
additional=additional_info,
)

if isinstance(data, list):
import warnings

warnings.warn(
"The runhistory.json file is in an outdated format.",
DeprecationWarning,
stacklevel=2, # Adjusts the stack level to point to the caller.
)
for (
config_id,
instance_id,
seed,
budget,
cost,
time,
status,
starttime,
endtime,
additional_info,
) in data:
run_dict = run._process_data_entry(
str(config_id),
instance_id,
seed,
budget,
cost,
time,
status,
starttime,
endtime,
additional_info,
first_starttime,
instance_ids,
configs,
config_origins,
)
if run_dict is not None:
run.add(**run_dict)
elif isinstance(data, dict):
for config_id, config_data in data.items():
instance_id = config_data["instance"]
seed = config_data["seed"]
budget = config_data["budget"]
cost = config_data["cost"]
time = config_data["time"]
status = config_data["status"]
starttime = config_data["starttime"]
endtime = config_data["endtime"]
additional_info = config_data["additional_info"]
run_dict = run._process_data_entry(
config_id,
instance_id,
seed,
budget,
cost,
time,
status,
starttime,
endtime,
additional_info,
first_starttime,
instance_ids,
configs,
config_origins,
)
if run_dict is not None:
run.add(**run_dict)
else:
raise RuntimeError("Data in runhistory.json is not in a valid format.")
return run

def _process_data_entry(
self,
config_id: str,
instance_id: int,
seed: int,
budget: Optional[float],
cost: Optional[Union[List[Union[float, None]], float]],
time: Optional[float],
status: int,
starttime: float,
endtime: float,
additional_info: Optional[Dict],
first_starttime: Optional[float],
instance_ids: List[int],
configs: Dict,
config_origins: Dict[str, str],
) -> Optional[Dict]:
if instance_id not in instance_ids:
instance_ids += [instance_id]

if len(instance_ids) > 1:
raise RuntimeError("Instances are not supported.")

config = configs[config_id]

if first_starttime is None:
first_starttime = starttime

starttime = starttime - first_starttime
endtime = endtime - first_starttime

if status == 0:
# still running
return None
elif status == 1:
status = Status.SUCCESS
elif status == 3:
status = Status.TIMEOUT
elif status == 4:
status = Status.MEMORYOUT
else:
status = Status.CRASHED

if status != Status.SUCCESS:
# Costs which failed, should not be included
cost = [None] * len(cost) if isinstance(cost, list) else None
time = None
else:
time = endtime - starttime

# Round budget
if budget:
budget = np.round(budget, 2)
else:
budget = 0.0

origin = None
if config_id in config_origins:
origin = config_origins[config_id]

return {
"costs": cost + [time] if isinstance(cost, list) else [cost, time],
"config": config,
"budget": budget,
"seed": seed,
"start_time": starttime,
"end_time": endtime,
"status": status,
"origin": origin,
"additional": additional_info,
}

@classmethod
def is_valid_run(cls, path_name: str) -> bool:
"""
Expand Down
Loading