model_evaluations.py
from main import neural_run
from omegaconf import OmegaConf, DictConfig
import ugle
import argparse
from ugle.logger import log
import pickle
from os.path import exists
from os import makedirs
import re


def identify_type(s):
    """Identify whether the string s encodes a bool, int or float, returning that type (str otherwise)."""
    # Check for boolean
    if s.lower() in ('true', 'false'):
        return bool
    # Check for integer
    elif s.isnumeric():
        return int
    # Check for float
    try:
        float(s)
        return float
    except ValueError:
        pass
    # If none of the above, it's a string
    return str
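
# Illustrative usage:
#   identify_type('True') -> bool
#   identify_type('42')   -> int
#   identify_type('0.05') -> float
#   identify_type('adam') -> str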


def run_study(study_override_cfg: DictConfig, algorithm: str, dataset: str, seeds: list, check_save):
    """
    Runs a study of one algorithm on one dataset over multiple seeds.

    Args:
        study_override_cfg (DictConfig): study configuration object
        algorithm (str): name of the algorithm to test on
        dataset (str): name of the dataset to test on
        seeds (list): list of seeds to test over
        check_save: previously parsed per-seed results to resume from, or None

    Returns:
        average_results (Dict): the results for each metric averaged over the seeds
    """
    study_cfg = study_override_cfg.copy()
    average_results = ugle.utils.create_study_tracker(len(seeds), study_cfg.trainer.test_metrics)
    if check_save is not None:
        study_results = OmegaConf.create({'dataset': dataset,
                                          'model': algorithm,
                                          'average_results': {},
                                          'results': check_save})
        for seed_res in check_save:
            for res in seed_res['study_output']:
                study_cfg.trainer.hps_found_so_far.append(res['args'])
    else:
        study_results = OmegaConf.create({'dataset': dataset,
                                          'model': algorithm,
                                          'average_results': {},
                                          'results': []})
    study_cfg.previous_results = None

    # repeat training over all seeds
    for idx, seed in enumerate(seeds):
        study_cfg.args.random_seed = seed
        log.info(f'Study -- {algorithm}:{dataset}:Seed({seed})')

        # results stores the output of one algorithm run
        if ugle.utils.is_neural(algorithm):
            results = neural_run(override_model=algorithm,
                                 override_dataset=dataset,
                                 override_cfg=study_cfg)

        # save study output
        study_results.results.append({'seed': seed, 'study_output': results})
        average_results = ugle.utils.collate_study_results(average_results, results, idx)

        # use the first seed's hyperparameters and train/test on the remaining seeds
        if idx == 0 and check_save is not None:
            study_cfg.previous_results = results
        # enqueue all best hyperparameters found so far as suggested trials
        if study_cfg.trainer.suggest_hps_from_previous_seed:
            for res in results:
                study_cfg.trainer.hps_found_so_far.append(res['args'])
        if study_cfg.trainer.use_hps_on_all_seeds:
            study_cfg.trainer.only_testing = True

        # checkpoint the study after every seed
        if not exists(study_cfg.trainer.results_path):
            makedirs(study_cfg.trainer.results_path)
        save_path = f"{study_cfg.trainer.results_path}{dataset}_{algorithm}"
        pickle.dump(study_results, open(f"{save_path}.pkl", "wb"))

    # average_results stores the statistics averaged over all seeds
    average_results = ugle.utils.calc_average_results(average_results)
    study_results.average_results = average_results

    # save the result of the study
    if not exists(study_cfg.trainer.results_path):
        makedirs(study_cfg.trainer.results_path)
    save_path = f"{study_cfg.trainer.results_path}{dataset}_{algorithm}"
    pickle.dump(study_results, open(f"{save_path}.pkl", "wb"))

    return average_results
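
# Illustrative: inspecting a study result saved by run_study. The path is an
# example; files land under study_cfg.trainer.results_path as <dataset>_<algorithm>.pkl:
#   study = pickle.load(open('results/cora_dmon.pkl', 'rb'))
#   print(study.average_results)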


def run_experiment(exp_cfg_name: str,
                   dataset_algorithm_override: str = None,
                   gpu_override: str = None,
                   test_run: bool = False,
                   check_save: str = None):
    """
    Runs an experiment, which consists of multiple models and datasets.

    Args:
        exp_cfg_name: location of the yaml file containing the experiment configuration
        dataset_algorithm_override: dataset_algorithm combination which can be used to run a
            single experiment if datasets and algorithms are both empty
        gpu_override: gpu id to run the experiment on
        test_run: if True, runs a reduced experiment for testing purposes
        check_save: name of a log file under ./check_save/ from which to resume a partially
            completed study
    """
    # load experiment config
    log.info(f'loading experiment: {exp_cfg_name}')
    exp_cfg = OmegaConf.load('ugle/configs/experiments/exp_cfg_template.yaml')
    exp_cfg = ugle.utils.merge_yaml(exp_cfg, exp_cfg_name)

    if dataset_algorithm_override:
        exp_cfg.dataset_algo_combinations = [dataset_algorithm_override]
        exp_cfg.datasets = []
        exp_cfg.algorithms = []
    if gpu_override:
        exp_cfg.study_override_cfg.trainer.gpu = gpu_override
    if test_run:
        if not check_save:
            exp_cfg.seeds = [42, 69]
        exp_cfg.study_override_cfg.args.max_epoch = 1
        exp_cfg.study_override_cfg.trainer.n_trials_hyperopt = 2
    save_path = exp_cfg.study_override_cfg.trainer.results_path

    if check_save is not None:
        # parse the per-seed results back out of the saved log file
        with open(f"./check_save/{check_save}", 'r') as content_file:
            content = content_file.read()
        trial_splits = content.split("Trial 249 finished")[1:-1]
        dset = dataset_algorithm_override.split('_')[0]
        algo = dataset_algorithm_override.split('_')[1]
        loaded_results_dict = {'average_results': {},
                               'dataset': dset,
                               'model': algo,
                               'results': []}
        for sidx, str_content in enumerate(trial_splits):
            seed_dict = {'seed': exp_cfg.seeds[sidx], 'study_output': []}
            for match in re.finditer("Best hyperparameters for metric", str_content):
                start_metrics_idx = match.end() + 5
                end_of_potential_metrics_idx = str_content[start_metrics_idx:].find("\x1b")
                pot_metrics = str_content[start_metrics_idx:start_metrics_idx + end_of_potential_metrics_idx - 1]
                pot_metrics = pot_metrics.split(",")
                pot_metrics = [metric[1:] if idx > 0 else metric for idx, metric in enumerate(pot_metrics)]
                for valid_metric in exp_cfg.study_override_cfg.trainer.valid_metrics:
                    if valid_metric in pot_metrics:
                        study_out_dict = {'args': {},
                                          'metrics': valid_metric,
                                          'results': {},
                                          'validation_results': {}}
                        metric_dict = {}
                        for test_metric in exp_cfg.study_override_cfg.trainer.test_metrics:
                            # find the point where results start appearing as metric=value
                            start_idx = str_content[start_metrics_idx:].find(f"{test_metric}=")
                            # find the end of the metric result
                            if test_metric != 'conductance':
                                stringtofind = ','
                            else:
                                stringtofind = "\x1b"
                            end_idx = str_content[start_metrics_idx + start_idx:].find(stringtofind)
                            end_idx = end_idx - len(test_metric) - 1
                            startincontent = start_metrics_idx + len(test_metric) + 1 + start_idx
                            metric_number = float(str_content[startincontent:startincontent + end_idx])
                            metric_dict[test_metric] = metric_number

                        all_args_stuff = str_content[start_metrics_idx + end_of_potential_metrics_idx - 1:startincontent + end_idx]
                        args_dict = {}
                        for arg_match in re.finditer(" : ", all_args_stuff):
                            start_arg = all_args_stuff[:arg_match.start()].rfind(']')
                            arg = all_args_stuff[start_arg + 2:arg_match.start()]
                            end_value = all_args_stuff[arg_match.start():].find("\x1b")
                            value = all_args_stuff[arg_match.end():arg_match.start() + end_value]
                            # check if the value is a float, bool, int or str
                            if identify_type(value) == float:
                                args_dict[str(arg)] = float(value)
                            elif identify_type(value) == bool:
                                # bool('false') would be truthy, so compare explicitly
                                args_dict[str(arg)] = value.lower() == 'true'
                            elif identify_type(value) == int:
                                args_dict[str(arg)] = int(value)
                            elif identify_type(value) == str:
                                args_dict[str(arg)] = str(value)

                        study_out_dict['args'] = args_dict
                        study_out_dict['results'] = metric_dict
                        seed_dict['study_output'].append(study_out_dict)
            loaded_results_dict['results'].append(seed_dict)
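
    # Note: the parsing above assumes log lines shaped roughly like the following,
    # reconstructed from the string offsets used rather than from a documented format:
    #   "Best hyperparameters for metric(s): f1, nmi\x1b..."   (metric list)
    #   "[...] learning_rate : 0.001\x1b..."                   (hyperparameter args)
    #   "f1=0.65, nmi=0.43, conductance=0.12\x1b"              (test results)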
    # creating experiment iterator
    experiment_tracker = ugle.utils.create_experiment_tracker(exp_cfg)
    experiments_cpu = []

    if check_save:
        # drop the seeds already completed in the saved log and resume from the parsed results
        exp_cfg.seeds = exp_cfg.seeds[sidx + 1:]
        check_save = loaded_results_dict['results']
    else:
        check_save = None
    for experiment in experiment_tracker:
        log.debug('starting new experiment ...')
        log.debug(f'testing dataset: {experiment.dataset}')
        log.debug(f'testing algorithm: {experiment.algorithm}')
        try:
            # run experiment
            experiment_results = run_study(exp_cfg.study_override_cfg,
                                           experiment.algorithm,
                                           experiment.dataset,
                                           exp_cfg.seeds,
                                           check_save)
            # save result in experiment tracker
            experiment.results = experiment_results
            ugle.utils.save_experiment_tracker(experiment_tracker, save_path)
        # if the run breaks (e.g. gpu OOM), schedule it to be retried on cpu
        except Exception as e:
            log.exception(str(e))
            log.info('adding to cpu fallback test')
            experiments_cpu.append(experiment)

    # run all experiments that didn't work on gpu
    if experiments_cpu and exp_cfg.run_cpu_fallback:
        log.info('launching cpu fallback experiments')
        exp_cfg.study_override_cfg.trainer.gpu = -1

        for experiment in experiments_cpu:
            log.debug('starting new experiment ...')
            log.debug(f'testing dataset: {experiment.dataset}')
            log.debug(f'testing algorithm: {experiment.algorithm}')
            # run experiment
            experiment_results = run_study(exp_cfg.study_override_cfg,
                                           experiment.algorithm,
                                           experiment.dataset,
                                           exp_cfg.seeds,
                                           check_save)
            # save result in experiment tracker
            experiment.results = experiment_results
            ugle.utils.save_experiment_tracker(experiment_tracker, save_path)
    elif experiments_cpu:
        log.info('The following combinations lead to OOM')
        for experiment in experiments_cpu:
            log.info(f'{experiment.dataset} : {experiment.algorithm}')
    return


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='parses the experiment to run')
    parser.add_argument('-ec', '--experiment_config', type=str, required=True,
                        help='the location of the experiment config')
    parser.add_argument('-da', '--dataset_algorithm_override', type=str, default=None,
                        help='dataset_algorithm override setting')
    parser.add_argument('--gpu', type=str, default=None,
                        help='gpu id to run the experiment on')
    parser.add_argument('--test_run', action='store_true',
                        help='run a reduced experiment for testing purposes')
    parser.add_argument('-cs', type=str, default=None,
                        help='log file under ./check_save/ to resume a study from')
    parsed, unknown = parser.parse_known_args()
    run_experiment(parsed.experiment_config, parsed.dataset_algorithm_override,
                   parsed.gpu, parsed.test_run, parsed.cs)
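
# Illustrative invocation (the config path and dataset_algorithm combination are
# placeholders, not fixed by this script):
#   python model_evaluations.py -ec ugle/configs/experiments/my_exp.yaml -da cora_dmon --test_run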