diff --git a/.gitignore b/.gitignore
index a1da87e..8894f08 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ __pycache__/
 # data & pretrain-model
 backup/
+AM/
 data/
 pretrained/
diff --git a/POMO/TSP/TSPTester.py b/POMO/TSP/TSPTester.py
index 1bc8ffe..0ddb717 100644
--- a/POMO/TSP/TSPTester.py
+++ b/POMO/TSP/TSPTester.py
@@ -219,11 +219,11 @@ def _fine_tune_one_batch(self, fine_tune_data):
             prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)
         # Loss
-        aug_reward = reward.reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).view(batch_size, -1)
+        aug_reward = reward.reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1)
         # shape: (batch, augmentation * pomo)
         advantage = aug_reward - aug_reward.float().mean(dim=1, keepdims=True)
         # shape: (batch, augmentation * pomo)
-        log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).view(batch_size, -1)
+        log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1)
         # size = (batch, augmentation * pomo)
         loss = -advantage * log_prob # Minus Sign: To Increase REWARD
         # shape: (batch, augmentation * pomo)
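Note: the two edits above swap `.view(batch_size, -1)` for `.reshape(batch_size, -1)` after a `permute`. With more than one instance per batch the permuted tensor is non-contiguous, so `.view` would raise an error while `.reshape` copies when needed. A minimal, self-contained sketch of this augmentation-aware REINFORCE step (the dummy tensors and sizes are illustrative only, not the project's API):

    import torch

    aug_factor, batch_size, pomo_size = 8, 4, 20
    # rollout results arrive as (aug_factor * batch, pomo); regroup them per original instance
    reward = torch.randn(aug_factor * batch_size, pomo_size)
    log_prob = torch.randn(aug_factor * batch_size, pomo_size)
    aug_reward = reward.reshape(aug_factor, batch_size, pomo_size).permute(1, 0, 2).reshape(batch_size, -1)
    log_prob = log_prob.reshape(aug_factor, batch_size, pomo_size).permute(1, 0, 2).reshape(batch_size, -1)
    # shared baseline over all augmentation * POMO rollouts of the same instance
    advantage = aug_reward - aug_reward.mean(dim=1, keepdim=True)
    loss = (-advantage * log_prob).mean()  # minus sign: gradient ascent on reward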
self.meta_params["data_type"] == "distribution": - paths = ["tsp100_uniform.pkl", "tsp100_cluster.pkl", "tsp100_diagonal.pkl"] + paths = ["tsp100_uniform.pkl", "tsp100_gaussian.pkl", "tsp100_cluster.pkl", "tsp100_diagonal.pkl", "tsp100_tsplib.pkl"] elif self.meta_params["data_type"] == "size_distribution": pass - for val_path in paths: - if self.meta_params['meta_method'] in ['fomaml', 'reptile']: - no_aug_score = self._fast_val(copy.deepcopy(self.meta_model), path=os.path.join(dir, val_path), val_episodes=64, mode="eval") - else: + if epoch <= 1 or (epoch % img_save_interval) == 0: + for val_path in paths: no_aug_score = self._fast_val(self.meta_model, path=os.path.join(dir, val_path), val_episodes=64, mode="eval") - no_aug_score_list.append(round(no_aug_score, 4)) - self.result_log.append('val_score', epoch, no_aug_score_list) + no_aug_score_list.append(round(no_aug_score, 4)) + self.result_log.append('val_score', epoch, no_aug_score_list) + cur_mean = sum(no_aug_score_list) / len(no_aug_score_list) + # save best checkpoint + if cur_mean < best_mean: + best_mean = cur_mean + self.best_meta_model = copy.deepcopy(self.meta_model) + self.logger.info("Saving (best) trained_model") + checkpoint_dict = { + 'epoch': epoch, + 'model_state_dict': self.meta_model.state_dict(), + 'optimizer_state_dict': self.meta_optimizer.state_dict(), + 'result_log': self.result_log.get_raw_data() + } + torch.save(checkpoint_dict, '{}/best_checkpoint.pt'.format(self.result_folder)) # Logs & Checkpoint elapsed_time_str, remain_time_str = self.time_estimator.get_est_string(epoch, self.meta_params['epochs']) @@ -117,10 +135,8 @@ def run(self): all_done = (epoch == self.meta_params['epochs']) else: all_done = (time.time() - start_time) >= self.trainer_params['time_limit'] - model_save_interval = self.trainer_params['logging']['model_save_interval'] - img_save_interval = self.trainer_params['logging']['img_save_interval'] - if epoch > 1: # save latest images, every epoch + if epoch > 1 and (epoch % img_save_interval) == 0: # save latest images, every X epoch self.logger.info("Saving log_image") image_prefix = '{}/latest'.format(self.result_folder) util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['train_score']) @@ -138,11 +154,11 @@ def run(self): } torch.save(checkpoint_dict, '{}/checkpoint-{}.pt'.format(self.result_folder, epoch)) - if all_done or (epoch % img_save_interval) == 0: - image_prefix = '{}/img/checkpoint-{}'.format(self.result_folder, epoch) - util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['train_score']) - util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['val_score']) - util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'], self.result_log, labels=['train_loss']) + # if all_done or (epoch % img_save_interval) == 0: + # image_prefix = '{}/img/checkpoint-{}'.format(self.result_folder, epoch) + # util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['train_score']) + # util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['val_score']) + # util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'], self.result_log, labels=['train_loss']) if all_done: 
self.logger.info(" *** Training Done *** ") @@ -172,11 +188,11 @@ def _train_one_epoch(self, epoch): end = min(start + 10, self.max_n) # 10 is the size of the sliding window if self.meta_params["curriculum"]: print(">> training task {}".format((start, end))) elif self.meta_params["data_type"] == "distribution": - # Every X iters, evaluating 50 instances for each interval (e.g., [1, 6) / [6, 11) / ...) using LKH3 if epoch != 0 and epoch % self.meta_params['update_weight'] == 0: - self._update_task_weight() - start = torch.multinomial(self.task_w, 1).item() * self.task_interval - end = min(start + self.task_interval, self.max_n) + self._update_task_weight(epoch) + # start = int(min(epoch/self.meta_params["epochs"], 1) * (len(self.task_w)-1)) # linear + # start = int(1/2 * (1-math.cos(math.pi * min(epoch/self.meta_params['epochs'], 1))) * (len(self.task_w)-1)) # cosine + # end = min(start + 5, len(self.task_w)) elif self.meta_params["data_type"] == "size_distribution": pass @@ -189,7 +205,12 @@ def _train_one_epoch(self, epoch): task_params = random.sample(range(start, end+1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 100 else self.meta_params['meta_batch_size'] // 2 elif self.meta_params["data_type"] == "distribution": - task_params = random.sample(range(start, end+1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] + # sample based on task weights + task_params = self.task_set[torch.multinomial(self.task_w, 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] + # task_params = self.task_set[random.sample(torch.topk(self.task_w, 10)[1].tolist(), 1)[0]] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] + # curri: from easy task (small gaps) -> hard task (large gaps) + # selected_idx = torch.sort(self.task_w, descending=False)[1].tolist()[start: end] + # task_params = self.task_set[random.sample(selected_idx, 1)[0]] if self.meta_params['curriculum'] and epoch >= self.meta_params['update_weight'] else random.sample(self.task_set, 1)[0] batch_size = self.meta_params['meta_batch_size'] elif self.meta_params["data_type"] == "size_distribution": pass @@ -381,14 +402,17 @@ def _fast_val(self, model, data=None, path=None, val_episodes=32, mode="eval", r # shape: (batch, pomo) state, reward, done = env.step(selected) prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) - # Loss + + """ if self.meta_params['bootstrap_steps'] != 0: - bootstrap_reward = self._bootstrap(fast_weight, data) + bootstrap_reward = self._bootstrap(fast_weight, data, mode="maml") advantage = reward - bootstrap_reward else: # self.ema_est[data.size(1)] = 0.99 * self.ema_est[data.size(1)] + (1 - 0.99) * (-reward.float().mean().item()) if self.ema_est[data.size(1)] != 1 else -reward.float().mean().item() # reward = reward / self.ema_est[data.size(1)] advantage = reward - reward.float().mean(dim=1, keepdims=True) + """ + advantage = reward - reward.float().mean(dim=1, keepdims=True) log_prob = prob_list.log().sum(dim=2) # for the first/last node, p=1 -> log_p=0 loss = -advantage * log_prob # Minus Sign: To Increase REWARD loss_mean = loss.mean() @@ -411,59 +435,73 @@ def _fast_val(self, model, data=None, path=None, val_episodes=32, mode="eval", r else: return loss_mean - def _bootstrap(self, fast_weight, data): + def _bootstrap(self, fast_weight, data, mode="eval"): """ - Bootstrap using smaller lr; - Only support for MAML now. 
+ mode = "maml": Ref to "Bootstrap Meta-Learning", ICLR 2022; + mode = "eval": Used to update task weights. """ bootstrap_weight = fast_weight - batch_size = data.size(0) + batch_size, aug_factor = data.size(0), 8 bootstrap_reward = torch.full((batch_size, 1), float("-inf")) + if mode == "eval": + optimizer = Optimizer(bootstrap_weight.parameters(), **self.optimizer_params['optimizer']) + # optimizer.load_state_dict(self.meta_optimizer.state_dict()) with torch.enable_grad(): for L in range(self.meta_params['bootstrap_steps']): env = Env(**{'problem_size': data.size(1), 'pomo_size': data.size(1)}) - env.load_problems(batch_size, problems=data, aug_factor=1) + env.load_problems(batch_size, problems=data, aug_factor=aug_factor) reset_state, _, _ = env.reset() - self.meta_model.pre_forward(reset_state, weights=bootstrap_weight) - prob_list = torch.zeros(size=(batch_size, env.pomo_size, 0)) + if mode == "maml": + self.meta_model.pre_forward(reset_state, weights=bootstrap_weight) + elif mode == "eval": + bootstrap_weight.pre_forward(reset_state) + prob_list = torch.zeros(size=(aug_factor * batch_size, env.pomo_size, 0)) state, reward, done = env.pre_step() while not done: - selected, prob = self.meta_model(state, weights=bootstrap_weight) - state, reward, done = env.step(selected) + if mode == "maml": + selected, prob = self.meta_model(state, weights=bootstrap_weight) + elif mode == "eval": + selected, prob = bootstrap_weight(state) + state, reward, done = env.step(selected) # (aug_factor * batch_size, pomo_size) prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) + # (batch, augmentation * pomo) + reward = reward.reshape(aug_factor, batch_size, env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1) advantage = reward - reward.float().mean(dim=1, keepdims=True) - log_prob = prob_list.log().sum(dim=2) + log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1) loss = -advantage * log_prob loss_mean = loss.mean() - gradients = torch.autograd.grad(loss_mean, bootstrap_weight.values(), create_graph=False) - bootstrap_weight = OrderedDict( - (name, param - self.optimizer_params['optimizer']['lr'] * grad) - for ((name, param), grad) in zip(bootstrap_weight.items(), gradients) - ) + if mode == "maml": + # TODO: need to update + gradients = torch.autograd.grad(loss_mean, bootstrap_weight.values(), create_graph=False) + bootstrap_weight = OrderedDict( + (name, param - self.optimizer_params['optimizer']['lr'] * grad) + for ((name, param), grad) in zip(bootstrap_weight.items(), gradients) + ) + elif mode == "eval": + optimizer.zero_grad() + loss_mean.backward() + optimizer.step() max_pomo_reward, _ = reward.max(dim=1) max_pomo_reward = max_pomo_reward.view(-1, 1) - bootstrap_reward = torch.where(max_pomo_reward > bootstrap_reward, max_pomo_reward, bootstrap_reward) - score_mean, bootstrap_mean = -max_pomo_reward.float().mean(), -bootstrap_reward.float().mean() - print("Bootstrap step {}: score_mean {}, bootstrap_mean {}".format(L, score_mean, bootstrap_mean)) + bootstrap_reward = torch.where(max_pomo_reward > bootstrap_reward, max_pomo_reward, bootstrap_reward) # (batch_size, 1) return bootstrap_reward + def _get_data(self, batch_size, task_params): if self.meta_params['data_type'] == 'distribution': - assert len(task_params) == 1 - data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=0, cdist=0, distribution='uniform') - data = self._generate_x_adv(data, eps=task_params[0]) + assert len(task_params) 
+            data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[1], distribution='gaussian_mixture')
         elif self.meta_params['data_type'] == 'size':
             assert len(task_params) == 1
             data = get_random_problems(batch_size, task_params[0], num_modes=0, cdist=0, distribution='uniform')
         elif self.meta_params['data_type'] == "size_distribution":
-            assert len(task_params) == 2
-            data = get_random_problems(batch_size, task_params[0], num_modes=0, cdist=0, distribution='uniform')
-            data = self._generate_x_adv(data, eps=task_params[1])
+            assert len(task_params) == 3
+            data = get_random_problems(batch_size, task_params[0], num_modes=task_params[1], cdist=task_params[2], distribution='gaussian_mixture')
         else:
             raise NotImplementedError
@@ -473,16 +511,17 @@ def _get_val_data(self, batch_size, task_params):
         if self.meta_params["data_type"] == "size":
             start1, end1 = min(task_params[0] + 10, self.max_n), min(task_params[0] + 20, self.max_n)
             val_size = random.sample(range(start1, end1 + 1), 1)[0]
+            val_data = self._get_data(batch_size, (val_size,))
         elif self.meta_params["data_type"] == "distribution":
-            val_size = task_params[0]
+            val_data = self._get_data(batch_size, task_params)
         elif self.meta_params["data_type"] == "size_distribution":
             pass
-
-        val_data = self._get_data(batch_size, (val_size,))
+        else:
+            raise NotImplementedError
         return val_data
-    def _alpha_scheduler(self, iter):
+    def _alpha_scheduler(self, epoch):
         """
         Update param for Reptile.
         """
@@ -532,33 +571,56 @@ def minmax(xy_):
         return data
-    def _update_task_weight(self):
+    def _update_task_weight(self, epoch):
         """
         Update the weights of tasks.
         """
-        gap = torch.zeros(len(self.task_set)//self.task_interval)
+        start_t, gap = time.time(), torch.zeros(self.task_w.size(0))
         for i in range(gap.size(0)):
-            start = i * self.task_interval
-            end = min(start + self.task_interval, self.max_n)
-            selected = random.sample([j for j in range(start, end+1)], 1)[0]
-            data = self._get_data(batch_size=50, task_params=(selected, ))
+            if self.meta_params["data_type"] == "size":
+                start = i * self.task_interval
+                end = min(start + self.task_interval, self.max_n)
+                selected = random.sample([j for j in range(start, end+1)], 1)[0]
+                data = self._get_data(batch_size=50, task_params=(selected, ))
+            elif self.meta_params["data_type"] == "distribution":
+                selected = self.task_set[i]
+                data = self._get_data(batch_size=50, task_params=selected)
+            else:
+                raise NotImplementedError
             model_score = self._fast_val(self.meta_model, data=data, mode="eval", return_all=True)
             model_score = model_score.tolist()
-            # get results from LKH3 (~14s)
-            # start_t = time.time()
-            opts = argparse.ArgumentParser()
-            opts.cpus, opts.n, opts.progress_bar_mininterval = None, None, 0.1
-            dataset = [(instance.cpu().numpy(),) for instance in data]
-            executable = get_lkh_executable()
-            global run_func
-            def run_func(args):
-                return solve_lkh_log(executable, *args, runs=1, disable_cache=True) # otherwise it directly loads data from dir
-            results, _ = run_all_in_pool(run_func, "./LKH3_result", dataset, opts, use_multiprocessing=False)
-            gap_list = [(model_score[j]-results[j][0])/results[j][0]*100 for j in range(len(results))]
-            gap[i] = sum(gap_list)/len(gap_list)
-            # print(">> LKH3 finished within {}s".format(time.time()-start_t))
-        print(gap)
+            if self.meta_params["solver"] == "lkh3":
+                # get results from LKH3 (~14s)
+                opts = argparse.ArgumentParser()
+                opts.cpus, opts.n, opts.progress_bar_mininterval = None, None, 0.1
+                dataset = [(instance.cpu().numpy(),) for instance in data]
+                executable = get_lkh_executable()
+                global run_func
+                def run_func(args):
+                    return solve_lkh_log(executable, *args, runs=1, disable_cache=True) # otherwise it directly loads data from dir
+                results, _ = run_all_in_pool(run_func, "./LKH3_result", dataset, opts, use_multiprocessing=False)
+                gap_list = [(model_score[j]-results[j][0])/results[j][0]*100 for j in range(len(results))]
+                gap[i] = sum(gap_list)/len(gap_list)
+            elif self.meta_params["solver"] == "best_model":
+                best_model_score = self._fast_val(self.best_meta_model, data=data, mode="eval", return_all=True)
+                best_model_score = best_model_score.tolist()
+                gap_list = [(model_score[j] - best_model_score[j]) / best_model_score[j] * 100 for j in range(len(best_model_score))]
+                gap[i] = sum(gap_list) / len(gap_list)
+            elif self.meta_params["solver"] == "bootstrap":
+                bootstrap_reward = self._bootstrap(copy.deepcopy(self.meta_model), data, mode="eval")
+                bootstrap_score = (-bootstrap_reward).view(-1).float().tolist()
+                gap_list = [(model_score[j] - bootstrap_score[j]) / bootstrap_score[j] * 100 for j in range(len(bootstrap_score))]
+                gap[i] = sum(gap_list) / len(gap_list)
+            else:
+                raise NotImplementedError
+        print(">> Finish updating task weights within {}s".format(round(time.time()-start_t, 2)))
+
+        # temp = max(1.0 * (1 - epoch / self.meta_params["epochs"]), 0.05)
+        # temp = max(1.0 - 1/2 * (1 - math.cos(math.pi * min(epoch / self.meta_params['epochs'], 1))), 0.2)
+        temp = 0.25
+        gap_temp = torch.Tensor([i/temp for i in gap.tolist()])
+        print(gap, temp)
         print(">> Old task weights: {}".format(self.task_w))
-        self.task_w = torch.softmax(gap, dim=0)
+        self.task_w = torch.softmax(gap_temp, dim=0)
         print(">> New task weights: {}".format(self.task_w))
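The hunk above is the core of the adaptive task weighting: per-task optimality gaps are estimated against a chosen "solver" (LKH3, the best checkpoint so far, or a briefly fine-tuned bootstrap copy of the meta-model) and then turned into sampling probabilities with a temperature-scaled softmax. A stand-alone sketch of that weighting rule with made-up gap values (the temperature 0.25 and the multinomial draw mirror the diff; everything else is illustrative):

    import torch

    gap = torch.tensor([0.5, 2.0, 8.0, 1.0])   # estimated optimality gaps (%), one per task
    temp = 0.25                                # lower temperature -> sharper focus on hard tasks
    task_w = torch.softmax(gap / temp, dim=0)  # larger gap => larger sampling weight
    next_task = torch.multinomial(task_w, 1).item()  # index of the next training task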
diff --git a/POMO/TSP/TSPTrainer_pomo.py b/POMO/TSP/TSPTrainer_pomo.py
index 7b836f2..74cc7b5 100644
--- a/POMO/TSP/TSPTrainer_pomo.py
+++ b/POMO/TSP/TSPTrainer_pomo.py
@@ -17,6 +17,7 @@
 from utils.utils import *
 from utils.functions import *
+from TSP_baseline import *
 class TSPTrainer:
@@ -57,6 +58,7 @@ def __init__(self,
         self.meta_model = Model(**self.model_params)
         self.optimizer = Optimizer(self.meta_model.parameters(), **self.optimizer_params['optimizer'])
         self.task_set = generate_task_set(self.meta_params)
+        self.task_w = torch.full((len(self.task_set),), 1 / len(self.task_set))
         # Restore
         self.start_epoch = 1
@@ -85,18 +87,21 @@ def run(self):
             train_score, train_loss = self._train_one_epoch(epoch)
             self.result_log.append('train_score', epoch, train_score)
             self.result_log.append('train_loss', epoch, train_loss)
+            model_save_interval = self.trainer_params['logging']['model_save_interval']
+            img_save_interval = self.trainer_params['logging']['img_save_interval']
             # Val
             dir, no_aug_score_list = "../../data/TSP/", []
             if self.meta_params["data_type"] == "size":
                 paths = ["tsp50_uniform.pkl", "tsp100_uniform.pkl", "tsp200_uniform.pkl"]
             elif self.meta_params["data_type"] == "distribution":
-                paths = ["tsp100_uniform.pkl", "tsp100_cluster.pkl", "tsp100_diagonal.pkl"]
+                paths = ["tsp100_uniform.pkl", "tsp100_gaussian.pkl", "tsp100_cluster.pkl", "tsp100_diagonal.pkl", "tsp100_tsplib.pkl"]
             elif self.meta_params["data_type"] == "size_distribution":
                 pass
-            for val_path in paths:
-                no_aug_score = self._fast_val(self.meta_model, path=os.path.join(dir, val_path), val_episodes=64)
-                no_aug_score_list.append(round(no_aug_score, 4))
-            self.result_log.append('val_score', epoch, no_aug_score_list)
+            if epoch <= 1 or (epoch % img_save_interval) == 0:
+                for val_path in paths:
+                    no_aug_score = self._fast_val(self.meta_model, path=os.path.join(dir, val_path), val_episodes=64)
+                    no_aug_score_list.append(round(no_aug_score, 4))
+                self.result_log.append('val_score', epoch, no_aug_score_list)
             # Logs & Checkpoint
             elapsed_time_str, remain_time_str = self.time_estimator.get_est_string(epoch, self.meta_params['epochs'])
@@ -107,10 +112,8 @@ def run(self):
                 all_done = (epoch == self.meta_params['epochs'])
             else:
                 all_done = (time.time() - start_time) >= self.trainer_params['time_limit']
-            model_save_interval = self.trainer_params['logging']['model_save_interval']
-            img_save_interval = self.trainer_params['logging']['img_save_interval']
-            if epoch > 1: # save latest images, every epoch
+            if epoch > 1 and (epoch % img_save_interval) == 0: # save latest images, every X epoch
                 self.logger.info("Saving log_image")
                 image_prefix = '{}/latest'.format(self.result_folder)
                 util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['train_score'])
@@ -129,11 +132,11 @@ def run(self):
                 }
                 torch.save(checkpoint_dict, '{}/checkpoint-{}.pt'.format(self.result_folder, epoch))
-            if all_done or (epoch % img_save_interval) == 0:
-                image_prefix = '{}/img/checkpoint-{}'.format(self.result_folder, epoch)
-                util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['train_score'])
-                util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['val_score'])
-                util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'], self.result_log, labels=['train_loss'])
+            # if all_done or (epoch % img_save_interval) == 0:
+            #     image_prefix = '{}/img/checkpoint-{}'.format(self.result_folder, epoch)
+            #     util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['train_score'])
+            #     util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['val_score'])
+            #     util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'], self.result_log, labels=['train_loss'])
             if all_done:
                 self.logger.info(" *** Training Done *** ")
@@ -149,7 +152,7 @@ def _train_one_epoch(self, epoch):
         score_AM = AverageMeter()
         loss_AM = AverageMeter()
-        # Curriculum learning
+        # Curriculum learning - TODO: need to update
         if self.meta_params["data_type"] in ["size", "distribution"]:
             self.min_n, self.max_n = self.task_set[0][0], self.task_set[-1][0] # [20, 150] / [0, 130]
             # start = self.min_n + int(epoch/self.meta_params['epochs'] * (self.max_n - self.min_n)) # linear
@@ -166,10 +169,11 @@ def _train_one_epoch(self, epoch):
             task_params = random.sample(range(start, end + 1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
             batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 100 else self.meta_params['meta_batch_size'] // 2
         elif self.meta_params["data_type"] == "distribution":
-            task_params = random.sample(range(start, end + 1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
+            task_params = self.task_set[torch.multinomial(self.task_w, 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
             batch_size = self.meta_params['meta_batch_size']
         elif self.meta_params["data_type"] == "size_distribution":
             pass
+
         data = self._get_data(batch_size, task_params)
         env_params = {'problem_size': data.size(1), 'pomo_size': data.size(1)}
         avg_score, avg_loss = self._train_one_batch(data, Env(**env_params))
@@ -220,7 +224,7 @@ def _train_one_batch(self, data, env):
         return score_mean, loss_mean
-    def _fast_val(self, model, data=None, path=None, val_episodes=32):
+    def _fast_val(self, model, data=None, path=None, val_episodes=32, return_all=False):
         aug_factor = 1
         data = torch.Tensor(load_dataset(path)[: val_episodes]) if data is None else data
         env = Env(**{'problem_size': data.size(1), 'pomo_size': data.size(1)})
@@ -244,22 +248,24 @@ def _fast_val(self, model, data=None, path=None, val_episodes=32):
         max_pomo_reward, _ = aug_reward.max(dim=2) # get best results from pomo
         # shape: (augmentation, batch)
         no_aug_score = -max_pomo_reward[0, :].float().mean() # negative sign to make positive value
+        print(no_aug_score)
-        return no_aug_score.detach().item()
+        if return_all:
+            return -max_pomo_reward[0, :].float()
+        else:
+            return no_aug_score.detach().item()
     def _get_data(self, batch_size, task_params):
         if self.meta_params['data_type'] == 'distribution':
-            assert len(task_params) == 1
-            data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=0, cdist=0, distribution='uniform')
-            data = self._generate_x_adv(data, eps=task_params[0])
+            assert len(task_params) == 2
+            data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[1], distribution='gaussian_mixture')
         elif self.meta_params['data_type'] == 'size':
             assert len(task_params) == 1
             data = get_random_problems(batch_size, task_params[0], num_modes=0, cdist=0, distribution='uniform')
         elif self.meta_params['data_type'] == "size_distribution":
-            assert len(task_params) == 2
-            data = get_random_problems(batch_size, task_params[0], num_modes=0, cdist=0, distribution='uniform')
-            data = self._generate_x_adv(data, eps=task_params[1])
+            assert len(task_params) == 3
+            data = get_random_problems(batch_size, task_params[0], num_modes=task_params[1], cdist=task_params[2], distribution='gaussian_mixture')
         else:
             raise NotImplementedError
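Both trainers now share one task-tuple convention in `_get_data`: `(n,)` for "size", `(num_modes, cdist)` for "distribution", and `(n, num_modes, cdist)` for "size_distribution". The sketch below only illustrates that dispatch; `get_random_problems` is stubbed here (its call signature is taken from the diff, the body is not the repository's implementation):

    import torch

    def get_random_problems(batch_size, problem_size, num_modes=0, cdist=0, distribution='uniform'):
        # stub: uniform points stand in for every distribution in this illustration
        return torch.rand(batch_size, problem_size, 2)

    def get_data(batch_size, data_type, task_params):
        if data_type == 'distribution':         # task_params = (num_modes, cdist), fixed size 100
            return get_random_problems(batch_size, 100, *task_params, distribution='gaussian_mixture')
        elif data_type == 'size':               # task_params = (n,)
            return get_random_problems(batch_size, task_params[0], 0, 0, distribution='uniform')
        elif data_type == 'size_distribution':  # task_params = (n, num_modes, cdist)
            return get_random_problems(batch_size, task_params[0], *task_params[1:], distribution='gaussian_mixture')
        raise NotImplementedError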
tasks with type {}".format(len(task_set), meta_params['data_type'])) @@ -93,7 +100,7 @@ def gaussian_mixture(graph_size=100, num_modes=0, cdist=1): xy = MinMaxScaler().fit_transform(xy) return xy - if num_modes == 0 and cdist == 0: + if num_modes == 0: return np.random.uniform(0, 1, [dataset_size, graph_size, 2]) else: res = [] @@ -230,16 +237,17 @@ def generate_tsp_dist(n_samples, n_nodes, distribution): seed_everything(seed=2023) # var-dist test data - for dist in ["uniform", "uniform_rectangle", "gaussian", "cluster", "diagonal", "tsplib"]: - print(">> Generating TSP instances following {} distribution!".format(dist)) - get_random_problems(20000, 100, distribution=dist, path=path) + # for dist in ["uniform", "uniform_rectangle", "gaussian", "cluster", "diagonal", "tsplib"]: + # print(">> Generating TSP instances following {} distribution!".format(dist)) + # get_random_problems(20000, 100, distribution=dist, path=path) - for s in [150, 200]: - print(">> Generating TSP instances of size {}!".format(s)) - get_random_problems(20000, s, distribution="uniform", path=path) + # var-size test data + # for s in [300, 500]: + # print(">> Generating TSP instances of size {}!".format(s)) + # get_random_problems(1000, s, distribution="uniform", path=path) - # data = generate_gaussian_mixture_tsp(dataset_size=64, graph_size=100, num_modes=1, cdist=1) + data = generate_gaussian_mixture_tsp(dataset_size=64, graph_size=100, num_modes=9, cdist=1) # data = load_dataset("../../data/TSP/tsp100_cluster.pkl") # print(type(data), data.size(), data) - # x, y = data[9215, :, 0].tolist(), data[9215, :, -1].tolist() - # show([x], [y], label=["Gaussian Mixture"], title="TSP100", xdes="x", ydes="y", path="./tsp.pdf") + x, y = data[0, :, 0].tolist(), data[0, :, -1].tolist() + show([x], [y], label=["Gaussian Mixture"], title="TSP100", xdes="x", ydes="y", path="./tsp.pdf") diff --git a/POMO/TSP/train.py b/POMO/TSP/train.py index 5f60f89..9645228 100644 --- a/POMO/TSP/train.py +++ b/POMO/TSP/train.py @@ -54,7 +54,7 @@ 'train_batch_size': 64, 'logging': { 'model_save_interval': 5000, - 'img_save_interval': 5000, + 'img_save_interval': 10, 'log_image_params_1': { 'json_foldername': 'log_image_style', 'filename': 'general.json' @@ -75,14 +75,15 @@ 'enable': True, # whether use meta-learning or not 'curriculum': True, # adaptive sample task 'meta_method': 'maml', # choose from ['maml', 'fomaml', 'reptile'] - 'bootstrap_steps': 0, + 'bootstrap_steps': 20, 'data_type': 'distribution', # choose from ["size", "distribution", "size_distribution"] 'epochs': 50000, # the number of meta-model updates: (250*100000) / (1*5*64) 'B': 1, # the number of tasks in a mini-batch 'k': 1, # gradient decent steps in the inner-loop optimization of meta-learning method 'meta_batch_size': 64, # will be divided by 2 if problem_size >= 100 'num_task': 100, # the number of tasks in the training task set: e.g., [20, 150] / [0, 100] - 'update_weight': 1000, # update weight of rach task per X iters + 'update_weight': 2000, # update weight of each task per X iters + 'solver': 'bootstrap', # solver used to update the task weights, choose from ["best_model", "bootstrap", "lkh3"] 'alpha': 0.99, # params for the outer-loop optimization of reptile 'alpha_decay': 0.999, # params for the outer-loop optimization of reptile }