From 8db4b459060561cf21ce27d39f32e53f0f686473 Mon Sep 17 00:00:00 2001
From: RoyalSkye
Date: Thu, 25 May 2023 16:28:30 +0800
Subject: [PATCH] update POMO code

---
 POMO/CVRP/CVRPTrainer_meta.py |  33 +++++------
 POMO/CVRP/CVRPTrainer_pomo.py |  39 ++++++-------
 POMO/CVRP/train.py            |   8 ++-
 POMO/TSP/TSPTrainer_Meta.py   | 101 +++++-----------------------------
 POMO/TSP/TSPTrainer_pomo.py   |  88 ++++++-----------------
 POMO/TSP/train.py             |   7 ++-
 README.md                     |   7 ++-
 7 files changed, 81 insertions(+), 202 deletions(-)

diff --git a/POMO/CVRP/CVRPTrainer_meta.py b/POMO/CVRP/CVRPTrainer_meta.py
index 5577e36..7e62ab5 100644
--- a/POMO/CVRP/CVRPTrainer_meta.py
+++ b/POMO/CVRP/CVRPTrainer_meta.py
@@ -18,7 +18,7 @@
 
 class CVRPTrainer:
     """
-    Implementation of POMO with MAML / FOMAML / Reptile on CVRP.
+    Implementation of POMO with MAML / FOMAML / Reptile / Bootstrap Meta-learning on CVRP.
     For MAML & FOMAML, ref to "Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks";
     For Reptile, ref to "On First-Order Meta-Learning Algorithms" and "On the generalization of neural combinatorial optimization heuristics".
     """
@@ -35,6 +35,7 @@ def __init__(self,
         self.optimizer_params = optimizer_params
         self.trainer_params = trainer_params
         self.meta_params = meta_params
+        assert self.meta_params['data_type'] == "size_distribution", "Not supported, need to modify the code!"
 
         # result folder, logger
         self.logger = getLogger(name='trainer')
@@ -62,7 +63,6 @@ def __init__(self,
         self.alpha = self.meta_params['alpha']  # for reptile
         self.task_set = generate_task_set(self.meta_params)
         self.val_data, self.val_opt = {}, {}  # for lkh3_offline
-        assert not (self.meta_params['curriculum'] and self.meta_params["data_type"] in ["size", "distribution"]), "Not Implemented!"
         if self.meta_params["data_type"] == "size_distribution":
             # hardcoded - task_set: range(self.min_n, self.max_n, self.task_interval) * self.num_dist
             self.min_n, self.max_n, self.task_interval, self.num_dist = 50, 200, 5, 11
@@ -71,6 +71,7 @@ def __init__(self,
         # Restore
         self.start_epoch = 1
         model_load = trainer_params['model_load']
+        pretrain_load = trainer_params['pretrain_load']
         if model_load['enable']:
             checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load)
             checkpoint = torch.load(checkpoint_fullname, map_location=self.device)
@@ -78,7 +79,14 @@ def __init__(self,
             self.start_epoch = 1 + model_load['epoch']
             self.result_log.set_raw_data(checkpoint['result_log'])
             self.meta_optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
-            self.logger.info(">> Model loaded from {}".format(checkpoint_fullname))
+            self.logger.info('Checkpoint loaded successfully from {}'.format(checkpoint_fullname))
+
+        elif pretrain_load['enable']:  # meta-training on a pretrained model
+            self.logger.info(">> Loading pretrained model: be careful with the type of the normalization layer!")
+            checkpoint_fullname = '{path}'.format(**pretrain_load)
+            checkpoint = torch.load(checkpoint_fullname, map_location=self.device)
+            self.model.load_state_dict(checkpoint['model_state_dict'])
+            self.logger.info('Pretrained model loaded successfully from {}'.format(checkpoint_fullname))
 
         # utility
         self.time_estimator = TimeEstimator()
@@ -105,13 +113,7 @@ def run(self):
             img_save_interval = self.trainer_params['logging']['img_save_interval']
             # Val
             no_aug_score_list = []
-            if self.meta_params["data_type"] == "size":
-                dir = "../../data/CVRP/Size/"
-                paths = ["cvrp100_uniform.pkl", "cvrp200_uniform.pkl", "cvrp300_uniform.pkl"]
-            elif self.meta_params["data_type"] == "distribution":
-                dir = "../../data/CVRP/Distribution/"
-                paths = ["cvrp100_uniform.pkl", "cvrp100_gaussian.pkl", "cvrp100_cluster.pkl", "cvrp100_diagonal.pkl", "cvrp100_cvrplib.pkl"]
-            elif self.meta_params["data_type"] == "size_distribution":
+            if self.meta_params["data_type"] == "size_distribution":
                 dir = "../../data/CVRP/Size_Distribution/"
                 paths = ["cvrp200_uniform.pkl", "cvrp300_rotation.pkl"]
             if epoch <= 1 or (epoch % img_save_interval) == 0:
@@ -196,13 +198,7 @@ def _train_one_epoch(self, epoch):
             # sample a batch of tasks
             w, selected_tasks = [1.0] * self.meta_params['B'], []
             for b in range(self.meta_params['B']):
-                if self.meta_params["data_type"] == "size":
-                    task_params = random.sample(self.task_set, 1)[0]
-                    batch_size = meta_batch_size if task_params[0] <= 150 else meta_batch_size // 2
-                elif self.meta_params["data_type"] == "distribution":
-                    task_params = random.sample(self.task_set, 1)[0]
-                    batch_size = meta_batch_size
-                elif self.meta_params["data_type"] == "size_distribution":
+                if self.meta_params["data_type"] == "size_distribution":
                     selected = torch.multinomial(self.task_w[idx], 1).item()
                     task_params = tasks[selected] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
                     batch_size = meta_batch_size if task_params[0] <= 150 else meta_batch_size // 2
@@ -525,9 +521,6 @@ def _get_data(self, batch_size, task_params, return_capacity=False):
 
     def _get_val_data(self, batch_size, task_params):
         if self.meta_params["data_type"] == "size":
-            # start1, end1 = min(task_params[0] + 10, self.max_n), min(task_params[0] + 20, self.max_n)
-            # val_size = random.sample(range(start1, end1 + 1), 1)[0]
-            # val_data = self._get_data(batch_size, (val_size,))
             val_data = self._get_data(batch_size, task_params)
         elif self.meta_params["data_type"] == "distribution":
             val_data = self._get_data(batch_size, task_params)
diff --git a/POMO/CVRP/CVRPTrainer_pomo.py b/POMO/CVRP/CVRPTrainer_pomo.py
index 86001d8..597e761 100644
--- a/POMO/CVRP/CVRPTrainer_pomo.py
+++ b/POMO/CVRP/CVRPTrainer_pomo.py
@@ -33,6 +33,7 @@ def __init__(self,
         self.optimizer_params = optimizer_params
         self.trainer_params = trainer_params
         self.meta_params = meta_params
+        assert self.meta_params['data_type'] == "size_distribution", "Not supported, need to modify the code!"
 
         # result folder, logger
         self.logger = getLogger(name='trainer')
@@ -56,7 +57,6 @@ def __init__(self,
         self.optimizer = Optimizer(self.model.parameters(), **self.optimizer_params['optimizer'])
         self.task_set = generate_task_set(self.meta_params)
         self.val_data, self.val_opt = {}, {}  # for lkh3_offline
-        assert not (self.meta_params['curriculum'] and self.meta_params["data_type"] in ["size", "distribution"]), "Not Implemented!"
         if self.meta_params["data_type"] == "size_distribution":
             # hardcoded - task_set: range(self.min_n, self.max_n, self.task_interval) * self.num_dist
             self.min_n, self.max_n, self.task_interval, self.num_dist = 50, 200, 5, 11
@@ -65,6 +65,7 @@ def __init__(self,
         # Restore
         self.start_epoch = 1
         model_load = trainer_params['model_load']
+        pretrain_load = trainer_params['pretrain_load']
         if model_load['enable']:
             checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load)
             checkpoint = torch.load(checkpoint_fullname, map_location=self.device)
@@ -72,7 +73,14 @@ def __init__(self,
             self.start_epoch = 1 + model_load['epoch']
             self.result_log.set_raw_data(checkpoint['result_log'])
             self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
-            self.logger.info('Saved Model Loaded !!')
+            self.logger.info('Checkpoint loaded successfully from {}'.format(checkpoint_fullname))
+
+        elif pretrain_load['enable']:  # meta-training on a pretrained model
+            self.logger.info(">> Loading pretrained model: be careful with the type of the normalization layer!")
+            checkpoint_fullname = '{path}'.format(**pretrain_load)
+            checkpoint = torch.load(checkpoint_fullname, map_location=self.device)
+            self.model.load_state_dict(checkpoint['model_state_dict'])
+            self.logger.info('Pretrained model loaded successfully from {}'.format(checkpoint_fullname))
 
         # utility
         self.time_estimator = TimeEstimator()
@@ -83,12 +91,12 @@ def run(self):
 
             self.logger.info('=================================================================')
 
-            # lr decay (by 10) to speed up convergence at 90th and 95th iterations
-            # if epoch in [int(self.meta_params['epochs'] * 0.9)]:
-            #     self.optimizer_params['optimizer']['lr'] /= 10
-            #     for group in self.optimizer.param_groups:
-            #         group["lr"] /= 10
-            #         print(">> LR decay to {}".format(group["lr"]))
+            # lr decay (by 10) to speed up convergence, applied at 90% of total epochs
+            if epoch in [int(self.meta_params['epochs'] * 0.9)]:
+                self.optimizer_params['optimizer']['lr'] /= 10
+                for group in self.optimizer.param_groups:
+                    group["lr"] /= 10
+                    print(">> LR decay to {}".format(group["lr"]))
 
             # Train
             train_score, train_loss = self._train_one_epoch(epoch)
@@ -98,13 +106,7 @@ def run(self):
             img_save_interval = self.trainer_params['logging']['img_save_interval']
             # Val
             no_aug_score_list = []
-            if self.meta_params["data_type"] == "size":
-                dir = "../../data/CVRP/Size/"
-                paths = ["cvrp100_uniform.pkl", "cvrp200_uniform.pkl", "cvrp300_uniform.pkl"]
-            elif self.meta_params["data_type"] == "distribution":
-                dir = "../../data/CVRP/Distribution/"
-                paths = ["cvrp100_uniform.pkl", "cvrp100_gaussian.pkl", "cvrp100_cluster.pkl", "cvrp100_diagonal.pkl", "cvrp100_cvrplib.pkl"]
-            elif self.meta_params["data_type"] == "size_distribution":
+            if self.meta_params["data_type"] == "size_distribution":
                 dir = "../../data/CVRP/Size_Distribution/"
                 paths = ["cvrp200_uniform.pkl", "cvrp300_rotation.pkl"]
             if epoch <= 1 or (epoch % img_save_interval) == 0:
@@ -165,12 +167,7 @@ def _train_one_epoch(self, epoch):
 
         # sample a batch of tasks
         for b in range(self.meta_params['B']):
             for step in range(self.meta_params['k']):
-                if self.meta_params["data_type"] == "size":
-                    task_params = random.sample(self.task_set, 1)[0]
-                    batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2
-                elif self.meta_params["data_type"] == "distribution":
-                    task_params = random.sample(self.task_set, 1)[0]
-                elif self.meta_params["data_type"] == "size_distribution":
+                if self.meta_params["data_type"] == "size_distribution":
                     task_params = tasks[torch.multinomial(self.task_w[idx], 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
                     batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2
diff --git a/POMO/CVRP/train.py b/POMO/CVRP/train.py
index b6cf5ba..f0f0f18 100644
--- a/POMO/CVRP/train.py
+++ b/POMO/CVRP/train.py
@@ -30,7 +30,6 @@
     'ff_hidden_dim': 512,
     'eval_type': 'argmax',
     'meta_update_encoder': True,
-    # 'norm': 'batch_no_track'
 }
 
 optimizer_params = {
@@ -57,11 +56,16 @@
             'filename': 'style_loss_1.json'
         },
     },
+    # load previous checkpoint for meta-training
     'model_load': {
         'enable': False,  # enable loading pre-trained model
         'path': './result/saved_CVRP20_model',  # directory path of pre-trained model and log files saved.
         'epoch': 2000,  # epoch version of pre-trained model to load.
-
+    },
+    # load a pretrained model for meta-training instead of meta-training from scratch
+    'pretrain_load': {
+        'enable': False,
+        'path': '../../pretrained/POMO-CVRP/checkpoint-30500-cvrp100-instance-norm.pt',  # be careful with the type of the normalization layer
     }
 }
 
diff --git a/POMO/TSP/TSPTrainer_Meta.py b/POMO/TSP/TSPTrainer_Meta.py
index 291a1e8..f9dfd77 100644
--- a/POMO/TSP/TSPTrainer_Meta.py
+++ b/POMO/TSP/TSPTrainer_Meta.py
@@ -19,7 +19,7 @@
 
 class TSPTrainer:
     """
-    Implementation of POMO with MAML / FOMAML / Reptile on TSP.
+    Implementation of POMO with MAML / FOMAML / Reptile / Bootstrap Meta-learning on TSP.
     For MAML & FOMAML, ref to "Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks";
     For Reptile, ref to "On First-Order Meta-Learning Algorithms" and "On the generalization of neural combinatorial optimization heuristics".
     """
@@ -36,6 +36,7 @@ def __init__(self,
         self.optimizer_params = optimizer_params
         self.trainer_params = trainer_params
         self.meta_params = meta_params
+        assert self.meta_params['data_type'] == "size_distribution", "Not supported, need to modify the code!"
 
         # result folder, logger
         self.logger = getLogger(name='trainer')
@@ -63,13 +64,7 @@ def __init__(self,
         self.alpha = self.meta_params['alpha']  # for reptile
         self.task_set = generate_task_set(self.meta_params)
         self.val_data, self.val_opt = {}, {}  # for lkh3_offline
-        if self.meta_params["data_type"] == "size":
-            self.min_n, self.max_n, self.task_interval = self.task_set[0][0], self.task_set[-1][0], 5
-            # self.task_w = {start: 1/(len(self.task_set)//5) for start in range(self.min_n, self.max_n, self.task_interval)}
-            # self.task_w = torch.full((len(self.task_set)//self.task_interval,), 1/(len(self.task_set)//self.task_interval))
-        elif self.meta_params["data_type"] == "distribution":
-            self.task_w = torch.full((len(self.task_set),), 1 / len(self.task_set))
-        elif self.meta_params["data_type"] == "size_distribution":
+        if self.meta_params["data_type"] == "size_distribution":
             # hardcoded - task_set: range(self.min_n, self.max_n, self.task_interval) * self.num_dist
             self.min_n, self.max_n, self.task_interval, self.num_dist = 50, 200, 5, 11
             self.task_w = torch.full(((self.max_n - self.min_n) // self.task_interval + 1, self.num_dist), 1 / self.num_dist)
@@ -77,6 +72,7 @@ def __init__(self,
         # Restore
         self.start_epoch = 1
         model_load = trainer_params['model_load']
+        pretrain_load = trainer_params['pretrain_load']
         if model_load['enable']:
             checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load)
             checkpoint = torch.load(checkpoint_fullname, map_location=self.device)
@@ -84,7 +80,14 @@ def __init__(self,
             self.start_epoch = 1 + model_load['epoch']
             self.result_log.set_raw_data(checkpoint['result_log'])
             self.meta_optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
-            self.logger.info(">> Model loaded from {}".format(checkpoint_fullname))
+            self.logger.info('Checkpoint loaded successfully from {}'.format(checkpoint_fullname))
+
+        elif pretrain_load['enable']:  # meta-training on a pretrained model
+            self.logger.info(">> Loading pretrained model: be careful with the type of the normalization layer!")
+            checkpoint_fullname = '{path}'.format(**pretrain_load)
+            checkpoint = torch.load(checkpoint_fullname, map_location=self.device)
+            self.model.load_state_dict(checkpoint['model_state_dict'])
+            self.logger.info('Pretrained model loaded successfully from {}'.format(checkpoint_fullname))
 
         # utility
         self.time_estimator = TimeEstimator()
@@ -111,13 +114,7 @@ def run(self):
             img_save_interval = self.trainer_params['logging']['img_save_interval']
             # Val
             no_aug_score_list = []
-            if self.meta_params["data_type"] == "size":
-                dir = "../../data/TSP/Size/"
-                paths = ["tsp100_uniform.pkl", "tsp200_uniform.pkl", "tsp300_uniform.pkl"]
-            elif self.meta_params["data_type"] == "distribution":
-                dir = "../../data/TSP/Distribution/"
-                paths = ["tsp100_uniform.pkl", "tsp100_gaussian.pkl", "tsp100_cluster.pkl", "tsp100_diagonal.pkl", "tsp100_tsplib.pkl"]
-            elif self.meta_params["data_type"] == "size_distribution":
+            if self.meta_params["data_type"] == "size_distribution":
                 dir = "../../data/TSP/Size_Distribution/"
                 paths = ["tsp200_uniform.pkl", "tsp300_rotation.pkl"]
             if epoch <= 1 or (epoch % img_save_interval) == 0:
@@ -164,12 +161,6 @@ def run(self):
                 }
                 torch.save(checkpoint_dict, '{}/checkpoint-{}.pt'.format(self.result_folder, epoch))
 
-            # if all_done or (epoch % img_save_interval) == 0:
-            #     image_prefix = '{}/img/checkpoint-{}'.format(self.result_folder, epoch)
-            #     util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['train_score'])
-            #     util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['val_score'])
-            #     util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'], self.result_log, labels=['train_loss'])
-
             if all_done:
                 self.logger.info(" *** Training Done *** ")
                 # self.logger.info("Now, printing log array...")
@@ -193,15 +184,7 @@ def _train_one_epoch(self, epoch):
 
         # Adaptive task scheduler:
         if self.meta_params['curriculum']:
-            if self.meta_params["data_type"] == "size":
-                # start = self.min_n + int(min(epoch / self.meta_params['sch_epoch'], 1) * (self.max_n - self.min_n))  # linear
-                start = self.min_n + int(1/2 * (1-math.cos(math.pi * min(epoch/self.meta_params['sch_epoch'], 1))) * (self.max_n - self.min_n))  # cosine
-                end = min(start + 10, self.max_n)  # 10 is the size of the sliding window
-                if self.meta_params["curriculum"]: print(">> training task {}".format((start, end)))
-            elif self.meta_params["data_type"] == "distribution":
-                if epoch % self.meta_params['update_weight'] == 0:
-                    self.task_w = self._update_task_weight(self.task_set, self.task_w, epoch)
-            elif self.meta_params["data_type"] == "size_distribution":
+            if self.meta_params["data_type"] == "size_distribution":
                 start = self.min_n + int(min(epoch / self.meta_params['sch_epoch'], 1) * (self.max_n - self.min_n))  # linear
                 # start = self.min_n + int(1 / 2 * (1 - math.cos(math.pi * min(epoch / self.meta_params['sch_epoch'], 1))) * (self.max_n - self.min_n))  # cosine
                 n = start // self.task_interval * self.task_interval
@@ -216,13 +199,7 @@ def _train_one_epoch(self, epoch):
             # sample a batch of tasks
             w, selected_tasks = [1.0] * self.meta_params['B'], []
             for b in range(self.meta_params['B']):
-                if self.meta_params["data_type"] == "size":
-                    task_params = random.sample(range(start, end + 1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
-                    batch_size = meta_batch_size if task_params[0] <= 150 else meta_batch_size // 2
-                elif self.meta_params["data_type"] == "distribution":
-                    task_params = self.task_set[torch.multinomial(self.task_w, 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
-                    batch_size = meta_batch_size
-                elif self.meta_params["data_type"] == "size_distribution":
+                if self.meta_params["data_type"] == "size_distribution":
                     selected = torch.multinomial(self.task_w[idx], 1).item()
                     task_params = tasks[selected] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
                     batch_size = meta_batch_size if task_params[0] <= 150 else meta_batch_size // 2
@@ -542,9 +519,6 @@ def _get_data(self, batch_size, task_params):
 
     def _get_val_data(self, batch_size, task_params):
         if self.meta_params["data_type"] == "size":
-            # start1, end1 = min(task_params[0] + 10, self.max_n), min(task_params[0] + 20, self.max_n)
-            # val_size = random.sample(range(start1, end1 + 1), 1)[0]
-            # val_data = self._get_data(batch_size, (val_size,))
             val_data = self._get_data(batch_size, task_params)
         elif self.meta_params["data_type"] == "distribution":
             val_data = self._get_data(batch_size, task_params)
@@ -561,51 +535,6 @@ def _alpha_scheduler(self, epoch):
         """
         self.alpha = max(self.alpha * self.meta_params['alpha_decay'], 0.0001)
 
-    def _generate_x_adv(self, data, eps=10.0):
-        """
-        Generate adversarial data based on the current model, also need to generate optimal sol for x_adv.
-        See also: "Learning to Solve Travelling Salesman Problem with Hardness-adaptive Curriculum" in AAAI 2022.
- """ - from torch.autograd import Variable - def minmax(xy_): - # min_max normalization: [b,n,2] - xy_ = (xy_ - xy_.min(dim=1, keepdims=True)[0]) / (xy_.max(dim=1, keepdims=True)[0] - xy_.min(dim=1, keepdims=True)[0]) - return xy_ - - if eps == 0: return data - # generate x_adv - self.meta_model.eval() - aug_factor, batch_size = 1, data.size(0) - env = Env(**{'problem_size': data.size(1), 'pomo_size': data.size(1)}) - with torch.enable_grad(): - data.requires_grad_() - env.load_problems(batch_size, problems=data, aug_factor=aug_factor) - reset_state, _, _ = env.reset() - self.meta_model.pre_forward(reset_state) - prob_list = torch.zeros(size=(aug_factor * batch_size, env.pomo_size, 0)) - state, reward, done = env.pre_step() - while not done: - selected, prob = self.meta_model(state) - state, reward, done = env.step(selected) - prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) - - aug_reward = reward.reshape(aug_factor, batch_size, env.pomo_size).permute(1, 0, 2).view(batch_size, -1) - baseline_reward = aug_reward.float().mean(dim=1, keepdims=True) - advantage = aug_reward - baseline_reward - log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, env.pomo_size).permute(1, 0, 2).view(batch_size, -1) - - # delta = torch.autograd.grad(eps * ((advantage / baseline_reward) * log_prob).mean(), data)[0] - delta = torch.autograd.grad(eps * ((-advantage) * log_prob).mean(), data)[0] - data = data.detach() + delta - data = minmax(data) - data = Variable(data, requires_grad=False) - - # generate opt sol - # opt_sol = solve_all_gurobi(data) - # return data, opt_sol - - return data - def _update_task_weight(self, tasks, weights, epoch): """ Update the weights of tasks. diff --git a/POMO/TSP/TSPTrainer_pomo.py b/POMO/TSP/TSPTrainer_pomo.py index 709b56c..96917b7 100644 --- a/POMO/TSP/TSPTrainer_pomo.py +++ b/POMO/TSP/TSPTrainer_pomo.py @@ -34,6 +34,7 @@ def __init__(self, self.optimizer_params = optimizer_params self.trainer_params = trainer_params self.meta_params = meta_params + assert self.meta_params['data_type'] == "size_distribution", "Not supported, need to modify the code!" # result folder, logger self.logger = getLogger(name='trainer') @@ -57,7 +58,6 @@ def __init__(self, self.optimizer = Optimizer(self.model.parameters(), **self.optimizer_params['optimizer']) self.task_set = generate_task_set(self.meta_params) self.val_data, self.val_opt = {}, {} # for lkh3_offline - assert not (self.meta_params['curriculum'] and self.meta_params["data_type"] in ["size", "distribution"]), "Not Implemented!" 
         if self.meta_params["data_type"] == "size_distribution":
             # hardcoded - task_set: range(self.min_n, self.max_n, self.task_interval) * self.num_dist
             self.min_n, self.max_n, self.task_interval, self.num_dist = 50, 200, 5, 11
@@ -66,6 +66,7 @@ def __init__(self,
         # Restore
         self.start_epoch = 1
         model_load = trainer_params['model_load']
+        pretrain_load = trainer_params['pretrain_load']
         if model_load['enable']:
             checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load)
             checkpoint = torch.load(checkpoint_fullname, map_location=self.device)
@@ -73,7 +74,14 @@ def __init__(self,
             self.start_epoch = 1 + model_load['epoch']
             self.result_log.set_raw_data(checkpoint['result_log'])
             self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
-            self.logger.info('Saved Model Loaded !!')
+            self.logger.info('Checkpoint loaded successfully from {}'.format(checkpoint_fullname))
+
+        elif pretrain_load['enable']:  # meta-training on a pretrained model
+            self.logger.info(">> Loading pretrained model: be careful with the type of the normalization layer!")
+            checkpoint_fullname = '{path}'.format(**pretrain_load)
+            checkpoint = torch.load(checkpoint_fullname, map_location=self.device)
+            self.model.load_state_dict(checkpoint['model_state_dict'])
+            self.logger.info('Pretrained model loaded successfully from {}'.format(checkpoint_fullname))
 
         # utility
         self.time_estimator = TimeEstimator()
@@ -85,11 +93,11 @@ def run(self):
             self.logger.info('=================================================================')
 
             # lr decay (by 10) to speed up convergence, applied at 90% of total epochs
-            # if epoch in [int(self.meta_params['epochs'] * 0.9)]:
-            #     self.optimizer_params['optimizer']['lr'] /= 10
-            #     for group in self.optimizer.param_groups:
-            #         group["lr"] /= 10
-            #         print(">> LR decay to {}".format(group["lr"]))
+            if epoch in [int(self.meta_params['epochs'] * 0.9)]:
+                self.optimizer_params['optimizer']['lr'] /= 10
+                for group in self.optimizer.param_groups:
+                    group["lr"] /= 10
+                    print(">> LR decay to {}".format(group["lr"]))
 
             # Train
             train_score, train_loss = self._train_one_epoch(epoch)
@@ -99,13 +107,7 @@ def run(self):
             img_save_interval = self.trainer_params['logging']['img_save_interval']
             # Val
             no_aug_score_list = []
-            if self.meta_params["data_type"] == "size":
-                dir = "../../data/TSP/Size/"
-                paths = ["tsp100_uniform.pkl", "tsp200_uniform.pkl", "tsp300_uniform.pkl"]
-            elif self.meta_params["data_type"] == "distribution":
-                dir = "../../data/TSP/Distribution/"
-                paths = ["tsp100_uniform.pkl", "tsp100_gaussian.pkl", "tsp100_cluster.pkl", "tsp100_diagonal.pkl", "tsp100_tsplib.pkl"]
-            elif self.meta_params["data_type"] == "size_distribution":
+            if self.meta_params["data_type"] == "size_distribution":
                 dir = "../../data/TSP/Size_Distribution/"
                 paths = ["tsp200_uniform.pkl", "tsp300_rotation.pkl"]
             if epoch <= 1 or (epoch % img_save_interval) == 0:
@@ -139,12 +141,6 @@ def run(self):
                 }
                 torch.save(checkpoint_dict, '{}/checkpoint-{}.pt'.format(self.result_folder, epoch))
 
-            # if all_done or (epoch % img_save_interval) == 0:
-            #     image_prefix = '{}/img/checkpoint-{}'.format(self.result_folder, epoch)
-            #     util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['train_score'])
-            #     util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['val_score'])
-            #     util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'], self.result_log, labels=['train_loss'])
-
             if all_done:
                 self.logger.info(" *** Training Done *** ")
                 # self.logger.info("Now, printing log array...")
@@ -172,12 +168,7 @@ def _train_one_epoch(self, epoch):
 
         # sample a batch of tasks
         for b in range(self.meta_params['B']):
            for step in range(self.meta_params['k']):
-                if self.meta_params["data_type"] == "size":
-                    task_params = random.sample(self.task_set, 1)[0]
-                    batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2
-                elif self.meta_params["data_type"] == "distribution":
-                    task_params = random.sample(self.task_set, 1)[0]
-                elif self.meta_params["data_type"] == "size_distribution":
+                if self.meta_params["data_type"] == "size_distribution":
                     task_params = tasks[torch.multinomial(self.task_w[idx], 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
                     batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2
@@ -262,7 +253,6 @@ def _fast_val(self, model, data=None, path=None, offset=0, val_episodes=32, retu
         return no_aug_score.detach().item()
 
     def _get_data(self, batch_size, task_params):
-
         if self.meta_params['data_type'] == 'distribution':
             assert len(task_params) == 2
             data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[1], distribution='gaussian_mixture', problem="tsp")
@@ -277,50 +267,6 @@ def _get_data(self, batch_size, task_params):
 
         return data
 
-    def _generate_x_adv(self, data, eps=10.0):
-        """
-        Generate adversarial data based on the current model, also need to generate optimal sol for x_adv.
-        """
-        from torch.autograd import Variable
-        def minmax(xy_):
-            # min_max normalization: [b,n,2]
-            xy_ = (xy_ - xy_.min(dim=1, keepdims=True)[0]) / (xy_.max(dim=1, keepdims=True)[0] - xy_.min(dim=1, keepdims=True)[0])
-            return xy_
-
-        if eps == 0: return data
-        # generate x_adv
-        self.model.eval()
-        aug_factor, batch_size = 1, data.size(0)
-        env = Env(**{'problem_size': data.size(1), 'pomo_size': data.size(1)})
-        with torch.enable_grad():
-            data.requires_grad_()
-            env.load_problems(batch_size, problems=data, aug_factor=aug_factor)
-            reset_state, _, _ = env.reset()
-            self.model.pre_forward(reset_state)
-            prob_list = torch.zeros(size=(aug_factor * batch_size, env.pomo_size, 0))
-            state, reward, done = env.pre_step()
-            while not done:
-                selected, prob = self.model(state)
-                state, reward, done = env.step(selected)
-                prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)
-
-            aug_reward = reward.reshape(aug_factor, batch_size, env.pomo_size).permute(1, 0, 2).view(batch_size, -1)
-            baseline_reward = aug_reward.float().mean(dim=1, keepdims=True)
-            advantage = aug_reward - baseline_reward
-            log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, env.pomo_size).permute(1, 0, 2).view(batch_size, -1)
-
-            # delta = torch.autograd.grad(eps * ((advantage / baseline_reward) * log_prob).mean(), data)[0]
-            delta = torch.autograd.grad(eps * ((-advantage) * log_prob).mean(), data)[0]
-            data = data.detach() + delta
-            data = minmax(data)
-            data = Variable(data, requires_grad=False)
-
-        # generate opt sol
-        # opt_sol = solve_all_gurobi(data)
-        # return data, opt_sol
-
-        return data
-
     def _update_task_weight(self, tasks, weights, epoch):
         """
         Update the weights of tasks.
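Note: the following is a minimal standalone sketch, not the repository's exact code, of how the two loading modes added above interact: 'model_load' resumes a full training checkpoint (model weights, optimizer state, and start epoch), while the new 'pretrain_load' initializes only the model weights from a pretrained POMO checkpoint. The model, optimizer, and trainer_params objects here are placeholders.

import torch

def restore(model, optimizer, trainer_params, device="cpu"):
    # 'model_load' takes priority: resume model + optimizer + epoch counter.
    model_load = trainer_params['model_load']
    pretrain_load = trainer_params['pretrain_load']
    start_epoch = 1
    if model_load['enable']:
        ckpt = torch.load('{path}/checkpoint-{epoch}.pt'.format(**model_load), map_location=device)
        model.load_state_dict(ckpt['model_state_dict'])
        optimizer.load_state_dict(ckpt['optimizer_state_dict'])
        start_epoch = 1 + model_load['epoch']
    elif pretrain_load['enable']:
        # weights only; the checkpoint must use the same type of
        # normalization layer as the current model, or loading will fail
        ckpt = torch.load(pretrain_load['path'], map_location=device)
        model.load_state_dict(ckpt['model_state_dict'])
    return start_epoch
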
diff --git a/POMO/TSP/train.py b/POMO/TSP/train.py
index 1f4faf7..9a5b978 100644
--- a/POMO/TSP/train.py
+++ b/POMO/TSP/train.py
@@ -30,7 +30,6 @@
     'ff_hidden_dim': 512,
     'eval_type': 'argmax',
     'meta_update_encoder': True,
-    # 'norm': 'batch_no_track'
 }
 
 optimizer_params = {
@@ -57,11 +56,17 @@
             'filename': 'style_loss_1.json'
         },
     },
+    # load previous checkpoint for meta-training
     'model_load': {
         'enable': False,  # enable loading pre-trained model
         'path': '../../pretrained/debug',  # directory path of pre-trained model and log files saved.
         'epoch': 100000,  # epoch version of pre-trained model to load.
     },
+    # load a pretrained model for meta-training instead of meta-training from scratch
+    'pretrain_load': {
+        'enable': False,
+        'path': '../../pretrained/POMO-TSP/checkpoint-3000-tsp100-instance-norm.pt',  # be careful with the type of the normalization layer
+    }
 }
 
 meta_params = {
diff --git a/README.md b/README.md
index e158513..ff816f6 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ This paper studies a challenging yet realistic setting, which considers generali
 
 ### TODO
 
-- [ ] Finish Dependencies & How to Run.
+- [ ] Finish Dependencies & How to Run & Take-home Messages.
 - [ ] Camera-ready.
 - [ ] Slide and Poster.
 - [ ] Release Review.
@@ -27,6 +27,10 @@ This paper studies a challenging yet realistic setting, which considers generali
 
 
 
+### Take-home Messages
+
+
+
 ### Reviews
 
 We would like to thank the anonymous reviewers and (S)ACs of ICML 2023 for their constructive comments and recommendation. We will share the reviews later.
@@ -38,6 +42,7 @@ Thank the following repositories, which are baselines of our code:
 
 * https://github.com/wouterkool/attention-learn-to-route
 * https://github.com/yd-kwon/POMO
 * https://github.com/mit-wu-lab/learning-to-delegate
+* "On the generalization of neural combinatorial optimization heuristics"
 
 ### Citation
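For reference, a minimal self-contained sketch of the one-shot LR decay this patch enables in CVRPTrainer_pomo.run() and TSPTrainer_pomo.run(): at 90% of the total number of epochs, the learning rate of every optimizer param group is divided by 10. The tiny model and epoch count below are illustrative placeholders, not the repository's settings.

import torch

model = torch.nn.Linear(2, 2)  # placeholder for the POMO model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
epochs = 100  # placeholder for meta_params['epochs']

for epoch in range(1, epochs + 1):
    if epoch in [int(epochs * 0.9)]:  # same trigger as in the patch
        for group in optimizer.param_groups:
            group["lr"] /= 10
            print(">> LR decay to {}".format(group["lr"]))
    # ... one training epoch ...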