diff --git a/POMO/CVRP/CVRPModel.py b/POMO/CVRP/CVRPModel.py
index 480d4f1..4fc5a6f 100644
--- a/POMO/CVRP/CVRPModel.py
+++ b/POMO/CVRP/CVRPModel.py
@@ -32,13 +32,15 @@ def pre_forward(self, reset_state, weights=None):
         # shape: (batch, problem+1, embedding)
         self.decoder.set_kv(self.encoded_nodes, weights=weights)

-    def forward(self, state, weights=None):
+    def forward(self, state, weights=None, selected=None, return_probs=False):
         batch_size = state.BATCH_IDX.size(0)
         pomo_size = state.BATCH_IDX.size(1)

         if state.selected_count == 0:  # First Move, depot
             selected = torch.zeros(size=(batch_size, pomo_size), dtype=torch.long)
             prob = torch.ones(size=(batch_size, pomo_size))
+            probs = torch.ones(size=(batch_size, pomo_size, self.encoded_nodes.size(1)))
+            # shape: (batch, pomo, problem_size+1)

             # # Use Averaged encoded nodes for decoder input_1
             # encoded_nodes_mean = self.encoded_nodes.mean(dim=1, keepdim=True)
@@ -53,27 +55,31 @@ def forward(self, state, weights=None):
         elif state.selected_count == 1:  # Second Move, POMO
             selected = torch.arange(start=1, end=pomo_size+1)[None, :].expand(batch_size, pomo_size)
             prob = torch.ones(size=(batch_size, pomo_size))
+            probs = torch.ones(size=(batch_size, pomo_size, self.encoded_nodes.size(1)))

         else:
             encoded_last_node = _get_encoding(self.encoded_nodes, state.current_node)
             # shape: (batch, pomo, embedding)
             probs = self.decoder(encoded_last_node, state.load, ninf_mask=state.ninf_mask, weights=weights)
             # shape: (batch, pomo, problem+1)
-
-            while True:
-                if self.training or self.model_params['eval_type'] == 'softmax':
-                    selected = probs.reshape(batch_size * pomo_size, -1).multinomial(1).squeeze(dim=1).reshape(batch_size, pomo_size)
-                    # shape: (batch, pomo)
-                else:
-                    selected = probs.argmax(dim=2)
+            if selected is None:
+                while True:
+                    if self.training or self.model_params['eval_type'] == 'softmax':
+                        selected = probs.reshape(batch_size * pomo_size, -1).multinomial(1).squeeze(dim=1).reshape(batch_size, pomo_size)
+                        # shape: (batch, pomo)
+                    else:
+                        selected = probs.argmax(dim=2)
+                        # shape: (batch, pomo)
+                    prob = probs[state.BATCH_IDX, state.POMO_IDX, selected].reshape(batch_size, pomo_size)
                     # shape: (batch, pomo)
-
+                    if (prob != 0).all():
+                        break
+            else:
+                # actions are given externally (e.g., replaying selections under another set of weights)
                 prob = probs[state.BATCH_IDX, state.POMO_IDX, selected].reshape(batch_size, pomo_size)
-                # shape: (batch, pomo)
-
-                if (prob != 0).all():
-                    break

+        if return_probs:
+            return selected, prob, probs
         return selected, prob
diff --git a/POMO/CVRP/CVRPTester.py b/POMO/CVRP/CVRPTester.py
index 856bc64..8584090 100644
--- a/POMO/CVRP/CVRPTester.py
+++ b/POMO/CVRP/CVRPTester.py
@@ -7,7 +7,6 @@
 from CVRPEnv import CVRPEnv as Env
 from CVRPModel import CVRPModel as Model
-from TSP_gurobi import solve_all_gurobi
 from utils.utils import *
 from utils.functions import load_dataset, save_dataset

@@ -106,8 +105,8 @@ def _test(self, store_res=True):
             score_AM.update(score, batch_size)
             aug_score_AM.update(aug_score, batch_size)
             episode += batch_size
-            gap = [max(all_score[i].item() - opt_sol[i], 0) / opt_sol[i] * 100 for i in range(batch_size)]
-            aug_gap = [max(all_aug_score[i].item() - opt_sol[i], 0) / opt_sol[i] * 100 for i in range(batch_size)]
+            gap = [(all_score[i].item() - opt_sol[i]) / opt_sol[i] * 100 for i in range(batch_size)]
+            aug_gap = [(all_aug_score[i].item() - opt_sol[i]) / opt_sol[i] * 100 for i in range(batch_size)]
             gap_AM.update(sum(gap) / batch_size, batch_size)
             aug_gap_AM.update(sum(aug_gap) / batch_size, batch_size)
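Reviewer note on the new forward() signature: passing `selected` replays a fixed decoding path under a (possibly different) set of weights, and `return_probs` exposes the full per-step distribution; both are needed for the KL term introduced in the trainers below. A minimal usage sketch, assuming an initialized `model` and `env` (the variable names here are illustrative, not part of the diff):

    # Score a frozen teacher on the exact trajectory chosen by the student policy.
    state, reward, done = env.pre_step()
    while not done:
        selected, prob, probs = model(state, return_probs=True)            # student samples actions
        _, _, teacher_probs = model(state, weights=teacher_weights,        # teacher scores the same
                                    selected=selected, return_probs=True)  # actions, no re-sampling
        state, reward, done = env.step(selected)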
diff --git a/POMO/CVRP/CVRPTrainer_meta.py b/POMO/CVRP/CVRPTrainer_meta.py
index f62569a..c6fc6f3 100644
--- a/POMO/CVRP/CVRPTrainer_meta.py
+++ b/POMO/CVRP/CVRPTrainer_meta.py
@@ -54,20 +54,17 @@ def __init__(self,
             torch.set_default_tensor_type('torch.FloatTensor')

         # Main Components
-        self.model_params["norm"] = None  # Original "POMO" Paper uses instance/batch normalization
+        self.model_params["norm"] = None
         self.meta_model = Model(**self.model_params)
         self.meta_optimizer = Optimizer(self.meta_model.parameters(), **self.optimizer_params['optimizer'])
         self.alpha = self.meta_params['alpha']  # for reptile
         self.task_set = generate_task_set(self.meta_params)
         self.val_data, self.val_opt = {}, {}  # for lkh3_offline
-        if self.meta_params["data_type"] == "size":
-            self.min_n, self.max_n, self.task_interval = self.task_set[0][0], self.task_set[-1][0], 5  # [20, 150]
-            self.task_w = {start: 1 / (len(self.task_set) // 5) for start in range(self.min_n, self.max_n, self.task_interval)}
-            # self.task_w = torch.full((len(self.task_set)//self.task_interval,), 1/(len(self.task_set)//self.task_interval))
-        elif self.meta_params["data_type"] == "distribution":
-            self.task_w = torch.full((len(self.task_set),), 1 / len(self.task_set))
-        else:
-            raise NotImplementedError
+        assert not (self.meta_params['curriculum'] and self.meta_params["data_type"] in ["size", "distribution"]), "Not Implemented!"
+        if self.meta_params["data_type"] == "size_distribution":
+            # hardcoded - task_set: range(self.min_n, self.max_n, self.task_interval) * self.num_dist
+            self.min_n, self.max_n, self.task_interval, self.num_dist = 50, 200, 5, 11
+            self.task_w = torch.full(((self.max_n - self.min_n) // 5 + 1, self.num_dist), 1 / self.num_dist)

         # Restore
         self.start_epoch = 1
@@ -113,7 +110,8 @@ def run(self):
             dir = "../../data/CVRP/Distribution/"
             paths = ["cvrp100_uniform.pkl", "cvrp100_gaussian.pkl", "cvrp100_cluster.pkl", "cvrp100_diagonal.pkl", "cvrp100_cvrplib.pkl"]
         elif self.meta_params["data_type"] == "size_distribution":
-            pass
+            dir = "../../data/CVRP/Size_Distribution/"
+            paths = ["cvrp200_uniform.pkl", "cvrp200_gaussian.pkl", "cvrp300_rotation.pkl"]
         if epoch <= 1 or (epoch % img_save_interval) == 0:
             for val_path in paths:
                 no_aug_score = self._fast_val(self.meta_model, path=os.path.join(dir, val_path), val_episodes=64, mode="eval")
@@ -165,22 +163,31 @@ def run(self):

     def _train_one_epoch(self, epoch):
         """
-        1. Sample B training tasks from task distribution P(T)
-        2. Inner-loop: for a batch of tasks T_i, POMO training -> \theta_i
-        3. Outer-loop: update meta-model -> \theta_0
+        Meta-learning framework:
+        1. Sample B training tasks from the task distribution P(T)
+        2. Inner loop: for a batch of tasks T_i, POMO training -> \theta_i
+        3. Outer loop: update the meta-model -> \theta_0
+
+        Adaptive task scheduler:
+        for size: gradually increase the problem size (curriculum learning);
+        for distribution: every X iters, compute the relative gaps (w.r.t. LKH3) or estimate the potential improvement of each distribution (i.e., bootstrap);
+        for size_distribution: combine both strategies.
         """
         self.meta_optimizer.zero_grad()
         score_AM = AverageMeter()
         loss_AM = AverageMeter()
         batch_size = self.meta_params['meta_batch_size']
-        """
-        Adaptive task scheduler:
-        for size: gradually increase the problem size (Curriculum learning);
-        for distribution: we compute the relative gaps (w.r.t. LKH3) or estimate the potential improvements of each distribution (i.e., bootstrap) every X iters;
-        """
-        start, end = 0, 0
-        pass
+        # Adaptive task scheduler - not implemented for "size" and "distribution"
+        if self.meta_params['curriculum']:
+            if self.meta_params["data_type"] == "size_distribution":
+                start = self.min_n + int(min(epoch / self.meta_params['sch_epoch'], 1) * (self.max_n - self.min_n))  # linear
+                # start = self.min_n + int(1 / 2 * (1 - math.cos(math.pi * min(epoch / self.meta_params['sch_epoch'], 1))) * (self.max_n - self.min_n))  # cosine
+                n = start // 5 * 5
+                idx = (n - self.min_n) // 5
+                tasks, weights = self.task_set[idx * 11: (idx + 1) * 11], self.task_w[idx]
+                if epoch % self.meta_params['update_weight'] == 0:
+                    self.task_w[idx] = self._update_task_weight(tasks, weights, epoch)

         self._alpha_scheduler(epoch)  # for reptile
         fast_weights, val_loss, meta_grad_dict = [], 0, {(i, j): 0 for i, group in enumerate(self.meta_optimizer.param_groups) for j, _ in enumerate(group['params'])}

@@ -188,12 +195,14 @@ def _train_one_epoch(self, epoch):
         for b in range(self.meta_params['B']):
             # sample a task
             if self.meta_params["data_type"] == "size":
-                task_params = random.sample(range(start, end + 1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
-                # batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 100 else self.meta_params['meta_batch_size'] // 2
+                task_params = random.sample(self.task_set, 1)[0]
+                batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2
             elif self.meta_params["data_type"] == "distribution":
-                task_params = self.task_set[torch.multinomial(self.task_w, 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
+                task_params = random.sample(self.task_set, 1)[0]
             elif self.meta_params["data_type"] == "size_distribution":
-                pass
+                task_params = tasks[torch.multinomial(self.task_w[idx], 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
+                batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2
+            data = self._get_data(batch_size, task_params)

             # preparation
             if self.meta_params['meta_method'] in ['fomaml', 'reptile']:
@@ -210,29 +219,51 @@ def _train_one_epoch(self, epoch):

             # inner-loop optimization
             for step in range(self.meta_params['k']):
-                data = self._get_data(batch_size, task_params)
+                # data = self._get_data(batch_size, task_params)
                 env_params = {'problem_size': data[-1].size(1), 'pomo_size': data[-1].size(1)}
                 self.meta_model.train()
                 if self.meta_params['meta_method'] in ['reptile', 'fomaml']:
                     avg_score, avg_loss = self._train_one_batch(task_model, data, Env(**env_params), optimizer)
                 elif self.meta_params['meta_method'] == 'maml':
-                    avg_score, avg_loss, fast_weight = self._train_one_batch_maml(fast_weight, data, Env(**env_params))
+                    avg_score, avg_loss, fast_weight = self._train_one_batch_maml(fast_weight, data, Env(**env_params), create_graph=True)
                 score_AM.update(avg_score.item(), batch_size)
                 loss_AM.update(avg_loss.item(), batch_size)

+            # bootstrap
+            bootstrap_model = None
+            if self.meta_params['L'] > 0:
+                assert self.meta_params['meta_method'] in ['maml', 'fomaml']
+                bootstrap_model = Model(**self.model_params)
+                if self.meta_params['meta_method'] == 'maml':
+                    bootstrap_model = OrderedDict({k: v.clone().detach().requires_grad_(True) for k, v in fast_weight.items()})
+                else:
+                    bootstrap_model.load_state_dict(copy.deepcopy(task_model.state_dict()))
+                    bootstrap_optimizer = Optimizer(bootstrap_model.parameters(), **self.optimizer_params['optimizer'])
+                    bootstrap_optimizer.load_state_dict(optimizer.state_dict())
+                for step in range(self.meta_params['L']):
+                    # data = self._get_data(batch_size, task_params)
+                    if self.meta_params['meta_method'] == 'maml':
+                        avg_score, avg_loss, bootstrap_model = self._train_one_batch_maml(bootstrap_model, data, Env(**env_params), create_graph=False)
+                    else:
+                        avg_score, avg_loss = self._train_one_batch(bootstrap_model, data, Env(**env_params), bootstrap_optimizer)

             val_data = self._get_val_data(batch_size, task_params)
             self.meta_model.train()
             if self.meta_params['meta_method'] == 'maml':
-                val_loss = self._fast_val(fast_weight, data=val_data, mode="maml") / self.meta_params['B']
+                val_loss, kl_loss = self._fast_val(fast_weight, data=val_data, mode="maml", bootstrap_model=bootstrap_model)
+                print(val_loss, kl_loss)
+                loss = (self.meta_params['beta'] * val_loss + (1 - self.meta_params['beta']) * kl_loss) / self.meta_params['B']
                 self.meta_optimizer.zero_grad()
-                val_loss.backward()
+                loss.backward()
                 for i, group in enumerate(self.meta_optimizer.param_groups):
                     for j, p in enumerate(group['params']):
                         meta_grad_dict[(i, j)] += p.grad
             elif self.meta_params['meta_method'] == 'fomaml':
-                val_loss = self._fast_val(task_model, data=val_data, mode="fomaml") / self.meta_params['B']
+                val_loss, kl_loss = self._fast_val(task_model, data=val_data, mode="fomaml", bootstrap_model=bootstrap_model)
+                print(val_loss, kl_loss)
+                loss = (self.meta_params['beta'] * val_loss + (1 - self.meta_params['beta']) * kl_loss) / self.meta_params['B']
                 optimizer.zero_grad()
-                val_loss.backward()
+                loss.backward()
                 for i, group in enumerate(optimizer.param_groups):
                     for j, p in enumerate(group['params']):
                         meta_grad_dict[(i, j)] += p.grad
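Reviewer note: the bootstrapped target used above is just the fast weights advanced for L extra inner steps with the graph cut, in the spirit of Bootstrapped Meta-Learning (Flennerhag et al., ICLR 2022); the detach is what keeps those extra steps out of the second-order MAML graph. A minimal sketch of the MAML-branch clone (mirrors the OrderedDict comprehension above; assumes `fast_weight` maps names to tensors):

    from collections import OrderedDict
    import torch

    def clone_as_bootstrap_target(fast_weight):
        # Detach each tensor so the L bootstrap steps act on a frozen copy;
        # the copy then serves as a fixed teacher for the KL term.
        return OrderedDict((k, v.clone().detach().requires_grad_(True))
                           for k, v in fast_weight.items())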
@@ -300,7 +331,7 @@ def _train_one_batch(self, task_model, data, env, optimizer=None):

         return score_mean, loss_mean

-    def _train_one_batch_maml(self, fast_weight, data, env, optimizer=None):
+    def _train_one_batch_maml(self, fast_weight, data, env, optimizer=None, create_graph=True):

         batch_size = data[-1].size(0)
         env.load_problems(batch_size, problems=data, aug_factor=1)
@@ -325,22 +356,22 @@ def _train_one_batch_maml(self, fast_weight, data, env, optimizer=None):
         loss_mean = loss.mean()

         # 1. update model - in SGD way
-        # gradients = torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=True)  # allow_unused=True
+        # gradients = torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=create_graph)  # allow_unused=True
         # fast_weight = OrderedDict(
         #     (name, param - self.optimizer_params['optimizer']['lr'] * grad)
         #     for ((name, param), grad) in zip(fast_weight.items(), gradients)
         # )
         # 2. update model - in Adam way
-        gradients = torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=True)  # allow_unused=True
+        gradients = torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=create_graph)  # allow_unused=True
         w_t, (beta1, beta2), eps = [], self.meta_optimizer.param_groups[0]['betas'], self.meta_optimizer.param_groups[0]['eps']
         lr, weight_decay = self.optimizer_params['optimizer']['lr'], self.optimizer_params['optimizer']['weight_decay']
         for i, ((name, param), grad) in enumerate(zip(fast_weight.items(), gradients)):
             if self.meta_optimizer.state_dict()['state'] != {}:
-                i = i if self.model_params['meta_update_encoder'] else i + 58  # i \in [0, 62]
+                i = i if self.model_params['meta_update_encoder'] else i + 58  # i \in [0, 62], where encoder \in [0, 57] + decoder \in [58, 62]
                 state = self.meta_optimizer.state_dict()['state'][i]
                 step, exp_avg, exp_avg_sq = state['step'], state['exp_avg'], state['exp_avg_sq']
                 step += 1
-                step = step.item()
+                step = step.item() if isinstance(step, torch.Tensor) else step
                 # compute grad based on Adam source code using in-place operation
                 # update Adam stat (step, exp_avg and exp_avg_sq have already been updated by in-place operation)
                 # may encounter RuntimeError: (a leaf Variable that requires grad) / (the tensor used during grad computation) cannot use in-place operation.
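Reviewer note: the "Adam way" block above re-implements one Adam step functionally so the updated fast weights stay differentiable w.r.t. the meta-parameters. For reference, a self-contained sketch of such a step with standard bias corrections (a simplification under stated assumptions — it ignores weight decay and is not the repo's exact in-place code):

    import torch

    def functional_adam_step(param, grad, exp_avg, exp_avg_sq, step,
                             lr=1e-4, betas=(0.9, 0.999), eps=1e-8):
        # Out-of-place updates keep `param` in the autograd graph for second-order MAML.
        exp_avg = betas[0] * exp_avg + (1 - betas[0]) * grad
        exp_avg_sq = betas[1] * exp_avg_sq + (1 - betas[1]) * grad * grad
        bias1 = 1 - betas[0] ** step
        bias2 = 1 - betas[1] ** step
        new_param = param - lr * (exp_avg / bias1) / ((exp_avg_sq / bias2).sqrt() + eps)
        return new_param, exp_avg, exp_avg_sq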
@@ -368,7 +399,7 @@ def _train_one_batch_maml(self, fast_weight, data, env, optimizer=None):

         return score_mean, loss_mean, fast_weight

-    def _fast_val(self, model, data=None, path=None, offset=0, val_episodes=32, mode="eval", return_all=False):
+    def _fast_val(self, model, data=None, path=None, offset=0, val_episodes=32, mode="eval", return_all=False, bootstrap_model=None):
         aug_factor = 1
         if data is None:
             data = load_dataset(path)[offset: offset + val_episodes]  # load dataset from file
@@ -392,23 +423,42 @@ def _fast_val(self, model, data=None, path=None, offset=0, val_episodes=32, mode
                 state, reward, done = env.step(selected)

         elif mode in ["maml", "fomaml"]:
-            fast_weight = model
+            fast_weight, kl_loss = model, 0
             env.load_problems(batch_size, problems=data, aug_factor=aug_factor)
             reset_state, _, _ = env.reset()
             if mode == "maml":
                 self.meta_model.pre_forward(reset_state, weights=fast_weight)
+                if bootstrap_model is not None:
+                    with torch.no_grad():
+                        self.meta_model.pre_forward(reset_state, weights=bootstrap_model)
             else:
                 model.pre_forward(reset_state)
+                if bootstrap_model is not None:
+                    with torch.no_grad():
+                        bootstrap_model.pre_forward(reset_state)
+
             prob_list = torch.zeros(size=(batch_size, env.pomo_size, 0))
             state, reward, done = env.pre_step()
             while not done:
                 if mode == "maml":
-                    selected, prob = self.meta_model(state, weights=fast_weight)
+                    selected, prob, probs = self.meta_model(state, weights=fast_weight, return_probs=True)
+                    if bootstrap_model is not None:
+                        probs1 = torch.where(probs > 0, probs, torch.tensor(0.00001))
+                        with torch.no_grad():
+                            _, _, bs_probs = self.meta_model(state, weights=bootstrap_model, selected=selected, return_probs=True)
+                        bs_probs = torch.where(bs_probs > 0, bs_probs, torch.tensor(0.00001))
                 else:
-                    selected, prob = model(state)
+                    selected, prob, probs = model(state, return_probs=True)
+                    if bootstrap_model is not None:
+                        probs1 = torch.where(probs > 0, probs, torch.tensor(0.00001))
+                        with torch.no_grad():
+                            _, _, bs_probs = bootstrap_model(state, selected=selected, return_probs=True)
+                        bs_probs = torch.where(bs_probs > 0, bs_probs, torch.tensor(0.00001))
+                # shape: (batch, pomo)
                 state, reward, done = env.step(selected)
                 prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)
+                kl_loss += (bs_probs * (bs_probs.log() - probs1.log())).reshape(batch_size * data[-1].size(1), -1).sum(dim=-1).mean() if bootstrap_model is not None else 0

             advantage = reward - reward.float().mean(dim=1, keepdims=True)
             log_prob = prob_list.log().sum(dim=2)  # for the first/last node, p=1 -> log_p=0
@@ -431,48 +481,7 @@ def _fast_val(self, model, data=None, path=None, offset=0, val_episodes=32, mode
         else:
-            return loss_mean
-
-    def _bootstrap(self, fast_weight, data, mode="eval"):
-        """
-        mode = "maml": Ref to "Bootstrap Meta-Learning", ICLR 2022 (not implemented for CVRP);
-        mode = "eval": Used to update task weights.
-        """
-        assert mode in ["eval"], "{} not implemented!".format(mode)
-        bootstrap_weight = fast_weight
-        batch_size, aug_factor = data[-1].size(0), 1
-        bootstrap_reward = torch.full((batch_size, 1), float("-inf"))
-        optimizer = Optimizer(bootstrap_weight.parameters(), **self.optimizer_params['optimizer'])
-        # optimizer.load_state_dict(self.meta_optimizer.state_dict())
-        with torch.enable_grad():
-            for L in range(self.meta_params['bootstrap_steps']):
-                env = Env(**{'problem_size': data[-1].size(1), 'pomo_size': data[-1].size(1)})
-                env.load_problems(batch_size, problems=data, aug_factor=aug_factor)
-                reset_state, _, _ = env.reset()
-                bootstrap_weight.pre_forward(reset_state)
-                prob_list = torch.zeros(size=(aug_factor * batch_size, env.pomo_size, 0))
-                state, reward, done = env.pre_step()
-                while not done:
-                    selected, prob = bootstrap_weight(state)
-                    state, reward, done = env.step(selected)  # (aug_factor * batch_size, pomo_size)
-                    prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)
-
-                # (batch, augmentation * pomo)
-                reward = reward.reshape(aug_factor, batch_size, env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1)
-                advantage = reward - reward.float().mean(dim=1, keepdims=True)
-                log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1)
-                loss = -advantage * log_prob
-                loss_mean = loss.mean()
-
-                optimizer.zero_grad()
-                loss_mean.backward()
-                optimizer.step()
-
-                max_pomo_reward, _ = reward.max(dim=1)
-                max_pomo_reward = max_pomo_reward.view(-1, 1)
-                bootstrap_reward = torch.where(max_pomo_reward > bootstrap_reward, max_pomo_reward, bootstrap_reward)  # (batch_size, 1)
-
-            return bootstrap_reward
+            return loss_mean, kl_loss

     def _get_data(self, batch_size, task_params):
         """
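Reviewer note: the kl_loss accumulated above is a forward KL(bootstrap ‖ online) per decoding step, with a 1e-5 floor so masked actions (probability exactly zero) do not produce log(0). An equivalent standalone sketch of one step's contribution, assuming probability tensors of shape (batch, pomo, problem):

    import torch

    def step_kl(bs_probs, probs, floor=1e-5):
        p = torch.where(bs_probs > 0, bs_probs, torch.tensor(floor))
        q = torch.where(probs > 0, probs, torch.tensor(floor))
        # sum over actions, average over the (batch * pomo) rows
        return (p * (p.log() - q.log())).flatten(0, 1).sum(dim=-1).mean()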
@@ -504,14 +513,14 @@ def _get_val_data(self, batch_size, task_params):
         if self.meta_params["data_type"] == "size":
-            start1, end1 = min(task_params[0] + 10, self.max_n), min(task_params[0] + 20, self.max_n)
-            val_size = random.sample(range(start1, end1 + 1), 1)[0]
-            val_data = self._get_data(batch_size, (val_size,))
-            # val_data = self._get_data(batch_size, task_params)  # TODO: which is better?
+            # start1, end1 = min(task_params[0] + 10, self.max_n), min(task_params[0] + 20, self.max_n)
+            # val_size = random.sample(range(start1, end1 + 1), 1)[0]
+            # val_data = self._get_data(batch_size, (val_size,))
+            val_data = self._get_data(batch_size, task_params)
         elif self.meta_params["data_type"] == "distribution":
             val_data = self._get_data(batch_size, task_params)
         elif self.meta_params["data_type"] == "size_distribution":
-            pass
+            val_data = self._get_data(batch_size, task_params)
         else:
             raise NotImplementedError

@@ -523,5 +532,49 @@ def _alpha_scheduler(self, epoch):
         """
         self.alpha = max(self.alpha * self.meta_params['alpha_decay'], 0.0001)

-    def _update_task_weight(self, epoch):
-        pass
+    def _update_task_weight(self, tasks, weights, epoch):
+        """
+        Update the weights of tasks.
+        For LKH3, set MAX_TRIALS = 100 to reduce time.
+        """
+        global run_func
+        start_t, gap = time.time(), torch.zeros(weights.size(0))
+        batch_size = 200 if self.meta_params["solver"] == "lkh3_offline" else 50
+        idx = torch.randperm(batch_size)[:50]
+        for i in range(gap.size(0)):
+            selected = tasks[i]
+            data = self._get_data(batch_size=batch_size, task_params=selected)
+
+            # only use lkh3 at the first iteration of updating task weights
+            if self.meta_params["solver"] == "lkh3_offline":
+                if selected not in self.val_data.keys():
+                    self.val_data[selected] = data
+                    opts = argparse.ArgumentParser()
+                    opts.cpus, opts.n, opts.progress_bar_mininterval = None, None, 0.1
+                    dataset = [(instance.cpu().numpy(),) for instance in data]
+                    executable = get_lkh_executable()
+                    def run_func(args):
+                        return solve_lkh_log(executable, *args, runs=1, disable_cache=True, MAX_TRIALS=100)  # otherwise it directly loads data from dir
+                    results, _ = run_all_in_pool(run_func, "./LKH3_result", dataset, opts, use_multiprocessing=False)
+                    self.val_opt[selected] = [j[0] for j in results]
+                data = self.val_data[selected][idx]
+
+            model_score = self._fast_val(self.meta_model, data=data, mode="eval", return_all=True)
+            model_score = model_score.tolist()
+
+            if self.meta_params["solver"] == "lkh3_offline":
+                lkh_score = [self.val_opt[selected][j] for j in idx.tolist()]
+                gap_list = [(model_score[j] - lkh_score[j]) / lkh_score[j] * 100 for j in range(len(lkh_score))]
+                gap[i] = sum(gap_list) / len(gap_list)
+            else:
+                raise NotImplementedError
+        print(">> Finished updating task weights within {}s".format(round(time.time() - start_t, 2)))
+
+        temp = 1.0
+        gap_temp = torch.Tensor([i / temp for i in gap.tolist()])
+        print(gap, temp)
+        print(">> Old task weights: {}".format(weights))
+        weights = torch.softmax(gap_temp, dim=0)
+        print(">> New task weights: {}".format(weights))
+
+        return weights
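Reviewer note: _update_task_weight turns per-task optimality gaps (vs. LKH3) into a sampling distribution via a temperature-scaled softmax, so harder tasks get drawn more often. A compact sketch of just the re-weighting step (temp=1.0 in this trainer; the POMO-only trainer below uses the sharper temp=0.25):

    import torch

    def reweight_tasks(gaps, temp=1.0):
        # gaps: 1-D tensor of average gaps in percent; larger gap -> larger weight
        return torch.softmax(gaps / temp, dim=0)

    w = reweight_tasks(torch.tensor([2.0, 5.0, 9.0]))  # e.g., three tasks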
"distribution"]), "Not Implemented!" + if self.meta_params["data_type"] == "size_distribution": + # hardcoded - task_set: range(self.min_n, self.max_n, self.task_interval) * self.num_dist + self.min_n, self.max_n, self.task_interval, self.num_dist = 50, 200, 5, 11 + self.task_w = torch.full(((self.max_n - self.min_n) // 5 + 1, self.num_dist), 1 / self.num_dist) # Restore self.start_epoch = 1 @@ -100,7 +105,8 @@ def run(self): dir = "../../data/CVRP/Distribution/" paths = ["cvrp100_uniform.pkl", "cvrp100_gaussian.pkl", "cvrp100_cluster.pkl", "cvrp100_diagonal.pkl", "cvrp100_cvrplib.pkl"] elif self.meta_params["data_type"] == "size_distribution": - pass + dir = "../../data/CVRP/Size_Distribution/" + paths = ["cvrp200_uniform.pkl", "cvrp200_gaussian.pkl", "cvrp300_rotation.pkl"] if epoch <= 1 or (epoch % img_save_interval) == 0: for val_path in paths: no_aug_score = self._fast_val(self.model, path=os.path.join(dir, val_path), val_episodes=64) @@ -145,20 +151,28 @@ def _train_one_epoch(self, epoch): loss_AM = AverageMeter() batch_size = self.meta_params['meta_batch_size'] - # Adaptive task scheduler - start, end = 0, 0 - pass + # Adaptive task scheduler - Not implemented for "size" and "distribution" + if self.meta_params['curriculum']: + if self.meta_params["data_type"] == "size_distribution": + start = self.min_n + int(min(epoch / self.meta_params['sch_epoch'], 1) * (self.max_n - self.min_n)) # linear + # start = self.min_n + int(1 / 2 * (1 - math.cos(math.pi * min(epoch / self.meta_params['sch_epoch'], 1))) * (self.max_n - self.min_n)) # cosine + n = start // 5 * 5 + idx = (n - self.min_n) // 5 + tasks, weights = self.task_set[idx * 11: (idx + 1) * 11], self.task_w[idx] + if epoch % self.meta_params['update_weight'] == 0: + self.task_w[idx] = self._update_task_weight(tasks, weights, epoch) # sample a batch of tasks for b in range(self.meta_params['B']): for step in range(self.meta_params['k']): if self.meta_params["data_type"] == "size": - task_params = random.sample(range(start, end + 1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] - # batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 100 else self.meta_params['meta_batch_size'] // 2 + task_params = random.sample(self.task_set, 1)[0] + batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2 elif self.meta_params["data_type"] == "distribution": - task_params = self.task_set[torch.multinomial(self.task_w, 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] + task_params = random.sample(self.task_set, 1)[0] elif self.meta_params["data_type"] == "size_distribution": - pass + task_params = tasks[torch.multinomial(self.task_w[idx], 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] + batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2 data = self._get_data(batch_size, task_params) env_params = {'problem_size': data[-1].size(1), 'pomo_size': data[-1].size(1)} @@ -272,3 +286,50 @@ def _get_data(self, batch_size, task_params): data = (depot_xy, node_xy, node_demand) return data + + def _update_task_weight(self, tasks, weights, epoch): + """ + Update the weights of tasks. + For LKH3, set MAX_TRIALS = 100 to reduce time. 
+ """ + global run_func + start_t, gap = time.time(), torch.zeros(weights.size(0)) + batch_size = 200 if self.meta_params["solver"] == "lkh3_offline" else 50 + idx = torch.randperm(batch_size)[:50] + for i in range(gap.size(0)): + selected = tasks[i] + data = self._get_data(batch_size=batch_size, task_params=selected) + + # only use lkh3 at the first iteration of updating task weights + if self.meta_params["solver"] == "lkh3_offline": + if selected not in self.val_data.keys(): + self.val_data[selected] = data + opts = argparse.ArgumentParser() + opts.cpus, opts.n, opts.progress_bar_mininterval = None, None, 0.1 + dataset = [(instance.cpu().numpy(),) for instance in data] + executable = get_lkh_executable() + def run_func(args): + return solve_lkh_log(executable, *args, runs=1, disable_cache=True, MAX_TRIALS=100) # otherwise it directly loads data from dir + results, _ = run_all_in_pool(run_func, "./LKH3_result", dataset, opts, use_multiprocessing=False) + self.val_opt[selected] = [j[0] for j in results] + data = self.val_data[selected][idx] + + model_score = self._fast_val(self.meta_model, data=data, mode="eval", return_all=True) + model_score = model_score.tolist() + + if self.meta_params["solver"] == "lkh3_offline": + lkh_score = [self.val_opt[selected][j] for j in idx.tolist()] + gap_list = [(model_score[j] - lkh_score[j]) / lkh_score[j] * 100 for j in range(len(lkh_score))] + gap[i] = sum(gap_list) / len(gap_list) + else: + raise NotImplementedError + print(">> Finish updating task weights within {}s".format(round(time.time() - start_t, 2))) + + temp = 0.25 + gap_temp = torch.Tensor([i / temp for i in gap.tolist()]) + print(gap, temp) + print(">> Old task weights: {}".format(weights)) + weights = torch.softmax(gap_temp, dim=0) + print(">> New task weights: {}".format(weights)) + + return weights diff --git a/POMO/CVRP/CVRP_baseline.py b/POMO/CVRP/CVRP_baseline.py index 06c5650..354b64d 100644 --- a/POMO/CVRP/CVRP_baseline.py +++ b/POMO/CVRP/CVRP_baseline.py @@ -7,7 +7,6 @@ import tempfile import time from datetime import timedelta -import hygese as hgs os.chdir(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, "..") # for utils from utils.functions import check_extension, load_dataset, save_dataset, run_all_in_pool, move_to @@ -37,6 +36,31 @@ def get_lkh_executable(url="http://www.akira.ruc.dk/~keld/research/LKH-3/LKH-3.0 return os.path.abspath(executable) +def get_hgs_executable(url="https://github.com/vidalt/HGS-CVRP.git"): + + cwd = os.path.abspath('hgs') + os.makedirs(cwd, exist_ok=True) + + file = os.path.join(cwd, os.path.split(urlparse(url).path)[-1]) + filedir = os.path.splitext(file)[0] + cwd_build = os.path.join(filedir, "build") + + if not os.path.isdir(filedir): + print("{} not found, downloading and compiling".format(filedir)) + + check_call(f"git clone {url}", cwd=cwd, shell=True) + + assert os.path.isdir(filedir), "Extracting failed, dir {} does not exist".format(filedir) + + os.makedirs(cwd_build, exist_ok=True) + check_call("cmake .. 
diff --git a/POMO/CVRP/CVRP_baseline.py b/POMO/CVRP/CVRP_baseline.py
index 06c5650..354b64d 100644
--- a/POMO/CVRP/CVRP_baseline.py
+++ b/POMO/CVRP/CVRP_baseline.py
@@ -7,7 +7,6 @@
 import tempfile
 import time
 from datetime import timedelta
-import hygese as hgs
 os.chdir(os.path.dirname(os.path.abspath(__file__)))
 sys.path.insert(0, "..")  # for utils
 from utils.functions import check_extension, load_dataset, save_dataset, run_all_in_pool, move_to
@@ -37,6 +36,31 @@ def get_lkh_executable(url="http://www.akira.ruc.dk/~keld/research/LKH-3/LKH-3.0
     return os.path.abspath(executable)


+def get_hgs_executable(url="https://github.com/vidalt/HGS-CVRP.git"):
+
+    cwd = os.path.abspath('hgs')
+    os.makedirs(cwd, exist_ok=True)
+
+    file = os.path.join(cwd, os.path.split(urlparse(url).path)[-1])
+    filedir = os.path.splitext(file)[0]
+    cwd_build = os.path.join(filedir, "build")
+
+    if not os.path.isdir(filedir):
+        print("{} not found, downloading and compiling".format(filedir))
+        check_call(f"git clone {url}", cwd=cwd, shell=True)
+        assert os.path.isdir(filedir), "Extracting failed, dir {} does not exist".format(filedir)
+        os.makedirs(cwd_build, exist_ok=True)
+        check_call("cmake .. -DCMAKE_BUILD_TYPE=Release", cwd=cwd_build, shell=True)
+        check_call("make bin", cwd=cwd_build, shell=True)
+
+    executable = os.path.join(cwd_build, "hgs")
+    assert os.path.isfile(executable), f'Cannot find HGS executable file at {executable}'
+    return os.path.abspath(executable)
+
+
 def solve_lkh(executable, depot, loc, demand, capacity):

     with tempfile.TemporaryDirectory() as tempdir:
         problem_filename = os.path.join(tempdir, "problem.vrp")
         output_filename = os.path.join(tempdir, "output.tour")
         param_filename = os.path.join(tempdir, "params.par")

         starttime = time.time()
-        write_vrplib(problem_filename, depot, loc, demand, capacity)
+        write_vrplib(problem_filename, depot, loc, demand, capacity, method="lkh")
         params = {"PROBLEM_FILE": problem_filename, "OUTPUT_TOUR_FILE": output_filename}
         write_lkh_par(param_filename, params)
         output = check_output([executable, param_filename])
-        result = read_vrplib(output_filename, n=len(demand))
+        result = read_lkh_vrplib(output_filename, n=len(demand))
         duration = time.time() - starttime
         return result, output, duration


-def solve_lkh_log(executable, directory, name, depot, loc, demand, capacity, grid_size=1, runs=1, disable_cache=False):
+def solve_lkh_log(executable, directory, name, depot, loc, demand, capacity, grid_size=1, runs=1, disable_cache=False, MAX_TRIALS=10000):

     problem_filename = os.path.join(directory, "{}.lkh{}.vrp".format(name, runs))
     tour_filename = os.path.join(directory, "{}.lkh{}.tour".format(name, runs))
@@ -67,9 +91,9 @@ def solve_lkh_log(executable, directory, name, depot, loc, demand, capacity, gri
         if os.path.isfile(output_filename) and not disable_cache:
             tour, duration = load_dataset(output_filename)
         else:
-            write_vrplib(problem_filename, depot, loc, demand, capacity, grid_size, name=name)
+            write_vrplib(problem_filename, depot, loc, demand, capacity, grid_size, name=name, method="lkh")
-            params = {"PROBLEM_FILE": problem_filename, "OUTPUT_TOUR_FILE": tour_filename, "RUNS": runs, "SEED": 1234}
+            params = {"PROBLEM_FILE": problem_filename, "OUTPUT_TOUR_FILE": tour_filename, "RUNS": runs, "SEED": 1234, "MAX_TRIALS": MAX_TRIALS}
             write_lkh_par(param_filename, params)

             with open(log_filename, 'w') as f:
@@ -77,7 +101,41 @@ def solve_lkh_log(executable, directory, name, depot, loc, demand, capacity, gri
                 check_call([executable, param_filename], stdout=f, stderr=f)
             duration = time.time() - start
-            tour = read_vrplib(tour_filename, n=len(demand))
+            tour = read_lkh_vrplib(tour_filename, n=len(demand))
+
+        save_dataset((tour, duration), output_filename)
+
+        return calc_vrp_cost(depot, loc, tour), tour, duration
+
+    except Exception as e:
+        raise  # re-raise immediately; the logging below is intentionally unreachable
+        print("Exception occurred")
+        print(e)
+        return None
+
+
+def solve_hgs_log(executable, directory, name, depot, loc, demand, capacity, grid_size=1, runs=1, disable_cache=False):
+
+    problem_filename = os.path.join(directory, "{}.hgs{}.vrp".format(name, runs))
+    tour_filename = os.path.join(directory, "{}.hgs{}.tour".format(name, runs))
+    output_filename = os.path.join(directory, "{}.hgs{}.pkl".format(name, runs))
+    # param_filename = os.path.join(directory, "{}.hgs{}.par".format(name, runs))
+    log_filename = os.path.join(directory, "{}.hgs{}.log".format(name, runs))
+
+    try:
+        # May have already been run
+        if os.path.isfile(output_filename) and not disable_cache:
+            tour, duration = load_dataset(output_filename)
+        else:
+            write_vrplib(problem_filename, depot, loc, demand, capacity, grid_size, name=name, method="hgs")
+            with open(log_filename, 'w') as f:
+                start = time.time()
+                # we call hgs with its default setting (i.e., -it 20000)
+ check_call("{} {} {} -it {} -seed {} -round {}".format(executable, problem_filename, tour_filename, 20000, 1234, 0), shell=True, stdout=f, stderr=f) + duration = time.time() - start + + tour = read_hgs_vrplib(tour_filename, n=len(demand)) save_dataset((tour, duration), output_filename) @@ -114,7 +172,7 @@ def write_lkh_par(filename, parameters): f.write("{} = {}\n".format(k, v)) -def read_vrplib(filename, n): +def read_lkh_vrplib(filename, n): with open(filename, 'r') as f: tour = [] dimension = 0 @@ -139,13 +197,32 @@ def read_vrplib(filename, n): return tour[1:].tolist() -def write_vrplib(filename, depot, loc, demand, capacity, grid_size, name="problem"): +def read_hgs_vrplib(filename, n): + tour = [] + num_routes = 0 + with open(filename, 'r') as f: + for line in f: + if line.startswith("Route"): + l = line.strip().split(":") + tour.append(0) + tour.extend(map(int, l[1].split())) + num_routes += 1 + + tour = np.array(tour).astype(int) # depot is 0 and other nodes start with 1 in HGS format + assert len(tour) - num_routes == np.max(tour) == n + + return tour[1:].tolist() + + +def write_vrplib(filename, depot, loc, demand, capacity, grid_size, name="problem", method="lkh"): + # default scale value is 100000 from "https://github.com/wouterkool/attention-learn-to-route" with open(filename, 'w') as f: f.write("\n".join([ "{} : {}".format(k, v) for k, v in ( ("NAME", name), + ("COMMENT", "CVRP Instance"), ("TYPE", "CVRP"), ("DIMENSION", len(loc) + 1), ("EDGE_WEIGHT_TYPE", "EUC_2D"), @@ -155,8 +232,8 @@ def write_vrplib(filename, depot, loc, demand, capacity, grid_size, name="proble f.write("\n") f.write("NODE_COORD_SECTION\n") f.write("\n".join([ + "{}\t{}\t{}".format(i + 1, x, y) if method == "hgs" else # we use -round=0 for hgs "{}\t{}\t{}".format(i + 1, int(x / grid_size * 100000 + 0.5), int(y / grid_size * 100000 + 0.5)) # VRPlib does not take floats - #"{}\t{}\t{}".format(i + 1, x, y) for i, (x, y) in enumerate([depot] + loc) ])) f.write("\n") @@ -175,12 +252,12 @@ def write_vrplib(filename, depot, loc, demand, capacity, grid_size, name="proble if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--method", type=str, default='lkh', choices=["lkh", ]) - parser.add_argument("--datasets", nargs='+', default=["../../data/CVRP/Size/cvrp100_uniform.pkl", ], help="Filename of the dataset(s) to evaluate") + parser.add_argument("--method", type=str, default='hgs', choices=["hgs", "lkh"]) + parser.add_argument("--datasets", nargs='+', default=["../../data/CVRP/Size_Distribution/cvrp100_uniform.pkl", ], help="Filename of the dataset(s) to evaluate") parser.add_argument("-f", action='store_false', help="Set true to overwrite") parser.add_argument("-o", default=None, help="Name of the results file to write") parser.add_argument("--cpus", type=int, help="Number of CPUs to use, defaults to all cores") - parser.add_argument('--disable_cache', action='store_false', help='Disable caching') + parser.add_argument('--disable_cache', action='store_true', help='Disable caching') parser.add_argument('--progress_bar_mininterval', type=float, default=0.1, help='Minimum interval') parser.add_argument('-n', type=int, default=10000, help="Number of instances to process") parser.add_argument('--offset', type=int, default=0, help="Offset where to start processing") @@ -218,24 +295,10 @@ def write_vrplib(filename, depot, loc, demand, capacity, grid_size, name="proble runs = 1 if match[2] == '' else int(match[2]) start_t = time.time() - if method == "lkh": - executable = 
@@ -218,24 +295,10 @@ def write_vrplib(filename, depot, loc, demand, capacity, grid_size, name="proble
         runs = 1 if match[2] == '' else int(match[2])

         start_t = time.time()
-        if method == "lkh":
-            executable = get_lkh_executable()
-
-            target_dir = os.path.join(results_dir, "{}-{}".format(
-                dataset_basename,
-                opts.method
-            ))
-            assert opts.f or not os.path.isdir(target_dir), \
-                "Target dir already exists! Try running with -f option to overwrite."
-
-            if not os.path.isdir(target_dir):
-                os.makedirs(target_dir)
-
-            # CVRP contains tuple rather than single loc array
-            dataset = load_dataset(dataset_path)
-
+        if method == "lkh":
             use_multiprocessing = False
-
+            executable = get_lkh_executable()
             def run_func(args):
                 directory, name, *args = args
                 depot, loc, demand, capacity, *args = args
@@ -250,15 +313,45 @@ def run_func(args):
                     depot, loc, demand, capacity, grid_size,
                     runs=runs, disable_cache=opts.disable_cache
                 )
+        elif method == "hgs":
+            use_multiprocessing = False
+            executable = get_hgs_executable()
+            def run_func(args):
+                directory, name, *args = args
+                depot, loc, demand, capacity, *args = args
+                depot = depot[0] if len(depot) == 1 else depot  # if depot: [[x, y]] -> [x, y]
+                grid_size = 1
+                if len(args) > 0:
+                    depot_types, customer_types, grid_size = args
-        # Note: only processing n items is handled by run_all_in_pool
-        results, parallelism = run_all_in_pool(
-            run_func,
-            target_dir, dataset, opts, use_multiprocessing=use_multiprocessing
-        )
+                return solve_hgs_log(
+                    executable,
+                    directory, name,
+                    depot, loc, demand, capacity, grid_size,
+                    runs=runs, disable_cache=opts.disable_cache
+                )
         else:
             assert False, "Unknown method: {}".format(opts.method)

+        target_dir = os.path.join(results_dir, "{}-{}".format(
+            dataset_basename,
+            opts.method
+        ))
+        assert opts.f or not os.path.isdir(target_dir), \
+            "Target dir already exists! Try running with -f option to overwrite."
+
+        if not os.path.isdir(target_dir):
+            os.makedirs(target_dir)
+
+        # CVRP contains tuple rather than single loc array
+        dataset = load_dataset(dataset_path)
+
+        # Note: only processing n items is handled by run_all_in_pool
+        results, parallelism = run_all_in_pool(
+            run_func,
+            target_dir, dataset, opts, use_multiprocessing=use_multiprocessing
+        )
+
         costs, tours, durations = zip(*results)  # Not really costs since they should be negative
         print(">> Solving {} instances within {:.2f}s using {}".format(opts.n, time.time()-start_t, opts.method))
         print("Average cost: {} +- {}".format(np.mean(costs), 2 * np.std(costs) / np.sqrt(len(costs))))
diff --git a/POMO/CVRP/test.py b/POMO/CVRP/test.py
index 4d9097d..da9b716 100644
--- a/POMO/CVRP/test.py
+++ b/POMO/CVRP/test.py
@@ -46,8 +46,8 @@
     'test_robustness': False,
     'aug_factor': 8,
     'aug_batch_size': 100,
-    'test_set_path': '../../data/CVRP/Size/cvrp100_uniform.pkl',
-    'test_set_opt_sol_path': '../../data/CVRP/Size/opt_cvrp100_uniform.pkl'
+    'test_set_path': '../../data/CVRP/Size_Distribution/cvrp100_uniform.pkl',
+    'test_set_opt_sol_path': '../../data/CVRP/Size_Distribution/hgs/cvrp100_uniformoffset0n10000-hgs.pkl'
 }

 fine_tune_params = {
diff --git a/POMO/CVRP/train.py b/POMO/CVRP/train.py
index 27d1316..2877dd0 100644
--- a/POMO/CVRP/train.py
+++ b/POMO/CVRP/train.py
@@ -46,7 +46,7 @@
     # 'batch_size': 64,
     'logging': {
         'model_save_interval': 25000,
-        'img_save_interval': 10,
+        'img_save_interval': 100,
         'log_image_params_1': {
             'json_foldername': 'log_image_style',
             'filename': 'general.json'
@@ -66,19 +66,20 @@
 meta_params = {
     'enable': True,  # whether to use meta-learning or not
-    'curriculum': False,  # adaptive sample task
+    'curriculum': True,  # adaptively sample tasks
     'meta_method': 'maml',  # choose from ['maml', 'fomaml', 'reptile']
-    'bootstrap_steps': 25,
-    'data_type': 'size',  # choose from ["size", "distribution", "size_distribution"]
["size", "distribution", "size_distribution"] + 'data_type': 'size_distribution', # choose from ["size", "distribution", "size_distribution"] 'epochs': 250000, # the number of meta-model updates: (250*100000) / (1*5*64) 'B': 1, # the number of tasks in a mini-batch 'k': 1, # gradient decent steps in the inner-loop optimization of meta-learning method + 'L': 0, # bootstrap steps 'meta_batch_size': 64, # will be divided by 2 if problem_size >= 100 - 'update_weight': 1000, # update weight of each task per X iters - 'sch_epoch': 250000, # for the task scheduler of size setting + 'update_weight': 100, # update weight of each task per X iters + 'sch_epoch': 225000, # for the task scheduler of size setting, where sch_epoch = 0.9 * epochs 'solver': 'lkh3_offline', # solver used to update the task weights, choose from ["bootstrap", "lkh3_online", "lkh3_offline", "best_model"] 'alpha': 0.99, # params for the outer-loop optimization of reptile 'alpha_decay': 0.999, # params for the outer-loop optimization of reptile + 'beta': 0.9, # loss weight } logger_params = { @@ -140,7 +141,7 @@ def occumpy_mem(cuda_device): total, used = check_mem(cuda_device) total = int(total) used = int(used) - block_mem = int((total-used) * 0.85) + block_mem = int((total-used) * 0.5) x = torch.cuda.FloatTensor(256, 1024, block_mem) del x diff --git a/POMO/ProblemDef.py b/POMO/ProblemDef.py index 9ab21e8..9fe4734 100644 --- a/POMO/ProblemDef.py +++ b/POMO/ProblemDef.py @@ -12,15 +12,15 @@ def generate_task_set(meta_params): Current setting: size: (n,) \in [20, 150] distribution: (m, c) \in {(0, 0) + [1-9] * [1, 10, 20, 30, 40, 50]} - TODO: size_distribution: (n, m, c) \in [20, 150, 5] * {(0, 0) + [1, 3, 5, 7] * [1, 10, 30, 50]} + size_distribution: (n, m, c) \in [50, 200, 5] * {(0, 0) + (1, 1) + [3, 5, 7] * [10, 30, 50]} """ if meta_params['data_type'] == "distribution": # focus on TSP100 with gaussian mixture distributions task_set = [(0, 0)] + [(m, c) for m in range(1, 10) for c in [1, 10, 20, 30, 40, 50]] elif meta_params['data_type'] == "size": # focus on uniform distribution with different sizes task_set = [(n,) for n in range(20, 151)] elif meta_params['data_type'] == "size_distribution": - dist_set = [(0, 0)] + [(m, c) for m in [1, 3, 5, 7] for c in [1, 10, 30, 50]] - task_set = [(n, m, c) for n in range(20, 151, 5) for (m, c) in dist_set] + dist_set = [(0, 0), (1, 1)] + [(m, c) for m in [3, 5, 7] for c in [10, 30, 50]] + task_set = [(n, m, c) for n in range(50, 201, 5) for (m, c) in dist_set] else: raise NotImplementedError print(">> Generating training task set: {} tasks with type {}".format(len(task_set), meta_params['data_type'])) @@ -249,23 +249,26 @@ def generate_tsp_dist(n_samples, n_nodes, distribution): val seed: 2022 test seed: 2023 """ - path = "../data/TSP/Size" + path = "../data/CVRP/Size_Distribution" if not os.path.exists(path): os.makedirs(path) seed_everything(seed=2023) - # var-dist test data - # for dist in ["uniform", "uniform_rectangle", "gaussian", "diagonal", "tsplib", "cluster"]: - # print(">> Generating TSP instances following {} distribution!".format(dist)) - # get_random_problems(15000, 100, distribution=dist, path=path, problem="tsp") + # test data for Table 1 + # for s in [100, 150, 200]: + # for dist in ["uniform", "gaussian"]: + # print(">> Generating TSP instances following {} distribution!".format(dist)) + # get_random_problems(15000, s, distribution=dist, path=path, problem="cvrp") # var-size test data # for s in [50, 100, 150, 200, 300, 500, 1000]: # print(">> Generating TSP 
@@ -249,23 +249,26 @@ def generate_tsp_dist(n_samples, n_nodes, distribution):
         val seed: 2022
         test seed: 2023
     """
-    path = "../data/TSP/Size"
+    path = "../data/CVRP/Size_Distribution"
     if not os.path.exists(path):
         os.makedirs(path)
     seed_everything(seed=2023)

-    # var-dist test data
-    # for dist in ["uniform", "uniform_rectangle", "gaussian", "diagonal", "tsplib", "cluster"]:
-    #     print(">> Generating TSP instances following {} distribution!".format(dist))
-    #     get_random_problems(15000, 100, distribution=dist, path=path, problem="tsp")
+    # test data for Table 1
+    # for s in [100, 150, 200]:
+    #     for dist in ["uniform", "gaussian"]:
+    #         print(">> Generating CVRP instances following {} distribution!".format(dist))
+    #         get_random_problems(15000, s, distribution=dist, path=path, problem="cvrp")

     # var-size test data
     # for s in [50, 100, 150, 200, 300, 500, 1000]:
     #     print(">> Generating TSP instances of size {}!".format(s))
     #     get_random_problems(15000, s, distribution="uniform", path=path, problem="tsp")

-    # data = generate_gaussian_mixture_tsp(dataset_size=64, graph_size=100, num_modes=9, cdist=1)
-    # data = load_dataset("../../data/TSP/tsp100_cluster.pkl")
+    # data = generate_gaussian_mixture_tsp(dataset_size=1, graph_size=150, num_modes=3, cdist=10)
+    data = load_dataset("../data/CVRP/Size_Distribution/cvrp100_uniform.pkl")
+    print(data[0])
     # print(type(data), data.size(), data)
+    # x, y = [i[0] for i in data[1]], [i[-1] for i in data[1]]
     # x, y = data[0, :, 0].tolist(), data[0, :, -1].tolist()
-    # show([x], [y], label=["Gaussian Mixture"], title="TSP100", xdes="x", ydes="y", path="./tsp.pdf")
+    # show([x], [y], label=["Gaussian Mixture"], title="TSP300", xdes="x", ydes="y", path="./tsp.pdf")
diff --git a/POMO/TSP/TSPModel.py b/POMO/TSP/TSPModel.py
index 5185d5e..5729770 100644
--- a/POMO/TSP/TSPModel.py
+++ b/POMO/TSP/TSPModel.py
@@ -23,38 +23,40 @@ def pre_forward(self, reset_state, weights=None):
         # shape: (batch, problem, EMBEDDING_DIM)
         self.decoder.set_kv(self.encoded_nodes, weights=weights)

-    def forward(self, state, weights=None):
+    def forward(self, state, weights=None, selected=None, return_probs=False):
         batch_size = state.BATCH_IDX.size(0)
         pomo_size = state.BATCH_IDX.size(1)

         if state.current_node is None:
             selected = torch.arange(pomo_size)[None, :].expand(batch_size, pomo_size)  # for torch.gather(dim=1)
             prob = torch.ones(size=(batch_size, pomo_size))
-
+            probs = torch.ones(size=(batch_size, pomo_size, self.encoded_nodes.size(1)))
             encoded_first_node = _get_encoding(self.encoded_nodes, selected)
             # shape: (batch, pomo, embedding)
             self.decoder.set_q1(encoded_first_node, weights=weights)  # pre-compute fixed part of the context embedding
-
         else:
             encoded_last_node = _get_encoding(self.encoded_nodes, state.current_node)
             # shape: (batch, pomo, embedding)
             probs = self.decoder(encoded_last_node, ninf_mask=state.ninf_mask, weights=weights)
             # shape: (batch, pomo, problem)
-
-            while True:
-                if self.training or self.model_params['eval_type'] == 'softmax':
-                    selected = probs.reshape(batch_size * pomo_size, -1).multinomial(1).squeeze(dim=1).reshape(batch_size, pomo_size)
-                    # shape: (batch, pomo)
-                else:
-                    selected = probs.argmax(dim=2)
+            if selected is None:
+                while True:
+                    if self.training or self.model_params['eval_type'] == 'softmax':
+                        selected = probs.reshape(batch_size * pomo_size, -1).multinomial(1).squeeze(dim=1).reshape(batch_size, pomo_size)
+                        # shape: (batch, pomo)
+                    else:
+                        selected = probs.argmax(dim=2)
+                        # shape: (batch, pomo)
+                    prob = probs[state.BATCH_IDX, state.POMO_IDX, selected].reshape(batch_size, pomo_size)
                     # shape: (batch, pomo)
-
+                    if (prob != 0).all():
+                        break
+            else:
+                # actions are given externally (e.g., replaying selections under another set of weights)
                 prob = probs[state.BATCH_IDX, state.POMO_IDX, selected].reshape(batch_size, pomo_size)
-                # shape: (batch, pomo)
-
-                if (prob != 0).all():
-                    break

+        if return_probs:
+            return selected, prob, probs
         return selected, prob
diff --git a/POMO/TSP/TSPTester.py b/POMO/TSP/TSPTester.py
index c4ba3b0..b1e1218 100644
--- a/POMO/TSP/TSPTester.py
+++ b/POMO/TSP/TSPTester.py
@@ -112,8 +112,8 @@ def _test(self, store_res=True):
             score_AM.update(score, batch_size)
             aug_score_AM.update(aug_score, batch_size)
             episode += batch_size
-            gap = [max(all_score[i].item() - opt_sol[i], 0) / opt_sol[i] * 100 for i in range(batch_size)]
-            aug_gap = [max(all_aug_score[i].item() - opt_sol[i], 0) / opt_sol[i] * 100 for i in range(batch_size)]
+            gap = [(all_score[i].item() - opt_sol[i]) / opt_sol[i] * 100 for i in range(batch_size)]
+            aug_gap = [(all_aug_score[i].item() - opt_sol[i]) / opt_sol[i] * 100 for i in range(batch_size)]
             gap_AM.update(sum(gap)/batch_size, batch_size)
             aug_gap_AM.update(sum(aug_gap)/batch_size, batch_size)

diff --git a/POMO/TSP/TSPTrainer_Meta.py b/POMO/TSP/TSPTrainer_Meta.py
index caee82c..51ecc96 100644
--- a/POMO/TSP/TSPTrainer_Meta.py
+++ b/POMO/TSP/TSPTrainer_Meta.py
@@ -55,20 +55,22 @@ def __init__(self,
             torch.set_default_tensor_type('torch.FloatTensor')

         # Main Components
-        self.model_params["norm"] = None  # Original "POMO" Paper uses instance/batch normalization
+        self.model_params["norm"] = "instance"  # instance norm seems to handle OOD data better than batch norm; removing normalization destabilizes training
         self.meta_model = Model(**self.model_params)
         self.meta_optimizer = Optimizer(self.meta_model.parameters(), **self.optimizer_params['optimizer'])
         self.alpha = self.meta_params['alpha']  # for reptile
         self.task_set = generate_task_set(self.meta_params)
         self.val_data, self.val_opt = {}, {}  # for lkh3_offline
         if self.meta_params["data_type"] == "size":
-            self.min_n, self.max_n, self.task_interval = self.task_set[0][0], self.task_set[-1][0], 5  # [20, 150]
-            self.task_w = {start: 1/(len(self.task_set)//5) for start in range(self.min_n, self.max_n, self.task_interval)}
+            self.min_n, self.max_n, self.task_interval = self.task_set[0][0], self.task_set[-1][0], 5
+            # self.task_w = {start: 1/(len(self.task_set)//5) for start in range(self.min_n, self.max_n, self.task_interval)}
             # self.task_w = torch.full((len(self.task_set)//self.task_interval,), 1/(len(self.task_set)//self.task_interval))
         elif self.meta_params["data_type"] == "distribution":
             self.task_w = torch.full((len(self.task_set),), 1 / len(self.task_set))
-        else:
-            raise NotImplementedError
+        elif self.meta_params["data_type"] == "size_distribution":
+            # hardcoded - task_set: range(self.min_n, self.max_n, self.task_interval) * self.num_dist
+            self.min_n, self.max_n, self.task_interval, self.num_dist = 50, 200, 5, 11
+            self.task_w = torch.full(((self.max_n - self.min_n) // 5 + 1, self.num_dist), 1 / self.num_dist)

         # Restore
         self.start_epoch = 1
@@ -114,7 +116,8 @@ def run(self):
             dir = "../../data/TSP/Distribution/"
             paths = ["tsp100_uniform.pkl", "tsp100_gaussian.pkl", "tsp100_cluster.pkl", "tsp100_diagonal.pkl", "tsp100_tsplib.pkl"]
         elif self.meta_params["data_type"] == "size_distribution":
-            pass
+            dir = "../../data/TSP/Size_Distribution/"
+            paths = ["tsp200_uniform.pkl", "tsp200_gaussian.pkl", "tsp300_rotation.pkl"]
         if epoch <= 1 or (epoch % img_save_interval) == 0:
             for val_path in paths:
                 no_aug_score = self._fast_val(self.meta_model, path=os.path.join(dir, val_path), val_episodes=64, mode="eval")
@@ -172,30 +175,39 @@ def run(self):

     def _train_one_epoch(self, epoch):
         """
-        1. Sample B training tasks from task distribution P(T)
-        2. Inner-loop: for a batch of tasks T_i, POMO training -> \theta_i
-        3. Outer-loop: update meta-model -> \theta_0
+        Meta-learning framework:
+        1. Sample B training tasks from the task distribution P(T)
+        2. Inner loop: for a batch of tasks T_i, POMO training -> \theta_i
+        3. Outer loop: update the meta-model -> \theta_0
+
+        Adaptive task scheduler:
+        for size: gradually increase the problem size (curriculum learning);
+        for distribution: every X iters, compute the relative gaps (w.r.t. LKH3) or estimate the potential improvement of each distribution (i.e., bootstrap);
+        for size_distribution: combine both strategies.
        """
""" self.meta_optimizer.zero_grad() score_AM = AverageMeter() loss_AM = AverageMeter() batch_size = self.meta_params['meta_batch_size'] - """ - Adaptive task scheduler: - for size: gradually increase the problem size (Curriculum learning); - for distribution: we compute the relative gaps (w.r.t. LKH3) or estimate the potential improvements of each distribution (i.e., bootstrap) every X iters; - """ - if self.meta_params["data_type"] == "size": - # start = self.min_n + int(epoch/self.meta_params['sch_epoch'] * (self.max_n - self.min_n)) # linear - start = self.min_n + int(1/2 * (1-math.cos(math.pi * min(epoch/self.meta_params['sch_epoch'], 1))) * (self.max_n - self.min_n)) # cosine - end = min(start + 10, self.max_n) # 10 is the size of the sliding window - if self.meta_params["curriculum"]: print(">> training task {}".format((start, end))) - elif self.meta_params["data_type"] == "distribution": - if epoch != 0 and epoch % self.meta_params['update_weight'] == 0: - self._update_task_weight(epoch) - elif self.meta_params["data_type"] == "size_distribution": - pass + # Adaptive task scheduler: + if self.meta_params['curriculum']: + if self.meta_params["data_type"] == "size": + # start = self.min_n + int(min(epoch / self.meta_params['sch_epoch'], 1) * (self.max_n - self.min_n)) # linear + start = self.min_n + int(1/2 * (1-math.cos(math.pi * min(epoch/self.meta_params['sch_epoch'], 1))) * (self.max_n - self.min_n)) # cosine + end = min(start + 10, self.max_n) # 10 is the size of the sliding window + if self.meta_params["curriculum"]: print(">> training task {}".format((start, end))) + elif self.meta_params["data_type"] == "distribution": + if epoch % self.meta_params['update_weight'] == 0: + self.task_w = self._update_task_weight(self.task_set, self.task_w, epoch) + elif self.meta_params["data_type"] == "size_distribution": + start = self.min_n + int(min(epoch / self.meta_params['sch_epoch'], 1) * (self.max_n - self.min_n)) # linear + # start = self.min_n + int(1 / 2 * (1 - math.cos(math.pi * min(epoch / self.meta_params['sch_epoch'], 1))) * (self.max_n - self.min_n)) # cosine + n = start // 5 * 5 + idx = (n - self.min_n) // 5 + tasks, weights = self.task_set[idx*11: (idx+1)*11], self.task_w[idx] + if epoch % self.meta_params['update_weight'] == 0: + self.task_w[idx] = self._update_task_weight(tasks, weights, epoch) self._alpha_scheduler(epoch) # for reptile fast_weights, val_loss, meta_grad_dict = [], 0, {(i, j): 0 for i, group in enumerate(self.meta_optimizer.param_groups) for j, _ in enumerate(group['params'])} @@ -204,7 +216,7 @@ def _train_one_epoch(self, epoch): # sample a task if self.meta_params["data_type"] == "size": task_params = random.sample(range(start, end+1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] - # batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 100 else self.meta_params['meta_batch_size'] // 2 + batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2 elif self.meta_params["data_type"] == "distribution": # sample based on task weights task_params = self.task_set[torch.multinomial(self.task_w, 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] @@ -213,7 +225,9 @@ def _train_one_epoch(self, epoch): # selected_idx = torch.sort(self.task_w, descending=False)[1].tolist()[start: end] # task_params = self.task_set[random.sample(selected_idx, 1)[0]] if self.meta_params['curriculum'] and epoch >= self.meta_params['update_weight'] 
             elif self.meta_params["data_type"] == "size_distribution":
-                pass
+                task_params = tasks[torch.multinomial(self.task_w[idx], 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
+                batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2
+            data = self._get_data(batch_size, task_params)

             # preparation
             if self.meta_params['meta_method'] in ['fomaml', 'reptile']:
@@ -227,36 +241,57 @@ def _train_one_epoch(self, epoch):
                 fast_weight = OrderedDict(self.meta_model.decoder.named_parameters())
                 for k in list(fast_weight.keys()):
                     fast_weight["decoder."+k] = fast_weight.pop(k)
-                # optimizer = Optimizer(fast_weight.values(), **self.optimizer_params['optimizer'])

             # inner-loop optimization
             for step in range(self.meta_params['k']):
-                data = self._get_data(batch_size, task_params)
+                # data = self._get_data(batch_size, task_params)
                 env_params = {'problem_size': data.size(1), 'pomo_size': data.size(1)}
                 self.meta_model.train()
                 if self.meta_params['meta_method'] in ['reptile', 'fomaml']:
                     avg_score, avg_loss = self._train_one_batch(task_model, data, Env(**env_params), optimizer)
                 elif self.meta_params['meta_method'] == 'maml':
-                    avg_score, avg_loss, fast_weight = self._train_one_batch_maml(fast_weight, data, Env(**env_params))
+                    avg_score, avg_loss, fast_weight = self._train_one_batch_maml(fast_weight, data, Env(**env_params), create_graph=True)
                 score_AM.update(avg_score.item(), batch_size)
                 loss_AM.update(avg_loss.item(), batch_size)

+            # bootstrap
+            bootstrap_model = None
+            if self.meta_params['L'] > 0:
+                assert self.meta_params['meta_method'] in ['maml', 'fomaml']
+                bootstrap_model = Model(**self.model_params)
+                if self.meta_params['meta_method'] == 'maml':
+                    bootstrap_model = OrderedDict({k: v.clone().detach().requires_grad_(True) for k, v in fast_weight.items()})
+                else:
+                    bootstrap_model.load_state_dict(copy.deepcopy(task_model.state_dict()))
+                    bootstrap_optimizer = Optimizer(bootstrap_model.parameters(), **self.optimizer_params['optimizer'])
+                    bootstrap_optimizer.load_state_dict(optimizer.state_dict())
+                for step in range(self.meta_params['L']):
+                    # data = self._get_data(batch_size, task_params)
+                    if self.meta_params['meta_method'] == 'maml':
+                        avg_score, avg_loss, bootstrap_model = self._train_one_batch_maml(bootstrap_model, data, Env(**env_params), create_graph=False)
+                    else:
+                        avg_score, avg_loss = self._train_one_batch(bootstrap_model, data, Env(**env_params), bootstrap_optimizer)

             val_data = self._get_val_data(batch_size, task_params)
             self.meta_model.train()
             if self.meta_params['meta_method'] == 'maml':
                 # Old version
                 # val_loss += self._fast_val(fast_weight, data=val_data, mode="maml") / self.meta_params['B']
                 # New version - Save GPU memory
-                val_loss = self._fast_val(fast_weight, data=val_data, mode="maml") / self.meta_params['B']
+                val_loss, kl_loss = self._fast_val(fast_weight, data=val_data, mode="maml", bootstrap_model=bootstrap_model)
+                print(val_loss, kl_loss)
+                loss = (self.meta_params['beta'] * val_loss + (1-self.meta_params['beta']) * kl_loss) / self.meta_params['B']
                 self.meta_optimizer.zero_grad()
-                val_loss.backward()
+                loss.backward()
                 for i, group in enumerate(self.meta_optimizer.param_groups):
                     for j, p in enumerate(group['params']):
                         meta_grad_dict[(i, j)] += p.grad
             elif self.meta_params['meta_method'] == 'fomaml':
-                val_loss = self._fast_val(task_model, data=val_data, mode="fomaml") / self.meta_params['B']
+                val_loss, kl_loss = self._fast_val(task_model, data=val_data, mode="fomaml", bootstrap_model=bootstrap_model)
@@ -227,36 +241,57 @@ def _train_one_epoch(self, epoch):
                 fast_weight = OrderedDict(self.meta_model.decoder.named_parameters())
                 for k in list(fast_weight.keys()):
                     fast_weight["decoder."+k] = fast_weight.pop(k)
-                # optimizer = Optimizer(fast_weight.values(), **self.optimizer_params['optimizer'])

             # inner-loop optimization
             for step in range(self.meta_params['k']):
-                data = self._get_data(batch_size, task_params)
+                # data = self._get_data(batch_size, task_params)
                 env_params = {'problem_size': data.size(1), 'pomo_size': data.size(1)}
                 self.meta_model.train()
                 if self.meta_params['meta_method'] in ['reptile', 'fomaml']:
                     avg_score, avg_loss = self._train_one_batch(task_model, data, Env(**env_params), optimizer)
                 elif self.meta_params['meta_method'] == 'maml':
-                    avg_score, avg_loss, fast_weight = self._train_one_batch_maml(fast_weight, data, Env(**env_params))
+                    avg_score, avg_loss, fast_weight = self._train_one_batch_maml(fast_weight, data, Env(**env_params), create_graph=True)
                 score_AM.update(avg_score.item(), batch_size)
                 loss_AM.update(avg_loss.item(), batch_size)

+            # bootstrap - run L extra inner-loop steps to obtain a stronger target policy
+            bootstrap_model = None
+            if self.meta_params['L'] > 0:
+                assert self.meta_params['meta_method'] in ['maml', 'fomaml']
+                if self.meta_params['meta_method'] == 'maml':
+                    bootstrap_model = OrderedDict({k: v.clone().detach().requires_grad_(True) for k, v in fast_weight.items()})
+                else:
+                    bootstrap_model = Model(**self.model_params)
+                    bootstrap_model.load_state_dict(copy.deepcopy(task_model.state_dict()))
+                    bootstrap_optimizer = Optimizer(bootstrap_model.parameters(), **self.optimizer_params['optimizer'])
+                    bootstrap_optimizer.load_state_dict(optimizer.state_dict())
+                for step in range(self.meta_params['L']):
+                    # data = self._get_data(batch_size, task_params)
+                    if self.meta_params['meta_method'] == 'maml':
+                        avg_score, avg_loss, bootstrap_model = self._train_one_batch_maml(bootstrap_model, data, Env(**env_params), create_graph=False)
+                    else:
+                        avg_score, avg_loss = self._train_one_batch(bootstrap_model, data, Env(**env_params), bootstrap_optimizer)
+
             val_data = self._get_val_data(batch_size, task_params)
             self.meta_model.train()
             if self.meta_params['meta_method'] == 'maml':
                 # Old version
                 # val_loss += self._fast_val(fast_weight, data=val_data, mode="maml") / self.meta_params['B']
                 # New version - Save GPU memory
-                val_loss = self._fast_val(fast_weight, data=val_data, mode="maml") / self.meta_params['B']
+                val_loss, kl_loss = self._fast_val(fast_weight, data=val_data, mode="maml", bootstrap_model=bootstrap_model)
+                loss = (self.meta_params['beta'] * val_loss + (1 - self.meta_params['beta']) * kl_loss) / self.meta_params['B']
                 self.meta_optimizer.zero_grad()
-                val_loss.backward()
+                loss.backward()
                 for i, group in enumerate(self.meta_optimizer.param_groups):
                     for j, p in enumerate(group['params']):
                         meta_grad_dict[(i, j)] += p.grad
             elif self.meta_params['meta_method'] == 'fomaml':
-                val_loss = self._fast_val(task_model, data=val_data, mode="fomaml") / self.meta_params['B']
+                val_loss, kl_loss = self._fast_val(task_model, data=val_data, mode="fomaml", bootstrap_model=bootstrap_model)
+                loss = (self.meta_params['beta'] * val_loss + (1 - self.meta_params['beta']) * kl_loss) / self.meta_params['B']
                 optimizer.zero_grad()
-                val_loss.backward()
+                loss.backward()
                 for i, group in enumerate(optimizer.param_groups):
                     for j, p in enumerate(group['params']):
                         meta_grad_dict[(i, j)] += p.grad
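When L > 0, the outer objective mixes the validation REINFORCE loss of the k-step fast weights with a KL term pulling them toward the stronger (k+L)-step bootstrap target, in the spirit of "Bootstrapped Meta-Learning" (ICLR 2022). A shape-level sketch with random stand-in tensors (beta = 0.9 as in train.py):

import torch

beta = 0.9
val_loss = torch.rand(1, requires_grad=True).sum()       # stand-in for the k-step REINFORCE loss
student = torch.softmax(torch.randn(4, 10), dim=-1)      # k-step policy (differentiable in practice)
teacher = torch.softmax(torch.randn(4, 10), dim=-1)      # (k+L)-step target (held fixed / detached)
kl = (teacher * (teacher.log() - student.log())).sum(-1).mean()  # KL(teacher || student)
loss = beta * val_loss + (1 - beta) * kl                 # mixed outer objective
print(loss)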
@@ -329,7 +364,7 @@ def _train_one_batch(self, task_model, data, env, optimizer=None):

         return score_mean, loss_mean

-    def _train_one_batch_maml(self, fast_weight, data, env, optimizer=None):
+    def _train_one_batch_maml(self, fast_weight, data, env, optimizer=None, create_graph=True):

         batch_size = data.size(0)
         env.load_problems(batch_size, problems=data, aug_factor=1)
@@ -354,22 +389,22 @@ def _train_one_batch_maml(self, fast_weight, data, env, optimizer=None):
         loss_mean = loss.mean()

         # 1. update model - in SGD way
-        # gradients = torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=True)  # allow_unused=True
+        # gradients = torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=create_graph)  # allow_unused=True
         # fast_weight = OrderedDict(
         #     (name, param - self.optimizer_params['optimizer']['lr'] * grad)
         #     for ((name, param), grad) in zip(fast_weight.items(), gradients)
         # )
         # 2. update model - in Adam way
-        gradients = torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=True)  # allow_unused=True
+        gradients = torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=create_graph)  # allow_unused=True
         w_t, (beta1, beta2), eps = [], self.meta_optimizer.param_groups[0]['betas'], self.meta_optimizer.param_groups[0]['eps']
         lr, weight_decay = self.optimizer_params['optimizer']['lr'], self.optimizer_params['optimizer']['weight_decay']
         for i, ((name, param), grad) in enumerate(zip(fast_weight.items(), gradients)):
             if self.meta_optimizer.state_dict()['state'] != {}:
-                i = i if self.model_params['meta_update_encoder'] else i + 58  # i \in [0, 62]
+                i = i if self.model_params['meta_update_encoder'] else i + 58  # i \in [0, 62], where encoder params \in [0, 57] and decoder params \in [58, 62]
                 state = self.meta_optimizer.state_dict()['state'][i]
                 step, exp_avg, exp_avg_sq = state['step'], state['exp_avg'], state['exp_avg_sq']
                 step += 1
-                step = step.item()
+                step = step.item() if isinstance(step, torch.Tensor) else step
                 # compute grad based on the Adam source code, using in-place operations
                 # update Adam stats (step, exp_avg and exp_avg_sq have already been updated by the in-place operations)
                 # may encounter RuntimeError: (a leaf Variable that requires grad) / (the tensor used during grad computation) cannot use in-place operation.
@@ -392,7 +427,7 @@ def _train_one_batch_maml(self, fast_weight, data, env, optimizer=None):
         """
        # 3. update model using optimizer - this method cannot work properly.
        optimizer.zero_grad()
-        # torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=True)
+        # torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=create_graph)
        # print(list(self.meta_model.parameters())[-1])
        loss_mean.backward(retain_graph=True, create_graph=True)
        optimizer.step()  # will update meta_model as well...
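The "Adam way" above replays Adam's update rule out-of-place so the updated fast weights stay differentiable w.r.t. the meta-parameters when create_graph=True. A minimal functional Adam step under default hyperparameters (no weight decay or amsgrad; toy tensors only):

import torch

def functional_adam_step(param, grad, exp_avg, exp_avg_sq, step,
                         lr=1e-4, betas=(0.9, 0.999), eps=1e-8):
    # out-of-place moment updates keep the result differentiable w.r.t. `param`
    exp_avg = betas[0] * exp_avg + (1 - betas[0]) * grad
    exp_avg_sq = betas[1] * exp_avg_sq + (1 - betas[1]) * grad ** 2
    bias1, bias2 = 1 - betas[0] ** step, 1 - betas[1] ** step
    new_param = param - lr * (exp_avg / bias1) / ((exp_avg_sq / bias2).sqrt() + eps)
    return new_param, exp_avg, exp_avg_sq

w = torch.randn(3, requires_grad=True)
g = torch.autograd.grad((w ** 2).sum(), w, create_graph=True)[0]
w1, m, v = functional_adam_step(w, g, torch.zeros(3), torch.zeros(3), step=1)
print(w1.requires_grad)  # True: second-order gradients can flow through the update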
- """ - bootstrap_weight = fast_weight - batch_size, aug_factor = data.size(0), 1 - bootstrap_reward = torch.full((batch_size, 1), float("-inf")) - if mode == "eval": - optimizer = Optimizer(bootstrap_weight.parameters(), **self.optimizer_params['optimizer']) - # optimizer.load_state_dict(self.meta_optimizer.state_dict()) - with torch.enable_grad(): - for L in range(self.meta_params['bootstrap_steps']): - env = Env(**{'problem_size': data.size(1), 'pomo_size': data.size(1)}) - env.load_problems(batch_size, problems=data, aug_factor=aug_factor) - reset_state, _, _ = env.reset() - if mode == "maml": - self.meta_model.pre_forward(reset_state, weights=bootstrap_weight) - elif mode == "eval": - bootstrap_weight.pre_forward(reset_state) - prob_list = torch.zeros(size=(aug_factor * batch_size, env.pomo_size, 0)) - state, reward, done = env.pre_step() - while not done: - if mode == "maml": - selected, prob = self.meta_model(state, weights=bootstrap_weight) - elif mode == "eval": - selected, prob = bootstrap_weight(state) - state, reward, done = env.step(selected) # (aug_factor * batch_size, pomo_size) - prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) - - # (batch, augmentation * pomo) - reward = reward.reshape(aug_factor, batch_size, env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1) - advantage = reward - reward.float().mean(dim=1, keepdims=True) - log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1) - loss = -advantage * log_prob - loss_mean = loss.mean() - - if mode == "maml": - # TODO: need to update, SGD -> Adam - gradients = torch.autograd.grad(loss_mean, bootstrap_weight.values(), create_graph=False) - bootstrap_weight = OrderedDict( - (name, param - self.optimizer_params['optimizer']['lr'] * grad) - for ((name, param), grad) in zip(bootstrap_weight.items(), gradients) - ) - raise NotImplementedError - elif mode == "eval": - optimizer.zero_grad() - loss_mean.backward() - optimizer.step() - - max_pomo_reward, _ = reward.max(dim=1) - max_pomo_reward = max_pomo_reward.view(-1, 1) - bootstrap_reward = torch.where(max_pomo_reward > bootstrap_reward, max_pomo_reward, bootstrap_reward) # (batch_size, 1) - - return bootstrap_reward + return loss_mean, kl_loss def _get_data(self, batch_size, task_params): if self.meta_params['data_type'] == 'size': @@ -538,14 +536,14 @@ def _get_data(self, batch_size, task_params): def _get_val_data(self, batch_size, task_params): if self.meta_params["data_type"] == "size": - start1, end1 = min(task_params[0] + 10, self.max_n), min(task_params[0] + 20, self.max_n) - val_size = random.sample(range(start1, end1 + 1), 1)[0] - val_data = self._get_data(batch_size, (val_size,)) - # val_data = self._get_data(batch_size, task_params) # TODO: which is better? + # start1, end1 = min(task_params[0] + 10, self.max_n), min(task_params[0] + 20, self.max_n) + # val_size = random.sample(range(start1, end1 + 1), 1)[0] + # val_data = self._get_data(batch_size, (val_size,)) + val_data = self._get_data(batch_size, task_params) elif self.meta_params["data_type"] == "distribution": val_data = self._get_data(batch_size, task_params) elif self.meta_params["data_type"] == "size_distribution": - pass + val_data = self._get_data(batch_size, task_params) else: raise NotImplementedError @@ -602,56 +600,49 @@ def minmax(xy_): return data - def _update_task_weight(self, epoch): + def _update_task_weight(self, tasks, weights, epoch): """ Update the weights of tasks. 
@@ -538,14 +536,14 @@ def _get_val_data(self, batch_size, task_params):
     def _get_val_data(self, batch_size, task_params):
         if self.meta_params["data_type"] == "size":
-            start1, end1 = min(task_params[0] + 10, self.max_n), min(task_params[0] + 20, self.max_n)
-            val_size = random.sample(range(start1, end1 + 1), 1)[0]
-            val_data = self._get_data(batch_size, (val_size,))
-            # val_data = self._get_data(batch_size, task_params)  # TODO: which is better?
+            # start1, end1 = min(task_params[0] + 10, self.max_n), min(task_params[0] + 20, self.max_n)
+            # val_size = random.sample(range(start1, end1 + 1), 1)[0]
+            # val_data = self._get_data(batch_size, (val_size,))
+            val_data = self._get_data(batch_size, task_params)
         elif self.meta_params["data_type"] == "distribution":
             val_data = self._get_data(batch_size, task_params)
         elif self.meta_params["data_type"] == "size_distribution":
-            pass
+            val_data = self._get_data(batch_size, task_params)
         else:
             raise NotImplementedError
@@ -602,56 +600,49 @@ def minmax(xy_):

         return data

-    def _update_task_weight(self, epoch):
+    def _update_task_weight(self, tasks, weights, epoch):
         """
         Update the weights of tasks.
+        For LKH3, set MAX_TRIALS = 100 to reduce time.
         """
         global run_func
-        start_t, gap = time.time(), torch.zeros(self.task_w.size(0))
+        start_t, gap = time.time(), torch.zeros(weights.size(0))
         batch_size = 200 if self.meta_params["solver"] == "lkh3_offline" else 50
         idx = torch.randperm(batch_size)[:50]
         for i in range(gap.size(0)):
-            if self.meta_params["data_type"] == "size":
-                start = i * self.task_interval
-                end = min(start + self.task_interval, self.max_n)
-                selected = random.sample([j for j in range(start, end+1)], 1)[0]
-                data = self._get_data(batch_size=batch_size, task_params=(selected, ))
-            elif self.meta_params["data_type"] == "distribution":
-                selected = self.task_set[i]
-                data = self._get_data(batch_size=batch_size, task_params=selected)
-            else:
-                raise NotImplementedError
+            selected = tasks[i]
+            data = self._get_data(batch_size=batch_size, task_params=selected)

             # only use lkh3 at the first iteration of updating task weights
             if self.meta_params["solver"] == "lkh3_offline":
-                if epoch / self.meta_params['update_weight'] == 1:
-                    self.val_data[i] = data
+                if selected not in self.val_data.keys():
+                    self.val_data[selected] = data
                     opts = argparse.ArgumentParser()
                     opts.cpus, opts.n, opts.progress_bar_mininterval = None, None, 0.1
                     dataset = [(instance.cpu().numpy(),) for instance in data]
                     executable = get_lkh_executable()
                     def run_func(args):
-                        return solve_lkh_log(executable, *args, runs=1, disable_cache=True)  # otherwise it directly loads data from dir
+                        return solve_lkh_log(executable, *args, runs=1, disable_cache=True, MAX_TRIALS=100)  # otherwise it directly loads data from dir
                     results, _ = run_all_in_pool(run_func, "./LKH3_result", dataset, opts, use_multiprocessing=False)
-                    self.val_opt[i] = [j[0] for j in results]
-                data = self.val_data[i][idx]
+                    self.val_opt[selected] = [j[0] for j in results]
+                data = self.val_data[selected][idx]
             model_score = self._fast_val(self.meta_model, data=data, mode="eval", return_all=True)
             model_score = model_score.tolist()

             if self.meta_params["solver"] == "lkh3_online":
-                # get results from LKH3 (~14s)
+                # get results from LKH3
                 opts = argparse.ArgumentParser()
                 opts.cpus, opts.n, opts.progress_bar_mininterval = None, None, 0.1
                 dataset = [(instance.cpu().numpy(),) for instance in data]
                 executable = get_lkh_executable()
                 def run_func(args):
-                    return solve_lkh_log(executable, *args, runs=1, disable_cache=True)  # otherwise it directly loads data from dir
+                    return solve_lkh_log(executable, *args, runs=1, disable_cache=True, MAX_TRIALS=100)  # otherwise it directly loads data from dir
                 results, _ = run_all_in_pool(run_func, "./LKH3_result", dataset, opts, use_multiprocessing=False)
                 gap_list = [(model_score[j]-results[j][0])/results[j][0]*100 for j in range(len(results))]
                 gap[i] = sum(gap_list)/len(gap_list)
             elif self.meta_params["solver"] == "lkh3_offline":
-                lkh_score = [self.val_opt[i][j] for j in idx.tolist()]
+                lkh_score = [self.val_opt[selected][j] for j in idx.tolist()]
                 gap_list = [(model_score[j] - lkh_score[j]) / lkh_score[j] * 100 for j in range(len(lkh_score))]
                 gap[i] = sum(gap_list) / len(gap_list)
             elif self.meta_params["solver"] == "best_model":
                 # not recommended: how to define the best model? (biased to the val dataset)
@@ -659,21 +650,58 @@ def run_func(args):
                 best_model_score = best_model_score.tolist()
                 gap_list = [(model_score[j] - best_model_score[j]) / best_model_score[j] * 100 for j in range(len(best_model_score))]
                 gap[i] = sum(gap_list) / len(gap_list)
-            elif self.meta_params["solver"] == "bootstrap":
-                data = data[:32] if data.size(1) > 100 else data  # reduce memory consumption
-                bootstrap_reward = self._bootstrap(copy.deepcopy(self.meta_model), data, mode="eval")
-                bootstrap_score = (-bootstrap_reward).view(-1).float().tolist()
-                gap_list = [(model_score[j] - bootstrap_score[j]) / bootstrap_score[j] * 100 for j in range(len(bootstrap_score))]
-                gap[i] = sum(gap_list) / len(gap_list)
             else:
                 raise NotImplementedError
         print(">> Finish updating task weights within {}s".format(round(time.time()-start_t, 2)))

         # temp = max(1.0 * (1 - epoch / self.meta_params["sch_epoch"]), 0.05)
         # temp = max(1.0 - 1/2 * (1 - math.cos(math.pi * min(epoch / self.meta_params['sch_epoch'], 1))), 0.2)
-        temp = 0.25
+        temp = 1.0
         gap_temp = torch.Tensor([i/temp for i in gap.tolist()])
         print(gap, temp)
-        print(">> Old task weights: {}".format(self.task_w))
-        self.task_w = torch.softmax(gap_temp, dim=0)
-        print(">> New task weights: {}".format(self.task_w))
+        print(">> Old task weights: {}".format(weights))
+        weights = torch.softmax(gap_temp, dim=0)
+        print(">> New task weights: {}".format(weights))
+
+        return weights
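The new weights are a temperature softmax over the per-task gaps, so tasks where the model lags LKH3 the most are sampled most often; a lower temp sharpens that preference. A quick sketch with made-up gaps:

import torch

gaps = torch.tensor([1.0, 3.0, 8.0])  # hypothetical % gaps w.r.t. LKH3
for temp in (1.0, 0.25):
    print(temp, torch.softmax(gaps / temp, dim=0))
# temp=1.0 keeps the distribution soft; temp=0.25 concentrates mass on the hardest task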
+
+    def _get_kl_loss(self, bootstrap_model, val_data, slow_tour, slow_probs):
+        """
+        Ref to "Bootstrapped Meta-Learning", ICLR 2022.
+        This function is deprecated because:
+            a. storing probs_list for large-scale COPs on GPU is extremely memory-expensive (e.g., > 20GB on TSP200);
+            b. calling probs_list.cpu() at every step is extremely time-expensive.
+        Instead, we now compute the KL loss on the fly; see self._fast_val().
+        """
+        if isinstance(bootstrap_model, torch.nn.Module):
+            bootstrap_model.eval()
+        env = Env(**{'problem_size': val_data.size(1), 'pomo_size': val_data.size(1)})
+        batch_size = val_data.size(0)
+        env.load_problems(batch_size, problems=val_data, aug_factor=1)
+        reset_state, _, _ = env.reset()
+
+        with torch.no_grad():
+            if self.meta_params['meta_method'] == 'maml':
+                self.meta_model.pre_forward(reset_state, weights=bootstrap_model)
+            else:
+                bootstrap_model.pre_forward(reset_state)
+            probs_list = torch.zeros(size=(batch_size, env.pomo_size, env.problem_size, 0))
+            state, reward, done = env.pre_step()
+            selected_idx = 0
+            while not done:
+                if self.meta_params['meta_method'] == 'maml':
+                    selected, prob, probs = self.meta_model(state, weights=bootstrap_model, selected=slow_tour[:, :, selected_idx].reshape(batch_size, -1).long(), return_probs=True)
+                else:
+                    selected, prob, probs = bootstrap_model(state, selected=slow_tour[:, :, selected_idx].reshape(batch_size, -1).long(), return_probs=True)
+                # shape: (batch, pomo)
+                selected_idx += 1
+                state, reward, done = env.step(selected)
+                probs_list = torch.cat((probs_list, probs[:, :, :, None]), dim=3)
+            probs_list = torch.where(probs_list > 0, probs_list, torch.tensor(0.00001))
+
+        slow_probs = torch.where(slow_probs > 0, slow_probs, torch.tensor(0.00001))  # avoid log(0)
+        # kl_loss = (probs_list * (probs_list.log() - slow_probs.log())).sum(dim=2).mean()
+        kl_loss = (probs_list * (probs_list.log() - slow_probs.log())).reshape(batch_size * val_data.size(1), -1).sum(dim=-1).mean()
+        # kl_loss = torch.nn.KLDivLoss(reduction="batchmean")(slow_probs.log().reshape(batch_size * val_data.size(1), -1), probs_list.reshape(batch_size * val_data.size(1), -1))
+
+        return kl_loss
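For reference, the commented-out KLDivLoss variant matches the manual formula only with this argument order: F.kl_div takes the student's log-probabilities as input and the teacher's probabilities as target, computing KL(teacher || student). A quick numerical check:

import torch
import torch.nn.functional as F

teacher = torch.softmax(torch.randn(6, 10), dim=-1)  # bootstrap policy
student = torch.softmax(torch.randn(6, 10), dim=-1)  # fast-weight policy
manual = (teacher * (teacher.log() - student.log())).sum(-1).mean()
builtin = F.kl_div(student.log(), teacher, reduction="batchmean")
print(torch.allclose(manual, builtin))  # True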
diff --git a/POMO/TSP/TSPTrainer_pomo.py b/POMO/TSP/TSPTrainer_pomo.py
index d96fadf..5e1d831 100644
--- a/POMO/TSP/TSPTrainer_pomo.py
+++ b/POMO/TSP/TSPTrainer_pomo.py
@@ -52,11 +52,16 @@ def __init__(self,
             torch.set_default_tensor_type('torch.FloatTensor')

         # Main Components
-        self.model_params["norm"] = "instance"  # Original "POMO" Paper uses instance/batch normalization
+        self.model_params["norm"] = "batch"  # Original "POMO" Paper uses batch normalization
         self.model = Model(**self.model_params)
         self.optimizer = Optimizer(self.model.parameters(), **self.optimizer_params['optimizer'])
         self.task_set = generate_task_set(self.meta_params)
-        self.task_w = torch.full((len(self.task_set),), 1 / len(self.task_set))
+        self.val_data, self.val_opt = {}, {}  # for lkh3_offline
+        assert not (self.meta_params['curriculum'] and self.meta_params["data_type"] in ["size", "distribution"]), "Not Implemented!"
+        if self.meta_params["data_type"] == "size_distribution":
+            # hardcoded - task_set: range(self.min_n, self.max_n, self.task_interval) * self.num_dist
+            self.min_n, self.max_n, self.task_interval, self.num_dist = 50, 200, 5, 11
+            self.task_w = torch.full(((self.max_n - self.min_n) // 5 + 1, self.num_dist), 1 / self.num_dist)

         # Restore
         self.start_epoch = 1
@@ -101,7 +106,8 @@ def run(self):
             dir = "../../data/TSP/Distribution/"
             paths = ["tsp100_uniform.pkl", "tsp100_gaussian.pkl", "tsp100_cluster.pkl", "tsp100_diagonal.pkl", "tsp100_tsplib.pkl"]
         elif self.meta_params["data_type"] == "size_distribution":
-            pass
+            dir = "../../data/TSP/Size_Distribution/"
+            paths = ["tsp200_uniform.pkl", "tsp200_gaussian.pkl", "tsp300_rotation.pkl"]
         if epoch <= 1 or (epoch % img_save_interval) == 0:
             for val_path in paths:
                 no_aug_score = self._fast_val(self.model, path=os.path.join(dir, val_path), val_episodes=64)
@@ -152,26 +158,28 @@ def _train_one_epoch(self, epoch):
         loss_AM = AverageMeter()
         batch_size = self.meta_params['meta_batch_size']

-        # Adaptive task scheduler - TODO: need to update
-        if self.meta_params["data_type"] in ["size", "distribution"]:
-            self.min_n, self.max_n = self.task_set[0][0], self.task_set[-1][0]  # [20, 150] / [0, 130]
-            # start = self.min_n + int(epoch/self.meta_params['epochs'] * (self.max_n - self.min_n))  # linear
-            start = self.min_n + int(1 / 2 * (1 - math.cos(math.pi * min(epoch / self.meta_params['epochs'], 1))) * (self.max_n - self.min_n))  # cosine
-            end = min(start + 10, self.max_n)  # 10 is the size of the sliding window
-            if self.meta_params["curriculum"]: print(">> training task {}".format((start, end)))
-        elif self.meta_params["data_type"] == "size_distribution":
-            pass
+        # Adaptive task scheduler - not implemented for "size" and "distribution"
+        if self.meta_params['curriculum']:
+            if self.meta_params["data_type"] == "size_distribution":
+                start = self.min_n + int(min(epoch / self.meta_params['sch_epoch'], 1) * (self.max_n - self.min_n))  # linear
+                # start = self.min_n + int(1 / 2 * (1 - math.cos(math.pi * min(epoch / self.meta_params['sch_epoch'], 1))) * (self.max_n - self.min_n))  # cosine
+                n = start // 5 * 5  # round down to the current size bucket (task_interval = 5)
+                idx = (n - self.min_n) // 5
+                tasks, weights = self.task_set[idx * 11: (idx + 1) * 11], self.task_w[idx]  # 11 = self.num_dist
+                if epoch % self.meta_params['update_weight'] == 0:
+                    self.task_w[idx] = self._update_task_weight(tasks, weights, epoch)

         # sample a batch of tasks
         for b in range(self.meta_params['B']):
             for step in range(self.meta_params['k']):
                 if self.meta_params["data_type"] == "size":
-                    task_params = random.sample(range(start, end + 1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
-                    # batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 100 else self.meta_params['meta_batch_size'] // 2
+                    task_params = random.sample(self.task_set, 1)[0]
+                    batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2
                 elif self.meta_params["data_type"] == "distribution":
-                    task_params = self.task_set[torch.multinomial(self.task_w, 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
+                    task_params = random.sample(self.task_set, 1)[0]
                 elif self.meta_params["data_type"] == "size_distribution":
-                    pass
+                    task_params = tasks[torch.multinomial(self.task_w[idx], 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
+                    batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2

                 data = self._get_data(batch_size, task_params)
                 env_params = {'problem_size': data.size(1), 'pomo_size': data.size(1)}
@@ -312,3 +320,50 @@ def minmax(xy_):

         # return data, opt_sol
         return data
+
+    def _update_task_weight(self, tasks, weights, epoch):
+        """
+        Update the weights of tasks.
+        For LKH3, set MAX_TRIALS = 100 to reduce time.
+        """
+        global run_func
+        start_t, gap = time.time(), torch.zeros(weights.size(0))
+        batch_size = 200 if self.meta_params["solver"] == "lkh3_offline" else 50
+        idx = torch.randperm(batch_size)[:50]
+        for i in range(gap.size(0)):
+            selected = tasks[i]
+            data = self._get_data(batch_size=batch_size, task_params=selected)
+
+            # only use lkh3 at the first iteration of updating task weights
+            if self.meta_params["solver"] == "lkh3_offline":
+                if selected not in self.val_data.keys():
+                    self.val_data[selected] = data
+                    opts = argparse.ArgumentParser()
+                    opts.cpus, opts.n, opts.progress_bar_mininterval = None, None, 0.1
+                    dataset = [(instance.cpu().numpy(),) for instance in data]
+                    executable = get_lkh_executable()
+                    def run_func(args):
+                        return solve_lkh_log(executable, *args, runs=1, disable_cache=True, MAX_TRIALS=100)  # otherwise it directly loads data from dir
+                    results, _ = run_all_in_pool(run_func, "./LKH3_result", dataset, opts, use_multiprocessing=False)
+                    self.val_opt[selected] = [j[0] for j in results]
+                data = self.val_data[selected][idx]
+
+            model_score = self._fast_val(self.model, data=data, return_all=True)
+            model_score = model_score.tolist()
+
+            if self.meta_params["solver"] == "lkh3_offline":
+                lkh_score = [self.val_opt[selected][j] for j in idx.tolist()]
+                gap_list = [(model_score[j] - lkh_score[j]) / lkh_score[j] * 100 for j in range(len(lkh_score))]
+                gap[i] = sum(gap_list) / len(gap_list)
+            else:
+                raise NotImplementedError
+        print(">> Finish updating task weights within {}s".format(round(time.time()-start_t, 2)))
+
+        temp = 0.25
+        gap_temp = torch.Tensor([i/temp for i in gap.tolist()])
+        print(gap, temp)
+        print(">> Old task weights: {}".format(weights))
+        weights = torch.softmax(gap_temp, dim=0)
+        print(">> New task weights: {}".format(weights))
+
+        return weights
diff --git a/POMO/TSP/TSP_baseline.py b/POMO/TSP/TSP_baseline.py
index 4b28672..952674d 100644
--- a/POMO/TSP/TSP_baseline.py
+++ b/POMO/TSP/TSP_baseline.py
@@ -122,7 +122,7 @@ def get_lkh_executable(url="http://www.akira.ruc.dk/~keld/research/LKH-3/LKH-3.0
     return os.path.abspath(executable)

-def solve_lkh_log(executable, directory, name, loc, runs=1, disable_cache=False):
+def solve_lkh_log(executable, directory, name, loc, runs=1, disable_cache=False, MAX_TRIALS=10000):

     problem_filename = os.path.join(directory, "{}.lkh{}.vrp".format(name, runs))
     tour_filename = os.path.join(directory, "{}.lkh{}.tour".format(name, runs))
@@ -137,7 +137,7 @@ def solve_lkh_log(executable, directory, name, loc, runs=1, disable_cache=False)
     else:
         write_tsplib(problem_filename, loc, name=name)

-        params = {"PROBLEM_FILE": problem_filename, "OUTPUT_TOUR_FILE": tour_filename, "RUNS": runs, "SEED": 1234}
+        params = {"PROBLEM_FILE": problem_filename, "OUTPUT_TOUR_FILE": tour_filename, "RUNS": runs, "SEED": 1234, "MAX_TRIALS": MAX_TRIALS}
         write_lkh_par(param_filename, params)

         with open(log_filename, 'w') as f:
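MAX_TRIALS is forwarded into the LKH-3 parameter file, capping the trials per run (100 here versus LKH's much larger default). Assuming write_lkh_par emits LKH's plain "KEY = VALUE" format, a hypothetical stand-in helper makes the plumbing concrete (sketch only; the real helper lives elsewhere in the repo):

def write_lkh_par_sketch(filename, params):
    # LKH-3 .par files are plain "KEY = VALUE" lines, one parameter per line
    with open(filename, "w") as f:
        for key, value in params.items():
            f.write("{} = {}\n".format(key, value))

write_lkh_par_sketch("tmp.par", {"PROBLEM_FILE": "instance.vrp", "OUTPUT_TOUR_FILE": "instance.tour", "RUNS": 1, "SEED": 1234, "MAX_TRIALS": 100})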
@@ -373,13 +373,13 @@ def solve_all_nn(dataset_path, eval_batch_size=1024, no_cuda=False, dataset_n=No
     parser = argparse.ArgumentParser()
     parser.add_argument("--method", type=str, default='concorde', choices=["nn", "gurobi", "gurobigap", "gurobit", "concorde", "lkh", "random_insertion", "nearest_insertion", "farthest_insertion"])
-    parser.add_argument("--datasets", nargs='+', default=["../../data/TSP/Size/tsp100_uniform.pkl", ], help="Filename of the dataset(s) to evaluate")
+    parser.add_argument("--datasets", nargs='+', default=["../../data/TSP/Size_Distribution/tsp100_uniform.pkl", ], help="Filename of the dataset(s) to evaluate")
     parser.add_argument("-f", action='store_false', help="Set true to overwrite")
     parser.add_argument("-o", default=None, help="Name of the results file to write")
     parser.add_argument("--cpus", type=int, help="Number of CPUs to use, defaults to all cores")
     parser.add_argument('--no_cuda', action='store_true', help='Disable CUDA (only for Tsiligirides)')
-    parser.add_argument('--disable_cache', action='store_false', help='Disable caching')
-    parser.add_argument('--max_calc_batch_size', type=int, default=10000, help='Size for subbatches')
+    parser.add_argument('--disable_cache', action='store_true', help='Disable caching')
+    parser.add_argument('--max_calc_batch_size', type=int, default=1000, help='Size for subbatches')
     parser.add_argument('--progress_bar_mininterval', type=float, default=0.1, help='Minimum interval')
     parser.add_argument('-n', type=int, default=10000, help="Number of instances to process")
     parser.add_argument('--offset', type=int, default=0, help="Offset where to start processing")
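The --disable_cache change fixes inverted flag semantics: with action='store_false' the option defaulted to True and passing the flag re-enabled caching. A two-line demonstration:

import argparse

p = argparse.ArgumentParser()
p.add_argument('--wrong', action='store_false')  # defaults to True; the flag turns it *off*
p.add_argument('--right', action='store_true')   # defaults to False; the flag turns it *on*
print(p.parse_args([]))                          # Namespace(right=False, wrong=True)
print(p.parse_args(['--wrong', '--right']))      # Namespace(right=True, wrong=False)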
diff --git a/POMO/TSP/test.py b/POMO/TSP/test.py
index 2e2f719..e6ca5ae 100644
--- a/POMO/TSP/test.py
+++ b/POMO/TSP/test.py
@@ -28,7 +28,7 @@
     'logit_clipping': 10,
     'ff_hidden_dim': 512,
     'eval_type': 'argmax',
-    'norm': None  # TODO: which has a better performance?
+    'norm': "instance"
 }

 tester_params = {
@@ -37,7 +37,7 @@
     'seed': 2023,
     'model_load': {
         'path': '../../pretrained/pomo_pretrained',  # directory path of pre-trained model and log files saved.
-        'epoch': 250000,  # epoch version of pre-trained model to load.
+        'epoch': 3100,  # epoch version of pre-trained model to load.
     },
     'test_episodes': 10000,
     'test_batch_size': 10000,
@@ -45,8 +45,8 @@
     'test_robustness': False,
     'aug_factor': 8,
     'aug_batch_size': 100,
-    'test_set_path': '../../data/TSP/Size/tsp100_uniform.pkl',
-    'test_set_opt_sol_path': '../../data/TSP/Size/opt_tsp100_uniform.pkl'
+    'test_set_path': '../../data/TSP/Size_Distribution/tsp100_uniform.pkl',
+    'test_set_opt_sol_path': '../../data/TSP/Size_Distribution/concorde/tsp100_uniformoffset0n10000-concorde.pkl'
 }

 fine_tune_params = {
diff --git a/POMO/TSP/train.py b/POMO/TSP/train.py
index ba35bb6..0480e82 100644
--- a/POMO/TSP/train.py
+++ b/POMO/TSP/train.py
@@ -46,7 +46,7 @@
     # 'batch_size': 64,
     'logging': {
         'model_save_interval': 25000,
-        'img_save_interval': 10,
+        'img_save_interval': 100,
         'log_image_params_1': {
             'json_foldername': 'log_image_style',
             'filename': 'general.json'
@@ -58,8 +58,8 @@
     },
     'model_load': {
         'enable': False,  # enable loading pre-trained model
-        # 'path': './result/saved_tsp20_model',  # directory path of pre-trained model and log files saved.
-        # 'epoch': 510,  # epoch version of pre-trained model to load.
+        # 'path': '../../pretrained/debug',  # directory path of pre-trained model and log files saved.
+        # 'epoch': 100000,  # epoch version of pre-trained model to load.
     },
}

@@ -67,17 +67,18 @@
     'enable': True,  # whether to use meta-learning or not
     'curriculum': True,  # adaptive task sampling
     'meta_method': 'maml',  # choose from ['maml', 'fomaml', 'reptile']
-    'bootstrap_steps': 25,
-    'data_type': 'size',  # choose from ["size", "distribution", "size_distribution"]
+    'data_type': 'size_distribution',  # choose from ["size", "distribution", "size_distribution"]
     'epochs': 250000,  # the number of meta-model updates: (250*100000) / (1*5*64)
     'B': 1,  # the number of tasks in a mini-batch
     'k': 1,  # gradient descent steps in the inner-loop optimization of meta-learning method
+    'L': 0,  # bootstrap steps
     'meta_batch_size': 64,  # will be divided by 2 if problem_size >= 100
-    'update_weight': 1000,  # update weight of each task per X iters
-    'sch_epoch': 250000,  # for the task scheduler of size setting
+    'update_weight': 100,  # update the weight of each task per X iters
+    'sch_epoch': 225000,  # for the task scheduler of the size setting, where sch_epoch = 0.9 * epochs
     'solver': 'lkh3_offline',  # solver used to update the task weights, choose from ["bootstrap", "lkh3_online", "lkh3_offline", "best_model"]
     'alpha': 0.99,  # params for the outer-loop optimization of reptile
     'alpha_decay': 0.999,  # params for the outer-loop optimization of reptile
+    'beta': 0.9,  # loss weight
}

 logger_params = {
@@ -139,7 +140,7 @@ def occumpy_mem(cuda_device):
     total, used = check_mem(cuda_device)
     total = int(total)
     used = int(used)
-    block_mem = int((total-used) * 0.85)
+    block_mem = int((total-used) * 0.5)
     x = torch.cuda.FloatTensor(256, 1024, block_mem)
     del x
diff --git a/POMO/utils/functions.py b/POMO/utils/functions.py
index 1c6cb30..ec05380 100644
--- a/POMO/utils/functions.py
+++ b/POMO/utils/functions.py
@@ -58,6 +58,9 @@ def seed_everything(seed=2022):
     random.seed(seed)
     np.random.seed(seed)
     torch.manual_seed(seed)
+    torch.cuda.manual_seed(seed)
+    torch.backends.cudnn.benchmark = False
+    torch.backends.cudnn.deterministic = True
     torch.cuda.manual_seed_all(seed)
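Disabling cudnn.benchmark and forcing deterministic kernels trades speed for run-to-run reproducibility. On PyTorch >= 1.8 a stricter opt-in exists; a hedged sketch of the full recipe (the commented line may raise for ops without deterministic kernels):

import torch

def make_deterministic(seed=2022):
    # mirrors the seed_everything() additions above, plus the stricter global switch
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # torch.use_deterministic_algorithms(True)  # PyTorch >= 1.8 only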