From b136d7b6df56a9de1bc7957046aa6f36ecb74049 Mon Sep 17 00:00:00 2001
From: RoyalSkye
Date: Tue, 27 Sep 2022 22:01:23 +0800
Subject: [PATCH] Add ANIL and bootstrap

---
 POMO/TSP/TSPModel.py        |  9 +++--
 POMO/TSP/TSPTester.py       | 63 ++++++++++++++++++++++++++++++-
 POMO/TSP/TSPTrainer_Meta.py | 75 +++++++++++++++++++++++++++++++------
 POMO/TSP/TSPTrainer_pomo.py |  7 ++--
 POMO/TSP/test_n100.py       | 20 +++++-----
 POMO/TSP/train_n100.py      | 21 +++++------
 POMO/utils/functions.py     |  7 ++++
 7 files changed, 160 insertions(+), 42 deletions(-)

diff --git a/POMO/TSP/TSPModel.py b/POMO/TSP/TSPModel.py
index 6efacea..53c590e 100644
--- a/POMO/TSP/TSPModel.py
+++ b/POMO/TSP/TSPModel.py
@@ -10,13 +10,16 @@ def __init__(self, **model_params):
         super().__init__()
         self.model_params = model_params
 
-        self.encoder = TSP_Encoder(**model_params)
-        self.decoder = TSP_Decoder(**model_params)
+        self.encoder = TSP_Encoder(**model_params)  # 1187712 (1.19 million) parameters
+        self.decoder = TSP_Decoder(**model_params)  # 82048 (0.08 million) parameters
         self.encoded_nodes = None
         # shape: (batch, problem, EMBEDDING_DIM)
 
     def pre_forward(self, reset_state, weights=None):
-        self.encoded_nodes = self.encoder(reset_state.problems, weights=weights)
+        if weights is not None and self.model_params["meta_update_encoder"]:
+            self.encoded_nodes = self.encoder(reset_state.problems, weights=weights)
+        else:
+            self.encoded_nodes = self.encoder(reset_state.problems, weights=None)
         # shape: (batch, problem, EMBEDDING_DIM)
         self.decoder.set_kv(self.encoded_nodes, weights=weights)
 
diff --git a/POMO/TSP/TSPTester.py b/POMO/TSP/TSPTester.py
index f615987..072eafb 100644
--- a/POMO/TSP/TSPTester.py
+++ b/POMO/TSP/TSPTester.py
@@ -8,8 +8,9 @@
 
 from TSPEnv import TSPEnv as Env
 from TSPModel import TSPModel as Model
+from baselines import solve_all_gurobi
 from utils.utils import *
-from utils.functions import load_dataset
+from utils.functions import load_dataset, save_dataset
 
 
 class TSPTester:
@@ -64,6 +65,22 @@ def __init__(self,
         self.time_estimator = TimeEstimator()
 
     def run(self):
+        if self.tester_params['test_robustness']:
+            episode = 0
+            test_data = torch.Tensor(self.test_data)
+            opt_sol = [0] * test_data.size(0)
+            while episode < test_data.size(0):
+                remaining = test_data.size(0) - episode
+                batch_size = min(self.tester_params['test_batch_size'], remaining)
+                data = torch.Tensor(test_data[episode: episode + batch_size])
+                test_data[episode: episode + batch_size], opt_sol[episode: episode + batch_size] = self._generate_x_adv(data, eps=50.0)
+                episode += batch_size
+            self.test_data = test_data.cpu().numpy()
+            self.opt_sol = [i[0] for i in opt_sol]
+            # save the adversarial dataset and its optimal solutions
+            filename = os.path.split(self.tester_params['test_set_path'])[-1]
+            save_dataset(self.test_data, './adv_{}'.format(filename))
+            save_dataset(opt_sol, './sol_adv_{}'.format(filename))
         if self.fine_tune_params['enable']:
             # fine-tune the model on the fine-tune dataset (few-shot)
             self._fine_tune_and_test()
@@ -207,7 +224,7 @@ def _fine_tune_one_batch(self, fine_tune_data):
         log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).view(batch_size, -1)
         # size = (batch, augmentation * pomo)
         loss = -advantage * log_prob  # Minus Sign: To Increase REWARD
-        # shape: (batch, augmentation * pomo)pretra
+        # shape: (batch, augmentation * pomo)
         loss_mean = loss.mean()
 
         # Score
@@ -218,3 +235,45 @@
         self.optimizer.zero_grad()
         loss_mean.backward()
         self.optimizer.step()
+
+    def _generate_x_adv(self, data, eps=10.0):
+        """
+        Generate adversarial data based on the current model; we also need to generate the optimal solutions for x_adv.
+        """
+        from torch.autograd import Variable
+        def minmax(xy_):
+            # min-max normalization: [b, n, 2]
+            xy_ = (xy_ - xy_.min(dim=1, keepdims=True)[0]) / (xy_.max(dim=1, keepdims=True)[0] - xy_.min(dim=1, keepdims=True)[0])
+            return xy_
+
+        # generate x_adv
+        self.model.eval()
+        aug_factor, batch_size = 1, data.size(0)
+        with torch.enable_grad():
+            data.requires_grad_()
+            self.env.load_problems(batch_size, problems=data, aug_factor=aug_factor)
+            # print(self.env.problems.requires_grad)
+            reset_state, _, _ = self.env.reset()
+            self.model.pre_forward(reset_state)
+            prob_list = torch.zeros(size=(aug_factor * batch_size, self.env.pomo_size, 0))
+            state, reward, done = self.env.pre_step()
+            while not done:
+                selected, prob = self.model(state)
+                state, reward, done = self.env.step(selected)
+                prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)
+
+            aug_reward = reward.reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).view(batch_size, -1)
+            baseline_reward = aug_reward.float().mean(dim=1, keepdims=True)
+            advantage = aug_reward - baseline_reward
+            log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).view(batch_size, -1)
+
+            # delta = torch.autograd.grad(eps * ((advantage / baseline_reward) * log_prob).mean(), data)[0]
+            delta = torch.autograd.grad(eps * ((-advantage) * log_prob).mean(), data)[0]
+            data = data.detach() + delta
+            data = minmax(data)
+            data = Variable(data, requires_grad=False)
+
+        # generate the optimal solutions
+        opt_sol = solve_all_gurobi(data)
+
+        return data, opt_sol
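Note: `_generate_x_adv` above performs a single REINFORCE-style gradient step on the node coordinates (ascending the negative-advantage objective) and then min-max re-normalizes the instance into the unit square. Below is a minimal standalone sketch of that perturb-and-renormalize pattern, with a toy surrogate objective (length of the fixed index-order tour) standing in for the policy-gradient term; `perturb` and `identity_tour_length` are hypothetical names, not part of the patch.

```python
import torch

def minmax(xy):
    # min-max normalize each instance back into the unit square; xy: (batch, n, 2)
    mn = xy.min(dim=1, keepdim=True)[0]
    mx = xy.max(dim=1, keepdim=True)[0]
    return (xy - mn) / (mx - mn)

def perturb(data, surrogate_loss, eps=10.0):
    # one gradient step on the surrogate objective w.r.t. the coordinates,
    # followed by re-normalization, as in _generate_x_adv
    data = data.clone().requires_grad_(True)
    delta = torch.autograd.grad(eps * surrogate_loss(data), data)[0]
    return minmax(data.detach() + delta)

def identity_tour_length(xy):
    # toy surrogate: mean length of the tour visiting nodes in index order
    return (xy - xy.roll(1, dims=1)).norm(dim=-1).sum(dim=1).mean()

adv = perturb(torch.rand(4, 20, 2), identity_tour_length)  # (4, 20, 2), back in [0, 1]^2
```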
diff --git a/POMO/TSP/TSPTrainer_Meta.py b/POMO/TSP/TSPTrainer_Meta.py
index 818ff20..9304f4f 100644
--- a/POMO/TSP/TSPTrainer_Meta.py
+++ b/POMO/TSP/TSPTrainer_Meta.py
@@ -10,6 +10,7 @@
 from TSPModel import TSPModel as Model
 
 from torch.optim import Adam as Optimizer
+# from torch.optim import SGD as Optimizer
 from torch.optim.lr_scheduler import MultiStepLR as Scheduler
 
 from TSProblemDef import get_random_problems, generate_task_set
@@ -20,11 +21,11 @@
 class TSPTrainer:
     """
     TODO: 1. Validation data? And for the k inner-loop steps, should we use the same batch of training data?
-    2. only meta-update partial para of pomo?
     Implementation of POMO with MAML / FOMAML / Reptile.
     For MAML & FOMAML, refer to "Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks";
     for Reptile, refer to "On First-Order Meta-Learning Algorithms".
     See also "https://lilianweng.github.io/posts/2018-11-30-meta-learning"
+    MAML's time and space complexity (i.e., GPU memory) is high, so we only update the decoder in the inner loop (ANIL-style; similar performance).
     """
     def __init__(self,
@@ -60,9 +61,6 @@ def __init__(self,
         self.meta_optimizer = Optimizer(self.meta_model.parameters(), **self.optimizer_params['optimizer'])
         self.alpha = self.meta_params['alpha']  # for reptile
         self.task_set = generate_task_set(self.meta_params)
-        # assert self.trainer_params['meta_params']['epochs'] == math.ceil((self.trainer_params['epochs'] * self.trainer_params['train_episodes']) / (
-        #     self.trainer_params['meta_params']['B'] * self.trainer_params['meta_params']['k'] *
-        #     self.trainer_params['meta_params']['meta_batch_size'])), ">> meta-learning iteration does not match with POMO!"
 
         # Restore
         self.start_epoch = 1
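For reference, the Reptile update mentioned in the class docstring (`self.alpha` is its step size) moves the meta-parameters toward the task-adapted parameters. A minimal sketch of that outer rule, assuming a `meta_model` and a task-adapted `task_model`; the trainer's actual bookkeeping may differ in details.

```python
import torch
from collections import OrderedDict

def reptile_outer_update(meta_model, task_model, alpha):
    # Reptile outer step: theta <- theta + alpha * (theta_task - theta),
    # per "On First-Order Meta-Learning Algorithms".
    task_params = OrderedDict(task_model.named_parameters())
    with torch.no_grad():
        for name, p in meta_model.named_parameters():
            p.add_(alpha * (task_params[name] - p))
```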
@@ -139,7 +137,6 @@ def run(self):
         self.logger.info(" *** Training Done *** ")
         self.logger.info("Now, printing log array...")
         util_print_log_array(self.logger, self.result_log)
-        print(val_res)
 
     def _train_one_epoch(self, epoch):
         """
@@ -162,7 +159,12 @@
             task_model = copy.deepcopy(self.meta_model)
             optimizer = Optimizer(task_model.parameters(), **self.optimizer_params['optimizer'])
         elif self.meta_params['meta_method'] == 'maml':
-            fast_weight = OrderedDict(self.meta_model.named_parameters())
+            if self.model_params['meta_update_encoder']:
+                fast_weight = OrderedDict(self.meta_model.named_parameters())
+            else:
+                fast_weight = OrderedDict(self.meta_model.decoder.named_parameters())
+                for k in list(fast_weight.keys()):
+                    fast_weight["decoder." + k] = fast_weight.pop(k)
 
         for step in range(self.meta_params['k'] + 1):
             # generate task-specific data
@@ -291,6 +293,7 @@ def _train_one_batch_maml(self, fast_weight, data, env):
         return score_mean, loss_mean, fast_weight
 
     def _fast_val(self, model, data=None, val_episodes=32, mode="eval"):
+        aug_factor = 1
         if data is None:
             val_path = "../../data/TSP/tsp150_uniform.pkl"
@@ -304,11 +307,11 @@
             env.load_problems(batch_size, problems=data, aug_factor=aug_factor)
             reset_state, _, _ = env.reset()
             model.pre_forward(reset_state)
-        state, reward, done = env.pre_step()
-        while not done:
-            selected, _ = model(state)
-            # shape: (batch, pomo)
-            state, reward, done = env.step(selected)
+            state, reward, done = env.pre_step()
+            while not done:
+                selected, _ = model(state)
+                # shape: (batch, pomo)
+                state, reward, done = env.step(selected)
         elif mode in ["maml", "fomaml"]:
             fast_weight = model
             env.load_problems(batch_size, problems=data, aug_factor=aug_factor)
@@ -328,7 +331,11 @@
                 state, reward, done = env.step(selected)
                 prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)
             # Loss
-            advantage = reward - reward.float().mean(dim=1, keepdims=True)
+            if self.meta_params['bootstrap_steps'] != 0:
+                bootstrap_reward = self._bootstrap(fast_weight, data)
+                advantage = reward - bootstrap_reward
+            else:
+                advantage = reward - reward.float().mean(dim=1, keepdims=True)
             log_prob = prob_list.log().sum(dim=2)  # for the first/last node, p=1 -> log_p=0
             loss = -advantage * log_prob  # Minus Sign: To Increase REWARD
             loss_mean = loss.mean()
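The fast-weight updates in the hunks above (and in `_bootstrap` below) all follow the same manual-SGD pattern over an `OrderedDict` of parameters. A self-contained sketch of that pattern, with `inner_update` as a hypothetical helper name:

```python
import torch
from collections import OrderedDict

def inner_update(loss, fast_weight, lr, second_order=True):
    # One inner step on a dict of fast weights. create_graph=True keeps the
    # update differentiable so the outer loss can backprop through it (MAML);
    # second_order=False yields the first-order (FOMAML-style) variant.
    grads = torch.autograd.grad(loss, fast_weight.values(), create_graph=second_order)
    return OrderedDict(
        (name, param - lr * grad)
        for (name, param), grad in zip(fast_weight.items(), grads)
    )
```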
@@ -347,7 +354,48 @@
         else:
             return loss_mean
 
+    def _bootstrap(self, fast_weight, data):
+        """
+        Bootstrap using a smaller lr; only supported for MAML for now.
+        """
+        bootstrap_weight = fast_weight
+        batch_size = data.size(0)
+        bootstrap_reward = torch.full((batch_size, 1), float("-inf"))
+        with torch.enable_grad():
+            for L in range(self.meta_params['bootstrap_steps']):
+                env = Env(**{'problem_size': data.size(1), 'pomo_size': data.size(1)})
+                env.load_problems(batch_size, problems=data, aug_factor=1)
+                reset_state, _, _ = env.reset()
+                self.meta_model.pre_forward(reset_state, weights=bootstrap_weight)
+                prob_list = torch.zeros(size=(batch_size, env.pomo_size, 0))
+                state, reward, done = env.pre_step()
+                while not done:
+                    selected, prob = self.meta_model(state, weights=bootstrap_weight)
+                    state, reward, done = env.step(selected)
+                    prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)
+
+                advantage = reward - reward.float().mean(dim=1, keepdims=True)
+                log_prob = prob_list.log().sum(dim=2)
+                loss = -advantage * log_prob
+                loss_mean = loss.mean()
+
+                gradients = torch.autograd.grad(loss_mean, bootstrap_weight.values(), create_graph=False)
+                bootstrap_weight = OrderedDict(
+                    (name, param - self.optimizer_params['optimizer']['lr'] * grad)
+                    for ((name, param), grad) in zip(bootstrap_weight.items(), gradients)
+                )
+
+                max_pomo_reward, _ = reward.max(dim=1)
+                max_pomo_reward = max_pomo_reward.view(-1, 1)
+                bootstrap_reward = torch.where(max_pomo_reward > bootstrap_reward, max_pomo_reward, bootstrap_reward)
+                score_mean, bootstrap_mean = -max_pomo_reward.float().mean(), -bootstrap_reward.float().mean()
+                print("Bootstrap step {}: score_mean {}, bootstrap_mean {}".format(L, score_mean, bootstrap_mean))
+
+        return bootstrap_reward
+
     def _get_data(self, batch_size, task_params):
+
         if self.meta_params['data_type'] == 'distribution':
             assert len(task_params) == 2
             data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[-1], distribution='gaussian_mixture')
@@ -363,4 +411,7 @@
         return data
 
     def _alpha_scheduler(self, iter):
+        """
+        Update alpha, the outer-loop step size of Reptile.
+        """
         self.alpha = max(self.alpha * self.meta_params['alpha_decay'], 0.0001)
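A toy illustration of the baseline bookkeeping in `_bootstrap`: across bootstrap steps it keeps the element-wise best max-over-POMO reward seen so far and uses it as the per-instance baseline in place of the POMO mean. Random rewards stand in for actual rollouts here.

```python
import torch

batch, pomo, bootstrap_steps = 4, 8, 3
bootstrap_reward = torch.full((batch, 1), float("-inf"))
for _ in range(bootstrap_steps):
    reward = -torch.rand(batch, pomo)              # stand-in for (batch, pomo) rollout rewards
    max_pomo = reward.max(dim=1, keepdim=True)[0]  # best tour per instance
    bootstrap_reward = torch.maximum(bootstrap_reward, max_pomo)
advantage = reward - bootstrap_reward              # replaces reward.mean(dim=1) as the baseline
```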
diff --git a/POMO/TSP/TSPTrainer_pomo.py b/POMO/TSP/TSPTrainer_pomo.py
index ca818d3..ab6b421 100644
--- a/POMO/TSP/TSPTrainer_pomo.py
+++ b/POMO/TSP/TSPTrainer_pomo.py
@@ -10,6 +10,7 @@
 from TSPModel import TSPModel as Model
 
 from torch.optim import Adam as Optimizer
+# from torch.optim import SGD as Optimizer
 from torch.optim.lr_scheduler import MultiStepLR as Scheduler
 
 from TSProblemDef import get_random_problems, generate_task_set
@@ -54,9 +55,6 @@ def __init__(self,
         self.meta_model = Model(**self.model_params)
         self.optimizer = Optimizer(self.meta_model.parameters(), **self.optimizer_params['optimizer'])
         self.task_set = generate_task_set(self.meta_params)
-        # assert self.trainer_params['meta_params']['epochs'] == math.ceil((self.trainer_params['epochs'] * self.trainer_params['train_episodes']) / (
-        #     self.trainer_params['meta_params']['B'] * self.trainer_params['meta_params']['k'] *
-        #     self.trainer_params['meta_params']['meta_batch_size'])), ">> meta-learning iteration does not match with POMO!"
 
         # Restore
         self.start_epoch = 1
@@ -130,7 +128,6 @@ def run(self):
         self.logger.info(" *** Training Done *** ")
         self.logger.info("Now, printing log array...")
         util_print_log_array(self.logger, self.result_log)
-        print(val_res)
 
     def _train_one_epoch(self, epoch):
         """
@@ -149,6 +146,7 @@
 
         for step in range(self.meta_params['k']):
             # generate task-specific data
+            # task_params = random.sample(self.task_set, 1)[0]
             if self.meta_params['data_type'] == 'distribution':
                 assert len(task_params) == 2
                 data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[-1], distribution='gaussian_mixture')
@@ -205,6 +203,7 @@
         # Score
         max_pomo_reward, _ = reward.max(dim=1)  # get best results from pomo
         score_mean = -max_pomo_reward.float().mean()  # negative sign to make positive value
+        print(score_mean)
 
         return score_mean, loss_mean
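Both trainers draw task-specific instances via `get_random_problems(..., distribution='gaussian_mixture')`. That helper is not part of this patch, so the following is a hypothetical stand-in sketching what such a generator typically does (`num_modes` and `cdist` as in the call sites above); the repo's actual sampler may differ.

```python
import torch

def gaussian_mixture_instances(batch, n, num_modes, cdist):
    # hypothetical stand-in: sample mode centers in a cdist-scaled box, draw
    # unit-variance points around them, then min-max normalize to [0, 1]^2
    centers = torch.rand(batch, num_modes, 2) * cdist
    assign = torch.randint(num_modes, (batch, n))                 # mode of each node
    pts = centers.gather(1, assign[..., None].expand(-1, -1, 2))  # (batch, n, 2)
    pts = pts + torch.randn(batch, n, 2)
    mn, mx = pts.min(dim=1, keepdim=True)[0], pts.max(dim=1, keepdim=True)[0]
    return (pts - mn) / (mx - mn)

data = gaussian_mixture_instances(64, 100, num_modes=3, cdist=10)  # (64, 100, 2)
```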
diff --git a/POMO/TSP/test_n100.py b/POMO/TSP/test_n100.py
index 98aba0f..518fea9 100644
--- a/POMO/TSP/test_n100.py
+++ b/POMO/TSP/test_n100.py
@@ -15,8 +15,8 @@
 
 # parameters
 env_params = {
-    'problem_size': 50,
-    'pomo_size': 50,
+    'problem_size': 100,
+    'pomo_size': 100,
 }
 
 model_params = {
@@ -35,22 +35,23 @@
     'cuda_device_num': CUDA_DEVICE_NUM,
     'seed': 2023,
     'model_load': {
-        'path': '../../pretrained/tsp50_exp1/pomo_k',  # directory where the pre-trained model and log files are saved
-        'epoch': 52084,  # epoch version of the pre-trained model to load
+        'path': '../../pretrained/var_size_exp1/pomo_adam',  # directory where the pre-trained model and log files are saved
+        'epoch': 78125,  # epoch version of the pre-trained model to load
     },
-    'test_episodes': 10000,
+    'test_episodes': 7000,
     'test_batch_size': 10000,
     'augmentation_enable': True,
+    'test_robustness': False,
     'aug_factor': 8,
-    'aug_batch_size': 100,
-    'test_set_path': '../../data/TSP/tsp50_cluster.pkl',
-    'test_set_opt_sol_path': '../../data/TSP/gurobi/tsp50_cluster.pkl',
+    'aug_batch_size': 50,
+    'test_set_path': './adv_tsp100_uniform.pkl',  # '../../data/TSP/tsp100_uniform.pkl'
+    'test_set_opt_sol_path': './sol_adv_tsp100_uniform.pkl',  # '../../data/TSP/gurobi/tsp100_uniform.pkl'
     'fine_tune_params': {
         'enable': True,  # evaluate few-shot generalization
         'fine_tune_episodes': 3000,  # number of instances used to fine-tune the pretrained model
         'k': 50,  # gradient descent steps in the inner-loop optimization of the meta-learning method
         'fine_tune_batch_size': 64,  # the batch size of the inner-loop optimization
-        'fine_tune_set_path': '../../data/TSP/tsp50_cluster.pkl',
+        'fine_tune_set_path': './adv_tsp100_uniform.pkl',
         'augmentation_enable': False,
         'optimizer': {
             'lr': 1e-4 * 0.1,
@@ -101,5 +102,4 @@ def _print_config():
 
 if __name__ == "__main__":
     # TODO: 1. why not use the test dataset to fine-tune the model?
-    #  2. the implementation of our method
     main()
diff --git a/POMO/TSP/train_n100.py b/POMO/TSP/train_n100.py
index ec7aa5a..359677e 100644
--- a/POMO/TSP/train_n100.py
+++ b/POMO/TSP/train_n100.py
@@ -11,7 +11,7 @@
 
 DEBUG_MODE = False
 USE_CUDA = not DEBUG_MODE and torch.cuda.is_available()
-CUDA_DEVICE_NUM = 1  # $ nohup python -u train_n100.py 2>&1 &, no need to use CUDA_VISIBLE_DEVICES=0
+CUDA_DEVICE_NUM = 0  # $ nohup python -u train_n100.py 2>&1 &, no need to use CUDA_VISIBLE_DEVICES=0
 
 ##########################################################################################
 # parameters
@@ -30,6 +30,7 @@
     'logit_clipping': 10,
     'ff_hidden_dim': 512,
     'eval_type': 'argmax',
+    'meta_update_encoder': False,
 }
 
 optimizer_params = {
@@ -49,12 +50,12 @@
     'seed': 1234,
     'epochs': 500,
     'time_limit': 86400,
-    'stop_criterion': 'time',  # 'epochs' or 'time'
+    'stop_criterion': 'epochs',  # 'epochs' or 'time'
     'train_episodes': 100000,  # number of instances per epoch
    'train_batch_size': 64,
     'logging': {
-        'model_save_interval': 520,
-        'img_save_interval': 520,
+        'model_save_interval': 13020,
+        'img_save_interval': 13020,
         'log_image_params_1': {
             'json_foldername': 'log_image_style',
             'filename': 'general.json'
@@ -74,13 +75,14 @@
     # For Reptile, performance is quite good; however, after several iterations, the improvement in the inner loop becomes trivial.
     'meta_params': {
         'enable': True,  # whether to use meta-learning or not
-        'meta_method': 'maml',  # choose from ['maml', 'fomaml', 'reptile', 'ours']
+        'meta_method': 'maml',  # choose from ['maml', 'fomaml', 'reptile']
+        'bootstrap_steps': 1,
         'data_type': 'size',  # choose from ["size", "distribution", "size_distribution"]
-        'epochs': 52084,  # the number of meta-model updates: (500*100000) / (3*50*64)
+        'epochs': 130209,  # the number of meta-model updates: (250*100000) / (1*5*64)
         'B': 1,  # the number of tasks in a mini-batch
-        'k': 3,  # gradient descent steps in the inner-loop optimization of the meta-learning method
+        'k': 1,  # gradient descent steps in the inner-loop optimization of the meta-learning method
         'meta_batch_size': 64,  # the batch size of the inner-loop optimization
-        'num_task': 10,  # the number of tasks in the training task set
+        'num_task': 5,  # the number of tasks in the training task set
         'alpha': 0.99,  # param for the outer-loop optimization of Reptile
         'alpha_decay': 0.999,  # param for the outer-loop optimization of Reptile
     }
@@ -110,9 +112,6 @@ def main():
     elif trainer_params['meta_params']['meta_method'] in ['maml', 'fomaml', 'reptile']:
         print(">> Start POMO-{} Training.".format(trainer_params['meta_params']['meta_method']))
         trainer = Trainer_Meta(env_params=env_params, model_params=model_params, optimizer_params=optimizer_params, trainer_params=trainer_params)
-    elif trainer_params['meta_params']['meta_method'] == 'ours':
-        print(">> Start POMO-Ours Training.")
-        trainer = Trainer_Scheduler(env_params=env_params, model_params=model_params, optimizer_params=optimizer_params, trainer_params=trainer_params)
     else:
         raise NotImplementedError
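On the `stop_criterion` switch configured above: a sketch of how the two modes are typically enforced against the `epochs` and `time_limit` settings; the repo's actual training loop may differ in details.

```python
import time

def should_stop(trainer_params, epoch, start_time):
    # 'epochs' stops after a fixed number of epochs; 'time' after time_limit seconds
    if trainer_params['stop_criterion'] == 'epochs':
        return epoch >= trainer_params['epochs']
    return time.time() - start_time >= trainer_params['time_limit']
```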
diff --git a/POMO/utils/functions.py b/POMO/utils/functions.py
index 87bd79b..e9dab83 100644
--- a/POMO/utils/functions.py
+++ b/POMO/utils/functions.py
@@ -40,6 +40,13 @@ def seed_everything(seed=2022):
     torch.cuda.manual_seed_all(seed)
 
 
+def display_num_param(net):
+    nb_param = 0
+    for param in net.parameters():
+        nb_param += param.numel()
+    print('There are {} ({:.2f} million) parameters in this neural network'.format(nb_param, nb_param / 1e6))
+
+
 def show(x, y, label, title, xdes, ydes, path, x_scale="linear", dpi=300):
     plt.style.use('fast')  # other options: bmh, fivethirtyeight, Solarize_Light2
     plt.figure(figsize=(8, 8))
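Example use of the new `display_num_param` helper, cross-checking the encoder/decoder parameter counts noted in the TSPModel.py comments; this assumes `model_params` as defined in train_n100.py.

```python
from TSPModel import TSPModel as Model
from utils.functions import display_num_param

model = Model(**model_params)     # model_params from train_n100.py
display_num_param(model.encoder)  # expected: 1187712 (1.19 million)
display_num_param(model.decoder)  # expected: 82048 (0.08 million)
```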