fix bugs

RoyalSkye · Dec 23, 2022 · 56e60f7 · 56e60f7
1 parent cdeec9e
commit 56e60f7
Show file tree

Hide file tree

Showing 11 changed files with 91 additions and 80 deletions.
diff --git a/POMO/CVRP/CVRPTester.py b/POMO/CVRP/CVRPTester.py
@@ -178,6 +178,10 @@ def _fine_tune_and_test(self):
         score_list, aug_score_list, gap_list, aug_gap_list = [], [], [], []
 
         for k in range(self.fine_tune_params['k']):
+            if k in [int(self.fine_tune_params['k'] * 0.4)] and self.fine_tune_params['lr_decay']:
+                for group in self.optimizer.param_groups:
+                    group["lr"] /= 10
+                    print(">> LR decay to {}".format(group["lr"]))
             # score, aug_score, gap, aug_gap = self._test(store_res=False)
             # score_list.append(score); aug_score_list.append(aug_score)
             # gap_list.append(gap); aug_gap_list.append(aug_gap)

diff --git a/POMO/CVRP/CVRPTrainer_meta.py b/POMO/CVRP/CVRPTrainer_meta.py
@@ -20,8 +20,7 @@ class CVRPTrainer:
     """
     Implementation of POMO with MAML / FOMAML / Reptile on CVRP.
     For MAML & FOMAML, ref to "Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks";
-    For Reptile, ref to "On First-Order Meta-Learning Algorithms".
-    Refer to "https://lilianweng.github.io/posts/2018-11-30-meta-learning"
+    For Reptile, ref to "On First-Order Meta-Learning Algorithms" and "On the generalization of neural combinatorial optimization heuristics".
     """
     def __init__(self,
                  env_params,
@@ -54,7 +53,7 @@ def __init__(self,
             torch.set_default_tensor_type('torch.FloatTensor')
 
         # Main Components
-        self.model_params["norm"] = None
+        self.model_params["norm"] = 'instance'
         self.meta_model = Model(**self.model_params)
         self.meta_optimizer = Optimizer(self.meta_model.parameters(), **self.optimizer_params['optimizer'])
         self.alpha = self.meta_params['alpha']  # for reptile
@@ -89,7 +88,7 @@ def run(self):
             self.logger.info('=================================================================')
 
             # lr decay (by 10) to speed up convergence, applied once at 90% of the total epochs
-            if epoch in [int(self.meta_params['epochs'] * 0.9), int(self.meta_params['epochs'] * 0.95)]:
+            if epoch in [int(self.meta_params['epochs'] * 0.9)]:
                 self.optimizer_params['optimizer']['lr'] /= 10
                 for group in self.meta_optimizer.param_groups:
                     group["lr"] /= 10
@@ -111,7 +110,7 @@ def run(self):
                 paths = ["cvrp100_uniform.pkl", "cvrp100_gaussian.pkl", "cvrp100_cluster.pkl", "cvrp100_diagonal.pkl", "cvrp100_cvrplib.pkl"]
             elif self.meta_params["data_type"] == "size_distribution":
                 dir = "../../data/CVRP/Size_Distribution/"
-                paths = ["cvrp200_uniform.pkl", "cvrp200_gaussian.pkl", "cvrp300_rotation.pkl"]
+                paths = ["cvrp200_uniform.pkl", "cvrp300_rotation.pkl"]
             if epoch <= 1 or (epoch % img_save_interval) == 0:
                 for val_path in paths:
                     no_aug_score = self._fast_val(self.meta_model, path=os.path.join(dir, val_path), val_episodes=64, mode="eval")
@@ -174,9 +173,8 @@ def _train_one_epoch(self, epoch):
             for size_distribution: combine together.
         """
         self.meta_optimizer.zero_grad()
-        score_AM = AverageMeter()
-        loss_AM = AverageMeter()
-        batch_size = self.meta_params['meta_batch_size']
+        score_AM, loss_AM = AverageMeter(), AverageMeter()
+        meta_batch_size = self.meta_params['meta_batch_size']
 
         # Adaptive task scheduler - Not implemented for "size" and "distribution"
         if self.meta_params['curriculum']:
@@ -196,13 +194,14 @@ def _train_one_epoch(self, epoch):
             # sample a task
             if self.meta_params["data_type"] == "size":
                 task_params = random.sample(self.task_set, 1)[0]
-                batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2
+                batch_size = meta_batch_size if (task_params[0] <= 150 and self.meta_params['k'] + self.meta_params['L'] == 1) else meta_batch_size // 2
             elif self.meta_params["data_type"] == "distribution":
                 task_params = random.sample(self.task_set, 1)[0]
+                batch_size = meta_batch_size if self.meta_params['k'] + self.meta_params['L'] == 1 else meta_batch_size // 2
             elif self.meta_params["data_type"] == "size_distribution":
                 task_params = tasks[torch.multinomial(self.task_w[idx], 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0]
-                batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 150 else self.meta_params['meta_batch_size'] // 2
-            data = self._get_data(batch_size, task_params)
+                batch_size = meta_batch_size if (task_params[0] <= 150 and self.meta_params['k'] + self.meta_params['L'] == 1) else meta_batch_size // 2
+            train_data = self._get_data(meta_batch_size, task_params)
 
             # preparation
             if self.meta_params['meta_method'] in ['fomaml', 'reptile']:
@@ -219,7 +218,8 @@ def _train_one_epoch(self, epoch):
 
             # inner-loop optimization
             for step in range(self.meta_params['k']):
-                # data = self._get_data(batch_size, task_params)
+                idx = torch.randperm(meta_batch_size)[:batch_size]
+                data = (train_data[0][idx], train_data[1][idx], train_data[2][idx]) if self.meta_params['meta_method'] != "reptile" else self._get_data(meta_batch_size, task_params)
                 env_params = {'problem_size': data[-1].size(1), 'pomo_size': data[-1].size(1)}
                 self.meta_model.train()
                 if self.meta_params['meta_method'] in ['reptile', 'fomaml']:
@@ -241,7 +241,8 @@ def _train_one_epoch(self, epoch):
                     bootstrap_optimizer = Optimizer(bootstrap_model.parameters(), **self.optimizer_params['optimizer'])
                     bootstrap_optimizer.load_state_dict(optimizer.state_dict())
             for step in range(self.meta_params['L']):
-                # data = self._get_data(batch_size, task_params)
+                idx = torch.randperm(meta_batch_size)[:batch_size]
+                data = (train_data[0][idx], train_data[1][idx], train_data[2][idx]) if self.meta_params['meta_method'] != "reptile" else self._get_data(meta_batch_size, task_params)
                 if self.meta_params['meta_method'] == 'maml':
                     avg_score, avg_loss, bootstrap_model = self._train_one_batch_maml(bootstrap_model, data, Env(**env_params), create_graph=False)
                 else:
@@ -367,7 +368,7 @@ def _train_one_batch_maml(self, fast_weight, data, env, optimizer=None, create_g
         lr, weight_decay = self.optimizer_params['optimizer']['lr'], self.optimizer_params['optimizer']['weight_decay']
         for i, ((name, param), grad) in enumerate(zip(fast_weight.items(), gradients)):
             if self.meta_optimizer.state_dict()['state'] != {}:
-                i = i if self.model_params['meta_update_encoder'] else i + 58  # i \in [0, 62], where encoder \in [0, 57] + decoder \in [58, 62]
+                i = i if self.model_params['meta_update_encoder'] else i + 82  # (with norm layer): i \in [0, 86], where encoder \in [0, 81] + decoder \in [82, 86]
                 state = self.meta_optimizer.state_dict()['state'][i]
                 step, exp_avg, exp_avg_sq = state['step'], state['exp_avg'], state['exp_avg_sq']
                 step += 1
@@ -483,7 +484,7 @@ def _fast_val(self, model, data=None, path=None, offset=0, val_episodes=32, mode
         else:
             return loss_mean, kl_loss
 
-    def _get_data(self, batch_size, task_params):
+    def _get_data(self, batch_size, task_params, return_capacity=False):
         """
         Return CVRP data with the form of:
         depot_xy: [batch_size, 1, 2]
@@ -504,7 +505,7 @@ def _get_data(self, batch_size, task_params):
             raise NotImplementedError
 
         # normalized node_demand by capacity & only return (depot_xy, node_xy, node_demand)
-        if len(data) == 4:
+        if len(data) == 4 and not return_capacity:
             depot_xy, node_xy, node_demand, capacity = data
             node_demand = node_demand / capacity.view(-1, 1)
             data = (depot_xy, node_xy, node_demand)
@@ -543,21 +544,23 @@ def _update_task_weight(self, tasks, weights, epoch):
         idx = torch.randperm(batch_size)[:50]
         for i in range(gap.size(0)):
             selected = tasks[i]
-            data = self._get_data(batch_size=batch_size, task_params=selected)
+            data = self._get_data(batch_size=batch_size, task_params=selected, return_capacity=True)
 
             # only use lkh3 at the first iteration of updating task weights
             if self.meta_params["solver"] == "lkh3_offline":
                 if selected not in self.val_data.keys():
-                    self.val_data[selected] = data
+                    self.val_data[selected] = data  # (depot, loc, demand, capacity)
                     opts = argparse.ArgumentParser()
                     opts.cpus, opts.n, opts.progress_bar_mininterval = None, None, 0.1
-                    dataset = [(instance.cpu().numpy(),) for instance in data]
+                    dataset = [attr.cpu().tolist() for attr in data]
+                    dataset = [(dataset[0][i][0], dataset[1][i], [int(d) for d in dataset[2][i]], int(dataset[3][i])) for i in range(data[0].size(0))]
                     executable = get_lkh_executable()
                     def run_func(args):
                         return solve_lkh_log(executable, *args, runs=1, disable_cache=True, MAX_TRIALS=100)  # otherwise it directly loads data from dir
                     results, _ = run_all_in_pool(run_func, "./LKH3_result", dataset, opts, use_multiprocessing=False)
                     self.val_opt[selected] = [j[0] for j in results]
-                data = self.val_data[selected][idx]
+                data = [attr[idx] for attr in self.val_data[selected]]
+                data = (data[0], data[1], data[2] / data[3].view(-1, 1))
 
             model_score = self._fast_val(self.meta_model, data=data, mode="eval", return_all=True)
             model_score = model_score.tolist()

diff --git a/POMO/CVRP/CVRPTrainer_pomo.py b/POMO/CVRP/CVRPTrainer_pomo.py
@@ -84,7 +84,7 @@ def run(self):
             self.logger.info('=================================================================')
 
             # lr decay (by 10) to speed up convergence, applied once at 90% of the total epochs
-            if epoch in [int(self.meta_params['epochs'] * 0.9), int(self.meta_params['epochs'] * 0.95)]:
+            if epoch in [int(self.meta_params['epochs'] * 0.9)]:
                 self.optimizer_params['optimizer']['lr'] /= 10
                 for group in self.optimizer.param_groups:
                     group["lr"] /= 10
@@ -106,7 +106,7 @@ def run(self):
                 paths = ["cvrp100_uniform.pkl", "cvrp100_gaussian.pkl", "cvrp100_cluster.pkl", "cvrp100_diagonal.pkl", "cvrp100_cvrplib.pkl"]
             elif self.meta_params["data_type"] == "size_distribution":
                 dir = "../../data/CVRP/Size_Distribution/"
-                paths = ["cvrp200_uniform.pkl", "cvrp200_gaussian.pkl", "cvrp300_rotation.pkl"]
+                paths = ["cvrp200_uniform.pkl", "cvrp300_rotation.pkl"]
             if epoch <= 1 or (epoch % img_save_interval) == 0:
                 for val_path in paths:
                     no_aug_score = self._fast_val(self.model, path=os.path.join(dir, val_path), val_episodes=64)
@@ -259,7 +259,7 @@ def _fast_val(self, model, data=None, path=None, offset=0, val_episodes=32, retu
         else:
             return no_aug_score.detach().item()
 
-    def _get_data(self, batch_size, task_params):
+    def _get_data(self, batch_size, task_params, return_capacity=False):
         """
         Return CVRP data with the form of:
         depot_xy: [batch_size, 1, 2]
@@ -280,7 +280,7 @@ def _get_data(self, batch_size, task_params):
             raise NotImplementedError
 
         # normalized node_demand by capacity & only return (depot_xy, node_xy, node_demand)
-        if len(data) == 4:
+        if len(data) == 4 and not return_capacity:
             depot_xy, node_xy, node_demand, capacity = data
             node_demand = node_demand / capacity.view(-1, 1)
             data = (depot_xy, node_xy, node_demand)
@@ -298,21 +298,23 @@ def _update_task_weight(self, tasks, weights, epoch):
         idx = torch.randperm(batch_size)[:50]
         for i in range(gap.size(0)):
             selected = tasks[i]
-            data = self._get_data(batch_size=batch_size, task_params=selected)
+            data = self._get_data(batch_size=batch_size, task_params=selected, return_capacity=True)
 
             # only use lkh3 at the first iteration of updating task weights
             if self.meta_params["solver"] == "lkh3_offline":
                 if selected not in self.val_data.keys():
                     self.val_data[selected] = data
                     opts = argparse.ArgumentParser()
                     opts.cpus, opts.n, opts.progress_bar_mininterval = None, None, 0.1
-                    dataset = [(instance.cpu().numpy(),) for instance in data]
+                    dataset = [attr.cpu().tolist() for attr in data]
+                    dataset = [(dataset[0][i][0], dataset[1][i], [int(d) for d in dataset[2][i]], int(dataset[3][i])) for i in range(data[0].size(0))]
                     executable = get_lkh_executable()
                     def run_func(args):
                         return solve_lkh_log(executable, *args, runs=1, disable_cache=True, MAX_TRIALS=100)  # otherwise it directly loads data from dir
                     results, _ = run_all_in_pool(run_func, "./LKH3_result", dataset, opts, use_multiprocessing=False)
                     self.val_opt[selected] = [j[0] for j in results]
-                data = self.val_data[selected][idx]
+                data = [attr[idx] for attr in self.val_data[selected]]
+                data = (data[0], data[1], data[2] / data[3].view(-1, 1))
 
             model_score = self._fast_val(self.meta_model, data=data, mode="eval", return_all=True)
             model_score = model_score.tolist()
@@ -325,7 +327,7 @@ def run_func(args):
                 raise NotImplementedError
         print(">> Finish updating task weights within {}s".format(round(time.time() - start_t, 2)))
 
-        temp = 0.25
+        temp = 1.0
         gap_temp = torch.Tensor([i / temp for i in gap.tolist()])
         print(gap, temp)
         print(">> Old task weights: {}".format(weights))

diff --git a/POMO/CVRP/CVRP_baseline.py b/POMO/CVRP/CVRP_baseline.py
@@ -259,7 +259,7 @@ def write_vrplib(filename, depot, loc, demand, capacity, grid_size, name="proble
     parser.add_argument("--cpus", type=int, help="Number of CPUs to use, defaults to all cores")
     parser.add_argument('--disable_cache', action='store_true', help='Disable caching')
     parser.add_argument('--progress_bar_mininterval', type=float, default=0.1, help='Minimum interval')
-    parser.add_argument('-n', type=int, default=10000, help="Number of instances to process")
+    parser.add_argument('-n', type=int, default=1000, help="Number of instances to process")
     parser.add_argument('--offset', type=int, default=0, help="Offset where to start processing")
     parser.add_argument('--results_dir', default='baseline_results', help="Name of results directory")
 

diff --git a/POMO/CVRP/test.py b/POMO/CVRP/test.py
@@ -52,12 +52,13 @@
 
 fine_tune_params = {
     'enable': False,  # evaluate few-shot generalization
-    'fine_tune_episodes': 500,  # how many data used to fine-tune the pretrained model
-    'k': 20,  # fine-tune steps/epochs
-    'fine_tune_batch_size': 64,  # the batch size of the inner-loop optimization
-    'augmentation_enable': False,
+    'fine_tune_episodes': 1000,  # how many data used to fine-tune the pretrained model
+    'k': 50,  # fine-tune steps/epochs
+    'fine_tune_batch_size': 32,  # the batch size of the inner-loop optimization
+    'augmentation_enable': True,
+    'lr_decay': True,
     'optimizer': {
-        'lr': 1e-4 * 0.1,
+        'lr': 1e-4,
         'weight_decay': 1e-6
     }
 }

diff --git a/POMO/ProblemDef.py b/POMO/ProblemDef.py
@@ -55,7 +55,7 @@ def get_random_problems(batch_size, problem_size, num_modes=0, cdist=0, distribu
     # save as List
     if path is not None:
         if problem == "tsp":
-            with open(os.path.join(path, "tsp{}_{}.pkl".format(problem_size, distribution)), "wb") as f:
+            with open(os.path.join(path, "tsp{}_{}_{}_{}.pkl".format(problem_size, distribution, num_modes, cdist)), "wb") as f:
                 pickle.dump(problems.tolist(), f, pickle.HIGHEST_PROTOCOL)
         else:
             with open(os.path.join(path, "cvrp{}_{}.pkl".format(problem_size, distribution)), "wb") as f:
@@ -249,25 +249,27 @@ def generate_tsp_dist(n_samples, n_nodes, distribution):
     val seed: 2022
     test seed: 2023
     """
-    path = "../data/CVRP/Size_Distribution"
+    path = "../data/TSP/Size_Distribution"
     if not os.path.exists(path):
         os.makedirs(path)
     seed_everything(seed=2023)
 
     # test data for Table 1
-    # for s in [100, 150, 200]:
-    #     for dist in ["uniform", "gaussian"]:
+    # for s in [200, 300]:
+    #     for dist in ["uniform"]:
     #         print(">> Generating TSP instances following {} distribution!".format(dist))
-    #         get_random_problems(15000, s, distribution=dist, path=path, problem="cvrp")
+    #         get_random_problems(2000, s, distribution=dist, path=path, problem="tsp")
+    for m, c in [(3, 5), (6, 20), (9, 50)]:
+        get_random_problems(2000, 200, num_modes=m, cdist=c, distribution="gaussian_mixture", path=path, problem="tsp")
 
     # var-size test data
     # for s in [50, 100, 150, 200, 300, 500, 1000]:
     #     print(">> Generating TSP instances of size {}!".format(s))
     #     get_random_problems(15000, s, distribution="uniform", path=path, problem="tsp")
 
     # data = generate_gaussian_mixture_tsp(dataset_size=1, graph_size=150, num_modes=3, cdist=10)
-    data = load_dataset("../data/CVRP/Size_Distribution/cvrp100_uniform.pkl")
-    print(data[0])
+    # data = load_dataset("../data/CVRP/Size_Distribution/cvrp100_uniform.pkl")
+    # print(data[0])
     # print(type(data), data.size(), data)
     # x, y = [i[0] for i in data[1]], [i[-1] for i in data[1]]
     # x, y = data[0, :, 0].tolist(), data[0, :, -1].tolist()

diff --git a/POMO/TSP/TSPTester.py b/POMO/TSP/TSPTester.py
@@ -56,7 +56,7 @@ def __init__(self,
         checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load)
         checkpoint = torch.load(checkpoint_fullname, map_location=self.device)
         self.model.load_state_dict(checkpoint['model_state_dict'])
-        # self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])  # TODO: which performance is good? load or not load?
+        # self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
         self.logger.info(">> Model loaded from {}".format(checkpoint_fullname))
 
         # utility
@@ -185,6 +185,10 @@ def _fine_tune_and_test(self):
         score_list, aug_score_list, gap_list, aug_gap_list = [], [], [], []
 
         for k in range(self.fine_tune_params['k']):
+            if k in [int(self.fine_tune_params['k'] * 0.4)] and self.fine_tune_params['lr_decay']:
+                for group in self.optimizer.param_groups:
+                    group["lr"] /= 10
+                    print(">> LR decay to {}".format(group["lr"]))
             # score, aug_score, gap, aug_gap = self._test(store_res=False)
             # score_list.append(score); aug_score_list.append(aug_score)
             # gap_list.append(gap); aug_gap_list.append(aug_gap)