From 238cbc87e96381ba83912db4ed5f8c844b3b4071 Mon Sep 17 00:00:00 2001 From: RoyalSkye Date: Sat, 19 Nov 2022 20:52:03 +0800 Subject: [PATCH] add cvrp code --- EAS/run_search.py | 34 +- EAS/source/cvrp/model.py | 111 ++--- EAS/source/cvrp/read_data.py | 16 +- EAS/source/eas_emb.py | 4 +- EAS/source/eas_lay.py | 5 +- EAS/source/eas_tab.py | 2 +- EAS/source/tsp/model.py | 4 +- POMO/CVRP/CVRPEnv.py | 238 +++++++++ POMO/CVRP/CVRPModel.py | 413 +++++++++++++++ POMO/CVRP/CVRPTester.py | 249 +++++++++ POMO/CVRP/CVRPTrainer_meta.py | 527 ++++++++++++++++++++ POMO/CVRP/CVRPTrainer_pomo.py | 274 ++++++++++ POMO/CVRP/CVRP_baseline.py | 270 ++++++++++ POMO/CVRP/test.py | 121 +++++ POMO/CVRP/train.py | 151 ++++++ POMO/{TSP/TSProblemDef.py => ProblemDef.py} | 64 ++- POMO/TSP/TSPEnv.py | 5 +- POMO/TSP/TSPModel.py | 14 +- POMO/TSP/TSPTester.py | 2 +- POMO/TSP/TSPTrainer_Meta.py | 143 ++++-- POMO/TSP/TSPTrainer_pomo.py | 82 ++- POMO/TSP/TSP_baseline.py | 31 +- POMO/TSP/install_concorde.sh | 27 + POMO/TSP/test.py | 10 +- POMO/TSP/train.py | 10 +- 25 files changed, 2542 insertions(+), 265 deletions(-) create mode 100644 POMO/CVRP/CVRPEnv.py create mode 100644 POMO/CVRP/CVRPModel.py create mode 100644 POMO/CVRP/CVRPTester.py create mode 100644 POMO/CVRP/CVRPTrainer_meta.py create mode 100644 POMO/CVRP/CVRPTrainer_pomo.py create mode 100644 POMO/CVRP/CVRP_baseline.py create mode 100644 POMO/CVRP/test.py create mode 100644 POMO/CVRP/train.py rename POMO/{TSP/TSProblemDef.py => ProblemDef.py} (77%) create mode 100755 POMO/TSP/install_concorde.sh diff --git a/EAS/run_search.py b/EAS/run_search.py index 626b45c..4d4256e 100644 --- a/EAS/run_search.py +++ b/EAS/run_search.py @@ -5,6 +5,7 @@ import pickle import sys import time +import math import random import numpy as np @@ -28,28 +29,27 @@ def get_config(): - # TODO: Check CVRP env.py parser = argparse.ArgumentParser(description='Efficient Active Search') parser.add_argument('-problem', default="TSP", type=str, choices=['TSP', 'CVRP']) - 
parser.add_argument('-method', default="eas-tab", type=str, choices=['eas-emb', 'eas-lay', 'eas-tab'], help="EAS method") - parser.add_argument('-model_path', default="../pretrained/checkpoint-50000.pt", type=str, help="Path of the trained model weights") - parser.add_argument('-instances_path', default="../data/TSP/Size/tsp100_uniform.pkl", type=str, help="Path of the instances") + parser.add_argument('-method', default="eas-emb", type=str, choices=['eas-emb', 'eas-lay', 'eas-tab'], help="EAS method") + parser.add_argument('-model_path', default="../pretrained/pomo_pretrained/checkpoint-30500.pt", type=str, help="Path of the trained model weights") + parser.add_argument('-instances_path', default="../data/TSP/Size/tsp100_gaussian.pkl", type=str, help="Path of the instances") parser.add_argument('-sol_path', default="../data/TSP/Size/opt_tsp100_uniform.pkl", type=str, help="Path of the optimal sol") - parser.add_argument('-num_instances', default=30, type=int, help="Maximum number of instances that should be solved") + parser.add_argument('-num_instances', default=10000, type=int, help="Maximum number of instances that should be solved") parser.add_argument('-instances_offset', default=0, type=int) parser.add_argument('-round_distances', default=False, action='store_true', help="Round distances to the nearest integer. 
Required to solve .vrp instances") parser.add_argument('-max_iter', default=200, type=int, help="Maximum number of EAS iterations") parser.add_argument('-max_runtime', default=100000, type=int, help="Maximum runtime of EAS per batch in seconds") - parser.add_argument('-batch_size', default=30, type=int) # Set to 1 for single instance search + parser.add_argument('-batch_size', default=150, type=int) # Set to 1 for single instance search parser.add_argument('-p_runs', default=1, type=int) # If batch_size is 1, set this to > 1 to do multiple runs for the instance in parallel parser.add_argument('-output_path', default="EAS_results", type=str) parser.add_argument('-norm', default="none", choices=['instance', 'batch', 'none'], type=str) - parser.add_argument('-gpu_id', default=0, type=int) + parser.add_argument('-gpu_id', default=2, type=int) parser.add_argument('-seed', default=2023, type=int, help="random seed") # EAS-Emb and EAS-Lay parameters - parser.add_argument('-param_lambda', default=0.012, type=float) + parser.add_argument('-param_lambda', default=0.0058, type=float) parser.add_argument('-param_lr', default=0.0032, type=float) # EAS-Tab parameters @@ -81,21 +81,9 @@ def read_instance_data(config): problem_size = instance_data[0].shape[1] - 1 # The vehicle capacity (here called demand_scaler) is hardcoded for these instances as follows - if problem_size == 20: - demand_scaler = 30 - elif problem_size == 50: - demand_scaler = 40 - elif problem_size == 100: - demand_scaler = 50 - elif problem_size == 125: - demand_scaler = 55 - elif problem_size == 150: - demand_scaler = 60 - elif problem_size == 200: - demand_scaler = 70 - else: - raise NotImplementedError - instance_data_scaled = instance_data[0], instance_data[1] / demand_scaler + # demand_scaler = math.ceil(30 + problem_size / 5) if problem_size >= 20 else 20 + # instance_data_scaled = instance_data[0], instance_data[1] / demand_scaler # already done in fun(read_instance_pkl) + instance_data_scaled = 
instance_data[0], instance_data[1] else: # Read in .vrp instance(s) that have the VRPLIB format. In this case the distances between customers diff --git a/EAS/source/cvrp/model.py b/EAS/source/cvrp/model.py index b6f6d63..2a05cec 100644 --- a/EAS/source/cvrp/model.py +++ b/EAS/source/cvrp/model.py @@ -16,11 +16,12 @@ def __init__(self, **model_params): # shape: (batch, problem+1, EMBEDDING_DIM) def pre_forward(self, reset_state): - depot_xy = reset_state.depot_xy + # print(reset_state.data) # (batch, problem+1, 3) + depot_xy = reset_state.data[:, [0], 0:2] # shape: (batch, 1, 2) - node_xy = reset_state.node_xy + node_xy = reset_state.data[:, 1:, 0:2] # shape: (batch, problem, 2) - node_demand = reset_state.node_demand + node_demand = reset_state.data[:, 1:, 2] # shape: (batch, problem) node_xy_demand = torch.cat((node_xy, node_demand[:, :, None]), dim=2) # shape: (batch, problem, 3) @@ -29,52 +30,15 @@ def pre_forward(self, reset_state): # shape: (batch, problem+1, embedding) self.decoder.set_kv(self.encoded_nodes) - def forward(self, state): - batch_size = state.BATCH_IDX.size(0) - pomo_size = state.BATCH_IDX.size(1) - - - if state.selected_count == 0: # First Move, depot - selected = torch.zeros(size=(batch_size, pomo_size), dtype=torch.long) - prob = torch.ones(size=(batch_size, pomo_size)) - - # # Use Averaged encoded nodes for decoder input_1 - # encoded_nodes_mean = self.encoded_nodes.mean(dim=1, keepdim=True) - # # shape: (batch, 1, embedding) - # self.decoder.set_q1(encoded_nodes_mean) - - # # Use encoded_depot for decoder input_2 - # encoded_first_node = self.encoded_nodes[:, [0], :] - # # shape: (batch, 1, embedding) - # self.decoder.set_q2(encoded_first_node) - - elif state.selected_count == 1: # Second Move, POMO - selected = torch.arange(start=1, end=pomo_size+1)[None, :].expand(batch_size, pomo_size) - prob = torch.ones(size=(batch_size, pomo_size)) - + def forward(self, state, selected=None): + if selected is not None: # First Move - depot or Second 
Move - POMO + pass else: encoded_last_node = _get_encoding(self.encoded_nodes, state.current_node) # shape: (batch, pomo, embedding) - probs = self.decoder(encoded_last_node, state.load, ninf_mask=state.ninf_mask) + probs = self.decoder(encoded_last_node, state.loaded, ninf_mask=state.ninf_mask) # shape: (batch, pomo, problem+1) - - if self.training or self.model_params['eval_type'] == 'softmax': - while True: # to fix pytorch.multinomial bug on selecting 0 probability elements - with torch.no_grad(): - selected = probs.reshape(batch_size * pomo_size, -1).multinomial(1) \ - .squeeze(dim=1).reshape(batch_size, pomo_size) - # shape: (batch, pomo) - prob = probs[state.BATCH_IDX, state.POMO_IDX, selected].reshape(batch_size, pomo_size) - # shape: (batch, pomo) - if (prob != 0).all(): - break - - else: - selected = probs.argmax(dim=2) - # shape: (batch, pomo) - prob = None # value not needed. Can be anything. - - return selected, prob + return probs def _get_encoding(encoded_nodes, node_index_to_pick): @@ -141,9 +105,9 @@ def __init__(self, **model_params): self.Wv = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False) self.multi_head_combine = nn.Linear(head_num * qkv_dim, embedding_dim) - self.add_n_normalization_1 = AddAndInstanceNormalization(**model_params) + self.add_n_normalization_1 = Add_And_Normalization_Module(**model_params) self.feed_forward = FeedForward(**model_params) - self.add_n_normalization_2 = AddAndInstanceNormalization(**model_params) + self.add_n_normalization_2 = Add_And_Normalization_Module(**model_params) def forward(self, input1): # input1.shape: (batch, problem+1, embedding) @@ -319,50 +283,37 @@ def multi_head_attention(q, k, v, rank2_ninf_mask=None, rank3_ninf_mask=None): return out_concat -class AddAndInstanceNormalization(nn.Module): +class Add_And_Normalization_Module(nn.Module): def __init__(self, **model_params): super().__init__() embedding_dim = model_params['embedding_dim'] - self.norm = nn.InstanceNorm1d(embedding_dim, 
affine=True, track_running_stats=False) + if model_params["norm"] == "batch": + self.norm = nn.BatchNorm1d(embedding_dim, affine=True, track_running_stats=True) + elif model_params["norm"] == "instance": + self.norm = nn.InstanceNorm1d(embedding_dim, affine=True, track_running_stats=False) + else: + self.norm = None def forward(self, input1, input2): # input.shape: (batch, problem, embedding) - added = input1 + input2 - # shape: (batch, problem, embedding) - - transposed = added.transpose(1, 2) - # shape: (batch, embedding, problem) - - normalized = self.norm(transposed) - # shape: (batch, embedding, problem) - - back_trans = normalized.transpose(1, 2) - # shape: (batch, problem, embedding) + if isinstance(self.norm, nn.InstanceNorm1d): + transposed = added.transpose(1, 2) + # shape: (batch, embedding, problem) + normalized = self.norm(transposed) + # shape: (batch, embedding, problem) + back_trans = normalized.transpose(1, 2) + # shape: (batch, problem, embedding) + elif isinstance(self.norm, nn.BatchNorm1d): + batch_s, problem_s, embedding_dim = input1.size(0), input1.size(1), input1.size(2) + normalized = self.norm(added.reshape(batch_s * problem_s, embedding_dim)) + back_trans = normalized.reshape(batch_s, problem_s, embedding_dim) + else: + back_trans = added return back_trans -class AddAndBatchNormalization(nn.Module): - def __init__(self, **model_params): - super().__init__() - embedding_dim = model_params['embedding_dim'] - self.norm_by_EMB = nn.BatchNorm1d(embedding_dim, affine=True) - # 'Funny' Batch_Norm, as it will normalized by EMB dim - - def forward(self, input1, input2): - # input.shape: (batch, problem, embedding) - - batch_s = input1.size(0) - problem_s = input1.size(1) - embedding_dim = input1.size(2) - - added = input1 + input2 - normalized = self.norm_by_EMB(added.reshape(batch_s * problem_s, embedding_dim)) - back_trans = normalized.reshape(batch_s, problem_s, embedding_dim) - - return back_trans - class FeedForward(nn.Module): def 
__init__(self, **model_params): super().__init__() diff --git a/EAS/source/cvrp/read_data.py b/EAS/source/cvrp/read_data.py index 2165b3a..62e331d 100644 --- a/EAS/source/cvrp/read_data.py +++ b/EAS/source/cvrp/read_data.py @@ -1,5 +1,6 @@ -import numpy as np +import math import pickle +import numpy as np def read_instance_pkl(instances_path): @@ -9,13 +10,14 @@ def read_instance_pkl(instances_path): coord = [] demands = [] for instance_data in instances_data: - coord.append([instance_data[0]]) - coord[-1].extend(instance_data[1]) - coord[-1] = np.array(coord[-1]) - demands.append(np.array(instance_data[2])) + coord.append(instance_data[0]) # depot + coord[-1].extend(instance_data[1]) # nodes + coord[-1] = np.array(coord[-1]) # (1 + problem_size, 2) + demands.append(np.array(instance_data[2]) / instance_data[3]) + + coord = np.stack(coord) # (dataset_size, problem_size+1, 2) + demands = np.stack(demands) # (dataset_size, problem_size) - coord = np.stack(coord) - demands = np.stack(demands) return coord, demands diff --git a/EAS/source/eas_emb.py b/EAS/source/eas_emb.py index 078d4ae..402dad9 100644 --- a/EAS/source/eas_emb.py +++ b/EAS/source/eas_emb.py @@ -64,7 +64,7 @@ def run_eas_emb(model, instance_data, problem_size, config, get_episode_data_fn, if config.problem == "CVRP": # First Move is given first_action = LongTensor(np.zeros((batch_s, group_s))) # start from node_0-depot - # TODO: CVRP model need to do something? 
+ # model(group_state, selected=first_action) # do nothing for CVRP group_state, reward, done = env.step(first_action) solutions.append(first_action.unsqueeze(2)) step += 1 @@ -73,7 +73,7 @@ def run_eas_emb(model, instance_data, problem_size, config, get_episode_data_fn, second_action = LongTensor(np.arange(group_s) % problem_size)[None, :].expand(batch_s, group_s).clone() if iter > 0: second_action[:, -1] = incumbent_solutions_expanded[:, step] # Teacher forcing imitation learning loss - model(group_state, selected=second_action) # for the first step, set_q1 TODO: check for CVRP model + model(group_state, selected=second_action) # for the first step, set_q1 for TSP, do nothing for CVRP group_state, reward, done = env.step(second_action) solutions.append(second_action.unsqueeze(2)) step += 1 diff --git a/EAS/source/eas_lay.py b/EAS/source/eas_lay.py index 948aba5..4065efb 100644 --- a/EAS/source/eas_lay.py +++ b/EAS/source/eas_lay.py @@ -273,17 +273,16 @@ def run_eas_lay(model, instance_data, problem_size, config, get_episode_data_fn, if config.problem == "CVRP": # First Move is given first_action = LongTensor(np.zeros((batch_s, group_s))) # start from node_0-depot - # TODO: CVRP model need to do something? 
+ # model_modified(group_state, selected=first_action) # do nothing for CVRP group_state, reward, done = env.step(first_action) solutions.append(first_action.unsqueeze(2)) step += 1 # First/Second Move is given second_action = LongTensor(np.arange(group_s) % problem_size)[None, :].expand(batch_s, group_s).clone() - if iter > 0: second_action[:, -1] = incumbent_solutions_expanded[:, step] # Teacher forcing the imitation learning loss - model_modified(group_state, selected=second_action) # for the first step, set_q1 TODO: check for CVRP model + model_modified(group_state, selected=second_action) # for the first step, set_q1 for TSP, do nothing for CVRP group_state, reward, done = env.step(second_action) solutions.append(second_action.unsqueeze(2)) step += 1 diff --git a/EAS/source/eas_tab.py b/EAS/source/eas_tab.py index 3682c78..b1114ce 100644 --- a/EAS/source/eas_tab.py +++ b/EAS/source/eas_tab.py @@ -71,7 +71,7 @@ def run_eas_tab(model, instance_data, problem_size, config, get_episode_data_fn, last_action = first_action elif config.problem == "CVRP": # start from node_0-depot first_action = LongTensor(np.zeros((batch_s, group_s))) - # TODO: CVRP model need to do something? 
+ # model(group_state, selected=first_action) # do nothing for CVRP group_state, reward, done = env.step(first_action) last_action = first_action diff --git a/EAS/source/tsp/model.py b/EAS/source/tsp/model.py index 5a144e9..af95d8e 100644 --- a/EAS/source/tsp/model.py +++ b/EAS/source/tsp/model.py @@ -28,9 +28,9 @@ def forward(self, state, selected=None): else: encoded_last_node = _get_encoding(self.encoded_nodes, state.current_node) # shape: (batch, pomo, embedding) - prob = self.decoder(encoded_last_node, ninf_mask=state.ninf_mask) + probs = self.decoder(encoded_last_node, ninf_mask=state.ninf_mask) # shape: (batch, pomo, problem) - return prob + return probs def _get_encoding(encoded_nodes, node_index_to_pick): diff --git a/POMO/CVRP/CVRPEnv.py b/POMO/CVRP/CVRPEnv.py new file mode 100644 index 0000000..2dfc827 --- /dev/null +++ b/POMO/CVRP/CVRPEnv.py @@ -0,0 +1,238 @@ +from dataclasses import dataclass +import torch + +from ProblemDef import get_random_problems, augment_xy_data_by_8_fold + + +@dataclass +class Reset_State: + depot_xy: torch.Tensor = None + # shape: (batch, 1, 2) + node_xy: torch.Tensor = None + # shape: (batch, problem, 2) + node_demand: torch.Tensor = None + # shape: (batch, problem) + + +@dataclass +class Step_State: + BATCH_IDX: torch.Tensor = None + POMO_IDX: torch.Tensor = None + # shape: (batch, pomo) + selected_count: int = None + load: torch.Tensor = None + # shape: (batch, pomo) + current_node: torch.Tensor = None + # shape: (batch, pomo) + ninf_mask: torch.Tensor = None + # shape: (batch, pomo, problem+1) + finished: torch.Tensor = None + # shape: (batch, pomo) + + +class CVRPEnv: + def __init__(self, **env_params): + + # Const @INIT + #################################### + self.env_params = env_params + self.problem_size = env_params['problem_size'] + self.pomo_size = env_params['pomo_size'] + + self.FLAG__use_saved_problems = False + self.saved_depot_xy = None + self.saved_node_xy = None + self.saved_node_demand = None + 
self.saved_index = None + + # Const @Load_Problem + #################################### + self.batch_size = None + self.BATCH_IDX = None + self.POMO_IDX = None + # IDX.shape: (batch, pomo) + self.depot_node_xy = None + # shape: (batch, problem+1, 2) + self.depot_node_demand = None + # shape: (batch, problem+1) + + # Dynamic-1 + #################################### + self.selected_count = None + self.current_node = None + # shape: (batch, pomo) + self.selected_node_list = None + # shape: (batch, pomo, 0~) + + # Dynamic-2 + #################################### + self.at_the_depot = None + # shape: (batch, pomo) + self.load = None + # shape: (batch, pomo) + self.visited_ninf_flag = None + # shape: (batch, pomo, problem+1) + self.ninf_mask = None + # shape: (batch, pomo, problem+1) + self.finished = None + # shape: (batch, pomo) + + # states to return + #################################### + self.reset_state = Reset_State() + self.step_state = Step_State() + + def use_saved_problems(self, filename, device): + # TODO: Update data format + self.FLAG__use_saved_problems = True + loaded_dict = torch.load(filename, map_location=device) + self.saved_depot_xy = loaded_dict['depot_xy'] + self.saved_node_xy = loaded_dict['node_xy'] + self.saved_node_demand = loaded_dict['node_demand'] + self.saved_index = 0 + + def load_problems(self, batch_size, problems=None, aug_factor=1): + if problems is not None: + depot_xy, node_xy, node_demand = problems + elif self.FLAG__use_saved_problems: + depot_xy = self.saved_depot_xy[self.saved_index:self.saved_index + batch_size] + node_xy = self.saved_node_xy[self.saved_index:self.saved_index+batch_size] + node_demand = self.saved_node_demand[self.saved_index:self.saved_index+batch_size] + self.saved_index += batch_size + else: + depot_xy, node_xy, node_demand = get_random_problems(batch_size, self.problem_size, distribution='uniform', problem="cvrp") + self.batch_size = depot_xy.size(0) + + if aug_factor > 1: + if aug_factor == 8: + 
self.batch_size = self.batch_size * 8 + depot_xy = augment_xy_data_by_8_fold(depot_xy) + node_xy = augment_xy_data_by_8_fold(node_xy) + node_demand = node_demand.repeat(8, 1) + else: + raise NotImplementedError + + self.depot_node_xy = torch.cat((depot_xy, node_xy), dim=1) + # shape: (batch, problem+1, 2) + depot_demand = torch.zeros(size=(self.batch_size, 1)) + # shape: (batch, 1) + self.depot_node_demand = torch.cat((depot_demand, node_demand), dim=1) + # shape: (batch, problem+1) + + self.BATCH_IDX = torch.arange(self.batch_size)[:, None].expand(self.batch_size, self.pomo_size) + self.POMO_IDX = torch.arange(self.pomo_size)[None, :].expand(self.batch_size, self.pomo_size) + + self.reset_state.depot_xy = depot_xy + self.reset_state.node_xy = node_xy + self.reset_state.node_demand = node_demand + + self.step_state.BATCH_IDX = self.BATCH_IDX + self.step_state.POMO_IDX = self.POMO_IDX + + def reset(self): + self.selected_count = 0 + self.current_node = None + # shape: (batch, pomo) + self.selected_node_list = torch.zeros((self.batch_size, self.pomo_size, 0), dtype=torch.long) + # shape: (batch, pomo, 0~) + + self.at_the_depot = torch.ones(size=(self.batch_size, self.pomo_size), dtype=torch.bool) + # shape: (batch, pomo) + self.load = torch.ones(size=(self.batch_size, self.pomo_size)) + # shape: (batch, pomo) + self.visited_ninf_flag = torch.zeros(size=(self.batch_size, self.pomo_size, self.problem_size+1)) + # shape: (batch, pomo, problem+1) + self.ninf_mask = torch.zeros(size=(self.batch_size, self.pomo_size, self.problem_size+1)) + # shape: (batch, pomo, problem+1) + self.finished = torch.zeros(size=(self.batch_size, self.pomo_size), dtype=torch.bool) + # shape: (batch, pomo) + + reward = None + done = False + return self.reset_state, reward, done + + def pre_step(self): + self.step_state.selected_count = self.selected_count + self.step_state.load = self.load + self.step_state.current_node = self.current_node + self.step_state.ninf_mask = self.ninf_mask + 
self.step_state.finished = self.finished + + reward = None + done = False + return self.step_state, reward, done + + def step(self, selected): + # selected.shape: (batch, pomo) + + # Dynamic-1 + #################################### + self.selected_count += 1 + self.current_node = selected + # shape: (batch, pomo) + self.selected_node_list = torch.cat((self.selected_node_list, self.current_node[:, :, None]), dim=2) + # shape: (batch, pomo, 0~) + + # Dynamic-2 + #################################### + self.at_the_depot = (selected == 0) + + demand_list = self.depot_node_demand[:, None, :].expand(self.batch_size, self.pomo_size, -1) + # shape: (batch, pomo, problem+1) + gathering_index = selected[:, :, None] + # shape: (batch, pomo, 1) + selected_demand = demand_list.gather(dim=2, index=gathering_index).squeeze(dim=2) + # shape: (batch, pomo) + self.load -= selected_demand + self.load[self.at_the_depot] = 1 # refill loaded at the depot + + self.visited_ninf_flag[self.BATCH_IDX, self.POMO_IDX, selected] = float('-inf') + # shape: (batch, pomo, problem+1) + self.visited_ninf_flag[:, :, 0][~self.at_the_depot] = 0 # depot is considered unvisited, unless you are AT the depot + + self.ninf_mask = self.visited_ninf_flag.clone() + round_error_epsilon = 0.00001 + demand_too_large = self.load[:, :, None] + round_error_epsilon < demand_list + # shape: (batch, pomo, problem+1) + self.ninf_mask[demand_too_large] = float('-inf') + # shape: (batch, pomo, problem+1) + + newly_finished = (self.visited_ninf_flag == float('-inf')).all(dim=2) + # shape: (batch, pomo) + self.finished = self.finished + newly_finished + # shape: (batch, pomo) + + # do not mask depot for finished episode. 
+ self.ninf_mask[:, :, 0][self.finished] = 0 + + self.step_state.selected_count = self.selected_count + self.step_state.load = self.load + self.step_state.current_node = self.current_node + self.step_state.ninf_mask = self.ninf_mask + self.step_state.finished = self.finished + + # returning values + done = self.finished.all() + if done: + reward = -self._get_travel_distance() # note the minus sign! + else: + reward = None + + return self.step_state, reward, done + + def _get_travel_distance(self): + gathering_index = self.selected_node_list[:, :, :, None].expand(-1, -1, -1, 2) + # shape: (batch, pomo, selected_list_length, 2) + all_xy = self.depot_node_xy[:, None, :, :].expand(-1, self.pomo_size, -1, -1) + # shape: (batch, pomo, problem+1, 2) + + ordered_seq = all_xy.gather(dim=2, index=gathering_index) + # shape: (batch, pomo, selected_list_length, 2) + + rolled_seq = ordered_seq.roll(dims=2, shifts=-1) + segment_lengths = ((ordered_seq-rolled_seq)**2).sum(3).sqrt() + # shape: (batch, pomo, selected_list_length) + + travel_distances = segment_lengths.sum(2) + # shape: (batch, pomo) + return travel_distances diff --git a/POMO/CVRP/CVRPModel.py b/POMO/CVRP/CVRPModel.py new file mode 100644 index 0000000..480d4f1 --- /dev/null +++ b/POMO/CVRP/CVRPModel.py @@ -0,0 +1,413 @@ + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class CVRPModel(nn.Module): + + def __init__(self, **model_params): + super().__init__() + self.model_params = model_params + + self.encoder = CVRP_Encoder(**model_params) + self.decoder = CVRP_Decoder(**model_params) + self.encoded_nodes = None + # shape: (batch, problem+1, EMBEDDING_DIM) + + def pre_forward(self, reset_state, weights=None): + depot_xy = reset_state.depot_xy + # shape: (batch, 1, 2) + node_xy = reset_state.node_xy + # shape: (batch, problem, 2) + node_demand = reset_state.node_demand + # shape: (batch, problem) + node_xy_demand = torch.cat((node_xy, node_demand[:, :, None]), dim=2) + # shape: (batch, 
problem, 3) + + if weights is not None and self.model_params["meta_update_encoder"]: + self.encoded_nodes = self.encoder(depot_xy, node_xy_demand, weights=weights) + else: + self.encoded_nodes = self.encoder(depot_xy, node_xy_demand, weights=None) + # shape: (batch, problem+1, embedding) + self.decoder.set_kv(self.encoded_nodes, weights=weights) + + def forward(self, state, weights=None): + batch_size = state.BATCH_IDX.size(0) + pomo_size = state.BATCH_IDX.size(1) + + if state.selected_count == 0: # First Move, depot + selected = torch.zeros(size=(batch_size, pomo_size), dtype=torch.long) + prob = torch.ones(size=(batch_size, pomo_size)) + + # # Use Averaged encoded nodes for decoder input_1 + # encoded_nodes_mean = self.encoded_nodes.mean(dim=1, keepdim=True) + # # shape: (batch, 1, embedding) + # self.decoder.set_q1(encoded_nodes_mean, weights=weights) + + # # Use encoded_depot for decoder input_2 + # encoded_first_node = self.encoded_nodes[:, [0], :] + # # shape: (batch, 1, embedding) + # self.decoder.set_q2(encoded_first_node, weights=weights) + + elif state.selected_count == 1: # Second Move, POMO + selected = torch.arange(start=1, end=pomo_size+1)[None, :].expand(batch_size, pomo_size) + prob = torch.ones(size=(batch_size, pomo_size)) + + else: + encoded_last_node = _get_encoding(self.encoded_nodes, state.current_node) + # shape: (batch, pomo, embedding) + probs = self.decoder(encoded_last_node, state.load, ninf_mask=state.ninf_mask, weights=weights) + # shape: (batch, pomo, problem+1) + + while True: + if self.training or self.model_params['eval_type'] == 'softmax': + selected = probs.reshape(batch_size * pomo_size, -1).multinomial(1).squeeze(dim=1).reshape(batch_size, pomo_size) + # shape: (batch, pomo) + else: + selected = probs.argmax(dim=2) + # shape: (batch, pomo) + + prob = probs[state.BATCH_IDX, state.POMO_IDX, selected].reshape(batch_size, pomo_size) + # shape: (batch, pomo) + + if (prob != 0).all(): + break + + return selected, prob + + +def 
_get_encoding(encoded_nodes, node_index_to_pick): + # encoded_nodes.shape: (batch, problem, embedding) + # node_index_to_pick.shape: (batch, pomo) + + batch_size = node_index_to_pick.size(0) + pomo_size = node_index_to_pick.size(1) + embedding_dim = encoded_nodes.size(2) + + gathering_index = node_index_to_pick[:, :, None].expand(batch_size, pomo_size, embedding_dim) + # shape: (batch, pomo, embedding) + + picked_nodes = encoded_nodes.gather(dim=1, index=gathering_index) + # shape: (batch, pomo, embedding) + + return picked_nodes + + +######################################## +# ENCODER +######################################## + +class CVRP_Encoder(nn.Module): + def __init__(self, **model_params): + super().__init__() + self.model_params = model_params + embedding_dim = self.model_params['embedding_dim'] + encoder_layer_num = self.model_params['encoder_layer_num'] + + self.embedding_depot = nn.Linear(2, embedding_dim) + self.embedding_node = nn.Linear(3, embedding_dim) + self.layers = nn.ModuleList([EncoderLayer(**model_params) for _ in range(encoder_layer_num)]) + + def forward(self, depot_xy, node_xy_demand, weights=None): + # depot_xy.shape: (batch, 1, 2) + # node_xy_demand.shape: (batch, problem, 3) + if weights is None: + embedded_depot = self.embedding_depot(depot_xy) + # shape: (batch, 1, embedding) + embedded_node = self.embedding_node(node_xy_demand) + # shape: (batch, problem, embedding) + out = torch.cat((embedded_depot, embedded_node), dim=1) + # shape: (batch, problem+1, embedding) + for layer in self.layers: + out = layer(out) + else: + embedded_depot = F.linear(depot_xy, weights['encoder.embedding_depot.weight'], weights['encoder.embedding_depot.bias']) + embedded_node = F.linear(node_xy_demand, weights['encoder.embedding_node.weight'], weights['encoder.embedding_node.bias']) + out = torch.cat((embedded_depot, embedded_node), dim=1) + for idx, layer in enumerate(self.layers): + out = layer(out, weights=weights, index=idx) + + return out + # shape: 
class EncoderLayer(nn.Module):
    """One transformer encoder layer: multi-head self-attention and a
    feed-forward sub-layer, each wrapped in Add & Normalization.

    forward() has two execution paths:
      * weights is None -> use this module's own nn.Linear parameters;
      * weights given   -> run functionally with an external flat weight
        dict (MAML-style fast weights); `index` is this layer's position,
        used to build the 'encoder.layers.{index}.*' key names.
    """

    def __init__(self, **model_params):
        super().__init__()
        self.model_params = model_params
        embedding_dim = self.model_params['embedding_dim']
        head_num = self.model_params['head_num']
        qkv_dim = self.model_params['qkv_dim']

        # Attention projections (no bias, as in the original POMO model).
        self.Wq = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False)
        self.Wk = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False)
        self.Wv = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False)
        self.multi_head_combine = nn.Linear(head_num * qkv_dim, embedding_dim)

        self.add_n_normalization_1 = Add_And_Normalization_Module(**model_params)
        self.feed_forward = FeedForward(**model_params)
        self.add_n_normalization_2 = Add_And_Normalization_Module(**model_params)

    def forward(self, input1, weights=None, index=0):
        # input1.shape: (batch, problem+1, embedding)
        head_num = self.model_params['head_num']

        if weights is None:
            q = reshape_by_heads(self.Wq(input1), head_num=head_num)
            k = reshape_by_heads(self.Wk(input1), head_num=head_num)
            v = reshape_by_heads(self.Wv(input1), head_num=head_num)
            # q/k/v shape: (batch, head_num, problem+1, qkv_dim)
            out_concat = multi_head_attention(q, k, v)
            # shape: (batch, problem+1, head_num*qkv_dim)
            multi_head_out = self.multi_head_combine(out_concat)
            # shape: (batch, problem+1, embedding)
            out1 = self.add_n_normalization_1(input1, multi_head_out)
            out2 = self.feed_forward(out1)
            out3 = self.add_n_normalization_2(out1, out2)
        else:
            # Functional path with externally supplied fast weights.
            prefix = 'encoder.layers.{}.'.format(index)
            q = reshape_by_heads(F.linear(input1, weights[prefix + 'Wq.weight'], bias=None), head_num=head_num)
            k = reshape_by_heads(F.linear(input1, weights[prefix + 'Wk.weight'], bias=None), head_num=head_num)
            v = reshape_by_heads(F.linear(input1, weights[prefix + 'Wv.weight'], bias=None), head_num=head_num)
            out_concat = multi_head_attention(q, k, v)
            multi_head_out = F.linear(out_concat,
                                      weights[prefix + 'multi_head_combine.weight'],
                                      weights[prefix + 'multi_head_combine.bias'])
            if self.model_params['norm'] is None:
                # No norm layer -> no norm weights to forward.
                out1 = self.add_n_normalization_1(input1, multi_head_out)
            else:
                out1 = self.add_n_normalization_1(
                    input1, multi_head_out,
                    weights={'weight': weights[prefix + 'add_n_normalization_1.norm.weight'],
                             'bias': weights[prefix + 'add_n_normalization_1.norm.bias']})
            out2 = self.feed_forward(
                out1,
                weights={'weight1': weights[prefix + 'feed_forward.W1.weight'],
                         'bias1': weights[prefix + 'feed_forward.W1.bias'],
                         'weight2': weights[prefix + 'feed_forward.W2.weight'],
                         'bias2': weights[prefix + 'feed_forward.W2.bias']})
            if self.model_params['norm'] is None:
                out3 = self.add_n_normalization_2(out1, out2)
            else:
                out3 = self.add_n_normalization_2(
                    out1, out2,
                    weights={'weight': weights[prefix + 'add_n_normalization_2.norm.weight'],
                             'bias': weights[prefix + 'add_n_normalization_2.norm.bias']})

        return out3
        # shape: (batch, problem+1, embedding)


########################################
# DECODER
########################################

class CVRP_Decoder(nn.Module):
    """Autoregressive CVRP decoder.

    set_kv() caches key/value projections of the encoder output once per
    instance; forward() is then called at every construction step with the
    embedding of the last visited node plus the remaining vehicle load.
    """

    def __init__(self, **model_params):
        super().__init__()
        self.model_params = model_params
        embedding_dim = self.model_params['embedding_dim']
        head_num = self.model_params['head_num']
        qkv_dim = self.model_params['qkv_dim']

        # NOTE(review): Wq_1 / Wq_2 are intentionally not created here, yet
        # set_q1()/set_q2() below still reference self.Wq_1 / self.Wq_2 --
        # calling either would raise AttributeError. forward() does not use
        # q1/q2, so they appear to be dead code; confirm before enabling.
        self.Wq_last = nn.Linear(embedding_dim + 1, head_num * qkv_dim, bias=False)  # +1 for the load feature
        self.Wk = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False)
        self.Wv = nn.Linear(embedding_dim, head_num * qkv_dim, bias=False)

        self.multi_head_combine = nn.Linear(head_num * qkv_dim, embedding_dim)

        self.k = None  # cached keys, for multi-head attention
        self.v = None  # cached values, for multi-head attention
        self.single_head_key = None  # cached, for single-head attention

    def set_kv(self, encoded_nodes, weights=None):
        # encoded_nodes.shape: (batch, problem+1, embedding)
        head_num = self.model_params['head_num']
        if weights is None:
            self.k = reshape_by_heads(self.Wk(encoded_nodes), head_num=head_num)
            self.v = reshape_by_heads(self.Wv(encoded_nodes), head_num=head_num)
        else:
            self.k = reshape_by_heads(F.linear(encoded_nodes, weights['decoder.Wk.weight'], bias=None), head_num=head_num)
            self.v = reshape_by_heads(F.linear(encoded_nodes, weights['decoder.Wv.weight'], bias=None), head_num=head_num)
        # k/v shape: (batch, head_num, problem+1, qkv_dim)
        self.single_head_key = encoded_nodes.transpose(1, 2)
        # shape: (batch, embedding, problem+1)

    def set_q1(self, encoded_q1, weights=None):
        # encoded_q1.shape: (batch, n, embedding); n can be 1 or pomo
        # NOTE(review): relies on self.Wq_1, which __init__ does not define.
        head_num = self.model_params['head_num']
        if weights is None:
            self.q1 = reshape_by_heads(self.Wq_1(encoded_q1), head_num=head_num)
        else:
            self.q1 = reshape_by_heads(F.linear(encoded_q1, weights['decoder.Wq_1.weight'], bias=None), head_num=head_num)
        # shape: (batch, head_num, n, qkv_dim)

    def set_q2(self, encoded_q2, weights=None):
        # encoded_q2.shape: (batch, n, embedding); n can be 1 or pomo
        # NOTE(review): relies on self.Wq_2, which __init__ does not define.
        head_num = self.model_params['head_num']
        if weights is None:
            self.q2 = reshape_by_heads(self.Wq_2(encoded_q2), head_num=head_num)
        else:
            self.q2 = reshape_by_heads(F.linear(encoded_q2, weights['decoder.Wq_2.weight'], bias=None), head_num=head_num)
        # shape: (batch, head_num, n, qkv_dim)

    def forward(self, encoded_last_node, load, ninf_mask, weights=None):
        # encoded_last_node.shape: (batch, pomo, embedding)
        # load.shape: (batch, pomo) -- remaining capacity, appended as one extra feature
        # ninf_mask.shape: (batch, pomo, problem) with -inf at infeasible nodes
        head_num = self.model_params['head_num']
        sqrt_embedding_dim = self.model_params['sqrt_embedding_dim']
        logit_clipping = self.model_params['logit_clipping']

        # Multi-Head Attention over the cached encoder keys/values.
        input_cat = torch.cat((encoded_last_node, load[:, :, None]), dim=2)
        # shape: (batch, pomo, embedding+1)
        if weights is None:
            q_last = reshape_by_heads(self.Wq_last(input_cat), head_num=head_num)
        else:
            q_last = reshape_by_heads(F.linear(input_cat, weights['decoder.Wq_last.weight'], bias=None), head_num=head_num)
        q = q_last  # q1/q2 terms are disabled (see NOTE in __init__)
        # shape: (batch, head_num, pomo, qkv_dim)
        out_concat = multi_head_attention(q, self.k, self.v, rank3_ninf_mask=ninf_mask)
        # shape: (batch, pomo, head_num*qkv_dim)
        if weights is None:
            mh_atten_out = self.multi_head_combine(out_concat)
        else:
            mh_atten_out = F.linear(out_concat, weights['decoder.multi_head_combine.weight'], weights['decoder.multi_head_combine.bias'])
        # shape: (batch, pomo, embedding)

        # Single-Head Attention, for probability calculation.
        score = torch.matmul(mh_atten_out, self.single_head_key)
        # shape: (batch, pomo, problem)
        score_scaled = score / sqrt_embedding_dim
        score_clipped = logit_clipping * torch.tanh(score_scaled)  # bounded logits
        score_masked = score_clipped + ninf_mask  # -inf entries -> probability 0
        probs = F.softmax(score_masked, dim=2)
        # shape: (batch, pomo, problem)

        return probs


########################################
# NN SUB CLASS / FUNCTIONS
########################################

def reshape_by_heads(qkv, head_num):
    """Split the last dim into heads:
    (batch, n, head_num*key_dim) -> (batch, head_num, n, key_dim)."""
    batch_s, n = qkv.size(0), qkv.size(1)
    return qkv.reshape(batch_s, n, head_num, -1).transpose(1, 2)


def multi_head_attention(q, k, v, rank2_ninf_mask=None, rank3_ninf_mask=None):
    """Scaled dot-product attention over all heads at once.

    q shape: (batch, head_num, n, key_dim); n is 1 or the problem size.
    k, v shape: (batch, head_num, problem, key_dim)
    rank2_ninf_mask: (batch, problem); rank3_ninf_mask: (batch, group, problem)
    Returns the heads re-concatenated: (batch, n, head_num*key_dim).
    """
    batch_s, head_num, n, key_dim = q.size(0), q.size(1), q.size(2), q.size(3)
    input_s = k.size(2)

    score = torch.matmul(q, k.transpose(2, 3))
    # shape: (batch, head_num, n, problem)
    score_scaled = score / torch.sqrt(torch.tensor(key_dim, dtype=torch.float))
    if rank2_ninf_mask is not None:
        score_scaled = score_scaled + rank2_ninf_mask[:, None, None, :].expand(batch_s, head_num, n, input_s)
    if rank3_ninf_mask is not None:
        score_scaled = score_scaled + rank3_ninf_mask[:, None, :, :].expand(batch_s, head_num, n, input_s)

    weights = nn.Softmax(dim=3)(score_scaled)
    # shape: (batch, head_num, n, problem)
    out = torch.matmul(weights, v)
    # shape: (batch, head_num, n, key_dim)
    out_transposed = out.transpose(1, 2)
    # shape: (batch, n, head_num, key_dim)
    return out_transposed.reshape(batch_s, n, head_num * key_dim)


class Add_And_Normalization_Module(nn.Module):
    """Residual add followed by optional batch/instance normalization.

    model_params['norm'] selects 'batch', 'instance', or no normalization.
    When `weights` is given, normalization runs functionally with the
    supplied affine parameters (fast-weight path).
    """

    def __init__(self, **model_params):
        super().__init__()
        embedding_dim = model_params['embedding_dim']
        norm_type = model_params["norm"]
        if norm_type == "batch":
            self.norm = nn.BatchNorm1d(embedding_dim, affine=True, track_running_stats=True)
        elif norm_type == "instance":
            self.norm = nn.InstanceNorm1d(embedding_dim, affine=True, track_running_stats=False)
        else:
            self.norm = None

    def forward(self, input1, input2, weights=None):
        # input1/input2 shape: (batch, problem, embedding)
        added = input1 + input2
        if self.norm is None:
            return added
        if isinstance(self.norm, nn.InstanceNorm1d):
            # InstanceNorm1d expects (batch, channels, length) -> move embedding to dim 1.
            transposed = added.transpose(1, 2)
            if weights is None:
                normalized = self.norm(transposed)
            else:
                normalized = F.instance_norm(transposed, weight=weights['weight'], bias=weights['bias'])
            return normalized.transpose(1, 2)
        # BatchNorm1d path: flatten (batch, problem) into a single axis.
        batch_s, problem_s, embedding_dim = input1.size(0), input1.size(1), input1.size(2)
        flat = added.reshape(batch_s * problem_s, embedding_dim)
        if weights is None:
            normalized = self.norm(flat)
        else:
            normalized = F.batch_norm(flat, running_mean=self.norm.running_mean, running_var=self.norm.running_var,
                                      weight=weights['weight'], bias=weights['bias'], training=True)
        return normalized.reshape(batch_s, problem_s, embedding_dim)


class FeedForward(nn.Module):
    """Two-layer position-wise feed-forward network with ReLU."""

    def __init__(self, **model_params):
        super().__init__()
        embedding_dim = model_params['embedding_dim']
        ff_hidden_dim = model_params['ff_hidden_dim']

        self.W1 = nn.Linear(embedding_dim, ff_hidden_dim)
        self.W2 = nn.Linear(ff_hidden_dim, embedding_dim)

    def forward(self, input1, weights=None):
        # input1.shape: (batch, problem, embedding)
        if weights is None:
            return self.W2(F.relu(self.W1(input1)))
        hidden = F.relu(F.linear(input1, weights['weight1'], bias=weights['bias1']))
        return F.linear(hidden, weights['weight2'], bias=weights['bias2'])
import os, time
import pickle
from logging import getLogger
import torch
from torch.optim import Adam as Optimizer

from CVRPEnv import CVRPEnv as Env
from CVRPModel import CVRPModel as Model

from TSP_gurobi import solve_all_gurobi
from utils.utils import *
from utils.functions import load_dataset, save_dataset


class CVRPTester:
    """Zero-shot / few-shot evaluation of a trained CVRP POMO model.

    Loads a checkpoint and a pickled test set, optionally fine-tunes on a
    held-out slice of the same dataset file, and reports scores and
    optimality gaps (with and without instance augmentation).
    """

    def __init__(self,
                 env_params,
                 model_params,
                 tester_params,
                 fine_tune_params):

        # save arguments
        self.env_params = env_params
        self.model_params = model_params
        self.tester_params = tester_params
        self.fine_tune_params = fine_tune_params

        # result folder, logger
        self.logger = getLogger(name='trainer')
        self.result_folder = get_result_folder()

        # cuda
        USE_CUDA = self.tester_params['use_cuda']
        if USE_CUDA:
            cuda_device_num = self.tester_params['cuda_device_num']
            torch.cuda.set_device(cuda_device_num)
            self.device = torch.device('cuda', cuda_device_num)
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            self.device = torch.device('cpu')
            torch.set_default_tensor_type('torch.FloatTensor')

        # ENV and MODEL
        self.model = Model(**self.model_params)
        self.env = Env(**self.env_params)  # we assume instances in the test/fine-tune dataset have the same problem size.
        self.optimizer = Optimizer(self.model.parameters(), **self.fine_tune_params['optimizer'])
        self.score_list, self.aug_score_list, self.gap_list, self.aug_gap_list = [], [], [], []

        # load dataset; opt_sol entries look like [(obj, route), ...]
        self.test_data = load_dataset(tester_params['test_set_path'])[: self.tester_params['test_episodes']]
        opt_sol = load_dataset(tester_params['test_set_opt_sol_path'])[: self.tester_params['test_episodes']]
        self.opt_sol = [i[0] for i in opt_sol]

        # Restore model weights (optimizer state is deliberately not restored).
        model_load = tester_params['model_load']
        checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load)
        checkpoint = torch.load(checkpoint_fullname, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.logger.info(">> Model loaded from {}".format(checkpoint_fullname))

        # utility
        self.time_estimator = TimeEstimator()

    def run(self):
        """Entry point: zero-shot test or few-shot fine-tune + test, then
        pickle the per-instance results next to the working directory."""
        start_time = time.time()
        if self.tester_params['test_robustness']:
            # How to generate x_adv for CVRP (e.g., discrete demand) is still an open problem.
            raise NotImplementedError
        if self.fine_tune_params['enable']:
            # fine-tune model on (little) data which has the same distribution of the test dataset (few-shot)
            start = self.tester_params['test_episodes']
            self.fine_tune_data = load_dataset(self.tester_params['test_set_path'])[start: start + self.fine_tune_params['fine_tune_episodes']]
            self._fine_tune_and_test()
        else:
            # test the model on test dataset (zero-shot)
            self._test(store_res=True)
        print(">> Evaluation on {} finished within {:.2f}s".format(self.tester_params['test_set_path'], time.time() - start_time))

        # save results to file (named after the test-set file, in the CWD)
        with open(os.path.split(self.tester_params['test_set_path'])[-1], 'wb') as f:
            result = {"score_list": self.score_list, "aug_score_list": self.aug_score_list, "gap_list": self.gap_list, "aug_gap_list": self.aug_gap_list}
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
        print(">> Save final results to {}".format(os.path.split(self.tester_params['test_set_path'])[-1]))

    def _test(self, store_res=True):
        """Greedy-rollout evaluation over the whole test set.

        Returns (avg score, avg augmented score, avg gap %, avg aug gap %).
        """
        self.time_estimator.reset()
        score_AM, gap_AM = AverageMeter(), AverageMeter()
        aug_score_AM, aug_gap_AM = AverageMeter(), AverageMeter()

        test_num_episode = self.tester_params['test_episodes']
        assert len(self.test_data) == test_num_episode, "the number of test instances does not match!"
        episode = 0
        while episode < test_num_episode:
            remaining = test_num_episode - episode
            batch_size = min(self.tester_params['test_batch_size'], remaining)
            # load data; node demands are normalized by the vehicle capacity
            data = self.test_data[episode: episode + batch_size]
            depot_xy, node_xy, node_demand, capacity = [i[0] for i in data], [i[1] for i in data], [i[2] for i in data], [i[3] for i in data]
            depot_xy, node_xy, node_demand, capacity = torch.Tensor(depot_xy), torch.Tensor(node_xy), torch.Tensor(node_demand), torch.Tensor(capacity)
            node_demand = node_demand / capacity.view(-1, 1)
            data = (depot_xy, node_xy, node_demand)

            score, aug_score, all_score, all_aug_score = self._test_one_batch(data)
            opt_sol = self.opt_sol[episode: episode + batch_size]
            score_AM.update(score, batch_size)
            aug_score_AM.update(aug_score, batch_size)
            episode += batch_size
            # per-instance optimality gap in percent, clamped at 0
            gap = [max(all_score[i].item() - opt_sol[i], 0) / opt_sol[i] * 100 for i in range(batch_size)]
            aug_gap = [max(all_aug_score[i].item() - opt_sol[i], 0) / opt_sol[i] * 100 for i in range(batch_size)]
            gap_AM.update(sum(gap) / batch_size, batch_size)
            aug_gap_AM.update(sum(aug_gap) / batch_size, batch_size)

            if store_res:
                self.score_list += all_score.tolist()
                self.aug_score_list += all_aug_score.tolist()
                self.gap_list += gap
                self.aug_gap_list += aug_gap

            elapsed_time_str, remain_time_str = self.time_estimator.get_est_string(episode, test_num_episode)
            self.logger.info("episode {:3d}/{:3d}, Elapsed[{}], Remain[{}], score:{:.3f}, aug_score:{:.3f}".format(
                episode, test_num_episode, elapsed_time_str, remain_time_str, score, aug_score))

            all_done = (episode == test_num_episode)

            if all_done:
                self.logger.info(" *** Test Done *** ")
                self.logger.info(" NO-AUG SCORE: {:.4f}, Gap: {:.4f} ".format(score_AM.avg, gap_AM.avg))
                self.logger.info(" AUGMENTATION SCORE: {:.4f}, Gap: {:.4f} ".format(aug_score_AM.avg, aug_gap_AM.avg))
                print("{:.3f} ({:.3f}%)".format(score_AM.avg, gap_AM.avg))
                print("{:.3f} ({:.3f}%)".format(aug_score_AM.avg, aug_gap_AM.avg))

        return score_AM.avg, aug_score_AM.avg, gap_AM.avg, aug_gap_AM.avg

    def _test_one_batch(self, test_data):
        """Greedy POMO rollout (optionally x8 augmented) on one batch.

        Returns (mean no-aug score, mean aug score, per-instance no-aug
        scores, per-instance aug scores); scores are positive tour lengths.
        """
        # Augmentation
        if self.tester_params['augmentation_enable']:
            aug_factor = self.tester_params['aug_factor']
        else:
            aug_factor = 1

        # Ready
        self.model.eval()
        batch_size = test_data[-1].size(0)
        with torch.no_grad():
            self.env.load_problems(batch_size, problems=test_data, aug_factor=aug_factor)
            reset_state, _, _ = self.env.reset()
            self.model.pre_forward(reset_state)

            # POMO Rollout
            state, reward, done = self.env.pre_step()
            while not done:
                selected, _ = self.model(state)
                # shape: (batch, pomo)
                state, reward, done = self.env.step(selected)

        # Return
        aug_reward = reward.reshape(aug_factor, batch_size, self.env.pomo_size)
        # shape: (augmentation, batch, pomo)

        max_pomo_reward, _ = aug_reward.max(dim=2)  # get best results from pomo
        # shape: (augmentation, batch)
        no_aug_score = -max_pomo_reward[0, :].float()  # negative sign to make positive value
        no_aug_score_mean = no_aug_score.mean()

        max_aug_pomo_reward, _ = max_pomo_reward.max(dim=0)  # get best results from augmentation
        # shape: (batch,)
        aug_score = -max_aug_pomo_reward.float()  # negative sign to make positive value
        aug_score_mean = aug_score.mean()

        return no_aug_score_mean.item(), aug_score_mean.item(), no_aug_score, aug_score

    def _fine_tune_and_test(self):
        """Evaluate few-shot generalization: fine-tune k steps on a small
        fine-tune dataset, testing after each step."""
        fine_tune_episode = self.fine_tune_params['fine_tune_episodes']
        assert len(self.fine_tune_data) == fine_tune_episode, "the number of fine-tune instances does not match!"
        score_list, aug_score_list, gap_list, aug_gap_list = [], [], [], []

        for k in range(self.fine_tune_params['k']):
            self.logger.info("Start fine-tune step {}".format(k + 1))
            episode = 0
            while episode < fine_tune_episode:
                remaining = fine_tune_episode - episode
                batch_size = min(self.fine_tune_params['fine_tune_batch_size'], remaining)
                # load data; demands normalized by capacity, same as in _test()
                data = self.fine_tune_data[episode:episode + batch_size]
                depot_xy, node_xy, node_demand, capacity = [i[0] for i in data], [i[1] for i in data], [i[2] for i in data], [i[3] for i in data]
                depot_xy, node_xy, node_demand, capacity = torch.Tensor(depot_xy), torch.Tensor(node_xy), torch.Tensor(node_demand), torch.Tensor(capacity)
                node_demand = node_demand / capacity.view(-1, 1)
                data = (depot_xy, node_xy, node_demand)
                self._fine_tune_one_batch(data)
                episode += batch_size

            score, aug_score, gap, aug_gap = self._test(store_res=True)
            score_list.append(score); aug_score_list.append(aug_score)
            gap_list.append(gap); aug_gap_list.append(aug_gap)

        print("Final score_list: {}".format(score_list))
        print("Final aug_score_list {}".format(aug_score_list))
        print("Final gap_list: {}".format(gap_list))
        print("Final aug_gap_list: {}".format(aug_gap_list))

    def _fine_tune_one_batch(self, fine_tune_data):
        """One REINFORCE update (POMO shared-baseline) on one batch."""
        # Augmentation
        # NOTE(review): gated on fine_tune_params but the factor is read from
        # tester_params['aug_factor'] -- confirm this asymmetry is intended.
        if self.fine_tune_params['augmentation_enable']:
            aug_factor = self.tester_params['aug_factor']
        else:
            aug_factor = 1

        self.model.train()
        batch_size = fine_tune_data[-1].size(0)
        self.env.load_problems(batch_size, problems=fine_tune_data, aug_factor=aug_factor)
        reset_state, _, _ = self.env.reset()
        self.model.pre_forward(reset_state)
        prob_list = torch.zeros(size=(aug_factor * batch_size, self.env.pomo_size, 0))
        # shape: (augmentation * batch, pomo, 0~problem)

        # POMO Rollout, please note that the reward is negative (i.e., -length of route).
        state, reward, done = self.env.pre_step()
        while not done:
            selected, prob = self.model(state)
            # shape: (augmentation * batch, pomo)
            state, reward, done = self.env.step(selected)
            prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)

        # Loss: advantage against the mean reward over augmentation * pomo
        aug_reward = reward.reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1)
        # shape: (batch, augmentation * pomo)
        advantage = aug_reward - aug_reward.float().mean(dim=1, keepdims=True)
        # shape: (batch, augmentation * pomo)
        log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1)
        # size = (batch, augmentation * pomo)
        loss = -advantage * log_prob  # Minus Sign: To Increase REWARD
        # shape: (batch, augmentation * pomo)
        loss_mean = loss.mean()

        # Score (computed for reference; not returned)
        max_pomo_reward, _ = aug_reward.max(dim=1)  # get best results from pomo
        score_mean = -max_pomo_reward.float().mean()  # negative sign to make positive value

        # Step & Return
        self.optimizer.zero_grad()
        loss_mean.backward()
        self.optimizer.step()
note that the reward is negative (i.e., -length of route). + state, reward, done = self.env.pre_step() + while not done: + selected, prob = self.model(state) + # shape: (augmentation * batch, pomo) + state, reward, done = self.env.step(selected) + prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) + + # Loss + aug_reward = reward.reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1) + # shape: (batch, augmentation * pomo) + advantage = aug_reward - aug_reward.float().mean(dim=1, keepdims=True) + # shape: (batch, augmentation * pomo) + log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1) + # size = (batch, augmentation * pomo) + loss = -advantage * log_prob # Minus Sign: To Increase REWARD + # shape: (batch, augmentation * pomo) + loss_mean = loss.mean() + + # Score + max_pomo_reward, _ = aug_reward.max(dim=1) # get best results from pomo + score_mean = -max_pomo_reward.float().mean() # negative sign to make positive value + + # Step & Return + self.optimizer.zero_grad() + loss_mean.backward() + self.optimizer.step() diff --git a/POMO/CVRP/CVRPTrainer_meta.py b/POMO/CVRP/CVRPTrainer_meta.py new file mode 100644 index 0000000..f62569a --- /dev/null +++ b/POMO/CVRP/CVRPTrainer_meta.py @@ -0,0 +1,527 @@ +import os +import copy +import math +import time +import random +import torch +from logging import getLogger +from collections import OrderedDict +from torch.optim import Adam as Optimizer + +from CVRPEnv import CVRPEnv as Env +from CVRPModel import CVRPModel as Model +from ProblemDef import get_random_problems, generate_task_set +from utils.utils import * +from utils.functions import * +from CVRP_baseline import * + + +class CVRPTrainer: + """ + Implementation of POMO with MAML / FOMAML / Reptile on CVRP. 
+ For MAML & FOMAML, ref to "Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks"; + For Reptile, ref to "On First-Order Meta-Learning Algorithms". + Refer to "https://lilianweng.github.io/posts/2018-11-30-meta-learning" + """ + def __init__(self, + env_params, + model_params, + optimizer_params, + trainer_params, + meta_params): + + # save arguments + self.env_params = env_params + self.model_params = model_params + self.optimizer_params = optimizer_params + self.trainer_params = trainer_params + self.meta_params = meta_params + + # result folder, logger + self.logger = getLogger(name='trainer') + self.result_folder = get_result_folder() + self.result_log = LogData() + + # cuda + USE_CUDA = self.trainer_params['use_cuda'] + if USE_CUDA: + cuda_device_num = self.trainer_params['cuda_device_num'] + torch.cuda.set_device(cuda_device_num) + self.device = torch.device('cuda', cuda_device_num) + torch.set_default_tensor_type('torch.cuda.FloatTensor') + else: + self.device = torch.device('cpu') + torch.set_default_tensor_type('torch.FloatTensor') + + # Main Components + self.model_params["norm"] = None # Original "POMO" Paper uses instance/batch normalization + self.meta_model = Model(**self.model_params) + self.meta_optimizer = Optimizer(self.meta_model.parameters(), **self.optimizer_params['optimizer']) + self.alpha = self.meta_params['alpha'] # for reptile + self.task_set = generate_task_set(self.meta_params) + self.val_data, self.val_opt = {}, {} # for lkh3_offline + if self.meta_params["data_type"] == "size": + self.min_n, self.max_n, self.task_interval = self.task_set[0][0], self.task_set[-1][0], 5 # [20, 150] + self.task_w = {start: 1 / (len(self.task_set) // 5) for start in range(self.min_n, self.max_n, self.task_interval)} + # self.task_w = torch.full((len(self.task_set)//self.task_interval,), 1/(len(self.task_set)//self.task_interval)) + elif self.meta_params["data_type"] == "distribution": + self.task_w = torch.full((len(self.task_set),), 1 / 
len(self.task_set)) + else: + raise NotImplementedError + + # Restore + self.start_epoch = 1 + model_load = trainer_params['model_load'] + if model_load['enable']: + checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load) + checkpoint = torch.load(checkpoint_fullname, map_location=self.device) + self.meta_model.load_state_dict(checkpoint['model_state_dict']) + self.start_epoch = 1 + model_load['epoch'] + self.result_log.set_raw_data(checkpoint['result_log']) + self.meta_optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + self.logger.info(">> Model loaded from {}".format(checkpoint_fullname)) + + # utility + self.time_estimator = TimeEstimator() + + def run(self): + + start_time, best_mean = time.time(), 1000 + self.time_estimator.reset(self.start_epoch) + for epoch in range(self.start_epoch, self.meta_params['epochs'] + 1): + self.logger.info('=================================================================') + + # lr decay (by 10) to speed up convergence at 90th and 95th iterations + if epoch in [int(self.meta_params['epochs'] * 0.9), int(self.meta_params['epochs'] * 0.95)]: + self.optimizer_params['optimizer']['lr'] /= 10 + for group in self.meta_optimizer.param_groups: + group["lr"] /= 10 + print(">> LR decay to {}".format(group["lr"])) + + # Train + train_score, train_loss = self._train_one_epoch(epoch) + self.result_log.append('train_score', epoch, train_score) + self.result_log.append('train_loss', epoch, train_loss) + model_save_interval = self.trainer_params['logging']['model_save_interval'] + img_save_interval = self.trainer_params['logging']['img_save_interval'] + # Val + no_aug_score_list = [] + if self.meta_params["data_type"] == "size": + dir = "../../data/CVRP/Size/" + paths = ["cvrp100_uniform.pkl", "cvrp200_uniform.pkl", "cvrp300_uniform.pkl"] + elif self.meta_params["data_type"] == "distribution": + dir = "../../data/CVRP/Distribution/" + paths = ["cvrp100_uniform.pkl", "cvrp100_gaussian.pkl", "cvrp100_cluster.pkl", 
"cvrp100_diagonal.pkl", "cvrp100_cvrplib.pkl"] + elif self.meta_params["data_type"] == "size_distribution": + pass + if epoch <= 1 or (epoch % img_save_interval) == 0: + for val_path in paths: + no_aug_score = self._fast_val(self.meta_model, path=os.path.join(dir, val_path), val_episodes=64, mode="eval") + no_aug_score_list.append(round(no_aug_score, 4)) + self.result_log.append('val_score', epoch, no_aug_score_list) + cur_mean = sum(no_aug_score_list) / len(no_aug_score_list) + # save best checkpoint (conditioned on the val datasets!) + if cur_mean < best_mean: + best_mean = cur_mean + self.best_meta_model = copy.deepcopy(self.meta_model) + self.logger.info("Saving (best) trained_model") + checkpoint_dict = { + 'epoch': epoch, + 'model_state_dict': self.meta_model.state_dict(), + 'optimizer_state_dict': self.meta_optimizer.state_dict(), + 'result_log': self.result_log.get_raw_data() + } + torch.save(checkpoint_dict, '{}/best_checkpoint.pt'.format(self.result_folder)) + + # Logs & Checkpoint + elapsed_time_str, remain_time_str = self.time_estimator.get_est_string(epoch, self.meta_params['epochs']) + self.logger.info("Epoch {:3d}/{:3d}({:.2f}%): Time Est.: Elapsed[{}], Remain[{}], Val Score: {}".format( + epoch, self.meta_params['epochs'], epoch / self.meta_params['epochs'] * 100, elapsed_time_str, remain_time_str, no_aug_score_list)) + + all_done = (epoch == self.meta_params['epochs']) + + if epoch > 1 and (epoch % img_save_interval) == 0: # save latest images, every X epoch + self.logger.info("Saving log_image") + image_prefix = '{}/latest'.format(self.result_folder) + util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['train_score']) + util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['val_score']) + util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'], self.result_log, 
labels=['train_loss']) + + # Save Model + if all_done or (epoch % model_save_interval) == 0: + self.logger.info("Saving trained_model") + checkpoint_dict = { + 'epoch': epoch, + 'model_state_dict': self.meta_model.state_dict(), + 'optimizer_state_dict': self.meta_optimizer.state_dict(), + 'result_log': self.result_log.get_raw_data() + } + torch.save(checkpoint_dict, '{}/checkpoint-{}.pt'.format(self.result_folder, epoch)) + + if all_done: + self.logger.info(" *** Training Done *** ") + # self.logger.info("Now, printing log array...") + # util_print_log_array(self.logger, self.result_log) + + def _train_one_epoch(self, epoch): + """ + 1. Sample B training tasks from task distribution P(T) + 2. Inner-loop: for a batch of tasks T_i, POMO training -> \theta_i + 3. Outer-loop: update meta-model -> \theta_0 + """ + self.meta_optimizer.zero_grad() + score_AM = AverageMeter() + loss_AM = AverageMeter() + batch_size = self.meta_params['meta_batch_size'] + + """ + Adaptive task scheduler: + for size: gradually increase the problem size (Curriculum learning); + for distribution: we compute the relative gaps (w.r.t. 
LKH3) or estimate the potential improvements of each distribution (i.e., bootstrap) every X iters; + """ + start, end = 0, 0 + pass + + self._alpha_scheduler(epoch) # for reptile + fast_weights, val_loss, meta_grad_dict = [], 0, {(i, j): 0 for i, group in enumerate(self.meta_optimizer.param_groups) for j, _ in enumerate(group['params'])} + + for b in range(self.meta_params['B']): + # sample a task + if self.meta_params["data_type"] == "size": + task_params = random.sample(range(start, end + 1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] + # batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 100 else self.meta_params['meta_batch_size'] // 2 + elif self.meta_params["data_type"] == "distribution": + task_params = self.task_set[torch.multinomial(self.task_w, 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] + elif self.meta_params["data_type"] == "size_distribution": + pass + + # preparation + if self.meta_params['meta_method'] in ['fomaml', 'reptile']: + task_model = copy.deepcopy(self.meta_model) + optimizer = Optimizer(task_model.parameters(), **self.optimizer_params['optimizer']) + optimizer.load_state_dict(self.meta_optimizer.state_dict()) + elif self.meta_params['meta_method'] == 'maml': + if self.model_params['meta_update_encoder']: + fast_weight = OrderedDict(self.meta_model.named_parameters()) + else: + fast_weight = OrderedDict(self.meta_model.decoder.named_parameters()) + for k in list(fast_weight.keys()): + fast_weight["decoder." 
+ k] = fast_weight.pop(k) + + # inner-loop optimization + for step in range(self.meta_params['k']): + data = self._get_data(batch_size, task_params) + env_params = {'problem_size': data[-1].size(1), 'pomo_size': data[-1].size(1)} + self.meta_model.train() + if self.meta_params['meta_method'] in ['reptile', 'fomaml']: + avg_score, avg_loss = self._train_one_batch(task_model, data, Env(**env_params), optimizer) + elif self.meta_params['meta_method'] == 'maml': + avg_score, avg_loss, fast_weight = self._train_one_batch_maml(fast_weight, data, Env(**env_params)) + score_AM.update(avg_score.item(), batch_size) + loss_AM.update(avg_loss.item(), batch_size) + + val_data = self._get_val_data(batch_size, task_params) + self.meta_model.train() + if self.meta_params['meta_method'] == 'maml': + val_loss = self._fast_val(fast_weight, data=val_data, mode="maml") / self.meta_params['B'] + self.meta_optimizer.zero_grad() + val_loss.backward() + for i, group in enumerate(self.meta_optimizer.param_groups): + for j, p in enumerate(group['params']): + meta_grad_dict[(i, j)] += p.grad + elif self.meta_params['meta_method'] == 'fomaml': + val_loss = self._fast_val(task_model, data=val_data, mode="fomaml") / self.meta_params['B'] + optimizer.zero_grad() + val_loss.backward() + for i, group in enumerate(optimizer.param_groups): + for j, p in enumerate(group['params']): + meta_grad_dict[(i, j)] += p.grad + elif self.meta_params['meta_method'] == 'reptile': + fast_weights.append(task_model.state_dict()) + + # outer-loop optimization (update meta-model) + if self.meta_params['meta_method'] == 'maml': + self.meta_optimizer.zero_grad() + for i, group in enumerate(self.meta_optimizer.param_groups): + for j, p in enumerate(group['params']): + p.grad = meta_grad_dict[(i, j)] + self.meta_optimizer.step() + elif self.meta_params['meta_method'] == 'fomaml': + self.meta_optimizer.zero_grad() + for i, group in enumerate(self.meta_optimizer.param_groups): + for j, p in enumerate(group['params']): + 
p.grad = meta_grad_dict[(i, j)] + self.meta_optimizer.step() + elif self.meta_params['meta_method'] == 'reptile': + state_dict = {params_key: (self.meta_model.state_dict()[params_key] + self.alpha * torch.mean(torch.stack([fast_weight[params_key] - self.meta_model.state_dict()[params_key] for fast_weight in fast_weights], dim=0), dim=0)) for params_key in self.meta_model.state_dict()} + self.meta_model.load_state_dict(state_dict) + + # Log Once, for each epoch + self.logger.info('Meta Iteration {:3d}: alpha: {:6f}, Score: {:.4f}, Loss: {:.4f}'.format(epoch, self.alpha, score_AM.avg, loss_AM.avg)) + + return score_AM.avg, loss_AM.avg + + def _train_one_batch(self, task_model, data, env, optimizer=None): + + task_model.train() + batch_size = data[-1].size(0) + env.load_problems(batch_size, problems=data, aug_factor=1) + reset_state, _, _ = env.reset() + task_model.pre_forward(reset_state) + prob_list = torch.zeros(size=(batch_size, env.pomo_size, 0)) + # shape: (batch, pomo, 0~problem) + + # POMO Rollout + state, reward, done = env.pre_step() + while not done: + selected, prob = task_model(state) + # shape: (batch, pomo) + state, reward, done = env.step(selected) + prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) + + # Loss + advantage = reward - reward.float().mean(dim=1, keepdims=True) + # shape: (batch, pomo) + log_prob = prob_list.log().sum(dim=2) # for the first/last node, p=1 -> log_p=0 + # size = (batch, pomo) + loss = -advantage * log_prob # Minus Sign: To Increase REWARD + # shape: (batch, pomo) + loss_mean = loss.mean() + + # update model + optimizer.zero_grad() + loss_mean.backward() + optimizer.step() + + # Score + max_pomo_reward, _ = reward.max(dim=1) # get best results from pomo + score_mean = -max_pomo_reward.float().mean() # negative sign to make positive value + print(score_mean) + + return score_mean, loss_mean + + def _train_one_batch_maml(self, fast_weight, data, env, optimizer=None): + + batch_size = data[-1].size(0) + 
env.load_problems(batch_size, problems=data, aug_factor=1) + reset_state, _, _ = env.reset() + self.meta_model.pre_forward(reset_state, weights=fast_weight) + prob_list = torch.zeros(size=(batch_size, env.pomo_size, 0)) + # shape: (batch, pomo, 0~problem) + + # POMO Rollout + state, reward, done = env.pre_step() + while not done: + selected, prob = self.meta_model(state, weights=fast_weight) + # shape: (batch, pomo) + state, reward, done = env.step(selected) + prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) + + # Loss + advantage = reward - reward.float().mean(dim=1, keepdims=True) + log_prob = prob_list.log().sum(dim=2) # for the first/last node, p=1 -> log_p=0 + loss = -advantage * log_prob # Minus Sign: To Increase REWARD + # shape: (batch, pomo) + loss_mean = loss.mean() + + # 1. update model - in SGD way + # gradients = torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=True) # allow_unused=True + # fast_weight = OrderedDict( + # (name, param - self.optimizer_params['optimizer']['lr'] * grad) + # for ((name, param), grad) in zip(fast_weight.items(), gradients) + # ) + # 2. 
update model - in Adam way + gradients = torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=True) # allow_unused=True + w_t, (beta1, beta2), eps = [], self.meta_optimizer.param_groups[0]['betas'], self.meta_optimizer.param_groups[0]['eps'] + lr, weight_decay = self.optimizer_params['optimizer']['lr'], self.optimizer_params['optimizer']['weight_decay'] + for i, ((name, param), grad) in enumerate(zip(fast_weight.items(), gradients)): + if self.meta_optimizer.state_dict()['state'] != {}: + i = i if self.model_params['meta_update_encoder'] else i + 58 # i \in [0, 62] + state = self.meta_optimizer.state_dict()['state'][i] + step, exp_avg, exp_avg_sq = state['step'], state['exp_avg'], state['exp_avg_sq'] + step += 1 + step = step.item() + # compute grad based on Adam source code using in-place operation + # update Adam stat (step, exp_avg and exp_avg_sq have already been updated by in-place operation) + # may encounter RuntimeError: (a leaf Variable that requires grad) / (the tensor used during grad computation) cannot use in-place operation. 
+ grad = grad.add(param, alpha=weight_decay) + exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) + exp_avg_sq.mul_(beta2).addcmul_(grad, grad.conj(), value=1 - beta2) + bias_correction1 = 1 - beta1 ** step + bias_correction2 = 1 - beta2 ** step + step_size = lr / bias_correction1 + bias_correction2_sqrt = math.sqrt(bias_correction2) + denom = (exp_avg_sq.sqrt() / bias_correction2_sqrt).add_(eps) + # param.addcdiv_(exp_avg, denom, value=-step_size) + param = param - step_size * exp_avg / denom + self.meta_optimizer.state_dict()['state'][i]['exp_avg'] = exp_avg.clone().detach() + self.meta_optimizer.state_dict()['state'][i]['exp_avg_sq'] = exp_avg_sq.clone().detach() + else: + param = param - lr * grad + w_t.append((name, param)) + fast_weight = OrderedDict(w_t) + + # Score + max_pomo_reward, _ = reward.max(dim=1) # get best results from pomo + score_mean = -max_pomo_reward.float().mean() # negative sign to make positive value + print(score_mean) + + return score_mean, loss_mean, fast_weight + + def _fast_val(self, model, data=None, path=None, offset=0, val_episodes=32, mode="eval", return_all=False): + aug_factor = 1 + if data is None: + data = load_dataset(path)[offset: offset + val_episodes] # load dataset from file + depot_xy, node_xy, node_demand, capacity = [i[0] for i in data], [i[1] for i in data], [i[2] for i in data], [i[3] for i in data] + depot_xy, node_xy, node_demand, capacity = torch.Tensor(depot_xy), torch.Tensor(node_xy), torch.Tensor(node_demand), torch.Tensor(capacity) + node_demand = node_demand / capacity.view(-1, 1) + data = (depot_xy, node_xy, node_demand) + env = Env(**{'problem_size': data[-1].size(1), 'pomo_size': data[-1].size(1)}) + + batch_size = data[-1].size(0) + if mode == "eval": + model.eval() + with torch.no_grad(): + env.load_problems(batch_size, problems=data, aug_factor=aug_factor) + reset_state, _, _ = env.reset() + model.pre_forward(reset_state) + state, reward, done = env.pre_step() + while not done: + selected, _ = model(state) 
+ # shape: (batch, pomo) + state, reward, done = env.step(selected) + + elif mode in ["maml", "fomaml"]: + fast_weight = model + env.load_problems(batch_size, problems=data, aug_factor=aug_factor) + reset_state, _, _ = env.reset() + if mode == "maml": + self.meta_model.pre_forward(reset_state, weights=fast_weight) + else: + model.pre_forward(reset_state) + prob_list = torch.zeros(size=(batch_size, env.pomo_size, 0)) + state, reward, done = env.pre_step() + while not done: + if mode == "maml": + selected, prob = self.meta_model(state, weights=fast_weight) + else: + selected, prob = model(state) + # shape: (batch, pomo) + state, reward, done = env.step(selected) + prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) + + advantage = reward - reward.float().mean(dim=1, keepdims=True) + log_prob = prob_list.log().sum(dim=2) # for the first/last node, p=1 -> log_p=0 + loss = -advantage * log_prob # Minus Sign: To Increase REWARD + loss_mean = loss.mean() + else: + raise NotImplementedError + + # Return + aug_reward = reward.reshape(aug_factor, batch_size, env.pomo_size) + # shape: (augmentation, batch, pomo) + max_pomo_reward, _ = aug_reward.max(dim=2) # get best results from pomo + # shape: (augmentation, batch) + no_aug_score = -max_pomo_reward[0, :].float().mean() # negative sign to make positive value + print(no_aug_score) + + if mode == "eval": + if return_all: + return -max_pomo_reward[0, :].float() + else: + return no_aug_score.detach().item() + else: + return loss_mean + + def _bootstrap(self, fast_weight, data, mode="eval"): + """ + mode = "maml": Ref to "Bootstrap Meta-Learning", ICLR 2022 (not implemented for CVRP); + mode = "eval": Used to update task weights. 
+ """ + assert mode in ["eval"], "{} not implemented!".format(mode) + bootstrap_weight = fast_weight + batch_size, aug_factor = data[-1].size(0), 1 + bootstrap_reward = torch.full((batch_size, 1), float("-inf")) + optimizer = Optimizer(bootstrap_weight.parameters(), **self.optimizer_params['optimizer']) + # optimizer.load_state_dict(self.meta_optimizer.state_dict()) + with torch.enable_grad(): + for L in range(self.meta_params['bootstrap_steps']): + env = Env(**{'problem_size': data[-1].size(1), 'pomo_size': data[-1].size(1)}) + env.load_problems(batch_size, problems=data, aug_factor=aug_factor) + reset_state, _, _ = env.reset() + bootstrap_weight.pre_forward(reset_state) + prob_list = torch.zeros(size=(aug_factor * batch_size, env.pomo_size, 0)) + state, reward, done = env.pre_step() + while not done: + selected, prob = bootstrap_weight(state) + state, reward, done = env.step(selected) # (aug_factor * batch_size, pomo_size) + prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) + + # (batch, augmentation * pomo) + reward = reward.reshape(aug_factor, batch_size, env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1) + advantage = reward - reward.float().mean(dim=1, keepdims=True) + log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, env.pomo_size).permute(1, 0, 2).reshape(batch_size, -1) + loss = -advantage * log_prob + loss_mean = loss.mean() + + optimizer.zero_grad() + loss_mean.backward() + optimizer.step() + + max_pomo_reward, _ = reward.max(dim=1) + max_pomo_reward = max_pomo_reward.view(-1, 1) + bootstrap_reward = torch.where(max_pomo_reward > bootstrap_reward, max_pomo_reward, bootstrap_reward) # (batch_size, 1) + + return bootstrap_reward + + def _get_data(self, batch_size, task_params): + """ + Return CVRP data with the form of: + depot_xy: [batch_size, 1, 2] + node_xy: [batch_size, problem_size, 2] + node_demand (unnormalized): [batch_size, problem_size] + capacity: [batch_size] + """ + if self.meta_params['data_type'] == 
'distribution': + assert len(task_params) == 2 + data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[1], distribution='gaussian_mixture', problem="cvrp") + elif self.meta_params['data_type'] == 'size': + assert len(task_params) == 1 + data = get_random_problems(batch_size, task_params[0], num_modes=0, cdist=0, distribution='uniform', problem="cvrp") + elif self.meta_params['data_type'] == "size_distribution": + assert len(task_params) == 3 + data = get_random_problems(batch_size, task_params[0], num_modes=task_params[1], cdist=task_params[2], distribution='gaussian_mixture', problem="cvrp") + else: + raise NotImplementedError + + # normalized node_demand by capacity & only return (depot_xy, node_xy, node_demand) + if len(data) == 4: + depot_xy, node_xy, node_demand, capacity = data + node_demand = node_demand / capacity.view(-1, 1) + data = (depot_xy, node_xy, node_demand) + + return data + + def _get_val_data(self, batch_size, task_params): + if self.meta_params["data_type"] == "size": + start1, end1 = min(task_params[0] + 10, self.max_n), min(task_params[0] + 20, self.max_n) + val_size = random.sample(range(start1, end1 + 1), 1)[0] + val_data = self._get_data(batch_size, (val_size,)) + # val_data = self._get_data(batch_size, task_params) # TODO: which is better? + elif self.meta_params["data_type"] == "distribution": + val_data = self._get_data(batch_size, task_params) + elif self.meta_params["data_type"] == "size_distribution": + pass + else: + raise NotImplementedError + + return val_data + + def _alpha_scheduler(self, epoch): + """ + Update param for Reptile. 
+ """ + self.alpha = max(self.alpha * self.meta_params['alpha_decay'], 0.0001) + + def _update_task_weight(self, epoch): + pass diff --git a/POMO/CVRP/CVRPTrainer_pomo.py b/POMO/CVRP/CVRPTrainer_pomo.py new file mode 100644 index 0000000..79510a8 --- /dev/null +++ b/POMO/CVRP/CVRPTrainer_pomo.py @@ -0,0 +1,274 @@ +import os +import copy +import math +import time +import random +import torch +from logging import getLogger +from collections import OrderedDict +from torch.optim import Adam as Optimizer + +from CVRPEnv import CVRPEnv as Env +from CVRPModel import CVRPModel as Model +from ProblemDef import get_random_problems, generate_task_set +from utils.utils import * +from utils.functions import * +from CVRP_baseline import * + + +class CVRPTrainer: + """ + Implementation of POMO under the same training setting of POMO + meta-learning methods. + """ + def __init__(self, + env_params, + model_params, + optimizer_params, + trainer_params, + meta_params): + + # save arguments + self.env_params = env_params + self.model_params = model_params + self.optimizer_params = optimizer_params + self.trainer_params = trainer_params + self.meta_params = meta_params + + # result folder, logger + self.logger = getLogger(name='trainer') + self.result_folder = get_result_folder() + self.result_log = LogData() + + # cuda + USE_CUDA = self.trainer_params['use_cuda'] + if USE_CUDA: + cuda_device_num = self.trainer_params['cuda_device_num'] + torch.cuda.set_device(cuda_device_num) + self.device = torch.device('cuda', cuda_device_num) + torch.set_default_tensor_type('torch.cuda.FloatTensor') + else: + self.device = torch.device('cpu') + torch.set_default_tensor_type('torch.FloatTensor') + + # Main Components + self.model_params["norm"] = "instance" # Original "POMO" Paper uses instance/batch normalization + self.model = Model(**self.model_params) + self.optimizer = Optimizer(self.model.parameters(), **self.optimizer_params['optimizer']) + self.task_set = generate_task_set(self.meta_params) 
+ self.task_w = torch.full((len(self.task_set),), 1 / len(self.task_set)) + + # Restore + self.start_epoch = 1 + model_load = trainer_params['model_load'] + if model_load['enable']: + checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load) + checkpoint = torch.load(checkpoint_fullname, map_location=self.device) + self.model.load_state_dict(checkpoint['model_state_dict']) + self.start_epoch = 1 + model_load['epoch'] + self.result_log.set_raw_data(checkpoint['result_log']) + self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) + self.logger.info('Saved Model Loaded !!') + + # utility + self.time_estimator = TimeEstimator() + + def run(self): + start_time = time.time() + self.time_estimator.reset(self.start_epoch) + for epoch in range(self.start_epoch, self.meta_params['epochs']+1): + self.logger.info('=================================================================') + + # lr decay (by 10) to speed up convergence at 90th and 95th iterations + if epoch in [int(self.meta_params['epochs'] * 0.9), int(self.meta_params['epochs'] * 0.95)]: + self.optimizer_params['optimizer']['lr'] /= 10 + for group in self.optimizer.param_groups: + group["lr"] /= 10 + print(">> LR decay to {}".format(group["lr"])) + + # Train + train_score, train_loss = self._train_one_epoch(epoch) + self.result_log.append('train_score', epoch, train_score) + self.result_log.append('train_loss', epoch, train_loss) + model_save_interval = self.trainer_params['logging']['model_save_interval'] + img_save_interval = self.trainer_params['logging']['img_save_interval'] + # Val + no_aug_score_list = [] + if self.meta_params["data_type"] == "size": + dir = "../../data/CVRP/Size/" + paths = ["cvrp100_uniform.pkl", "cvrp200_uniform.pkl", "cvrp300_uniform.pkl"] + elif self.meta_params["data_type"] == "distribution": + dir = "../../data/CVRP/Distribution/" + paths = ["cvrp100_uniform.pkl", "cvrp100_gaussian.pkl", "cvrp100_cluster.pkl", "cvrp100_diagonal.pkl", "cvrp100_cvrplib.pkl"] + 
elif self.meta_params["data_type"] == "size_distribution": + pass + if epoch <= 1 or (epoch % img_save_interval) == 0: + for val_path in paths: + no_aug_score = self._fast_val(self.model, path=os.path.join(dir, val_path), val_episodes=64) + no_aug_score_list.append(round(no_aug_score, 4)) + self.result_log.append('val_score', epoch, no_aug_score_list) + + # Logs & Checkpoint + elapsed_time_str, remain_time_str = self.time_estimator.get_est_string(epoch, self.meta_params['epochs']) + self.logger.info("Epoch {:3d}/{:3d}({:.2f}%): Time Est.: Elapsed[{}], Remain[{}], Val Score: {}".format( + epoch, self.meta_params['epochs'], epoch / self.meta_params['epochs'] * 100, elapsed_time_str, remain_time_str, no_aug_score_list)) + + all_done = (epoch == self.meta_params['epochs']) + + if epoch > 1 and (epoch % img_save_interval) == 0: # save latest images, every X epoch + self.logger.info("Saving log_image") + image_prefix = '{}/latest'.format(self.result_folder) + util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['train_score']) + util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['val_score']) + util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'], self.result_log, labels=['train_loss']) + + # Save Model + if all_done or (epoch % model_save_interval) == 0: + self.logger.info("Saving trained_model") + checkpoint_dict = { + 'epoch': epoch, + 'model_state_dict': self.model.state_dict(), + 'optimizer_state_dict': self.optimizer.state_dict(), + 'result_log': self.result_log.get_raw_data() + } + torch.save(checkpoint_dict, '{}/checkpoint-{}.pt'.format(self.result_folder, epoch)) + + if all_done: + self.logger.info(" *** Training Done *** ") + # self.logger.info("Now, printing log array...") + # util_print_log_array(self.logger, self.result_log) + + def _train_one_epoch(self, epoch): + """ + 
POMO Training, equivalent to the original POMO implementation. + """ + score_AM = AverageMeter() + loss_AM = AverageMeter() + batch_size = self.meta_params['meta_batch_size'] + + # Adaptive task scheduler + start, end = 0, 0 + pass + + # sample a batch of tasks + for b in range(self.meta_params['B']): + for step in range(self.meta_params['k']): + if self.meta_params["data_type"] == "size": + task_params = random.sample(range(start, end + 1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] + # batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 100 else self.meta_params['meta_batch_size'] // 2 + elif self.meta_params["data_type"] == "distribution": + task_params = self.task_set[torch.multinomial(self.task_w, 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] + elif self.meta_params["data_type"] == "size_distribution": + pass + + data = self._get_data(batch_size, task_params) + env_params = {'problem_size': data[-1].size(1), 'pomo_size': data[-1].size(1)} + avg_score, avg_loss = self._train_one_batch(data, Env(**env_params)) + score_AM.update(avg_score.item(), batch_size) + loss_AM.update(avg_loss.item(), batch_size) + + # Log Once, for each epoch + self.logger.info('Meta Iteration {:3d}: Score: {:.4f}, Loss: {:.4f}'.format(epoch, score_AM.avg, loss_AM.avg)) + + return score_AM.avg, loss_AM.avg + + def _train_one_batch(self, data, env): + + self.model.train() + batch_size = data[-1].size(0) + env.load_problems(batch_size, problems=data, aug_factor=1) + reset_state, _, _ = env.reset() + self.model.pre_forward(reset_state) + prob_list = torch.zeros(size=(batch_size, env.pomo_size, 0)) + # shape: (batch, pomo, 0~problem) + + # POMO Rollout + state, reward, done = env.pre_step() + while not done: + selected, prob = self.model(state) + # shape: (batch, pomo) + state, reward, done = env.step(selected) + prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) + + # Loss + advantage = reward 
- reward.float().mean(dim=1, keepdims=True) + # shape: (batch, pomo) + log_prob = prob_list.log().sum(dim=2) + # size = (batch, pomo) + loss = -advantage * log_prob # Minus Sign: To Increase REWARD + # shape: (batch, pomo) + loss_mean = loss.mean() + + # update model + self.optimizer.zero_grad() + loss_mean.backward() + self.optimizer.step() + + # Score + max_pomo_reward, _ = reward.max(dim=1) # get best results from pomo + score_mean = -max_pomo_reward.float().mean() # negative sign to make positive value + print(score_mean) + + return score_mean, loss_mean + + def _fast_val(self, model, data=None, path=None, offset=0, val_episodes=32, return_all=False): + aug_factor = 1 + if data is None: + data = load_dataset(path)[offset: offset+val_episodes] # load dataset from file + depot_xy, node_xy, node_demand, capacity = [i[0] for i in data], [i[1] for i in data], [i[2] for i in data], [i[3] for i in data] + depot_xy, node_xy, node_demand, capacity = torch.Tensor(depot_xy), torch.Tensor(node_xy), torch.Tensor(node_demand), torch.Tensor(capacity) + node_demand = node_demand / capacity.view(-1, 1) + data = (depot_xy, node_xy, node_demand) + env = Env(**{'problem_size': data[-1].size(1), 'pomo_size': data[-1].size(1)}) + + model.eval() + batch_size = data[-1].size(0) + with torch.no_grad(): + env.load_problems(batch_size, problems=data, aug_factor=aug_factor) + reset_state, _, _ = env.reset() + model.pre_forward(reset_state) + state, reward, done = env.pre_step() + while not done: + selected, _ = model(state) + # shape: (batch, pomo) + state, reward, done = env.step(selected) + + # Return + aug_reward = reward.reshape(aug_factor, batch_size, env.pomo_size) + # shape: (augmentation, batch, pomo) + max_pomo_reward, _ = aug_reward.max(dim=2) # get best results from pomo + # shape: (augmentation, batch) + no_aug_score = -max_pomo_reward[0, :].float().mean() # negative sign to make positive value + print(no_aug_score) + + if return_all: + return -max_pomo_reward[0, :].float() + 
else: + return no_aug_score.detach().item() + + def _get_data(self, batch_size, task_params): + """ + Return CVRP data with the form of: + depot_xy: [batch_size, 1, 2] + node_xy: [batch_size, problem_size, 2] + node_demand (unnormalized): [batch_size, problem_size] + capacity: [batch_size] + """ + if self.meta_params['data_type'] == 'distribution': + assert len(task_params) == 2 + data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[1], distribution='gaussian_mixture', problem="cvrp") + elif self.meta_params['data_type'] == 'size': + assert len(task_params) == 1 + data = get_random_problems(batch_size, task_params[0], num_modes=0, cdist=0, distribution='uniform', problem="cvrp") + elif self.meta_params['data_type'] == "size_distribution": + assert len(task_params) == 3 + data = get_random_problems(batch_size, task_params[0], num_modes=task_params[1], cdist=task_params[2], distribution='gaussian_mixture', problem="cvrp") + else: + raise NotImplementedError + + # normalized node_demand by capacity & only return (depot_xy, node_xy, node_demand) + if len(data) == 4: + depot_xy, node_xy, node_demand, capacity = data + node_demand = node_demand / capacity.view(-1, 1) + data = (depot_xy, node_xy, node_demand) + + return data diff --git a/POMO/CVRP/CVRP_baseline.py b/POMO/CVRP/CVRP_baseline.py new file mode 100644 index 0000000..06c5650 --- /dev/null +++ b/POMO/CVRP/CVRP_baseline.py @@ -0,0 +1,270 @@ +import argparse +import os, sys +import numpy as np +import re +from subprocess import check_call, check_output +from urllib.parse import urlparse +import tempfile +import time +from datetime import timedelta +import hygese as hgs +os.chdir(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, "..") # for utils +from utils.functions import check_extension, load_dataset, save_dataset, run_all_in_pool, move_to + + +def get_lkh_executable(url="http://www.akira.ruc.dk/~keld/research/LKH-3/LKH-3.0.7.tgz"): + 
+ cwd = os.path.abspath("lkh") + os.makedirs(cwd, exist_ok=True) + + file = os.path.join(cwd, os.path.split(urlparse(url).path)[-1]) + filedir = os.path.splitext(file)[0] + + if not os.path.isdir(filedir): + print("{} not found, downloading and compiling".format(filedir)) + + check_call(["wget", url], cwd=cwd) + assert os.path.isfile(file), "Download failed, {} does not exist".format(file) + check_call(["tar", "xvfz", file], cwd=cwd) + + assert os.path.isdir(filedir), "Extracting failed, dir {} does not exist".format(filedir) + check_call("make", cwd=filedir) + os.remove(file) + + executable = os.path.join(filedir, "LKH") + assert os.path.isfile(executable) + return os.path.abspath(executable) + + +def solve_lkh(executable, depot, loc, demand, capacity): + + with tempfile.TemporaryDirectory() as tempdir: + problem_filename = os.path.join(tempdir, "problem.vrp") + output_filename = os.path.join(tempdir, "output.tour") + param_filename = os.path.join(tempdir, "params.par") + + starttime = time.time() + write_vrplib(problem_filename, depot, loc, demand, capacity) + params = {"PROBLEM_FILE": problem_filename, "OUTPUT_TOUR_FILE": output_filename} + write_lkh_par(param_filename, params) + output = check_output([executable, param_filename]) + result = read_vrplib(output_filename, n=len(demand)) + duration = time.time() - starttime + return result, output, duration + + +def solve_lkh_log(executable, directory, name, depot, loc, demand, capacity, grid_size=1, runs=1, disable_cache=False): + + problem_filename = os.path.join(directory, "{}.lkh{}.vrp".format(name, runs)) + tour_filename = os.path.join(directory, "{}.lkh{}.tour".format(name, runs)) + output_filename = os.path.join(directory, "{}.lkh{}.pkl".format(name, runs)) + param_filename = os.path.join(directory, "{}.lkh{}.par".format(name, runs)) + log_filename = os.path.join(directory, "{}.lkh{}.log".format(name, runs)) + + try: + # May have already been run + if os.path.isfile(output_filename) and not disable_cache: + 
tour, duration = load_dataset(output_filename) + else: + write_vrplib(problem_filename, depot, loc, demand, capacity, grid_size, name=name) + + params = {"PROBLEM_FILE": problem_filename, "OUTPUT_TOUR_FILE": tour_filename, "RUNS": runs, "SEED": 1234} + write_lkh_par(param_filename, params) + + with open(log_filename, 'w') as f: + start = time.time() + check_call([executable, param_filename], stdout=f, stderr=f) + duration = time.time() - start + + tour = read_vrplib(tour_filename, n=len(demand)) + + save_dataset((tour, duration), output_filename) + + return calc_vrp_cost(depot, loc, tour), tour, duration + + except Exception as e: + raise + print("Exception occured") + print(e) + return None + + +def calc_vrp_cost(depot, loc, tour): + assert (np.sort(tour)[-len(loc):] == np.arange(len(loc)) + 1).all(), "All nodes must be visited once!" + # TODO validate capacity constraints + loc_with_depot = np.vstack((np.array(depot)[None, :], np.array(loc))) + sorted_locs = loc_with_depot[np.concatenate(([0], tour, [0]))] + return np.linalg.norm(sorted_locs[1:] - sorted_locs[:-1], axis=-1).sum() + + +def write_lkh_par(filename, parameters): + default_parameters = { # Use none to include as flag instead of kv + "SPECIAL": None, + "MAX_TRIALS": 10000, + "RUNS": 10, + "TRACE_LEVEL": 1, + "SEED": 0 + } + with open(filename, 'w') as f: + for k, v in {**default_parameters, **parameters}.items(): + if v is None: + f.write("{}\n".format(k)) + else: + f.write("{} = {}\n".format(k, v)) + + +def read_vrplib(filename, n): + with open(filename, 'r') as f: + tour = [] + dimension = 0 + started = False + for line in f: + if started: + loc = int(line) + if loc == -1: + break + tour.append(loc) + if line.startswith("DIMENSION"): + dimension = int(line.split(" ")[-1]) + + if line.startswith("TOUR_SECTION"): + started = True + + assert len(tour) == dimension + tour = np.array(tour).astype(int) - 1 # Subtract 1 as depot is 1 and should be 0 + tour[tour > n] = 0 # Any nodes above the number of nodes 
there are is also depot + assert tour[0] == 0 # Tour should start with depot + assert tour[-1] != 0 # Tour should not end with depot + return tour[1:].tolist() + + +def write_vrplib(filename, depot, loc, demand, capacity, grid_size, name="problem"): + + with open(filename, 'w') as f: + f.write("\n".join([ + "{} : {}".format(k, v) + for k, v in ( + ("NAME", name), + ("TYPE", "CVRP"), + ("DIMENSION", len(loc) + 1), + ("EDGE_WEIGHT_TYPE", "EUC_2D"), + ("CAPACITY", capacity) + ) + ])) + f.write("\n") + f.write("NODE_COORD_SECTION\n") + f.write("\n".join([ + "{}\t{}\t{}".format(i + 1, int(x / grid_size * 100000 + 0.5), int(y / grid_size * 100000 + 0.5)) # VRPlib does not take floats + #"{}\t{}\t{}".format(i + 1, x, y) + for i, (x, y) in enumerate([depot] + loc) + ])) + f.write("\n") + f.write("DEMAND_SECTION\n") + f.write("\n".join([ + "{}\t{}".format(i + 1, d) + for i, d in enumerate([0] + demand) + ])) + f.write("\n") + f.write("DEPOT_SECTION\n") + f.write("1\n") + f.write("-1\n") + f.write("EOF\n") + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument("--method", type=str, default='lkh', choices=["lkh", ]) + parser.add_argument("--datasets", nargs='+', default=["../../data/CVRP/Size/cvrp100_uniform.pkl", ], help="Filename of the dataset(s) to evaluate") + parser.add_argument("-f", action='store_false', help="Set true to overwrite") + parser.add_argument("-o", default=None, help="Name of the results file to write") + parser.add_argument("--cpus", type=int, help="Number of CPUs to use, defaults to all cores") + parser.add_argument('--disable_cache', action='store_false', help='Disable caching') + parser.add_argument('--progress_bar_mininterval', type=float, default=0.1, help='Minimum interval') + parser.add_argument('-n', type=int, default=10000, help="Number of instances to process") + parser.add_argument('--offset', type=int, default=0, help="Offset where to start processing") + parser.add_argument('--results_dir', 
default='baseline_results', help="Name of results directory") + + opts = parser.parse_args() + + assert opts.o is None or len(opts.datasets) == 1, "Cannot specify result filename with more than one dataset" + + for dataset_path in opts.datasets: + + assert os.path.isfile(check_extension(dataset_path)), "File does not exist!" + + dataset_basename, ext = os.path.splitext(os.path.split(dataset_path)[-1]) + + if opts.o is None: + results_dir = os.path.join(opts.results_dir, "cvrp_{}".format(opts.method), dataset_basename) + os.makedirs(results_dir, exist_ok=True) + + out_file = os.path.join(results_dir, "{}{}{}-{}{}".format( + dataset_basename, + "offset{}".format(opts.offset) if opts.offset is not None else "", + "n{}".format(opts.n) if opts.n is not None else "", + opts.method, ext + )) + else: + out_file = opts.o + + assert opts.f or not os.path.isfile( + out_file), "File already exists! Try running with -f option to overwrite." + + match = re.match(r'^([a-z_]+)(\d*)$', opts.method) + assert match + method = match[1] + runs = 1 if match[2] == '' else int(match[2]) + + start_t = time.time() + if method == "lkh": + executable = get_lkh_executable() + + target_dir = os.path.join(results_dir, "{}-{}".format( + dataset_basename, + opts.method + )) + assert opts.f or not os.path.isdir(target_dir), \ + "Target dir already exists! Try running with -f option to overwrite." 
+ + if not os.path.isdir(target_dir): + os.makedirs(target_dir) + + # CVRP contains tuple rather than single loc array + dataset = load_dataset(dataset_path) + + use_multiprocessing = False + + def run_func(args): + directory, name, *args = args + depot, loc, demand, capacity, *args = args + depot = depot[0] if len(depot) == 1 else depot # if depot: [[x, y]] -> [x, y] + grid_size = 1 + if len(args) > 0: + depot_types, customer_types, grid_size = args + + return solve_lkh_log( + executable, + directory, name, + depot, loc, demand, capacity, grid_size, + runs=runs, disable_cache=opts.disable_cache + ) + + # Note: only processing n items is handled by run_all_in_pool + results, parallelism = run_all_in_pool( + run_func, + target_dir, dataset, opts, use_multiprocessing=use_multiprocessing + ) + else: + assert False, "Unknown method: {}".format(opts.method) + + costs, tours, durations = zip(*results) # Not really costs since they should be negative + print(">> Solving {} instances within {:.2f}s using {}".format(opts.n, time.time()-start_t, opts.method)) + print("Average cost: {} +- {}".format(np.mean(costs), 2 * np.std(costs) / np.sqrt(len(costs)))) + print("Average serial duration: {} +- {}".format(np.mean(durations), 2 * np.std(durations) / np.sqrt(len(durations)))) + print("Average parallel duration: {}".format(np.mean(durations) / parallelism)) + print("Calculated total duration: {}".format(timedelta(seconds=int(np.sum(durations) / parallelism)))) + + results = [(i[0], i[1]) for i in results] + save_dataset(results, out_file) # [(obj, route), ...] 
diff --git a/POMO/CVRP/test.py b/POMO/CVRP/test.py new file mode 100644 index 0000000..4d9097d --- /dev/null +++ b/POMO/CVRP/test.py @@ -0,0 +1,121 @@ +import os, sys +os.chdir(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, "..") # for utils +import torch +import logging +from utils.utils import create_logger, copy_all_src +from utils.functions import seed_everything, check_null_hypothesis +from CVRPTester import CVRPTester as Tester + +DEBUG_MODE = False +USE_CUDA = not DEBUG_MODE and torch.cuda.is_available() +CUDA_DEVICE_NUM = 0 + +########################################################################################## +# parameters + +env_params = { + 'problem_size': 100, + 'pomo_size': 100, +} + +model_params = { + 'embedding_dim': 128, + 'sqrt_embedding_dim': 128**(1/2), + 'encoder_layer_num': 6, + 'qkv_dim': 16, + 'head_num': 8, + 'logit_clipping': 10, + 'ff_hidden_dim': 512, + 'eval_type': 'argmax', + 'norm': None +} + + +tester_params = { + 'use_cuda': USE_CUDA, + 'cuda_device_num': CUDA_DEVICE_NUM, + 'seed': 2023, + 'model_load': { + 'path': '../../pretrained/pomo_pretrained', # directory path of pre-trained model and log files saved. + 'epoch': 30500, # epoch version of pre-trained model to load. 
+ }, + 'test_episodes': 10000, + 'test_batch_size': 10000, + 'augmentation_enable': True, + 'test_robustness': False, + 'aug_factor': 8, + 'aug_batch_size': 100, + 'test_set_path': '../../data/CVRP/Size/cvrp100_uniform.pkl', + 'test_set_opt_sol_path': '../../data/CVRP/Size/opt_cvrp100_uniform.pkl' +} + +fine_tune_params = { + 'enable': False, # evaluate few-shot generalization + 'fine_tune_episodes': 500, # how many data used to fine-tune the pretrained model + 'k': 20, # fine-tune steps/epochs + 'fine_tune_batch_size': 64, # the batch size of the inner-loop optimization + 'augmentation_enable': False, + 'optimizer': { + 'lr': 1e-4 * 0.1, + 'weight_decay': 1e-6 + } +} + +if tester_params['augmentation_enable']: + tester_params['test_batch_size'] = tester_params['aug_batch_size'] + +logger_params = { + 'log_file': { + 'desc': 'test_cvrp', + 'filename': 'log.txt' + } +} + + +def main(): + if DEBUG_MODE: + _set_debug_mode() + + create_logger(**logger_params) + _print_config() + + seed_everything(tester_params['seed']) + + tester = Tester(env_params=env_params, + model_params=model_params, + tester_params=tester_params, + fine_tune_params=fine_tune_params) + + copy_all_src(tester.result_folder) + + tester.run() + + +def _set_debug_mode(): + global tester_params + tester_params['test_episodes'] = 10 + + +def _print_config(): + logger = logging.getLogger('root') + logger.info('DEBUG_MODE: {}'.format(DEBUG_MODE)) + logger.info('USE_CUDA: {}, CUDA_DEVICE_NUM: {}'.format(USE_CUDA, CUDA_DEVICE_NUM)) + [logger.info(g_key + "{}".format(globals()[g_key])) for g_key in globals().keys() if g_key.endswith('params')] + + +def t_test(path1, path2): + """ + Conduct T-test to check the null hypothesis. If p < 0.05, the null hypothesis is rejected. 
+ """ + import pickle + with open(path1, 'rb') as f1: + results1 = pickle.load(f1) + with open(path2, 'rb') as f2: + results2 = pickle.load(f2) + check_null_hypothesis(results1["score_list"], results2["score_list"]) + check_null_hypothesis(results1["aug_score_list"], results2["aug_score_list"]) + + +if __name__ == "__main__": + main() diff --git a/POMO/CVRP/train.py b/POMO/CVRP/train.py new file mode 100644 index 0000000..27d1316 --- /dev/null +++ b/POMO/CVRP/train.py @@ -0,0 +1,151 @@ +import os, sys +os.chdir(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, "..") # for utils +import torch +import logging +from utils.utils import create_logger, copy_all_src +from utils.functions import seed_everything +from CVRPTrainer_pomo import CVRPTrainer as Trainer_pomo +from CVRPTrainer_meta import CVRPTrainer as Trainer_meta + +DEBUG_MODE = False +USE_CUDA = not DEBUG_MODE and torch.cuda.is_available() +CUDA_DEVICE_NUM = 0 + +########################################################################################## +# parameters + +env_params = { + 'problem_size': 100, + 'pomo_size': 100, +} + +model_params = { + 'embedding_dim': 128, + 'sqrt_embedding_dim': 128**(1/2), + 'encoder_layer_num': 6, + 'qkv_dim': 16, + 'head_num': 8, + 'logit_clipping': 10, + 'ff_hidden_dim': 512, + 'eval_type': 'argmax', + 'meta_update_encoder': True, +} + +optimizer_params = { + 'optimizer': { + 'lr': 1e-4, + 'weight_decay': 1e-6 + }, +} + +trainer_params = { + 'use_cuda': USE_CUDA, + 'cuda_device_num': CUDA_DEVICE_NUM, + 'seed': 1234, + # 'batch_size': 64, + 'logging': { + 'model_save_interval': 25000, + 'img_save_interval': 10, + 'log_image_params_1': { + 'json_foldername': 'log_image_style', + 'filename': 'general.json' + }, + 'log_image_params_2': { + 'json_foldername': 'log_image_style', + 'filename': 'style_loss_1.json' + }, + }, + 'model_load': { + 'enable': False, # enable loading pre-trained model + # 'path': './result/saved_CVRP20_model', # directory path of 
pre-trained model and log files saved. + # 'epoch': 2000, # epoch version of pre-trained model to load. + + } +} + +meta_params = { + 'enable': True, # whether to use meta-learning or not + 'curriculum': False, # adaptively sample tasks + 'meta_method': 'maml', # choose from ['maml', 'fomaml', 'reptile'] + 'bootstrap_steps': 25, + 'data_type': 'size', # choose from ["size", "distribution", "size_distribution"] + 'epochs': 250000, # the number of meta-model updates: (250*100000) / (1*5*64) + 'B': 1, # the number of tasks in a mini-batch + 'k': 1, # gradient descent steps in the inner-loop optimization of meta-learning method + 'meta_batch_size': 64, # will be divided by 2 if problem_size >= 100 + 'update_weight': 1000, # update weight of each task per X iters + 'sch_epoch': 250000, # for the task scheduler of size setting + 'solver': 'lkh3_offline', # solver used to update the task weights, choose from ["bootstrap", "lkh3_online", "lkh3_offline", "best_model"] + 'alpha': 0.99, # params for the outer-loop optimization of reptile + 'alpha_decay': 0.999, # params for the outer-loop optimization of reptile +} + +logger_params = { + 'log_file': { + 'desc': 'train_cvrp', + 'filename': 'run_log' + } +} + + +def main(): + if DEBUG_MODE: + _set_debug_mode() + + create_logger(**logger_params) + _print_config() + + seed_everything(trainer_params['seed']) + + if not meta_params['enable']: + print(">> Start CVRP-POMO Training.") + trainer = Trainer_pomo(env_params=env_params, model_params=model_params, optimizer_params=optimizer_params, trainer_params=trainer_params, meta_params=meta_params) + elif meta_params['meta_method'] in ['maml', 'fomaml', 'reptile']: + print(">> Start CVRP-POMO-{} Training.".format(meta_params['meta_method'])) + trainer = Trainer_meta(env_params=env_params, model_params=model_params, optimizer_params=optimizer_params, trainer_params=trainer_params, meta_params=meta_params) + else: + raise NotImplementedError + + copy_all_src(trainer.result_folder) + 
trainer.run() + + +def _set_debug_mode(): + global trainer_params + trainer_params['epochs'] = 2 + trainer_params['train_episodes'] = 4 + trainer_params['train_batch_size'] = 2 + + +def _print_config(): + logger = logging.getLogger('root') + logger.info('DEBUG_MODE: {}'.format(DEBUG_MODE)) + logger.info('USE_CUDA: {}, CUDA_DEVICE_NUM: {}'.format(USE_CUDA, CUDA_DEVICE_NUM)) + [logger.info(g_key + "{}".format(globals()[g_key])) for g_key in globals().keys() if g_key.endswith('params')] + + +def check_mem(cuda_device): + devices_info = os.popen('"/usr/bin/nvidia-smi" --query-gpu=memory.total,memory.used --format=csv,nounits,noheader').read().strip().split("\n") + total, used = devices_info[int(cuda_device)].split(',') + return total, used + + +def occumpy_mem(cuda_device): + """ + Occupy GPU memory in advance for size setting. + """ + torch.cuda.set_device(cuda_device) + total, used = check_mem(cuda_device) + total = int(total) + used = int(used) + block_mem = int((total-used) * 0.85) + x = torch.cuda.FloatTensor(256, 1024, block_mem) + del x + + +if __name__ == "__main__": + if meta_params["data_type"] in ["size", "size_distribution"]: + occumpy_mem(CUDA_DEVICE_NUM) # reserve GPU memory for large size instances + main() diff --git a/POMO/TSP/TSProblemDef.py b/POMO/ProblemDef.py similarity index 77% rename from POMO/TSP/TSProblemDef.py rename to POMO/ProblemDef.py index efe9e60..9ab21e8 100644 --- a/POMO/TSP/TSProblemDef.py +++ b/POMO/ProblemDef.py @@ -1,10 +1,9 @@ import os, sys +import math import glob import torch import pickle import numpy as np -os.chdir(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, "..") # for utils from utils.functions import show, seed_everything, load_dataset, save_dataset @@ -13,15 +12,15 @@ def generate_task_set(meta_params): Current setting: size: (n,) \in [20, 150] distribution: (m, c) \in {(0, 0) + [1-9] * [1, 10, 20, 30, 40, 50]} - size_distribution: (n, m, c) \in [30, 50, 70, 90, 110, 130, 150] * {(0, 0) + [2, 4, 6, 
8] * [1, 20, 40]} + TODO: size_distribution: (n, m, c) \in [20, 150, 5] * {(0, 0) + [1, 3, 5, 7] * [1, 10, 30, 50]} """ if meta_params['data_type'] == "distribution": # focus on TSP100 with gaussian mixture distributions task_set = [(0, 0)] + [(m, c) for m in range(1, 10) for c in [1, 10, 20, 30, 40, 50]] elif meta_params['data_type'] == "size": # focus on uniform distribution with different sizes task_set = [(n,) for n in range(20, 151)] elif meta_params['data_type'] == "size_distribution": - dist_set = [(0, 0)] + [(m, c) for m in [2, 4, 6, 8] for c in [1, 20, 40]] - task_set = [(n, m, c) for n in range(30, 151, 20) for (m, c) in dist_set] + dist_set = [(0, 0)] + [(m, c) for m in [1, 3, 5, 7] for c in [1, 10, 30, 50]] + task_set = [(n, m, c) for n in range(20, 151, 5) for (m, c) in dist_set] else: raise NotImplementedError print(">> Generating training task set: {} tasks with type {}".format(len(task_set), meta_params['data_type'])) @@ -30,32 +29,48 @@ def generate_task_set(meta_params): return task_set -def get_random_problems(batch_size, problem_size, num_modes=0, cdist=0, distribution='uniform', path=None): +def get_random_problems(batch_size, problem_size, num_modes=0, cdist=0, distribution='uniform', path=None, problem="tsp"): """ Generate TSP data within range of [0, 1] """ + assert problem in ["tsp", "cvrp"], "Problems not support." 
+ # uniform distribution problems.shape: (batch, problem, 2) if distribution == "uniform": problems = np.random.uniform(0, 1, [batch_size, problem_size, 2]) # problems = torch.rand(size=(batch_size, problem_size, 2)) elif distribution == "gaussian_mixture": problems = generate_gaussian_mixture_tsp(batch_size, problem_size, num_modes=num_modes, cdist=cdist) - elif distribution in ["uniform_rectangle", "gaussian", "cluster", "diagonal", "tsplib"]: + elif distribution in ["uniform_rectangle", "gaussian", "cluster", "diagonal", "tsplib", "cvrplib"]: problems = generate_tsp_dist(batch_size, problem_size, distribution) else: raise NotImplementedError - # save as + if problem == "cvrp": + depot_xy = np.random.uniform(size=(batch_size, 1, 2)) # shape: (batch, 1, 2) + node_demand = np.random.randint(1, 10, size=(batch_size, problem_size)) # (unnormalized) shape: (batch, problem) + demand_scaler = math.ceil(30 + problem_size/5) if problem_size >= 20 else 20 + capacity = np.full(batch_size, demand_scaler) + + # save as List if path is not None: - with open(os.path.join(path, "tsp{}_{}.pkl".format(problem_size, distribution)), "wb") as f: - pickle.dump(problems, f, pickle.HIGHEST_PROTOCOL) - problems = problems[: batch_size] + if problem == "tsp": + with open(os.path.join(path, "tsp{}_{}.pkl".format(problem_size, distribution)), "wb") as f: + pickle.dump(problems.tolist(), f, pickle.HIGHEST_PROTOCOL) + else: + with open(os.path.join(path, "cvrp{}_{}.pkl".format(problem_size, distribution)), "wb") as f: + pickle.dump(list(zip(depot_xy.tolist(), problems.tolist(), node_demand.tolist(), capacity.tolist())), f, pickle.HIGHEST_PROTOCOL) # [(depot_xy, problems, node_demand), ...] 
# return tensor if not torch.is_tensor(problems): problems = torch.Tensor(problems) + if problem == "cvrp": + depot_xy, node_demand, capacity = torch.Tensor(depot_xy), torch.Tensor(node_demand), torch.Tensor(capacity) - return problems + if problem == "tsp": + return problems + else: + return depot_xy, problems, node_demand, capacity def augment_xy_data_by_8_fold(problems): @@ -100,8 +115,10 @@ def gaussian_mixture(graph_size=100, num_modes=0, cdist=1): xy = MinMaxScaler().fit_transform(xy) return xy - if num_modes == 0: + if num_modes == 0: # (0, 0) - uniform return np.random.uniform(0, 1, [dataset_size, graph_size, 2]) + elif num_modes == 1 and cdist == 1: # (1, 1) - gaussian + return generate_tsp_dist(dataset_size, graph_size, "gaussian") else: res = [] for i in range(dataset_size): @@ -114,6 +131,7 @@ def generate_tsp_dist(n_samples, n_nodes, distribution): Generate tsp instances with different distributions: ["cluster", "uniform_rectangle", "diagonal", "gaussian", "tsplib"] from "Generative Adversarial Training for Neural Combinatorial Optimization Models". 
""" + print(">> Generating datasets: {}-{}-{}".format(n_samples, n_nodes, distribution)) if distribution == "cluster": # time-consuming x = [] for i in range(n_samples): @@ -174,8 +192,8 @@ def generate_tsp_dist(n_samples, n_nodes, distribution): x = np.random.multivariate_normal(mean, cov, [1, n_nodes]) data.append(x) x = np.concatenate(data, 0) - elif distribution == "tsplib": - file_names = glob.glob("../../data/TSP/tsplib/*.tsp") + elif distribution in ["tsplib", "cvrplib"]: + file_names = glob.glob("../data/TSP/tsplib/*.tsp") if distribution == "tsplib" else glob.glob("../data/CVRP/cvrplib/Vrp-Set-X/*.vrp") data = [] for file_name in file_names: with open(file_name, "r") as f: @@ -231,23 +249,23 @@ def generate_tsp_dist(n_samples, n_nodes, distribution): val seed: 2022 test seed: 2023 """ - path = "../../data/TSP" + path = "../data/TSP/Size" if not os.path.exists(path): os.makedirs(path) seed_everything(seed=2023) # var-dist test data - # for dist in ["uniform", "uniform_rectangle", "gaussian", "cluster", "diagonal", "tsplib"]: + # for dist in ["uniform", "uniform_rectangle", "gaussian", "diagonal", "tsplib", "cluster"]: # print(">> Generating TSP instances following {} distribution!".format(dist)) - # get_random_problems(20000, 100, distribution=dist, path=path) + # get_random_problems(15000, 100, distribution=dist, path=path, problem="tsp") # var-size test data - # for s in [300, 500]: + # for s in [50, 100, 150, 200, 300, 500, 1000]: # print(">> Generating TSP instances of size {}!".format(s)) - # get_random_problems(1000, s, distribution="uniform", path=path) + # get_random_problems(15000, s, distribution="uniform", path=path, problem="tsp") - data = generate_gaussian_mixture_tsp(dataset_size=64, graph_size=100, num_modes=9, cdist=1) + # data = generate_gaussian_mixture_tsp(dataset_size=64, graph_size=100, num_modes=9, cdist=1) # data = load_dataset("../../data/TSP/tsp100_cluster.pkl") # print(type(data), data.size(), data) - x, y = data[0, :, 0].tolist(), 
data[0, :, -1].tolist() - show([x], [y], label=["Gaussian Mixture"], title="TSP100", xdes="x", ydes="y", path="./tsp.pdf") + # x, y = data[0, :, 0].tolist(), data[0, :, -1].tolist() + # show([x], [y], label=["Gaussian Mixture"], title="TSP100", xdes="x", ydes="y", path="./tsp.pdf") diff --git a/POMO/TSP/TSPEnv.py b/POMO/TSP/TSPEnv.py index 4401f59..60f2154 100644 --- a/POMO/TSP/TSPEnv.py +++ b/POMO/TSP/TSPEnv.py @@ -1,8 +1,7 @@ - from dataclasses import dataclass import torch -from TSProblemDef import get_random_problems, augment_xy_data_by_8_fold +from ProblemDef import get_random_problems, augment_xy_data_by_8_fold @dataclass @@ -54,7 +53,7 @@ def load_problems(self, batch_size, problems=None, aug_factor=1): self.problems = problems else: self.batch_size = batch_size - self.problems = get_random_problems(batch_size, self.problem_size, distribution='uniform') + self.problems = get_random_problems(batch_size, self.problem_size, distribution='uniform', problem="tsp") # problems.shape: (batch, problem, 2) if aug_factor > 1: diff --git a/POMO/TSP/TSPModel.py b/POMO/TSP/TSPModel.py index df5a109..5185d5e 100644 --- a/POMO/TSP/TSPModel.py +++ b/POMO/TSP/TSPModel.py @@ -124,8 +124,8 @@ def __init__(self, **model_params): self.addAndNormalization2 = Add_And_Normalization_Module(**model_params) def forward(self, input1, weights=None, index=0): + # input1.shape: (batch, problem, EMBEDDING_DIM) if weights is None: - # input.shape: (batch, problem, EMBEDDING_DIM) head_num = self.model_params['head_num'] q = reshape_by_heads(self.Wq(input1), head_num=head_num) k = reshape_by_heads(self.Wk(input1), head_num=head_num) @@ -210,9 +210,9 @@ def set_q1(self, encoded_q1, weights=None): self.q_first = reshape_by_heads(F.linear(encoded_q1, weights['decoder.Wq_first.weight'], bias=None), head_num=head_num) def forward(self, encoded_last_node, ninf_mask, weights=None): + # encoded_last_node.shape: (batch, pomo, embedding) + # ninf_mask.shape: (batch, pomo, problem) if weights is None: - 
# encoded_last_node.shape: (batch, pomo, embedding) - # ninf_mask.shape: (batch, pomo, problem) head_num = self.model_params['head_num'] # Multi-Head Attention ####################################################### @@ -321,8 +321,8 @@ def __init__(self, **model_params): self.norm = None def forward(self, input1, input2, weights=None): + # input.shape: (batch, problem, embedding) if weights is None: - # input.shape: (batch, problem, embedding) added = input1 + input2 if isinstance(self.norm, nn.InstanceNorm1d): transposed = added.transpose(1, 2) @@ -333,7 +333,7 @@ def forward(self, input1, input2, weights=None): # shape: (batch, problem, embedding) elif isinstance(self.norm, nn.BatchNorm1d): batch, problem, embedding = added.size() - normalized = self.norm(added.reshape(-1, embedding)) + normalized = self.norm(added.reshape(batch * problem, embedding)) back_trans = normalized.reshape(batch, problem, embedding) else: back_trans = added @@ -345,7 +345,7 @@ def forward(self, input1, input2, weights=None): back_trans = normalized.transpose(1, 2) elif isinstance(self.norm, nn.BatchNorm1d): batch, problem, embedding = added.size() - normalized = F.batch_norm(added.reshape(-1, embedding), running_mean=self.norm.running_mean, running_var=self.norm.running_var, weight=weights['weight'], bias=weights['bias'], training=True) + normalized = F.batch_norm(added.reshape(batch * problem, embedding), running_mean=self.norm.running_mean, running_var=self.norm.running_var, weight=weights['weight'], bias=weights['bias'], training=True) back_trans = normalized.reshape(batch, problem, embedding) else: back_trans = added @@ -363,8 +363,8 @@ def __init__(self, **model_params): self.W2 = nn.Linear(ff_hidden_dim, embedding_dim) def forward(self, input1, weights=None): + # input.shape: (batch, problem, embedding) if weights is None: - # input.shape: (batch, problem, embedding) return self.W2(F.relu(self.W1(input1))) else: output = F.relu(F.linear(input1, weights['weight1'], 
bias=weights['bias1'])) diff --git a/POMO/TSP/TSPTester.py b/POMO/TSP/TSPTester.py index 572eeb0..c4ba3b0 100644 --- a/POMO/TSP/TSPTester.py +++ b/POMO/TSP/TSPTester.py @@ -56,7 +56,7 @@ def __init__(self, checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load) checkpoint = torch.load(checkpoint_fullname, map_location=self.device) self.model.load_state_dict(checkpoint['model_state_dict']) - self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) # TODO: which performance is good? load or not load? + # self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) # TODO: which performance is good? load or not load? self.logger.info(">> Model loaded from {}".format(checkpoint_fullname)) # utility diff --git a/POMO/TSP/TSPTrainer_Meta.py b/POMO/TSP/TSPTrainer_Meta.py index d0fcab7..caee82c 100644 --- a/POMO/TSP/TSPTrainer_Meta.py +++ b/POMO/TSP/TSPTrainer_Meta.py @@ -6,14 +6,12 @@ import torch from logging import getLogger from collections import OrderedDict - -from TSPEnv import TSPEnv as Env -from TSPModel import TSPModel as Model - from torch.optim import Adam as Optimizer # from torch.optim import SGD as Optimizer -from TSProblemDef import get_random_problems, generate_task_set +from TSPEnv import TSPEnv as Env +from TSPModel import TSPModel as Model +from ProblemDef import get_random_problems, generate_task_set from utils.utils import * from utils.functions import * from TSP_baseline import * @@ -57,7 +55,7 @@ def __init__(self, torch.set_default_tensor_type('torch.FloatTensor') # Main Components - self.model_params["norm"] = None + self.model_params["norm"] = None # Original "POMO" Paper uses instance/batch normalization self.meta_model = Model(**self.model_params) self.meta_optimizer = Optimizer(self.meta_model.parameters(), **self.optimizer_params['optimizer']) self.alpha = self.meta_params['alpha'] # for reptile @@ -94,6 +92,13 @@ def run(self): for epoch in range(self.start_epoch, self.meta_params['epochs']+1): 
self.logger.info('=================================================================') + # lr decay (by 10) to speed up convergence at 90th and 95th iterations + if epoch in [int(self.meta_params['epochs'] * 0.9), int(self.meta_params['epochs'] * 0.95)]: + self.optimizer_params['optimizer']['lr'] /= 10 + for group in self.meta_optimizer.param_groups: + group["lr"] /= 10 + print(">> LR decay to {}".format(group["lr"])) + # Train train_score, train_loss = self._train_one_epoch(epoch) self.result_log.append('train_score', epoch, train_score) @@ -101,16 +106,18 @@ def run(self): model_save_interval = self.trainer_params['logging']['model_save_interval'] img_save_interval = self.trainer_params['logging']['img_save_interval'] # Val - dir, no_aug_score_list = "../../data/TSP/", [] + no_aug_score_list = [] if self.meta_params["data_type"] == "size": - paths = ["tsp50_uniform.pkl", "tsp100_uniform.pkl", "tsp200_uniform.pkl"] + dir = "../../data/TSP/Size/" + paths = ["tsp100_uniform.pkl", "tsp200_uniform.pkl", "tsp300_uniform.pkl"] elif self.meta_params["data_type"] == "distribution": + dir = "../../data/TSP/Distribution/" paths = ["tsp100_uniform.pkl", "tsp100_gaussian.pkl", "tsp100_cluster.pkl", "tsp100_diagonal.pkl", "tsp100_tsplib.pkl"] elif self.meta_params["data_type"] == "size_distribution": pass if epoch <= 1 or (epoch % img_save_interval) == 0: for val_path in paths: - no_aug_score = self._fast_val(self.meta_model, path=os.path.join(dir, val_path), offset=10000, val_episodes=64, mode="eval") + no_aug_score = self._fast_val(self.meta_model, path=os.path.join(dir, val_path), val_episodes=64, mode="eval") no_aug_score_list.append(round(no_aug_score, 4)) self.result_log.append('val_score', epoch, no_aug_score_list) cur_mean = sum(no_aug_score_list) / len(no_aug_score_list) @@ -133,10 +140,6 @@ def run(self): epoch, self.meta_params['epochs'], epoch/self.meta_params['epochs']*100, elapsed_time_str, remain_time_str, no_aug_score_list)) all_done = (epoch == 
self.meta_params['epochs']) - # if self.trainer_params['stop_criterion'] == "epochs": - # all_done = (epoch == self.meta_params['epochs']) - # else: - # all_done = (time.time() - start_time) >= self.trainer_params['time_limit'] if epoch > 1 and (epoch % img_save_interval) == 0: # save latest images, every X epoch self.logger.info("Saving log_image") @@ -145,8 +148,8 @@ def run(self): util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['val_score']) util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'], self.result_log, labels=['train_loss']) + # Save Model if all_done or (epoch % model_save_interval) == 0: - # save checkpoint self.logger.info("Saving trained_model") checkpoint_dict = { 'epoch': epoch, @@ -176,11 +179,12 @@ def _train_one_epoch(self, epoch): self.meta_optimizer.zero_grad() score_AM = AverageMeter() loss_AM = AverageMeter() + batch_size = self.meta_params['meta_batch_size'] """ Adaptive task scheduler: - for size: gradually increase the problem size (Curriculum learning) - for distribution: we compute the relative gaps (w.r.t. LKH3) or estimate the potential improvements of each distribution every X iters. + for size: gradually increase the problem size (Curriculum learning); + for distribution: we compute the relative gaps (w.r.t. 
LKH3) or estimate the potential improvements of each distribution (i.e., bootstrap) every X iters; """ if self.meta_params["data_type"] == "size": # start = self.min_n + int(epoch/self.meta_params['sch_epoch'] * (self.max_n - self.min_n)) # linear @@ -193,14 +197,13 @@ def _train_one_epoch(self, epoch): elif self.meta_params["data_type"] == "size_distribution": pass - self._alpha_scheduler(epoch) - fast_weights, val_loss, fomaml_grad = [], 0, [] + self._alpha_scheduler(epoch) # for reptile + fast_weights, val_loss, meta_grad_dict = [], 0, {(i, j): 0 for i, group in enumerate(self.meta_optimizer.param_groups) for j, _ in enumerate(group['params'])} - # sample a batch of tasks - for i in range(self.meta_params['B']): + for b in range(self.meta_params['B']): + # sample a task if self.meta_params["data_type"] == "size": task_params = random.sample(range(start, end+1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] - batch_size = self.meta_params['meta_batch_size'] # batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 100 else self.meta_params['meta_batch_size'] // 2 elif self.meta_params["data_type"] == "distribution": # sample based on task weights @@ -209,13 +212,14 @@ def _train_one_epoch(self, epoch): # curri: from easy task (small gaps) -> hard task (large gaps) # selected_idx = torch.sort(self.task_w, descending=False)[1].tolist()[start: end] # task_params = self.task_set[random.sample(selected_idx, 1)[0]] if self.meta_params['curriculum'] and epoch >= self.meta_params['update_weight'] else random.sample(self.task_set, 1)[0] - batch_size = self.meta_params['meta_batch_size'] elif self.meta_params["data_type"] == "size_distribution": pass + # preparation if self.meta_params['meta_method'] in ['fomaml', 'reptile']: task_model = copy.deepcopy(self.meta_model) optimizer = Optimizer(task_model.parameters(), **self.optimizer_params['optimizer']) + optimizer.load_state_dict(self.meta_optimizer.state_dict()) elif 
self.meta_params['meta_method'] == 'maml': if self.model_params['meta_update_encoder']: fast_weight = OrderedDict(self.meta_model.named_parameters()) @@ -223,8 +227,7 @@ def _train_one_epoch(self, epoch): fast_weight = OrderedDict(self.meta_model.decoder.named_parameters()) for k in list(fast_weight.keys()): fast_weight["decoder."+k] = fast_weight.pop(k) - optimizer = Optimizer(fast_weight.values(), **self.optimizer_params['optimizer']) - optimizer.load_state_dict(self.meta_optimizer.state_dict()) + # optimizer = Optimizer(fast_weight.values(), **self.optimizer_params['optimizer']) # inner-loop optimization for step in range(self.meta_params['k']): @@ -234,39 +237,50 @@ def _train_one_epoch(self, epoch): if self.meta_params['meta_method'] in ['reptile', 'fomaml']: avg_score, avg_loss = self._train_one_batch(task_model, data, Env(**env_params), optimizer) elif self.meta_params['meta_method'] == 'maml': - avg_score, avg_loss, fast_weight = self._train_one_batch_maml(fast_weight, data, Env(**env_params), optimizer) + avg_score, avg_loss, fast_weight = self._train_one_batch_maml(fast_weight, data, Env(**env_params)) score_AM.update(avg_score.item(), batch_size) loss_AM.update(avg_loss.item(), batch_size) val_data = self._get_val_data(batch_size, task_params) self.meta_model.train() if self.meta_params['meta_method'] == 'maml': - val_loss = self._fast_val(fast_weight, data=val_data, mode="maml") - val_loss /= self.meta_params['B'] + # Old version + # val_loss += self._fast_val(fast_weight, data=val_data, mode="maml") / self.meta_params['B'] + # New version - Save GPU memory + val_loss = self._fast_val(fast_weight, data=val_data, mode="maml") / self.meta_params['B'] + self.meta_optimizer.zero_grad() val_loss.backward() + for i, group in enumerate(self.meta_optimizer.param_groups): + for j, p in enumerate(group['params']): + meta_grad_dict[(i, j)] += p.grad elif self.meta_params['meta_method'] == 'fomaml': - val_loss = self._fast_val(task_model, data=val_data, 
mode="fomaml") - grad = torch.autograd.grad(val_loss, task_model.parameters()) - fomaml_grad.append(grad) - self.meta_optimizer.load_state_dict(optimizer.state_dict()) + val_loss = self._fast_val(task_model, data=val_data, mode="fomaml") / self.meta_params['B'] + optimizer.zero_grad() + val_loss.backward() + for i, group in enumerate(optimizer.param_groups): + for j, p in enumerate(group['params']): + meta_grad_dict[(i, j)] += p.grad elif self.meta_params['meta_method'] == 'reptile': fast_weights.append(task_model.state_dict()) # outer-loop optimization (update meta-model) if self.meta_params['meta_method'] == 'maml': - # print(self.meta_model.encoder.embedding.weight.grad.norm(p=2).cpu().item()) - # print(self.meta_model.decoder.multi_head_combine.weight.grad.norm(p=2).cpu().item()) - # grad_norms = clip_grad_norms(self.meta_optimizer.param_groups, max_norm=1.0) - # print(grad_norms[0]) + # Old version + # self.meta_optimizer.zero_grad() + # val_loss.backward() + # self.meta_optimizer.step() + # New version - Save GPU memory + self.meta_optimizer.zero_grad() + for i, group in enumerate(self.meta_optimizer.param_groups): + for j, p in enumerate(group['params']): + p.grad = meta_grad_dict[(i, j)] self.meta_optimizer.step() elif self.meta_params['meta_method'] == 'fomaml': - updated_weights = self.meta_model.state_dict() - for gradients in fomaml_grad: - updated_weights = OrderedDict( - (name, param - self.optimizer_params['optimizer']['lr'] / self.meta_params['B'] * grad) - for ((name, param), grad) in zip(updated_weights.items(), gradients) - ) - self.meta_model.load_state_dict(updated_weights) + self.meta_optimizer.zero_grad() + for i, group in enumerate(self.meta_optimizer.param_groups): + for j, p in enumerate(group['params']): + p.grad = meta_grad_dict[(i, j)] + self.meta_optimizer.step() elif self.meta_params['meta_method'] == 'reptile': state_dict = {params_key: (self.meta_model.state_dict()[params_key] + self.alpha * 
torch.mean(torch.stack([fast_weight[params_key] - self.meta_model.state_dict()[params_key] for fast_weight in fast_weights], dim=0), dim=0)) for params_key in self.meta_model.state_dict()} self.meta_model.load_state_dict(state_dict) @@ -339,16 +353,50 @@ def _train_one_batch_maml(self, fast_weight, data, env, optimizer=None): # shape: (batch, pomo) loss_mean = loss.mean() - # update model + # 1. update model - in SGD way # gradients = torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=True) # allow_unused=True # fast_weight = OrderedDict( # (name, param - self.optimizer_params['optimizer']['lr'] * grad) # for ((name, param), grad) in zip(fast_weight.items(), gradients) # ) + # 2. update model - in Adam way + gradients = torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=True) # allow_unused=True + w_t, (beta1, beta2), eps = [], self.meta_optimizer.param_groups[0]['betas'], self.meta_optimizer.param_groups[0]['eps'] + lr, weight_decay = self.optimizer_params['optimizer']['lr'], self.optimizer_params['optimizer']['weight_decay'] + for i, ((name, param), grad) in enumerate(zip(fast_weight.items(), gradients)): + if self.meta_optimizer.state_dict()['state'] != {}: + i = i if self.model_params['meta_update_encoder'] else i + 58 # i \in [0, 62] + state = self.meta_optimizer.state_dict()['state'][i] + step, exp_avg, exp_avg_sq = state['step'], state['exp_avg'], state['exp_avg_sq'] + step += 1 + step = step.item() + # compute grad based on Adam source code using in-place operation + # update Adam stat (step, exp_avg and exp_avg_sq have already been updated by in-place operation) + # may encounter RuntimeError: (a leaf Variable that requires grad) / (the tensor used during grad computation) cannot use in-place operation. 
+ grad = grad.add(param, alpha=weight_decay) + exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) + exp_avg_sq.mul_(beta2).addcmul_(grad, grad.conj(), value=1 - beta2) + bias_correction1 = 1 - beta1 ** step + bias_correction2 = 1 - beta2 ** step + step_size = lr / bias_correction1 + bias_correction2_sqrt = math.sqrt(bias_correction2) + denom = (exp_avg_sq.sqrt() / bias_correction2_sqrt).add_(eps) + # param.addcdiv_(exp_avg, denom, value=-step_size) + param = param - step_size * exp_avg / denom + self.meta_optimizer.state_dict()['state'][i]['exp_avg'] = exp_avg.clone().detach() + self.meta_optimizer.state_dict()['state'][i]['exp_avg_sq'] = exp_avg_sq.clone().detach() + else: + param = param - lr * grad + w_t.append((name, param)) + fast_weight = OrderedDict(w_t) + """ + # 3. update model using optimizer - this method can not work properly. optimizer.zero_grad() # torch.autograd.grad(loss_mean, fast_weight.values(), create_graph=True) + # print(list(self.meta_model.parameters())[-1]) loss_mean.backward(retain_graph=True, create_graph=True) - optimizer.step() + optimizer.step() # will update meta_model as well... 
+ """ # Score max_pomo_reward, _ = reward.max(dim=1) # get best results from pomo @@ -476,13 +524,13 @@ def _bootstrap(self, fast_weight, data, mode="eval"): def _get_data(self, batch_size, task_params): if self.meta_params['data_type'] == 'size': assert len(task_params) == 1 - data = get_random_problems(batch_size, task_params[0], num_modes=0, cdist=0, distribution='uniform') + data = get_random_problems(batch_size, task_params[0], num_modes=0, cdist=0, distribution='uniform', problem="tsp") elif self.meta_params['data_type'] == 'distribution': assert len(task_params) == 2 - data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[1], distribution='gaussian_mixture') + data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[1], distribution='gaussian_mixture', problem="tsp") elif self.meta_params['data_type'] == "size_distribution": assert len(task_params) == 3 - data = get_random_problems(batch_size, task_params[0], num_modes=task_params[1], cdist=task_params[2], distribution='gaussian_mixture') + data = get_random_problems(batch_size, task_params[0], num_modes=task_params[1], cdist=task_params[2], distribution='gaussian_mixture', problem="tsp") else: raise NotImplementedError @@ -493,6 +541,7 @@ def _get_val_data(self, batch_size, task_params): start1, end1 = min(task_params[0] + 10, self.max_n), min(task_params[0] + 20, self.max_n) val_size = random.sample(range(start1, end1 + 1), 1)[0] val_data = self._get_data(batch_size, (val_size,)) + # val_data = self._get_data(batch_size, task_params) # TODO: which is better? 
elif self.meta_params["data_type"] == "distribution": val_data = self._get_data(batch_size, task_params) elif self.meta_params["data_type"] == "size_distribution": diff --git a/POMO/TSP/TSPTrainer_pomo.py b/POMO/TSP/TSPTrainer_pomo.py index 6845375..d96fadf 100644 --- a/POMO/TSP/TSPTrainer_pomo.py +++ b/POMO/TSP/TSPTrainer_pomo.py @@ -6,15 +6,12 @@ import torch from logging import getLogger from collections import OrderedDict - -from TSPEnv import TSPEnv as Env -from TSPModel import TSPModel as Model - from torch.optim import Adam as Optimizer # from torch.optim import SGD as Optimizer -from torch.optim.lr_scheduler import MultiStepLR as Scheduler -from TSProblemDef import get_random_problems, generate_task_set +from TSPEnv import TSPEnv as Env +from TSPModel import TSPModel as Model +from ProblemDef import get_random_problems, generate_task_set from utils.utils import * from utils.functions import * from TSP_baseline import * @@ -56,8 +53,8 @@ def __init__(self, # Main Components self.model_params["norm"] = "instance" # Original "POMO" Paper uses instance/batch normalization - self.meta_model = Model(**self.model_params) - self.optimizer = Optimizer(self.meta_model.parameters(), **self.optimizer_params['optimizer']) + self.model = Model(**self.model_params) + self.optimizer = Optimizer(self.model.parameters(), **self.optimizer_params['optimizer']) self.task_set = generate_task_set(self.meta_params) self.task_w = torch.full((len(self.task_set),), 1 / len(self.task_set)) @@ -67,23 +64,28 @@ def __init__(self, if model_load['enable']: checkpoint_fullname = '{path}/checkpoint-{epoch}.pt'.format(**model_load) checkpoint = torch.load(checkpoint_fullname, map_location=self.device) - self.meta_model.load_state_dict(checkpoint['model_state_dict']) + self.model.load_state_dict(checkpoint['model_state_dict']) self.start_epoch = 1 + model_load['epoch'] self.result_log.set_raw_data(checkpoint['result_log']) self.optimizer.load_state_dict(checkpoint['optimizer_state_dict']) - # 
self.scheduler.last_epoch = model_load['epoch']-1 self.logger.info('Saved Model Loaded !!') # utility self.time_estimator = TimeEstimator() def run(self): - start_time = time.time() self.time_estimator.reset(self.start_epoch) for epoch in range(self.start_epoch, self.meta_params['epochs']+1): self.logger.info('=================================================================') + # lr decay (by 10) to speed up convergence at 90th and 95th iterations + if epoch in [int(self.meta_params['epochs'] * 0.9), int(self.meta_params['epochs'] * 0.95)]: + self.optimizer_params['optimizer']['lr'] /= 10 + for group in self.optimizer.param_groups: + group["lr"] /= 10 + print(">> LR decay to {}".format(group["lr"])) + # Train train_score, train_loss = self._train_one_epoch(epoch) self.result_log.append('train_score', epoch, train_score) @@ -91,16 +93,18 @@ def run(self): model_save_interval = self.trainer_params['logging']['model_save_interval'] img_save_interval = self.trainer_params['logging']['img_save_interval'] # Val - dir, no_aug_score_list = "../../data/TSP/", [] + no_aug_score_list = [] if self.meta_params["data_type"] == "size": - paths = ["tsp50_uniform.pkl", "tsp100_uniform.pkl", "tsp200_uniform.pkl"] + dir = "../../data/TSP/Size/" + paths = ["tsp100_uniform.pkl", "tsp200_uniform.pkl", "tsp300_uniform.pkl"] elif self.meta_params["data_type"] == "distribution": + dir = "../../data/TSP/Distribution/" paths = ["tsp100_uniform.pkl", "tsp100_gaussian.pkl", "tsp100_cluster.pkl", "tsp100_diagonal.pkl", "tsp100_tsplib.pkl"] elif self.meta_params["data_type"] == "size_distribution": pass if epoch <= 1 or (epoch % img_save_interval) == 0: for val_path in paths: - no_aug_score = self._fast_val(self.meta_model, path=os.path.join(dir, val_path), val_episodes=64) + no_aug_score = self._fast_val(self.model, path=os.path.join(dir, val_path), val_episodes=64) no_aug_score_list.append(round(no_aug_score, 4)) self.result_log.append('val_score', epoch, no_aug_score_list) @@ -118,14 +122,13 
@@ def run(self): util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_1'], self.result_log, labels=['val_score']) util_save_log_image_with_label(image_prefix, self.trainer_params['logging']['log_image_params_2'], self.result_log, labels=['train_loss']) + # Save Model if all_done or (epoch % model_save_interval) == 0: - # save checkpoint self.logger.info("Saving trained_model") checkpoint_dict = { 'epoch': epoch, - 'model_state_dict': self.meta_model.state_dict(), + 'model_state_dict': self.model.state_dict(), 'optimizer_state_dict': self.optimizer.state_dict(), - # 'scheduler_state_dict': self.scheduler.state_dict(), 'result_log': self.result_log.get_raw_data() } torch.save(checkpoint_dict, '{}/checkpoint-{}.pt'.format(self.result_folder, epoch)) @@ -143,14 +146,13 @@ def run(self): def _train_one_epoch(self, epoch): """ - 1. Sample B training tasks from task distribution P(T) - 2. inner-loop: for a batch of tasks T_i, do reptile -> \theta_i - 3. outer-loop: update meta-model -> \theta_0 + POMO Training, equivalent to the original POMO implementation. 
""" score_AM = AverageMeter() loss_AM = AverageMeter() + batch_size = self.meta_params['meta_batch_size'] - # Curriculum learning - TODO: need to update + # Adaptive task scheduler - TODO: need to update if self.meta_params["data_type"] in ["size", "distribution"]: self.min_n, self.max_n = self.task_set[0][0], self.task_set[-1][0] # [20, 150] / [0, 130] # start = self.min_n + int(epoch/self.meta_params['epochs'] * (self.max_n - self.min_n)) # linear @@ -161,15 +163,13 @@ def _train_one_epoch(self, epoch): pass # sample a batch of tasks - for i in range(self.meta_params['B']): + for b in range(self.meta_params['B']): for step in range(self.meta_params['k']): if self.meta_params["data_type"] == "size": task_params = random.sample(range(start, end + 1), 1) if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] - batch_size = self.meta_params['meta_batch_size'] # batch_size = self.meta_params['meta_batch_size'] if task_params[0] <= 100 else self.meta_params['meta_batch_size'] // 2 elif self.meta_params["data_type"] == "distribution": task_params = self.task_set[torch.multinomial(self.task_w, 1).item()] if self.meta_params['curriculum'] else random.sample(self.task_set, 1)[0] - batch_size = self.meta_params['meta_batch_size'] elif self.meta_params["data_type"] == "size_distribution": pass @@ -186,18 +186,18 @@ def _train_one_epoch(self, epoch): def _train_one_batch(self, data, env): - self.meta_model.train() + self.model.train() batch_size = data.size(0) env.load_problems(batch_size, problems=data, aug_factor=1) reset_state, _, _ = env.reset() - self.meta_model.pre_forward(reset_state) + self.model.pre_forward(reset_state) prob_list = torch.zeros(size=(batch_size, env.pomo_size, 0)) # shape: (batch, pomo, 0~problem) # POMO Rollout, please note that the reward is negative (i.e., -length of route). 
state, reward, done = env.pre_step() while not done: - selected, prob = self.meta_model(state) + selected, prob = self.model(state) # shape: (batch, pomo) state, reward, done = env.step(selected) prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) @@ -223,9 +223,9 @@ def _train_one_batch(self, data, env): return score_mean, loss_mean - def _fast_val(self, model, data=None, path=None, val_episodes=32, return_all=False): + def _fast_val(self, model, data=None, path=None, offset=0, val_episodes=32, return_all=False): aug_factor = 1 - data = torch.Tensor(load_dataset(path)[: val_episodes]) if data is None else data + data = torch.Tensor(load_dataset(path)[offset: offset+val_episodes]) if data is None else data env = Env(**{'problem_size': data.size(1), 'pomo_size': data.size(1)}) model.eval() @@ -234,12 +234,11 @@ def _fast_val(self, model, data=None, path=None, val_episodes=32, return_all=Fal env.load_problems(batch_size, problems=data, aug_factor=aug_factor) reset_state, _, _ = env.reset() model.pre_forward(reset_state) - - state, reward, done = env.pre_step() - while not done: - selected, _ = model(state) - # shape: (batch, pomo) - state, reward, done = env.step(selected) + state, reward, done = env.pre_step() + while not done: + selected, _ = model(state) + # shape: (batch, pomo) + state, reward, done = env.step(selected) # Return aug_reward = reward.reshape(aug_factor, batch_size, env.pomo_size) @@ -258,19 +257,18 @@ def _get_data(self, batch_size, task_params): if self.meta_params['data_type'] == 'distribution': assert len(task_params) == 2 - data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[1], distribution='gaussian_mixture') + data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[1], distribution='gaussian_mixture', problem="tsp") elif self.meta_params['data_type'] == 'size': assert len(task_params) == 1 - data = 
get_random_problems(batch_size, task_params[0], num_modes=0, cdist=0, distribution='uniform') + data = get_random_problems(batch_size, task_params[0], num_modes=0, cdist=0, distribution='uniform', problem="tsp") elif self.meta_params['data_type'] == "size_distribution": assert len(task_params) == 3 - data = get_random_problems(batch_size, task_params[0], num_modes=task_params[1], cdist=task_params[2], distribution='gaussian_mixture') + data = get_random_problems(batch_size, task_params[0], num_modes=task_params[1], cdist=task_params[2], distribution='gaussian_mixture', problem="tsp") else: raise NotImplementedError return data - def _generate_x_adv(self, data, eps=10.0): """ Generate adversarial data based on the current model, also need to generate optimal sol for x_adv. @@ -283,18 +281,18 @@ def minmax(xy_): if eps == 0: return data # generate x_adv - self.meta_model.eval() + self.model.eval() aug_factor, batch_size = 1, data.size(0) env = Env(**{'problem_size': data.size(1), 'pomo_size': data.size(1)}) with torch.enable_grad(): data.requires_grad_() env.load_problems(batch_size, problems=data, aug_factor=aug_factor) reset_state, _, _ = env.reset() - self.meta_model.pre_forward(reset_state) + self.model.pre_forward(reset_state) prob_list = torch.zeros(size=(aug_factor * batch_size, env.pomo_size, 0)) state, reward, done = env.pre_step() while not done: - selected, prob = self.meta_model(state) + selected, prob = self.model(state) state, reward, done = env.step(selected) prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2) diff --git a/POMO/TSP/TSP_baseline.py b/POMO/TSP/TSP_baseline.py index a38870d..4b28672 100644 --- a/POMO/TSP/TSP_baseline.py +++ b/POMO/TSP/TSP_baseline.py @@ -100,7 +100,7 @@ def solve_concorde_log(executable, directory, name, loc, disable_cache=False): def get_lkh_executable(url="http://www.akira.ruc.dk/~keld/research/LKH-3/LKH-3.0.7.tgz"): - cwd = os.path.abspath(os.path.join("lkh")) + cwd = os.path.abspath("lkh") os.makedirs(cwd, 
exist_ok=True) file = os.path.join(cwd, os.path.split(urlparse(url).path)[-1]) @@ -372,18 +372,18 @@ def solve_all_nn(dataset_path, eval_batch_size=1024, no_cuda=False, dataset_n=No if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--method", type=str, default='lkh', choices=['nn', "gurobi", "gurobigap", "gurobit", "concorde", "lkh", "random_insertion", "nearest_insertion", "farthest_insertion"]) - parser.add_argument("--datasets", nargs='+', default=["../../data/TSP/tsp50_uniform.pkl", ], help="Filename of the dataset(s) to evaluate") - parser.add_argument("-f", action='store_true', help="Set true to overwrite") + parser.add_argument("--method", type=str, default='concorde', choices=["nn", "gurobi", "gurobigap", "gurobit", "concorde", "lkh", "random_insertion", "nearest_insertion", "farthest_insertion"]) + parser.add_argument("--datasets", nargs='+', default=["../../data/TSP/Size/tsp100_uniform.pkl", ], help="Filename of the dataset(s) to evaluate") + parser.add_argument("-f", action='store_false', help="Set true to overwrite") parser.add_argument("-o", default=None, help="Name of the results file to write") parser.add_argument("--cpus", type=int, help="Number of CPUs to use, defaults to all cores") parser.add_argument('--no_cuda', action='store_true', help='Disable CUDA (only for Tsiligirides)') - parser.add_argument('--disable_cache', action='store_true', help='Disable caching') - parser.add_argument('--max_calc_batch_size', type=int, default=1000, help='Size for subbatches') + parser.add_argument('--disable_cache', action='store_false', help='Disable caching') + parser.add_argument('--max_calc_batch_size', type=int, default=10000, help='Size for subbatches') parser.add_argument('--progress_bar_mininterval', type=float, default=0.1, help='Minimum interval') - parser.add_argument('-n', type=int, default=1000, help="Number of instances to process") + parser.add_argument('-n', type=int, default=10000, help="Number of instances to 
process") parser.add_argument('--offset', type=int, default=0, help="Offset where to start processing") - parser.add_argument('--results_dir', default='results', help="Name of results directory") + parser.add_argument('--results_dir', default='baseline_results', help="Name of results directory") opts = parser.parse_args() @@ -396,12 +396,12 @@ def solve_all_nn(dataset_path, eval_batch_size=1024, no_cuda=False, dataset_n=No dataset_basename, ext = os.path.splitext(os.path.split(dataset_path)[-1]) if opts.o is None: - results_dir = os.path.join(opts.results_dir, "tsp", dataset_basename) + results_dir = os.path.join(opts.results_dir, "tsp_{}".format(opts.method), dataset_basename) os.makedirs(results_dir, exist_ok=True) out_file = os.path.join(results_dir, "{}{}{}-{}{}".format( dataset_basename, - "offs{}".format(opts.offset) if opts.offset is not None else "", + "offset{}".format(opts.offset) if opts.offset is not None else "", "n{}".format(opts.n) if opts.n is not None else "", opts.method, ext )) @@ -416,6 +416,7 @@ def solve_all_nn(dataset_path, eval_batch_size=1024, no_cuda=False, dataset_n=No method = match[1] runs = 1 if match[2] == '' else int(match[2]) + start_t = time.time() if method == "nn": assert opts.offset is None, "Offset not supported for nearest neighbor" @@ -442,7 +443,7 @@ def solve_all_nn(dataset_path, eval_batch_size=1024, no_cuda=False, dataset_n=No if method == "concorde": use_multiprocessing = False - executable = os.path.abspath(os.path.join('problems', 'tsp', 'concorde', 'concorde', 'TSP', 'concorde')) + executable = os.path.abspath(os.path.join('concorde', 'concorde', 'TSP', 'concorde')) def run_func(args): return solve_concorde_log(executable, *args, disable_cache=opts.disable_cache) @@ -477,10 +478,12 @@ def run_func(args): assert False, "Unknown method: {}".format(opts.method) costs, tours, durations = zip(*results) # Not really costs since they should be negative + print(">> Solving {} instances within {:.2f}s using {}".format(opts.n, 
time.time()-start_t, opts.method)) print("Average cost: {} +- {}".format(np.mean(costs), 2 * np.std(costs) / np.sqrt(len(costs)))) - print("Average serial duration: {} +- {}".format( - np.mean(durations), 2 * np.std(durations) / np.sqrt(len(durations)))) + print("Average serial duration: {} +- {}".format(np.mean(durations), 2 * np.std(durations) / np.sqrt(len(durations)))) print("Average parallel duration: {}".format(np.mean(durations) / parallelism)) print("Calculated total duration: {}".format(timedelta(seconds=int(np.sum(durations) / parallelism)))) - save_dataset((results, parallelism), out_file) + results = [(i[0], i[1]) for i in results] + # print(results) + save_dataset(results, out_file) # [(obj, route), ...] diff --git a/POMO/TSP/install_concorde.sh b/POMO/TSP/install_concorde.sh new file mode 100755 index 0000000..447d678 --- /dev/null +++ b/POMO/TSP/install_concorde.sh @@ -0,0 +1,27 @@ +#!/bin/bash +mkdir concorde +cd concorde +mkdir qsopt +cd qsopt +# Download qsopt +if [[ "$OSTYPE" == "darwin"* ]]; then + curl -O http://www.math.uwaterloo.ca/~bico/qsopt/beta/codes/mac64/qsopt.a + curl -O http://www.math.uwaterloo.ca/~bico/qsopt/beta/codes/mac64/qsopt.h + curl -O http://www.math.uwaterloo.ca/~bico/qsopt/beta/codes/mac64/qsopt +else + wget http://www.math.uwaterloo.ca/~bico/qsopt/beta/codes/centos/qsopt.a + wget http://www.math.uwaterloo.ca/~bico/qsopt/beta/codes/centos/qsopt.h + wget http://www.math.uwaterloo.ca/~bico/qsopt/beta/codes/centos/qsopt +fi +cd .. +wget http://www.math.uwaterloo.ca/tsp/concorde/downloads/codes/src/co031219.tgz +tar xf co031219.tgz +cd concorde +if [[ "$OSTYPE" == "darwin"* ]]; then + ./configure --with-qsopt=$(pwd)/../qsopt --host=powerpc-apple-macos +else + ./configure --with-qsopt=$(realpath ../qsopt) +fi +make +TSP/concorde -s 99 -k 100 +cd ../.. 
\ No newline at end of file diff --git a/POMO/TSP/test.py b/POMO/TSP/test.py index 18ff821..2e2f719 100644 --- a/POMO/TSP/test.py +++ b/POMO/TSP/test.py @@ -36,8 +36,8 @@ 'cuda_device_num': CUDA_DEVICE_NUM, 'seed': 2023, 'model_load': { - 'path': '../../pretrained/TSP10_50/pomo_batch_31', # directory path of pre-trained model and log files saved. - 'epoch': 50000, # epoch version of pre-trained model to laod. + 'path': '../../pretrained/pomo_pretrained', # directory path of pre-trained model and log files saved. + 'epoch': 250000, # epoch version of pre-trained model to load. }, 'test_episodes': 10000, 'test_batch_size': 10000, @@ -45,12 +45,12 @@ 'test_robustness': False, 'aug_factor': 8, 'aug_batch_size': 100, - 'test_set_path': '../../data/TSP/tsp100_uniform.pkl', - 'test_set_opt_sol_path': '../../data/TSP/gurobi/tsp100_uniform.pkl' + 'test_set_path': '../../data/TSP/Size/tsp100_uniform.pkl', + 'test_set_opt_sol_path': '../../data/TSP/Size/opt_tsp100_uniform.pkl' } fine_tune_params = { - 'enable': True, # evaluate few-shot generalization + 'enable': False, # evaluate few-shot generalization 'fine_tune_episodes': 500, # how many data used to fine-tune the pretrained model 'k': 20, # fine-tune steps/epochs 'fine_tune_batch_size': 64, # the batch size of the inner-loop optimization diff --git a/POMO/TSP/train.py b/POMO/TSP/train.py index b62d59b..ba35bb6 100644 --- a/POMO/TSP/train.py +++ b/POMO/TSP/train.py @@ -45,7 +45,7 @@ 'seed': 1234, # 'batch_size': 64, 'logging': { - 'model_save_interval': 10000, + 'model_save_interval': 25000, 'img_save_interval': 10, 'log_image_params_1': { 'json_foldername': 'log_image_style', @@ -69,12 +69,12 @@ 'meta_method': 'maml', # choose from ['maml', 'fomaml', 'reptile'] 'bootstrap_steps': 25, 'data_type': 'size', # choose from ["size", "distribution", "size_distribution"] - 'epochs': 50000, # the number of meta-model updates: (250*100000) / (1*5*64) + 'epochs': 250000, # the number of meta-model updates: (250*100000) / (1*5*64) 
'B': 1, # the number of tasks in a mini-batch 'k': 1, # gradient decent steps in the inner-loop optimization of meta-learning method 'meta_batch_size': 64, # will be divided by 2 if problem_size >= 100 'update_weight': 1000, # update weight of each task per X iters - 'sch_epoch': 30000, # for the task scheduler of size setting + 'sch_epoch': 250000, # for the task scheduler of size setting 'solver': 'lkh3_offline', # solver used to update the task weights, choose from ["bootstrap", "lkh3_online", "lkh3_offline", "best_model"] 'alpha': 0.99, # params for the outer-loop optimization of reptile 'alpha_decay': 0.999, # params for the outer-loop optimization of reptile @@ -98,10 +98,10 @@ def main(): seed_everything(trainer_params['seed']) if not meta_params['enable']: - print(">> Start POMO Training.") + print(">> Start TSP-POMO Training.") trainer = Trainer_pomo(env_params=env_params, model_params=model_params, optimizer_params=optimizer_params, trainer_params=trainer_params, meta_params=meta_params) elif meta_params['meta_method'] in ['maml', 'fomaml', 'reptile']: - print(">> Start POMO-{} Training.".format(meta_params['meta_method'])) + print(">> Start TSP-POMO-{} Training.".format(meta_params['meta_method'])) trainer = Trainer_meta(env_params=env_params, model_params=model_params, optimizer_params=optimizer_params, trainer_params=trainer_params, meta_params=meta_params) else: raise NotImplementedError