Commit b136d7b: add anil and bootstrap
RoyalSkye committed Sep 27, 2022 (1 parent: 5b470c6)
Showing 7 changed files with 160 additions and 42 deletions.
9 changes: 6 additions & 3 deletions POMO/TSP/TSPModel.py
@@ -10,13 +10,16 @@ def __init__(self, **model_params):
super().__init__()
self.model_params = model_params

self.encoder = TSP_Encoder(**model_params)
self.decoder = TSP_Decoder(**model_params)
self.encoder = TSP_Encoder(**model_params) # 1187712 (1.19 million)
self.decoder = TSP_Decoder(**model_params) # 82048 (0.08 million)
self.encoded_nodes = None
# shape: (batch, problem, EMBEDDING_DIM)

def pre_forward(self, reset_state, weights=None):
self.encoded_nodes = self.encoder(reset_state.problems, weights=weights)
if weights is not None and self.model_params["meta_update_encoder"]:
self.encoded_nodes = self.encoder(reset_state.problems, weights=weights)
else:
self.encoded_nodes = self.encoder(reset_state.problems, weights=None)
# shape: (batch, problem, EMBEDDING_DIM)
self.decoder.set_kv(self.encoded_nodes, weights=weights)

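With this change, the encoder consumes externally supplied fast weights only when meta_update_encoder is set; otherwise it falls back to its own registered parameters, so the inner loop adapts the decoder only (ANIL-style, matching the commit message). For reference, the sketch below shows the functional-forward pattern that such a weights= keyword typically implies; the class and key names are illustrative and not taken from this repository.

import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import OrderedDict

class FunctionalLinear(nn.Module):
    # Illustrative only: a layer that runs either from its registered parameters
    # or from an OrderedDict of fast weights passed in at call time.
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.linear = nn.Linear(in_dim, out_dim)

    def forward(self, x, weights=None, prefix="linear."):
        if weights is None:
            return self.linear(x)  # plain forward with registered parameters
        # Functional forward with the supplied fast weights, so gradients can flow
        # back through an inner-loop update to the meta-parameters.
        return F.linear(x, weights[prefix + "weight"], weights[prefix + "bias"])

layer = FunctionalLinear(4, 8)
fast = OrderedDict(layer.named_parameters())  # e.g. after one inner-loop SGD step
out = layer(torch.randn(2, 4), weights=fast)  # shape: (2, 8)

Skipping fast weights for the encoder also avoids carrying the inner-loop graph through the 1.19M-parameter encoder, which is where most of MAML's memory cost would otherwise go (the decoder has only 0.08M parameters, per the comments above).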
63 changes: 61 additions & 2 deletions POMO/TSP/TSPTester.py
@@ -8,8 +8,9 @@
from TSPEnv import TSPEnv as Env
from TSPModel import TSPModel as Model

from baselines import solve_all_gurobi
from utils.utils import *
from utils.functions import load_dataset
from utils.functions import load_dataset, save_dataset


class TSPTester:
@@ -64,6 +65,22 @@ def __init__(self,
self.time_estimator = TimeEstimator()

def run(self):
if self.tester_params['test_robustness']:
episode = 0
test_data = torch.Tensor(self.test_data)
opt_sol = [0] * test_data.size(0)
while episode < test_data.size(0):
remaining = test_data.size(0) - episode
batch_size = min(self.tester_params['test_batch_size'], remaining)
data = torch.Tensor(test_data[episode: episode + batch_size])
test_data[episode: episode + batch_size], opt_sol[episode: episode + batch_size] = self._generate_x_adv(data, eps=50.0)
episode += batch_size
self.test_data = test_data.cpu().numpy()
self.opt_sol = [i[0] for i in opt_sol]
# save the adv dataset
filename = os.path.split(self.tester_params['test_set_path'])[-1]
save_dataset(self.test_data, './adv_{}'.format(filename))
save_dataset(opt_sol, './sol_adv_{}'.format(filename))
if self.fine_tune_params['enable']:
# fine-tune model on fine-tune dataset (few-shot)
self._fine_tune_and_test()
@@ -207,7 +224,7 @@ def _fine_tune_one_batch(self, fine_tune_data):
log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).view(batch_size, -1)
# size = (batch, augmentation * pomo)
loss = -advantage * log_prob # Minus Sign: To Increase REWARD
# shape: (batch, augmentation * pomo)pretra
# shape: (batch, augmentation * pomo)
loss_mean = loss.mean()

# Score
@@ -218,3 +235,45 @@
self.optimizer.zero_grad()
loss_mean.backward()
self.optimizer.step()

def _generate_x_adv(self, data, eps=10.0):
"""
Generate adversarial data based on the current model; we also need to generate the optimal solutions for x_adv.
"""
from torch.autograd import Variable
def minmax(xy_):
# min_max normalization: [b,n,2]
xy_ = (xy_ - xy_.min(dim=1, keepdims=True)[0]) / (xy_.max(dim=1, keepdims=True)[0] - xy_.min(dim=1, keepdims=True)[0])
return xy_

# generate x_adv
self.model.eval()
aug_factor, batch_size = 1, data.size(0)
with torch.enable_grad():
data.requires_grad_()
self.env.load_problems(batch_size, problems=data, aug_factor=aug_factor)
# print(self.env.problems.requires_grad)
reset_state, _, _ = self.env.reset()
self.model.pre_forward(reset_state)
prob_list = torch.zeros(size=(aug_factor * batch_size, self.env.pomo_size, 0))
state, reward, done = self.env.pre_step()
while not done:
selected, prob = self.model(state)
state, reward, done = self.env.step(selected)
prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)

aug_reward = reward.reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).view(batch_size, -1)
baseline_reward = aug_reward.float().mean(dim=1, keepdims=True)
advantage = aug_reward - baseline_reward
log_prob = prob_list.log().sum(dim=2).reshape(aug_factor, batch_size, self.env.pomo_size).permute(1, 0, 2).view(batch_size, -1)

# delta = torch.autograd.grad(eps * ((advantage / baseline_reward) * log_prob).mean(), data)[0]
delta = torch.autograd.grad(eps * ((-advantage) * log_prob).mean(), data)[0]
data = data.detach() + delta
data = minmax(data)
data = Variable(data, requires_grad=False)

# generate opt sol
opt_sol = solve_all_gurobi(data)

return data, opt_sol
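In equation form, _generate_x_adv takes one gradient-ascent step on the REINFORCE loss with respect to the node coordinates and then re-normalizes each instance back into the unit square (notation follows the code: A is the POMO advantage, log p_theta the summed tour log-probability, eps the step size):

\[
\delta = \nabla_{x}\Big(\epsilon \cdot \mathbb{E}\big[(-A)\,\log p_\theta(\tau \mid x)\big]\Big),
\qquad
x_{\mathrm{adv}} = \mathrm{minmax}(x + \delta),
\]

where minmax rescales each instance's coordinates to [0, 1] per axis. The optimal solutions of the perturbed instances are then recomputed with solve_all_gurobi.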
75 changes: 63 additions & 12 deletions POMO/TSP/TSPTrainer_Meta.py
@@ -10,6 +10,7 @@
from TSPModel import TSPModel as Model

from torch.optim import Adam as Optimizer
# from torch.optim import SGD as Optimizer
from torch.optim.lr_scheduler import MultiStepLR as Scheduler
from TSProblemDef import get_random_problems, generate_task_set

@@ -20,11 +21,11 @@
class TSPTrainer:
"""
TODO: 1. Which val data? And for the k inner-loop steps, should we use the same batch of training data?
2. only meta-update partial para of pomo?
Implementation of POMO with MAML / FOMAML / Reptile.
For MAML & FOMAML, ref to "Model-Agnostic Meta-Learning for Fast Adaptation of Deep Networks";
For Reptile, ref to "On First-Order Meta-Learning Algorithms".
Refer to "https://lilianweng.github.io/posts/2018-11-30-meta-learning"
MAML's time and space complexity (i.e., GPU memory) is high, so we only update the decoder in the inner loop (similar performance).
"""
def __init__(self,
env_params,
@@ -60,9 +61,6 @@ def __init__(self,
self.meta_optimizer = Optimizer(self.meta_model.parameters(), **self.optimizer_params['optimizer'])
self.alpha = self.meta_params['alpha'] # for reptile
self.task_set = generate_task_set(self.meta_params)
# assert self.trainer_params['meta_params']['epochs'] == math.ceil((self.trainer_params['epochs'] * self.trainer_params['train_episodes']) / (
# self.trainer_params['meta_params']['B'] * self.trainer_params['meta_params']['k'] *
# self.trainer_params['meta_params']['meta_batch_size'])), ">> meta-learning iteration does not match with POMO!"

# Restore
self.start_epoch = 1
@@ -139,7 +137,6 @@ def run(self):
self.logger.info(" *** Training Done *** ")
self.logger.info("Now, printing log array...")
util_print_log_array(self.logger, self.result_log)
print(val_res)

def _train_one_epoch(self, epoch):
"""
Expand All @@ -162,7 +159,12 @@ def _train_one_epoch(self, epoch):
task_model = copy.deepcopy(self.meta_model)
optimizer = Optimizer(task_model.parameters(), **self.optimizer_params['optimizer'])
elif self.meta_params['meta_method'] == 'maml':
fast_weight = OrderedDict(self.meta_model.named_parameters())
if self.model_params['meta_update_encoder']:
fast_weight = OrderedDict(self.meta_model.named_parameters())
else:
fast_weight = OrderedDict(self.meta_model.decoder.named_parameters())
for k in list(fast_weight.keys()):
fast_weight["decoder."+k] = fast_weight.pop(k)

for step in range(self.meta_params['k'] + 1):
# generate task-specific data
@@ -291,6 +293,7 @@ def _train_one_batch_maml(self, fast_weight, data, env):
return score_mean, loss_mean, fast_weight

def _fast_val(self, model, data=None, val_episodes=32, mode="eval"):

aug_factor = 1
if data is None:
val_path = "../../data/TSP/tsp150_uniform.pkl"
@@ -304,11 +307,11 @@ def _fast_val(self, model, data=None, val_episodes=32, mode="eval"):
env.load_problems(batch_size, problems=data, aug_factor=aug_factor)
reset_state, _, _ = env.reset()
model.pre_forward(reset_state)
state, reward, done = env.pre_step()
while not done:
selected, _ = model(state)
# shape: (batch, pomo)
state, reward, done = env.step(selected)
state, reward, done = env.pre_step()
while not done:
selected, _ = model(state)
# shape: (batch, pomo)
state, reward, done = env.step(selected)
elif mode in ["maml", "fomaml"]:
fast_weight = model
env.load_problems(batch_size, problems=data, aug_factor=aug_factor)
@@ -328,7 +331,11 @@ def _fast_val(self, model, data=None, val_episodes=32, mode="eval"):
state, reward, done = env.step(selected)
prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)
# Loss
advantage = reward - reward.float().mean(dim=1, keepdims=True)
if self.meta_params['bootstrap_steps'] != 0:
bootstrap_reward = self._bootstrap(fast_weight, data)
advantage = reward - bootstrap_reward
else:
advantage = reward - reward.float().mean(dim=1, keepdims=True)
log_prob = prob_list.log().sum(dim=2) # for the first/last node, p=1 -> log_p=0
loss = -advantage * log_prob # Minus Sign: To Increase REWARD
loss_mean = loss.mean()
@@ -347,7 +354,48 @@ def _fast_val(self, model, data=None, val_episodes=32, mode="eval"):
else:
return loss_mean

def _bootstrap(self, fast_weight, data):
"""
Bootstrap using a smaller lr;
only supported for MAML for now.
"""
bootstrap_weight = fast_weight
batch_size = data.size(0)
bootstrap_reward = torch.full((batch_size, 1), float("-inf"))
with torch.enable_grad():
for L in range(self.meta_params['bootstrap_steps']):
env = Env(**{'problem_size': data.size(1), 'pomo_size': data.size(1)})
env.load_problems(batch_size, problems=data, aug_factor=1)
reset_state, _, _ = env.reset()
self.meta_model.pre_forward(reset_state, weights=bootstrap_weight)
prob_list = torch.zeros(size=(batch_size, env.pomo_size, 0))
state, reward, done = env.pre_step()
while not done:
selected, prob = self.meta_model(state, weights=bootstrap_weight)
state, reward, done = env.step(selected)
prob_list = torch.cat((prob_list, prob[:, :, None]), dim=2)

advantage = reward - reward.float().mean(dim=1, keepdims=True)
log_prob = prob_list.log().sum(dim=2)
loss = -advantage * log_prob
loss_mean = loss.mean()

gradients = torch.autograd.grad(loss_mean, bootstrap_weight.values(), create_graph=False)
bootstrap_weight = OrderedDict(
(name, param - self.optimizer_params['optimizer']['lr'] * grad)
for ((name, param), grad) in zip(bootstrap_weight.items(), gradients)
)

max_pomo_reward, _ = reward.max(dim=1)
max_pomo_reward = max_pomo_reward.view(-1, 1)
bootstrap_reward = torch.where(max_pomo_reward > bootstrap_reward, max_pomo_reward, bootstrap_reward)
score_mean, bootstrap_mean = -max_pomo_reward.float().mean(), -bootstrap_reward.float().mean()
print("Bootstrap step {}: score_mean {}, bootstrap_mean {}".format(L, score_mean, bootstrap_mean))

return bootstrap_reward
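When bootstrap_steps != 0, the baseline that _fast_val subtracts is the best per-instance POMO reward observed over these L extra gradient steps (step 0 evaluates the current fast weights), i.e.

\[
R_{\mathrm{bs}} = \max_{0 \le \ell < L}\ \max_{i}\ R\big(\tau_i^{\theta_\ell}\big),
\qquad
A = R - R_{\mathrm{bs}},
\]

so the advantage is measured against a bootstrapped target rather than the usual mean-over-POMO baseline.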

def _get_data(self, batch_size, task_params):

if self.meta_params['data_type'] == 'distribution':
assert len(task_params) == 2
data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[-1], distribution='gaussian_mixture')
@@ -363,4 +411,7 @@ def _get_data(self, batch_size, task_params):
return data

def _alpha_scheduler(self, iter):
"""
Decay alpha (the Reptile step size).
"""
self.alpha = max(self.alpha * self.meta_params['alpha_decay'], 0.0001)
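The alpha decayed above is the Reptile interpolation coefficient; the outer-loop update itself is not part of this diff. As a reminder, the standard Reptile step from "On First-Order Meta-Learning Algorithms" (cited in the class docstring) interpolates the meta-parameters toward the task-adapted parameters. A minimal sketch, with illustrative names not copied from the repository:

import torch

@torch.no_grad()
def reptile_outer_update(meta_model, task_model, alpha):
    # Standard Reptile step: theta <- theta + alpha * (phi - theta),
    # where phi are the task-adapted parameters after the k inner-loop steps.
    for meta_p, task_p in zip(meta_model.parameters(), task_model.parameters()):
        meta_p.add_(alpha * (task_p - meta_p))

With _alpha_scheduler, alpha is updated multiplicatively once per call and floored at 1e-4, so later meta-iterations move the meta-parameters more conservatively (assuming alpha_decay < 1).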
7 changes: 3 additions & 4 deletions POMO/TSP/TSPTrainer_pomo.py
@@ -10,6 +10,7 @@
from TSPModel import TSPModel as Model

from torch.optim import Adam as Optimizer
# from torch.optim import SGD as Optimizer
from torch.optim.lr_scheduler import MultiStepLR as Scheduler
from TSProblemDef import get_random_problems, generate_task_set

@@ -54,9 +55,6 @@ def __init__(self,
self.meta_model = Model(**self.model_params)
self.optimizer = Optimizer(self.meta_model.parameters(), **self.optimizer_params['optimizer'])
self.task_set = generate_task_set(self.meta_params)
# assert self.trainer_params['meta_params']['epochs'] == math.ceil((self.trainer_params['epochs'] * self.trainer_params['train_episodes']) / (
# self.trainer_params['meta_params']['B'] * self.trainer_params['meta_params']['k'] *
# self.trainer_params['meta_params']['meta_batch_size'])), ">> meta-learning iteration does not match with POMO!"

# Restore
self.start_epoch = 1
@@ -130,7 +128,6 @@ def run(self):
self.logger.info(" *** Training Done *** ")
self.logger.info("Now, printing log array...")
util_print_log_array(self.logger, self.result_log)
print(val_res)

def _train_one_epoch(self, epoch):
"""
@@ -149,6 +146,7 @@

for step in range(self.meta_params['k']):
# generate task-specific data
# task_params = random.sample(self.task_set, 1)[0]
if self.meta_params['data_type'] == 'distribution':
assert len(task_params) == 2
data = get_random_problems(batch_size, self.env_params['problem_size'], num_modes=task_params[0], cdist=task_params[-1], distribution='gaussian_mixture')
@@ -205,6 +203,7 @@ def _train_one_batch(self, data, env):
# Score
max_pomo_reward, _ = reward.max(dim=1) # get best results from pomo
score_mean = -max_pomo_reward.float().mean() # negative sign to make positive value
print(score_mean)

return score_mean, loss_mean

20 changes: 10 additions & 10 deletions POMO/TSP/test_n100.py
@@ -15,8 +15,8 @@
# parameters

env_params = {
'problem_size': 50,
'pomo_size': 50,
'problem_size': 100,
'pomo_size': 100,
}

model_params = {
@@ -35,22 +35,23 @@
'cuda_device_num': CUDA_DEVICE_NUM,
'seed': 2023,
'model_load': {
'path': '../../pretrained/tsp50_exp1/pomo_k', # directory path of pre-trained model and log files saved.
'epoch': 52084, # epoch version of pre-trained model to load.
'path': '../../pretrained/var_size_exp1/pomo_adam', # directory path of pre-trained model and log files saved.
'epoch': 78125, # epoch version of pre-trained model to load.
},
'test_episodes': 10000,
'test_episodes': 7000,
'test_batch_size': 10000,
'augmentation_enable': True,
'test_robustness': False,
'aug_factor': 8,
'aug_batch_size': 100,
'test_set_path': '../../data/TSP/tsp50_cluster.pkl',
'test_set_opt_sol_path': '../../data/TSP/gurobi/tsp50_cluster.pkl',
'aug_batch_size': 50,
'test_set_path': './adv_tsp100_uniform.pkl', # '../../data/TSP/tsp100_uniform.pkl'
'test_set_opt_sol_path': './sol_adv_tsp100_uniform.pkl', # '../../data/TSP/gurobi/tsp100_uniform.pkl'
'fine_tune_params': {
'enable': True, # evaluate few-shot generalization
'fine_tune_episodes': 3000, # how many instances are used to fine-tune the pretrained model
'k': 50, # gradient descent steps in the inner-loop optimization of the meta-learning method
'fine_tune_batch_size': 64, # the batch size of the inner-loop optimization
'fine_tune_set_path': '../../data/TSP/tsp50_cluster.pkl',
'fine_tune_set_path': './adv_tsp100_uniform.pkl',
'augmentation_enable': False,
'optimizer': {
'lr': 1e-4 * 0.1,
@@ -101,5 +102,4 @@ def _print_config():

if __name__ == "__main__":
# TODO: 1. why not use test dataset to fine-tune the model?
# 2. the implementation of our method
main()