From 4a7e5fb43ba8d606d5ef0f5a2a9ab5b45cc903f2 Mon Sep 17 00:00:00 2001
From: irisliucy
Date: Sun, 28 Jun 2020 20:54:34 -0700
Subject: [PATCH 1/4] Add vanilla GRU

---
 .mdlrc                                        |  1 -
 src/garage/torch/modules/__init__.py          |  3 ++
 src/garage/torch/modules/gru_module.py        | 42 +++++++++++++++++++
 tests/garage/torch/modules/test_gru_module.py | 25 +++++++++++
 4 files changed, 70 insertions(+), 1 deletion(-)
 delete mode 100644 .mdlrc
 create mode 100644 src/garage/torch/modules/gru_module.py
 create mode 100644 tests/garage/torch/modules/test_gru_module.py

diff --git a/.mdlrc b/.mdlrc
deleted file mode 100644
index 2a622e8dab..0000000000
--- a/.mdlrc
+++ /dev/null
@@ -1 +0,0 @@
-tags "tables", "~MD013"
diff --git a/src/garage/torch/modules/__init__.py b/src/garage/torch/modules/__init__.py
index 1e07d6b04a..9d3f38ac46 100644
--- a/src/garage/torch/modules/__init__.py
+++ b/src/garage/torch/modules/__init__.py
@@ -1,3 +1,4 @@
+<<<<<<< HEAD
 """PyTorch Modules."""
 # yapf: disable
 # isort:skip_file
@@ -8,6 +9,7 @@ from garage.torch.modules.gaussian_mlp_module import (
     GaussianMLPTwoHeadedModule)  # noqa: E501
 from garage.torch.modules.gaussian_mlp_module import GaussianMLPModule
+from garage.torch.modules.gru_module import GRUModule
 from garage.torch.modules.mlp_module import MLPModule
 from garage.torch.modules.multi_headed_mlp_module import MultiHeadedMLPModule
 # DiscreteCNNModule must go after MLPModule
@@ -23,4 +25,5 @@
     'GaussianMLPModule',
     'GaussianMLPIndependentStdModule',
     'GaussianMLPTwoHeadedModule',
+    'GRUModule',
 ]
diff --git a/src/garage/torch/modules/gru_module.py b/src/garage/torch/modules/gru_module.py
new file mode 100644
index 0000000000..024ab5ba91
--- /dev/null
+++ b/src/garage/torch/modules/gru_module.py
@@ -0,0 +1,42 @@
+"""GRU in PyTorch."""
+import torch
+from torch import nn
+from torch.autograd import Variable
+
+
+class GRUModule(nn.Module):
+
+    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim,
+                 bias=True):
+        super().__init__()
+        # Hidden dimensions
+        self._hidden_dim = hidden_dim
+        # Number of hidden layers
+        self._layer_dim = layer_dim
+        self._gru_cell = nn.GRUCell(input_dim, hidden_dim)
+        # self.gru_cell = GRUCell(input_dim, hidden_dim, layer_dim)
+        self._fc = nn.Linear(hidden_dim, output_dim)
+
+    def forward(self, x):
+
+        # Initialize hidden state with zeros
+        # print(x.shape)  # (100, 28, 28)
+        if torch.cuda.is_available():
+            h0 = Variable(
+                torch.zeros(self._layer_dim, x.size(0),
+                            self._hidden_dim).cuda())
+        else:
+            h0 = Variable(
+                torch.zeros(self._layer_dim, x.size(0), self._hidden_dim))
+
+        outs = []
+        hn = h0[0, :, :]
+
+        for seq in range(x.size(1)):
+            hn = self._gru_cell(x[:, seq, :], hn)
+            outs.append(hn)
+        out = outs[-1].squeeze()
+        out = self._fc(out)
+        # out.size() --> 100, 10
+        # return out
+        return outs, out, hn, h0
diff --git a/tests/garage/torch/modules/test_gru_module.py b/tests/garage/torch/modules/test_gru_module.py
new file mode 100644
index 0000000000..e1d457372e
--- /dev/null
+++ b/tests/garage/torch/modules/test_gru_module.py
@@ -0,0 +1,25 @@
+"""Test GRUModule."""
+import torch
+
+from garage.torch.modules import GRUModule
+
+
+class TestGRUModule:
+    """Test GRUModule."""
+
+    def setup_method(self):
+        self.input_dim = 28
+        self.hidden_dim = 128
+        self.layer_dim = 1  # number of stacked GRU layers
+        self.output_dim = 10
+
+        self.batch_size = 100
+        self.input = torch.zeros(
+            (self.batch_size, self.input_dim, self.input_dim))
+
+    def test_output_values(self):
+        model = GRUModule(self.input_dim, self.hidden_dim, self.layer_dim,
+                          self.output_dim)
+
+        output = model(self.input)  # read step output
+        assert output[1].size() == (self.batch_size, self.output_dim)
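
Note: a minimal standalone sketch of the unrolling that GRUModule's forward
performs, using the same shapes as the test above ((100, 28, 28) input,
(100, 10) final output). The variable names here are illustrative, not part
of the patch:

    import torch
    from torch import nn

    batch_size, seq_len, input_dim = 100, 28, 28
    hidden_dim, output_dim = 128, 10

    cell = nn.GRUCell(input_dim, hidden_dim)
    fc = nn.Linear(hidden_dim, output_dim)

    x = torch.zeros(batch_size, seq_len, input_dim)
    h = torch.zeros(batch_size, hidden_dim)  # initial hidden state

    outs = []
    for t in range(seq_len):          # unroll the cell over the time axis
        h = cell(x[:, t, :], h)       # h: (batch_size, hidden_dim)
        outs.append(h)

    out = fc(outs[-1])                # project only the last step
    assert out.shape == (batch_size, output_dim)
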
From d2b79e871935dd4dac69ff3f73a95e3f6bd2be39 Mon Sep 17 00:00:00 2001
From: irisliucy
Date: Mon, 29 Jun 2020 13:10:10 -0700
Subject: [PATCH 2/4] Add Gaussian GRU policy

---
 src/garage/torch/modules/__init__.py          |   3 +
 .../torch/modules/gaussian_gru_module.py      | 195 ++++++++++++++++++
 src/garage/torch/modules/gru_module.py        |  13 +-
 src/garage/torch/policies/__init__.py         |   2 +
 .../torch/policies/gaussian_gru_policy.py     |  63 ++++++
 tests/garage/torch/modules/test_gru_module.py |  23 ++-
 6 files changed, 294 insertions(+), 5 deletions(-)
 create mode 100644 src/garage/torch/modules/gaussian_gru_module.py
 create mode 100644 src/garage/torch/policies/gaussian_gru_policy.py

diff --git a/src/garage/torch/modules/__init__.py b/src/garage/torch/modules/__init__.py
index 9d3f38ac46..2e298ed91d 100644
--- a/src/garage/torch/modules/__init__.py
+++ b/src/garage/torch/modules/__init__.py
@@ -1,9 +1,11 @@
 <<<<<<< HEAD
+<<<<<<< HEAD
 """PyTorch Modules."""
 # yapf: disable
 # isort:skip_file
 from garage.torch.modules.categorical_cnn_module import CategoricalCNNModule
 from garage.torch.modules.cnn_module import CNNModule
+from garage.torch.modules.gaussian_gru_module import GaussianGRUModule
 from garage.torch.modules.gaussian_mlp_module import (
     GaussianMLPIndependentStdModule)  # noqa: E501
 from garage.torch.modules.gaussian_mlp_module import (
@@ -25,5 +27,6 @@
     'GaussianMLPModule',
     'GaussianMLPIndependentStdModule',
     'GaussianMLPTwoHeadedModule',
+    'GaussianGRUModule',
     'GRUModule',
 ]
diff --git a/src/garage/torch/modules/gaussian_gru_module.py b/src/garage/torch/modules/gaussian_gru_module.py
new file mode 100644
index 0000000000..a80a11a595
--- /dev/null
+++ b/src/garage/torch/modules/gaussian_gru_module.py
@@ -0,0 +1,195 @@
+"""Gaussian GRU Module."""
+import abc
+
+import torch
+from torch import nn
+from torch.distributions import Normal
+from torch.distributions.independent import Independent
+
+from garage.torch.distributions import TanhNormal
+from garage.torch.modules import GRUModule
+
+
+class GaussianGRUBaseModule(nn.Module):
+    """Gaussian GRU Module.
+
+    A model represented by a Gaussian distribution
+    which is parameterized by a Gated Recurrent Unit (GRU).
+    """
+
+    def __init__(
+            self,
+            input_dim,
+            output_dim,
+            hidden_dim=(32, 32),
+            hidden_nonlinearity=torch.tanh,
+            hidden_w_init=nn.init.xavier_uniform_,
+            hidden_b_init=nn.init.zeros_,
+            recurrent_nonlinearity=torch.sigmoid,
+            recurrent_w_init=nn.init.xavier_uniform_,
+            output_nonlinearity=None,
+            output_w_init=nn.init.xavier_uniform_,
+            output_b_init=nn.init.zeros_,
+            # hidden_state_init=nn.init.zeros_,
+            # hidden_state_init_trainable=False,
+            learn_std=True,
+            init_std=1.0,
+            # min_std=1e-6,
+            # max_std=None,
+            # std_share_network=False,
+            std_parameterization='exp',
+            layer_normalization=False,
+            normal_distribution_cls=Normal):
+        super().__init__()
+        self._input_dim = input_dim
+        self._output_dim = output_dim
+        self._hidden_dim = hidden_dim
+        self._hidden_nonlinearity = hidden_nonlinearity
+        self._hidden_w_init = hidden_w_init
+        self._hidden_b_init = hidden_b_init
+        self._recurrent_nonlinearity = recurrent_nonlinearity
+        self._recurrent_w_init = recurrent_w_init
+        self._output_nonlinearity = output_nonlinearity
+        self._output_w_init = output_w_init
+        self._output_b_init = output_b_init
+        # self._hidden_state_init = hidden_state_init
+        # self._hidden_state_init_trainable = hidden_state_init_trainable
+        self._learn_std = learn_std
+        # self._min_std = min_std
+        # self._max_std = max_std
+        # self._std_share_network = std_share_network
+        self._std_parameterization = std_parameterization
+        self._layer_normalization = layer_normalization
+
+        if self._std_parameterization not in ('exp', 'softplus'):
+            raise NotImplementedError
+        init_std_param = torch.Tensor([init_std]).log()
+        if self._learn_std:
+            self._init_std = torch.nn.Parameter(init_std_param)
+        else:
+            self._init_std = init_std_param
+            self.register_buffer('init_std', self._init_std)
+ """ + + def __init__( + self, + input_dim, + output_dim, + hidden_dim=(32, 32), + hidden_nonlinearity=torch.tanh, + hidden_w_init=nn.init.xavier_uniform_, + hidden_b_init=nn.init.zeros_, + recurrent_nonlinearity=torch.sigmoid, + recurrent_w_init=nn.init.xavier_uniform_, + output_nonlinearity=None, + output_w_init=nn.init.xavier_uniform_, + output_b_init=nn.init.zeros_, + # hidden_state_init=nn.init.zeros_, + # hidden_state_init_trainable=False, + learn_std=True, + init_std=1.0, + # min_std=1e-6, + # max_std=None, + # std_share_network=False, + std_parameterization='exp', + layer_normalization=False, + normal_distribution_cls=Normal): + super().__init__() + self._input_dim = input_dim + self._output_dim = output_dim + self._hidden_dim = hidden_dim + self._hidden_nonlinearity = hidden_nonlinearity + self._hidden_w_init = hidden_w_init + self._hidden_b_init = hidden_b_init + self._recurrent_nonlinearity = recurrent_nonlinearity + self._recurrent_w_init = recurrent_w_init + self._output_nonlinearity = output_nonlinearity + self._output_w_init = output_w_init + self._output_b_init = output_b_init + # self._hidden_state_init = hidden_state_init + # self._hidden_state_init_trainable = hidden_state_init_trainable + self._learn_std = learn_std + # self._min_std = min_std + # self._max_std = max_std + # self._std_share_network = std_share_network + self._std_parameterization = std_parameterization, + self._layer_normalization = layer_normalization + + if self._std_parameterization not in ('exp', 'softplus'): + raise NotImplementedError + init_std_param = torch.Tensor([init_std]).log() + if self._learn_std: + self._init_std = torch.nn.Parameter(init_std_param) + else: + self._init_std = init_std_param + self.register_buffer('init_std', self._init_std) + + def to(self, *args, **kwargs): + """Move the module to the specified device. + + Args: + *args: args to pytorch to function. + **kwargs: keyword args to pytorch to function. + + """ + super().to(*args, **kwargs) + buffers = dict(self.named_buffers()) + if not isinstance(self._init_std, torch.nn.Parameter): + self._init_std = buffers['init_std'] + + @abc.abstractmethod + def _get_mean_and_log_std(self, *inputs): + pass + + def forward(self): + """Forward method. + + Args: + *inputs: Input to the module. + + Returns: + torch.distributions.independent.Independent: Independent + distribution. + + """ + (mean, step_mean, log_std, step_log_std, step_hidden, + hidden_init) = self._get_mean_and_log_std(*inputs) + + if self._std_parameterization == 'exp': + std = log_std_var.exp() + else: + std = log_std_var.exp().exp().add(1.).log() + dist = self._norm_dist_class(mean, std) + if not isinstance(dist, TanhNormal): + # Makes it so that a sample from the distribution is treated as a + # single sample and not dist.batch_shape samples. + dist = Independent(dist, 1) + + # return dist + return (dist, step_mean, step_log_std, step_hidden, hidden_init) + + +class GaussianGRUModule(GaussianGRUBaseModule): + """GaussianMLPModule that mean and std share the same network. 
+
+
+class GaussianGRUModule(GaussianGRUBaseModule):
+    """GaussianGRUModule where mean and std share the same network.
+
+    """
+
+    def __init__(
+            self,
+            input_dim,
+            output_dim,
+            hidden_dim=(32, 32),
+            hidden_nonlinearity=torch.tanh,
+            hidden_w_init=nn.init.xavier_uniform_,
+            hidden_b_init=nn.init.zeros_,
+            recurrent_nonlinearity=torch.sigmoid,
+            recurrent_w_init=nn.init.xavier_uniform_,
+            output_nonlinearity=None,
+            output_w_init=nn.init.xavier_uniform_,
+            output_b_init=nn.init.zeros_,
+            # hidden_state_init=nn.init.zeros_,
+            learn_std=True,
+            init_std=1.0,
+            std_parameterization='exp',
+            layer_normalization=False,
+            normal_distribution_cls=Normal):
+        super().__init__(
+            input_dim=input_dim,
+            output_dim=output_dim,
+            hidden_dim=hidden_dim,
+            hidden_nonlinearity=hidden_nonlinearity,
+            hidden_w_init=hidden_w_init,
+            hidden_b_init=hidden_b_init,
+            output_nonlinearity=output_nonlinearity,
+            output_w_init=output_w_init,
+            output_b_init=output_b_init,
+            learn_std=learn_std,
+            init_std=init_std,
+            # min_std=min_std,
+            # max_std=max_std,
+            std_parameterization=std_parameterization,
+            layer_normalization=layer_normalization,
+            normal_distribution_cls=normal_distribution_cls)
+
+        self._mean_gru_module = GRUModule(input_dim=self._input_dim,
+                                          output_dim=self._output_dim,
+                                          hidden_dim=self._hidden_dim,
+                                          layer_dim=1)
+
+    def _get_mean_and_log_std(self, *inputs):
+        """Get mean and std of Gaussian distribution given inputs.
+
+        Args:
+            *inputs: Input to the module.
+
+        Returns:
+            torch.Tensor: The mean of Gaussian distribution.
+            torch.Tensor: The variance of Gaussian distribution.
+
+        """
+        assert len(inputs) == 1
+        (mean_outputs, step_mean_outputs, step_hidden,
+         hidden_init_var) = self._mean_gru_module(*inputs)
+
+        broadcast_shape = list(inputs.shape[:-1]) + [self._input_dim]
+        uncentered_log_std = torch.zeros(*broadcast_shape) + self._init_std
+
+        step_broadcast_shape = list(inputs[0].shape[:-1]) + [self._input_dim]
+        uncentered_step_log_std = torch.zeros(
+            *broadcast_shape) + self._init_std
+
+        return (mean_outputs, step_mean_outputs, uncentered_log_std,
+                uncentered_step_log_std, step_hidden, hidden_init_var)
+
+        # mean = self._mean_gru_module(*inputs)
+
+        # broadcast_shape = list(inputs[0].shape[:-1]) + [self._action_dim]
+        # uncentered_log_std = torch.zeros(*broadcast_shape) + self._init_std
+
+        # return mean, uncentered_log_std
diff --git a/src/garage/torch/modules/gru_module.py b/src/garage/torch/modules/gru_module.py
index 024ab5ba91..353e026b90 100644
--- a/src/garage/torch/modules/gru_module.py
+++ b/src/garage/torch/modules/gru_module.py
@@ -6,10 +6,15 @@ class GRUModule(nn.Module):
 
-    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim,
-                 bias=True):
+    def __init__(
+            self,
+            input_dim,
+            hidden_dim,
+            # hidden_nonlinearity,
+            layer_dim,
+            output_dim,
+            bias=True):
         super().__init__()
-        # Hidden dimensions
         self._hidden_dim = hidden_dim
         # Number of hidden layers
         self._layer_dim = layer_dim
@@ -37,6 +42,8 @@ def forward(self, x):
             outs.append(hn)
         out = outs[-1].squeeze()
         out = self._fc(out)
+        outs = torch.stack(outs)  # convert list of tensors to tensor
+        outs = self._fc(outs)
         # out.size() --> 100, 10
         # return out
         return outs, out, hn, h0
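
Note: the new torch.stack/self._fc lines rely on nn.Linear operating on the
last dimension of an N-D tensor, so all per-step hidden states can be
projected in one call. A minimal sketch (shapes match the test's defaults;
names are illustrative):

    import torch
    from torch import nn

    seq_len, batch_size, hidden_dim, output_dim = 28, 100, 128, 10
    outs = [torch.zeros(batch_size, hidden_dim) for _ in range(seq_len)]

    stacked = torch.stack(outs)        # (seq_len, batch_size, hidden_dim)
    fc = nn.Linear(hidden_dim, output_dim)
    projected = fc(stacked)            # Linear maps only the last dim
    assert projected.shape == (seq_len, batch_size, output_dim)
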
diff --git a/src/garage/torch/policies/__init__.py b/src/garage/torch/policies/__init__.py
index a6c1ffb609..24d2312f59 100644
--- a/src/garage/torch/policies/__init__.py
+++ b/src/garage/torch/policies/__init__.py
@@ -6,6 +6,7 @@
     DeterministicMLPPolicy)
 from garage.torch.policies.discrete_qf_argmax_policy import (
     DiscreteQFArgmaxPolicy)
+from garage.torch.policies.gaussian_gru_policy import GaussianGRUPolicy
 from garage.torch.policies.gaussian_mlp_policy import GaussianMLPPolicy
 from garage.torch.policies.policy import Policy
 from garage.torch.policies.tanh_gaussian_mlp_policy import (
@@ -15,6 +16,7 @@
     'CategoricalCNNPolicy',
     'DeterministicMLPPolicy',
     'DiscreteQFArgmaxPolicy',
+    'GaussianGRUPolicy',
     'GaussianMLPPolicy',
     'Policy',
     'TanhGaussianMLPPolicy',
diff --git a/src/garage/torch/policies/gaussian_gru_policy.py b/src/garage/torch/policies/gaussian_gru_policy.py
new file mode 100644
index 0000000000..53a07fb1ba
--- /dev/null
+++ b/src/garage/torch/policies/gaussian_gru_policy.py
@@ -0,0 +1,63 @@
+"""Gaussian GRU Policy."""
+import torch
+from torch import nn
+
+from garage.torch.modules import GaussianGRUModule
+from garage.torch.policies.stochastic_policy import StochasticPolicy
+
+
+class GaussianGRUPolicy(StochasticPolicy):
+    """Gaussian GRU Policy.
+    """
+
+    def __init__(
+            self,
+            env_spec,
+            hidden_sizes=(32, 32),
+            hidden_nonlinearity=torch.tanh,
+            hidden_w_init=nn.init.xavier_uniform_,
+            hidden_b_init=nn.init.zeros_,
+            output_nonlinearity=None,
+            output_w_init=nn.init.xavier_uniform_,
+            output_b_init=nn.init.zeros_,
+            learn_std=True,
+            init_std=1.0,
+            # min_std=1e-6,
+            # max_std=None,
+            std_parameterization='exp',
+            layer_normalization=False,
+            name='GaussianGRUPolicy'):
+        super().__init__(env_spec, name)
+        self._obs_dim = env_spec.observation_space.flat_dim
+        self._action_dim = env_spec.action_space.flat_dim
+        self._module = GaussianGRUModule(
+            input_dim=self._obs_dim,
+            output_dim=self._action_dim,
+            hidden_sizes=hidden_sizes,
+            hidden_nonlinearity=hidden_nonlinearity,
+            hidden_w_init=hidden_w_init,
+            hidden_b_init=hidden_b_init,
+            output_nonlinearity=output_nonlinearity,
+            output_w_init=output_w_init,
+            output_b_init=output_b_init,
+            learn_std=learn_std,
+            init_std=init_std,
+            # min_std=min_std,
+            # max_std=max_std,
+            std_parameterization=std_parameterization,
+            layer_normalization=layer_normalization)
+
+    def forward(self, observations):
+        """Compute the action distributions from the observations.
+
+        Args:
+            observations (torch.Tensor): Batch of observations on default
+                torch device.
+
+        Returns:
+            torch.distributions.Distribution: Batch distribution of actions.
+            dict[str, torch.Tensor]: Additional agent_info, as torch Tensors
+
+        """
+        dist = self._module(observations)
+        return (dist, dict(mean=dist.mean, log_std=(dist.variance**.5).log()))
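
Note: the (dist, info) pair returned by forward above follows garage's
StochasticPolicy contract. A standalone sketch of how a caller consumes it,
with a hand-built distribution standing in for the module's output:

    import torch
    from torch.distributions import Normal
    from torch.distributions.independent import Independent

    mean = torch.zeros(8, 2)      # (batch, action_dim)
    std = torch.ones(8, 2)
    dist = Independent(Normal(mean, std), 1)

    actions = dist.sample()                      # (8, 2)
    info = dict(mean=dist.mean, log_std=(dist.variance**.5).log())
    log_prob = dist.log_prob(actions)            # (8,), one value per row
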
diff --git a/tests/garage/torch/modules/test_gru_module.py b/tests/garage/torch/modules/test_gru_module.py
index e1d457372e..ee0ea22885 100644
--- a/tests/garage/torch/modules/test_gru_module.py
+++ b/tests/garage/torch/modules/test_gru_module.py
@@ -1,4 +1,7 @@
 """Test GRUModule."""
+import pickle
+import numpy as np
+
 import torch
 
 from garage.torch.modules import GRUModule
@@ -21,5 +24,21 @@ def test_output_values(self):
         model = GRUModule(self.input_dim, self.hidden_dim, self.layer_dim,
                           self.output_dim)
 
-        output = model(self.input)  # read step output
-        assert output[1].size() == (self.batch_size, self.output_dim)
+        outputs, output, _, _ = model(self.input)  # read step output
+        assert output.size() == (self.batch_size, self.output_dim
+                                 )  # (batch_size, output_dim)
+        assert outputs.shape == (self.input_dim, self.batch_size,
+                                 self.output_dim
+                                 )  # (input_dim, batch_size, output_dim)
+
+    def test_is_pickleable(self):
+        model = GRUModule(self.input_dim, self.hidden_dim, self.layer_dim,
+                          self.output_dim)
+        outputs1, output1, _, _ = model(self.input)
+
+        h = pickle.dumps(model)
+        model_pickled = pickle.loads(h)
+        outputs2, output2, _, _ = model_pickled(self.input)
+
+        assert np.array_equal(torch.all(torch.eq(outputs1, outputs2)), True)
+        assert np.array_equal(torch.all(torch.eq(output1, output2)), True)
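
Note: test_is_pickleable above uses the usual dumps/loads round-trip to check
that a restored module reproduces the same outputs. The same pattern on a
plain PyTorch module, for illustration:

    import pickle

    import torch
    from torch import nn

    model = nn.GRUCell(28, 128)
    x, h = torch.zeros(100, 28), torch.zeros(100, 128)
    out1 = model(x, h)

    clone = pickle.loads(pickle.dumps(model))
    out2 = clone(x, h)
    assert torch.equal(out1, out2)   # same weights, same outputs
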
From c1d9a175ec7376c19e58fe4d6567f2cc4dfd2254 Mon Sep 17 00:00:00 2001
From: irisliucy
Date: Tue, 30 Jun 2020 08:24:28 -0700
Subject: [PATCH 3/4] Add PPO & TRPO examples

---
 examples/torch/ppo_pendulum_gru.py            | 55 +++++++++++++++++++
 examples/torch/trpo_pendulum.py               |  2 +-
 examples/torch/trpo_pendulum_gru.py           | 55 +++++++++++++++++++
 .../torch/modules/gaussian_gru_module.py      | 19 +++----
 src/garage/torch/modules/gru_module.py        | 13 +++--
 .../torch/policies/gaussian_gru_policy.py     | 12 ++--
 .../torch/policies/gaussian_mlp_policy.py     |  1 +
 7 files changed, 136 insertions(+), 21 deletions(-)
 create mode 100755 examples/torch/ppo_pendulum_gru.py
 create mode 100755 examples/torch/trpo_pendulum_gru.py

diff --git a/examples/torch/ppo_pendulum_gru.py b/examples/torch/ppo_pendulum_gru.py
new file mode 100755
index 0000000000..2a7e5416c7
--- /dev/null
+++ b/examples/torch/ppo_pendulum_gru.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+"""This is an example to train a task with PPO algorithm (PyTorch).
+
+Here it runs InvertedDoublePendulum-v2 environment with 100 iterations.
+"""
+import torch
+
+from garage import wrap_experiment
+from garage.envs import GarageEnv
+from garage.experiment import LocalRunner
+from garage.experiment.deterministic import set_seed
+from garage.torch.algos import PPO
+from garage.torch.policies import GaussianGRUPolicy
+from garage.torch.value_functions import GaussianMLPValueFunction
+
+
+@wrap_experiment
+def ppo_pendulum(ctxt=None, seed=1):
+    """Train PPO with InvertedDoublePendulum-v2 environment.
+
+    Args:
+        ctxt (garage.experiment.ExperimentContext): The experiment
+            configuration used by LocalRunner to create the snapshotter.
+        seed (int): Used to seed the random number generator to produce
+            determinism.
+
+    """
+    set_seed(seed)
+    env = GarageEnv(env_name='InvertedDoublePendulum-v2')
+
+    runner = LocalRunner(ctxt)
+
+    policy = GaussianGRUPolicy(
+        env.spec,
+        hidden_dim=64,
+        hidden_nonlinearity=torch.tanh,
+        output_nonlinearity=None)
+
+    value_function = GaussianMLPValueFunction(env_spec=env.spec,
+                                              hidden_sizes=(32, 32),
+                                              hidden_nonlinearity=torch.tanh,
+                                              output_nonlinearity=None)
+
+    algo = PPO(env_spec=env.spec,
+               policy=policy,
+               value_function=value_function,
+               max_path_length=100,
+               discount=0.99,
+               center_adv=False)
+
+    runner.setup(algo, env)
+    runner.train(n_epochs=100, batch_size=10000)
+
+
+ppo_pendulum(seed=1)
diff --git a/examples/torch/trpo_pendulum.py b/examples/torch/trpo_pendulum.py
index 4ea973e175..3cbdbb138c 100755
--- a/examples/torch/trpo_pendulum.py
+++ b/examples/torch/trpo_pendulum.py
@@ -9,7 +9,7 @@
 from garage.envs import GymEnv
 from garage.experiment.deterministic import set_seed
 from garage.torch.algos import TRPO
-from garage.torch.policies import GaussianMLPPolicy
+from garage.torch.policies import GaussianMLPPolicy, GaussianGRUPolicy
 from garage.torch.value_functions import GaussianMLPValueFunction
 from garage.trainer import Trainer
 
diff --git a/examples/torch/trpo_pendulum_gru.py b/examples/torch/trpo_pendulum_gru.py
new file mode 100755
index 0000000000..6691345d69
--- /dev/null
+++ b/examples/torch/trpo_pendulum_gru.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+"""This is an example to train a task with TRPO algorithm (PyTorch).
+
+Here it runs InvertedDoublePendulum-v2 environment with 100 iterations.
+"""
+import torch
+
+from garage import wrap_experiment
+from garage.envs import GarageEnv
+from garage.experiment import LocalRunner
+from garage.experiment.deterministic import set_seed
+from garage.torch.algos import TRPO
+from garage.torch.policies import GaussianGRUPolicy
+from garage.torch.value_functions import GaussianMLPValueFunction
+
+
+@wrap_experiment
+def trpo_pendulum_gru(ctxt=None, seed=1):
+    """Train TRPO with InvertedDoublePendulum-v2 environment.
+
+    Args:
+        ctxt (garage.experiment.ExperimentContext): The experiment
+            configuration used by LocalRunner to create the snapshotter.
+        seed (int): Used to seed the random number generator to produce
+            determinism.
+
+    """
+    set_seed(seed)
+    env = GarageEnv(env_name='InvertedDoublePendulum-v2')
+
+    runner = LocalRunner(ctxt)
+
+    policy = GaussianGRUPolicy(env.spec,
+                               hidden_dim=32,
+                               hidden_nonlinearity=torch.tanh,
+                               output_nonlinearity=None)
+
+    value_function = GaussianMLPValueFunction(env_spec=env.spec,
+                                              hidden_sizes=(32, 32),
+                                              hidden_nonlinearity=torch.tanh,
+                                              output_nonlinearity=None)
+
+    algo = TRPO(env_spec=env.spec,
+                policy=policy,
+                value_function=value_function,
+                max_path_length=100,
+                discount=0.99,
+                center_adv=False)
+
+    runner.setup(algo, env)
+    runner.train(n_epochs=100, batch_size=1024)
+
+
+trpo_pendulum_gru(seed=1)
diff --git a/src/garage/torch/modules/gaussian_gru_module.py b/src/garage/torch/modules/gaussian_gru_module.py
index a80a11a595..b07904a253 100644
--- a/src/garage/torch/modules/gaussian_gru_module.py
+++ b/src/garage/torch/modules/gaussian_gru_module.py
@@ -7,7 +7,7 @@
 from torch.distributions.independent import Independent
 
 from garage.torch.distributions import TanhNormal
-from garage.torch.modules import GRUModule
+from garage.torch.modules.gru_module import GRUModule
 
 
 class GaussianGRUBaseModule(nn.Module):
@@ -60,9 +60,10 @@
         # self._std_share_network = std_share_network
         self._std_parameterization = std_parameterization
         self._layer_normalization = layer_normalization
+        self._norm_dist_class = normal_distribution_cls
 
-        if self._std_parameterization not in ('exp', 'softplus'):
-            raise NotImplementedError
+        # if self._std_parameterization not in ('exp', 'softplus'):
+        #     raise NotImplementedError
         init_std_param = torch.Tensor([init_std]).log()
         if self._learn_std:
             self._init_std = torch.nn.Parameter(init_std_param)
@@ -87,7 +88,7 @@ def to(self, *args, **kwargs):
     def _get_mean_and_log_std(self, *inputs):
         pass
 
-    def forward(self):
+    def forward(self, *inputs):
         """Forward method.
 
         Args:
@@ -102,9 +103,9 @@ def forward(self):
             hidden_init) = self._get_mean_and_log_std(*inputs)
 
         if self._std_parameterization == 'exp':
-            std = log_std_var.exp()
+            std = log_std.exp()
         else:
-            std = log_std_var.exp().exp().add(1.).log()
+            std = log_std.exp().exp().add(1.).log()
         dist = self._norm_dist_class(mean, std)
         if not isinstance(dist, TanhNormal):
             # Makes it so that a sample from the distribution is treated as a
@@ -177,12 +178,10 @@ def _get_mean_and_log_std(self, *inputs):
         (mean_outputs, step_mean_outputs, step_hidden,
          hidden_init_var) = self._mean_gru_module(*inputs)
 
-        broadcast_shape = list(inputs.shape[:-1]) + [self._input_dim]
-        uncentered_log_std = torch.zeros(*broadcast_shape) + self._init_std
+        uncentered_log_std = torch.zeros(inputs[0].size(-1)) + self._init_std
 
-        step_broadcast_shape = list(inputs[0].shape[:-1]) + [self._input_dim]
         uncentered_step_log_std = torch.zeros(
-            *broadcast_shape) + self._init_std
+            inputs[0].size(-1)) + self._init_std
 
         return (mean_outputs, step_mean_outputs, uncentered_log_std,
                 uncentered_step_log_std, step_hidden, hidden_init_var)
diff --git a/src/garage/torch/modules/gru_module.py b/src/garage/torch/modules/gru_module.py
index 353e026b90..92d0f8cac7 100644
--- a/src/garage/torch/modules/gru_module.py
+++ b/src/garage/torch/modules/gru_module.py
@@ -22,23 +22,24 @@
         # self.gru_cell = GRUCell(input_dim, hidden_dim, layer_dim)
         self._fc = nn.Linear(hidden_dim, output_dim)
 
-    def forward(self, x):
+    def forward(self, *input):
+        # input = input[0]
+        input = Variable(input[0].view(-1, input[0].size(0),
+                                       input[0].size(1)))
 
         # Initialize hidden state with zeros
-        # print(x.shape)  # (100, 28, 28)
         if torch.cuda.is_available():
             h0 = Variable(
-                torch.zeros(self._layer_dim, x.size(0),
+                torch.zeros(self._layer_dim, input.size(0),
                             self._hidden_dim).cuda())
         else:
             h0 = Variable(
-                torch.zeros(self._layer_dim, x.size(0), self._hidden_dim))
+                torch.zeros(self._layer_dim, input.size(0), self._hidden_dim))
 
         outs = []
         hn = h0[0, :, :]
 
-        for seq in range(x.size(1)):
-            hn = self._gru_cell(x[:, seq, :], hn)
+        for seq in range(input.size(1)):
+            hn = self._gru_cell(input[:, seq, :], hn)
             outs.append(hn)
         out = outs[-1].squeeze()
         out = self._fc(out)
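
Note: the new `input[0].view(-1, input[0].size(0), input[0].size(1))` call
re-chunks the tensor in memory order; it does not swap the batch and time
axes the way a transpose would. The difference, on a small example (names
are mine):

    import torch

    x = torch.arange(6).reshape(2, 3)

    print(x.view(3, 2))     # re-chunks: [[0, 1], [2, 3], [4, 5]]
    print(x.permute(1, 0))  # transposes: [[0, 3], [1, 4], [2, 5]]
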
diff --git a/src/garage/torch/policies/gaussian_gru_policy.py b/src/garage/torch/policies/gaussian_gru_policy.py
index 53a07fb1ba..49e448ee91 100644
--- a/src/garage/torch/policies/gaussian_gru_policy.py
+++ b/src/garage/torch/policies/gaussian_gru_policy.py
@@ -1,6 +1,7 @@
 """Gaussian GRU Policy."""
 import torch
 from torch import nn
+from torch.distributions.independent import Independent
 
 from garage.torch.modules import GaussianGRUModule
 from garage.torch.policies.stochastic_policy import StochasticPolicy
@@ -13,7 +14,7 @@ class GaussianGRUPolicy(StochasticPolicy):
     def __init__(
             self,
             env_spec,
-            hidden_sizes=(32, 32),
+            hidden_dim=(32, 32),
             hidden_nonlinearity=torch.tanh,
             hidden_w_init=nn.init.xavier_uniform_,
             hidden_b_init=nn.init.zeros_,
@@ -33,7 +34,7 @@ def __init__(
         self._module = GaussianGRUModule(
             input_dim=self._obs_dim,
             output_dim=self._action_dim,
-            hidden_sizes=hidden_sizes,
+            hidden_dim=hidden_dim,
             hidden_nonlinearity=hidden_nonlinearity,
             hidden_w_init=hidden_w_init,
             hidden_b_init=hidden_b_init,
@@ -42,8 +43,6 @@ def __init__(
             output_b_init=output_b_init,
             learn_std=learn_std,
             init_std=init_std,
-            # min_std=min_std,
-            # max_std=max_std,
             std_parameterization=std_parameterization,
             layer_normalization=layer_normalization)
 
@@ -59,5 +58,10 @@ def forward(self, observations):
 
         """
         dist = self._module(observations)
+        dist = dist[0]
+        assert isinstance(dist, torch.distributions.independent.Independent)
+        assert dist.event_shape == torch.Size((1, ))
+        assert dist.batch_shape == torch.Size((1, ))
         return (dist, dict(mean=dist.mean, log_std=(dist.variance**.5).log()))
diff --git a/src/garage/torch/policies/gaussian_mlp_policy.py b/src/garage/torch/policies/gaussian_mlp_policy.py
index fbfb6179f2..a26f32d63d 100644
--- a/src/garage/torch/policies/gaussian_mlp_policy.py
+++ b/src/garage/torch/policies/gaussian_mlp_policy.py
@@ -99,4 +99,5 @@ def forward(self, observations):
 
         """
         dist = self._module(observations)
+        assert isinstance(dist, torch.distributions.independent.Independent)
        return (dist, dict(mean=dist.mean, log_std=(dist.variance**.5).log()))
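
Note: the asserts added above encode how Independent reshapes a distribution:
wrapping a Normal in Independent(dist, 1) folds the last batch dimension into
the event, so log_prob returns one value per sample. A quick check for a 1-D
action space:

    import torch
    from torch.distributions import Normal
    from torch.distributions.independent import Independent

    base = Normal(torch.zeros(4, 1), torch.ones(4, 1))
    print(base.batch_shape, base.event_shape)  # torch.Size([4, 1]), ()

    dist = Independent(base, 1)
    print(dist.batch_shape, dist.event_shape)  # torch.Size([4]), (1,)
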
""" set_seed(seed) - env = GarageEnv(env_name='InvertedDoublePendulum-v2') + env = GymEnv('InvertedDoublePendulum-v2') - runner = LocalRunner(ctxt) + trainer = Trainer(ctxt) policy = GaussianGRUPolicy( env.spec, - hidden_dim=64, # [64, 64] + hidden_dim= 64,#(64,64), # [64, 64] hidden_nonlinearity=torch.tanh, output_nonlinearity=None) @@ -44,12 +44,11 @@ def ppo_pendulum(ctxt=None, seed=1): algo = PPO(env_spec=env.spec, policy=policy, value_function=value_function, - max_path_length=100, discount=0.99, center_adv=False) - runner.setup(algo, env) - runner.train(n_epochs=100, batch_size=10000) + trainer.setup(algo, env) + trainer.train(n_epochs=100, batch_size=10000) -ppo_pendulum(seed=1) +ppo_pendulum_gru(seed=1) diff --git a/examples/torch/trpo_pendulum.py b/examples/torch/trpo_pendulum.py index 3cbdbb138c..4ea973e175 100755 --- a/examples/torch/trpo_pendulum.py +++ b/examples/torch/trpo_pendulum.py @@ -9,7 +9,7 @@ from garage.envs import GymEnv from garage.experiment.deterministic import set_seed from garage.torch.algos import TRPO -from garage.torch.policies import GaussianMLPPolicy, GaussianGRUPolicy +from garage.torch.policies import GaussianMLPPolicy from garage.torch.value_functions import GaussianMLPValueFunction from garage.trainer import Trainer diff --git a/examples/torch/trpo_pendulum_gru.py b/examples/torch/trpo_pendulum_gru.py index 6691345d69..f1d96a92aa 100755 --- a/examples/torch/trpo_pendulum_gru.py +++ b/examples/torch/trpo_pendulum_gru.py @@ -6,12 +6,12 @@ import torch from garage import wrap_experiment -from garage.envs import GarageEnv -from garage.experiment import LocalRunner +from garage.envs import GymEnv from garage.experiment.deterministic import set_seed from garage.torch.algos import TRPO from garage.torch.policies import GaussianMLPPolicy, GaussianGRUPolicy from garage.torch.value_functions import GaussianMLPValueFunction +from garage.trainer import Trainer @wrap_experiment @@ -20,15 +20,15 @@ def trpo_pendulum_gru(ctxt=None, seed=1): Args: ctxt (garage.experiment.ExperimentContext): The experiment - configuration used by LocalRunner to create the snapshotter. + configuration used by Trainer to create the snapshotter. seed (int): Used to seed the random number generator to produce determinism. 
""" set_seed(seed) - env = GarageEnv(env_name='InvertedDoublePendulum-v2') + env = GymEnv('InvertedDoublePendulum-v2') - runner = LocalRunner(ctxt) + trainer = Trainer(ctxt) policy = GaussianGRUPolicy(env.spec, hidden_dim=32, # [32, 32] @@ -44,12 +44,12 @@ def trpo_pendulum_gru(ctxt=None, seed=1): algo = TRPO(env_spec=env.spec, policy=policy, value_function=value_function, - max_path_length=100, + # max_path_length=100, discount=0.99, center_adv=False) - runner.setup(algo, env) - runner.train(n_epochs=100, batch_size=1024) + trainer.setup(algo, env) + trainer.train(n_epochs=100, batch_size=1024) trpo_pendulum_gru(seed=1) diff --git a/src/garage/torch/modules/__init__.py b/src/garage/torch/modules/__init__.py index 2e298ed91d..9e9c6c611d 100644 --- a/src/garage/torch/modules/__init__.py +++ b/src/garage/torch/modules/__init__.py @@ -1,5 +1,3 @@ -<<<<<<< HEAD -<<<<<<< HEAD """PyTorch Modules.""" # yapf: disable # isort:skip_file diff --git a/src/garage/torch/modules/gaussian_gru_module.py b/src/garage/torch/modules/gaussian_gru_module.py index b07904a253..000dba62b3 100644 --- a/src/garage/torch/modules/gaussian_gru_module.py +++ b/src/garage/torch/modules/gaussian_gru_module.py @@ -5,11 +5,12 @@ from torch import nn from torch.distributions import Normal from torch.distributions.independent import Independent +import torch.nn.functional as F +from garage.torch import global_device, set_gpu_mode from garage.torch.distributions import TanhNormal from garage.torch.modules.gru_module import GRUModule - class GaussianGRUBaseModule(nn.Module): """Gaussian GRU Module. @@ -30,13 +31,8 @@ def __init__( output_nonlinearity=None, output_w_init=nn.init.xavier_uniform_, output_b_init=nn.init.zeros_, - # hidden_state_init=nn.init.zeros_, - # hidden_state_init_trainable=False, learn_std=True, init_std=1.0, - # min_std=1e-6, - # max_std=None, - # std_share_network=False, std_parameterization='exp', layer_normalization=False, normal_distribution_cls=Normal): @@ -52,18 +48,15 @@ def __init__( self._output_nonlinearity = output_nonlinearity self._output_w_init = output_w_init self._output_b_init = output_b_init - # self._hidden_state_init = hidden_state_init - # self._hidden_state_init_trainable = hidden_state_init_trainable self._learn_std = learn_std - # self._min_std = min_std - # self._max_std = max_std - # self._std_share_network = std_share_network self._std_parameterization = std_parameterization, self._layer_normalization = layer_normalization self._norm_dist_class = normal_distribution_cls - # if self._std_parameterization not in ('exp', 'softplus'): - # raise NotImplementedError + self.continuous_action_space = True# continuous_action_space + self.log_std_dev = nn.Parameter(init_std * torch.ones(( self._output_dim), dtype=torch.float), requires_grad=self._learn_std) + self.covariance_eye = torch.eye(int(self._output_dim)).unsqueeze(0) + init_std_param = torch.Tensor([init_std]).log() if self._learn_std: self._init_std = torch.nn.Parameter(init_std_param) @@ -88,7 +81,7 @@ def to(self, *args, **kwargs): def _get_mean_and_log_std(self, *inputs): pass - def forward(self, *inputs): + def forward(self, *inputs, terminal=None): """Forward method. Args: @@ -99,6 +92,26 @@ def forward(self, *inputs): distribution. 
""" + if torch.cuda.is_available(): + set_gpu_mode(True) + else: + set_gpu_mode(False) + device = global_device() + + _ , _ , _ , _, policy_logits_out, _ = self._get_mean_and_log_std(*inputs) + if self.continuous_action_space: + cov_matrix = self.covariance_eye.to(device).expand(self._input_dim, self._output_dim, self._output_dim) * torch.exp(self._init_std.to(device)) + # We define the distribution on the CPU since otherwise operations fail with CUDA illegal memory access error. + policy_dist = torch.distributions.multivariate_normal.MultivariateNormal(policy_logits_out.to("cpu"), cov_matrix.to("cpu")) + else: + policy_dist = torch.distributions.Categorical(F.softmax(policy_logits_out, dim=1).to("cpu")) + + # if not isinstance(policy_dist, TanhNormal): + # # # Makes it so that a sample from the distribution is treated as a + # # # single sample and not dist.batch_shape samples. + # policy_dist = Independent(policy_dist, 1) + return policy_dist + (mean, step_mean, log_std, step_log_std, step_hidden, hidden_init) = self._get_mean_and_log_std(*inputs) @@ -112,8 +125,8 @@ def forward(self, *inputs): # single sample and not dist.batch_shape samples. dist = Independent(dist, 1) - # return dist - return (dist, step_mean, step_log_std, step_hidden, hidden_init) + return dist + # return (dist, step_mean, step_log_std, step_hidden, hidden_init) class GaussianGRUModule(GaussianGRUBaseModule): @@ -152,8 +165,6 @@ def __init__( output_b_init=output_b_init, learn_std=learn_std, init_std=init_std, - # min_std=min_std, - # max_std=max_std, std_parameterization=std_parameterization, layer_normalization=layer_normalization, normal_distribution_cls=normal_distribution_cls) @@ -185,10 +196,3 @@ def _get_mean_and_log_std(self, *inputs): return (mean_outputs, step_mean_outputs, uncentered_log_std, uncentered_step_log_std, step_hidden, hidden_init_var) - - # mean = self._mean_gru_module(*inputs) - - # broadcast_shape = list(inputs[0].shape[:-1]) + [self._action_dim] - # uncentered_log_std = torch.zeros(*broadcast_shape) + self._init_std - - # return mean, uncentered_log_std diff --git a/src/garage/torch/modules/gru_module.py b/src/garage/torch/modules/gru_module.py index 92d0f8cac7..f7daa45ca7 100644 --- a/src/garage/torch/modules/gru_module.py +++ b/src/garage/torch/modules/gru_module.py @@ -18,12 +18,12 @@ def __init__( self._hidden_dim = hidden_dim # Number of hidden layers self._layer_dim = layer_dim + print(input_dim, hidden_dim) self._gru_cell = nn.GRUCell(input_dim, hidden_dim) # self.gru_cell = GRUCell(input_dim, hidden_dim, layer_dim) self._fc = nn.Linear(hidden_dim, output_dim) def forward(self, *input): - # input = input[0] input = Variable(input[0].view(-1, input[0].size(0), input[0].size(1))) # Initialize hidden state with zeros @@ -34,7 +34,6 @@ def forward(self, *input): else: h0 = Variable( torch.zeros(self._layer_dim, input.size(0), self._hidden_dim)) - outs = [] hn = h0[0, :, :] @@ -45,6 +44,4 @@ def forward(self, *input): out = self._fc(out) outs = torch.stack(outs) # convert list of tensors to tensor outs = self._fc(outs) - # out.size() --> 100, 10 - # return out return outs, out, hn, h0 diff --git a/src/garage/torch/policies/gaussian_gru_policy.py b/src/garage/torch/policies/gaussian_gru_policy.py index 49e448ee91..5865a939cb 100644 --- a/src/garage/torch/policies/gaussian_gru_policy.py +++ b/src/garage/torch/policies/gaussian_gru_policy.py @@ -5,6 +5,7 @@ from garage.torch.modules import GaussianGRUModule from garage.torch.policies.stochastic_policy import StochasticPolicy +from 
diff --git a/src/garage/torch/modules/gru_module.py b/src/garage/torch/modules/gru_module.py
index 92d0f8cac7..f7daa45ca7 100644
--- a/src/garage/torch/modules/gru_module.py
+++ b/src/garage/torch/modules/gru_module.py
@@ -18,12 +18,12 @@ def __init__(
         self._hidden_dim = hidden_dim
         # Number of hidden layers
         self._layer_dim = layer_dim
         self._gru_cell = nn.GRUCell(input_dim, hidden_dim)
         # self.gru_cell = GRUCell(input_dim, hidden_dim, layer_dim)
         self._fc = nn.Linear(hidden_dim, output_dim)
 
     def forward(self, *input):
-        # input = input[0]
         input = Variable(input[0].view(-1, input[0].size(0),
                                        input[0].size(1)))
 
@@ -34,7 +34,6 @@ def forward(self, *input):
         else:
             h0 = Variable(
                 torch.zeros(self._layer_dim, input.size(0), self._hidden_dim))
-
         outs = []
         hn = h0[0, :, :]
 
@@ -45,6 +44,4 @@ def forward(self, *input):
         out = self._fc(out)
         outs = torch.stack(outs)  # convert list of tensors to tensor
         outs = self._fc(outs)
-        # out.size() --> 100, 10
-        # return out
         return outs, out, hn, h0
diff --git a/src/garage/torch/policies/gaussian_gru_policy.py b/src/garage/torch/policies/gaussian_gru_policy.py
index 49e448ee91..5865a939cb 100644
--- a/src/garage/torch/policies/gaussian_gru_policy.py
+++ b/src/garage/torch/policies/gaussian_gru_policy.py
@@ -5,6 +5,7 @@
 
 from garage.torch.modules import GaussianGRUModule
 from garage.torch.policies.stochastic_policy import StochasticPolicy
+from garage.torch.modules.gru_module import GRUModule
 
 
 class GaussianGRUPolicy(StochasticPolicy):
@@ -23,15 +24,15 @@ def __init__(
             output_b_init=nn.init.zeros_,
             learn_std=True,
             init_std=1.0,
-            # min_std=1e-6,
-            # max_std=None,
             std_parameterization='exp',
             layer_normalization=False,
             name='GaussianGRUPolicy'):
         super().__init__(env_spec, name)
         self._obs_dim = env_spec.observation_space.flat_dim
         self._action_dim = env_spec.action_space.flat_dim
-        self._module = GaussianGRUModule(
+
+        self._actor = GaussianGRUModule(
             input_dim=self._obs_dim,
             output_dim=self._action_dim,
             hidden_dim=hidden_dim,
@@ -45,6 +46,11 @@ def __init__(
             init_std=init_std,
             std_parameterization=std_parameterization,
             layer_normalization=layer_normalization)
+
+        self._critic = GRUModule(input_dim=self._obs_dim,
+                                 output_dim=self._action_dim,
+                                 hidden_dim=hidden_dim,
+                                 layer_dim=1)
 
     def forward(self, observations):
         """Compute the action distributions from the observations.
@@ -59,9 +65,7 @@ def forward(self, observations):
 
         """
-        dist = self._module(observations)
-        dist = dist[0]
+        dist = self._actor(observations)
         assert isinstance(dist, torch.distributions.independent.Independent)
         assert dist.event_shape == torch.Size((1, ))
-        assert dist.batch_shape == torch.Size((1, ))
         return (dist, dict(mean=dist.mean, log_std=(dist.variance**.5).log()))
diff --git a/tests/garage/torch/modules/test_gru_module.py b/tests/garage/torch/modules/test_gru_module.py
index ee0ea22885..4fe602a05f 100644
--- a/tests/garage/torch/modules/test_gru_module.py
+++ b/tests/garage/torch/modules/test_gru_module.py
@@ -25,11 +25,11 @@ def test_output_values(self):
                           self.output_dim)
 
         outputs, output, _, _ = model(self.input)  # read step output
-        assert output.size() == (self.batch_size, self.output_dim
-                                 )  # (batch_size, output_dim)
-        assert outputs.shape == (self.input_dim, self.batch_size,
-                                 self.output_dim
-                                 )  # (input_dim, batch_size, output_dim)
+        assert output.size() == (self.input_dim, self.output_dim)
+        assert outputs.shape == (self.batch_size, self.input_dim,
+                                 self.output_dim)
 
     def test_is_pickleable(self):
         model = GRUModule(self.input_dim, self.hidden_dim, self.layer_dim,
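
Note: none of the four patches add hidden-state plumbing to the sampler, so
every forward pass re-initializes h0 to zeros. For reference, a rollout loop
for a recurrent policy would typically carry the hidden state across steps
and reset it per episode; a sketch with stand-in dimensions (environment
interaction elided, all names hypothetical):

    import torch
    from torch import nn

    cell = nn.GRUCell(4, 32)     # obs_dim=4, hidden_dim=32
    head = nn.Linear(32, 2)      # action_dim=2

    h = torch.zeros(1, 32)       # reset hidden state at episode start
    obs = torch.zeros(1, 4)
    for _ in range(100):
        h = cell(obs, h)         # carry h over from the previous step
        action_mean = head(h)
        # sample an action around action_mean, step the env to get the
        # next obs; zero h again when the episode ends.
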