#!/usr/bin/env python3.8
'''
Written by: Saksham Consul 04/18/2023
Model file
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Normal

LOG_SIG_MAX = 2
LOG_SIG_MIN = -20
epsilon = 1e-6


# Initialize Policy weights
def weights_init_(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight, gain=1)
        torch.nn.init.constant_(m.bias, 0)


class ValueNetwork(nn.Module):
    '''State-value network V(s): maps a state to a single scalar value.'''

    def __init__(self, num_inputs, hidden_dim):
        super(ValueNetwork, self).__init__()

        self.linear1 = nn.Linear(num_inputs, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
        self.linear3 = nn.Linear(hidden_dim, 1)

        self.apply(weights_init_)

    def forward(self, state):
        x = F.relu(self.linear1(state))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
        return x


class QNetwork(nn.Module):
    '''Twin Q-networks: two independent Q(s, a) estimates computed in a
    single forward pass.'''

    def __init__(self, num_inputs, num_actions, hidden_dim):
        super(QNetwork, self).__init__()

        # Q1 architecture
        self.linear1 = nn.Linear(num_inputs + num_actions, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
        self.linear3 = nn.Linear(hidden_dim, 1)

        # Q2 architecture
        self.linear4 = nn.Linear(num_inputs + num_actions, hidden_dim)
        self.linear5 = nn.Linear(hidden_dim, hidden_dim)
        self.linear6 = nn.Linear(hidden_dim, 1)

        self.apply(weights_init_)

    def forward(self, state, action):
        xu = torch.cat([state, action], 1)

        x1 = F.relu(self.linear1(xu))
        x1 = F.relu(self.linear2(x1))
        x1 = self.linear3(x1)

        x2 = F.relu(self.linear4(xu))
        x2 = F.relu(self.linear5(x2))
        x2 = self.linear6(x2)

        return x1, x2


class DeterministicPolicy(nn.Module):
    '''Deterministic policy: outputs a tanh-squashed mean action; sample()
    adds clipped Gaussian exploration noise.'''

    def __init__(self, num_inputs, num_actions, hidden_dim, action_space=None):
        super(DeterministicPolicy, self).__init__()
        self.linear1 = nn.Linear(num_inputs, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, hidden_dim)

        self.mean = nn.Linear(hidden_dim, num_actions)
        self.noise = torch.Tensor(num_actions)

        self.apply(weights_init_)

        # # action rescaling
        # if action_space is None:
        #     self.action_scale = 1.
        #     self.action_bias = 0.
        # else:
        #     self.action_scale = torch.FloatTensor(
        #         (len(action_space)) / 2.)
        #     self.action_bias = torch.FloatTensor(
        #         (len(action_space)) / 2.)

    def forward(self, state):
        x = F.relu(self.linear1(state))
        x = F.relu(self.linear2(x))
        # * self.action_scale + self.action_bias
        mean = torch.tanh(self.mean(x))
        return mean

    def sample(self, state):
        mean = self.forward(state)
        noise = self.noise.normal_(0., std=0.1)
        noise = noise.clamp(-0.25, 0.25)
        action = mean + noise
        # The log-probability slot is a placeholder (torch.tensor(0.)), kept
        # for interface compatibility with stochastic policies.
        return action, torch.tensor(0.), mean

    def to(self, device):
        # self.action_scale = self.action_scale.to(device)
        # self.action_bias = self.action_bias.to(device)
        self.noise = self.noise.to(device)
        return super(DeterministicPolicy, self).to(device)
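

# --- Usage sketch (illustrative): a minimal smoke test showing how the three
# modules fit together. The state dimension, action dimension, and hidden size
# below are assumed values chosen only for demonstration.
if __name__ == "__main__":
    state_dim, action_dim, hidden_dim = 4, 2, 256  # assumed, not from the original code

    value_net = ValueNetwork(state_dim, hidden_dim)
    critic = QNetwork(state_dim, action_dim, hidden_dim)
    policy = DeterministicPolicy(state_dim, action_dim, hidden_dim)

    state = torch.randn(1, state_dim)        # batch of one state
    action, _, mean = policy.sample(state)   # noisy action, log-prob placeholder, mean action
    q1, q2 = critic(state, action)           # twin Q-value estimates
    v = value_net(state)                     # state-value estimate
    print(action.shape, q1.shape, q2.shape, v.shape)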