forked from udacity/P1_Facial_Keypoints
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodels.py
126 lines (89 loc) · 3.94 KB
/
models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import torch
import torch.nn as nn
import torch.nn.functional as F
# can use the below import should you choose to initialize the weights of your Net
import torch.nn.init as I
class Net(nn.Module):
    """Small two-stage CNN that regresses 136 values from a grayscale image.

    The input size is fixed by ``fc1``: its 58320 input features equal
    20 * 54 * 54, which is what the two conv/pool stages produce for a
    single-channel 224x224 image. Inputs are therefore expected to have
    shape ``(N, 1, 224, 224)``; other spatial sizes fail at ``fc1``.

    The output has shape ``(N, 136)`` and is returned raw (no softmax), as
    appropriate for regression. 136 is presumably 68 (x, y) keypoint pairs,
    matching the ``68*2`` head of ``NaimishNet`` in this file.
    """

    def __init__(self):
        super(Net, self).__init__()

        # Shape bookkeeping below uses: output size = (W - F) / S + 1
        # (no padding), for an assumed 1x224x224 input.

        # conv1: 1 input channel (grayscale) -> 10 feature maps, 3x3 kernel
        # (224 - 3) / 1 + 1 = 222  -> (10, 222, 222)
        # after pooling            -> (10, 111, 111)
        self.conv1 = nn.Conv2d(1, 10, 3)

        # Max pooling with kernel_size=2, stride=2; shared by both conv stages.
        self.pool = nn.MaxPool2d(2, 2)

        # conv2: 10 -> 20 feature maps, 3x3 kernel
        # (111 - 3) / 1 + 1 = 109  -> (20, 109, 109)
        # after pooling            -> (20, 54, 54); 54.5 is rounded down
        self.conv2 = nn.Conv2d(10, 20, 3)

        # 20 feature maps * 54 * 54 pooled positions = 58320 flattened inputs
        self.fc1 = nn.Linear(58320, 50)

        # Dropout on the hidden representation; only active in training mode.
        self.fc1_drop = nn.Dropout(p=0.4)

        # Regression head: 136 output values.
        self.fc2 = nn.Linear(50, 136)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Tensor of shape ``(N, 1, 224, 224)``.

        Returns:
            Tensor of shape ``(N, 136)`` with the raw regression outputs.
        """
        # Two conv -> ReLU -> max-pool stages.
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))

        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)

        # Hidden layer with ReLU and dropout.
        x = F.relu(self.fc1(x))
        x = self.fc1_drop(x)

        # Final linear layer; no activation because the outputs are
        # regression targets, not class scores.
        x = self.fc2(x)
        return x
# from paper https://arxiv.org/pdf/1710.00977.pdf
class NaimishNet(nn.Module):
    """CNN with four conv blocks and three dense layers, after arXiv:1710.00977.

    Each conv block is conv -> ELU -> max-pool -> dropout, with the dropout
    probability rising from 0.1 to 0.4 across the blocks and from 0.5 to 0.6
    across the dense layers.

    The layer sizes assume a single-channel 224x224 input, shape
    ``(N, 1, 224, 224)``: the conv stack then yields 256 maps of 6x6, which
    is what ``fc1`` expects. The output has shape ``(N, 136)``, i.e. 68 * 2
    values.
    """

    def __init__(self):
        super(NaimishNet, self).__init__()

        # Per-layer shapes below are for one 1x224x224 input image, using
        # output size = (W - F) / S + 1 with pooled sizes rounded down.

        # Block 1
        self.conv1 = nn.Conv2d(1, 32, 4)     # output 32x221x221
        self.pool1 = nn.MaxPool2d(4, 4)      # output 32x55x55
        self.drop1 = nn.Dropout(0.1)

        # Block 2
        self.conv2 = nn.Conv2d(32, 64, 3)    # output 64x53x53
        self.pool2 = nn.MaxPool2d(2, 2)      # output 64x26x26
        self.drop2 = nn.Dropout(0.2)

        # Block 3
        self.conv3 = nn.Conv2d(64, 128, 2)   # output 128x25x25
        self.pool3 = nn.MaxPool2d(2, 2)      # output 128x12x12
        self.drop3 = nn.Dropout(0.3)

        # Block 4
        self.conv4 = nn.Conv2d(128, 256, 1)  # output 256x12x12
        self.pool4 = nn.MaxPool2d(2, 2)      # output 256x6x6
        self.drop4 = nn.Dropout(0.4)

        # Dense layers
        self.fc1 = nn.Linear(256 * 6 * 6, 1000)  # output 1000
        self.drop_fc1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(1000, 1000)         # output 1000
        self.drop_fc2 = nn.Dropout(0.6)
        self.fc3 = nn.Linear(1000, 68 * 2)       # output 136 (68, 2)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Tensor of shape ``(N, 1, 224, 224)``.

        Returns:
            Tensor of shape ``(N, 136)`` with the raw regression outputs.
        """
        # Four conv -> ELU -> max-pool -> dropout blocks.
        x = self.pool1(F.elu(self.conv1(x)))
        x = self.drop1(x)

        x = self.pool2(F.elu(self.conv2(x)))
        x = self.drop2(x)

        x = self.pool3(F.elu(self.conv3(x)))
        x = self.drop3(x)

        x = self.pool4(F.elu(self.conv4(x)))
        x = self.drop4(x)

        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)

        x = F.elu(self.fc1(x))
        x = self.drop_fc1(x)

        # NOTE: no non-linearity is applied after fc2; only dropout sits
        # between fc2 and fc3. This is kept exactly as in the original code.
        x = self.fc2(x)
        x = self.drop_fc2(x)

        # Final linear layer producing the 136 raw outputs.
        x = self.fc3(x)
        return x