CNNcost.m
function [cost, grad, preds] = cnnCost(theta,images,labels,numClasses,...
c1filterDim,c1numFilters,c3filterDim,c3numFilters,...
s2poolDim,s4poolDim,c5Dim,c6Dim,pred)
% Calculate cost and gradient for a LeNet-5-style convolutional neural
% network (two convolution + mean-pooling stages followed by fully
% connected layers) with a softmax output and cross-entropy objective.
%
% Parameters:
%  theta        - unrolled parameter vector
%  images       - imageDim x imageDim x numImages array of input images
%  labels       - class label for each image
%  numClasses   - number of classes to predict
%  c1filterDim  - dimension of the C1 convolution kernels
%  c1numFilters - number of C1 feature maps
%  c3filterDim  - dimension of the C3 convolution kernels
%  c3numFilters - total number of C3 kernels (c1numFilters kernels per C3
%                 feature map, so there are c3numFilters/c1numFilters maps)
%  s2poolDim    - pooling dimension of the S2 layer
%  s4poolDim    - pooling dimension of the S4 layer
%  c5Dim        - number of units in the fully connected C5 layer
%  c6Dim        - number of units in the fully connected C6 layer
%  pred         - boolean; if true, only forward propagate and return
%                 predictions
%
% Returns:
%  cost  - cross-entropy cost
%  grad  - gradient with respect to theta (if pred == false)
%  preds - class probabilities for each example (if pred == true)
%
% Note:
%  the pooling layers (S2, S4) apply no sigmoid nonlinearity.
if ~exist('pred','var')
pred = false;
preds = 0;
end;
imageDim = size(images,1); % height/width of image
numImages = size(images,3); % number of images
%% Reshape parameters and setup gradient matrices
% Wc1, Wc3 hold the convolution kernels (filterDim x filterDim x numFilters
% arrays); Wc5, Wc6, Wc7 are the weight matrices of the fully connected
% layers (Wc7 is numClasses x c6Dim); bc1 ... bc7 are the corresponding
% biases.
[Wc1, Wc3, Wc5, Wc6, Wc7, bc1, bc3, bc5, bc6, bc7] = cnnParamsToStack(theta,imageDim,...
c1filterDim,c1numFilters,c3filterDim,c3numFilters,...
s2poolDim,s4poolDim,c5Dim,c6Dim,numClasses);
% Same sizes as the parameters above; used to hold the gradients w.r.t. them.
Wc1_grad = zeros(size(Wc1));
Wc3_grad = zeros(size(Wc3));
Wc5_grad = zeros(size(Wc5));
Wc6_grad = zeros(size(Wc6));
Wc7_grad = zeros(size(Wc7));
bc1_grad = zeros(size(bc1));
bc3_grad = zeros(size(bc3));
bc5_grad = zeros(size(bc5));
bc6_grad = zeros(size(bc6));
bc7_grad = zeros(size(bc7));
%%======================================================================
%% STEP 1a: Forward Propagation
% In this step you will forward propagate the input through the
% convolutional and subsampling (mean pooling) layers. You will then feed
% the responses from the convolution and pooling layers through the fully
% connected layers and a standard softmax layer.
%% Convolutional Layer
% For each image and each filter, convolve the image with the filter, add
% the bias and apply the sigmoid nonlinearity. Then subsample the
% convolved activations with mean pooling. Store the results of the
% convolution in activations and the results of the pooling in
% activationsPooled. You will need to save the convolved activations for
% backpropagation.
c1convDim = imageDim-c1filterDim+1; % dimension of convolved output
assert( mod(c1convDim,s2poolDim)==0, 'c1convDim is not divisible by s2poolDim');
s2outputDim = (c1convDim)/s2poolDim; % dimension of subsampled output
% c1convDim x c1convDim x c1numFilters x numImages tensor for storing activations
% c1activations = zeros(c1convDim,c1convDim,c1numFilters,numImages);
% s2outputDim x s2outputDim x c1numFilters x numImages tensor for storing
% subsampled activations
% s2activationsPooled = zeros(s2outputDim,s2outputDim,c1numFilters,numImages);
%%% YOUR CODE HERE %%%
c1convolvedFeatures = cnnConvolve(c1filterDim,c1numFilters,images,Wc1,bc1);
c1activations = c1convolvedFeatures; % sigmoid is already applied inside cnnConvolve
s2pooledFeatures = cnnPool(s2poolDim,c1activations); % no sigmoid here, mean pooling only
s2activationsPooled = s2pooledFeatures;
% S2 -> C3 is fully connected across feature maps (unlike LeNet-5's sparse connection table); each connection is a 2-D convolution
c3convDim = s2outputDim - c3filterDim + 1;
c3convolvedImage = zeros(c3convDim,c3convDim,c3numFilters,numImages);
c3activations = zeros(c3convDim,c3convDim,c3numFilters/c1numFilters,numImages);
%%%%%%%%%%%%%%%
% Wc3 kernel indexing (example with c1numFilters = 6 and c3numFilters = 96,
% i.e. 16 C3 feature maps); each entry is an index along dim 3 of Wc3:
%              C3 map:  0   1   2  ...  15
%   S2 map 0         [  1   7  13  ...  91
%          1            2   8  14  ...  92
%          ...                ...
%          5            6  12  18  ...  96 ]
%%%%%%%%%%%%%
for imageNum =1:numImages
c1filterNum = 1;
c1num =1;
for c3filterNum = 1:c3numFilters
kernel = rot90(Wc3(:,:,c3filterNum),2);
c3convolvedImage(:,:,c3filterNum,imageNum) = conv2(...
s2activationsPooled(:,:,c1num,imageNum),kernel,'valid');
% Note: index the pooled maps with c1num (which cycles through
% 1..c1numFilters), not c1filterNum; every S2 pooled map must be
% convolved once for each C3 feature map.
c1num = c1num+1;
if mod(c3filterNum,c1numFilters)==0 % all contributions for one C3 feature map collected
res = sum(c3convolvedImage(:,:, (c3filterNum-c1numFilters +1):c3filterNum,imageNum),3) + bc3(c1filterNum);
c3activations(:,:,c3filterNum/c1numFilters,imageNum) = 1./(1+exp(-res));
c1filterNum = c1filterNum + 1;
c1num = 1;
end
end
end
% S4 pooling, then fully connected layers C5, C6 and the C7 softmax output
assert ( mod(c3convDim,s4poolDim)==0, 'c3convDim is not divisible by s4poolDim');
s4outputDim = c3convDim/s4poolDim;
s4activationsPooled = cnnPool(s4poolDim,c3activations); % no sigmoid here
s4activationsPooled = reshape(s4activationsPooled,[],numImages);
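% The reshape flattens each image's S4 feature maps into a single column,
% giving an (s4outputDim^2 * c3numFilters/c1numFilters) x numImages matrix
% as input to the fully connected C5 layer.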
c5activations = bsxfun(@plus,Wc5*s4activationsPooled ,bc5);
c5activations = 1./(1+exp(-c5activations));
c6activations = bsxfun(@plus,Wc6*c5activations,bc6);
c6activations = 1./(1+exp(-c6activations));
% softmax
s = bsxfun(@plus,Wc7*c6activations,bc7);
s = bsxfun(@minus,s,max(s,[],1));
s = exp(s);
s = bsxfun(@rdivide,s,sum(s));
probs = s;
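% probs(k,i) = exp(score_k) / sum_j exp(score_j); subtracting the per-column
% max before exponentiating leaves the softmax unchanged but avoids overflow.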
% logistic regression
% s = 1./(1+exp(-s));
% probs =s;
% Makes predictions given probs and returns without backpropagating errors.
if pred
% return the class probabilities rather than the predicted class labels
% [~,preds] = max(probs,[],1);
% preds = preds';
preds = probs;
grad = 0;
cost = 0;
return;
end;
%%======================================================================
%% STEP 1b: Calculate Cost
% In this step you will use the labels given as input and the probs
% calculated above to evaluate the cross-entropy objective. Store your
% results in cost.
cost = 0; % save objective into cost
%%% YOUR CODE HERE %%%
groundTrue = full(sparse(labels,1:numImages,1));
% groundTrue = labels;
cost = -(1./numImages).*sum(sum(groundTrue.*log(probs))); % cross-entropy only, no weight-decay term
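% Equivalently, cost = -(1/numImages) * sum_i log(probs(labels(i),i)), since
% groundTrue(k,i) is 1 exactly when labels(i) == k and 0 otherwise.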
%%======================================================================
%% STEP 1c: Backpropagation
% Backpropagate errors through the softmax and convolutional/subsampling
% layers. Store the errors for the next step to calculate the gradient.
% Backpropagating the error w.r.t the softmax layer is as usual. To
% backpropagate through the pooling layer, you will need to upsample the
% error with respect to the pooling layer for each filter and each image.
% Use the kron function and a matrix of ones to do this upsampling
% quickly.
%%% YOUR CODE HERE %%%
thetac7 = -(groundTrue-s);
% thetac7 = -(groundTrue -s).*s.*(1-s);
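% For a softmax output trained with cross-entropy, the output delta
% simplifies to probs - groundTrue; the extra s.*(1-s) factor in the
% commented line applies only to the logistic-output variant above.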
thetac6 = Wc7'*thetac7.*c6activations.*(1-c6activations); % note: the C6 delta uses Wc7 (the weights leaving C6), not Wc6
thetac5 = Wc6'*thetac6.*c5activations.*(1-c5activations);
thetas4 = Wc5'*thetac5; % no sigmoid derivative here: S4 is a pooling layer
% reshape the S4 delta back into feature-map form
assert(mod(c3numFilters,c1numFilters)==0, 'c3numFilters is not divisible by c1numFilters');
c3numtheta = c3numFilters/c1numFilters; % number of C3 feature maps (one delta per map), not the number of C3 kernels
thetas4 = reshape(thetas4,s4outputDim,s4outputDim,c3numtheta,numImages); % 4-D layout needed for the kron upsampling below
thetac3 = zeros(c3convDim,c3convDim,c3numtheta,numImages);
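% Upsample through the S4 mean pooling: kron(delta, ones(s4poolDim)) / s4poolDim^2
% spreads each pooled delta evenly over its s4poolDim x s4poolDim window,
% then the sigmoid derivative of the C3 activations is applied.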
for imageNum =1:numImages
for c3thetaNum = 1:c3numtheta
%thetaConvolved(:,:,filterNum,imageNum) = 1./((poolDim).^2).*(kron(thetaPool(:,:,filterNum,imageNum),ones(poolDim)));
thetac3(:,:,c3thetaNum,imageNum) = 1./((s4poolDim).^2).*(kron(thetas4(:,:,c3thetaNum,imageNum),...
ones(s4poolDim))).*c3activations(:,:,c3thetaNum,imageNum).*(1-c3activations(:,:,c3thetaNum,imageNum));
end
end
% backpropagate through the fully connected S2 -> C3 stage
% (Wc3 kernel indexing: see the diagram before the C3 forward pass above)
Wc3_ = reshape(Wc3,c3filterDim,c3filterDim,c1numFilters,c3numFilters/c1numFilters);
thetas2 = zeros(s2outputDim,s2outputDim,c1numFilters,numImages);
for imageNum = 1:numImages
for i = 1:c1numFilters
for j = 1:c3numFilters/c1numFilters % 16
% Backprop C3 -> S2: the forward pass convolved with rot90(Wc3,2), so
% here the un-rotated kernel is used together with conv2(...,'full').
kernel = Wc3_(:,:,i,j);
thetas2(:,:,i,imageNum) = thetas2(:,:,i,imageNum)+...
conv2(thetac3(:,:,j,imageNum),kernel,'full');
% S2 is a pooling layer, so no activation-derivative factor here.
end
end
end
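% In the loop above, conv2(...,'full') pads the C3 delta so that every S2
% position collects the contributions of all kernel positions that covered
% it in the forward pass, summed over all C3 maps j connected to S2 map i.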
% Do not scale thetas2 by 1/c1numFilters: each C3 map is a plain sum of its
% S2 inputs in the forward pass, so the backpropagated delta needs no averaging.
% thetas2 = (1/c1numFilters).*thetas2; % (incorrect; kept only for reference)
% backpropagate through the S2 mean pooling to the C1 convolution layer
thetac1 = zeros(c1convDim,c1convDim,c1numFilters,numImages);
for imageNum =1:numImages
for c1filterNum = 1:c1numFilters
%thetaConvolved(:,:,filterNum,imageNum) = 1./((poolDim).^2).*(kron(thetaPool(:,:,filterNum,imageNum),ones(poolDim)));
thetac1(:,:,c1filterNum,imageNum) = 1./((s2poolDim).^2).*(kron(thetas2(:,:,c1filterNum,imageNum),...
ones(s2poolDim))).*c1activations(:,:,c1filterNum,imageNum).*(1-c1activations(:,:,c1filterNum,imageNum));
end
end
%%======================================================================
%% STEP 1d: Gradient Calculation
% After backpropagating the errors above, we can use them to calculate the
% gradient with respect to all the parameters. The gradient w.r.t the
% softmax layer is calculated as usual. To calculate the gradient w.r.t.
% a filter in the convolutional layer, convolve the backpropagated error
% for that filter with each image and aggregate over images.
%%% YOUR CODE HERE %%%
m = numImages;
Wc7_grad = 1./m*(thetac7*c6activations');
bc7_grad = 1./m*(sum(thetac7,2));
Wc6_grad = 1./m*(thetac6*c5activations');
bc6_grad = 1./m*(sum(thetac6,2));
Wc5_grad = 1./m*(thetac5*s4activationsPooled');
bc5_grad = 1./m*(sum(thetac5,2));
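% Gradients of the convolution kernels: each kernel gradient is the 'valid'
% correlation of the layer's input map with its delta map, accumulated over
% images; rot90(delta,2) inside conv2 turns conv2 into that correlation.
% Bias gradients are the deltas summed over images and spatial positions.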
Wc3_grad_t = zeros([size(Wc3_grad),numImages]);
for imageNum = 1:numImages
for indx = 1:c1numFilters % 6
for indy = 1:c3numFilters/c1numFilters % 16
% kernel = rot90(squeeze(thetac3(:,:,indy,imageNum)),2);
kernel = rot90(thetac3(:,:,indy,imageNum),2);
Wc3_grad_t(:,:,(indy-1)*c1numFilters+indx,imageNum) = conv2(s2activationsPooled(:,:,indx,imageNum),kernel,'valid');
end % the index (indy-1)*c1numFilters+indx matches the Wc3 ordering used in the corrected forward pass
end
end
Wc3_grad = (1./m).*sum(Wc3_grad_t,4);
bc3_grad = (1./m).*squeeze(sum(sum(sum(thetac3,4),1),2)); % sum the C3 delta over images and spatial positions
% Wc1_grad
Wc1_grad_t = zeros([size(Wc1_grad),numImages]);
for imageNum = 1:numImages
for filterNum = 1:c1numFilters
kernel = rot90(thetac1(:,:,filterNum,imageNum),2);
Wc1_grad_t(:,:,filterNum,imageNum) = conv2(images(:,:,imageNum),kernel,'valid');
end
end
Wc1_grad = (1./m).*sum(Wc1_grad_t,4);
bc1_grad = (1./m).*squeeze(sum(sum(sum(thetac1,4),1),2));
%% Unroll gradient into grad vector for minFunc
grad = [Wc1_grad(:) ; Wc3_grad(:); Wc5_grad(:); Wc6_grad(:); Wc7_grad(:);...
bc1_grad(:); bc3_grad(:) ; bc5_grad(:); bc6_grad(:); bc7_grad(:)];
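% Sanity-check sketch (assumes a numerical-gradient helper such as the
% hypothetical computeNumericalGradient is available on the path):
%   numGrad = computeNumericalGradient(@(p) cnnCost(p, images, labels, ...
%       numClasses, c1filterDim, c1numFilters, c3filterDim, c3numFilters, ...
%       s2poolDim, s4poolDim, c5Dim, c6Dim), theta);
%   disp(norm(numGrad - grad) / norm(numGrad + grad));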
end