Skip to content

Commit

Permalink
Add 2-D convnet demo
Browse files Browse the repository at this point in the history
Train on 32x32 NORB data; 2 C-S layers followed by 1 fully-connected layer
See README
  • Loading branch information
gwtaylor committed Apr 5, 2010
0 parents commit 3745a65
Show file tree
Hide file tree
Showing 20 changed files with 2,146 additions and 0 deletions.
164 changes: 164 additions & 0 deletions CG_SMALLNORB_CLASSIFY_C2.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
function [f, df] = CG_SMALLNORB_CLASSIFY_C2(VV,Dim,XX,target, ...
                                            connections)
%CG_SMALLNORB_CLASSIFY_C2 Cost and gradient for the 2-stage convnet classifier.
%
%   [f, df] = CG_SMALLNORB_CLASSIFY_C2(VV,Dim,XX,target,connections)
%
%   Inputs
%     VV          - all parameters stacked in one vector, in the order
%                   filters1, convcoeff1, filters2, convcoeff2, w_class
%     Dim         - 8-element vector
%                   [filtersize1 nummaps1 downsample1 ...
%                    filtersize2 nummaps2 downsample2 outsize numlabels]
%     XX          - input images, nr x nc x numcases
%     target      - target matrix, same size as the output of convnet_probs
%                   (presumably numcases x numlabels - confirm against caller)
%     connections - table indexed as connections(j,k): the layer-1 map that
%                   feeds the k-th filter of layer-2 output map j
%
%   Outputs
%     f  - multi-class cross-entropy, summed over cases and labels
%     df - gradient of f, stacked in the same order as VV
%
%   The (conv output size)/downsample ratios must be integers; they are
%   used directly as array dimensions.

filtersize1 = Dim(1);
nummaps1    = Dim(2);
downsample1 = Dim(3);
filtersize2 = Dim(4);
nummaps2    = Dim(5);
downsample2 = Dim(6);
outsize     = Dim(7);   %number of features entering the classifier
numlabels   = Dim(8);

[nr,nc,numcases] = size(XX);
num_connect = size(connections,2);

%'valid' convolution output size, then downsampled size, for each stage
outr1 = nr-filtersize1+1;
outc1 = nc-filtersize1+1;
nr1 = outr1/downsample1; nc1 = outc1/downsample1;

outr2 = nr1-filtersize2+1;
outc2 = nc1-filtersize2+1;
nr2 = outr2/downsample2; nc2 = outc2/downsample2;

%deconversion of vectorized parameters
filters1 = reshape(VV(1:filtersize1*filtersize1*nummaps1), ...
                   [filtersize1 filtersize1 nummaps1]);
xxx = filtersize1*filtersize1*nummaps1;
convcoeff1 = reshape(VV(xxx+1:xxx+nummaps1),nummaps1,1);
xxx = xxx+nummaps1;
filters2 = reshape(VV(xxx+1:xxx+filtersize2*filtersize2*(num_connect*nummaps2)), ...
                   [filtersize2 filtersize2 num_connect*nummaps2]);
xxx = xxx+filtersize2*filtersize2*(num_connect*nummaps2);
convcoeff2 = reshape(VV(xxx+1:xxx+nummaps2),nummaps2,1);
xxx = xxx+nummaps2;
w_class = reshape(VV(xxx+1:xxx+(outsize+1)*numlabels),outsize+1,numlabels);

%forward pass
%also returns the intermediate maps of both stages (used in backprop)
[yy,map1,y1,map2] = convnet_forward2(XX,filters1,convcoeff1,downsample1,filters2, ...
                                     convcoeff2,downsample2,connections);
yy = [yy ones(numcases,1)]; %extra dimension (for bias)

%go through classifier
targetout = convnet_probs(yy,w_class);

%cross-entropy error generalized to multi-class
f = -sum(sum( target.*log(targetout)));

delta5 = (targetout-target);
dw_class = yy'*delta5;

%subsampling layer has no parameters
%if the layer following the subsampling layer is a fully connected layer
%then the sensitivity maps can be computed by vanilla backprop
delta4 = (delta5*w_class');
delta4 = delta4(:,1:end-1); %drop the column belonging to the bias input

%delta4 is in vector form (one row per case)
%reshape it back into maps so we can do element-wise multiplication
%(per the original author, this reverses the reshaping done in the
%forward pass)
delta4r = transpose(delta4);
delta4r = reshape(delta4r,[nr2 nc2 nummaps2 numcases]);
delta4r = permute(delta4r,[1 2 4 3]); %last dimension is filters

%convolutional layer 2
%upsample the downsampling layer's sensitivity map to the size of the
%convolutional layer's map; repeat for each map
delta3 = zeros(outr2,outc2,numcases,nummaps2);
dfilters2 = zeros(filtersize2,filtersize2,numcases,num_connect*nummaps2);
dconvcoeff2 = zeros(nummaps2,1);
for jj=1:nummaps2
    %upsample the deltas to make them compatible size
    %kron is only defined for 2-D inputs, so lay the cases side by side
    %(nr2 x nc2*numcases) first; the final reshape separates them again
    up = reshape(kron(reshape(delta4r(:,:,:,jj),nr2,[]),ones(downsample2)), ...
                 [outr2 outc2 numcases]);
    %for sigmoid nonlinearity
    %delta3(:,:,:,jj)=convcoeff2(jj)*map2(:,:,:,jj).*(1-map2(:,:,:,jj)).*up;
    %for tanh nonlinearity
    delta3(:,:,:,jj) = convcoeff2(jj)*(1-map2(:,:,:,jj).^2).*up;

    tmp = map2(:,:,:,jj).*up;      %dconvcoeff not summed
    dconvcoeff2(jj) = sum(tmp(:)); %summing over pixels and cases
end

%filter gradients - this is where it is different for the 2-layer model
%note that filters in the second convolutional layer correspond to both
%inputs and outputs (for first layer there was just a single input)
for jj=1:nummaps2 %iterate through output
    for kk=1:num_connect %iterate through prev layer feature maps (input)
        input_map = connections(jj,kk);
        filteridx = num_connect*(jj-1)+kk; %index to 3rd dim in filters2
        for cc=1:numcases
            %here we perform cross-correlation by rotating the kernel 180 deg
            %where the kernel is the sensitivity maps and applying convolution
            krnl = rot180(delta3(:,:,cc,jj));
            dfilters2(:,:,cc,filteridx) = rot180(conv2(y1(:,:,cc,input_map), ...
                                                       krnl,'valid'));
        end
    end
end
dfilters2 = squeeze(sum(dfilters2,3)); %sum over cases

%subsampling layer 1
%layer l+1 was not a fully-connected layer, it was convolutional
%so this is slightly harder than subsampling layer 2 (calculation of delta4)
delta2 = zeros(nr1,nc1,numcases,nummaps1);
for jj=1:nummaps1
    %only add the contribution if subsampling layer 1 is connected to
    %convolutional layer 2
    %rows gives the output maps we need to consider
    %for each row, cols gives the connection index (we need this for the
    %filter number .. i.e. 3rd dimension of filters2)
    %neither the lookup nor the rotated kernel depends on the case, so
    %both are computed once per map rather than once per case
    [rows,cols] = find(connections==jj);
    for kk=1:length(rows)
        map_out = rows(kk);
        filteridx = num_connect*(map_out-1)+cols(kk); %3rd index in filters2
        krnl = rot180(filters2(:,:,filteridx));
        for cc=1:numcases
            delta2(:,:,cc,jj) = delta2(:,:,cc,jj)+ ...
                conv2(delta3(:,:,cc,map_out),krnl,'full');
        end
    end
end

%convolutional layer 1
%upsample the downsampling layer's sensitivity map to the size of the
%convolutional layer's map; repeat for each map
delta1 = zeros(outr1,outc1,numcases,nummaps1);
dfilters1 = zeros(filtersize1,filtersize1,numcases,nummaps1);
dconvcoeff1 = zeros(nummaps1,1);
for jj=1:nummaps1
    %upsample the deltas to make them compatible size (2-D kron, as above)
    up = reshape(kron(reshape(delta2(:,:,:,jj),nr1,[]),ones(downsample1)), ...
                 [outr1 outc1 numcases]);
    %for sigmoid nonlinearity
    %delta1(:,:,:,jj)=convcoeff1(jj)*map1(:,:,:,jj).*(1-map1(:,:,:,jj)).*up;
    %for tanh nonlinearity
    delta1(:,:,:,jj) = convcoeff1(jj)*(1-map1(:,:,:,jj).^2).*up;

    tmp = map1(:,:,:,jj).*up;      %dconvcoeff not summed
    dconvcoeff1(jj) = sum(tmp(:)); %summing over pixels and cases
end

%filter gradients - simple for convolutional layer 1
%since there is a single input (the data)
for jj=1:nummaps1
    for cc=1:numcases
        %here we perform cross-correlation by rotating the kernel 180 deg
        %where the kernel is the sensitivity maps and applying convolution
        krnl = rot180(delta1(:,:,cc,jj));
        dfilters1(:,:,cc,jj) = rot180(conv2(XX(:,:,cc),krnl,'valid'));
    end
end

dfilters1 = squeeze(sum(dfilters1,3)); %sum over cases

%stack the gradients in the same order as the parameters in VV
df = [dfilters1(:);dconvcoeff1(:);dfilters2(:);dconvcoeff2(:);dw_class(:)];
171 changes: 171 additions & 0 deletions CG_SMALLNORB_CLASSIFY_C2f.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
function [f, df] = CG_SMALLNORB_CLASSIFY_C2f(VV,Dim,XX,target, ...
                                             connections)
%CG_SMALLNORB_CLASSIFY_C2f Single-precision cost/gradient for the 2-stage convnet.
%
%   [f, df] = CG_SMALLNORB_CLASSIFY_C2f(VV,Dim,XX,target,connections)
%
%   Same computation as CG_SMALLNORB_CLASSIFY_C2, but the work arrays are
%   allocated as 'single' and the convolutions go through ipp_mt_conv2 and
%   convnet_forward2f instead of conv2 and convnet_forward2.
%
%   Inputs
%     VV          - all parameters stacked in one vector, in the order
%                   filters1, convcoeff1, filters2, convcoeff2, w_class
%     Dim         - 8-element vector
%                   [filtersize1 nummaps1 downsample1 ...
%                    filtersize2 nummaps2 downsample2 outsize numlabels]
%     XX          - input images, nr x nc x numcases
%                   (presumably already single precision - confirm against caller)
%     target      - target matrix, same size as the output of convnet_probs
%     connections - table indexed as connections(j,k): the layer-1 map that
%                   feeds the k-th filter of layer-2 output map j
%
%   Outputs
%     f  - multi-class cross-entropy, summed over cases and labels
%     df - gradient of f, stacked in the same order as VV
%
%   The (conv output size)/downsample ratios must be integers; they are
%   used directly as array dimensions.

filtersize1 = Dim(1);
nummaps1    = Dim(2);
downsample1 = Dim(3);
filtersize2 = Dim(4);
nummaps2    = Dim(5);
downsample2 = Dim(6);
outsize     = Dim(7);   %number of features entering the classifier
numlabels   = Dim(8);

[nr,nc,numcases] = size(XX);
num_connect = size(connections,2);

%'valid' convolution output size, then downsampled size, for each stage
outr1 = nr-filtersize1+1;
outc1 = nc-filtersize1+1;
nr1 = outr1/downsample1; nc1 = outc1/downsample1;

outr2 = nr1-filtersize2+1;
outc2 = nc1-filtersize2+1;
nr2 = outr2/downsample2; nc2 = outc2/downsample2;

%deconversion of vectorized parameters
filters1 = reshape(VV(1:filtersize1*filtersize1*nummaps1), ...
                   [filtersize1 filtersize1 nummaps1]);
xxx = filtersize1*filtersize1*nummaps1;
convcoeff1 = reshape(VV(xxx+1:xxx+nummaps1),nummaps1,1);
xxx = xxx+nummaps1;
filters2 = reshape(VV(xxx+1:xxx+filtersize2*filtersize2*(num_connect*nummaps2)), ...
                   [filtersize2 filtersize2 num_connect*nummaps2]);
xxx = xxx+filtersize2*filtersize2*(num_connect*nummaps2);
convcoeff2 = reshape(VV(xxx+1:xxx+nummaps2),nummaps2,1);
xxx = xxx+nummaps2;
w_class = reshape(VV(xxx+1:xxx+(outsize+1)*numlabels),outsize+1,numlabels);

%forward pass
%also returns the intermediate maps of both stages (used in backprop)
[yy,map1,y1,map2] = convnet_forward2f(XX,filters1,convcoeff1,downsample1,filters2, ...
                                      convcoeff2,downsample2,connections);
yy = [yy ones(numcases,1,'single')]; %extra dimension (for bias)

%go through classifier
targetout = convnet_probs(yy,w_class);

%cross-entropy error generalized to multi-class
f = -sum(sum( target.*log(targetout)));

delta5 = (targetout-target);
dw_class = yy'*delta5;

%subsampling layer has no parameters
%if the layer following the subsampling layer is a fully connected layer
%then the sensitivity maps can be computed by vanilla backprop
delta4 = (delta5*w_class');
delta4 = delta4(:,1:end-1); %drop the column belonging to the bias input

%delta4 is in vector form (one row per case)
%reshape it back into maps so we can do element-wise multiplication
%(per the original author, this reverses the reshaping done in the
%forward pass)
delta4r = transpose(delta4);
delta4r = reshape(delta4r,[nr2 nc2 nummaps2 numcases]);
delta4r = permute(delta4r,[1 2 4 3]); %last dimension is filters

%convolutional layer 2
%upsample the downsampling layer's sensitivity map to the size of the
%convolutional layer's map; repeat for each map
delta3 = zeros(outr2,outc2,numcases,nummaps2,'single');
dfilters2 = zeros(filtersize2,filtersize2,numcases,num_connect*nummaps2,'single');
dconvcoeff2 = zeros(nummaps2,1,'single');
for jj=1:nummaps2
    %upsample the deltas to make them compatible size
    %kron is only defined for 2-D inputs, so lay the cases side by side
    %(nr2 x nc2*numcases) first; the final reshape separates them again
    up = reshape(kron(reshape(delta4r(:,:,:,jj),nr2,[]), ...
                      ones(downsample2,'single')), ...
                 [outr2 outc2 numcases]);
    %for sigmoid nonlinearity
    %delta3(:,:,:,jj)=convcoeff2(jj)*map2(:,:,:,jj).*(1-map2(:,:,:,jj)).*up;
    %for tanh nonlinearity
    delta3(:,:,:,jj) = convcoeff2(jj)*(1-map2(:,:,:,jj).^2).*up;

    tmp = map2(:,:,:,jj).*up;      %dconvcoeff not summed
    dconvcoeff2(jj) = sum(tmp(:)); %summing over pixels and cases
end

%filter gradients - this is where it is different for the 2-layer model
%note that filters in the second convolutional layer correspond to both
%inputs and outputs (for first layer there was just a single input)
for jj=1:nummaps2 %iterate through output
    for kk=1:num_connect %iterate through prev layer feature maps (input)
        input_map = connections(jj,kk);
        filteridx = num_connect*(jj-1)+kk; %index to 3rd dim in filters2
        for cc=1:numcases
            %here we perform cross-correlation by rotating the kernel 180 deg
            %where the kernel is the sensitivity maps and applying convolution
            krnl = rot180(delta3(:,:,cc,jj));
            %unfortunately, the kernel here (rotated sensitivity at one layer up)
            %is case-specific and so I do not think we can do this in parallel
            dfilters2(:,:,cc,filteridx) = rot180(ipp_mt_conv2(y1(:,:,cc,input_map), ...
                                                              krnl,'valid'));
        end
    end
end
dfilters2 = squeeze(sum(dfilters2,3)); %sum over cases

%subsampling layer 1
%layer l+1 was not a fully-connected layer, it was convolutional
%so this is slightly harder than subsampling layer 2 (calculation of delta4)
delta2 = zeros(nr1,nc1,numcases,nummaps1,'single');
for jj=1:nummaps1
    %only add the contribution if subsampling layer 1 is connected to
    %convolutional layer 2
    %rows gives the output maps we need to consider
    %for each row, cols gives the connection index (we need this for the
    %filter number .. i.e. 3rd dimension of filters2)
    [rows,cols] = find(connections==jj);
    for kk=1:length(rows)
        map_out = rows(kk);
        filteridx = num_connect*(map_out-1)+cols(kk); %3rd index in filters2
        krnl = rot180(filters2(:,:,filteridx));
        %the kernel is shared by all cases, so the whole stack of cases is
        %handed to ipp_mt_conv2 in one call
        %NOTE(review): assumes ipp_mt_conv2 convolves each slice along the
        %3rd dimension independently - confirm against its implementation
        delta2(:,:,:,jj) = delta2(:,:,:,jj)+ ...
            ipp_mt_conv2(delta3(:,:,:,map_out),krnl,'full');
    end
end

%convolutional layer 1
%upsample the downsampling layer's sensitivity map to the size of the
%convolutional layer's map; repeat for each map
delta1 = zeros(outr1,outc1,numcases,nummaps1,'single');
dfilters1 = zeros(filtersize1,filtersize1,numcases,nummaps1,'single');
dconvcoeff1 = zeros(nummaps1,1,'single');
for jj=1:nummaps1
    %upsample the deltas to make them compatible size (2-D kron, as above)
    up = reshape(kron(reshape(delta2(:,:,:,jj),nr1,[]), ...
                      ones(downsample1,'single')), ...
                 [outr1 outc1 numcases]);
    %for sigmoid nonlinearity
    %delta1(:,:,:,jj)=convcoeff1(jj)*map1(:,:,:,jj).*(1-map1(:,:,:,jj)).*up;
    %for tanh nonlinearity
    delta1(:,:,:,jj) = convcoeff1(jj)*(1-map1(:,:,:,jj).^2).*up;

    tmp = map1(:,:,:,jj).*up;      %dconvcoeff not summed
    dconvcoeff1(jj) = sum(tmp(:)); %summing over pixels and cases
end

%filter gradients - simple for convolutional layer 1
%since there is a single input (the data)
for jj=1:nummaps1
    for cc=1:numcases
        %here we perform cross-correlation by rotating the kernel 180 deg
        %where the kernel is the sensitivity maps and applying convolution
        krnl = rot180(delta1(:,:,cc,jj));
        %unfortunately, the kernel here (rotated sensitivity at one layer up)
        %is case-specific and so I do not think we can do this in parallel
        dfilters1(:,:,cc,jj) = rot180(ipp_mt_conv2(XX(:,:,cc),krnl,'valid'));
    end
end

dfilters1 = squeeze(sum(dfilters1,3)); %sum over cases

%stack the gradients in the same order as the parameters in VV
df = [dfilters1(:);dconvcoeff1(:);dfilters2(:);dconvcoeff2(:);dw_class(:)];
20 changes: 20 additions & 0 deletions CG_SMALLNORB_CLASSIFY_CINIT.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
function [f, df] = CG_SMALLNORB_CLASSIFY_CINIT(VV,Dim,yy,targets)
%CG_SMALLNORB_CLASSIFY_CINIT Cost and gradient for the classifier weights only.
%
%   [f, df] = CG_SMALLNORB_CLASSIFY_CINIT(VV,Dim,yy,targets)
%
%   Backprop is restricted to the classification weights w_class; the
%   features yy are treated as fixed inputs. The weights include a bias row.
%
%   Inputs
%     VV      - vectorized w_class, (Dim(1)+1)*Dim(2) elements
%     Dim     - [number of input features, number of outputs]
%     yy      - feature matrix, one row per case
%     targets - target matrix, same size as the output of convnet_probs
%
%   Outputs
%     f  - cross-entropy summed over cases and outputs
%     df - gradient of f with respect to VV

num_inputs  = Dim(1);
num_outputs = Dim(2);

%unpack the weight vector; the extra row holds the bias
w_class = reshape(VV,num_inputs+1,num_outputs);

%append a constant column so the bias is handled like any other weight
num_cases = size(yy,1);
yy = [yy ones(num_cases,1)];

%class probabilities under the current weights
targetout = convnet_probs(yy,w_class);

tgt = targets(:,1:end);

%cross-entropy error
f = -sum(sum( tgt.*log(targetout)));

%error signal at the classifier output, then weight gradient
out_err = (targetout-tgt);
dw_class = yy'*out_err;

df = dw_class(:);
Loading

0 comments on commit 3745a65

Please sign in to comment.