
Commit 3d8f1ed

Implement alexnet, sppnet, and zfnet; add README
1 parent 4ef017a commit 3d8f1ed


5 files changed: +278 -0 lines changed


README.md

+20
@@ -0,0 +1,20 @@
### 1. Introduction

This project contains my PyTorch implementations of the examples in 《深度学习-卷积神经网络从入门到精通》 (*Deep Learning: Convolutional Neural Networks from Beginner to Master*) by 李玉鑑, 张婷, 单传辉, 刘兆英 et al. (1st edition, 1st printing, July 2018), which I am working through.

### 2. Study suggestions

* Understand what convolution is (the book does not seem to cover this, ha; I understood it by reading the book's earlier explanations and then the Zhihu answer by 果程C). [Link](https://www.zhihu.com/question/22298352/answer/228543288)
* Skip Chapter 2 of the book ("Preliminaries") and instead read or consult 《深度学习的数学》 (*The Mathematics of Deep Learning*) by 涌井良幸 and 涌井贞美, translated by 杨瑞龙.
* An introductory video on neurons: [Link](https://b23.tv/av65013444)

### 3. Issues encountered

* While implementing the examples I found a number of typos and unclear explanations in the book.
* Some training-data download links are dead, and some test data is incomplete. (For now I only provide the model modules; the data-preparation code is written and will be added once it has been verified.)

### 4. Contact & discussion

WeChat: yyguzi (please mention "cnn")

alexnet/module.py

+60
@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-

import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
import numpy as np


class AlexNet(nn.Module):

    def __init__(self):
        super(AlexNet, self).__init__()

        self.conv1 = nn.Conv2d(3, 96, (11, 11), stride=4)
        self.conv2 = nn.Conv2d(96, 256, (5, 5), stride=1)
        self.conv3 = nn.Conv2d(256, 384, (3, 3), stride=1)
        self.conv4 = nn.Conv2d(384, 384, (3, 3), stride=1)
        self.conv5 = nn.Conv2d(384, 256, (3, 3), stride=1)

        self.fc1 = nn.Linear(256 * 2 * 2, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 1000)

    def forward(self, x):
        # Layer 1: conv -> ReLU -> max pool -> local response norm
        x = F.local_response_norm(
            F.max_pool2d(F.relu(self.conv1(x)), kernel_size=3, stride=2),
            4, alpha=0.001 / 9.0, beta=0.75)
        # Layer 2: conv -> ReLU -> max pool -> local response norm
        x = F.local_response_norm(
            F.max_pool2d(F.relu(self.conv2(x)), kernel_size=3, stride=2),
            4, alpha=0.001 / 9.0, beta=0.75)
        # Layer 3
        x = F.relu(self.conv3(x))
        # Layer 4
        x = F.relu(self.conv4(x))
        # Layer 5
        x = F.max_pool2d(F.relu(self.conv5(x)), (3, 3), stride=2)
        # Flatten to one vector per sample
        x = x.view(x.size(0), -1)
        # Layer 6
        x = F.dropout(F.relu(self.fc1(x)), p=0.5, training=self.training)
        # Layer 7
        x = F.dropout(F.relu(self.fc2(x)), p=0.5, training=self.training)
        # Layer 8
        x = F.softmax(self.fc3(x), dim=1)

        print(x.size())  # debug: show output shape
        return x


if __name__ == "__main__":
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    img = Image.open('../test.JPEG')

    img = img.resize((227, 227), Image.LANCZOS).convert('RGB')
    np_data = np.array(img)

    net = AlexNet().to(device)
    # HWC -> CHW, then add a batch dimension
    input = torch.from_numpy(np_data).permute(2, 0, 1).unsqueeze(0).float().to(device)

    net(input)
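Since the convolutions above use no padding, the flattened feature size for a 227x227 input works out to 256 * 2 * 2, which is what `fc1` expects. A minimal sketch (not part of the commit) that traces the spatial size layer by layer:

```python
# Hypothetical sanity check: trace a 227x227 input through the unpadded
# conv/pool stack of the AlexNet above to see where 256 * 2 * 2 comes from.
def conv_out(size, kernel, stride, pad=0):
    # standard conv/pool output-size formula
    return (size + 2 * pad - kernel) // stride + 1

s = 227
s = conv_out(s, 11, 4)   # conv1 -> 55
s = conv_out(s, 3, 2)    # pool1 -> 27
s = conv_out(s, 5, 1)    # conv2 -> 23
s = conv_out(s, 3, 2)    # pool2 -> 11
s = conv_out(s, 3, 1)    # conv3 -> 9
s = conv_out(s, 3, 1)    # conv4 -> 7
s = conv_out(s, 3, 1)    # conv5 -> 5
s = conv_out(s, 3, 2)    # pool5 -> 2
print(s, 256 * s * s)    # 2 1024, matching nn.Linear(256 * 2 * 2, 4096)
```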

sppnet/moudle.py

+110
@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-

import math
from functools import reduce

import torch
import torch.nn as nn
from PIL import Image
import numpy as np


def spatial_pyramid_pool(input, out_size):
    # Max-pool the feature map at each pyramid level and concatenate the
    # results into one fixed-length vector per sample.
    num_sample, channel, h, w = input.size()

    for i, tmp_size in enumerate(out_size):
        h_wid = int(math.ceil(h / tmp_size))
        w_wid = int(math.ceil(w / tmp_size))

        h_pad = int(math.ceil(h_wid * tmp_size - h + 1) / 2)
        w_pad = int(math.ceil(w_wid * tmp_size - w + 1) / 2)

        maxpool = nn.MaxPool2d((h_wid, w_wid), stride=(h_wid, w_wid), padding=(h_pad, w_pad))

        x = maxpool(input)
        if i == 0:
            spp = x.view(num_sample, -1)
        else:
            spp = torch.cat((spp, x.view(num_sample, -1)), 1)
    return spp


class SPPNet(nn.Module):

    def __init__(self):
        super(SPPNet, self).__init__()
        self.output_num = (4, 2, 1)

        # Layer 1
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, (7, 7), stride=2),    # 227 x 227 x 3 -> 110 x 110 x 96
            nn.ReLU(),                             # missing in the original book
            nn.MaxPool2d((3, 3), 2, padding=1),    # 110 x 110 x 96 -> 55 x 55 x 96
            nn.LocalResponseNorm(5)                # missing in the original book
        )

        # Layer 2
        self.layer2 = nn.Sequential(
            nn.Conv2d(96, 256, (5, 5), stride=2),  # 55 x 55 x 96 -> 26 x 26 x 256
            nn.ReLU(),                             # missing in the original book
            nn.MaxPool2d((3, 3), 2, padding=1),    # 26 x 26 x 256 -> 13 x 13 x 256
            nn.LocalResponseNorm(5)                # missing in the original book
        )

        self.layer3 = nn.Sequential(
            nn.Conv2d(256, 384, (3, 3), stride=1, padding=1),  # 13 x 13 x 256 -> 13 x 13 x 384
            nn.ReLU(),                             # missing in the original book
        )

        self.layer4 = nn.Sequential(
            nn.Conv2d(384, 384, (3, 3), stride=1, padding=1),  # 13 x 13 x 384 -> 13 x 13 x 384
            nn.ReLU(),                             # missing in the original book
        )

        self.layer5 = nn.Sequential(
            nn.Conv2d(384, 256, (3, 3), stride=1, padding=1),  # 13 x 13 x 384 -> 13 x 13 x 256
            nn.ReLU(),                             # missing in the original book
            nn.MaxPool2d((3, 3), 2)                # 13 x 13 x 256 -> 6 x 6 x 256
        )

        # self.layer6 = nn.Sequential(
        #     nn.Linear(6 * 6 * 256, 4096),
        #     nn.ReLU(),                           # missing in the original book
        # )

        self.layer7 = nn.Sequential(
            # (4*4 + 2*2 + 1*1) * 256 = 5376 inputs from the SPP layer
            nn.Linear(reduce(lambda x, y: x + y * y * 256, self.output_num, 0), 4096),
            nn.ReLU(),                             # missing in the original book
        )

        self.layer8 = nn.Sequential(
            nn.Linear(4096, 1000),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = spatial_pyramid_pool(x, self.output_num)

        x = x.view(x.size(0), -1)
        x = self.layer7(x)
        x = self.layer8(x)
        return x


if __name__ == "__main__":
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    img = Image.open('../test.JPEG')

    # No resize needed: the SPP layer accepts arbitrary input sizes.
    # img = img.resize((227, 227), Image.LANCZOS).convert('RGB')
    img = img.convert('RGB')
    np_data = np.array(img)

    net = SPPNet().to(device)
    # HWC -> CHW, then add a batch dimension
    input = torch.from_numpy(np_data).permute(2, 0, 1).unsqueeze(0).float().to(device)

    net(input)
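The point of `spatial_pyramid_pool` is that its output length depends only on the pyramid levels and the channel count, not on the input's spatial size: with levels (4, 2, 1) and 256 channels it is always (16 + 4 + 1) * 256 = 5376, exactly the size `layer7` computes with `reduce`. A small sketch (not part of the commit; it assumes `spatial_pyramid_pool` from `sppnet/moudle.py` is importable):

```python
import torch
from sppnet.moudle import spatial_pyramid_pool  # assumed import path

# Two feature maps with different (even non-square) spatial sizes...
feat_a = torch.randn(1, 256, 13, 13)
feat_b = torch.randn(1, 256, 10, 8)

# ...pool to the same fixed-length vector.
out_a = spatial_pyramid_pool(feat_a, (4, 2, 1))
out_b = spatial_pyramid_pool(feat_b, (4, 2, 1))
print(out_a.shape, out_b.shape)  # both torch.Size([1, 5376])
```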

test.JPEG

33.1 KB

zfnet/moudle.py

+88
@@ -0,0 +1,88 @@
# -*- coding: utf-8 -*-

import torch
import torch.nn as nn
from PIL import Image
import numpy as np

# Reference: https://github.com/amir-saniyan/ZFNet/blob/master/zfnet.py

class ZFNet(nn.Module):

    def __init__(self):
        super(ZFNet, self).__init__()

        # Layer 1
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, (7, 7), stride=2),    # 227 x 227 x 3 -> 110 x 110 x 96
            nn.ReLU(),                             # missing in the original book
            nn.MaxPool2d((3, 3), 2, padding=1),    # 110 x 110 x 96 -> 55 x 55 x 96
            nn.LocalResponseNorm(5)                # missing in the original book
        )

        # Layer 2
        self.layer2 = nn.Sequential(
            nn.Conv2d(96, 256, (5, 5), stride=2),  # 55 x 55 x 96 -> 26 x 26 x 256
            nn.ReLU(),                             # missing in the original book
            nn.MaxPool2d((3, 3), 2, padding=1),    # 26 x 26 x 256 -> 13 x 13 x 256
            nn.LocalResponseNorm(5)                # missing in the original book
        )

        self.layer3 = nn.Sequential(
            nn.Conv2d(256, 384, (3, 3), stride=1, padding=1),  # 13 x 13 x 256 -> 13 x 13 x 384
            nn.ReLU(),                             # missing in the original book
        )

        self.layer4 = nn.Sequential(
            nn.Conv2d(384, 384, (3, 3), stride=1, padding=1),  # 13 x 13 x 384 -> 13 x 13 x 384
            nn.ReLU(),                             # missing in the original book
        )

        self.layer5 = nn.Sequential(
            nn.Conv2d(384, 256, (3, 3), stride=1, padding=1),  # 13 x 13 x 384 -> 13 x 13 x 256
            nn.ReLU(),                             # missing in the original book
            nn.MaxPool2d((3, 3), 2)                # 13 x 13 x 256 -> 6 x 6 x 256
        )

        self.layer6 = nn.Sequential(
            nn.Linear(6 * 6 * 256, 4096),
            nn.ReLU(),                             # missing in the original book
        )

        self.layer7 = nn.Sequential(
            nn.Linear(4096, 4096),
            nn.ReLU(),                             # missing in the original book
        )

        self.layer8 = nn.Sequential(
            nn.Linear(4096, 1000),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = x.view(x.size(0), -1)   # flatten to one vector per sample
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.layer8(x)
        return x


if __name__ == "__main__":
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    img = Image.open('../test.JPEG')

    img = img.resize((227, 227), Image.LANCZOS).convert('RGB')
    np_data = np.array(img)

    net = ZFNet().to(device)
    # HWC -> CHW, then add a batch dimension
    input = torch.from_numpy(np_data).permute(2, 0, 1).unsqueeze(0).float().to(device)

    net(input)
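As with AlexNet, the `6 * 6 * 256` input size of `layer6` can be verified by pushing a dummy tensor through the convolutional stack. A short sketch (not part of the commit; it assumes `ZFNet` from `zfnet/moudle.py` is importable):

```python
import torch
from zfnet.moudle import ZFNet  # assumed import path

net = ZFNet().eval()
x = torch.randn(1, 3, 227, 227)
with torch.no_grad():
    feat = net.layer5(net.layer4(net.layer3(net.layer2(net.layer1(x)))))
print(feat.shape)  # torch.Size([1, 256, 6, 6]) -> flattened length 9216 = 6*6*256
```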
