From 8e218edf2381886f3e857b4c67b4c5fc287ff40b Mon Sep 17 00:00:00 2001
From: Rosco
Date: Thu, 9 Feb 2017 19:08:44 +0100
Subject: [PATCH] remove original code

---
 CMakeLists.txt | 65 -
 LICENSE | 44 -
 README.md | 94 --
 cmake/Misc.cmake | 52 -
 cmake/Modules/FindAtlas.cmake | 52 -
 cmake/Modules/FindGFlags.cmake | 50 -
 cmake/Modules/FindOpenBLAS.cmake | 64 -
 cmake/Modules/FindvecLib.cmake | 42 -
 cmake/ProtoBuf.cmake | 90 --
 cmake/Targets.cmake | 174 --
 cmake/Utils.cmake | 382 -----
 .../CaffeSimple.xcodeproj/project.pbxproj | 404 -----
 examples/ios/simple/CaffeSimple/AppDelegate.h | 17 -
 .../ios/simple/CaffeSimple/AppDelegate.mm | 52 -
 .../AppIcon.appiconset/Contents.json | 48 -
 .../Base.lproj/LaunchScreen.storyboard | 27 -
 .../CaffeSimple/Base.lproj/Main.storyboard | 60 -
 examples/ios/simple/CaffeSimple/ImageReader.h | 20 -
 .../ios/simple/CaffeSimple/ImageReader.mm | 133 --
 examples/ios/simple/CaffeSimple/Info.plist | 38 -
 .../ios/simple/CaffeSimple/ViewController.h | 21 -
 .../ios/simple/CaffeSimple/ViewController.mm | 88 --
 examples/ios/simple/CaffeSimple/data/61.png | Bin 287 -> 0 bytes
 .../simple/CaffeSimple/data/test_image.jpg | Bin 5330 -> 0 bytes
 .../simple/CaffeSimple/data/test_image.png | Bin 287 -> 0 bytes
 examples/ios/simple/CaffeSimple/main.m | 16 -
 examples/mnist/test_lenet.sh | 3 -
 include/caffe/blob.hpp | 282 ----
 include/caffe/caffe.hpp | 25 -
 include/caffe/common.hpp | 209 ---
 include/caffe/data_transformer.hpp | 154 --
 include/caffe/filler.hpp | 295 ----
 include/caffe/glog_wrapper.hpp | 81 -
 include/caffe/layer.hpp | 477 ------
 include/caffe/layer_factory.hpp | 141 --
 include/caffe/layers/base_conv_layer.hpp | 174 --
 include/caffe/layers/base_data_layer.hpp | 88 --
 include/caffe/layers/bnll_layer.hpp | 70 -
 include/caffe/layers/concat_layer.hpp | 87 -
 include/caffe/layers/conv_layer.hpp | 84 -
 include/caffe/layers/dropout_layer.hpp | 80 -
 include/caffe/layers/eltwise_layer.hpp | 51 -
 include/caffe/layers/flatten_layer.hpp | 61 -
 include/caffe/layers/im2col_layer.hpp | 65 -
 include/caffe/layers/inner_product_layer.hpp | 52 -
 include/caffe/layers/input_layer.hpp | 44 -
 include/caffe/layers/loss_layer.hpp | 53 -
 include/caffe/layers/lrn_layer.hpp | 94 --
 include/caffe/layers/memory_data_layer.hpp | 63 -
 include/caffe/layers/neuron_layer.hpp | 32 -
 include/caffe/layers/pooling_layer.hpp | 60 -
 include/caffe/layers/power_layer.hpp | 89 --
 include/caffe/layers/relu_layer.hpp | 85 -
 include/caffe/layers/sigmoid_layer.hpp | 71 -
 include/caffe/layers/softmax_layer.hpp | 50 -
 include/caffe/layers/softmax_loss_layer.hpp | 130 --
 include/caffe/layers/split_layer.hpp | 45 -
 include/caffe/layers/tanh_layer.hpp | 73 -
 include/caffe/net.hpp | 351 -----
 include/caffe/syncedmem.hpp | 95 --
 include/caffe/util/benchmark.hpp | 61 -
 include/caffe/util/blocking_queue.hpp | 45 -
 include/caffe/util/cudnn.hpp | 153 --
 include/caffe/util/device_alternate.hpp | 96 --
 include/caffe/util/format.hpp | 18 -
 include/caffe/util/im2col.hpp | 60 -
 include/caffe/util/insert_splits.hpp | 26 -
 include/caffe/util/io.hpp | 156 --
 include/caffe/util/math_functions.hpp | 283 ----
 include/caffe/util/mkl_alternate.hpp | 102 --
 include/caffe/util/rng.hpp | 60 -
 include/caffe/util/upgrade_proto.hpp | 88 --
 patch/diff.py | 23 -
 patch/include/caffe/caffe.hpp.patch | 17 -
 patch/include/caffe/common.hpp.patch | 52 -
 .../caffe/layers/base_data_layer.hpp.patch | 28 -
 patch/include/caffe/net.hpp.patch | 45 -
 patch/include/caffe/util/benchmark.hpp.patch | 28 -
 patch/include/caffe/util/io.hpp.patch | 28 -
.../caffe/util/math_functions.hpp.patch | 15 - patch/include/caffe/util/rng.hpp.patch | 53 - patch/patch.py | 25 - patch/src/caffe/common.cpp.patch | 66 - .../caffe/layers/base_data_layer.cpp.patch | 36 - patch/src/caffe/net.cpp.patch | 157 -- patch/src/caffe/util/benchmark.cpp.patch | 111 -- patch/src/caffe/util/io.cpp.patch | 12 - patch/src/caffe/util/math_functions.cpp.patch | 104 -- patch/synced_version | 1 - patch/tools/caffe.cpp.patch | 157 -- src/caffe/CMakeLists.txt | 19 - src/caffe/blob.cpp | 561 ------- src/caffe/common.cpp | 353 ----- src/caffe/data_transformer.cpp | 545 ------- src/caffe/layer_factory.cpp | 267 ---- src/caffe/layers/base_conv_layer.cpp | 396 ----- src/caffe/layers/base_data_layer.cpp | 144 -- src/caffe/layers/bnll_layer.cpp | 47 - src/caffe/layers/concat_layer.cpp | 104 -- src/caffe/layers/conv_layer.cpp | 81 - src/caffe/layers/dropout_layer.cpp | 75 - src/caffe/layers/eltwise_layer.cpp | 160 -- src/caffe/layers/flatten_layer.cpp | 44 - src/caffe/layers/im2col_layer.cpp | 193 --- src/caffe/layers/inner_product_layer.cpp | 150 -- src/caffe/layers/input_layer.cpp | 27 - src/caffe/layers/loss_layer.cpp | 27 - src/caffe/layers/lrn_layer.cpp | 257 --- src/caffe/layers/memory_data_layer.cpp | 123 -- src/caffe/layers/neuron_layer.cpp | 15 - src/caffe/layers/pooling_layer.cpp | 316 ---- src/caffe/layers/power_layer.cpp | 102 -- src/caffe/layers/relu_layer.cpp | 45 - src/caffe/layers/sigmoid_layer.cpp | 47 - src/caffe/layers/softmax_layer.cpp | 95 -- src/caffe/layers/softmax_loss_layer.cpp | 158 -- src/caffe/layers/split_layer.cpp | 59 - src/caffe/layers/tanh_layer.cpp | 44 - src/caffe/net.cpp | 1012 ------------ src/caffe/proto/caffe.proto | 1404 ----------------- src/caffe/syncedmem.cpp | 186 --- src/caffe/util/benchmark.cpp | 207 --- src/caffe/util/im2col.cpp | 234 --- src/caffe/util/insert_splits.cpp | 126 -- src/caffe/util/io.cpp | 241 --- src/caffe/util/math_functions.cpp | 418 ----- src/caffe/util/upgrade_proto.cpp | 1106 ------------- third_party/android-cmake | 1 - third_party/build-openblas.sh | 119 -- third_party/build-protobuf-3.1.0.sh | 184 --- third_party/ios-cmake | 1 - tools/CMakeLists.txt | 35 - tools/caffe.cpp | 488 ------ 133 files changed, 18169 deletions(-) delete mode 100644 CMakeLists.txt delete mode 100644 LICENSE delete mode 100644 README.md delete mode 100644 cmake/Misc.cmake delete mode 100644 cmake/Modules/FindAtlas.cmake delete mode 100644 cmake/Modules/FindGFlags.cmake delete mode 100644 cmake/Modules/FindOpenBLAS.cmake delete mode 100644 cmake/Modules/FindvecLib.cmake delete mode 100644 cmake/ProtoBuf.cmake delete mode 100644 cmake/Targets.cmake delete mode 100644 cmake/Utils.cmake delete mode 100644 examples/ios/simple/CaffeSimple.xcodeproj/project.pbxproj delete mode 100644 examples/ios/simple/CaffeSimple/AppDelegate.h delete mode 100644 examples/ios/simple/CaffeSimple/AppDelegate.mm delete mode 100644 examples/ios/simple/CaffeSimple/Assets.xcassets/AppIcon.appiconset/Contents.json delete mode 100644 examples/ios/simple/CaffeSimple/Base.lproj/LaunchScreen.storyboard delete mode 100644 examples/ios/simple/CaffeSimple/Base.lproj/Main.storyboard delete mode 100644 examples/ios/simple/CaffeSimple/ImageReader.h delete mode 100644 examples/ios/simple/CaffeSimple/ImageReader.mm delete mode 100644 examples/ios/simple/CaffeSimple/Info.plist delete mode 100644 examples/ios/simple/CaffeSimple/ViewController.h delete mode 100644 examples/ios/simple/CaffeSimple/ViewController.mm delete mode 100644 examples/ios/simple/CaffeSimple/data/61.png delete 
mode 100644 examples/ios/simple/CaffeSimple/data/test_image.jpg delete mode 100644 examples/ios/simple/CaffeSimple/data/test_image.png delete mode 100644 examples/ios/simple/CaffeSimple/main.m delete mode 100644 examples/mnist/test_lenet.sh delete mode 100644 include/caffe/blob.hpp delete mode 100644 include/caffe/caffe.hpp delete mode 100644 include/caffe/common.hpp delete mode 100644 include/caffe/data_transformer.hpp delete mode 100644 include/caffe/filler.hpp delete mode 100644 include/caffe/glog_wrapper.hpp delete mode 100644 include/caffe/layer.hpp delete mode 100644 include/caffe/layer_factory.hpp delete mode 100644 include/caffe/layers/base_conv_layer.hpp delete mode 100644 include/caffe/layers/base_data_layer.hpp delete mode 100644 include/caffe/layers/bnll_layer.hpp delete mode 100644 include/caffe/layers/concat_layer.hpp delete mode 100644 include/caffe/layers/conv_layer.hpp delete mode 100644 include/caffe/layers/dropout_layer.hpp delete mode 100644 include/caffe/layers/eltwise_layer.hpp delete mode 100644 include/caffe/layers/flatten_layer.hpp delete mode 100644 include/caffe/layers/im2col_layer.hpp delete mode 100644 include/caffe/layers/inner_product_layer.hpp delete mode 100644 include/caffe/layers/input_layer.hpp delete mode 100644 include/caffe/layers/loss_layer.hpp delete mode 100644 include/caffe/layers/lrn_layer.hpp delete mode 100644 include/caffe/layers/memory_data_layer.hpp delete mode 100644 include/caffe/layers/neuron_layer.hpp delete mode 100644 include/caffe/layers/pooling_layer.hpp delete mode 100644 include/caffe/layers/power_layer.hpp delete mode 100644 include/caffe/layers/relu_layer.hpp delete mode 100644 include/caffe/layers/sigmoid_layer.hpp delete mode 100644 include/caffe/layers/softmax_layer.hpp delete mode 100644 include/caffe/layers/softmax_loss_layer.hpp delete mode 100644 include/caffe/layers/split_layer.hpp delete mode 100644 include/caffe/layers/tanh_layer.hpp delete mode 100644 include/caffe/net.hpp delete mode 100644 include/caffe/syncedmem.hpp delete mode 100644 include/caffe/util/benchmark.hpp delete mode 100644 include/caffe/util/blocking_queue.hpp delete mode 100644 include/caffe/util/cudnn.hpp delete mode 100644 include/caffe/util/device_alternate.hpp delete mode 100644 include/caffe/util/format.hpp delete mode 100644 include/caffe/util/im2col.hpp delete mode 100644 include/caffe/util/insert_splits.hpp delete mode 100644 include/caffe/util/io.hpp delete mode 100644 include/caffe/util/math_functions.hpp delete mode 100644 include/caffe/util/mkl_alternate.hpp delete mode 100644 include/caffe/util/rng.hpp delete mode 100644 include/caffe/util/upgrade_proto.hpp delete mode 100755 patch/diff.py delete mode 100644 patch/include/caffe/caffe.hpp.patch delete mode 100644 patch/include/caffe/common.hpp.patch delete mode 100644 patch/include/caffe/layers/base_data_layer.hpp.patch delete mode 100644 patch/include/caffe/net.hpp.patch delete mode 100644 patch/include/caffe/util/benchmark.hpp.patch delete mode 100644 patch/include/caffe/util/io.hpp.patch delete mode 100644 patch/include/caffe/util/math_functions.hpp.patch delete mode 100644 patch/include/caffe/util/rng.hpp.patch delete mode 100755 patch/patch.py delete mode 100644 patch/src/caffe/common.cpp.patch delete mode 100644 patch/src/caffe/layers/base_data_layer.cpp.patch delete mode 100644 patch/src/caffe/net.cpp.patch delete mode 100644 patch/src/caffe/util/benchmark.cpp.patch delete mode 100644 patch/src/caffe/util/io.cpp.patch delete mode 100644 patch/src/caffe/util/math_functions.cpp.patch 
delete mode 100644 patch/synced_version delete mode 100644 patch/tools/caffe.cpp.patch delete mode 100644 src/caffe/CMakeLists.txt delete mode 100644 src/caffe/blob.cpp delete mode 100644 src/caffe/common.cpp delete mode 100644 src/caffe/data_transformer.cpp delete mode 100644 src/caffe/layer_factory.cpp delete mode 100644 src/caffe/layers/base_conv_layer.cpp delete mode 100644 src/caffe/layers/base_data_layer.cpp delete mode 100644 src/caffe/layers/bnll_layer.cpp delete mode 100644 src/caffe/layers/concat_layer.cpp delete mode 100644 src/caffe/layers/conv_layer.cpp delete mode 100644 src/caffe/layers/dropout_layer.cpp delete mode 100644 src/caffe/layers/eltwise_layer.cpp delete mode 100644 src/caffe/layers/flatten_layer.cpp delete mode 100644 src/caffe/layers/im2col_layer.cpp delete mode 100644 src/caffe/layers/inner_product_layer.cpp delete mode 100644 src/caffe/layers/input_layer.cpp delete mode 100644 src/caffe/layers/loss_layer.cpp delete mode 100644 src/caffe/layers/lrn_layer.cpp delete mode 100644 src/caffe/layers/memory_data_layer.cpp delete mode 100644 src/caffe/layers/neuron_layer.cpp delete mode 100644 src/caffe/layers/pooling_layer.cpp delete mode 100644 src/caffe/layers/power_layer.cpp delete mode 100644 src/caffe/layers/relu_layer.cpp delete mode 100644 src/caffe/layers/sigmoid_layer.cpp delete mode 100644 src/caffe/layers/softmax_layer.cpp delete mode 100644 src/caffe/layers/softmax_loss_layer.cpp delete mode 100644 src/caffe/layers/split_layer.cpp delete mode 100644 src/caffe/layers/tanh_layer.cpp delete mode 100644 src/caffe/net.cpp delete mode 100644 src/caffe/proto/caffe.proto delete mode 100644 src/caffe/syncedmem.cpp delete mode 100644 src/caffe/util/benchmark.cpp delete mode 100644 src/caffe/util/im2col.cpp delete mode 100644 src/caffe/util/insert_splits.cpp delete mode 100644 src/caffe/util/io.cpp delete mode 100644 src/caffe/util/math_functions.cpp delete mode 100644 src/caffe/util/upgrade_proto.cpp delete mode 160000 third_party/android-cmake delete mode 100755 third_party/build-openblas.sh delete mode 100755 third_party/build-protobuf-3.1.0.sh delete mode 160000 third_party/ios-cmake delete mode 100644 tools/CMakeLists.txt delete mode 100644 tools/caffe.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index a4920c0..0000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,65 +0,0 @@ -cmake_minimum_required(VERSION 2.8.7) -if(POLICY CMP0046) - cmake_policy(SET CMP0046 NEW) -endif() -if(POLICY CMP0054) - cmake_policy(SET CMP0054 NEW) -endif() - -set(CAFFE_INCLUDE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include) -include_directories(${CAFFE_INCLUDE_DIRS}) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src) - -#add_definitions(-DUSE_EIGEN) -add_definitions(-DCPU_ONLY) - -include(./cmake/Utils.cmake) -include(./cmake/Misc.cmake) -include(./cmake/Targets.cmake) - -# Set CXX_FLAGS for different platform -if(NOT MSVC) - SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++0x -Wno-sign-compare") - SET(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++0x -Wno-sign-compare") - if(IOS) - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fembed-bitcode") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fembed-bitcode") - endif() -endif() - -# Set search path for cross compile -if(THIRD_PARTY) - set(SAVE_PATH ${CMAKE_FIND_ROOT_PATH}) - set(CMAKE_FIND_ROOT_PATH ${CMAKE_SOURCE_DIR}/third_party/protobuf ${CMAKE_SOURCE_DIR}/third_party/OpenBLAS) - include(./cmake/ProtoBuf.cmake) - include(./cmake/Modules/FindOpenBLAS.cmake) - 
set(CMAKE_FIND_ROOT_PATH ${SAVE_PATH}) -else() - include(./cmake/ProtoBuf.cmake) - include(./cmake/Modules/FindOpenBLAS.cmake) -endif() -include(./cmake/Modules/FindvecLib.cmake) -include(./cmake/Modules/FindAtlas.cmake) -# Select blas lib for different platform -# APPLE only, 1st -if(VECLIB_FOUND) - include_directories(${vecLib_INCLUDE_DIR}) - list(APPEND Caffe_LINKER_LIBS ${vecLib_LINKER_LIBS}) -elseif(OpenBLAS_FOUND) - include_directories(${OpenBLAS_INCLUDE_DIR}) - list(APPEND Caffe_LINKER_LIBS ${OpenBLAS_LIB}) -elseif(ATLAS_FOUND) - include_directories(${Atlas_CBLAS_INCLUDE_DIR}) - list(APPEND Caffe_LINKER_LIBS ${Atlas_CBLAS_LIBRARY}) -else() - MESSAGE(FATAL_ERROR "BLAS (VecLib/OpenBLAS/Atlas) library not found.") -endif() - -caffe_set_caffe_link() - -add_subdirectory(src/caffe) - -if(TOOLS) - add_subdirectory(tools) -endif() - diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 0c99adc..0000000 --- a/LICENSE +++ /dev/null @@ -1,44 +0,0 @@ -COPYRIGHT - -All contributions by the University of California: -Copyright (c) 2014-2017 The Regents of the University of California (Regents) -All rights reserved. - -All other contributions: -Copyright (c) 2014-2017, the respective contributors -All rights reserved. - -Caffe uses a shared copyright model: each contributor holds copyright over -their contributions to Caffe. The project versioning records all such -contribution and copyright details. If a contributor wants to further mark -their specific copyright on a particular contribution, they should indicate -their copyright solely in the commit message of the change when it is -committed. - -LICENSE - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -CONTRIBUTION AGREEMENT - -By contributing to the BVLC/caffe repository through pull-request, comment, -or otherwise, the contributor releases their content to the -license and copyright terms herein. 
diff --git a/README.md b/README.md deleted file mode 100644 index 99e4d14..0000000 --- a/README.md +++ /dev/null @@ -1,94 +0,0 @@ -Caffe on Mobile Devices -================== - -iOS/Android Caffe lib with demo APP (CPU_ONLY, NO_BACKWARD, NO_BOOST, NO_HDF5, NO_LEVELDB) - -# For iPhone Simulator - -## Step 1: Build Caffe-Mobile Lib with cmake - -``` -$ git clone --recursive https://github.com/solrex/caffe-mobile.git -$ cd caffe-mobile/third_party -$ ./build-protobuf-3.1.0.sh iPhoneSimulator -$ mkdir ../build -$ cd ../build -$ cmake .. -DCMAKE_TOOLCHAIN_FILE=../third_party/ios-cmake/toolchain/iOS.cmake \ - -DIOS_PLATFORM=SIMULATOR -DTHIRD_PARTY=1 -$ make -j 4 -``` - -## Step 2: Build iOS App: CaffeSimple with Xcode - - - For CaffeSimple to run, you need a pre-trained LeNet on MNIST caffe model and the weight file. -Follow the instructions in [Training LeNet on MNIST with Caffe](http://caffe.berkeleyvision.org/gathered/examples/mnist.html) to train your LeNet Model on MNIST. Then copy the model file `caffe/examples/mnist/lenet.prototxt` and the trained weight file `caffe/examples/mnist/lenet_iter_10000.caffemodel` to CaffeSimple app directory. - -``` -$ cp $CAFFE/examples/mnist/lenet.prototxt \ - $CAFFE_MOBILE/examples/ios/simple/CaffeSimple/data/net.prototxt -$ cp $CAFFE/examples/mnist/lenet_iter_10000.caffemodel \ - $CAFFE_MOBILE/examples/ios/simple/CaffeSimple/data/weight.caffemodel -``` - - - Load the Xcode project inside the `$CAFFE_MOBILE/examples/ios/simple/` folder, and press Command-R to build and run it on the simulator. - -# For iPhone - -## Step 1: Build Caffe-Mobile Lib with cmake - -``` -$ git clone --recursive https://github.com/solrex/caffe-mobile.git -$ cd caffe-mobile/third_party -$ ./build-protobuf-3.1.0.sh iPhoneOS -$ mkdir ../build -$ cd ../build -$ cmake .. -DCMAKE_TOOLCHAIN_FILE=../third_party/ios-cmake/toolchain/iOS.cmake \ - -DIOS_PLATFORM=OS -DTHIRD_PARTY=1 -$ make -j 4 -``` - -## Step 2: Build iOS App: CaffeSimple with Xcode - - - For CaffeSimple to run, you need a pre-trained LeNet on MNIST caffe model and the weight file. -Follow the instructions in [Training LeNet on MNIST with Caffe](http://caffe.berkeleyvision.org/gathered/examples/mnist.html) to train your LeNet Model on MNIST. Then copy the model file `caffe/examples/mnist/lenet.prototxt` and the trained weight file `caffe/examples/mnist/lenet_iter_10000.caffemodel` to CaffeSimple app directory. - -``` -$ cp $CAFFE/examples/mnist/lenet.prototxt $CAFFE_MOBILE/examples/ios/simple/CaffeSimple/data/ -$ cp $CAFFE/examples/mnist/lenet_iter_10000.caffemodel $CAFFE_MOBILE/examples/ios/simple/CaffeSimple/data/ -``` - - - Load the Xcode project inside the `$CAFFE_MOBILE/examples/ios/simple/` folder, and press Command-R to build and run it on your connected device. - -# For MacOSX & Ubuntu - -## Step 1: Install dependency - -``` -$ brew install protobuf # MacOSX -$ sudo apt install libprotobuf-dev protobuf-compiler libatlas-dev # Ubuntu -``` - -## Step 2: Build Caffe-Mobile Lib with cmake - -``` -$ git clone --recursive https://github.com/solrex/caffe-mobile.git -$ mkdir build -$ cd ../build -$ cmake .. -$ make -j 4 -``` - -## Step 3: Build Caffe-bin with cmake - -``` -$ brew install gflags -$ cmake .. 
-DTOOLS -$ make -j 4 -``` - -# Thanks - - - Based on https://github.com/BVLC/caffe - - Inspired by https://github.com/chyh1990/caffe-compact - - Use https://github.com/Yangqing/ios-cmake - - Use https://github.com/taka-no-me/android-cmake diff --git a/cmake/Misc.cmake b/cmake/Misc.cmake deleted file mode 100644 index 9dd2609..0000000 --- a/cmake/Misc.cmake +++ /dev/null @@ -1,52 +0,0 @@ -# ---[ Configuration types -set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "Possible configurations" FORCE) -mark_as_advanced(CMAKE_CONFIGURATION_TYPES) - -if(DEFINED CMAKE_BUILD_TYPE) - set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS ${CMAKE_CONFIGURATION_TYPES}) -endif() - -# --[ If user doesn't specify build type then assume release -if("${CMAKE_BUILD_TYPE}" STREQUAL "") - set(CMAKE_BUILD_TYPE Release) -endif() - -if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set(CMAKE_COMPILER_IS_CLANGXX TRUE) -endif() - -# ---[ Solution folders -caffe_option(USE_PROJECT_FOLDERS "IDE Solution folders" (MSVC_IDE OR CMAKE_GENERATOR MATCHES Xcode) ) - -if(USE_PROJECT_FOLDERS) - set_property(GLOBAL PROPERTY USE_FOLDERS ON) - set_property(GLOBAL PROPERTY PREDEFINED_TARGETS_FOLDER "CMakeTargets") -endif() - -# ---[ Install options -if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) - set(CMAKE_INSTALL_PREFIX "${PROJECT_BINARY_DIR}/install" CACHE PATH "Default install path" FORCE) -endif() - -# ---[ RPATH settings -set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOLEAN "Use link paths for shared library rpath") -set(CMAKE_MACOSX_RPATH TRUE) - -list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES ${CMAKE_INSTALL_PREFIX}/lib __is_systtem_dir) -if(${__is_systtem_dir} STREQUAL -1) - set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib) -endif() - -# ---[ Funny target -if(UNIX OR APPLE) - add_custom_target(symlink_to_build COMMAND "ln" "-sf" "${PROJECT_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/build" - COMMENT "Adding symlink: /build -> ${PROJECT_BINARY_DIR}" ) -endif() - -# ---[ Set debug postfix -set(Caffe_DEBUG_POSTFIX "-d") - -set(Caffe_POSTFIX "") -if(CMAKE_BUILD_TYPE MATCHES "Debug") - set(Caffe_POSTFIX ${Caffe_DEBUG_POSTFIX}) -endif() diff --git a/cmake/Modules/FindAtlas.cmake b/cmake/Modules/FindAtlas.cmake deleted file mode 100644 index 9c665a4..0000000 --- a/cmake/Modules/FindAtlas.cmake +++ /dev/null @@ -1,52 +0,0 @@ -# Find the Atlas (and Lapack) libraries -# -# The following variables are optionally searched for defaults -# Atlas_ROOT_DIR: Base directory where all Atlas components are found -# -# The following are set after configuration is done: -# Atlas_FOUND -# Atlas_INCLUDE_DIRS -# Atlas_LIBRARIES -# Atlas_LIBRARYRARY_DIRS - -set(Atlas_INCLUDE_SEARCH_PATHS - /usr/include/atlas - /usr/include/atlas-base - $ENV{Atlas_ROOT_DIR} - $ENV{Atlas_ROOT_DIR}/include -) - -set(Atlas_LIB_SEARCH_PATHS - /usr/lib/atlas - /usr/lib/atlas-base - $ENV{Atlas_ROOT_DIR} - $ENV{Atlas_ROOT_DIR}/lib -) - -find_path(Atlas_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS}) -find_path(Atlas_CLAPACK_INCLUDE_DIR NAMES clapack.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS}) - -find_library(Atlas_CBLAS_LIBRARY NAMES ptcblas_r ptcblas cblas_r cblas PATHS ${Atlas_LIB_SEARCH_PATHS}) -find_library(Atlas_BLAS_LIBRARY NAMES atlas_r atlas PATHS ${Atlas_LIB_SEARCH_PATHS}) -find_library(Atlas_LAPACK_LIBRARY NAMES lapack alapack_r alapack lapack_atlas PATHS ${Atlas_LIB_SEARCH_PATHS}) - -set(LOOKED_FOR - Atlas_CBLAS_INCLUDE_DIR - Atlas_CLAPACK_INCLUDE_DIR - - Atlas_CBLAS_LIBRARY - Atlas_BLAS_LIBRARY - Atlas_LAPACK_LIBRARY -) - 
-include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Atlas DEFAULT_MSG ${LOOKED_FOR}) - -if(ATLAS_FOUND) - set(Atlas_INCLUDE_DIR ${Atlas_CBLAS_INCLUDE_DIR} ${Atlas_CLAPACK_INCLUDE_DIR}) - set(Atlas_LIBRARIES ${Atlas_LAPACK_LIBRARY} ${Atlas_CBLAS_LIBRARY} ${Atlas_BLAS_LIBRARY}) - mark_as_advanced(${LOOKED_FOR}) - - message(STATUS "Found Atlas (include: ${Atlas_CBLAS_INCLUDE_DIR}, library: ${Atlas_BLAS_LIBRARY})") -endif(ATLAS_FOUND) - diff --git a/cmake/Modules/FindGFlags.cmake b/cmake/Modules/FindGFlags.cmake deleted file mode 100644 index 29b60f0..0000000 --- a/cmake/Modules/FindGFlags.cmake +++ /dev/null @@ -1,50 +0,0 @@ -# - Try to find GFLAGS -# -# The following variables are optionally searched for defaults -# GFLAGS_ROOT_DIR: Base directory where all GFLAGS components are found -# -# The following are set after configuration is done: -# GFLAGS_FOUND -# GFLAGS_INCLUDE_DIRS -# GFLAGS_LIBRARIES -# GFLAGS_LIBRARYRARY_DIRS - -include(FindPackageHandleStandardArgs) - -set(GFLAGS_ROOT_DIR "" CACHE PATH "Folder contains Gflags") - -# We are testing only a couple of files in the include directories -if(WIN32) - find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h - PATHS ${GFLAGS_ROOT_DIR}/src/windows) -else() - find_path(GFLAGS_INCLUDE_DIR gflags/gflags.h - PATHS ${GFLAGS_ROOT_DIR}) -endif() - -if(MSVC) - find_library(GFLAGS_LIBRARY_RELEASE - NAMES libgflags - PATHS ${GFLAGS_ROOT_DIR} - PATH_SUFFIXES Release) - - find_library(GFLAGS_LIBRARY_DEBUG - NAMES libgflags-debug - PATHS ${GFLAGS_ROOT_DIR} - PATH_SUFFIXES Debug) - - set(GFLAGS_LIBRARY optimized ${GFLAGS_LIBRARY_RELEASE} debug ${GFLAGS_LIBRARY_DEBUG}) -else() - find_library(GFLAGS_LIBRARY gflags) -endif() - -find_package_handle_standard_args(GFlags DEFAULT_MSG GFLAGS_INCLUDE_DIR GFLAGS_LIBRARY) - - -if(GFLAGS_FOUND) - set(GFLAGS_INCLUDE_DIRS ${GFLAGS_INCLUDE_DIR}) - set(GFLAGS_LIBRARIES ${GFLAGS_LIBRARY}) - message(STATUS "Found gflags (include: ${GFLAGS_INCLUDE_DIR}, library: ${GFLAGS_LIBRARY})") - mark_as_advanced(GFLAGS_LIBRARY_DEBUG GFLAGS_LIBRARY_RELEASE - GFLAGS_LIBRARY GFLAGS_INCLUDE_DIR GFLAGS_ROOT_DIR) -endif() diff --git a/cmake/Modules/FindOpenBLAS.cmake b/cmake/Modules/FindOpenBLAS.cmake deleted file mode 100644 index a6512ae..0000000 --- a/cmake/Modules/FindOpenBLAS.cmake +++ /dev/null @@ -1,64 +0,0 @@ - - -SET(Open_BLAS_INCLUDE_SEARCH_PATHS - /usr/include - /usr/include/openblas - /usr/include/openblas-base - /usr/local/include - /usr/local/include/openblas - /usr/local/include/openblas-base - /opt/OpenBLAS/include - $ENV{OpenBLAS_HOME} - $ENV{OpenBLAS_HOME}/include -) - -SET(Open_BLAS_LIB_SEARCH_PATHS - /lib/ - /lib/openblas-base - /lib64/ - /usr/lib - /usr/lib/openblas-base - /usr/lib64 - /usr/local/lib - /usr/local/lib64 - /opt/OpenBLAS/lib - $ENV{OpenBLAS}cd - $ENV{OpenBLAS}/lib - $ENV{OpenBLAS_HOME} - $ENV{OpenBLAS_HOME}/lib - ) - -FIND_PATH(OpenBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Open_BLAS_INCLUDE_SEARCH_PATHS}) -FIND_LIBRARY(OpenBLAS_LIB NAMES openblas PATHS ${Open_BLAS_LIB_SEARCH_PATHS}) - -SET(OpenBLAS_FOUND ON) - -# Check include files -IF(NOT OpenBLAS_INCLUDE_DIR) - SET(OpenBLAS_FOUND OFF) - MESSAGE(STATUS "Could not find OpenBLAS include. Turning OpenBLAS_FOUND off") -ENDIF() - -# Check libraries -IF(NOT OpenBLAS_LIB) - SET(OpenBLAS_FOUND OFF) - MESSAGE(STATUS "Could not find OpenBLAS lib. 
Turning OpenBLAS_FOUND off") -ENDIF() - -IF (OpenBLAS_FOUND) - IF (NOT OpenBLAS_FIND_QUIETLY) - MESSAGE(STATUS "Found OpenBLAS libraries: ${OpenBLAS_LIB}") - MESSAGE(STATUS "Found OpenBLAS include: ${OpenBLAS_INCLUDE_DIR}") - ENDIF (NOT OpenBLAS_FIND_QUIETLY) -ELSE (OpenBLAS_FOUND) - IF (OpenBLAS_FIND_REQUIRED) - MESSAGE(FATAL_ERROR "Could not find OpenBLAS") - ENDIF (OpenBLAS_FIND_REQUIRED) -ENDIF (OpenBLAS_FOUND) - -MARK_AS_ADVANCED( - OpenBLAS_INCLUDE_DIR - OpenBLAS_LIB - OpenBLAS -) - diff --git a/cmake/Modules/FindvecLib.cmake b/cmake/Modules/FindvecLib.cmake deleted file mode 100644 index f6c690d..0000000 --- a/cmake/Modules/FindvecLib.cmake +++ /dev/null @@ -1,42 +0,0 @@ -# Find the vecLib libraries as part of Accelerate.framework or as standalon framework -# -# The following are set after configuration is done: -# VECLIB_FOUND -# vecLib_INCLUDE_DIR -# vecLib_LINKER_LIBS - - -if(NOT APPLE) - return() -endif() - -set(__veclib_include_suffix "Frameworks/vecLib.framework/Versions/Current/Headers") - -if(IOS) - find_path(vecLib_INCLUDE_DIR vecLib.h - DOC "vecLib include directory" - PATHS ${CMAKE_IOS_SDK_ROOT}/System/Library/Frameworks/Accelerate.framework//Frameworks/vecLib.framework/Headers/ - NO_DEFAULT_PATH) -else() - find_path(vecLib_INCLUDE_DIR vecLib.h - DOC "vecLib include directory" - PATHS /System/Library/Frameworks/Accelerate.framework/Versions/Current/${__veclib_include_suffix} - /System/Library/${__veclib_include_suffix} - /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ - NO_DEFAULT_PATH) -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(vecLib DEFAULT_MSG vecLib_INCLUDE_DIR) - -if(VECLIB_FOUND) - if(vecLib_INCLUDE_DIR MATCHES "^/System/Library/Frameworks/vecLib.framework.*") - set(vecLib_LINKER_LIBS -lcblas "-framework vecLib") - message(STATUS "Found standalone vecLib.framework") - else() - set(vecLib_LINKER_LIBS -lcblas "-framework Accelerate") - message(STATUS "Found vecLib as part of Accelerate.framework") - endif() - - mark_as_advanced(vecLib_INCLUDE_DIR) -endif() diff --git a/cmake/ProtoBuf.cmake b/cmake/ProtoBuf.cmake deleted file mode 100644 index 73f647f..0000000 --- a/cmake/ProtoBuf.cmake +++ /dev/null @@ -1,90 +0,0 @@ -# Finds Google Protocol Buffers library and compilers and extends -# the standard cmake script with version and python generation support - -find_package( Protobuf REQUIRED ) -include_directories(SYSTEM ${PROTOBUF_INCLUDE_DIR}) -list(APPEND Caffe_LINKER_LIBS ${PROTOBUF_LIBRARIES}) - -# As of Ubuntu 14.04 protoc is no longer a part of libprotobuf-dev package -# and should be installed separately as in: sudo apt-get install protobuf-compiler -if(EXISTS ${PROTOBUF_PROTOC_EXECUTABLE}) - message(STATUS "Found PROTOBUF Compiler: ${PROTOBUF_PROTOC_EXECUTABLE}") -else() - message(FATAL_ERROR "Could not find PROTOBUF Compiler") -endif() - -if(PROTOBUF_FOUND) - # fetches protobuf version - caffe_parse_header(${PROTOBUF_INCLUDE_DIR}/google/protobuf/stubs/common.h VERION_LINE GOOGLE_PROTOBUF_VERSION) - string(REGEX MATCH "([0-9])00([0-9])00([0-9])" PROTOBUF_VERSION ${GOOGLE_PROTOBUF_VERSION}) - set(PROTOBUF_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}") - unset(GOOGLE_PROTOBUF_VERSION) -endif() - -# place where to generate protobuf sources -set(proto_gen_folder "${PROJECT_BINARY_DIR}/include/caffe/proto") -include_directories("${PROJECT_BINARY_DIR}/include") - 
-set(PROTOBUF_GENERATE_CPP_APPEND_PATH TRUE) - -################################################################################################ -# Modification of standard 'protobuf_generate_cpp()' with output dir parameter and python support -# Usage: -# caffe_protobuf_generate_cpp_py( ) -function(caffe_protobuf_generate_cpp_py output_dir srcs_var hdrs_var python_var) - if(NOT ARGN) - message(SEND_ERROR "Error: caffe_protobuf_generate_cpp_py() called without any proto files") - return() - endif() - - if(PROTOBUF_GENERATE_CPP_APPEND_PATH) - # Create an include path for each file specified - foreach(fil ${ARGN}) - get_filename_component(abs_fil ${fil} ABSOLUTE) - get_filename_component(abs_path ${abs_fil} PATH) - list(FIND _protoc_include ${abs_path} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND _protoc_include -I ${abs_path}) - endif() - endforeach() - else() - set(_protoc_include -I ${CMAKE_CURRENT_SOURCE_DIR}) - endif() - - if(DEFINED PROTOBUF_IMPORT_DIRS) - foreach(dir ${PROTOBUF_IMPORT_DIRS}) - get_filename_component(abs_path ${dir} ABSOLUTE) - list(FIND _protoc_include ${abs_path} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND _protoc_include -I ${abs_path}) - endif() - endforeach() - endif() - - set(${srcs_var}) - set(${hdrs_var}) - set(${python_var}) - foreach(fil ${ARGN}) - get_filename_component(abs_fil ${fil} ABSOLUTE) - get_filename_component(fil_we ${fil} NAME_WE) - - list(APPEND ${srcs_var} "${output_dir}/${fil_we}.pb.cc") - list(APPEND ${hdrs_var} "${output_dir}/${fil_we}.pb.h") - list(APPEND ${python_var} "${output_dir}/${fil_we}_pb2.py") - - add_custom_command( - OUTPUT "${output_dir}/${fil_we}.pb.cc" - "${output_dir}/${fil_we}.pb.h" - "${output_dir}/${fil_we}_pb2.py" - COMMAND ${CMAKE_COMMAND} -E make_directory "${output_dir}" - COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --cpp_out ${output_dir} ${_protoc_include} ${abs_fil} - COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} --python_out ${output_dir} ${_protoc_include} ${abs_fil} - DEPENDS ${abs_fil} - COMMENT "Running C++/Python protocol buffer compiler on ${fil}" VERBATIM ) - endforeach() - - set_source_files_properties(${${srcs_var}} ${${hdrs_var}} ${${python_var}} PROPERTIES GENERATED TRUE) - set(${srcs_var} ${${srcs_var}} PARENT_SCOPE) - set(${hdrs_var} ${${hdrs_var}} PARENT_SCOPE) - set(${python_var} ${${python_var}} PARENT_SCOPE) -endfunction() diff --git a/cmake/Targets.cmake b/cmake/Targets.cmake deleted file mode 100644 index 090f86c..0000000 --- a/cmake/Targets.cmake +++ /dev/null @@ -1,174 +0,0 @@ -################################################################################################ -# Defines global Caffe_LINK flag, This flag is required to prevent linker from excluding -# some objects which are not addressed directly but are registered via static constructors -macro(caffe_set_caffe_link) - if(BUILD_SHARED_LIBS) - set(Caffe_LINK caffe) - else() - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - set(Caffe_LINK -Wl,-force_load caffe) - elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - set(Caffe_LINK -Wl,--whole-archive caffe -Wl,--no-whole-archive) - endif() - endif() -endmacro() -################################################################################################ -# Convenient command to setup source group for IDEs that support this feature (VS, XCode) -# Usage: -# caffe_source_group( GLOB[_RECURSE] ) -function(caffe_source_group group) - cmake_parse_arguments(CAFFE_SOURCE_GROUP "" "" "GLOB;GLOB_RECURSE" ${ARGN}) - if(CAFFE_SOURCE_GROUP_GLOB) - file(GLOB 
srcs1 ${CAFFE_SOURCE_GROUP_GLOB}) - source_group(${group} FILES ${srcs1}) - endif() - - if(CAFFE_SOURCE_GROUP_GLOB_RECURSE) - file(GLOB_RECURSE srcs2 ${CAFFE_SOURCE_GROUP_GLOB_RECURSE}) - source_group(${group} FILES ${srcs2}) - endif() -endfunction() - -################################################################################################ -# Collecting sources from globbing and appending to output list variable -# Usage: -# caffe_collect_sources( GLOB[_RECURSE] ) -function(caffe_collect_sources variable) - cmake_parse_arguments(CAFFE_COLLECT_SOURCES "" "" "GLOB;GLOB_RECURSE" ${ARGN}) - if(CAFFE_COLLECT_SOURCES_GLOB) - file(GLOB srcs1 ${CAFFE_COLLECT_SOURCES_GLOB}) - set(${variable} ${variable} ${srcs1}) - endif() - - if(CAFFE_COLLECT_SOURCES_GLOB_RECURSE) - file(GLOB_RECURSE srcs2 ${CAFFE_COLLECT_SOURCES_GLOB_RECURSE}) - set(${variable} ${variable} ${srcs2}) - endif() -endfunction() - -################################################################################################ -# Short command getting caffe sources (assuming standard Caffe code tree) -# Usage: -# caffe_pickup_caffe_sources() -function(caffe_pickup_caffe_sources root) - # put all files in source groups (visible as subfolder in many IDEs) - caffe_source_group("Include" GLOB "${root}/include/caffe/*.h*") - caffe_source_group("Include\\Util" GLOB "${root}/include/caffe/util/*.h*") - caffe_source_group("Include" GLOB "${PROJECT_BINARY_DIR}/caffe_config.h*") - caffe_source_group("Source" GLOB "${root}/src/caffe/*.cpp") - caffe_source_group("Source\\Util" GLOB "${root}/src/caffe/util/*.cpp") - caffe_source_group("Source\\Layers" GLOB "${root}/src/caffe/layers/*.cpp") - caffe_source_group("Source\\Cuda" GLOB "${root}/src/caffe/layers/*.cu") - caffe_source_group("Source\\Cuda" GLOB "${root}/src/caffe/util/*.cu") - caffe_source_group("Source\\Proto" GLOB "${root}/src/caffe/proto/*.proto") - - # source groups for test target - caffe_source_group("Include" GLOB "${root}/include/caffe/test/test_*.h*") - caffe_source_group("Source" GLOB "${root}/src/caffe/test/test_*.cpp") - caffe_source_group("Source\\Cuda" GLOB "${root}/src/caffe/test/test_*.cu") - - # collect files - file(GLOB test_hdrs ${root}/include/caffe/test/test_*.h*) - file(GLOB test_srcs ${root}/src/caffe/test/test_*.cpp) - file(GLOB_RECURSE hdrs ${root}/include/caffe/*.h*) - file(GLOB_RECURSE srcs ${root}/src/caffe/*.cpp) - list(REMOVE_ITEM hdrs ${test_hdrs}) - list(REMOVE_ITEM srcs ${test_srcs}) - - # adding headers to make the visible in some IDEs (Qt, VS, Xcode) - list(APPEND srcs ${hdrs} ${PROJECT_BINARY_DIR}/caffe_config.h) - list(APPEND test_srcs ${test_hdrs}) - - # collect cuda files - file(GLOB test_cuda ${root}/src/caffe/test/test_*.cu) - file(GLOB_RECURSE cuda ${root}/src/caffe/*.cu) - list(REMOVE_ITEM cuda ${test_cuda}) - - # add proto to make them editable in IDEs too - file(GLOB_RECURSE proto_files ${root}/src/caffe/*.proto) - list(APPEND srcs ${proto_files}) - - # convert to absolute paths - caffe_convert_absolute_paths(srcs) - caffe_convert_absolute_paths(cuda) - caffe_convert_absolute_paths(test_srcs) - caffe_convert_absolute_paths(test_cuda) - - # propagate to parent scope - set(srcs ${srcs} PARENT_SCOPE) - set(cuda ${cuda} PARENT_SCOPE) - set(test_srcs ${test_srcs} PARENT_SCOPE) - set(test_cuda ${test_cuda} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Short command for setting default target properties -# Usage: -# caffe_default_properties() 
-function(caffe_default_properties target) - set_target_properties(${target} PROPERTIES - DEBUG_POSTFIX ${Caffe_DEBUG_POSTFIX} - ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib" - LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib" - RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin") - # make sure we build all external dependencies first - if (DEFINED external_project_dependencies) - add_dependencies(${target} ${external_project_dependencies}) - endif() -endfunction() - -################################################################################################ -# Short command for setting runtime directory for build target -# Usage: -# caffe_set_runtime_directory( ) -function(caffe_set_runtime_directory target dir) - set_target_properties(${target} PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${dir}") -endfunction() - -################################################################################################ -# Short command for setting solution folder property for target -# Usage: -# caffe_set_solution_folder( ) -function(caffe_set_solution_folder target folder) - if(USE_PROJECT_FOLDERS) - set_target_properties(${target} PROPERTIES FOLDER "${folder}") - endif() -endfunction() - -################################################################################################ -# Reads lines from input file, prepends source directory to each line and writes to output file -# Usage: -# caffe_configure_testdatafile() -function(caffe_configure_testdatafile file) - file(STRINGS ${file} __lines) - set(result "") - foreach(line ${__lines}) - set(result "${result}${PROJECT_SOURCE_DIR}/${line}\n") - endforeach() - file(WRITE ${file}.gen.cmake ${result}) -endfunction() - -################################################################################################ -# Filter out all files that are not included in selected list -# Usage: -# caffe_leave_only_selected_tests( ) -function(caffe_leave_only_selected_tests file_list) - if(NOT ARGN) - return() # blank list means leave all - endif() - string(REPLACE "," ";" __selected ${ARGN}) - list(APPEND __selected caffe_main) - - set(result "") - foreach(f ${${file_list}}) - get_filename_component(name ${f} NAME_WE) - string(REGEX REPLACE "^test_" "" name ${name}) - list(FIND __selected ${name} __index) - if(NOT __index EQUAL -1) - list(APPEND result ${f}) - endif() - endforeach() - set(${file_list} ${result} PARENT_SCOPE) -endfunction() - diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake deleted file mode 100644 index 653de5f..0000000 --- a/cmake/Utils.cmake +++ /dev/null @@ -1,382 +0,0 @@ -################################################################################################ -# Command alias for debugging messages -# Usage: -# dmsg() -function(dmsg) - message(STATUS ${ARGN}) -endfunction() - -################################################################################################ -# Removes duplicates from list(s) -# Usage: -# caffe_list_unique( [] [...]) -macro(caffe_list_unique) - foreach(__lst ${ARGN}) - if(${__lst}) - list(REMOVE_DUPLICATES ${__lst}) - endif() - endforeach() -endmacro() - -################################################################################################ -# Clears variables from list -# Usage: -# caffe_clear_vars() -macro(caffe_clear_vars) - foreach(_var ${ARGN}) - unset(${_var}) - endforeach() -endmacro() - -################################################################################################ -# Removes duplicates from string -# Usage: -# caffe_string_unique() 
-function(caffe_string_unique __string) - if(${__string}) - set(__list ${${__string}}) - separate_arguments(__list) - list(REMOVE_DUPLICATES __list) - foreach(__e ${__list}) - set(__str "${__str} ${__e}") - endforeach() - set(${__string} ${__str} PARENT_SCOPE) - endif() -endfunction() - -################################################################################################ -# Prints list element per line -# Usage: -# caffe_print_list() -function(caffe_print_list) - foreach(e ${ARGN}) - message(STATUS ${e}) - endforeach() -endfunction() - -################################################################################################ -# Function merging lists of compiler flags to single string. -# Usage: -# caffe_merge_flag_lists(out_variable [] [] ...) -function(caffe_merge_flag_lists out_var) - set(__result "") - foreach(__list ${ARGN}) - foreach(__flag ${${__list}}) - string(STRIP ${__flag} __flag) - set(__result "${__result} ${__flag}") - endforeach() - endforeach() - string(STRIP ${__result} __result) - set(${out_var} ${__result} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Converts all paths in list to absolute -# Usage: -# caffe_convert_absolute_paths() -function(caffe_convert_absolute_paths variable) - set(__dlist "") - foreach(__s ${${variable}}) - get_filename_component(__abspath ${__s} ABSOLUTE) - list(APPEND __list ${__abspath}) - endforeach() - set(${variable} ${__list} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Reads set of version defines from the header file -# Usage: -# caffe_parse_header( ..) -macro(caffe_parse_header FILENAME FILE_VAR) - set(vars_regex "") - set(__parnet_scope OFF) - set(__add_cache OFF) - foreach(name ${ARGN}) - if("${name}" STREQUAL "PARENT_SCOPE") - set(__parnet_scope ON) - elseif("${name}" STREQUAL "CACHE") - set(__add_cache ON) - elseif(vars_regex) - set(vars_regex "${vars_regex}|${name}") - else() - set(vars_regex "${name}") - endif() - endforeach() - if(EXISTS "${FILENAME}") - file(STRINGS "${FILENAME}" ${FILE_VAR} REGEX "#define[ \t]+(${vars_regex})[ \t]+[0-9]+" ) - else() - unset(${FILE_VAR}) - endif() - foreach(name ${ARGN}) - if(NOT "${name}" STREQUAL "PARENT_SCOPE" AND NOT "${name}" STREQUAL "CACHE") - if(${FILE_VAR}) - if(${FILE_VAR} MATCHES ".+[ \t]${name}[ \t]+([0-9]+).*") - string(REGEX REPLACE ".+[ \t]${name}[ \t]+([0-9]+).*" "\\1" ${name} "${${FILE_VAR}}") - else() - set(${name} "") - endif() - if(__add_cache) - set(${name} ${${name}} CACHE INTERNAL "${name} parsed from ${FILENAME}" FORCE) - elseif(__parnet_scope) - set(${name} "${${name}}" PARENT_SCOPE) - endif() - else() - unset(${name} CACHE) - endif() - endif() - endforeach() -endmacro() - -################################################################################################ -# Reads single version define from the header file and parses it -# Usage: -# caffe_parse_header_single_define( ) -function(caffe_parse_header_single_define LIBNAME HDR_PATH VARNAME) - set(${LIBNAME}_H "") - if(EXISTS "${HDR_PATH}") - file(STRINGS "${HDR_PATH}" ${LIBNAME}_H REGEX "^#define[ \t]+${VARNAME}[ \t]+\"[^\"]*\".*$" LIMIT_COUNT 1) - endif() - - if(${LIBNAME}_H) - string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MAJOR "${${LIBNAME}_H}") - string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_MINOR "${${LIBNAME}_H}") - string(REGEX 
REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" ${LIBNAME}_VERSION_PATCH "${${LIBNAME}_H}") - set(${LIBNAME}_VERSION_MAJOR ${${LIBNAME}_VERSION_MAJOR} ${ARGN} PARENT_SCOPE) - set(${LIBNAME}_VERSION_MINOR ${${LIBNAME}_VERSION_MINOR} ${ARGN} PARENT_SCOPE) - set(${LIBNAME}_VERSION_PATCH ${${LIBNAME}_VERSION_PATCH} ${ARGN} PARENT_SCOPE) - set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_MAJOR}.${${LIBNAME}_VERSION_MINOR}.${${LIBNAME}_VERSION_PATCH}" PARENT_SCOPE) - - # append a TWEAK version if it exists: - set(${LIBNAME}_VERSION_TWEAK "") - if("${${LIBNAME}_H}" MATCHES "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+).*$") - set(${LIBNAME}_VERSION_TWEAK "${CMAKE_MATCH_1}" ${ARGN} PARENT_SCOPE) - endif() - if(${LIBNAME}_VERSION_TWEAK) - set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}.${${LIBNAME}_VERSION_TWEAK}" ${ARGN} PARENT_SCOPE) - else() - set(${LIBNAME}_VERSION_STRING "${${LIBNAME}_VERSION_STRING}" ${ARGN} PARENT_SCOPE) - endif() - endif() -endfunction() - -######################################################################################################## -# An option that the user can select. Can accept condition to control when option is available for user. -# Usage: -# caffe_option( "doc string" [IF ]) -function(caffe_option variable description value) - set(__value ${value}) - set(__condition "") - set(__varname "__value") - foreach(arg ${ARGN}) - if(arg STREQUAL "IF" OR arg STREQUAL "if") - set(__varname "__condition") - else() - list(APPEND ${__varname} ${arg}) - endif() - endforeach() - unset(__varname) - if("${__condition}" STREQUAL "") - set(__condition 2 GREATER 1) - endif() - - if(${__condition}) - if("${__value}" MATCHES ";") - if(${__value}) - option(${variable} "${description}" ON) - else() - option(${variable} "${description}" OFF) - endif() - elseif(DEFINED ${__value}) - if(${__value}) - option(${variable} "${description}" ON) - else() - option(${variable} "${description}" OFF) - endif() - else() - option(${variable} "${description}" ${__value}) - endif() - else() - unset(${variable} CACHE) - endif() -endfunction() - -################################################################################################ -# Utility macro for comparing two lists. Used for CMake debugging purposes -# Usage: -# caffe_compare_lists( [description]) -function(caffe_compare_lists list1 list2 desc) - set(__list1 ${${list1}}) - set(__list2 ${${list2}}) - list(SORT __list1) - list(SORT __list2) - list(LENGTH __list1 __len1) - list(LENGTH __list2 __len2) - - if(NOT ${__len1} EQUAL ${__len2}) - message(FATAL_ERROR "Lists are not equal. ${__len1} != ${__len2}. ${desc}") - endif() - - foreach(__i RANGE 1 ${__len1}) - math(EXPR __index "${__i}- 1") - list(GET __list1 ${__index} __item1) - list(GET __list2 ${__index} __item2) - if(NOT ${__item1} STREQUAL ${__item2}) - message(FATAL_ERROR "Lists are not equal. Differ at element ${__index}. 
${desc}") - endif() - endforeach() -endfunction() - -################################################################################################ -# Command for disabling warnings for different platforms (see below for gcc and VisualStudio) -# Usage: -# caffe_warnings_disable( -Wshadow /wd4996 ..,) -macro(caffe_warnings_disable) - set(_flag_vars "") - set(_msvc_warnings "") - set(_gxx_warnings "") - - foreach(arg ${ARGN}) - if(arg MATCHES "^CMAKE_") - list(APPEND _flag_vars ${arg}) - elseif(arg MATCHES "^/wd") - list(APPEND _msvc_warnings ${arg}) - elseif(arg MATCHES "^-W") - list(APPEND _gxx_warnings ${arg}) - endif() - endforeach() - - if(NOT _flag_vars) - set(_flag_vars CMAKE_C_FLAGS CMAKE_CXX_FLAGS) - endif() - - if(MSVC AND _msvc_warnings) - foreach(var ${_flag_vars}) - foreach(warning ${_msvc_warnings}) - set(${var} "${${var}} ${warning}") - endforeach() - endforeach() - elseif((CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) AND _gxx_warnings) - foreach(var ${_flag_vars}) - foreach(warning ${_gxx_warnings}) - if(NOT warning MATCHES "^-Wno-") - string(REPLACE "${warning}" "" ${var} "${${var}}") - string(REPLACE "-W" "-Wno-" warning "${warning}") - endif() - set(${var} "${${var}} ${warning}") - endforeach() - endforeach() - endif() - caffe_clear_vars(_flag_vars _msvc_warnings _gxx_warnings) -endmacro() - -################################################################################################ -# Helper function get current definitions -# Usage: -# caffe_get_current_definitions() -function(caffe_get_current_definitions definitions_var) - get_property(current_definitions DIRECTORY PROPERTY COMPILE_DEFINITIONS) - set(result "") - - foreach(d ${current_definitions}) - list(APPEND result -D${d}) - endforeach() - - caffe_list_unique(result) - set(${definitions_var} ${result} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Helper function get current includes/definitions -# Usage: -# caffe_get_current_cflags() -function(caffe_get_current_cflags cflags_var) - get_property(current_includes DIRECTORY PROPERTY INCLUDE_DIRECTORIES) - caffe_convert_absolute_paths(current_includes) - caffe_get_current_definitions(cflags) - - foreach(i ${current_includes}) - list(APPEND cflags "-I${i}") - endforeach() - - caffe_list_unique(cflags) - set(${cflags_var} ${cflags} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Helper function to parse current linker libs into link directories, libflags and osx frameworks -# Usage: -# caffe_parse_linker_libs( ) -function(caffe_parse_linker_libs Caffe_LINKER_LIBS_variable folders_var flags_var frameworks_var) - - set(__unspec "") - set(__debug "") - set(__optimized "") - set(__framework "") - set(__varname "__unspec") - - # split libs into debug, optimized, unspecified and frameworks - foreach(list_elem ${${Caffe_LINKER_LIBS_variable}}) - if(list_elem STREQUAL "debug") - set(__varname "__debug") - elseif(list_elem STREQUAL "optimized") - set(__varname "__optimized") - elseif(list_elem MATCHES "^-framework[ \t]+([^ \t].*)") - list(APPEND __framework -framework ${CMAKE_MATCH_1}) - else() - list(APPEND ${__varname} ${list_elem}) - set(__varname "__unspec") - endif() - endforeach() - - # attach debug or optimized libs to unspecified according to current configuration - if(CMAKE_BUILD_TYPE MATCHES "Debug") - set(__libs ${__unspec} ${__debug}) - else() - set(__libs ${__unspec} ${__optimized}) 
- endif() - - set(libflags "") - set(folders "") - - # convert linker libraries list to link flags - foreach(lib ${__libs}) - if(TARGET ${lib}) - list(APPEND folders $) - list(APPEND libflags -l${lib}) - elseif(lib MATCHES "^-l.*") - list(APPEND libflags ${lib}) - elseif(IS_ABSOLUTE ${lib}) - get_filename_component(folder ${lib} PATH) - get_filename_component(filename ${lib} NAME) - string(REGEX REPLACE "\\.[^.]*$" "" filename_without_shortest_ext ${filename}) - - string(REGEX MATCH "^lib(.*)" __match ${filename_without_shortest_ext}) - list(APPEND libflags -l${CMAKE_MATCH_1}) - list(APPEND folders ${folder}) - else() - message(FATAL_ERROR "Logic error. Need to update cmake script") - endif() - endforeach() - - caffe_list_unique(libflags folders) - - set(${folders_var} ${folders} PARENT_SCOPE) - set(${flags_var} ${libflags} PARENT_SCOPE) - set(${frameworks_var} ${__framework} PARENT_SCOPE) -endfunction() - -################################################################################################ -# Helper function to detect Darwin version, i.e. 10.8, 10.9, 10.10, .... -# Usage: -# caffe_detect_darwin_version() -function(caffe_detect_darwin_version output_var) - if(APPLE) - execute_process(COMMAND /usr/bin/sw_vers -productVersion - RESULT_VARIABLE __sw_vers OUTPUT_VARIABLE __sw_vers_out - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - - set(${output_var} ${__sw_vers_out} PARENT_SCOPE) - else() - set(${output_var} "" PARENT_SCOPE) - endif() -endfunction() diff --git a/examples/ios/simple/CaffeSimple.xcodeproj/project.pbxproj b/examples/ios/simple/CaffeSimple.xcodeproj/project.pbxproj deleted file mode 100644 index 38a9643..0000000 --- a/examples/ios/simple/CaffeSimple.xcodeproj/project.pbxproj +++ /dev/null @@ -1,404 +0,0 @@ -// !$*UTF8*$! -{ - archiveVersion = 1; - classes = { - }; - objectVersion = 46; - objects = { - -/* Begin PBXBuildFile section */ - 1965DBAF1E447C130064AD55 /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 1965DBAE1E447C130064AD55 /* main.m */; }; - 1965DBB21E447C130064AD55 /* AppDelegate.mm in Sources */ = {isa = PBXBuildFile; fileRef = 1965DBB11E447C130064AD55 /* AppDelegate.mm */; }; - 1965DBB51E447C130064AD55 /* ViewController.mm in Sources */ = {isa = PBXBuildFile; fileRef = 1965DBB41E447C130064AD55 /* ViewController.mm */; }; - 1965DBB81E447C130064AD55 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 1965DBB61E447C130064AD55 /* Main.storyboard */; }; - 1965DBBA1E447C130064AD55 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 1965DBB91E447C130064AD55 /* Assets.xcassets */; }; - 1965DBBD1E447C130064AD55 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 1965DBBB1E447C130064AD55 /* LaunchScreen.storyboard */; }; - 19F82F1F1E44C2C300695409 /* ImageReader.mm in Sources */ = {isa = PBXBuildFile; fileRef = 19F82F1E1E44C2C300695409 /* ImageReader.mm */; }; - 19F82F221E44D8D100695409 /* Accelerate.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 19F82F211E44D8D100695409 /* Accelerate.framework */; }; - 19F82F271E44DB2000695409 /* libproto.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 19F82F261E44DB2000695409 /* libproto.a */; }; - 19F82F291E44DB4900695409 /* libprotobuf-lite.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 19F82F281E44DB4900695409 /* libprotobuf-lite.a */; }; - 19F82F2B1E44DBD900695409 /* libprotobuf.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 19F82F2A1E44DBD900695409 /* libprotobuf.a */; }; - 19F82F531E4847C200695409 /* test_image.jpg in Resources */ 
= {isa = PBXBuildFile; fileRef = 19F82F511E4847C200695409 /* test_image.jpg */; }; - 19F82F541E4847C200695409 /* test_image.png in Resources */ = {isa = PBXBuildFile; fileRef = 19F82F521E4847C200695409 /* test_image.png */; }; - 19F82F571E48485C00695409 /* net.prototxt in Resources */ = {isa = PBXBuildFile; fileRef = 19F82F551E48485C00695409 /* net.prototxt */; }; - 19F82F581E48485C00695409 /* weight.caffemodel in Resources */ = {isa = PBXBuildFile; fileRef = 19F82F561E48485C00695409 /* weight.caffemodel */; }; -/* End PBXBuildFile section */ - -/* Begin PBXFileReference section */ - 1965DBAA1E447C130064AD55 /* CaffeSimple.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = CaffeSimple.app; sourceTree = BUILT_PRODUCTS_DIR; }; - 1965DBAE1E447C130064AD55 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = ""; }; - 1965DBB01E447C130064AD55 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = ""; }; - 1965DBB11E447C130064AD55 /* AppDelegate.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = AppDelegate.mm; sourceTree = ""; }; - 1965DBB31E447C130064AD55 /* ViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ViewController.h; sourceTree = ""; }; - 1965DBB41E447C130064AD55 /* ViewController.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = ViewController.mm; sourceTree = ""; }; - 1965DBB71E447C130064AD55 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; - 1965DBB91E447C130064AD55 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; - 1965DBBC1E447C130064AD55 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; - 1965DBBE1E447C130064AD55 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; - 19F82F1B1E44BD1E00695409 /* ImageReader.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = ImageReader.h; sourceTree = ""; }; - 19F82F1E1E44C2C300695409 /* ImageReader.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = ImageReader.mm; sourceTree = ""; }; - 19F82F211E44D8D100695409 /* Accelerate.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Accelerate.framework; path = System/Library/Frameworks/Accelerate.framework; sourceTree = SDKROOT; }; - 19F82F231E44D9A100695409 /* libcaffe.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libcaffe.a; path = ../../../build/lib/libcaffe.a; sourceTree = ""; }; - 19F82F261E44DB2000695409 /* libproto.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libproto.a; path = ../../../build/lib/libproto.a; sourceTree = ""; }; - 19F82F281E44DB4900695409 /* libprotobuf-lite.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = "libprotobuf-lite.a"; path = "../../../third_party/protobuf/protobuf/platform/x86_64/lib/libprotobuf-lite.a"; sourceTree = ""; }; - 19F82F2A1E44DBD900695409 /* libprotobuf.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libprotobuf.a; path = 
../../../third_party/protobuf/protobuf/platform/x86_64/lib/libprotobuf.a; sourceTree = ""; }; - 19F82F511E4847C200695409 /* test_image.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; name = test_image.jpg; path = data/test_image.jpg; sourceTree = ""; }; - 19F82F521E4847C200695409 /* test_image.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = test_image.png; path = data/test_image.png; sourceTree = ""; }; - 19F82F551E48485C00695409 /* net.prototxt */ = {isa = PBXFileReference; lastKnownFileType = text; name = net.prototxt; path = data/net.prototxt; sourceTree = ""; }; - 19F82F561E48485C00695409 /* weight.caffemodel */ = {isa = PBXFileReference; lastKnownFileType = file; name = weight.caffemodel; path = data/weight.caffemodel; sourceTree = ""; }; -/* End PBXFileReference section */ - -/* Begin PBXFrameworksBuildPhase section */ - 1965DBA71E447C130064AD55 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - 19F82F2B1E44DBD900695409 /* libprotobuf.a in Frameworks */, - 19F82F291E44DB4900695409 /* libprotobuf-lite.a in Frameworks */, - 19F82F271E44DB2000695409 /* libproto.a in Frameworks */, - 19F82F221E44D8D100695409 /* Accelerate.framework in Frameworks */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXFrameworksBuildPhase section */ - -/* Begin PBXGroup section */ - 1965DBA11E447C130064AD55 = { - isa = PBXGroup; - children = ( - 1965DBAC1E447C130064AD55 /* CaffeSimple */, - 1965DBAB1E447C130064AD55 /* Products */, - 19F82F201E44D8D100695409 /* Frameworks */, - ); - sourceTree = ""; - }; - 1965DBAB1E447C130064AD55 /* Products */ = { - isa = PBXGroup; - children = ( - 1965DBAA1E447C130064AD55 /* CaffeSimple.app */, - ); - name = Products; - sourceTree = ""; - }; - 1965DBAC1E447C130064AD55 /* CaffeSimple */ = { - isa = PBXGroup; - children = ( - 19F82F4B1E45783D00695409 /* data */, - 1965DBB01E447C130064AD55 /* AppDelegate.h */, - 1965DBB11E447C130064AD55 /* AppDelegate.mm */, - 19F82F1B1E44BD1E00695409 /* ImageReader.h */, - 19F82F1E1E44C2C300695409 /* ImageReader.mm */, - 1965DBB31E447C130064AD55 /* ViewController.h */, - 1965DBB41E447C130064AD55 /* ViewController.mm */, - 1965DBB61E447C130064AD55 /* Main.storyboard */, - 1965DBB91E447C130064AD55 /* Assets.xcassets */, - 1965DBBB1E447C130064AD55 /* LaunchScreen.storyboard */, - 1965DBBE1E447C130064AD55 /* Info.plist */, - 1965DBAD1E447C130064AD55 /* Supporting Files */, - ); - path = CaffeSimple; - sourceTree = ""; - }; - 1965DBAD1E447C130064AD55 /* Supporting Files */ = { - isa = PBXGroup; - children = ( - 1965DBAE1E447C130064AD55 /* main.m */, - ); - name = "Supporting Files"; - sourceTree = ""; - }; - 19F82F201E44D8D100695409 /* Frameworks */ = { - isa = PBXGroup; - children = ( - 19F82F2A1E44DBD900695409 /* libprotobuf.a */, - 19F82F281E44DB4900695409 /* libprotobuf-lite.a */, - 19F82F261E44DB2000695409 /* libproto.a */, - 19F82F231E44D9A100695409 /* libcaffe.a */, - 19F82F211E44D8D100695409 /* Accelerate.framework */, - ); - name = Frameworks; - sourceTree = ""; - }; - 19F82F4B1E45783D00695409 /* data */ = { - isa = PBXGroup; - children = ( - 19F82F551E48485C00695409 /* net.prototxt */, - 19F82F561E48485C00695409 /* weight.caffemodel */, - 19F82F521E4847C200695409 /* test_image.png */, - 19F82F511E4847C200695409 /* test_image.jpg */, - ); - name = data; - sourceTree = ""; - }; -/* End PBXGroup section */ - -/* Begin PBXNativeTarget section */ - 1965DBA91E447C130064AD55 /* CaffeSimple */ = { - isa = PBXNativeTarget; - 
buildConfigurationList = 1965DBC11E447C130064AD55 /* Build configuration list for PBXNativeTarget "CaffeSimple" */; - buildPhases = ( - 1965DBA61E447C130064AD55 /* Sources */, - 1965DBA71E447C130064AD55 /* Frameworks */, - 1965DBA81E447C130064AD55 /* Resources */, - ); - buildRules = ( - ); - dependencies = ( - ); - name = CaffeSimple; - productName = CaffeSimple; - productReference = 1965DBAA1E447C130064AD55 /* CaffeSimple.app */; - productType = "com.apple.product-type.application"; - }; -/* End PBXNativeTarget section */ - -/* Begin PBXProject section */ - 1965DBA21E447C130064AD55 /* Project object */ = { - isa = PBXProject; - attributes = { - LastUpgradeCheck = 0820; - ORGANIZATIONNAME = com.yangwenbo; - TargetAttributes = { - 1965DBA91E447C130064AD55 = { - CreatedOnToolsVersion = 8.2.1; - DevelopmentTeam = 4X58F8M83Y; - ProvisioningStyle = Automatic; - }; - }; - }; - buildConfigurationList = 1965DBA51E447C130064AD55 /* Build configuration list for PBXProject "CaffeSimple" */; - compatibilityVersion = "Xcode 3.2"; - developmentRegion = English; - hasScannedForEncodings = 0; - knownRegions = ( - en, - Base, - ); - mainGroup = 1965DBA11E447C130064AD55; - productRefGroup = 1965DBAB1E447C130064AD55 /* Products */; - projectDirPath = ""; - projectRoot = ""; - targets = ( - 1965DBA91E447C130064AD55 /* CaffeSimple */, - ); - }; -/* End PBXProject section */ - -/* Begin PBXResourcesBuildPhase section */ - 1965DBA81E447C130064AD55 /* Resources */ = { - isa = PBXResourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 19F82F571E48485C00695409 /* net.prototxt in Resources */, - 19F82F581E48485C00695409 /* weight.caffemodel in Resources */, - 19F82F531E4847C200695409 /* test_image.jpg in Resources */, - 19F82F541E4847C200695409 /* test_image.png in Resources */, - 1965DBBD1E447C130064AD55 /* LaunchScreen.storyboard in Resources */, - 1965DBBA1E447C130064AD55 /* Assets.xcassets in Resources */, - 1965DBB81E447C130064AD55 /* Main.storyboard in Resources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXResourcesBuildPhase section */ - -/* Begin PBXSourcesBuildPhase section */ - 1965DBA61E447C130064AD55 /* Sources */ = { - isa = PBXSourcesBuildPhase; - buildActionMask = 2147483647; - files = ( - 19F82F1F1E44C2C300695409 /* ImageReader.mm in Sources */, - 1965DBB51E447C130064AD55 /* ViewController.mm in Sources */, - 1965DBB21E447C130064AD55 /* AppDelegate.mm in Sources */, - 1965DBAF1E447C130064AD55 /* main.m in Sources */, - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXSourcesBuildPhase section */ - -/* Begin PBXVariantGroup section */ - 1965DBB61E447C130064AD55 /* Main.storyboard */ = { - isa = PBXVariantGroup; - children = ( - 1965DBB71E447C130064AD55 /* Base */, - ); - name = Main.storyboard; - sourceTree = ""; - }; - 1965DBBB1E447C130064AD55 /* LaunchScreen.storyboard */ = { - isa = PBXVariantGroup; - children = ( - 1965DBBC1E447C130064AD55 /* Base */, - ); - name = LaunchScreen.storyboard; - sourceTree = ""; - }; -/* End PBXVariantGroup section */ - -/* Begin XCBuildConfiguration section */ - 1965DBBF1E447C130064AD55 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_ANALYZER_NONNULL = YES; - CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; - CLANG_CXX_LIBRARY = "libc++"; - CLANG_ENABLE_MODULES = YES; - CLANG_ENABLE_OBJC_ARC = YES; - CLANG_WARN_BOOL_CONVERSION = YES; - CLANG_WARN_CONSTANT_CONVERSION = YES; - CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; - CLANG_WARN_DOCUMENTATION_COMMENTS = YES; - 
CLANG_WARN_EMPTY_BODY = YES; - CLANG_WARN_ENUM_CONVERSION = YES; - CLANG_WARN_INFINITE_RECURSION = YES; - CLANG_WARN_INT_CONVERSION = YES; - CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; - CLANG_WARN_SUSPICIOUS_MOVE = YES; - CLANG_WARN_UNREACHABLE_CODE = YES; - CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; - COPY_PHASE_STRIP = NO; - DEBUG_INFORMATION_FORMAT = dwarf; - ENABLE_STRICT_OBJC_MSGSEND = YES; - ENABLE_TESTABILITY = YES; - GCC_C_LANGUAGE_STANDARD = gnu99; - GCC_DYNAMIC_NO_PIC = NO; - GCC_NO_COMMON_BLOCKS = YES; - GCC_OPTIMIZATION_LEVEL = 0; - GCC_PREPROCESSOR_DEFINITIONS = ( - CPU_ONLY, - USE_ACCELERATE, - ); - GCC_WARN_64_TO_32_BIT_CONVERSION = YES; - GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; - GCC_WARN_UNDECLARED_SELECTOR = YES; - GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; - GCC_WARN_UNUSED_FUNCTION = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - HEADER_SEARCH_PATHS = ( - ../../../include, - ../../../build/include, - ../../../third_party/protobuf/include, - ); - IPHONEOS_DEPLOYMENT_TARGET = 10.2; - LIBRARY_SEARCH_PATHS = ( - ../../../build/lib, - ../../../third_party/protobuf/lib, - ); - MTL_ENABLE_DEBUG_INFO = YES; - ONLY_ACTIVE_ARCH = YES; - OTHER_LDFLAGS = ( - "-Wl,-force_load", - ../../../build/lib/libcaffe.a, - ); - SDKROOT = iphoneos; - }; - name = Debug; - }; - 1965DBC01E447C130064AD55 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - ALWAYS_SEARCH_USER_PATHS = NO; - CLANG_ANALYZER_NONNULL = YES; - CLANG_CXX_LANGUAGE_STANDARD = "c++0x"; - CLANG_CXX_LIBRARY = "libc++"; - CLANG_ENABLE_MODULES = YES; - CLANG_ENABLE_OBJC_ARC = YES; - CLANG_WARN_BOOL_CONVERSION = YES; - CLANG_WARN_CONSTANT_CONVERSION = YES; - CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; - CLANG_WARN_DOCUMENTATION_COMMENTS = YES; - CLANG_WARN_EMPTY_BODY = YES; - CLANG_WARN_ENUM_CONVERSION = YES; - CLANG_WARN_INFINITE_RECURSION = YES; - CLANG_WARN_INT_CONVERSION = YES; - CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; - CLANG_WARN_SUSPICIOUS_MOVE = YES; - CLANG_WARN_UNREACHABLE_CODE = YES; - CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - "CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer"; - COPY_PHASE_STRIP = NO; - DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; - ENABLE_NS_ASSERTIONS = NO; - ENABLE_STRICT_OBJC_MSGSEND = YES; - GCC_C_LANGUAGE_STANDARD = gnu99; - GCC_NO_COMMON_BLOCKS = YES; - GCC_PREPROCESSOR_DEFINITIONS = ( - CPU_ONLY, - USE_ACCELERATE, - ); - GCC_WARN_64_TO_32_BIT_CONVERSION = YES; - GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; - GCC_WARN_UNDECLARED_SELECTOR = YES; - GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; - GCC_WARN_UNUSED_FUNCTION = YES; - GCC_WARN_UNUSED_VARIABLE = YES; - HEADER_SEARCH_PATHS = ( - ../../../include, - ../../../build/include, - ../../../third_party/protobuf/include, - ); - IPHONEOS_DEPLOYMENT_TARGET = 10.2; - LIBRARY_SEARCH_PATHS = ( - ../../../build/lib, - ../../../third_party/protobuf/protobuf/platform/x86_64/lib, - ); - MTL_ENABLE_DEBUG_INFO = NO; - OTHER_LDFLAGS = ( - "-Wl,-force_load", - ../../../build/lib/libcaffe.a, - ); - SDKROOT = iphoneos; - VALIDATE_PRODUCT = YES; - }; - name = Release; - }; - 1965DBC21E447C130064AD55 /* Debug */ = { - isa = XCBuildConfiguration; - buildSettings = { - ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; - DEVELOPMENT_TEAM = 4X58F8M83Y; - INFOPLIST_FILE = CaffeSimple/Info.plist; - LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; - PRODUCT_BUNDLE_IDENTIFIER = com.yangwenbo.CaffeSimple; - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Debug; - }; - 
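
Both the Debug and Release configurations above define CPU_ONLY and USE_ACCELERATE and force-load the static libcaffe.a; the force_load is presumably there so that the layer-registration objects inside the static library are not stripped by the linker. A minimal sketch of what those two preprocessor definitions select at compile time, assuming they are forwarded to the Caffe sources unchanged (the main() below is purely illustrative and not part of the sample app):

    // Illustrative only: effect of the CPU_ONLY / USE_ACCELERATE definitions
    // set in the build configurations above.
    #include "caffe/common.hpp"

    int main() {
    #ifdef CPU_ONLY
      // GPU code paths are compiled out, so CPU is the only valid execution mode.
      caffe::Caffe::set_mode(caffe::Caffe::CPU);
    #endif
    #ifdef USE_ACCELERATE
      // BLAS calls are expected to resolve to Apple's Accelerate framework,
      // which is why Accelerate.framework appears in this target's link phase.
    #endif
      return 0;
    }
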
1965DBC31E447C130064AD55 /* Release */ = { - isa = XCBuildConfiguration; - buildSettings = { - ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; - DEVELOPMENT_TEAM = 4X58F8M83Y; - INFOPLIST_FILE = CaffeSimple/Info.plist; - LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; - PRODUCT_BUNDLE_IDENTIFIER = com.yangwenbo.CaffeSimple; - PRODUCT_NAME = "$(TARGET_NAME)"; - }; - name = Release; - }; -/* End XCBuildConfiguration section */ - -/* Begin XCConfigurationList section */ - 1965DBA51E447C130064AD55 /* Build configuration list for PBXProject "CaffeSimple" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 1965DBBF1E447C130064AD55 /* Debug */, - 1965DBC01E447C130064AD55 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; - 1965DBC11E447C130064AD55 /* Build configuration list for PBXNativeTarget "CaffeSimple" */ = { - isa = XCConfigurationList; - buildConfigurations = ( - 1965DBC21E447C130064AD55 /* Debug */, - 1965DBC31E447C130064AD55 /* Release */, - ); - defaultConfigurationIsVisible = 0; - defaultConfigurationName = Release; - }; -/* End XCConfigurationList section */ - }; - rootObject = 1965DBA21E447C130064AD55 /* Project object */; -} diff --git a/examples/ios/simple/CaffeSimple/AppDelegate.h b/examples/ios/simple/CaffeSimple/AppDelegate.h deleted file mode 100644 index f0bd505..0000000 --- a/examples/ios/simple/CaffeSimple/AppDelegate.h +++ /dev/null @@ -1,17 +0,0 @@ -// -// AppDelegate.h -// CaffeSimple -// -// Created by Wenbo Yang on 2017/2/3. -// Copyright © 2017å¹´ com.yangwenbo. All rights reserved. -// - -#import - -@interface AppDelegate : UIResponder - -@property (strong, nonatomic) UIWindow *window; - - -@end - diff --git a/examples/ios/simple/CaffeSimple/AppDelegate.mm b/examples/ios/simple/CaffeSimple/AppDelegate.mm deleted file mode 100644 index 1f0b30b..0000000 --- a/examples/ios/simple/CaffeSimple/AppDelegate.mm +++ /dev/null @@ -1,52 +0,0 @@ -// -// AppDelegate.m -// CaffeSimple -// -// Created by Wenbo Yang on 2017/2/3. -// Copyright © 2017å¹´ com.yangwenbo. All rights reserved. -// - -#import "AppDelegate.h" - - -@interface AppDelegate () - -@end - -@implementation AppDelegate - - -- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { - // Override point for customization after application launch. - return YES; -} - - -- (void)applicationWillResignActive:(UIApplication *)application { - // Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state. - // Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game. -} - - -- (void)applicationDidEnterBackground:(UIApplication *)application { - // Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later. - // If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits. 
-} - - -- (void)applicationWillEnterForeground:(UIApplication *)application { - // Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background. -} - - -- (void)applicationDidBecomeActive:(UIApplication *)application { - // Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface. -} - - -- (void)applicationWillTerminate:(UIApplication *)application { - // Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:. -} - - -@end diff --git a/examples/ios/simple/CaffeSimple/Assets.xcassets/AppIcon.appiconset/Contents.json b/examples/ios/simple/CaffeSimple/Assets.xcassets/AppIcon.appiconset/Contents.json deleted file mode 100644 index b8236c6..0000000 --- a/examples/ios/simple/CaffeSimple/Assets.xcassets/AppIcon.appiconset/Contents.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "images" : [ - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "20x20", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "29x29", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "40x40", - "scale" : "3x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "2x" - }, - { - "idiom" : "iphone", - "size" : "60x60", - "scale" : "3x" - } - ], - "info" : { - "version" : 1, - "author" : "xcode" - } -} \ No newline at end of file diff --git a/examples/ios/simple/CaffeSimple/Base.lproj/LaunchScreen.storyboard b/examples/ios/simple/CaffeSimple/Base.lproj/LaunchScreen.storyboard deleted file mode 100644 index fdf3f97..0000000 --- a/examples/ios/simple/CaffeSimple/Base.lproj/LaunchScreen.storyboard +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/examples/ios/simple/CaffeSimple/Base.lproj/Main.storyboard b/examples/ios/simple/CaffeSimple/Base.lproj/Main.storyboard deleted file mode 100644 index 880ede0..0000000 --- a/examples/ios/simple/CaffeSimple/Base.lproj/Main.storyboard +++ /dev/null @@ -1,60 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/examples/ios/simple/CaffeSimple/ImageReader.h b/examples/ios/simple/CaffeSimple/ImageReader.h deleted file mode 100644 index 589fd2f..0000000 --- a/examples/ios/simple/CaffeSimple/ImageReader.h +++ /dev/null @@ -1,20 +0,0 @@ -// -// ImageReader.h -// CaffeSimple -// -// Created by Wenbo Yang on 2017/2/3. -// Copyright © 2017å¹´ com.yangwenbo. All rights reserved. -// - -#ifndef ImageReader_h -#define ImageReader_h - -#include "caffe/caffe.hpp" - -NSString* FilePathForResourceName(NSString* name, NSString* extension); - -bool ReadImageToBlob(NSString *file_name, - const std::vector &mean, - caffe::Blob* input_layer); - -#endif /* ImageReader_h */ diff --git a/examples/ios/simple/CaffeSimple/ImageReader.mm b/examples/ios/simple/CaffeSimple/ImageReader.mm deleted file mode 100644 index 4e6a71a..0000000 --- a/examples/ios/simple/CaffeSimple/ImageReader.mm +++ /dev/null @@ -1,133 +0,0 @@ -// -// ImageReader.m -// CaffeSimple -// -// Created by Wenbo Yang on 2017/2/3. -// Copyright © 2017å¹´ com.yangwenbo. All rights reserved. 
-// -#include "ImageReader.h" - -#import - -NSString* FilePathForResourceName(NSString* name, NSString* extension) { - NSString* file_path = [[NSBundle mainBundle] pathForResource:name ofType:extension]; - if (file_path == NULL) { - LOG(FATAL) << "Couldn't find '" << [name UTF8String] << "." - << [extension UTF8String] << "' in bundle."; - } - return file_path; -} - -// Read a jpg/png image from file to Caffe input_layer. -// Modified on tensorflow ios example, URL: https://github.com/tensorflow/tensorflow/\ -// blob/master/tensorflow/contrib/ios_examples/simple/ios_image_load.mm -bool ReadImageToBlob(NSString *file_name, - const std::vector &mean, - caffe::Blob* input_layer) { - // Get file size - FILE* file_handle = fopen([file_name UTF8String], "rb"); - fseek(file_handle, 0, SEEK_END); - const size_t bytes_in_file = ftell(file_handle); - fseek(file_handle, 0, SEEK_SET); - - // Read file bytes - std::vector file_data(bytes_in_file); - fread(file_data.data(), 1, bytes_in_file, file_handle); - fclose(file_handle); - CFDataRef file_data_ref = CFDataCreateWithBytesNoCopy(NULL, file_data.data(), - bytes_in_file, - kCFAllocatorNull); - CGDataProviderRef image_provider = CGDataProviderCreateWithCFData(file_data_ref); - - // Determine file type, Read image - NSString *suffix = [file_name pathExtension]; - CGImageRef image; - if ([suffix isEqualToString: @"png"]) { - image = CGImageCreateWithPNGDataProvider(image_provider, NULL, true, - kCGRenderingIntentDefault); - } else if ([suffix isEqualToString: @"jpg"] || - [suffix isEqualToString: @"jpeg"]) { - image = CGImageCreateWithJPEGDataProvider(image_provider, NULL, true, - kCGRenderingIntentDefault); - } else { - CFRelease(image_provider); - CFRelease(file_data_ref); - LOG(ERROR) << "Unknown suffix for file" << file_name; - return 1; - } - - // Get Image width and height - size_t width = CGImageGetWidth(image); - size_t height = CGImageGetHeight(image); - size_t bits_per_component = CGImageGetBitsPerComponent(image); - size_t bits_per_pixel = CGImageGetBitsPerPixel(image); - - LOG(INFO) << "CGImage width:" << width << " height:" << height << " BitsPerComponent:" << bits_per_component << " BitsPerPixel:" << bits_per_pixel; - - size_t image_channels = bits_per_pixel/bits_per_component; - CGColorSpaceRef color_space; - uint32_t bitmapInfo = 0; - if (image_channels == 1) { - color_space = CGColorSpaceCreateDeviceGray(); - bitmapInfo = kCGImageAlphaNone; - } else if (image_channels == 4) { - // Remove alpha channel - color_space = CGColorSpaceCreateDeviceRGB(); - //bitmapInfo = kCGImageAlphaPremultipliedLast | kCGBitmapByteOrder32Big; - bitmapInfo = kCGImageAlphaNoneSkipLast | kCGBitmapByteOrder32Big; - } else { - // FIXME: image convert - LOG(ERROR) << "Image channel:" << image_channels; - return false; - } - - // Read Image to bitmap - size_t bytes_per_row = image_channels * width; - size_t bytes_in_image = bytes_per_row * height; - std::vector result(bytes_in_image); - CGContextRef context = CGBitmapContextCreate(result.data(), width, height, - bits_per_component, bytes_per_row, color_space, - bitmapInfo); - LOG(INFO) << "bytes_per_row: " << bytes_per_row; - // Release resources - CGColorSpaceRelease(color_space); - CGContextDrawImage(context, CGRectMake(0, 0, width, height), image); - CGContextRelease(context); - CFRelease(image); - CFRelease(image_provider); - CFRelease(file_data_ref); - - // Convert Bitmap (channels*width*height) to Matrix (width*height*channels) - // Remove alpha channel - int input_channels = input_layer->channels(); - 
LOG(INFO) << "image_channels:" << image_channels << " input_channels:" << input_channels; - if (input_channels == 3 && image_channels != 4) { - LOG(ERROR) << "image_channels input_channels not match."; - return false; - } else if (input_channels == 1 && image_channels != 1) { - LOG(ERROR) << "image_channels input_channels not match."; - return false; - } - //int input_width = input_layer->width(); - //int input_height = input_layer->height(); - - float *input_data = input_layer->mutable_cpu_data(); - - for (size_t h = 0; h < height; h++) { - for (size_t w = 0; w < width; w++) { - for (size_t c = 0; c < input_channels; c++) { - // OpenCV use BGR instead of RGB - size_t cc = c; - if (input_channels == 3) { - cc = 2 - c; - } - // Convert uint8_t to float - input_data[c*width*height + h*width + w] = static_cast(result[h*width*image_channels + w*image_channels + cc]); - if (mean.size() == input_channels) { - input_data[c*width*height + h*width + w] -= mean[c]; - } - } - } - } - return true; -} diff --git a/examples/ios/simple/CaffeSimple/Info.plist b/examples/ios/simple/CaffeSimple/Info.plist deleted file mode 100644 index 38e98af..0000000 --- a/examples/ios/simple/CaffeSimple/Info.plist +++ /dev/null @@ -1,38 +0,0 @@ - - - - - CFBundleDevelopmentRegion - en - CFBundleExecutable - $(EXECUTABLE_NAME) - CFBundleIdentifier - $(PRODUCT_BUNDLE_IDENTIFIER) - CFBundleInfoDictionaryVersion - 6.0 - CFBundleName - $(PRODUCT_NAME) - CFBundlePackageType - APPL - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - LSRequiresIPhoneOS - - UILaunchStoryboardName - LaunchScreen - UIMainStoryboardFile - Main - UIRequiredDeviceCapabilities - - armv7 - - UISupportedInterfaceOrientations - - UIInterfaceOrientationPortrait - UIInterfaceOrientationLandscapeLeft - UIInterfaceOrientationLandscapeRight - - - diff --git a/examples/ios/simple/CaffeSimple/ViewController.h b/examples/ios/simple/CaffeSimple/ViewController.h deleted file mode 100644 index be38149..0000000 --- a/examples/ios/simple/CaffeSimple/ViewController.h +++ /dev/null @@ -1,21 +0,0 @@ -// -// ViewController.h -// CaffeSimple -// -// Created by Wenbo Yang on 2017/2/3. -// Copyright © 2017å¹´ com.yangwenbo. All rights reserved. -// - -#import - -@interface ViewController : UIViewController - -#pragma mark Console -@property (nonatomic,strong) IBOutlet UITextView *console; - -@property (nonatomic,strong) IBOutlet UIImageView *test_image; - -#pragma mark ClickEvent -- (IBAction)RunCaffeModel:(UIButton *)btn; - -@end diff --git a/examples/ios/simple/CaffeSimple/ViewController.mm b/examples/ios/simple/CaffeSimple/ViewController.mm deleted file mode 100644 index a41e5d5..0000000 --- a/examples/ios/simple/CaffeSimple/ViewController.mm +++ /dev/null @@ -1,88 +0,0 @@ -// -// ViewController.m -// CaffeSimple -// -// Created by Wenbo Yang on 2017/2/3. -// Copyright © 2017å¹´ com.yangwenbo. All rights reserved. -// - -#import "ViewController.h" - -#include -#include "caffe/caffe.hpp" -#include "ImageReader.h" - -@interface ViewController () - -@end - -@implementation ViewController - -caffe::Net *_net; - -- (void)viewDidLoad { - [super viewDidLoad]; - // Do any additional setup after loading the view, typically from a nib. - NSString *test_file_path = FilePathForResourceName(@"test_image", @"png"); - UIImage *image = [UIImage imageWithContentsOfFile:test_file_path]; - [_test_image setImage:image]; -} - - -- (void)didReceiveMemoryWarning { - [super didReceiveMemoryWarning]; - // Dispose of any resources that can be recreated. 
-} - -- (void)viewWillAppear:(BOOL)animated{ -} - -- (void)viewDidAppear:(BOOL)animated { - caffe::CPUTimer timer; - timer.Start(); - NSString *modle_path = FilePathForResourceName(@"net", @"prototxt"); - _net = new caffe::Net([modle_path UTF8String], caffe::TEST); - NSString *weight_path = FilePathForResourceName(@"weight", @"caffemodel"); - _net->CopyTrainedLayersFrom([weight_path UTF8String]); - caffe::Blob *input_layer = _net->input_blobs()[0]; - timer.Stop(); - [_console insertText:[NSString stringWithFormat:@"%fms\n", timer.MilliSeconds()]]; - LOG(INFO) << "Input layer info: channels:" << input_layer->channels() - << " width: " << input_layer->width() << " Height:" << input_layer->height(); - -} - -- (void)RunCaffeModel:(UIButton *)btn { - caffe::CPUTimer timer; - [_console insertText:@"\nCaffe infering..."]; - caffe::Blob *input_layer = _net->input_blobs()[0]; - NSString *test_file_path = FilePathForResourceName(@"test_image", @"png"); - timer.Start(); - //std::vector mean({81.3, 107.3, 105.3}); - std::vector mean; - if(! ReadImageToBlob(test_file_path, mean, input_layer)) { - LOG(INFO) << "ReadImageToBlob failed"; - [_console insertText:@"ReadImageToBlob failed"]; - return; - } - _net->Forward(); - timer.Stop(); - [_console insertText:[NSString stringWithFormat:@"%fms\n", timer.MilliSeconds()]]; - - [_console insertText:@"Inference result(sorted):\n"]; - caffe::Blob *output_layer = _net->output_blobs()[0]; - const float *begin = output_layer->cpu_data(); - const float *end = begin + output_layer->channels(); - std::vector result(begin, end); - std::vector result_idx(result.size()); - std::iota(result_idx.begin(), result_idx.end(), 0); - std::sort(result_idx.begin(), result_idx.end(), - [&result](size_t l, size_t r){return result[l] > result[r];}); - for (int i=0; i~ zcb-$pZrO{!f4?u%<7JQj`uCp&nq&jeu1m4?^Ur|n&nyOGC$bE(L^Na>TlQrv*-4wGG0a%TG>ni^U2(%rt3>w}bt~nn z6qPnfX)lWQUUC(wu5M`~^PXo=df)r`|No!Q|2&`b%=i4x@>|dEoOvYoC0#(#--qP` z5Ci}}_y9>a2=vb6#{&=)1k3;c6hJ_f0SXZUA5g#tm=ptG41WP2>>_-lXzxP5W4u%H z8Zbk6sp+X{yi_55mL&r)Jy<~)z5*;#Y$Sz>cXf&%lz=*{f0azxt4|&)CC`*HS)QJ; zAz{HjtUzz6fJ!{JP?$`B6H)}}BHu9gn7J|Y=;X&xMh&0lO}Wz2?@m46r>8c z(zSnhedxrs0N{=VzSa+2|9gWN$4%qFVNI}R#iemlpi!IwAop>EqI3Xcra`*lvUDMi zp)G-GQbd?1#0#Jq<;LeiGSKto3b-OZhwdw4XVHDxDJg8awWT#ZM3frOPyPp~@k#%m zl9DXO<;II1ta52d;Q{d163U+!$M(W86XH4!+dF(5zs%rA&4Jhry4XON?hyvDKg3JK zNs;al&w#ilUgQ%A@ob3i^Td8QwgCWD&*nu$L#zw2zhKFnV2B+d=5f+I=Rs@=aTSlt z!u0jpIHMy zGDsIi;B%pDq=nptRgrf4J1K& z0>r)F`tdeC1D_*=J*glbj!cI0Ca81}2j`E?!`DcK6>s6|_DR>}jPtXE{5*Ak2V&UI z1Fv8|527H@C8cCS`BGrp6H>*nF9G7oT&!UnS%VO%tYb3Z{WzEYaW1KFMx2z_*gRaz zG1)w!%mR=N`*7K$nqwLm1Dge_6BdZMU<@fY5gd^PETnX2DQ$x5cg=C#`oD2q3T2(F z{bYMh*!VoOS2|od8EPmPw@(_BW(3NeG(P*gHKiOkXo*dQ!qWAjtVMgJ663nl*N9^I zoLeP>|AW`QduxKb#SeZ9o*Wm_NQwKexPZKf?Zn%}r^LH}4uAh7b`bj@dPIB-p9kOM z37HfRxb5-C;lno{>NpVrW0A#?QgB}+!w2`vf6MvZXJhO9#|j84<%`FxyL1&i!ct(x zJ%uNZq6HH>$Rr{6~~?lE37IvAC( zX8_%d)}v?8lV~G)9nfK`5j~Hdl%g8+G<=SHoA+ackL{x$BOH%6&Ui%QI;M}u)a2No zq@mPa6V}4VPk1VEe5X#1J>0&NBL?@_|4pLF9{n*RjCn`8>u82FD;k+*Mzf{4(-4|I ze5`415E;@8Xn`~h*y~3#p?T4!ezV8e9>?Xt_d6b86M5l#7K?$s6On-H$AfVolIAS- zxW50XBmD<0@r=fwysHp-xHHE%jpc+1o)dPJM#v94D-v?zgJn<-5wt75+XXO}(0>?% zXJR}H!H7iWOMUncI|0+htS}bLOu>MTxns7N+c?Ivl{0+sTs4-xEihNuI~()Dn0OwW ztN~no%wBj#`)BDV?+h-W$)(k6ny5V;wd<+2ZLZx>hMEyEPEXo+8Nv~!^K!x`y zRiFt^RYQ1MTR^SsffGD4z2O}o7({>=un6kGgPs&Y%jSU9ARiQg4PYxM2Rp$YPz`E9 z9XJk7gY)1LxCU;64)6$`C%s?*41y0}7(oy+B9Evbnus1^gqS1N2osr&cq4&G7!rdl zMiLMKB1UqMpO7MC6H<;;BGt$dkLO=;}1XY3#!I&_Uz$Ca6 z0tgX=g@gpcQbIN%kFcIlMyMjx5E=;Q3D*c6gf7AWVF(4NEUJ#`qZTL=^+bcwd1yQ; 
zLRX@NXenBS*1{anirzuH&{ybZjD)FRbj$*J(GQEnIM`BbC02};WBaiNtOdJ+bz_6r zC{doMO*AJu5?RE##026pVm`5yxR+Q@ya+RNFYyD3L{f+Tv4?((AtjQQlZr{dk`9rY zNNuDZ(h!+U)+C#gUC6;?HaVS~Pu@z(9-pR|$8_T=N&y!D= zUoT%Re?k7K{3n_U&638V#nEzU6|@FgJME2vjDnGZyTT%cY=u&VqYAeaUMb2b8Y_A# zvK3b55XHGFjPJ*;_eIIZt`F@&)A{6;#Dg#Z!f& z@{`IQm5VC9sw7ntRhDX^YLV(6sy9^!)s)q2)S}cf)hg7^sCB8M>c;9U^kp^oF4jDW# zq#7~};|;eMHX9C3)0-AFEoa)nX%CHLjF?6|qisg5Mjwq$jH8VUjE@`lnM^TZnPi(B zGHELL7d?*LolOi z#ytx;3wMhQi-Q*3mg<&)mTN4JTfUh&ZRWh08)vr89A(%tlHuw0$V%DD-)fCjqt)Nm zX4V|*3hTSG|8M8?Mc7Hur5+Y=dkIY@2Pr+S%KQ>}u`$?WfrbtUCt6ZPC8M?)}?R9(MKHWXhy~h2G2g5_;QSb55)6sKBtCPlC@upFv++-{rndenh_jzm0zPSUM~YtC}_FZ|}dtza>B}AUvQv z;8~zqpeV30h!Er-v^nVU9HTjcImd%RFe`X-@RJaekfkA~LP?<^p%tOe!>qz`!dk;s z!ehe^gnx+eh$xA86loeMj%;@sl4#PxGsxSO~y;$7l5#rGw+By3LT=ehDqd9V1M{Br)=M8CwHi64`K zllCP^lB1IkrBG59r<@ch3z7sask*6|sck}YVZN|yspHbEO9w>(qP=NI+QPIG>8kJ# z=5?{Dc&)fQV|GSG#;0Xb%Z_F$WePH{XPITK%X*RRll}X0Y&mCnbB=z_>YVNs?kg%+ zA}g1yY|b^z&CBgw<-2PCYU=8w)i>5K)@)ky{-@}l8uN7WR^|1q^<8@~pO!DmzgOT? zu=8i^Xa3JO*UehDecfnbT;bIsMp0?eNHM4Q>MvHml>H)EAHV))iCxKW8%P@j8#*_- zZrr~~X;b#5p3Om<8@AB56mEIHHFoQjQk&A9+o;>b+q%jE%T8>cwtd6)k#b&nM}psE0uKi*ATdM7=s}E=&C^<+tC_dOtsnMZ4n86DeEuTo#wfHY(^3?Anl?>Z54qV1&r$>v6<#`;s1r}mxJKV5M~ z?M%s8>e;-rl5^SThMI&;ug)i&?`e*0esp2(g}W`GEjKR)T)ceA=hB5%_ttZlT`r%# z;&|ocRp!;>*X*t}T(`Ymf5Y}h{Y|@@4Y%xXow)6AyRpr=?QFYi`}sRwcP`y!-M!W^ zr=zVivh)7E1^2q{$KCIHko@57!;FW+k5<7;eBl$tC#6rdpH}^A`o9i!*>#=i_UgX= zEd1G{9!}4|pJ{)N^yWR6e_r}R??rW=b>HcJpZ?n~=fCV75DpB#%73l&ddC~nH^=^R z`|JAP+`-zC*+FNT+mkVneC zPXF3C8Zi1;k|L2vnt&_?Xaj8oQ38?{{DT2PgvT{3V^;*&CZHIRM5f44W#NE&Mfiyb zMF|*6B;ucTkQJ~WU`j+~eJgj8N+_FbuvFE0^~Oqyp~sQ)YGG}I(`GFZt&yRsYfRD9 zGBP$Xoo;4hYiG}NaP;)@_VM*&`G-eDMn%Jm3MY;mpTOfMrlpHBmStvT|CF~jzu@O} zg_|~S*;=}-Y*Xs~+wa`%=)8CT!Ncxn zJ%9E-f6>?f^6lU6-Vc5F`04W($s_o8mLM%C0cwMyP$LW$BAJL6G6f#Wuu#SeRa)R7 zDk}@m$MGV00>5@)l81nTo5mH4<_`27TJ2g_(0cA+z_3Sbp00XD^s)BQ`3E1w=ij_x zZ>+D~oG|s8Se)GXaUfNd zlXiu1tTcYTZD{E-#O-C$pI;=mA)61UnnXDARO<_NbkKv~v7b?VwXZz9((ny*~8GmUt6$#t&yKbZ#KDDP2R*#J2gunfzq{VgJ zO0&1mx9_>I>tSY5%-Ih59gEAnnx0eJd*?>14e=md^4R}AgsH8TD`_9i?W-!3Joq;z C=0L{) diff --git a/examples/ios/simple/CaffeSimple/data/test_image.png b/examples/ios/simple/CaffeSimple/data/test_image.png deleted file mode 100644 index 88083cccd5bc5c316b0187167a1f4bf2bb795864..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 287 zcmV+)0pR|LP)~ zcb-$pZrO{!f4?u%<7JQj`uCp&nq&jeu1m4?^Ur|n -#import "AppDelegate.h" - -int main(int argc, char * argv[]) { - @autoreleasepool { - return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); - } -} diff --git a/examples/mnist/test_lenet.sh b/examples/mnist/test_lenet.sh deleted file mode 100644 index 78a0638..0000000 --- a/examples/mnist/test_lenet.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env sh - -./build/tools/caffe test --model examples/mnist/lenet_train_test.prototxt --weights examples/mnist/lenet_iter_10000.caffemodel diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp deleted file mode 100644 index 2f59471..0000000 --- a/include/caffe/blob.hpp +++ /dev/null @@ -1,282 +0,0 @@ -#ifndef CAFFE_BLOB_HPP_ -#define CAFFE_BLOB_HPP_ - -#include -#include -#include - -#include "caffe/common.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/syncedmem.hpp" - -const int kMaxBlobAxes = 32; - -namespace caffe { - -/** - * @brief A wrapper around SyncedMemory holders serving as the basic - * computational unit through which Layer%s, Net%s, and Solver%s - * interact. - * - * TODO(dox): more thorough description. 
- */ -template -class Blob { - public: - Blob() - : data_(), diff_(), count_(0), capacity_(0) {} - - /// @brief Deprecated; use Blob(const vector& shape). - explicit Blob(const int num, const int channels, const int height, - const int width); - explicit Blob(const vector& shape); - - /// @brief Deprecated; use Reshape(const vector& shape). - void Reshape(const int num, const int channels, const int height, - const int width); - /** - * @brief Change the dimensions of the blob, allocating new memory if - * necessary. - * - * This function can be called both to create an initial allocation - * of memory, and to adjust the dimensions of a top blob during Layer::Reshape - * or Layer::Forward. When changing the size of blob, memory will only be - * reallocated if sufficient memory does not already exist, and excess memory - * will never be freed. - * - * Note that reshaping an input blob and immediately calling Net::Backward is - * an error; either Net::Forward or Net::Reshape need to be called to - * propagate the new input shape to higher layers. - */ - void Reshape(const vector& shape); - void Reshape(const BlobShape& shape); - void ReshapeLike(const Blob& other); - inline string shape_string() const { - ostringstream stream; - for (int i = 0; i < shape_.size(); ++i) { - stream << shape_[i] << " "; - } - stream << "(" << count_ << ")"; - return stream.str(); - } - inline const vector& shape() const { return shape_; } - /** - * @brief Returns the dimension of the index-th axis (or the negative index-th - * axis from the end, if index is negative). - * - * @param index the axis index, which may be negative as it will be - * "canonicalized" using CanonicalAxisIndex. - * Dies on out of range index. - */ - inline int shape(int index) const { - return shape_[CanonicalAxisIndex(index)]; - } - inline int num_axes() const { return shape_.size(); } - inline int count() const { return count_; } - - /** - * @brief Compute the volume of a slice; i.e., the product of dimensions - * among a range of axes. - * - * @param start_axis The first axis to include in the slice. - * - * @param end_axis The first axis to exclude from the slice. - */ - inline int count(int start_axis, int end_axis) const { - CHECK_LE(start_axis, end_axis); - CHECK_GE(start_axis, 0); - CHECK_GE(end_axis, 0); - CHECK_LE(start_axis, num_axes()); - CHECK_LE(end_axis, num_axes()); - int count = 1; - for (int i = start_axis; i < end_axis; ++i) { - count *= shape(i); - } - return count; - } - /** - * @brief Compute the volume of a slice spanning from a particular first - * axis to the final axis. - * - * @param start_axis The first axis to include in the slice. - */ - inline int count(int start_axis) const { - return count(start_axis, num_axes()); - } - - /** - * @brief Returns the 'canonical' version of a (usually) user-specified axis, - * allowing for negative indexing (e.g., -1 for the last axis). - * - * @param axis_index the axis index. - * If 0 <= index < num_axes(), return index. - * If -num_axes <= index <= -1, return (num_axes() - (-index)), - * e.g., the last axis index (num_axes() - 1) if index == -1, - * the second to last if index == -2, etc. - * Dies on out of range index. 
- */ - inline int CanonicalAxisIndex(int axis_index) const { - CHECK_GE(axis_index, -num_axes()) - << "axis " << axis_index << " out of range for " << num_axes() - << "-D Blob with shape " << shape_string(); - CHECK_LT(axis_index, num_axes()) - << "axis " << axis_index << " out of range for " << num_axes() - << "-D Blob with shape " << shape_string(); - if (axis_index < 0) { - return axis_index + num_axes(); - } - return axis_index; - } - - /// @brief Deprecated legacy shape accessor num: use shape(0) instead. - inline int num() const { return LegacyShape(0); } - /// @brief Deprecated legacy shape accessor channels: use shape(1) instead. - inline int channels() const { return LegacyShape(1); } - /// @brief Deprecated legacy shape accessor height: use shape(2) instead. - inline int height() const { return LegacyShape(2); } - /// @brief Deprecated legacy shape accessor width: use shape(3) instead. - inline int width() const { return LegacyShape(3); } - inline int LegacyShape(int index) const { - CHECK_LE(num_axes(), 4) - << "Cannot use legacy accessors on Blobs with > 4 axes."; - CHECK_LT(index, 4); - CHECK_GE(index, -4); - if (index >= num_axes() || index < -num_axes()) { - // Axis is out of range, but still in [0, 3] (or [-4, -1] for reverse - // indexing) -- this special case simulates the one-padding used to fill - // extraneous axes of legacy blobs. - return 1; - } - return shape(index); - } - - inline int offset(const int n, const int c = 0, const int h = 0, - const int w = 0) const { - CHECK_GE(n, 0); - CHECK_LE(n, num()); - CHECK_GE(channels(), 0); - CHECK_LE(c, channels()); - CHECK_GE(height(), 0); - CHECK_LE(h, height()); - CHECK_GE(width(), 0); - CHECK_LE(w, width()); - return ((n * channels() + c) * height() + h) * width() + w; - } - - inline int offset(const vector& indices) const { - CHECK_LE(indices.size(), num_axes()); - int offset = 0; - for (int i = 0; i < num_axes(); ++i) { - offset *= shape(i); - if (indices.size() > i) { - CHECK_GE(indices[i], 0); - CHECK_LT(indices[i], shape(i)); - offset += indices[i]; - } - } - return offset; - } - /** - * @brief Copy from a source Blob. 
- * - * @param source the Blob to copy from - * @param copy_diff if false, copy the data; if true, copy the diff - * @param reshape if false, require this Blob to be pre-shaped to the shape - * of other (and die otherwise); if true, Reshape this Blob to other's - * shape if necessary - */ - void CopyFrom(const Blob& source, bool copy_diff = false, - bool reshape = false); - - inline Dtype data_at(const int n, const int c, const int h, - const int w) const { - return cpu_data()[offset(n, c, h, w)]; - } - - inline Dtype diff_at(const int n, const int c, const int h, - const int w) const { - return cpu_diff()[offset(n, c, h, w)]; - } - - inline Dtype data_at(const vector& index) const { - return cpu_data()[offset(index)]; - } - - inline Dtype diff_at(const vector& index) const { - return cpu_diff()[offset(index)]; - } - - inline const shared_ptr& data() const { - CHECK(data_); - return data_; - } - - inline const shared_ptr& diff() const { - CHECK(diff_); - return diff_; - } - - const Dtype* cpu_data() const; - void set_cpu_data(Dtype* data); - const int* gpu_shape() const; - const Dtype* gpu_data() const; - void set_gpu_data(Dtype* data); - const Dtype* cpu_diff() const; - const Dtype* gpu_diff() const; - Dtype* mutable_cpu_data(); - Dtype* mutable_gpu_data(); - Dtype* mutable_cpu_diff(); - Dtype* mutable_gpu_diff(); - void Update(); - void FromProto(const BlobProto& proto, bool reshape = true); - void ToProto(BlobProto* proto, bool write_diff = false) const; - - /// @brief Compute the sum of absolute values (L1 norm) of the data. - Dtype asum_data() const; - /// @brief Compute the sum of absolute values (L1 norm) of the diff. - Dtype asum_diff() const; - /// @brief Compute the sum of squares (L2 norm squared) of the data. - Dtype sumsq_data() const; - /// @brief Compute the sum of squares (L2 norm squared) of the diff. - Dtype sumsq_diff() const; - - /// @brief Scale the blob data by a constant factor. - void scale_data(Dtype scale_factor); - /// @brief Scale the blob diff by a constant factor. - void scale_diff(Dtype scale_factor); - - /** - * @brief Set the data_ shared_ptr to point to the SyncedMemory holding the - * data_ of Blob other -- useful in Layer%s which simply perform a copy - * in their Forward pass. - * - * This deallocates the SyncedMemory holding this Blob's data_, as - * shared_ptr calls its destructor when reset with the "=" operator. - */ - void ShareData(const Blob& other); - /** - * @brief Set the diff_ shared_ptr to point to the SyncedMemory holding the - * diff_ of Blob other -- useful in Layer%s which simply perform a copy - * in their Forward pass. - * - * This deallocates the SyncedMemory holding this Blob's diff_, as - * shared_ptr calls its destructor when reset with the "=" operator. - */ - void ShareDiff(const Blob& other); - - bool ShapeEquals(const BlobProto& other); - - protected: - shared_ptr data_; - shared_ptr diff_; - shared_ptr shape_data_; - vector shape_; - int count_; - int capacity_; - - DISABLE_COPY_AND_ASSIGN(Blob); -}; // class Blob - -} // namespace caffe - -#endif // CAFFE_BLOB_HPP_ diff --git a/include/caffe/caffe.hpp b/include/caffe/caffe.hpp deleted file mode 100644 index 596371a..0000000 --- a/include/caffe/caffe.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// caffe.hpp is the header file that you need to include in your code. It wraps -// all the internal caffe header files into one for simpler inclusion. 
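
caffe.hpp below is the single include that pulls in the Blob interface removed above. A minimal sketch of the shape/count/offset arithmetic that blob.hpp documents, assuming that umbrella header is on the include path (the shape values are arbitrary example numbers); the same offset() layout is what ReadImageToBlob in the iOS example relies on when it writes channel-major pixels into input_layer->mutable_cpu_data():

    // Illustrative only: the shape/count/offset arithmetic documented in blob.hpp.
    #include "caffe/caffe.hpp"

    void blob_indexing_example() {
      caffe::Blob<float> blob(2, 3, 4, 5);           // N=2, C=3, H=4, W=5
      int last_axis = blob.CanonicalAxisIndex(-1);   // -1 + num_axes() == 3 (the W axis)
      int spatial   = blob.count(2, 4);              // product of dims [2, 4) == 4 * 5 == 20
      // offset() linearizes (n, c, h, w) as ((n*C + c)*H + h)*W + w.
      int off = blob.offset(1, 2, 3, 4);             // ((1*3 + 2)*4 + 3)*5 + 4 == 119
      float v = blob.cpu_data()[off];                // read the element at that index
      (void)last_axis; (void)spatial; (void)v;
    }
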
- -#ifndef CAFFE_CAFFE_HPP_ -#define CAFFE_CAFFE_HPP_ - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/layer_factory.hpp" -#include "caffe/net.hpp" -#ifdef NO_CAFFE_MOBILE -#include "caffe/parallel.hpp" -#endif -#include "caffe/proto/caffe.pb.h" -#ifdef NO_CAFFE_MOBILE -#include "caffe/solver.hpp" -#include "caffe/solver_factory.hpp" -#endif -#include "caffe/util/benchmark.hpp" -#include "caffe/util/io.hpp" -#include "caffe/util/upgrade_proto.hpp" - -#endif // CAFFE_CAFFE_HPP_ diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp deleted file mode 100644 index 5ce7996..0000000 --- a/include/caffe/common.hpp +++ /dev/null @@ -1,209 +0,0 @@ -#ifndef CAFFE_COMMON_HPP_ -#define CAFFE_COMMON_HPP_ - -#ifdef USE_BOOST -#include -#else -#include -#endif -#ifdef NO_CAFFE_MOBILE -#include -#endif -#ifdef USE_GLOG -#include -#else -#include "caffe/glog_wrapper.hpp" -#endif - -#include -#include -#include // NOLINT(readability/streams) -#include // NOLINT(readability/streams) -#include -#include -#include -#include -#include // pair -#include - -#include "caffe/util/device_alternate.hpp" - -// Convert macro to string -#define STRINGIFY(m) #m -#define AS_STRING(m) STRINGIFY(m) - -#ifdef NO_CAFFE_MOBILE -// gflags 2.1 issue: namespace google was changed to gflags without warning. -// Luckily we will be able to use GFLAGS_GFLAGS_H_ to detect if it is version -// 2.1. If yes, we will add a temporary solution to redirect the namespace. -// TODO(Yangqing): Once gflags solves the problem in a more elegant way, let's -// remove the following hack. -#ifndef GFLAGS_GFLAGS_H_ -namespace gflags = google; -#endif // GFLAGS_GFLAGS_H_ -#endif // NO_CAFFE_MOBILE - -// Disable the copy and assignment operator for a class. -#define DISABLE_COPY_AND_ASSIGN(classname) \ -private:\ - classname(const classname&);\ - classname& operator=(const classname&) - -// Instantiate a class with float and double specifications. -#define INSTANTIATE_CLASS(classname) \ - char gInstantiationGuard##classname; \ - template class classname; \ - template class classname - -#define INSTANTIATE_LAYER_GPU_FORWARD(classname) \ - template void classname::Forward_gpu( \ - const std::vector*>& bottom, \ - const std::vector*>& top); \ - template void classname::Forward_gpu( \ - const std::vector*>& bottom, \ - const std::vector*>& top); - -#define INSTANTIATE_LAYER_GPU_BACKWARD(classname) \ - template void classname::Backward_gpu( \ - const std::vector*>& top, \ - const std::vector& propagate_down, \ - const std::vector*>& bottom); \ - template void classname::Backward_gpu( \ - const std::vector*>& top, \ - const std::vector& propagate_down, \ - const std::vector*>& bottom) - -#define INSTANTIATE_LAYER_GPU_FUNCS(classname) \ - INSTANTIATE_LAYER_GPU_FORWARD(classname); \ - INSTANTIATE_LAYER_GPU_BACKWARD(classname) - -// A simple macro to mark codes that are not implemented, so that when the code -// is executed we will see a fatal log. -#define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet" - -// See PR #1236 -namespace cv { class Mat; } - -namespace caffe { - -#ifdef USE_BOOST -// We will use the boost shared_ptr instead of the new C++11 one mainly -// because cuda does not work (at least now) well with C++11 features. -using boost::shared_ptr; -#else -using std::shared_ptr; -#endif - -// Common functions and classes from std that caffe often uses. 
-using std::fstream; -using std::ios; -using std::isnan; -using std::isinf; -using std::iterator; -using std::make_pair; -using std::map; -using std::ostringstream; -using std::pair; -using std::set; -using std::string; -using std::stringstream; -using std::vector; - -// A global initialization function that you should call in your main function. -// Currently it initializes google flags and google logging. -void GlobalInit(int* pargc, char*** pargv); - -// A singleton class to hold common caffe stuff, such as the handler that -// caffe is going to use for cublas, curand, etc. -class Caffe { - public: - ~Caffe(); - - // Thread local context for Caffe. Moved to common.cpp instead of - // including boost/thread.hpp to avoid a boost/NVCC issues (#1009, #1010) - // on OSX. Also fails on Linux with CUDA 7.0.18. - static Caffe& Get(); - - enum Brew { CPU, GPU }; - - // This random number generator facade hides boost and CUDA rng - // implementation from one another (for cross-platform compatibility). - class RNG { - public: - RNG(); - explicit RNG(unsigned int seed); - explicit RNG(const RNG&); - RNG& operator=(const RNG&); - void* generator(); - private: - class Generator; - shared_ptr generator_; - }; - - // Getters for boost rng, curand, and cublas handles - inline static RNG& rng_stream() { - if (!Get().random_generator_) { - Get().random_generator_.reset(new RNG()); - } - return *(Get().random_generator_); - } -#ifndef CPU_ONLY - inline static cublasHandle_t cublas_handle() { return Get().cublas_handle_; } - inline static curandGenerator_t curand_generator() { - return Get().curand_generator_; - } -#endif - - // Returns the mode: running on CPU or GPU. - inline static Brew mode() { return Get().mode_; } - // The setters for the variables - // Sets the mode. It is recommended that you don't change the mode halfway - // into the program since that may cause allocation of pinned memory being - // freed in a non-pinned way, which may cause problems - I haven't verified - // it personally but better to note it here in the header file. - inline static void set_mode(Brew mode) { Get().mode_ = mode; } - // Sets the random seed of both boost and curand - static void set_random_seed(const unsigned int seed); - // Sets the device. Since we have cublas and curand stuff, set device also - // requires us to reset those values. - static void SetDevice(const int device_id); - // Prints the current GPU status. - static void DeviceQuery(); - // Check if specified device is available - static bool CheckDevice(const int device_id); - // Search from start_id to the highest possible device ordinal, - // return the ordinal of the first available device. 
- static int FindDevice(const int start_id = 0); - // Parallel training - inline static int solver_count() { return Get().solver_count_; } - inline static void set_solver_count(int val) { Get().solver_count_ = val; } - inline static int solver_rank() { return Get().solver_rank_; } - inline static void set_solver_rank(int val) { Get().solver_rank_ = val; } - inline static bool multiprocess() { return Get().multiprocess_; } - inline static void set_multiprocess(bool val) { Get().multiprocess_ = val; } - inline static bool root_solver() { return Get().solver_rank_ == 0; } - - protected: -#ifndef CPU_ONLY - cublasHandle_t cublas_handle_; - curandGenerator_t curand_generator_; -#endif - shared_ptr random_generator_; - - Brew mode_; - - // Parallel training - int solver_count_; - int solver_rank_; - bool multiprocess_; - - private: - // The private constructor to avoid duplicate instantiation. - Caffe(); - - DISABLE_COPY_AND_ASSIGN(Caffe); -}; - -} // namespace caffe - -#endif // CAFFE_COMMON_HPP_ diff --git a/include/caffe/data_transformer.hpp b/include/caffe/data_transformer.hpp deleted file mode 100644 index 97b4ee6..0000000 --- a/include/caffe/data_transformer.hpp +++ /dev/null @@ -1,154 +0,0 @@ -#ifndef CAFFE_DATA_TRANSFORMER_HPP -#define CAFFE_DATA_TRANSFORMER_HPP - -#include - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief Applies common transformations to the input data, such as - * scaling, mirroring, substracting the image mean... - */ -template -class DataTransformer { - public: - explicit DataTransformer(const TransformationParameter& param, Phase phase); - virtual ~DataTransformer() {} - - /** - * @brief Initialize the Random number generations if needed by the - * transformation. - */ - void InitRand(); - - /** - * @brief Applies the transformation defined in the data layer's - * transform_param block to the data. - * - * @param datum - * Datum containing the data to be transformed. - * @param transformed_blob - * This is destination blob. It can be part of top blob's data if - * set_cpu_data() is used. See data_layer.cpp for an example. - */ - void Transform(const Datum& datum, Blob* transformed_blob); - - /** - * @brief Applies the transformation defined in the data layer's - * transform_param block to a vector of Datum. - * - * @param datum_vector - * A vector of Datum containing the data to be transformed. - * @param transformed_blob - * This is destination blob. It can be part of top blob's data if - * set_cpu_data() is used. See memory_layer.cpp for an example. - */ - void Transform(const vector & datum_vector, - Blob* transformed_blob); - -#ifdef USE_OPENCV - /** - * @brief Applies the transformation defined in the data layer's - * transform_param block to a vector of Mat. - * - * @param mat_vector - * A vector of Mat containing the data to be transformed. - * @param transformed_blob - * This is destination blob. It can be part of top blob's data if - * set_cpu_data() is used. See memory_layer.cpp for an example. - */ - void Transform(const vector & mat_vector, - Blob* transformed_blob); - - /** - * @brief Applies the transformation defined in the data layer's - * transform_param block to a cv::Mat - * - * @param cv_img - * cv::Mat containing the data to be transformed. - * @param transformed_blob - * This is destination blob. It can be part of top blob's data if - * set_cpu_data() is used. See image_data_layer.cpp for an example. 
- */ - void Transform(const cv::Mat& cv_img, Blob* transformed_blob); -#endif // USE_OPENCV - - /** - * @brief Applies the same transformation defined in the data layer's - * transform_param block to all the num images in a input_blob. - * - * @param input_blob - * A Blob containing the data to be transformed. It applies the same - * transformation to all the num images in the blob. - * @param transformed_blob - * This is destination blob, it will contain as many images as the - * input blob. It can be part of top blob's data. - */ - void Transform(Blob* input_blob, Blob* transformed_blob); - - /** - * @brief Infers the shape of transformed_blob will have when - * the transformation is applied to the data. - * - * @param datum - * Datum containing the data to be transformed. - */ - vector InferBlobShape(const Datum& datum); - /** - * @brief Infers the shape of transformed_blob will have when - * the transformation is applied to the data. - * It uses the first element to infer the shape of the blob. - * - * @param datum_vector - * A vector of Datum containing the data to be transformed. - */ - vector InferBlobShape(const vector & datum_vector); - /** - * @brief Infers the shape of transformed_blob will have when - * the transformation is applied to the data. - * It uses the first element to infer the shape of the blob. - * - * @param mat_vector - * A vector of Mat containing the data to be transformed. - */ -#ifdef USE_OPENCV - vector InferBlobShape(const vector & mat_vector); - /** - * @brief Infers the shape of transformed_blob will have when - * the transformation is applied to the data. - * - * @param cv_img - * cv::Mat containing the data to be transformed. - */ - vector InferBlobShape(const cv::Mat& cv_img); -#endif // USE_OPENCV - - protected: - /** - * @brief Generates a random integer from Uniform({0, 1, ..., n-1}). - * - * @param n - * The upperbound (exclusive) value of the random number. - * @return - * A uniformly random integer value from ({0, 1, ..., n-1}). - */ - virtual int Rand(int n); - - void Transform(const Datum& datum, Dtype* transformed_data); - // Tranformation parameters - TransformationParameter param_; - - - shared_ptr rng_; - Phase phase_; - Blob data_mean_; - vector mean_values_; -}; - -} // namespace caffe - -#endif // CAFFE_DATA_TRANSFORMER_HPP_ diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp deleted file mode 100644 index dad9ad4..0000000 --- a/include/caffe/filler.hpp +++ /dev/null @@ -1,295 +0,0 @@ -// Fillers are random number generators that fills a blob using the specified -// algorithm. The expectation is that they are only going to be used during -// initialization time and will not involve any GPUs. - -#ifndef CAFFE_FILLER_HPP -#define CAFFE_FILLER_HPP - -#include - -#include "caffe/blob.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/syncedmem.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -/// @brief Fills a Blob with constant or randomly-generated data. -template -class Filler { - public: - explicit Filler(const FillerParameter& param) : filler_param_(param) {} - virtual ~Filler() {} - virtual void Fill(Blob* blob) = 0; - protected: - FillerParameter filler_param_; -}; // class Filler - - -/// @brief Fills a Blob with constant values @f$ x = 0 @f$. 
-template -class ConstantFiller : public Filler { - public: - explicit ConstantFiller(const FillerParameter& param) - : Filler(param) {} - virtual void Fill(Blob* blob) { - Dtype* data = blob->mutable_cpu_data(); - const int count = blob->count(); - const Dtype value = this->filler_param_.value(); - CHECK(count); - for (int i = 0; i < count; ++i) { - data[i] = value; - } - CHECK_EQ(this->filler_param_.sparse(), -1) - << "Sparsity not supported by this Filler."; - } -}; - -/// @brief Fills a Blob with uniformly distributed values @f$ x\sim U(a, b) @f$. -template -class UniformFiller : public Filler { - public: - explicit UniformFiller(const FillerParameter& param) - : Filler(param) {} - virtual void Fill(Blob* blob) { - CHECK(blob->count()); - caffe_rng_uniform(blob->count(), Dtype(this->filler_param_.min()), - Dtype(this->filler_param_.max()), blob->mutable_cpu_data()); - CHECK_EQ(this->filler_param_.sparse(), -1) - << "Sparsity not supported by this Filler."; - } -}; - -/// @brief Fills a Blob with Gaussian-distributed values @f$ x = a @f$. -template -class GaussianFiller : public Filler { - public: - explicit GaussianFiller(const FillerParameter& param) - : Filler(param) {} - virtual void Fill(Blob* blob) { - Dtype* data = blob->mutable_cpu_data(); - CHECK(blob->count()); - caffe_rng_gaussian(blob->count(), Dtype(this->filler_param_.mean()), - Dtype(this->filler_param_.std()), blob->mutable_cpu_data()); - int sparse = this->filler_param_.sparse(); - CHECK_GE(sparse, -1); - if (sparse >= 0) { - // Sparse initialization is implemented for "weight" blobs; i.e. matrices. - // These have num == channels == 1; width is number of inputs; height is - // number of outputs. The 'sparse' variable specifies the mean number - // of non-zero input weights for a given output. - CHECK_GE(blob->num_axes(), 1); - const int num_outputs = blob->shape(0); - Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs); - rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int))); - int* mask = reinterpret_cast(rand_vec_->mutable_cpu_data()); - caffe_rng_bernoulli(blob->count(), non_zero_probability, mask); - for (int i = 0; i < blob->count(); ++i) { - data[i] *= mask[i]; - } - } - } - - protected: - shared_ptr rand_vec_; -}; - -/** @brief Fills a Blob with values @f$ x \in [0, 1] @f$ - * such that @f$ \forall i \sum_j x_{ij} = 1 @f$. - */ -template -class PositiveUnitballFiller : public Filler { - public: - explicit PositiveUnitballFiller(const FillerParameter& param) - : Filler(param) {} - virtual void Fill(Blob* blob) { - Dtype* data = blob->mutable_cpu_data(); - DCHECK(blob->count()); - caffe_rng_uniform(blob->count(), 0, 1, blob->mutable_cpu_data()); - // We expect the filler to not be called very frequently, so we will - // just use a simple implementation - int dim = blob->count() / blob->num(); - CHECK(dim); - for (int i = 0; i < blob->num(); ++i) { - Dtype sum = 0; - for (int j = 0; j < dim; ++j) { - sum += data[i * dim + j]; - } - for (int j = 0; j < dim; ++j) { - data[i * dim + j] /= sum; - } - } - CHECK_EQ(this->filler_param_.sparse(), -1) - << "Sparsity not supported by this Filler."; - } -}; - -/** - * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$ is - * set inversely proportional to number of incoming nodes, outgoing - * nodes, or their average. - * - * A Filler based on the paper [Bengio and Glorot 2010]: Understanding - * the difficulty of training deep feedforward neuralnetworks. 
- * - * It fills the incoming matrix by randomly sampling uniform data from [-scale, - * scale] where scale = sqrt(3 / n) where n is the fan_in, fan_out, or their - * average, depending on the variance_norm option. You should make sure the - * input blob has shape (num, a, b, c) where a * b * c = fan_in and num * b * c - * = fan_out. Note that this is currently not the case for inner product layers. - * - * TODO(dox): make notation in above comment consistent with rest & use LaTeX. - */ -template -class XavierFiller : public Filler { - public: - explicit XavierFiller(const FillerParameter& param) - : Filler(param) {} - virtual void Fill(Blob* blob) { - CHECK(blob->count()); - int fan_in = blob->count() / blob->num(); - int fan_out = blob->count() / blob->channels(); - Dtype n = fan_in; // default to fan_in - if (this->filler_param_.variance_norm() == - FillerParameter_VarianceNorm_AVERAGE) { - n = (fan_in + fan_out) / Dtype(2); - } else if (this->filler_param_.variance_norm() == - FillerParameter_VarianceNorm_FAN_OUT) { - n = fan_out; - } - Dtype scale = sqrt(Dtype(3) / n); - caffe_rng_uniform(blob->count(), -scale, scale, - blob->mutable_cpu_data()); - CHECK_EQ(this->filler_param_.sparse(), -1) - << "Sparsity not supported by this Filler."; - } -}; - -/** - * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where - * @f$ \sigma^2 @f$ is set inversely proportional to number of incoming - * nodes, outgoing nodes, or their average. - * - * A Filler based on the paper [He, Zhang, Ren and Sun 2015]: Specifically - * accounts for ReLU nonlinearities. - * - * Aside: for another perspective on the scaling factor, see the derivation of - * [Saxe, McClelland, and Ganguli 2013 (v3)]. - * - * It fills the incoming matrix by randomly sampling Gaussian data with std = - * sqrt(2 / n) where n is the fan_in, fan_out, or their average, depending on - * the variance_norm option. You should make sure the input blob has shape (num, - * a, b, c) where a * b * c = fan_in and num * b * c = fan_out. Note that this - * is currently not the case for inner product layers. - */ -template -class MSRAFiller : public Filler { - public: - explicit MSRAFiller(const FillerParameter& param) - : Filler(param) {} - virtual void Fill(Blob* blob) { - CHECK(blob->count()); - int fan_in = blob->count() / blob->num(); - int fan_out = blob->count() / blob->channels(); - Dtype n = fan_in; // default to fan_in - if (this->filler_param_.variance_norm() == - FillerParameter_VarianceNorm_AVERAGE) { - n = (fan_in + fan_out) / Dtype(2); - } else if (this->filler_param_.variance_norm() == - FillerParameter_VarianceNorm_FAN_OUT) { - n = fan_out; - } - Dtype std = sqrt(Dtype(2) / n); - caffe_rng_gaussian(blob->count(), Dtype(0), std, - blob->mutable_cpu_data()); - CHECK_EQ(this->filler_param_.sparse(), -1) - << "Sparsity not supported by this Filler."; - } -}; - -/*! -@brief Fills a Blob with coefficients for bilinear interpolation. - -A common use case is with the DeconvolutionLayer acting as upsampling. -You can upsample a feature map with shape of (B, C, H, W) by any integer factor -using the following proto. -\code -layer { - name: "upsample", type: "Deconvolution" - bottom: "{{bottom_name}}" top: "{{top_name}}" - convolution_param { - kernel_size: {{2 * factor - factor % 2}} stride: {{factor}} - num_output: {{C}} group: {{C}} - pad: {{ceil((factor - 1) / 2.)}} - weight_filler: { type: "bilinear" } bias_term: false - } - param { lr_mult: 0 decay_mult: 0 } -} -\endcode -Please use this by replacing `{{}}` with your values. 
By specifying -`num_output: {{C}} group: {{C}}`, it behaves as -channel-wise convolution. The filter shape of this deconvolution layer will be -(C, 1, K, K) where K is `kernel_size`, and this filler will set a (K, K) -interpolation kernel for every channel of the filter identically. The resulting -shape of the top feature map will be (B, C, factor * H, factor * W). -Note that the learning rate and the -weight decay are set to 0 in order to keep coefficient values of bilinear -interpolation unchanged during training. If you apply this to an image, this -operation is equivalent to the following call in Python with Scikit.Image. -\code{.py} -out = skimage.transform.rescale(img, factor, mode='constant', cval=0) -\endcode - */ -template -class BilinearFiller : public Filler { - public: - explicit BilinearFiller(const FillerParameter& param) - : Filler(param) {} - virtual void Fill(Blob* blob) { - CHECK_EQ(blob->num_axes(), 4) << "Blob must be 4 dim."; - CHECK_EQ(blob->width(), blob->height()) << "Filter must be square"; - Dtype* data = blob->mutable_cpu_data(); - int f = ceil(blob->width() / 2.); - float c = (2 * f - 1 - f % 2) / (2. * f); - for (int i = 0; i < blob->count(); ++i) { - float x = i % blob->width(); - float y = (i / blob->width()) % blob->height(); - data[i] = (1 - fabs(x / f - c)) * (1 - fabs(y / f - c)); - } - CHECK_EQ(this->filler_param_.sparse(), -1) - << "Sparsity not supported by this Filler."; - } -}; - -/** - * @brief Get a specific filler from the specification given in FillerParameter. - * - * Ideally this would be replaced by a factory pattern, but we will leave it - * this way for now. - */ -template -Filler* GetFiller(const FillerParameter& param) { - const std::string& type = param.type(); - if (type == "constant") { - return new ConstantFiller(param); - } else if (type == "gaussian") { - return new GaussianFiller(param); - } else if (type == "positive_unitball") { - return new PositiveUnitballFiller(param); - } else if (type == "uniform") { - return new UniformFiller(param); - } else if (type == "xavier") { - return new XavierFiller(param); - } else if (type == "msra") { - return new MSRAFiller(param); - } else if (type == "bilinear") { - return new BilinearFiller(param); - } else { - CHECK(false) << "Unknown filler name: " << param.type(); - } - return (Filler*)(NULL); -} - -} // namespace caffe - -#endif // CAFFE_FILLER_HPP_ diff --git a/include/caffe/glog_wrapper.hpp b/include/caffe/glog_wrapper.hpp deleted file mode 100644 index b617742..0000000 --- a/include/caffe/glog_wrapper.hpp +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef GLOG_WRAPPER_HPP -#define GLOG_WRAPPER_HPP - -#include -#include -#include -#include - -namespace caffe{ - -struct nullstream: std::ostream{ - nullstream(): std::ostream(0){} -}; - -template -nullstream &operator<<(nullstream &o, T const & x) { return o;} -extern nullstream __nullstream; - -class LogMessage{ - std::string level; - std::ostream &ofs; - static bool enable; - public: - LogMessage(const std::string &l) - :level(l), ofs(enable ? std::cerr : __nullstream){ - stream() << "[" << level << "]\t"; - } - LogMessage(std::ostream &o) - :level("ERROR"), ofs(o){ - stream() << "[" << level << "]\t"; - } - inline std::ostream &stream(){ - return ofs; - } - ~LogMessage() { - stream() << std::endl; - } - - static void Enable(bool _enable){ - enable = _enable; - } -}; - -// This class is used to explicitly ignore values in the conditional -// logging macros. 
This avoids compiler warnings like "value computed -// is not used" and "statement has no effect". - -class LogMessageVoidify { - public: - LogMessageVoidify() { } - // This has to be an operator with a precedence lower than << but - // higher than ?: - void operator&(std::ostream&) { } -}; -} - -#define LOG(type) caffe::LogMessage(#type).stream() -#define DLOG(type) caffe::LogMessage(#type).stream() -#define VLOG(level) if ((level) <= FLAGS_v) LOG(INFO) - -#define CHECK(x) if(x) {} else LOG(ERROR) << #x -#define DCHECK(x) CHECK(x) - -#define CHECK_EQ(x, y) CHECK((x) == (y)) -#define CHECK_LT(x, y) CHECK((x) < (y)) -#define CHECK_GT(x, y) CHECK((x) > (y)) -#define CHECK_LE(x, y) CHECK((x) <= (y)) -#define CHECK_GE(x, y) CHECK((x) >= (y)) -#define CHECK_NE(x, y) CHECK((x) != (y)) - -#define DCHECK_EQ(x, y) DCHECK((x) == (y)) -#define DCHECK_LT(x, y) DCHECK((x) < (y)) -#define DCHECK_GT(x, y) DCHECK((x) > (y)) -#define DCHECK_LE(x, y) DCHECK((x) <= (y)) -#define DCHECK_GE(x, y) DCHECK((x) >= (y)) -#define DCHECK_NE(x, y) DCHECK((x) != (y)) - -#define LOG_IF(severity, condition) \ - !(condition) ? (void) 0 : caffe::LogMessageVoidify() & LOG(severity) - -#endif diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp deleted file mode 100644 index 30dbfd5..0000000 --- a/include/caffe/layer.hpp +++ /dev/null @@ -1,477 +0,0 @@ -#ifndef CAFFE_LAYER_H_ -#define CAFFE_LAYER_H_ - -#include -#include -#include - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/layer_factory.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/util/math_functions.hpp" - -/** - Forward declare boost::thread instead of including boost/thread.hpp - to avoid a boost/NVCC issues (#1009, #1010) on OSX. - */ -namespace boost { class mutex; } - -namespace caffe { - -/** - * @brief An interface for the units of computation which can be composed into a - * Net. - * - * Layer%s must implement a Forward function, in which they take their input - * (bottom) Blob%s (if any) and compute their output Blob%s (if any). - * They may also implement a Backward function, in which they compute the error - * gradients with respect to their input Blob%s, given the error gradients with - * their output Blob%s. - */ -template -class Layer { - public: - /** - * You should not implement your own constructor. Any set up code should go - * to SetUp(), where the dimensions of the bottom blobs are provided to the - * layer. - */ - explicit Layer(const LayerParameter& param) - : layer_param_(param) { - // Set phase and copy blobs (if there are any). - phase_ = param.phase(); - if (layer_param_.blobs_size() > 0) { - blobs_.resize(layer_param_.blobs_size()); - for (int i = 0; i < layer_param_.blobs_size(); ++i) { - blobs_[i].reset(new Blob()); - blobs_[i]->FromProto(layer_param_.blobs(i)); - } - } - } - virtual ~Layer() {} - - /** - * @brief Implements common layer setup functionality. - * - * @param bottom the preshaped input blobs - * @param top - * the allocated but unshaped output blobs, to be shaped by Reshape - * - * Checks that the number of bottom and top blobs is correct. - * Calls LayerSetUp to do special layer setup for individual layer types, - * followed by Reshape to set up sizes of top blobs and internal buffers. - * Sets up the loss weight multiplier blobs for any non-zero loss weights. - * This method may not be overridden. 
- */ - void SetUp(const vector*>& bottom, - const vector*>& top) { - CheckBlobCounts(bottom, top); - LayerSetUp(bottom, top); - Reshape(bottom, top); - SetLossWeights(top); - } - - /** - * @brief Does layer-specific setup: your layer should implement this function - * as well as Reshape. - * - * @param bottom - * the preshaped input blobs, whose data fields store the input data for - * this layer - * @param top - * the allocated but unshaped output blobs - * - * This method should do one-time layer specific setup. This includes reading - * and processing relevent parameters from the layer_param_. - * Setting up the shapes of top blobs and internal buffers should be done in - * Reshape, which will be called before the forward pass to - * adjust the top blob sizes. - */ - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top) {} - - /** - * @brief Adjust the shapes of top blobs and internal buffers to accommodate - * the shapes of the bottom blobs. - * - * @param bottom the input blobs, with the requested input shapes - * @param top the top blobs, which should be reshaped as needed - * - * This method should reshape top blobs as needed according to the shapes - * of the bottom (input) blobs, as well as reshaping any internal buffers - * and making any other necessary adjustments so that the layer can - * accommodate the bottom blobs. - */ - virtual void Reshape(const vector*>& bottom, - const vector*>& top) = 0; - - /** - * @brief Given the bottom blobs, compute the top blobs and the loss. - * - * @param bottom - * the input blobs, whose data fields store the input data for this layer - * @param top - * the preshaped output blobs, whose data fields will store this layers' - * outputs - * \return The total loss from the layer. - * - * The Forward wrapper calls the relevant device wrapper function - * (Forward_cpu or Forward_gpu) to compute the top blob values given the - * bottom blobs. If the layer has any non-zero loss_weights, the wrapper - * then computes and returns the loss. - * - * Your layer should implement Forward_cpu and (optionally) Forward_gpu. - */ - inline Dtype Forward(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Given the top blob error gradients, compute the bottom blob error - * gradients. - * - * @param top - * the output blobs, whose diff fields store the gradient of the error - * with respect to themselves - * @param propagate_down - * a vector with equal length to bottom, with each index indicating - * whether to propagate the error gradients down to the bottom blob at - * the corresponding index - * @param bottom - * the input blobs, whose diff fields will store the gradient of the error - * with respect to themselves after Backward is run - * - * The Backward wrapper calls the relevant device wrapper function - * (Backward_cpu or Backward_gpu) to compute the bottom blob diffs given the - * top blob diffs. - * - * Your layer should implement Backward_cpu and (optionally) Backward_gpu. - */ - inline void Backward(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom); - - /** - * @brief Returns the vector of learnable parameter blobs. - */ - vector > >& blobs() { - return blobs_; - } - - /** - * @brief Returns the layer parameter. 
- */ - const LayerParameter& layer_param() const { return layer_param_; } - - /** - * @brief Writes the layer parameter to a protocol buffer - */ - virtual void ToProto(LayerParameter* param, bool write_diff = false); - - /** - * @brief Returns the scalar loss associated with a top blob at a given index. - */ - inline Dtype loss(const int top_index) const { - return (loss_.size() > top_index) ? loss_[top_index] : Dtype(0); - } - - /** - * @brief Sets the loss associated with a top blob at a given index. - */ - inline void set_loss(const int top_index, const Dtype value) { - if (loss_.size() <= top_index) { - loss_.resize(top_index + 1, Dtype(0)); - } - loss_[top_index] = value; - } - - /** - * @brief Returns the layer type. - */ - virtual inline const char* type() const { return ""; } - - /** - * @brief Returns the exact number of bottom blobs required by the layer, - * or -1 if no exact number is required. - * - * This method should be overridden to return a non-negative value if your - * layer expects some exact number of bottom blobs. - */ - virtual inline int ExactNumBottomBlobs() const { return -1; } - /** - * @brief Returns the minimum number of bottom blobs required by the layer, - * or -1 if no minimum number is required. - * - * This method should be overridden to return a non-negative value if your - * layer expects some minimum number of bottom blobs. - */ - virtual inline int MinBottomBlobs() const { return -1; } - /** - * @brief Returns the maximum number of bottom blobs required by the layer, - * or -1 if no maximum number is required. - * - * This method should be overridden to return a non-negative value if your - * layer expects some maximum number of bottom blobs. - */ - virtual inline int MaxBottomBlobs() const { return -1; } - /** - * @brief Returns the exact number of top blobs required by the layer, - * or -1 if no exact number is required. - * - * This method should be overridden to return a non-negative value if your - * layer expects some exact number of top blobs. - */ - virtual inline int ExactNumTopBlobs() const { return -1; } - /** - * @brief Returns the minimum number of top blobs required by the layer, - * or -1 if no minimum number is required. - * - * This method should be overridden to return a non-negative value if your - * layer expects some minimum number of top blobs. - */ - virtual inline int MinTopBlobs() const { return -1; } - /** - * @brief Returns the maximum number of top blobs required by the layer, - * or -1 if no maximum number is required. - * - * This method should be overridden to return a non-negative value if your - * layer expects some maximum number of top blobs. - */ - virtual inline int MaxTopBlobs() const { return -1; } - /** - * @brief Returns true if the layer requires an equal number of bottom and - * top blobs. - * - * This method should be overridden to return true if your layer expects an - * equal number of bottom and top blobs. - */ - virtual inline bool EqualNumBottomTopBlobs() const { return false; } - - /** - * @brief Return whether "anonymous" top blobs are created automatically - * by the layer. - * - * If this method returns true, Net::Init will create enough "anonymous" top - * blobs to fulfill the requirement specified by ExactNumTopBlobs() or - * MinTopBlobs(). - */ - virtual inline bool AutoTopBlobs() const { return false; } - - /** - * @brief Return whether to allow force_backward for a given bottom blob - * index. 
- * - * If AllowForceBackward(i) == false, we will ignore the force_backward - * setting and backpropagate to blob i only if it needs gradient information - * (as is done when force_backward == false). - */ - virtual inline bool AllowForceBackward(const int bottom_index) const { - return true; - } - - /** - * @brief Specifies whether the layer should compute gradients w.r.t. a - * parameter at a particular index given by param_id. - * - * You can safely ignore false values and always compute gradients - * for all parameters, but possibly with wasteful computation. - */ - inline bool param_propagate_down(const int param_id) { - return (param_propagate_down_.size() > param_id) ? - param_propagate_down_[param_id] : false; - } - /** - * @brief Sets whether the layer should compute gradients w.r.t. a - * parameter at a particular index given by param_id. - */ - inline void set_param_propagate_down(const int param_id, const bool value) { - if (param_propagate_down_.size() <= param_id) { - param_propagate_down_.resize(param_id + 1, true); - } - param_propagate_down_[param_id] = value; - } - - - protected: - /** The protobuf that stores the layer parameters */ - LayerParameter layer_param_; - /** The phase: TRAIN or TEST */ - Phase phase_; - /** The vector that stores the learnable parameters as a set of blobs. */ - vector > > blobs_; - /** Vector indicating whether to compute the diff of each param blob. */ - vector param_propagate_down_; - - /** The vector that indicates whether each top blob has a non-zero weight in - * the objective function. */ - vector loss_; - - /** @brief Using the CPU device, compute the layer output. */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top) = 0; - /** - * @brief Using the GPU device, compute the layer output. - * Fall back to Forward_cpu() if unavailable. - */ - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top) { - // LOG(WARNING) << "Using CPU code as backup."; - return Forward_cpu(bottom, top); - } - - /** - * @brief Using the CPU device, compute the gradients for any parameters and - * for the bottom blobs if propagate_down is true. - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) = 0; - /** - * @brief Using the GPU device, compute the gradients for any parameters and - * for the bottom blobs if propagate_down is true. - * Fall back to Backward_cpu() if unavailable. - */ - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - // LOG(WARNING) << "Using CPU code as backup."; - Backward_cpu(top, propagate_down, bottom); - } - - /** - * Called by the parent Layer's SetUp to check that the number of bottom - * and top Blobs provided as input match the expected numbers specified by - * the {ExactNum,Min,Max}{Bottom,Top}Blobs() functions. 
- */ - virtual void CheckBlobCounts(const vector*>& bottom, - const vector*>& top) { - if (ExactNumBottomBlobs() >= 0) { - CHECK_EQ(ExactNumBottomBlobs(), bottom.size()) - << type() << " Layer takes " << ExactNumBottomBlobs() - << " bottom blob(s) as input."; - } - if (MinBottomBlobs() >= 0) { - CHECK_LE(MinBottomBlobs(), bottom.size()) - << type() << " Layer takes at least " << MinBottomBlobs() - << " bottom blob(s) as input."; - } - if (MaxBottomBlobs() >= 0) { - CHECK_GE(MaxBottomBlobs(), bottom.size()) - << type() << " Layer takes at most " << MaxBottomBlobs() - << " bottom blob(s) as input."; - } - if (ExactNumTopBlobs() >= 0) { - CHECK_EQ(ExactNumTopBlobs(), top.size()) - << type() << " Layer produces " << ExactNumTopBlobs() - << " top blob(s) as output."; - } - if (MinTopBlobs() >= 0) { - CHECK_LE(MinTopBlobs(), top.size()) - << type() << " Layer produces at least " << MinTopBlobs() - << " top blob(s) as output."; - } - if (MaxTopBlobs() >= 0) { - CHECK_GE(MaxTopBlobs(), top.size()) - << type() << " Layer produces at most " << MaxTopBlobs() - << " top blob(s) as output."; - } - if (EqualNumBottomTopBlobs()) { - CHECK_EQ(bottom.size(), top.size()) - << type() << " Layer produces one top blob as output for each " - << "bottom blob input."; - } - } - - /** - * Called by SetUp to initialize the weights associated with any top blobs in - * the loss function. Store non-zero loss weights in the diff blob. - */ - inline void SetLossWeights(const vector*>& top) { - const int num_loss_weights = layer_param_.loss_weight_size(); - if (num_loss_weights) { - CHECK_EQ(top.size(), num_loss_weights) << "loss_weight must be " - "unspecified or specified once per top blob."; - for (int top_id = 0; top_id < top.size(); ++top_id) { - const Dtype loss_weight = layer_param_.loss_weight(top_id); - if (loss_weight == Dtype(0)) { continue; } - this->set_loss(top_id, loss_weight); - const int count = top[top_id]->count(); - Dtype* loss_multiplier = top[top_id]->mutable_cpu_diff(); - caffe_set(count, loss_weight, loss_multiplier); - } - } - } - - private: - DISABLE_COPY_AND_ASSIGN(Layer); -}; // class Layer - -// Forward and backward wrappers. You should implement the cpu and -// gpu specific implementations instead, and should not change these -// functions. 
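As a concrete illustration of that contract, here is a minimal sketch of a CPU-only layer. The name ScaleByTwoLayer is hypothetical; the body assumes the NeuronLayer base and the Blob accessors (cpu_data, mutable_cpu_data, cpu_diff, mutable_cpu_diff, count) declared elsewhere in this tree. With no GPU override, the *_gpu wrappers fall back to the CPU code as described above.
\code{.cpp}
template <typename Dtype>
class ScaleByTwoLayer : public NeuronLayer<Dtype> {
 public:
  explicit ScaleByTwoLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
  virtual inline const char* type() const { return "ScaleByTwo"; }

 protected:
  // y = 2x; the Forward wrapper below dispatches here in CPU mode.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
    const Dtype* in = bottom[0]->cpu_data();
    Dtype* out = top[0]->mutable_cpu_data();
    for (int i = 0; i < bottom[0]->count(); ++i) { out[i] = Dtype(2) * in[i]; }
  }
  // dE/dx = 2 * dE/dy
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) {
    if (!propagate_down[0]) { return; }
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int i = 0; i < top[0]->count(); ++i) {
      bottom_diff[i] = Dtype(2) * top_diff[i];
    }
  }
};
\endcode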
-template -inline Dtype Layer::Forward(const vector*>& bottom, - const vector*>& top) { - Dtype loss = 0; - Reshape(bottom, top); - switch (Caffe::mode()) { - case Caffe::CPU: - Forward_cpu(bottom, top); - for (int top_id = 0; top_id < top.size(); ++top_id) { - if (!this->loss(top_id)) { continue; } - const int count = top[top_id]->count(); - const Dtype* data = top[top_id]->cpu_data(); - const Dtype* loss_weights = top[top_id]->cpu_diff(); - loss += caffe_cpu_dot(count, data, loss_weights); - } - break; - case Caffe::GPU: - Forward_gpu(bottom, top); -#ifndef CPU_ONLY - for (int top_id = 0; top_id < top.size(); ++top_id) { - if (!this->loss(top_id)) { continue; } - const int count = top[top_id]->count(); - const Dtype* data = top[top_id]->gpu_data(); - const Dtype* loss_weights = top[top_id]->gpu_diff(); - Dtype blob_loss = 0; - caffe_gpu_dot(count, data, loss_weights, &blob_loss); - loss += blob_loss; - } -#endif - break; - default: - LOG(FATAL) << "Unknown caffe mode."; - } - return loss; -} - -template -inline void Layer::Backward(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - switch (Caffe::mode()) { - case Caffe::CPU: - Backward_cpu(top, propagate_down, bottom); - break; - case Caffe::GPU: - Backward_gpu(top, propagate_down, bottom); - break; - default: - LOG(FATAL) << "Unknown caffe mode."; - } -} - -// Serialize LayerParameter to protocol buffer -template -void Layer::ToProto(LayerParameter* param, bool write_diff) { - param->Clear(); - param->CopyFrom(layer_param_); - param->clear_blobs(); - for (int i = 0; i < blobs_.size(); ++i) { - blobs_[i]->ToProto(param->add_blobs(), write_diff); - } -} - -} // namespace caffe - -#endif // CAFFE_LAYER_H_ diff --git a/include/caffe/layer_factory.hpp b/include/caffe/layer_factory.hpp deleted file mode 100644 index 2369c13..0000000 --- a/include/caffe/layer_factory.hpp +++ /dev/null @@ -1,141 +0,0 @@ -/** - * @brief A layer factory that allows one to register layers. - * During runtime, registered layers can be called by passing a LayerParameter - * protobuffer to the CreateLayer function: - * - * LayerRegistry::CreateLayer(param); - * - * There are two ways to register a layer. Assuming that we have a layer like: - * - * template - * class MyAwesomeLayer : public Layer { - * // your implementations - * }; - * - * and its type is its C++ class name, but without the "Layer" at the end - * ("MyAwesomeLayer" -> "MyAwesome"). - * - * If the layer is going to be created simply by its constructor, in your c++ - * file, add the following line: - * - * REGISTER_LAYER_CLASS(MyAwesome); - * - * Or, if the layer is going to be created by another creator function, in the - * format of: - * - * template - * Layer GetMyAwesomeLayer(const LayerParameter& param) { - * // your implementation - * } - * - * (for example, when your layer has multiple backends, see GetConvolutionLayer - * for a use case), then you can register the creator function instead, like - * - * REGISTER_LAYER_CREATOR(MyAwesome, GetMyAwesomeLayer) - * - * Note that each layer type should only be registered once. 
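Continuing the hypothetical ScaleByTwoLayer sketch from layer.hpp above, registration and lookup would then look roughly like this; the prototxt type string is the class name without the "Layer" suffix.
\code{.cpp}
// In the layer's .cpp file: registers creators for both float and double.
REGISTER_LAYER_CLASS(ScaleByTwo);

// At runtime, callers such as Net::Init resolve the type string through the
// registry (names below are illustrative):
LayerParameter param;
param.set_name("double_it");
param.set_type("ScaleByTwo");
shared_ptr<Layer<float> > layer = LayerRegistry<float>::CreateLayer(param);
\endcode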
- */ - -#ifndef CAFFE_LAYER_FACTORY_H_ -#define CAFFE_LAYER_FACTORY_H_ - -#include -#include -#include - -#include "caffe/common.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -template -class Layer; - -template -class LayerRegistry { - public: - typedef shared_ptr > (*Creator)(const LayerParameter&); - typedef std::map CreatorRegistry; - - static CreatorRegistry& Registry() { - static CreatorRegistry* g_registry_ = new CreatorRegistry(); - return *g_registry_; - } - - // Adds a creator. - static void AddCreator(const string& type, Creator creator) { - CreatorRegistry& registry = Registry(); - CHECK_EQ(registry.count(type), 0) - << "Layer type " << type << " already registered."; - registry[type] = creator; - } - - // Get a layer using a LayerParameter. - static shared_ptr > CreateLayer(const LayerParameter& param) { - if (Caffe::root_solver()) { - LOG(INFO) << "Creating layer " << param.name(); - } - const string& type = param.type(); - CreatorRegistry& registry = Registry(); - CHECK_EQ(registry.count(type), 1) << "Unknown layer type: " << type - << " (known types: " << LayerTypeListString() << ")"; - return registry[type](param); - } - - static vector LayerTypeList() { - CreatorRegistry& registry = Registry(); - vector layer_types; - for (typename CreatorRegistry::iterator iter = registry.begin(); - iter != registry.end(); ++iter) { - layer_types.push_back(iter->first); - } - return layer_types; - } - - private: - // Layer registry should never be instantiated - everything is done with its - // static variables. - LayerRegistry() {} - - static string LayerTypeListString() { - vector layer_types = LayerTypeList(); - string layer_types_str; - for (vector::iterator iter = layer_types.begin(); - iter != layer_types.end(); ++iter) { - if (iter != layer_types.begin()) { - layer_types_str += ", "; - } - layer_types_str += *iter; - } - return layer_types_str; - } -}; - - -template -class LayerRegisterer { - public: - LayerRegisterer(const string& type, - shared_ptr > (*creator)(const LayerParameter&)) { - // LOG(INFO) << "Registering layer type: " << type; - LayerRegistry::AddCreator(type, creator); - } -}; - - -#define REGISTER_LAYER_CREATOR(type, creator) \ - static LayerRegisterer g_creator_f_##type(#type, creator); \ - static LayerRegisterer g_creator_d_##type(#type, creator) \ - -#define REGISTER_LAYER_CLASS(type) \ - template \ - shared_ptr > Creator_##type##Layer(const LayerParameter& param) \ - { \ - return shared_ptr >(new type##Layer(param)); \ - } \ - REGISTER_LAYER_CREATOR(type, Creator_##type##Layer) - -} // namespace caffe - -#endif // CAFFE_LAYER_FACTORY_H_ diff --git a/include/caffe/layers/base_conv_layer.hpp b/include/caffe/layers/base_conv_layer.hpp deleted file mode 100644 index 0160a83..0000000 --- a/include/caffe/layers/base_conv_layer.hpp +++ /dev/null @@ -1,174 +0,0 @@ -#ifndef CAFFE_BASE_CONVOLUTION_LAYER_HPP_ -#define CAFFE_BASE_CONVOLUTION_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/util/im2col.hpp" - -namespace caffe { - -/** - * @brief Abstract base class that factors out the BLAS code common to - * ConvolutionLayer and DeconvolutionLayer. 
- */ -template -class BaseConvolutionLayer : public Layer { - public: - explicit BaseConvolutionLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline int MinBottomBlobs() const { return 1; } - virtual inline int MinTopBlobs() const { return 1; } - virtual inline bool EqualNumBottomTopBlobs() const { return true; } - - protected: - // Helper functions that abstract away the column buffer and gemm arguments. - // The last argument in forward_cpu_gemm is so that we can skip the im2col if - // we just called weight_cpu_gemm with the same input. - void forward_cpu_gemm(const Dtype* input, const Dtype* weights, - Dtype* output, bool skip_im2col = false); - void forward_cpu_bias(Dtype* output, const Dtype* bias); - void backward_cpu_gemm(const Dtype* input, const Dtype* weights, - Dtype* output); - void weight_cpu_gemm(const Dtype* input, const Dtype* output, Dtype* - weights); - void backward_cpu_bias(Dtype* bias, const Dtype* input); - -#ifndef CPU_ONLY - void forward_gpu_gemm(const Dtype* col_input, const Dtype* weights, - Dtype* output, bool skip_im2col = false); - void forward_gpu_bias(Dtype* output, const Dtype* bias); - void backward_gpu_gemm(const Dtype* input, const Dtype* weights, - Dtype* col_output); - void weight_gpu_gemm(const Dtype* col_input, const Dtype* output, Dtype* - weights); - void backward_gpu_bias(Dtype* bias, const Dtype* input); -#endif - - /// @brief The spatial dimensions of the input. - inline int input_shape(int i) { - return (*bottom_shape_)[channel_axis_ + i]; - } - // reverse_dimensions should return true iff we are implementing deconv, so - // that conv helpers know which dimensions are which. - virtual bool reverse_dimensions() = 0; - // Compute height_out_ and width_out_ from other parameters. - virtual void compute_output_shape() = 0; - - /// @brief The spatial dimensions of a filter kernel. - Blob kernel_shape_; - /// @brief The spatial dimensions of the stride. - Blob stride_; - /// @brief The spatial dimensions of the padding. - Blob pad_; - /// @brief The spatial dimensions of the dilation. - Blob dilation_; - /// @brief The spatial dimensions of the convolution input. - Blob conv_input_shape_; - /// @brief The spatial dimensions of the col_buffer. - vector col_buffer_shape_; - /// @brief The spatial dimensions of the output. 
- vector output_shape_; - const vector* bottom_shape_; - - int num_spatial_axes_; - int bottom_dim_; - int top_dim_; - - int channel_axis_; - int num_; - int channels_; - int group_; - int out_spatial_dim_; - int weight_offset_; - int num_output_; - bool bias_term_; - bool is_1x1_; - bool force_nd_im2col_; - - private: - // wrap im2col/col2im so we don't have to remember the (long) argument lists - inline void conv_im2col_cpu(const Dtype* data, Dtype* col_buff) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - im2col_cpu(data, conv_in_channels_, - conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], - dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff); - } else { - im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(), - col_buffer_shape_.data(), kernel_shape_.cpu_data(), - pad_.cpu_data(), stride_.cpu_data(), dilation_.cpu_data(), col_buff); - } - } - inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - col2im_cpu(col_buff, conv_in_channels_, - conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], - dilation_.cpu_data()[0], dilation_.cpu_data()[1], data); - } else { - col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(), - col_buffer_shape_.data(), kernel_shape_.cpu_data(), - pad_.cpu_data(), stride_.cpu_data(), dilation_.cpu_data(), data); - } - } -#ifndef CPU_ONLY - inline void conv_im2col_gpu(const Dtype* data, Dtype* col_buff) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - im2col_gpu(data, conv_in_channels_, - conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], - dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff); - } else { - im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_, - conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), - kernel_shape_.gpu_data(), pad_.gpu_data(), - stride_.gpu_data(), dilation_.gpu_data(), col_buff); - } - } - inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - col2im_gpu(col_buff, conv_in_channels_, - conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2], - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], - dilation_.cpu_data()[0], dilation_.cpu_data()[1], data); - } else { - col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_, - conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(), - kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(), - dilation_.gpu_data(), data); - } - } -#endif - - int num_kernels_im2col_; - int num_kernels_col2im_; - int conv_out_channels_; - int conv_in_channels_; - int conv_out_spatial_dim_; - int kernel_dim_; - int col_offset_; - int output_offset_; - - Blob col_buffer_; - Blob bias_multiplier_; -}; - -} // namespace caffe - -#endif // CAFFE_BASE_CONVOLUTION_LAYER_HPP_ diff --git a/include/caffe/layers/base_data_layer.hpp b/include/caffe/layers/base_data_layer.hpp deleted file mode 100644 index 
d1e921f..0000000 --- a/include/caffe/layers/base_data_layer.hpp +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef CAFFE_DATA_LAYERS_HPP_ -#define CAFFE_DATA_LAYERS_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/data_transformer.hpp" -#ifdef USE_BOOST -#include "caffe/internal_thread.hpp" -#endif -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/util/blocking_queue.hpp" - -namespace caffe { - -/** - * @brief Provides base for data layers that feed blobs to the Net. - * - * TODO(dox): thorough documentation for Forward and proto params. - */ -template -class BaseDataLayer : public Layer { - public: - explicit BaseDataLayer(const LayerParameter& param); - // LayerSetUp: implements common data layer setup functionality, and calls - // DataLayerSetUp to do special data layer setup for individual layer types. - // This method may not be overridden except by the BasePrefetchingDataLayer. - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - // Data layers should be shared by multiple solvers in parallel - virtual inline bool ShareInParallel() const { return true; } - virtual void DataLayerSetUp(const vector*>& bottom, - const vector*>& top) {} - // Data layers have no bottoms, so reshaping is trivial. - virtual void Reshape(const vector*>& bottom, - const vector*>& top) {} - - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) {} - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) {} - - protected: - TransformationParameter transform_param_; - shared_ptr > data_transformer_; - bool output_labels_; -}; - -template -class Batch { - public: - Blob data_, label_; -}; - -#ifdef NO_CAFFE_MOBILE -template -class BasePrefetchingDataLayer : - public BaseDataLayer, public InternalThread { - public: - explicit BasePrefetchingDataLayer(const LayerParameter& param); - // LayerSetUp: implements common data layer setup functionality, and calls - // DataLayerSetUp to do special data layer setup for individual layer types. - // This method may not be overridden. - void LayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - protected: - virtual void InternalThreadEntry(); - virtual void load_batch(Batch* batch) = 0; - - vector > > prefetch_; - BlockingQueue*> prefetch_free_; - BlockingQueue*> prefetch_full_; - Batch* prefetch_current_; - - Blob transformed_data_; -}; -#endif // NO_CAFFE_MOBILE - -} // namespace caffe - -#endif // CAFFE_DATA_LAYERS_HPP_ diff --git a/include/caffe/layers/bnll_layer.hpp b/include/caffe/layers/bnll_layer.hpp deleted file mode 100644 index be07c74..0000000 --- a/include/caffe/layers/bnll_layer.hpp +++ /dev/null @@ -1,70 +0,0 @@ -#ifndef CAFFE_BNLL_LAYER_HPP_ -#define CAFFE_BNLL_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -#include "caffe/layers/neuron_layer.hpp" - -namespace caffe { - -/** - * @brief Computes @f$ y = x + \log(1 + \exp(-x)) @f$ if @f$ x > 0 @f$; - * @f$ y = \log(1 + \exp(x)) @f$ otherwise. 
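A scalar sketch of the piecewise form above (not the layer's actual kernel); using log1p keeps both branches finite for large |x|.
\code{.cpp}
#include <cmath>

template <typename Dtype>
Dtype bnll(Dtype x) {
  return x > 0 ? x + std::log1p(std::exp(-x))   // x + log(1 + exp(-x))
               : std::log1p(std::exp(x));       // log(1 + exp(x))
}
// bnll(0.0) == log(2) ~ 0.693;  bnll(40.0) ~ 40.0;  bnll(-40.0) ~ exp(-40).
\endcode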
- * - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = \left\{ - * \begin{array}{ll} - * x + \log(1 + \exp(-x)) & \mbox{if } x > 0 \\ - * \log(1 + \exp(x)) & \mbox{otherwise} - * \end{array} \right. - * @f$ - */ -template -class BNLLLayer : public NeuronLayer { - public: - explicit BNLLLayer(const LayerParameter& param) - : NeuronLayer(param) {} - - virtual inline const char* type() const { return "BNLL"; } - - protected: - /// @copydoc BNLLLayer - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the BNLL inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} - * @f$ if propagate_down[0] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -} // namespace caffe - -#endif // CAFFE_BNLL_LAYER_HPP_ diff --git a/include/caffe/layers/concat_layer.hpp b/include/caffe/layers/concat_layer.hpp deleted file mode 100644 index a157024..0000000 --- a/include/caffe/layers/concat_layer.hpp +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef CAFFE_CONCAT_LAYER_HPP_ -#define CAFFE_CONCAT_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief Takes at least two Blob%s and concatenates them along either the num - * or channel dimension, outputting the result. - */ -template -class ConcatLayer : public Layer { - public: - explicit ConcatLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Concat"; } - virtual inline int MinBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - /** - * @param bottom input Blob vector (length 2+) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x_1 @f$ - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x_2 @f$ - * -# ... - * - K @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x_K @f$ - * @param top output Blob vector (length 1) - * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or - * @f$ (N \times KC \times H \times W) @f$ if axis == 1: - * the concatenated output @f$ - * y = [\begin{array}{cccc} x_1 & x_2 & ... & x_K \end{array}] - * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the concatenate inputs. 
- * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or - * @f$ (N \times KC \times H \times W) @f$ if axis == 1: - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to concatenated outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length K), into which the top gradient - * @f$ \frac{\partial E}{\partial y} @f$ is deconcatenated back to the - * inputs @f$ - * \left[ \begin{array}{cccc} - * \frac{\partial E}{\partial x_1} & - * \frac{\partial E}{\partial x_2} & - * ... & - * \frac{\partial E}{\partial x_K} - * \end{array} \right] = - * \frac{\partial E}{\partial y} - * @f$ - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int count_; - int num_concats_; - int concat_input_size_; - int concat_axis_; -}; - -} // namespace caffe - -#endif // CAFFE_CONCAT_LAYER_HPP_ diff --git a/include/caffe/layers/conv_layer.hpp b/include/caffe/layers/conv_layer.hpp deleted file mode 100644 index 93a618d..0000000 --- a/include/caffe/layers/conv_layer.hpp +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef CAFFE_CONV_LAYER_HPP_ -#define CAFFE_CONV_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -#include "caffe/layers/base_conv_layer.hpp" - -namespace caffe { - -/** - * @brief Convolves the input image with a bank of learned filters, - * and (optionally) adds biases. - * - * Caffe convolves by reduction to matrix multiplication. This achieves - * high-throughput and generality of input and filter dimensions but comes at - * the cost of memory for matrices. This makes use of efficiency in BLAS. - * - * The input is "im2col" transformed to a channel K' x H x W data matrix - * for multiplication with the N x K' x H x W filter matrix to yield a - * N' x H x W output matrix that is then "col2im" restored. K' is the - * input channel * kernel height * kernel width dimension of the unrolled - * inputs so that the im2col matrix has a column for each input region to - * be filtered. col2im restores the output spatial structure by rolling up - * the output channel N' columns of the output matrix. - */ -template -class ConvolutionLayer : public BaseConvolutionLayer { - public: - /** - * @param param provides ConvolutionParameter convolution_param, - * with ConvolutionLayer options: - * - num_output. The number of filters. - * - kernel_size / kernel_h / kernel_w. The filter dimensions, given by - * kernel_size for square filters or kernel_h and kernel_w for rectangular - * filters. - * - stride / stride_h / stride_w (\b optional, default 1). The filter - * stride, given by stride_size for equal dimensions or stride_h and stride_w - * for different strides. By default the convolution is dense with stride 1. - * - pad / pad_h / pad_w (\b optional, default 0). The zero-padding for - * convolution, given by pad for equal dimensions or pad_h and pad_w for - * different padding. Input padding is computed implicitly instead of - * actually padding. - * - dilation (\b optional, default 1). The filter - * dilation, given by dilation_size for equal dimensions for different - * dilation. By default the convolution has dilation 1. - * - group (\b optional, default 1). The number of filter groups. 
Group - * convolution is a method for reducing parameterization by selectively - * connecting input and output channels. The input and output channel dimensions must be divisible - * by the number of groups. For group @f$ \geq 1 @f$, the - * convolutional filters' input and output channels are separated s.t. each - * group takes 1 / group of the input channels and makes 1 / group of the - * output channels. Concretely 4 input channels, 8 output channels, and - * 2 groups separate input channels 1-2 and output channels 1-4 into the - * first group and input channels 3-4 and output channels 5-8 into the second - * group. - * - bias_term (\b optional, default true). Whether to have a bias. - * - engine: convolution has CAFFE (matrix multiplication) and CUDNN (library - * kernels + stream parallelism) engines. - */ - explicit ConvolutionLayer(const LayerParameter& param) - : BaseConvolutionLayer(param) {} - - virtual inline const char* type() const { return "Convolution"; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual inline bool reverse_dimensions() { return false; } - virtual void compute_output_shape(); -}; - -} // namespace caffe - -#endif // CAFFE_CONV_LAYER_HPP_ diff --git a/include/caffe/layers/dropout_layer.hpp b/include/caffe/layers/dropout_layer.hpp deleted file mode 100644 index e83143b..0000000 --- a/include/caffe/layers/dropout_layer.hpp +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef CAFFE_DROPOUT_LAYER_HPP_ -#define CAFFE_DROPOUT_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -#include "caffe/layers/neuron_layer.hpp" - -namespace caffe { - -/** - * @brief During training only, sets a random portion of @f$x@f$ to 0, adjusting - * the rest of the vector magnitude accordingly. - * - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ y = |x| @f$ - */ -template -class DropoutLayer : public NeuronLayer { - public: - /** - * @param param provides DropoutParameter dropout_param, - * with DropoutLayer options: - * - dropout_ratio (\b optional, default 0.5). - * Sets the probability @f$ p @f$ that any given unit is dropped. - */ - explicit DropoutLayer(const LayerParameter& param) - : NeuronLayer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Dropout"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs. At training time, we have @f$ - * y_{\mbox{train}} = \left\{ - * \begin{array}{ll} - * \frac{x}{1 - p} & \mbox{if } u > p \\ - * 0 & \mbox{otherwise} - * \end{array} \right. - * @f$, where @f$ u \sim U(0, 1)@f$ is generated independently for each - * input at each iteration. 
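For reference, a self-contained sketch of that train-time rule (often called inverted dropout). This is not the layer's actual implementation, which draws its mask into rand_vec_ instead; the function and its arguments are illustrative.
\code{.cpp}
#include <cstddef>
#include <random>
#include <vector>

// Zero each unit with probability p and scale survivors by 1/(1-p), so the
// expected output equals the input and the test-time pass is a plain copy.
void dropout_forward_train(const std::vector<float>& x, float p,
                           std::vector<float>* y, std::mt19937* rng) {
  std::uniform_real_distribution<float> u(0.f, 1.f);
  const float scale = 1.f / (1.f - p);
  y->resize(x.size());
  for (std::size_t i = 0; i < x.size(); ++i) {
    (*y)[i] = (u(*rng) > p) ? scale * x[i] : 0.f;
  }
}
\endcode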
At test time, we simply have - * @f$ y_{\mbox{test}} = \mathbb{E}[y_{\mbox{train}}] = x @f$. - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - /// when divided by UINT_MAX, the randomly generated values @f$u\sim U(0,1)@f$ - Blob rand_vec_; - /// the probability @f$ p @f$ of dropping any input - Dtype threshold_; - /// the scale for undropped inputs at train time @f$ 1 / (1 - p) @f$ - Dtype scale_; - unsigned int uint_thres_; -}; - -} // namespace caffe - -#endif // CAFFE_DROPOUT_LAYER_HPP_ diff --git a/include/caffe/layers/eltwise_layer.hpp b/include/caffe/layers/eltwise_layer.hpp deleted file mode 100644 index 091de83..0000000 --- a/include/caffe/layers/eltwise_layer.hpp +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef CAFFE_ELTWISE_LAYER_HPP_ -#define CAFFE_ELTWISE_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief Compute elementwise operations, such as product and sum, - * along multiple input Blobs. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class EltwiseLayer : public Layer { - public: - explicit EltwiseLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Eltwise"; } - virtual inline int MinBottomBlobs() const { return 2; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - EltwiseParameter_EltwiseOp op_; - vector coeffs_; - Blob max_idx_; - - bool stable_prod_grad_; -}; - -} // namespace caffe - -#endif // CAFFE_ELTWISE_LAYER_HPP_ diff --git a/include/caffe/layers/flatten_layer.hpp b/include/caffe/layers/flatten_layer.hpp deleted file mode 100644 index e494bbb..0000000 --- a/include/caffe/layers/flatten_layer.hpp +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef CAFFE_FLATTEN_LAYER_HPP_ -#define CAFFE_FLATTEN_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief Reshapes the input Blob into flat vectors. - * - * Note: because this layer does not change the input values -- merely the - * dimensions -- it can simply copy the input. The copy happens "virtually" - * (thus taking effectively 0 real time) by setting, in Forward, the data - * pointer of the top Blob to that of the bottom Blob (see Blob::ShareData), - * and in Backward, the diff pointer of the bottom Blob to that of the top Blob - * (see Blob::ShareDiff). 
- */ -template -class FlattenLayer : public Layer { - public: - explicit FlattenLayer(const LayerParameter& param) - : Layer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Flatten"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - /** - * @param bottom input Blob vector (length 2+) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs - * @param top output Blob vector (length 1) - * -# @f$ (N \times CHW \times 1 \times 1) @f$ - * the outputs -- i.e., the (virtually) copied, flattened inputs - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the concatenate inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length K), into which the top error - * gradient is (virtually) copied - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -} // namespace caffe - -#endif // CAFFE_FLATTEN_LAYER_HPP_ diff --git a/include/caffe/layers/im2col_layer.hpp b/include/caffe/layers/im2col_layer.hpp deleted file mode 100644 index 71e32f7..0000000 --- a/include/caffe/layers/im2col_layer.hpp +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef CAFFE_IM2COL_LAYER_HPP_ -#define CAFFE_IM2COL_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief A helper for image operations that rearranges image regions into - * column vectors. Used by ConvolutionLayer to perform convolution - * by matrix multiplication. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class Im2colLayer : public Layer { - public: - explicit Im2colLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Im2col"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - /// @brief The spatial dimensions of a filter kernel. - Blob kernel_shape_; - /// @brief The spatial dimensions of the stride. - Blob stride_; - /// @brief The spatial dimensions of the padding. - Blob pad_; - /// @brief The spatial dimensions of the dilation. 
- Blob dilation_; - - int num_spatial_axes_; - int bottom_dim_; - int top_dim_; - - int channel_axis_; - int num_; - int channels_; - - bool force_nd_im2col_; -}; - -} // namespace caffe - -#endif // CAFFE_IM2COL_LAYER_HPP_ diff --git a/include/caffe/layers/inner_product_layer.hpp b/include/caffe/layers/inner_product_layer.hpp deleted file mode 100644 index 18d0d61..0000000 --- a/include/caffe/layers/inner_product_layer.hpp +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef CAFFE_INNER_PRODUCT_LAYER_HPP_ -#define CAFFE_INNER_PRODUCT_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief Also known as a "fully-connected" layer, computes an inner product - * with a set of learned weights, and (optionally) adds biases. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class InnerProductLayer : public Layer { - public: - explicit InnerProductLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "InnerProduct"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int M_; - int K_; - int N_; - bool bias_term_; - Blob bias_multiplier_; - bool transpose_; ///< if true, assume transposed weights -}; - -} // namespace caffe - -#endif // CAFFE_INNER_PRODUCT_LAYER_HPP_ diff --git a/include/caffe/layers/input_layer.hpp b/include/caffe/layers/input_layer.hpp deleted file mode 100644 index f447267..0000000 --- a/include/caffe/layers/input_layer.hpp +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef CAFFE_INPUT_LAYER_HPP_ -#define CAFFE_INPUT_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief Provides data to the Net by assigning tops directly. - * - * This data layer is a container that merely holds the data assigned to it; - * forward, backward, and reshape are all no-ops. - */ -template -class InputLayer : public Layer { - public: - explicit InputLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - // Data layers should be shared by multiple solvers in parallel - virtual inline bool ShareInParallel() const { return true; } - // Data layers have no bottoms, so reshaping is trivial. 
- virtual void Reshape(const vector*>& bottom, - const vector*>& top) {} - - virtual inline const char* type() const { return "Input"; } - virtual inline int ExactNumBottomBlobs() const { return 0; } - virtual inline int MinTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top) {} - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) {} -}; - -} // namespace caffe - -#endif // CAFFE_INPUT_LAYER_HPP_ diff --git a/include/caffe/layers/loss_layer.hpp b/include/caffe/layers/loss_layer.hpp deleted file mode 100644 index dbdf612..0000000 --- a/include/caffe/layers/loss_layer.hpp +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef CAFFE_LOSS_LAYER_HPP_ -#define CAFFE_LOSS_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -const float kLOG_THRESHOLD = 1e-20; - -/** - * @brief An interface for Layer%s that take two Blob%s as input -- usually - * (1) predictions and (2) ground-truth labels -- and output a - * singleton Blob representing the loss. - * - * LossLayers are typically only capable of backpropagating to their first input - * -- the predictions. - */ -template -class LossLayer : public Layer { - public: - explicit LossLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp( - const vector*>& bottom, const vector*>& top); - virtual void Reshape( - const vector*>& bottom, const vector*>& top); - - virtual inline int ExactNumBottomBlobs() const { return 2; } - - /** - * @brief For convenience and backwards compatibility, instruct the Net to - * automatically allocate a single top Blob for LossLayers, into which - * they output their singleton loss, (even if the user didn't specify - * one in the prototxt, etc.). - */ - virtual inline bool AutoTopBlobs() const { return true; } - virtual inline int ExactNumTopBlobs() const { return 1; } - /** - * We usually cannot backpropagate to the labels; ignore force_backward for - * these inputs. - */ - virtual inline bool AllowForceBackward(const int bottom_index) const { - return bottom_index != 1; - } -}; - -} // namespace caffe - -#endif // CAFFE_LOSS_LAYER_HPP_ diff --git a/include/caffe/layers/lrn_layer.hpp b/include/caffe/layers/lrn_layer.hpp deleted file mode 100644 index 06cf71a..0000000 --- a/include/caffe/layers/lrn_layer.hpp +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef CAFFE_LRN_LAYER_HPP_ -#define CAFFE_LRN_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -#include "caffe/layers/eltwise_layer.hpp" -#include "caffe/layers/pooling_layer.hpp" -#include "caffe/layers/power_layer.hpp" -#include "caffe/layers/split_layer.hpp" - -namespace caffe { - -/** - * @brief Normalize the input in a local region across or within feature maps. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
- */ -template -class LRNLayer : public Layer { - public: - explicit LRNLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "LRN"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - virtual void CrossChannelForward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void CrossChannelForward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void WithinChannelForward(const vector*>& bottom, - const vector*>& top); - virtual void CrossChannelBackward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void CrossChannelBackward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void WithinChannelBackward(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int size_; - int pre_pad_; - Dtype alpha_; - Dtype beta_; - Dtype k_; - int num_; - int channels_; - int height_; - int width_; - - // Fields used for normalization ACROSS_CHANNELS - // scale_ stores the intermediate summing results - Blob scale_; - - // Fields used for normalization WITHIN_CHANNEL - shared_ptr > split_layer_; - vector*> split_top_vec_; - shared_ptr > square_layer_; - Blob square_input_; - Blob square_output_; - vector*> square_bottom_vec_; - vector*> square_top_vec_; - shared_ptr > pool_layer_; - Blob pool_output_; - vector*> pool_top_vec_; - shared_ptr > power_layer_; - Blob power_output_; - vector*> power_top_vec_; - shared_ptr > product_layer_; - Blob product_input_; - vector*> product_bottom_vec_; -}; - -} // namespace caffe - -#endif // CAFFE_LRN_LAYER_HPP_ diff --git a/include/caffe/layers/memory_data_layer.hpp b/include/caffe/layers/memory_data_layer.hpp deleted file mode 100644 index 8abcc8c..0000000 --- a/include/caffe/layers/memory_data_layer.hpp +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef CAFFE_MEMORY_DATA_LAYER_HPP_ -#define CAFFE_MEMORY_DATA_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -#include "caffe/layers/base_data_layer.hpp" - -namespace caffe { - -/** - * @brief Provides data to the Net from memory. - * - * TODO(dox): thorough documentation for Forward and proto params. 
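For the ACROSS_CHANNELS path of the LRNLayer declared above, each activation is divided by a power of a local sum of squares over neighbouring channels: scale[c] = k + (alpha / size) * sum(x[j]^2) over the window centred at c, and y[c] = x[c] / scale[c]^beta. A small standalone sketch of that computation at a single spatial position follows; it is an illustration of the formula, not the removed implementation.

// Cross-channel LRN at one spatial position.
#include <cmath>
#include <cstdio>
#include <vector>

static std::vector<float> lrn_across_channels(const std::vector<float>& x,
                                               int size, float alpha,
                                               float beta, float k) {
  const int C = static_cast<int>(x.size());
  const int pre_pad = (size - 1) / 2;
  std::vector<float> y(C);
  for (int c = 0; c < C; ++c) {
    float sum_sq = 0.f;
    for (int j = c - pre_pad; j < c - pre_pad + size; ++j)
      if (j >= 0 && j < C) sum_sq += x[j] * x[j];   // window clipped at the channel edges
    const float scale = k + (alpha / size) * sum_sq;
    y[c] = x[c] / std::pow(scale, beta);
  }
  return y;
}

int main() {
  std::vector<float> x = {1.f, 2.f, 3.f, 4.f};
  std::vector<float> y = lrn_across_channels(x, 3, 1e-4f, 0.75f, 1.f);
  for (float v : y) std::printf("%.4f ", v);
  std::printf("\n");
  return 0;
}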
- */ -template -class MemoryDataLayer : public BaseDataLayer { - public: - explicit MemoryDataLayer(const LayerParameter& param) - : BaseDataLayer(param), has_new_data_(false) {} - virtual void DataLayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "MemoryData"; } - virtual inline int ExactNumBottomBlobs() const { return 0; } - virtual inline int ExactNumTopBlobs() const { return 2; } - - virtual void AddDatumVector(const vector& datum_vector); -#ifdef USE_OPENCV - virtual void AddMatVector(const vector& mat_vector, - const vector& labels); -#endif // USE_OPENCV - - // Reset should accept const pointers, but can't, because the memory - // will be given to Blob, which is mutable - void Reset(Dtype* data, Dtype* label, int n); - void set_batch_size(int new_size); - - int batch_size() { return batch_size_; } - int channels() { return channels_; } - int height() { return height_; } - int width() { return width_; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - - int batch_size_, channels_, height_, width_, size_; - Dtype* data_; - Dtype* labels_; - int n_; - size_t pos_; - Blob added_data_; - Blob added_label_; - bool has_new_data_; -}; - -} // namespace caffe - -#endif // CAFFE_MEMORY_DATA_LAYER_HPP_ diff --git a/include/caffe/layers/neuron_layer.hpp b/include/caffe/layers/neuron_layer.hpp deleted file mode 100644 index 10c108c..0000000 --- a/include/caffe/layers/neuron_layer.hpp +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef CAFFE_NEURON_LAYER_HPP_ -#define CAFFE_NEURON_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief An interface for layers that take one blob as input (@f$ x @f$) - * and produce one equally-sized blob as output (@f$ y @f$), where - * each element of the output depends only on the corresponding input - * element. - */ -template -class NeuronLayer : public Layer { - public: - explicit NeuronLayer(const LayerParameter& param) - : Layer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } -}; - -} // namespace caffe - -#endif // CAFFE_NEURON_LAYER_HPP_ diff --git a/include/caffe/layers/pooling_layer.hpp b/include/caffe/layers/pooling_layer.hpp deleted file mode 100644 index f4d6803..0000000 --- a/include/caffe/layers/pooling_layer.hpp +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef CAFFE_POOLING_LAYER_HPP_ -#define CAFFE_POOLING_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief Pools the input image by taking the max, average, etc. within regions. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class PoolingLayer : public Layer { - public: - explicit PoolingLayer(const LayerParameter& param) - : Layer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Pooling"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int MinTopBlobs() const { return 1; } - // MAX POOL layers can output an extra top blob for the mask; - // others can only output the pooled inputs. 
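The MemoryDataLayer declared above serves pre-loaded data in fixed-size batches, advancing an internal cursor (pos_) and wrapping around the buffer; in Caffe the buffer length must be a multiple of the batch size. The toy cursor below only illustrates that wrap-around bookkeeping under that assumption; it is not the removed layer's code.

// Batch cursor over a pre-loaded buffer: each call hands out batch_size rows
// starting at pos and wraps at n (n assumed to be a multiple of batch_size).
#include <cstdio>
#include <vector>

struct MemoryCursor {
  const float* data;   // n * row_size values supplied by the caller
  int n, row_size, batch_size;
  int pos;

  const float* next_batch() {
    const float* batch = data + static_cast<size_t>(pos) * row_size;
    pos = (pos + batch_size) % n;   // wrap to the start once the buffer is exhausted
    return batch;
  }
};

int main() {
  std::vector<float> rows = {0, 1, 2, 3, 4, 5};   // six rows of one value each
  MemoryCursor c{rows.data(), 6, 1, 2, 0};
  for (int i = 0; i < 4; ++i)                      // batches start at rows 0, 2, 4, then 0 again
    std::printf("batch starts at row value %.0f\n", *c.next_batch());
  return 0;
}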
- virtual inline int MaxTopBlobs() const { - return (this->layer_param_.pooling_param().pool() == - PoolingParameter_PoolMethod_MAX) ? 2 : 1; - } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int kernel_h_, kernel_w_; - int stride_h_, stride_w_; - int pad_h_, pad_w_; - int channels_; - int height_, width_; - int pooled_height_, pooled_width_; - bool global_pooling_; - Blob rand_idx_; - Blob max_idx_; -}; - -} // namespace caffe - -#endif // CAFFE_POOLING_LAYER_HPP_ diff --git a/include/caffe/layers/power_layer.hpp b/include/caffe/layers/power_layer.hpp deleted file mode 100644 index 6ecbafc..0000000 --- a/include/caffe/layers/power_layer.hpp +++ /dev/null @@ -1,89 +0,0 @@ -#ifndef CAFFE_POWER_LAYER_HPP_ -#define CAFFE_POWER_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -#include "caffe/layers/neuron_layer.hpp" - -namespace caffe { - -/** - * @brief Computes @f$ y = (\alpha x + \beta) ^ \gamma @f$, - * as specified by the scale @f$ \alpha @f$, shift @f$ \beta @f$, - * and power @f$ \gamma @f$. - */ -template -class PowerLayer : public NeuronLayer { - public: - /** - * @param param provides PowerParameter power_param, - * with PowerLayer options: - * - scale (\b optional, default 1) the scale @f$ \alpha @f$ - * - shift (\b optional, default 0) the shift @f$ \beta @f$ - * - power (\b optional, default 1) the power @f$ \gamma @f$ - */ - explicit PowerLayer(const LayerParameter& param) - : NeuronLayer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Power"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = (\alpha x + \beta) ^ \gamma - * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the power inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. 
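As the MaxTopBlobs() override above notes, MAX pooling may emit a second top blob holding the argmax mask that the backward pass uses to route gradients. The following standalone sketch of 2x2/stride-2 max pooling records that winning index; it is illustrative only.

// Max pooling over one channel, recording the argmax index ("mask").
#include <cstdio>
#include <vector>

static void max_pool_2x2(const std::vector<float>& in, int H, int W,
                         std::vector<float>& out, std::vector<int>& mask) {
  const int PH = H / 2, PW = W / 2;
  out.assign(PH * PW, 0.f);
  mask.assign(PH * PW, -1);
  for (int ph = 0; ph < PH; ++ph)
    for (int pw = 0; pw < PW; ++pw) {
      float best = -1e30f;
      int best_idx = -1;
      for (int dh = 0; dh < 2; ++dh)
        for (int dw = 0; dw < 2; ++dw) {
          const int idx = (2 * ph + dh) * W + (2 * pw + dw);
          if (in[idx] > best) { best = in[idx]; best_idx = idx; }
        }
      out[ph * PW + pw] = best;
      mask[ph * PW + pw] = best_idx;   // backward sends the gradient to this input element
    }
}

int main() {
  std::vector<float> in = {1, 2,  5, 0,
                           3, 4,  1, 1};   // 2x4 image
  std::vector<float> out;
  std::vector<int> mask;
  max_pool_2x2(in, 2, 4, out, mask);
  std::printf("%.0f@%d %.0f@%d\n", out[0], mask[0], out[1], mask[1]);  // 4@5 5@2
  return 0;
}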
- * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} = - * \frac{\partial E}{\partial y} - * \alpha \gamma (\alpha x + \beta) ^ {\gamma - 1} = - * \frac{\partial E}{\partial y} - * \frac{\alpha \gamma y}{\alpha x + \beta} - * @f$ if propagate_down[0] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - /// @brief @f$ \gamma @f$ from layer_param_.power_param() - Dtype power_; - /// @brief @f$ \alpha @f$ from layer_param_.power_param() - Dtype scale_; - /// @brief @f$ \beta @f$ from layer_param_.power_param() - Dtype shift_; - /// @brief Result of @f$ \alpha \gamma @f$ - Dtype diff_scale_; -}; - -} // namespace caffe - -#endif // CAFFE_POWER_LAYER_HPP_ diff --git a/include/caffe/layers/relu_layer.hpp b/include/caffe/layers/relu_layer.hpp deleted file mode 100644 index d7a73f7..0000000 --- a/include/caffe/layers/relu_layer.hpp +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef CAFFE_RELU_LAYER_HPP_ -#define CAFFE_RELU_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -#include "caffe/layers/neuron_layer.hpp" - -namespace caffe { - -/** - * @brief Rectified Linear Unit non-linearity @f$ y = \max(0, x) @f$. - * The simple max is fast to compute, and the function does not saturate. - */ -template -class ReLULayer : public NeuronLayer { - public: - /** - * @param param provides ReLUParameter relu_param, - * with ReLULayer options: - * - negative_slope (\b optional, default 0). - * the value @f$ \nu @f$ by which negative values are multiplied. - */ - explicit ReLULayer(const LayerParameter& param) - : NeuronLayer(param) {} - - virtual inline const char* type() const { return "ReLU"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = \max(0, x) - * @f$ by default. If a non-zero negative_slope @f$ \nu @f$ is provided, - * the computed outputs are @f$ y = \max(0, x) + \nu \min(0, x) @f$. - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the ReLU inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} = \left\{ - * \begin{array}{lr} - * 0 & \mathrm{if} \; x \le 0 \\ - * \frac{\partial E}{\partial y} & \mathrm{if} \; x > 0 - * \end{array} \right. - * @f$ if propagate_down[0], by default. 
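The PowerLayer documented above computes y = (alpha*x + beta)^gamma, with gradient dE/dx = dE/dy * alpha*gamma*(alpha*x + beta)^(gamma-1), which is why the header caches diff_scale_ = alpha*gamma. The short check below evaluates that analytic derivative against a central finite difference; it is a worked example, not layer code.

// Power element: y = (alpha*x + beta)^gamma and its derivative, verified numerically.
#include <cmath>
#include <cstdio>

int main() {
  const double alpha = 2.0, beta = 1.0, gamma = 3.0, x = 0.5;
  const double y = std::pow(alpha * x + beta, gamma);
  const double analytic = alpha * gamma * std::pow(alpha * x + beta, gamma - 1.0);
  const double eps = 1e-6;
  const double numeric = (std::pow(alpha * (x + eps) + beta, gamma) -
                          std::pow(alpha * (x - eps) + beta, gamma)) / (2.0 * eps);
  std::printf("y=%.4f analytic=%.4f numeric=%.4f\n", y, analytic, numeric);  // 8, 24, ~24
  return 0;
}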
- * If a non-zero negative_slope @f$ \nu @f$ is provided, - * the computed gradients are @f$ - * \frac{\partial E}{\partial x} = \left\{ - * \begin{array}{lr} - * \nu \frac{\partial E}{\partial y} & \mathrm{if} \; x \le 0 \\ - * \frac{\partial E}{\partial y} & \mathrm{if} \; x > 0 - * \end{array} \right. - * @f$. - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -} // namespace caffe - -#endif // CAFFE_RELU_LAYER_HPP_ diff --git a/include/caffe/layers/sigmoid_layer.hpp b/include/caffe/layers/sigmoid_layer.hpp deleted file mode 100644 index ac0f692..0000000 --- a/include/caffe/layers/sigmoid_layer.hpp +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef CAFFE_SIGMOID_LAYER_HPP_ -#define CAFFE_SIGMOID_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -#include "caffe/layers/neuron_layer.hpp" - -namespace caffe { - -/** - * @brief Sigmoid function non-linearity @f$ - * y = (1 + \exp(-x))^{-1} - * @f$, a classic choice in neural networks. - * - * Note that the gradient vanishes as the values move away from 0. - * The ReLULayer is often a better choice for this reason. - */ -template -class SigmoidLayer : public NeuronLayer { - public: - explicit SigmoidLayer(const LayerParameter& param) - : NeuronLayer(param) {} - - virtual inline const char* type() const { return "Sigmoid"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = (1 + \exp(-x))^{-1} - * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the sigmoid inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} - * = \frac{\partial E}{\partial y} y (1 - y) - * @f$ if propagate_down[0] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -} // namespace caffe - -#endif // CAFFE_SIGMOID_LAYER_HPP_ diff --git a/include/caffe/layers/softmax_layer.hpp b/include/caffe/layers/softmax_layer.hpp deleted file mode 100644 index c65b870..0000000 --- a/include/caffe/layers/softmax_layer.hpp +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef CAFFE_SOFTMAX_LAYER_HPP_ -#define CAFFE_SOFTMAX_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief Computes the softmax function. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. 
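The forward and backward formulas documented for ReLULayer above reduce to a few lines once the optional negative_slope is folded in. A standalone element-wise sketch (illustrative names only):

// ReLU with negative_slope nu: y = max(0, x) + nu*min(0, x); dx = dy for x > 0, nu*dy otherwise.
#include <cstdio>

static float relu_fw(float x, float nu)           { return x > 0.f ? x : nu * x; }
static float relu_bw(float x, float dy, float nu) { return x > 0.f ? dy : nu * dy; }

int main() {
  const float nu = 0.1f;
  const float xs[] = {-2.f, 0.5f};
  for (float x : xs)
    std::printf("x=%+.1f  y=%+.2f  dx(dy=1)=%+.2f\n", x, relu_fw(x, nu), relu_bw(x, 1.f, nu));
  return 0;
}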
- */ -template -class SoftmaxLayer : public Layer { - public: - explicit SoftmaxLayer(const LayerParameter& param) - : Layer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Softmax"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int ExactNumTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int outer_num_; - int inner_num_; - int softmax_axis_; - /// sum_multiplier is used to carry out sum using BLAS - Blob sum_multiplier_; - /// scale is an intermediate Blob to hold temporary results. - Blob scale_; -}; - -} // namespace caffe - -#endif // CAFFE_SOFTMAX_LAYER_HPP_ diff --git a/include/caffe/layers/softmax_loss_layer.hpp b/include/caffe/layers/softmax_loss_layer.hpp deleted file mode 100644 index f07e8a0..0000000 --- a/include/caffe/layers/softmax_loss_layer.hpp +++ /dev/null @@ -1,130 +0,0 @@ -#ifndef CAFFE_SOFTMAX_WITH_LOSS_LAYER_HPP_ -#define CAFFE_SOFTMAX_WITH_LOSS_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -#include "caffe/layers/loss_layer.hpp" -#include "caffe/layers/softmax_layer.hpp" - -namespace caffe { - -/** - * @brief Computes the multinomial logistic loss for a one-of-many - * classification task, passing real-valued predictions through a - * softmax to get a probability distribution over classes. - * - * This layer should be preferred over separate - * SoftmaxLayer + MultinomialLogisticLossLayer - * as its gradient computation is more numerically stable. - * At test time, this layer can be replaced simply by a SoftmaxLayer. - * - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$ x @f$, a Blob with values in - * @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of - * the @f$ K = CHW @f$ classes. This layer maps these scores to a - * probability distribution over classes using the softmax function - * @f$ \hat{p}_{nk} = \exp(x_{nk}) / - * \left[\sum_{k'} \exp(x_{nk'})\right] @f$ (see SoftmaxLayer). - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the labels @f$ l @f$, an integer-valued Blob with values - * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$ - * indicating the correct class label among the @f$ K @f$ classes - * @param top output Blob vector (length 1) - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * the computed cross-entropy classification loss: @f$ E = - * \frac{-1}{N} \sum\limits_{n=1}^N \log(\hat{p}_{n,l_n}) - * @f$, for softmax output class probabilites @f$ \hat{p} @f$ - */ -template -class SoftmaxWithLossLayer : public LossLayer { - public: - /** - * @param param provides LossParameter loss_param, with options: - * - ignore_label (optional) - * Specify a label value that should be ignored when computing the loss. - * - normalize (optional, default true) - * If true, the loss is normalized by the number of (nonignored) labels - * present; otherwise the loss is simply summed over spatial locations. 
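The SigmoidLayer gradient above is expressed entirely in terms of the forward output, dE/dx = dE/dy * y * (1 - y), so the backward pass never recomputes the exponential. A one-element worked example, purely illustrative:

// Sigmoid forward and backward for one element.
#include <cmath>
#include <cstdio>

int main() {
  const double x = 0.7, top_diff = 1.0;
  const double y = 1.0 / (1.0 + std::exp(-x));
  const double bottom_diff = top_diff * y * (1.0 - y);
  std::printf("y=%.4f  dE/dx=%.4f\n", y, bottom_diff);   // ~0.6682, ~0.2217
  return 0;
}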
- */ - explicit SoftmaxWithLossLayer(const LayerParameter& param) - : LossLayer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "SoftmaxWithLoss"; } - virtual inline int ExactNumTopBlobs() const { return -1; } - virtual inline int MinTopBlobs() const { return 1; } - virtual inline int MaxTopBlobs() const { return 2; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - /** - * @brief Computes the softmax loss error gradient w.r.t. the predictions. - * - * Gradients cannot be computed with respect to the label inputs (bottom[1]), - * so this method ignores bottom[1] and requires !propagate_down[1], crashing - * if propagate_down[1] is set. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (1 \times 1 \times 1 \times 1) @f$ - * This Blob's diff will simply contain the loss_weight* @f$ \lambda @f$, - * as @f$ \lambda @f$ is the coefficient of this layer's output - * @f$\ell_i@f$ in the overall Net loss - * @f$ E = \lambda_i \ell_i + \mbox{other loss terms}@f$; hence - * @f$ \frac{\partial E}{\partial \ell_i} = \lambda_i @f$. - * (*Assuming that this top Blob is not used as a bottom (input) by any - * other layer of the Net.) - * @param propagate_down see Layer::Backward. - * propagate_down[1] must be false as we can't compute gradients with - * respect to the labels. - * @param bottom input Blob vector (length 2) - * -# @f$ (N \times C \times H \times W) @f$ - * the predictions @f$ x @f$; Backward computes diff - * @f$ \frac{\partial E}{\partial x} @f$ - * -# @f$ (N \times 1 \times 1 \times 1) @f$ - * the labels -- ignored as we can't compute their error gradients - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - /// Read the normalization mode parameter and compute the normalizer based - /// on the blob size. If normalization_mode is VALID, the count of valid - /// outputs will be read from valid_count, unless it is -1 in which case - /// all outputs are assumed to be valid. - virtual Dtype get_normalizer( - LossParameter_NormalizationMode normalization_mode, int valid_count); - - /// The internal SoftmaxLayer used to map predictions to a distribution. - shared_ptr > softmax_layer_; - /// prob stores the output probability predictions from the SoftmaxLayer. - Blob prob_; - /// bottom vector holder used in call to the underlying SoftmaxLayer::Forward - vector*> softmax_bottom_vec_; - /// top vector holder used in call to the underlying SoftmaxLayer::Forward - vector*> softmax_top_vec_; - /// Whether to ignore instances with a certain label. - bool has_ignore_label_; - /// The label indicating that an instance should be ignored. - int ignore_label_; - /// How to normalize the output loss. 
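The SoftmaxWithLossLayer comment above gives the two pieces it fuses: the softmax p_hat_nk = exp(x_nk) / sum_k' exp(x_nk') and the loss E = -(1/N) * sum_n log(p_hat[n][l_n]). The fusion is preferred because the combined computation can subtract the per-row maximum before exponentiating. A single-example standalone sketch of that stable evaluation (not the removed code):

// Numerically stable softmax followed by the cross-entropy term for one example.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  std::vector<double> scores = {2.0, 0.5, -1.0};
  const int label = 0;

  double max_s = scores[0];
  for (double s : scores) max_s = std::max(max_s, s);

  double denom = 0.0;
  std::vector<double> prob(scores.size());
  for (size_t k = 0; k < scores.size(); ++k) {
    prob[k] = std::exp(scores[k] - max_s);   // subtract the max before exponentiating
    denom += prob[k];
  }
  for (double& p : prob) p /= denom;

  const double loss = -std::log(prob[label]);
  std::printf("p=[%.3f %.3f %.3f]  loss=%.4f\n", prob[0], prob[1], prob[2], loss);
  return 0;
}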
- LossParameter_NormalizationMode normalization_; - - int softmax_axis_, outer_num_, inner_num_; -}; - -} // namespace caffe - -#endif // CAFFE_SOFTMAX_WITH_LOSS_LAYER_HPP_ diff --git a/include/caffe/layers/split_layer.hpp b/include/caffe/layers/split_layer.hpp deleted file mode 100644 index 8140dfc..0000000 --- a/include/caffe/layers/split_layer.hpp +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef CAFFE_SPLIT_LAYER_HPP_ -#define CAFFE_SPLIT_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief Creates a "split" path in the network by copying the bottom Blob - * into multiple top Blob%s to be used by multiple consuming layers. - * - * TODO(dox): thorough documentation for Forward, Backward, and proto params. - */ -template -class SplitLayer : public Layer { - public: - explicit SplitLayer(const LayerParameter& param) - : Layer(param) {} - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - - virtual inline const char* type() const { return "Split"; } - virtual inline int ExactNumBottomBlobs() const { return 1; } - virtual inline int MinTopBlobs() const { return 1; } - - protected: - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int count_; -}; - -} // namespace caffe - -#endif // CAFFE_SPLIT_LAYER_HPP_ diff --git a/include/caffe/layers/tanh_layer.hpp b/include/caffe/layers/tanh_layer.hpp deleted file mode 100644 index 8f95e93..0000000 --- a/include/caffe/layers/tanh_layer.hpp +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef CAFFE_TANH_LAYER_HPP_ -#define CAFFE_TANH_LAYER_HPP_ - -#include - -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -#include "caffe/layers/neuron_layer.hpp" - -namespace caffe { - -/** - * @brief TanH hyperbolic tangent non-linearity @f$ - * y = \frac{\exp(2x) - 1}{\exp(2x) + 1} - * @f$, popular in auto-encoders. - * - * Note that the gradient vanishes as the values move away from 0. - * The ReLULayer is often a better choice for this reason. - */ -template -class TanHLayer : public NeuronLayer { - public: - explicit TanHLayer(const LayerParameter& param) - : NeuronLayer(param) {} - - virtual inline const char* type() const { return "TanH"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the computed outputs @f$ - * y = \frac{\exp(2x) - 1}{\exp(2x) + 1} - * @f$ - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - - /** - * @brief Computes the error gradient w.r.t. the sigmoid inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times H \times W) @f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. 
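The get_normalizer() helper declared above picks the denominator applied to the summed loss according to the normalization mode described in the comment: all outputs, only non-ignored outputs, the batch size, or no normalization at all. The sketch below mirrors that switch with a local enum standing in for LossParameter_NormalizationMode; it is an approximation of the behaviour, not the removed function.

// Loss normalizer selection, with a floor of 1 so an all-ignored batch never divides by zero.
#include <cstdio>

enum class Normalization { FULL, VALID, BATCH_SIZE, NONE };

static double normalizer(Normalization mode, int outer_num, int inner_num, int valid_count) {
  double n = 1.0;
  switch (mode) {
    case Normalization::FULL:       n = 1.0 * outer_num * inner_num; break;
    case Normalization::VALID:      n = (valid_count == -1) ? 1.0 * outer_num * inner_num
                                                            : 1.0 * valid_count; break;
    case Normalization::BATCH_SIZE: n = outer_num; break;
    case Normalization::NONE:       n = 1.0; break;
  }
  return n < 1.0 ? 1.0 : n;
}

int main() {
  // 8 images, one prediction each, 2 of them carrying the ignore_label.
  std::printf("FULL=%.0f VALID=%.0f BATCH=%.0f NONE=%.0f\n",
              normalizer(Normalization::FULL, 8, 1, 6),
              normalizer(Normalization::VALID, 8, 1, 6),
              normalizer(Normalization::BATCH_SIZE, 8, 1, 6),
              normalizer(Normalization::NONE, 8, 1, 6));
  return 0;
}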
- * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times H \times W) @f$ - * the inputs @f$ x @f$; Backward fills their diff with - * gradients @f$ - * \frac{\partial E}{\partial x} - * = \frac{\partial E}{\partial y} - * \left(1 - \left[\frac{\exp(2x) - 1}{exp(2x) + 1} \right]^2 \right) - * = \frac{\partial E}{\partial y} (1 - y^2) - * @f$ if propagate_down[0] - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); -}; - -} // namespace caffe - -#endif // CAFFE_TANH_LAYER_HPP_ diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp deleted file mode 100644 index 4cb0343..0000000 --- a/include/caffe/net.hpp +++ /dev/null @@ -1,351 +0,0 @@ -#ifndef CAFFE_NET_HPP_ -#define CAFFE_NET_HPP_ - -#include -#include -#include -#include -#include - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -/** - * @brief Connects Layer%s together into a directed acyclic graph (DAG) - * specified by a NetParameter. - * - * TODO(dox): more thorough description. - */ -template -class Net { - public: - explicit Net(const NetParameter& param); - explicit Net(const string& param_file, Phase phase, - const int level = 0, const vector* stages = NULL); - virtual ~Net() {} - - /// @brief Initialize a network with a NetParameter. - void Init(const NetParameter& param); - - /** - * @brief Run Forward and return the result. - * - */ - const vector*>& Forward(Dtype* loss = NULL); - /// @brief DEPRECATED; use Forward() instead. - const vector*>& ForwardPrefilled(Dtype* loss = NULL) { -#ifdef USE_GLOG - LOG_EVERY_N(WARNING, 1000) << "DEPRECATED: ForwardPrefilled() " - << "will be removed in a future version. Use Forward()."; -#endif - return Forward(loss); - } - - /** - * The From and To variants of Forward and Backward operate on the - * (topological) ordering by which the net is specified. For general DAG - * networks, note that (1) computing from one layer to another might entail - * extra computation on unrelated branches, and (2) computation starting in - * the middle may be incorrect if all of the layers of a fan-in are not - * included. - */ - Dtype ForwardFromTo(int start, int end); - Dtype ForwardFrom(int start); - Dtype ForwardTo(int end); - /// @brief DEPRECATED; set input blobs then use Forward() instead. - const vector*>& Forward(const vector* > & bottom, - Dtype* loss = NULL); - - /** - * @brief Zeroes out the diffs of all net parameters. - * Should be run before Backward. - */ - void ClearParamDiffs(); - -#ifdef ENABLE_BACKWARD - /** - * The network backward should take no input and output, since it solely - * computes the gradient w.r.t the parameters, and the data has already been - * provided during the forward pass. - */ - void Backward(); - void BackwardFromTo(int start, int end); - void BackwardFrom(int start); - void BackwardTo(int end); -#endif - - /** - * @brief Reshape all layers from bottom to top. - * - * This is useful to propagate changes to layer sizes without running - * a forward pass, e.g. to compute output feature size. - */ - void Reshape(); - -#ifdef ENABLE_BACKWARD - Dtype ForwardBackward() { - Dtype loss; - Forward(&loss); - Backward(); - return loss; - } - - /// @brief Updates the network weights based on the diff values computed. 
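Like the sigmoid, the TanHLayer gradient documented above reuses the stored forward output: dE/dx = dE/dy * (1 - y^2). A one-element worked example:

// TanH forward and backward for one element.
#include <cmath>
#include <cstdio>

int main() {
  const double x = 0.3, top_diff = 1.0;
  const double y = std::tanh(x);                        // equals (exp(2x) - 1) / (exp(2x) + 1)
  const double bottom_diff = top_diff * (1.0 - y * y);
  std::printf("y=%.4f  dE/dx=%.4f\n", y, bottom_diff);  // ~0.2913, ~0.9151
  return 0;
}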
- void Update(); -#endif - /** - * @brief Shares weight data of owner blobs with shared blobs. - * - * Note: this is called by Net::Init, and thus should normally not be - * called manually. - */ - void ShareWeights(); - - /** - * @brief For an already initialized net, implicitly copies (i.e., using no - * additional memory) the pre-trained layers from another Net. - */ - void ShareTrainedLayersWith(const Net* other); - // For an already initialized net, CopyTrainedLayersFrom() copies the already - // trained layers from another net parameter instance. - /** - * @brief For an already initialized net, copies the pre-trained layers from - * another Net. - */ - void CopyTrainedLayersFrom(const NetParameter& param); - void CopyTrainedLayersFrom(const string trained_filename); - void CopyTrainedLayersFromBinaryProto(const string trained_filename); - void CopyTrainedLayersFromHDF5(const string trained_filename); - /// @brief Writes the net to a proto. - void ToProto(NetParameter* param, bool write_diff = false) const; - /// @brief Writes the net to an HDF5 file. - void ToHDF5(const string& filename, bool write_diff = false) const; - - /// @brief returns the network name. - inline const string& name() const { return name_; } - /// @brief returns the layer names - inline const vector& layer_names() const { return layer_names_; } - /// @brief returns the blob names - inline const vector& blob_names() const { return blob_names_; } - /// @brief returns the blobs - inline const vector > >& blobs() const { - return blobs_; - } - /// @brief returns the layers - inline const vector > >& layers() const { - return layers_; - } - /// @brief returns the phase: TRAIN or TEST - inline Phase phase() const { return phase_; } - /** - * @brief returns the bottom vecs for each layer -- usually you won't - * need this unless you do per-layer checks such as gradients. - */ - inline const vector*> >& bottom_vecs() const { - return bottom_vecs_; - } - /** - * @brief returns the top vecs for each layer -- usually you won't - * need this unless you do per-layer checks such as gradients. 
- */ - inline const vector*> >& top_vecs() const { - return top_vecs_; - } - /// @brief returns the ids of the top blobs of layer i - inline const vector & top_ids(int i) const { - CHECK_GE(i, 0) << "Invalid layer id"; - CHECK_LT(i, top_id_vecs_.size()) << "Invalid layer id"; - return top_id_vecs_[i]; - } - /// @brief returns the ids of the bottom blobs of layer i - inline const vector & bottom_ids(int i) const { - CHECK_GE(i, 0) << "Invalid layer id"; - CHECK_LT(i, bottom_id_vecs_.size()) << "Invalid layer id"; - return bottom_id_vecs_[i]; - } - inline const vector >& bottom_need_backward() const { - return bottom_need_backward_; - } - inline const vector& blob_loss_weights() const { - return blob_loss_weights_; - } - inline const vector& layer_need_backward() const { - return layer_need_backward_; - } - /// @brief returns the parameters - inline const vector > >& params() const { - return params_; - } - inline const vector*>& learnable_params() const { - return learnable_params_; - } - /// @brief returns the learnable parameter learning rate multipliers - inline const vector& params_lr() const { return params_lr_; } - inline const vector& has_params_lr() const { return has_params_lr_; } - /// @brief returns the learnable parameter decay multipliers - inline const vector& params_weight_decay() const { - return params_weight_decay_; - } - inline const vector& has_params_decay() const { - return has_params_decay_; - } - const map& param_names_index() const { - return param_names_index_; - } - inline const vector& param_owners() const { return param_owners_; } - inline const vector& param_display_names() const { - return param_display_names_; - } - /// @brief Input and output blob numbers - inline int num_inputs() const { return net_input_blobs_.size(); } - inline int num_outputs() const { return net_output_blobs_.size(); } - inline const vector*>& input_blobs() const { - return net_input_blobs_; - } - inline const vector*>& output_blobs() const { - return net_output_blobs_; - } - inline const vector& input_blob_indices() const { - return net_input_blob_indices_; - } - inline const vector& output_blob_indices() const { - return net_output_blob_indices_; - } - bool has_blob(const string& blob_name) const; - const shared_ptr > blob_by_name(const string& blob_name) const; - bool has_layer(const string& layer_name) const; - const shared_ptr > layer_by_name(const string& layer_name) const; - - void set_debug_info(const bool value) { debug_info_ = value; } - - // Helpers for Init. - /** - * @brief Remove layers that the user specified should be excluded given the current - * phase, level, and stage. 
- */ - static void FilterNet(const NetParameter& param, - NetParameter* param_filtered); - /// @brief return whether NetState state meets NetStateRule rule - static bool StateMeetsRule(const NetState& state, const NetStateRule& rule, - const string& layer_name); - - // Invoked at specific points during an iteration - class Callback { - protected: - virtual void run(int layer) = 0; - - template - friend class Net; - }; - const vector& before_forward() const { return before_forward_; } - void add_before_forward(Callback* value) { - before_forward_.push_back(value); - } - const vector& after_forward() const { return after_forward_; } - void add_after_forward(Callback* value) { - after_forward_.push_back(value); - } - const vector& before_backward() const { return before_backward_; } - void add_before_backward(Callback* value) { - before_backward_.push_back(value); - } - const vector& after_backward() const { return after_backward_; } - void add_after_backward(Callback* value) { - after_backward_.push_back(value); - } - - protected: - // Helpers for Init. - /// @brief Append a new top blob to the net. - void AppendTop(const NetParameter& param, const int layer_id, - const int top_id, set* available_blobs, - map* blob_name_to_idx); - /// @brief Append a new bottom blob to the net. - int AppendBottom(const NetParameter& param, const int layer_id, - const int bottom_id, set* available_blobs, - map* blob_name_to_idx); - /// @brief Append a new parameter blob to the net. - void AppendParam(const NetParameter& param, const int layer_id, - const int param_id); - - /// @brief Helper for displaying debug info in Forward. - void ForwardDebugInfo(const int layer_id); - /// @brief Helper for displaying debug info in Backward. - void BackwardDebugInfo(const int layer_id); - /// @brief Helper for displaying debug info in Update. - void UpdateDebugInfo(const int param_id); - - /// @brief The network name - string name_; - /// @brief The phase: TRAIN or TEST - Phase phase_; - /// @brief Individual layers in the net - vector > > layers_; - vector layer_names_; - map layer_names_index_; - vector layer_need_backward_; - /// @brief the blobs storing intermediate results between the layer. - vector > > blobs_; - vector blob_names_; - map blob_names_index_; - vector blob_need_backward_; - /// bottom_vecs stores the vectors containing the input for each layer. - /// They don't actually host the blobs (blobs_ does), so we simply store - /// pointers. - vector*> > bottom_vecs_; - vector > bottom_id_vecs_; - vector > bottom_need_backward_; - /// top_vecs stores the vectors containing the output for each layer - vector*> > top_vecs_; - vector > top_id_vecs_; - /// Vector of weight in the loss (or objective) function of each net blob, - /// indexed by blob_id. - vector blob_loss_weights_; - vector > param_id_vecs_; - vector param_owners_; - vector param_display_names_; - vector > param_layer_indices_; - map param_names_index_; - /// blob indices for the input and the output of the net - vector net_input_blob_indices_; - vector net_output_blob_indices_; - vector*> net_input_blobs_; - vector*> net_output_blobs_; - /// The parameters in the network. 
- vector > > params_; - vector*> learnable_params_; - /** - * The mapping from params_ -> learnable_params_: we have - * learnable_param_ids_.size() == params_.size(), - * and learnable_params_[learnable_param_ids_[i]] == params_[i].get() - * if and only if params_[i] is an "owner"; otherwise, params_[i] is a sharer - * and learnable_params_[learnable_param_ids_[i]] gives its owner. - */ - vector learnable_param_ids_; - /// the learning rate multipliers for learnable_params_ - vector params_lr_; - vector has_params_lr_; - /// the weight decay multipliers for learnable_params_ - vector params_weight_decay_; - vector has_params_decay_; - /// The bytes of memory used by this net - size_t memory_used_; - /// Whether to compute and display debug info for the net. - bool debug_info_; - // Callbacks - vector before_forward_; - vector after_forward_; - vector before_backward_; - vector after_backward_; - -DISABLE_COPY_AND_ASSIGN(Net); -}; - - -} // namespace caffe - -#endif // CAFFE_NET_HPP_ diff --git a/include/caffe/syncedmem.hpp b/include/caffe/syncedmem.hpp deleted file mode 100644 index 317ce29..0000000 --- a/include/caffe/syncedmem.hpp +++ /dev/null @@ -1,95 +0,0 @@ -#ifndef CAFFE_SYNCEDMEM_HPP_ -#define CAFFE_SYNCEDMEM_HPP_ - -#include - -#ifdef USE_MKL - #include "mkl.h" -#endif - -#include "caffe/common.hpp" - -namespace caffe { - -// If CUDA is available and in GPU mode, host memory will be allocated pinned, -// using cudaMallocHost. It avoids dynamic pinning for transfers (DMA). -// The improvement in performance seems negligible in the single GPU case, -// but might be more significant for parallel training. Most importantly, -// it improved stability for large models on many GPUs. -inline void CaffeMallocHost(void** ptr, size_t size, bool* use_cuda) { -#ifndef CPU_ONLY - if (Caffe::mode() == Caffe::GPU) { - CUDA_CHECK(cudaMallocHost(ptr, size)); - *use_cuda = true; - return; - } -#endif -#ifdef USE_MKL - *ptr = mkl_malloc(size ? size:1, 64); -#else - *ptr = malloc(size); -#endif - *use_cuda = false; - CHECK(*ptr) << "host allocation of size " << size << " failed"; -} - -inline void CaffeFreeHost(void* ptr, bool use_cuda) { -#ifndef CPU_ONLY - if (use_cuda) { - CUDA_CHECK(cudaFreeHost(ptr)); - return; - } -#endif -#ifdef USE_MKL - mkl_free(ptr); -#else - free(ptr); -#endif -} - - -/** - * @brief Manages memory allocation and synchronization between the host (CPU) - * and device (GPU). - * - * TODO(dox): more thorough description. 
- */ -class SyncedMemory { - public: - SyncedMemory(); - explicit SyncedMemory(size_t size); - ~SyncedMemory(); - const void* cpu_data(); - void set_cpu_data(void* data); - const void* gpu_data(); - void set_gpu_data(void* data); - void* mutable_cpu_data(); - void* mutable_gpu_data(); - enum SyncedHead { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED }; - SyncedHead head() { return head_; } - size_t size() { return size_; } - -#ifndef CPU_ONLY - void async_gpu_push(const cudaStream_t& stream); -#endif - - private: - void check_device(); - - void to_cpu(); - void to_gpu(); - void* cpu_ptr_; - void* gpu_ptr_; - size_t size_; - SyncedHead head_; - bool own_cpu_data_; - bool cpu_malloc_use_cuda_; - bool own_gpu_data_; - int device_; - - DISABLE_COPY_AND_ASSIGN(SyncedMemory); -}; // class SyncedMemory - -} // namespace caffe - -#endif // CAFFE_SYNCEDMEM_HPP_ diff --git a/include/caffe/util/benchmark.hpp b/include/caffe/util/benchmark.hpp deleted file mode 100644 index 4b9ef43..0000000 --- a/include/caffe/util/benchmark.hpp +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef CAFFE_UTIL_BENCHMARK_H_ -#define CAFFE_UTIL_BENCHMARK_H_ - -#ifdef USE_BOOST -#include -#else -#include -#endif - -#include "caffe/util/device_alternate.hpp" - -namespace caffe { - -class Timer { - public: - Timer(); - virtual ~Timer(); - virtual void Start(); - virtual void Stop(); - virtual float MilliSeconds(); - virtual float MicroSeconds(); - virtual float Seconds(); - - inline bool initted() { return initted_; } - inline bool running() { return running_; } - inline bool has_run_at_least_once() { return has_run_at_least_once_; } - - protected: - void Init(); - - bool initted_; - bool running_; - bool has_run_at_least_once_; -#ifndef CPU_ONLY - cudaEvent_t start_gpu_; - cudaEvent_t stop_gpu_; -#endif -#ifdef USE_BOOST - boost::posix_time::ptime start_cpu_; - boost::posix_time::ptime stop_cpu_; -#else - struct timeval start_cpu_; - struct timeval stop_cpu_; -#endif - float elapsed_milliseconds_; - float elapsed_microseconds_; -}; - -class CPUTimer : public Timer { - public: - explicit CPUTimer(); - virtual ~CPUTimer() {} - virtual void Start(); - virtual void Stop(); - virtual float MilliSeconds(); - virtual float MicroSeconds(); -}; - -} // namespace caffe - -#endif // CAFFE_UTIL_BENCHMARK_H_ diff --git a/include/caffe/util/blocking_queue.hpp b/include/caffe/util/blocking_queue.hpp deleted file mode 100644 index d3de2e5..0000000 --- a/include/caffe/util/blocking_queue.hpp +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef CAFFE_UTIL_BLOCKING_QUEUE_HPP_ -#define CAFFE_UTIL_BLOCKING_QUEUE_HPP_ - -#include -#include - -namespace caffe { - -template -class BlockingQueue { - public: - explicit BlockingQueue(); - - void push(const T& t); - - bool try_pop(T* t); - - // This logs a message if the threads needs to be blocked - // useful for detecting e.g. when data feeding is too slow - T pop(const string& log_on_wait = ""); - - bool try_peek(T* t); - - // Return element without removing it - T peek(); - - size_t size() const; - - protected: - /** - Move synchronization fields out instead of including boost/thread.hpp - to avoid a boost/NVCC issues (#1009, #1010) on OSX. Also fails on - Linux CUDA 7.0.18. 
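The SyncedMemory class declared above tracks where the freshest copy of a buffer lives through the UNINITIALIZED / HEAD_AT_CPU / HEAD_AT_GPU / SYNCED states and copies lazily on access. The CPU-only sketch below simulates the device side with a second host buffer just to show that state machine; it is a simplified stand-in, not the removed class.

// Lazy host/"device" synchronization driven by a head-state flag.
#include <cstdio>
#include <vector>

class FakeSynced {
 public:
  enum Head { UNINITIALIZED, HEAD_AT_CPU, HEAD_AT_GPU, SYNCED };
  explicit FakeSynced(size_t n) : cpu_(n, 0.f), gpu_(n, 0.f) {}

  // Writing on the host marks the host copy as the freshest one.
  float* mutable_cpu_data() { to_cpu(); head_ = HEAD_AT_CPU; return cpu_.data(); }
  // Reading on the "device" copies only when the host copy is newer.
  const float* gpu_data()   { to_gpu(); return gpu_.data(); }
  Head head() const { return head_; }

 private:
  void to_cpu() {
    if (head_ == HEAD_AT_GPU) { cpu_ = gpu_; head_ = SYNCED; }
    if (head_ == UNINITIALIZED) head_ = HEAD_AT_CPU;
  }
  void to_gpu() {
    if (head_ == HEAD_AT_CPU || head_ == UNINITIALIZED) { gpu_ = cpu_; head_ = SYNCED; }
  }
  std::vector<float> cpu_, gpu_;
  Head head_ = UNINITIALIZED;
};

int main() {
  FakeSynced mem(4);
  mem.mutable_cpu_data()[0] = 42.f;   // head -> HEAD_AT_CPU
  std::printf("device sees %.0f, synced=%d\n",
              mem.gpu_data()[0], mem.head() == FakeSynced::SYNCED);   // 42, 1
  return 0;
}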
- */ - class sync; - - std::queue queue_; - shared_ptr sync_; - -DISABLE_COPY_AND_ASSIGN(BlockingQueue); -}; - -} // namespace caffe - -#endif diff --git a/include/caffe/util/cudnn.hpp b/include/caffe/util/cudnn.hpp deleted file mode 100644 index a7d8dbb..0000000 --- a/include/caffe/util/cudnn.hpp +++ /dev/null @@ -1,153 +0,0 @@ -#ifndef CAFFE_UTIL_CUDNN_H_ -#define CAFFE_UTIL_CUDNN_H_ -#ifdef USE_CUDNN - -#include - -#include "caffe/common.hpp" -#include "caffe/proto/caffe.pb.h" - -#define CUDNN_VERSION_MIN(major, minor, patch) \ - (CUDNN_VERSION >= (major * 1000 + minor * 100 + patch)) - -#define CUDNN_CHECK(condition) \ - do { \ - cudnnStatus_t status = condition; \ - CHECK_EQ(status, CUDNN_STATUS_SUCCESS) << " "\ - << cudnnGetErrorString(status); \ - } while (0) - -inline const char* cudnnGetErrorString(cudnnStatus_t status) { - switch (status) { - case CUDNN_STATUS_SUCCESS: - return "CUDNN_STATUS_SUCCESS"; - case CUDNN_STATUS_NOT_INITIALIZED: - return "CUDNN_STATUS_NOT_INITIALIZED"; - case CUDNN_STATUS_ALLOC_FAILED: - return "CUDNN_STATUS_ALLOC_FAILED"; - case CUDNN_STATUS_BAD_PARAM: - return "CUDNN_STATUS_BAD_PARAM"; - case CUDNN_STATUS_INTERNAL_ERROR: - return "CUDNN_STATUS_INTERNAL_ERROR"; - case CUDNN_STATUS_INVALID_VALUE: - return "CUDNN_STATUS_INVALID_VALUE"; - case CUDNN_STATUS_ARCH_MISMATCH: - return "CUDNN_STATUS_ARCH_MISMATCH"; - case CUDNN_STATUS_MAPPING_ERROR: - return "CUDNN_STATUS_MAPPING_ERROR"; - case CUDNN_STATUS_EXECUTION_FAILED: - return "CUDNN_STATUS_EXECUTION_FAILED"; - case CUDNN_STATUS_NOT_SUPPORTED: - return "CUDNN_STATUS_NOT_SUPPORTED"; - case CUDNN_STATUS_LICENSE_ERROR: - return "CUDNN_STATUS_LICENSE_ERROR"; - } - return "Unknown cudnn status"; -} - -namespace caffe { - -namespace cudnn { - -template class dataType; -template<> class dataType { - public: - static const cudnnDataType_t type = CUDNN_DATA_FLOAT; - static float oneval, zeroval; - static const void *one, *zero; -}; -template<> class dataType { - public: - static const cudnnDataType_t type = CUDNN_DATA_DOUBLE; - static double oneval, zeroval; - static const void *one, *zero; -}; - -template -inline void createTensor4dDesc(cudnnTensorDescriptor_t* desc) { - CUDNN_CHECK(cudnnCreateTensorDescriptor(desc)); -} - -template -inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc, - int n, int c, int h, int w, - int stride_n, int stride_c, int stride_h, int stride_w) { - CUDNN_CHECK(cudnnSetTensor4dDescriptorEx(*desc, dataType::type, - n, c, h, w, stride_n, stride_c, stride_h, stride_w)); -} - -template -inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc, - int n, int c, int h, int w) { - const int stride_w = 1; - const int stride_h = w * stride_w; - const int stride_c = h * stride_h; - const int stride_n = c * stride_c; - setTensor4dDesc(desc, n, c, h, w, - stride_n, stride_c, stride_h, stride_w); -} - -template -inline void createFilterDesc(cudnnFilterDescriptor_t* desc, - int n, int c, int h, int w) { - CUDNN_CHECK(cudnnCreateFilterDescriptor(desc)); -#if CUDNN_VERSION_MIN(5, 0, 0) - CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType::type, - CUDNN_TENSOR_NCHW, n, c, h, w)); -#else - CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(*desc, dataType::type, - CUDNN_TENSOR_NCHW, n, c, h, w)); -#endif -} - -template -inline void createConvolutionDesc(cudnnConvolutionDescriptor_t* conv) { - CUDNN_CHECK(cudnnCreateConvolutionDescriptor(conv)); -} - -template -inline void setConvolutionDesc(cudnnConvolutionDescriptor_t* conv, - cudnnTensorDescriptor_t bottom, cudnnFilterDescriptor_t filter, - int pad_h, 
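The BlockingQueue declared above hides its mutex and condition variable behind the nested sync helper to sidestep the boost/NVCC issues noted in its comment. A minimal standard-library analogue of its push / pop / try_pop contract is sketched below; it only illustrates the blocking semantics and is not the removed implementation (peek and the log-on-wait message are omitted).

// Producer/consumer queue: push never blocks, pop waits, try_pop returns false instead of waiting.
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>

template <typename T>
class SimpleBlockingQueue {
 public:
  void push(const T& t) {
    { std::lock_guard<std::mutex> lock(mutex_); queue_.push(t); }
    cond_.notify_one();
  }
  T pop() {
    std::unique_lock<std::mutex> lock(mutex_);
    cond_.wait(lock, [this] { return !queue_.empty(); });
    T t = queue_.front(); queue_.pop();
    return t;
  }
  bool try_pop(T* t) {
    std::lock_guard<std::mutex> lock(mutex_);
    if (queue_.empty()) return false;
    *t = queue_.front(); queue_.pop();
    return true;
  }

 private:
  std::queue<T> queue_;
  std::mutex mutex_;
  std::condition_variable cond_;
};

int main() {
  SimpleBlockingQueue<int> q;
  std::thread producer([&q] { q.push(7); });
  std::printf("popped %d\n", q.pop());   // blocks until the producer has pushed
  producer.join();
  return 0;
}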
int pad_w, int stride_h, int stride_w) { - CUDNN_CHECK(cudnnSetConvolution2dDescriptor(*conv, - pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION)); -} - -template -inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc, - PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode, - int h, int w, int pad_h, int pad_w, int stride_h, int stride_w) { - switch (poolmethod) { - case PoolingParameter_PoolMethod_MAX: - *mode = CUDNN_POOLING_MAX; - break; - case PoolingParameter_PoolMethod_AVE: - *mode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; - break; - default: - LOG(FATAL) << "Unknown pooling method."; - } - CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc)); -#if CUDNN_VERSION_MIN(5, 0, 0) - CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, - CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w)); -#else - CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(*pool_desc, *mode, - CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w)); -#endif -} - -template -inline void createActivationDescriptor(cudnnActivationDescriptor_t* activ_desc, - cudnnActivationMode_t mode) { - CUDNN_CHECK(cudnnCreateActivationDescriptor(activ_desc)); - CUDNN_CHECK(cudnnSetActivationDescriptor(*activ_desc, mode, - CUDNN_PROPAGATE_NAN, Dtype(0))); -} - -} // namespace cudnn - -} // namespace caffe - -#endif // USE_CUDNN -#endif // CAFFE_UTIL_CUDNN_H_ diff --git a/include/caffe/util/device_alternate.hpp b/include/caffe/util/device_alternate.hpp deleted file mode 100644 index e3fe4fe..0000000 --- a/include/caffe/util/device_alternate.hpp +++ /dev/null @@ -1,96 +0,0 @@ -#ifndef CAFFE_UTIL_DEVICE_ALTERNATE_H_ -#define CAFFE_UTIL_DEVICE_ALTERNATE_H_ - -#ifdef CPU_ONLY // CPU-only Caffe. - -#include - -// Stub out GPU calls as unavailable. - -#define NO_GPU LOG(FATAL) << "Cannot use GPU in CPU-only Caffe: check mode." - -#define STUB_GPU(classname) \ -template \ -void classname::Forward_gpu(const vector*>& bottom, \ - const vector*>& top) { NO_GPU; } \ -template \ -void classname::Backward_gpu(const vector*>& top, \ - const vector& propagate_down, \ - const vector*>& bottom) { NO_GPU; } \ - -#define STUB_GPU_FORWARD(classname, funcname) \ -template \ -void classname::funcname##_##gpu(const vector*>& bottom, \ - const vector*>& top) { NO_GPU; } \ - -#define STUB_GPU_BACKWARD(classname, funcname) \ -template \ -void classname::funcname##_##gpu(const vector*>& top, \ - const vector& propagate_down, \ - const vector*>& bottom) { NO_GPU; } \ - -#else // Normal GPU + CPU Caffe. - -#include -#include -#include -#include -#include // cuda driver types -#ifdef USE_CUDNN // cuDNN acceleration library. -#include "caffe/util/cudnn.hpp" -#endif - -// -// CUDA macros -// - -// CUDA: various checks for different function calls. 
-#define CUDA_CHECK(condition) \ - /* Code block avoids redefinition of cudaError_t error */ \ - do { \ - cudaError_t error = condition; \ - CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \ - } while (0) - -#define CUBLAS_CHECK(condition) \ - do { \ - cublasStatus_t status = condition; \ - CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " " \ - << caffe::cublasGetErrorString(status); \ - } while (0) - -#define CURAND_CHECK(condition) \ - do { \ - curandStatus_t status = condition; \ - CHECK_EQ(status, CURAND_STATUS_SUCCESS) << " " \ - << caffe::curandGetErrorString(status); \ - } while (0) - -// CUDA: grid stride looping -#define CUDA_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ - i < (n); \ - i += blockDim.x * gridDim.x) - -// CUDA: check for error after kernel execution and exit loudly if there is one. -#define CUDA_POST_KERNEL_CHECK CUDA_CHECK(cudaPeekAtLastError()) - -namespace caffe { - -// CUDA: library error reporting. -const char* cublasGetErrorString(cublasStatus_t error); -const char* curandGetErrorString(curandStatus_t error); - -// CUDA: use 512 threads per block -const int CAFFE_CUDA_NUM_THREADS = 512; - -// CUDA: number of blocks for threads. -inline int CAFFE_GET_BLOCKS(const int N) { - return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS; -} - -} // namespace caffe - -#endif // CPU_ONLY - -#endif // CAFFE_UTIL_DEVICE_ALTERNATE_H_ diff --git a/include/caffe/util/format.hpp b/include/caffe/util/format.hpp deleted file mode 100644 index 925ad2e..0000000 --- a/include/caffe/util/format.hpp +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef CAFFE_UTIL_FORMAT_H_ -#define CAFFE_UTIL_FORMAT_H_ - -#include // NOLINT(readability/streams) -#include // NOLINT(readability/streams) -#include - -namespace caffe { - -inline std::string format_int(int n, int numberOfLeadingZeros = 0 ) { - std::ostringstream s; - s << std::setw(numberOfLeadingZeros) << std::setfill('0') << n; - return s.str(); -} - -} - -#endif // CAFFE_UTIL_FORMAT_H_ diff --git a/include/caffe/util/im2col.hpp b/include/caffe/util/im2col.hpp deleted file mode 100644 index a35bc6e..0000000 --- a/include/caffe/util/im2col.hpp +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef _CAFFE_UTIL_IM2COL_HPP_ -#define _CAFFE_UTIL_IM2COL_HPP_ - -namespace caffe { - -template -void im2col_nd_cpu(const Dtype* data_im, const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - const int* dilation, Dtype* data_col); - -template -void im2col_cpu(const Dtype* data_im, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, - const int stride_w, const int dilation_h, const int dilation_w, - Dtype* data_col); - -template -void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - const int* dilation, Dtype* data_im); - -template -void col2im_cpu(const Dtype* data_col, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, - const int stride_w, const int dilation_h, const int dilation_w, - Dtype* data_im); - -template -void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes, - const int col_size, const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - const int* 
dilation, Dtype* data_col); - -template -void im2col_gpu(const Dtype* data_im, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, - const int stride_w, const int dilation_h, const int dilation_w, - Dtype* data_col); - -template -void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes, - const int im_size, const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - const int* dilation, Dtype* data_im); - -template -void col2im_gpu(const Dtype* data_col, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, - const int stride_w, const int dilation_h, const int dilation_w, - Dtype* data_im); - -} // namespace caffe - -#endif // CAFFE_UTIL_IM2COL_HPP_ diff --git a/include/caffe/util/insert_splits.hpp b/include/caffe/util/insert_splits.hpp deleted file mode 100644 index 446abb8..0000000 --- a/include/caffe/util/insert_splits.hpp +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _CAFFE_UTIL_INSERT_SPLITS_HPP_ -#define _CAFFE_UTIL_INSERT_SPLITS_HPP_ - -#include - -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -// Copy NetParameters with SplitLayers added to replace any shared bottom -// blobs with unique bottom blobs provided by the SplitLayer. -void InsertSplits(const NetParameter& param, NetParameter* param_split); - -void ConfigureSplitLayer(const string& layer_name, const string& blob_name, - const int blob_idx, const int split_count, const float loss_weight, - LayerParameter* split_layer_param); - -string SplitLayerName(const string& layer_name, const string& blob_name, - const int blob_idx); - -string SplitBlobName(const string& layer_name, const string& blob_name, - const int blob_idx, const int split_idx); - -} // namespace caffe - -#endif // CAFFE_UTIL_INSERT_SPLITS_HPP_ diff --git a/include/caffe/util/io.hpp b/include/caffe/util/io.hpp deleted file mode 100644 index 4db77e3..0000000 --- a/include/caffe/util/io.hpp +++ /dev/null @@ -1,156 +0,0 @@ -#ifndef CAFFE_UTIL_IO_H_ -#define CAFFE_UTIL_IO_H_ - -#ifdef USE_BOOST -#include -#endif -#include -#include // NOLINT(readability/streams) -#include - -#include "google/protobuf/message.h" - -#include "caffe/common.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/util/format.hpp" - -#ifndef CAFFE_TMP_DIR_RETRIES -#define CAFFE_TMP_DIR_RETRIES 100 -#endif - -namespace caffe { - -using ::google::protobuf::Message; -#ifdef USE_BOOST -using ::boost::filesystem::path; - -inline void MakeTempDir(string* temp_dirname) { - temp_dirname->clear(); - const path& model = - boost::filesystem::temp_directory_path()/"caffe_test.%%%%-%%%%"; - for ( int i = 0; i < CAFFE_TMP_DIR_RETRIES; i++ ) { - const path& dir = boost::filesystem::unique_path(model).string(); - bool done = boost::filesystem::create_directory(dir); - if ( done ) { - *temp_dirname = dir.string(); - return; - } - } - LOG(FATAL) << "Failed to create a temporary directory."; -} - -inline void MakeTempFilename(string* temp_filename) { - static path temp_files_subpath; - static uint64_t next_temp_file = 0; - temp_filename->clear(); - if ( temp_files_subpath.empty() ) { - string path_string=""; - MakeTempDir(&path_string); - temp_files_subpath = path_string; - } - *temp_filename = - (temp_files_subpath/caffe::format_int(next_temp_file++, 9)).string(); -} -#endif - -bool ReadProtoFromTextFile(const char* filename, Message* proto); - 
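The im2col_cpu declaration above lowers a (channels, height, width) image into a matrix whose rows are kernel taps and whose columns are output positions, so a convolution becomes one matrix multiply against the flattened filters. Below is a compact standalone version with dilation fixed to 1; it is an illustration of the layout, not the removed implementation.

// im2col: col[(c*K + kh)*K + kw][oh*OW + ow] = im[c][oh*stride - pad + kh][ow*stride - pad + kw].
#include <cstdio>
#include <vector>

static std::vector<float> im2col(const std::vector<float>& im, int C, int H, int W,
                                 int K, int pad, int stride) {
  const int OH = (H + 2 * pad - K) / stride + 1;
  const int OW = (W + 2 * pad - K) / stride + 1;
  std::vector<float> col(static_cast<size_t>(C) * K * K * OH * OW, 0.f);
  for (int c = 0; c < C; ++c)
    for (int kh = 0; kh < K; ++kh)
      for (int kw = 0; kw < K; ++kw) {
        const int row = (c * K + kh) * K + kw;
        for (int oh = 0; oh < OH; ++oh)
          for (int ow = 0; ow < OW; ++ow) {
            const int h = oh * stride - pad + kh;
            const int w = ow * stride - pad + kw;
            const float v = (h >= 0 && h < H && w >= 0 && w < W)
                                ? im[(c * H + h) * W + w] : 0.f;   // zero padding
            col[static_cast<size_t>(row) * OH * OW + oh * OW + ow] = v;
          }
      }
  return col;
}

int main() {
  // 1x3x3 image, 2x2 kernel, no padding, stride 1 -> 4 rows of 4 patch values each.
  std::vector<float> im = {1, 2, 3,
                           4, 5, 6,
                           7, 8, 9};
  std::vector<float> col = im2col(im, 1, 3, 3, 2, 0, 1);
  for (int r = 0; r < 4; ++r) {
    for (int i = 0; i < 4; ++i) std::printf("%.0f ", col[r * 4 + i]);
    std::printf("\n");
  }
  return 0;
}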
-inline bool ReadProtoFromTextFile(const string& filename, Message* proto) { - return ReadProtoFromTextFile(filename.c_str(), proto); -} - -inline void ReadProtoFromTextFileOrDie(const char* filename, Message* proto) { - CHECK(ReadProtoFromTextFile(filename, proto)); -} - -inline void ReadProtoFromTextFileOrDie(const string& filename, Message* proto) { - ReadProtoFromTextFileOrDie(filename.c_str(), proto); -} - -void WriteProtoToTextFile(const Message& proto, const char* filename); -inline void WriteProtoToTextFile(const Message& proto, const string& filename) { - WriteProtoToTextFile(proto, filename.c_str()); -} - -bool ReadProtoFromBinaryFile(const char* filename, Message* proto); - -inline bool ReadProtoFromBinaryFile(const string& filename, Message* proto) { - return ReadProtoFromBinaryFile(filename.c_str(), proto); -} - -inline void ReadProtoFromBinaryFileOrDie(const char* filename, Message* proto) { - CHECK(ReadProtoFromBinaryFile(filename, proto)); -} - -inline void ReadProtoFromBinaryFileOrDie(const string& filename, - Message* proto) { - ReadProtoFromBinaryFileOrDie(filename.c_str(), proto); -} - - -void WriteProtoToBinaryFile(const Message& proto, const char* filename); -inline void WriteProtoToBinaryFile( - const Message& proto, const string& filename) { - WriteProtoToBinaryFile(proto, filename.c_str()); -} - -bool ReadFileToDatum(const string& filename, const int label, Datum* datum); - -inline bool ReadFileToDatum(const string& filename, Datum* datum) { - return ReadFileToDatum(filename, -1, datum); -} - -bool ReadImageToDatum(const string& filename, const int label, - const int height, const int width, const bool is_color, - const std::string & encoding, Datum* datum); - -inline bool ReadImageToDatum(const string& filename, const int label, - const int height, const int width, const bool is_color, Datum* datum) { - return ReadImageToDatum(filename, label, height, width, is_color, - "", datum); -} - -inline bool ReadImageToDatum(const string& filename, const int label, - const int height, const int width, Datum* datum) { - return ReadImageToDatum(filename, label, height, width, true, datum); -} - -inline bool ReadImageToDatum(const string& filename, const int label, - const bool is_color, Datum* datum) { - return ReadImageToDatum(filename, label, 0, 0, is_color, datum); -} - -inline bool ReadImageToDatum(const string& filename, const int label, - Datum* datum) { - return ReadImageToDatum(filename, label, 0, 0, true, datum); -} - -inline bool ReadImageToDatum(const string& filename, const int label, - const std::string & encoding, Datum* datum) { - return ReadImageToDatum(filename, label, 0, 0, true, encoding, datum); -} - -bool DecodeDatumNative(Datum* datum); -bool DecodeDatum(Datum* datum, bool is_color); - -#ifdef USE_OPENCV -cv::Mat ReadImageToCVMat(const string& filename, - const int height, const int width, const bool is_color); - -cv::Mat ReadImageToCVMat(const string& filename, - const int height, const int width); - -cv::Mat ReadImageToCVMat(const string& filename, - const bool is_color); - -cv::Mat ReadImageToCVMat(const string& filename); - -cv::Mat DecodeDatumToCVMatNative(const Datum& datum); -cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color); - -void CVMatToDatum(const cv::Mat& cv_img, Datum* datum); -#endif // USE_OPENCV - -} // namespace caffe - -#endif // CAFFE_UTIL_IO_H_ diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp deleted file mode 100644 index 9b8e47f..0000000 --- 
a/include/caffe/util/math_functions.hpp +++ /dev/null @@ -1,283 +0,0 @@ -#ifndef CAFFE_UTIL_MATH_FUNCTIONS_H_ -#define CAFFE_UTIL_MATH_FUNCTIONS_H_ - -#include -#include // for std::fabs and std::signbit - -#ifdef USE_GLOG -#include "glog/logging.h" -#endif -#ifndef NO_CAFFE_MOBILE -#include -#endif - -#include "caffe/common.hpp" -#include "caffe/util/device_alternate.hpp" -#include "caffe/util/mkl_alternate.hpp" - -namespace caffe { - -// Caffe gemm provides a simpler interface to the gemm functions, with the -// limitation that the data has to be contiguous in memory. -template -void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA, - const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, - const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, - Dtype* C); - -template -void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, - const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta, - Dtype* y); - -template -void caffe_axpy(const int N, const Dtype alpha, const Dtype* X, - Dtype* Y); - -template -void caffe_cpu_axpby(const int N, const Dtype alpha, const Dtype* X, - const Dtype beta, Dtype* Y); - -template -void caffe_copy(const int N, const Dtype *X, Dtype *Y); - -template -void caffe_set(const int N, const Dtype alpha, Dtype *X); - -inline void caffe_memset(const size_t N, const int alpha, void* X) { - memset(X, alpha, N); // NOLINT(caffe/alt_fn) -} - -template -void caffe_add_scalar(const int N, const Dtype alpha, Dtype *X); - -template -void caffe_scal(const int N, const Dtype alpha, Dtype *X); - -template -void caffe_sqr(const int N, const Dtype* a, Dtype* y); - -template -void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y); - -template -void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y); - -template -void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y); - -template -void caffe_div(const int N, const Dtype* a, const Dtype* b, Dtype* y); - -template -void caffe_powx(const int n, const Dtype* a, const Dtype b, Dtype* y); - -unsigned int caffe_rng_rand(); - -template -Dtype caffe_nextafter(const Dtype b); - -template -void caffe_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r); - -template -void caffe_rng_gaussian(const int n, const Dtype mu, const Dtype sigma, - Dtype* r); - -template -void caffe_rng_bernoulli(const int n, const Dtype p, int* r); - -template -void caffe_rng_bernoulli(const int n, const Dtype p, unsigned int* r); - -template -void caffe_exp(const int n, const Dtype* a, Dtype* y); - -template -void caffe_log(const int n, const Dtype* a, Dtype* y); - -template -void caffe_abs(const int n, const Dtype* a, Dtype* y); - -template -Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y); - -template -Dtype caffe_cpu_strided_dot(const int n, const Dtype* x, const int incx, - const Dtype* y, const int incy); - -// Returns the sum of the absolute values of the elements of vector x -template -Dtype caffe_cpu_asum(const int n, const Dtype* x); - -// the branchless, type-safe version from -// http://stackoverflow.com/questions/1903954/is-there-a-standard-sign-function-signum-sgn-in-c-c -template -inline int8_t caffe_sign(Dtype val) { - return (Dtype(0) < val) - (val < Dtype(0)); -} - -// The following two macros are modifications of DEFINE_VSL_UNARY_FUNC -// in include/caffe/util/mkl_alternate.hpp authored by @Rowland Depp. -// Please refer to commit 7e8ef25c7 of the boost-eigen branch. 
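caffe_cpu_gemm above wraps the cblas gemm calls for contiguous buffers that, as in stock Caffe, are treated as row-major. A small usage sketch with values chosen only so the result is easy to check by hand:

#include "caffe/util/math_functions.hpp"

void GemmExample() {
  // C (2x2) = 1.0 * A (2x3) * B (3x2) + 0.0 * C, all row-major and contiguous.
  float A[6] = {1, 2, 3,
                4, 5, 6};
  float B[6] = {1, 0,
                0, 1,
                1, 1};
  float C[4] = {0, 0, 0, 0};
  caffe::caffe_cpu_gemm<float>(CblasNoTrans, CblasNoTrans,
                               2 /*M*/, 2 /*N*/, 3 /*K*/,
                               1.0f, A, B, 0.0f, C);
  // C is now {4, 5, 10, 11}.
}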
-// Git cherry picking that commit caused a conflict hard to resolve and -// copying that file in convenient for code reviewing. -// So they have to be pasted here temporarily. -#define DEFINE_CAFFE_CPU_UNARY_FUNC(name, operation) \ - template \ - void caffe_cpu_##name(const int n, const Dtype* x, Dtype* y) { \ - CHECK_GT(n, 0); CHECK(x); CHECK(y); \ - for (int i = 0; i < n; ++i) { \ - operation; \ - } \ - } - -// output is 1 for the positives, 0 for zero, and -1 for the negatives -DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign(x[i])); - -// This returns a nonzero value if the input has its sign bit set. -// The name sngbit is meant to avoid conflicts with std::signbit in the macro. -// The extra parens are needed because CUDA < 6.5 defines signbit as a macro, -// and we don't want that to expand here when CUDA headers are also included. -DEFINE_CAFFE_CPU_UNARY_FUNC(sgnbit, \ - y[i] = static_cast((std::signbit)(x[i]))); - -DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i])); - -template -void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y); - -#ifndef CPU_ONLY // GPU - -// Decaf gpu gemm provides an interface that is almost the same as the cpu -// gemm function - following the c convention and calling the fortran-order -// gpu code under the hood. -template -void caffe_gpu_gemm(const CBLAS_TRANSPOSE TransA, - const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, - const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, - Dtype* C); - -template -void caffe_gpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, - const Dtype alpha, const Dtype* A, const Dtype* x, const Dtype beta, - Dtype* y); - -template -void caffe_gpu_axpy(const int N, const Dtype alpha, const Dtype* X, - Dtype* Y); - -template -void caffe_gpu_axpby(const int N, const Dtype alpha, const Dtype* X, - const Dtype beta, Dtype* Y); - -void caffe_gpu_memcpy(const size_t N, const void *X, void *Y); - -template -void caffe_gpu_set(const int N, const Dtype alpha, Dtype *X); - -inline void caffe_gpu_memset(const size_t N, const int alpha, void* X) { -#ifndef CPU_ONLY - CUDA_CHECK(cudaMemset(X, alpha, N)); // NOLINT(caffe/alt_fn) -#else - NO_GPU; -#endif -} - -template -void caffe_gpu_add_scalar(const int N, const Dtype alpha, Dtype *X); - -template -void caffe_gpu_scal(const int N, const Dtype alpha, Dtype *X); - -#ifndef CPU_ONLY -template -void caffe_gpu_scal(const int N, const Dtype alpha, Dtype* X, cudaStream_t str); -#endif - -template -void caffe_gpu_add(const int N, const Dtype* a, const Dtype* b, Dtype* y); - -template -void caffe_gpu_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y); - -template -void caffe_gpu_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y); - -template -void caffe_gpu_div(const int N, const Dtype* a, const Dtype* b, Dtype* y); - -template -void caffe_gpu_abs(const int n, const Dtype* a, Dtype* y); - -template -void caffe_gpu_exp(const int n, const Dtype* a, Dtype* y); - -template -void caffe_gpu_log(const int n, const Dtype* a, Dtype* y); - -template -void caffe_gpu_powx(const int n, const Dtype* a, const Dtype b, Dtype* y); - -// caffe_gpu_rng_uniform with two arguments generates integers in the range -// [0, UINT_MAX]. -void caffe_gpu_rng_uniform(const int n, unsigned int* r); - -// caffe_gpu_rng_uniform with four arguments generates floats in the range -// (a, b] (strictly greater than a, less than or equal to b) due to the -// specification of curandGenerateUniform. 
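For reference, expanding DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign(x[i])) from the macro above yields the following element-wise function inside namespace caffe:

// Expansion of DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign(x[i])):
template <typename Dtype>
void caffe_cpu_sign(const int n, const Dtype* x, Dtype* y) {
  CHECK_GT(n, 0); CHECK(x); CHECK(y);
  for (int i = 0; i < n; ++i) {
    y[i] = caffe_sign(x[i]);  // (0 < x) - (x < 0): +1, 0 or -1, with no branches
  }
}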
With a = 0, b = 1, just calls -// curandGenerateUniform; with other limits will shift and scale the outputs -// appropriately after calling curandGenerateUniform. -template -void caffe_gpu_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r); - -template -void caffe_gpu_rng_gaussian(const int n, const Dtype mu, const Dtype sigma, - Dtype* r); - -template -void caffe_gpu_rng_bernoulli(const int n, const Dtype p, int* r); - -template -void caffe_gpu_dot(const int n, const Dtype* x, const Dtype* y, Dtype* out); - -template -void caffe_gpu_asum(const int n, const Dtype* x, Dtype* y); - -template -void caffe_gpu_sign(const int n, const Dtype* x, Dtype* y); - -template -void caffe_gpu_sgnbit(const int n, const Dtype* x, Dtype* y); - -template -void caffe_gpu_fabs(const int n, const Dtype* x, Dtype* y); - -template -void caffe_gpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y); - -#define DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(name, operation) \ -template \ -__global__ void name##_kernel(const int n, const Dtype* x, Dtype* y) { \ - CUDA_KERNEL_LOOP(index, n) { \ - operation; \ - } \ -} \ -template <> \ -void caffe_gpu_##name(const int n, const float* x, float* y) { \ - /* NOLINT_NEXT_LINE(whitespace/operators) */ \ - name##_kernel<<>>( \ - n, x, y); \ -} \ -template <> \ -void caffe_gpu_##name(const int n, const double* x, double* y) { \ - /* NOLINT_NEXT_LINE(whitespace/operators) */ \ - name##_kernel<<>>( \ - n, x, y); \ -} - -#endif // !CPU_ONLY - -} // namespace caffe - -#endif // CAFFE_UTIL_MATH_FUNCTIONS_H_ diff --git a/include/caffe/util/mkl_alternate.hpp b/include/caffe/util/mkl_alternate.hpp deleted file mode 100644 index 95df0f9..0000000 --- a/include/caffe/util/mkl_alternate.hpp +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef CAFFE_UTIL_MKL_ALTERNATE_H_ -#define CAFFE_UTIL_MKL_ALTERNATE_H_ - -#ifdef USE_MKL - -#include - -#else // If use MKL, simply include the MKL header - -#ifdef USE_ACCELERATE -#include -#else -extern "C" { -#include -} -#endif // USE_ACCELERATE - -#include - -// Functions that caffe uses but are not present if MKL is not linked. - -// A simple way to define the vsl unary functions. The operation should -// be in the form e.g. y[i] = sqrt(a[i]) -#define DEFINE_VSL_UNARY_FUNC(name, operation) \ - template \ - void v##name(const int n, const Dtype* a, Dtype* y) { \ - CHECK_GT(n, 0); CHECK(a); CHECK(y); \ - for (int i = 0; i < n; ++i) { operation; } \ - } \ - inline void vs##name( \ - const int n, const float* a, float* y) { \ - v##name(n, a, y); \ - } \ - inline void vd##name( \ - const int n, const double* a, double* y) { \ - v##name(n, a, y); \ - } - -DEFINE_VSL_UNARY_FUNC(Sqr, y[i] = a[i] * a[i]); -DEFINE_VSL_UNARY_FUNC(Exp, y[i] = exp(a[i])); -DEFINE_VSL_UNARY_FUNC(Ln, y[i] = log(a[i])); -DEFINE_VSL_UNARY_FUNC(Abs, y[i] = fabs(a[i])); - -// A simple way to define the vsl unary functions with singular parameter b. -// The operation should be in the form e.g. 
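The comment above describes how the four-argument caffe_gpu_rng_uniform maps curand's (0, 1] output onto an arbitrary (a, b]. The affine step is shown here as a host-side helper purely to make the math explicit (the helper name is hypothetical; the real code stays on the device):

void ShiftScaleUniform(const int n, const float a, const float b, float* r) {
  // r[i] arrives as u in (0, 1] from curandGenerateUniform.
  for (int i = 0; i < n; ++i) {
    r[i] = a + (b - a) * r[i];   // (0, 1] mapped affinely onto (a, b]
  }
}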
y[i] = pow(a[i], b) -#define DEFINE_VSL_UNARY_FUNC_WITH_PARAM(name, operation) \ - template \ - void v##name(const int n, const Dtype* a, const Dtype b, Dtype* y) { \ - CHECK_GT(n, 0); CHECK(a); CHECK(y); \ - for (int i = 0; i < n; ++i) { operation; } \ - } \ - inline void vs##name( \ - const int n, const float* a, const float b, float* y) { \ - v##name(n, a, b, y); \ - } \ - inline void vd##name( \ - const int n, const double* a, const float b, double* y) { \ - v##name(n, a, b, y); \ - } - -DEFINE_VSL_UNARY_FUNC_WITH_PARAM(Powx, y[i] = pow(a[i], b)); - -// A simple way to define the vsl binary functions. The operation should -// be in the form e.g. y[i] = a[i] + b[i] -#define DEFINE_VSL_BINARY_FUNC(name, operation) \ - template \ - void v##name(const int n, const Dtype* a, const Dtype* b, Dtype* y) { \ - CHECK_GT(n, 0); CHECK(a); CHECK(b); CHECK(y); \ - for (int i = 0; i < n; ++i) { operation; } \ - } \ - inline void vs##name( \ - const int n, const float* a, const float* b, float* y) { \ - v##name(n, a, b, y); \ - } \ - inline void vd##name( \ - const int n, const double* a, const double* b, double* y) { \ - v##name(n, a, b, y); \ - } - -DEFINE_VSL_BINARY_FUNC(Add, y[i] = a[i] + b[i]); -DEFINE_VSL_BINARY_FUNC(Sub, y[i] = a[i] - b[i]); -DEFINE_VSL_BINARY_FUNC(Mul, y[i] = a[i] * b[i]); -DEFINE_VSL_BINARY_FUNC(Div, y[i] = a[i] / b[i]); - -// In addition, MKL comes with an additional function axpby that is not present -// in standard blas. We will simply use a two-step (inefficient, of course) way -// to mimic that. -inline void cblas_saxpby(const int N, const float alpha, const float* X, - const int incX, const float beta, float* Y, - const int incY) { - cblas_sscal(N, beta, Y, incY); - cblas_saxpy(N, alpha, X, incX, Y, incY); -} -inline void cblas_daxpby(const int N, const double alpha, const double* X, - const int incX, const double beta, double* Y, - const int incY) { - cblas_dscal(N, beta, Y, incY); - cblas_daxpy(N, alpha, X, incX, Y, incY); -} - -#endif // USE_MKL -#endif // CAFFE_UTIL_MKL_ALTERNATE_H_ diff --git a/include/caffe/util/rng.hpp b/include/caffe/util/rng.hpp deleted file mode 100644 index 7526470..0000000 --- a/include/caffe/util/rng.hpp +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef CAFFE_RNG_CPP_HPP_ -#define CAFFE_RNG_CPP_HPP_ - -#include -#include - -#ifdef USE_BOOST -#FIXME extract random generator from boost -#include "boost/random/mersenne_twister.hpp" -#include "boost/random/uniform_int.hpp" -#else -#include -#endif - -#include "caffe/common.hpp" - -namespace caffe { - -#ifdef USE_BOOST -typedef boost::mt19937 rng_t; -#else -typedef std::mt19937 rng_t; -#endif - -inline rng_t* caffe_rng() { - return static_cast(Caffe::rng_stream().generator()); -} - -// Fisher–Yates algorithm -template -inline void shuffle(RandomAccessIterator begin, RandomAccessIterator end, - RandomGenerator* gen) { -#ifdef NO_CAFFE_MOBILE - typedef typename std::iterator_traits::difference_type - difference_type; - typedef typename boost::uniform_int dist_type; - - difference_type length = std::distance(begin, end); - if (length <= 0) return; - - for (difference_type i = length - 1; i > 0; --i) { - dist_type dist(0, i); - std::iter_swap(begin + i, begin + dist(*gen)); - } -#else - NOT_IMPLEMENTED; -#endif -} - -template -inline void shuffle(RandomAccessIterator begin, RandomAccessIterator end) { -#ifdef NO_CAFFE_MOBILE - shuffle(begin, end, caffe_rng()); -#else - NOT_IMPLEMENTED; -#endif -} -} // namespace caffe - -#endif // CAFFE_RNG_HPP_ diff --git a/include/caffe/util/upgrade_proto.hpp 
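The cblas_saxpby fallback defined just above mimics MKL's axpby as scal followed by axpy, i.e. Y := alpha*X + beta*Y. A small worked example:

#include "caffe/util/mkl_alternate.hpp"

void AxpbyExample() {
  float X[3] = {1, 2, 3};
  float Y[3] = {10, 20, 30};
  // Y := alpha*X + beta*Y, done internally as cblas_sscal then cblas_saxpy.
  cblas_saxpby(3, 2.0f /*alpha*/, X, 1 /*incX*/, 0.5f /*beta*/, Y, 1 /*incY*/);
  // after scal: Y == {5, 10, 15}; after axpy: Y == {7, 14, 21}
}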
b/include/caffe/util/upgrade_proto.hpp deleted file mode 100644 index b145822..0000000 --- a/include/caffe/util/upgrade_proto.hpp +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef CAFFE_UTIL_UPGRADE_PROTO_H_ -#define CAFFE_UTIL_UPGRADE_PROTO_H_ - -#include - -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -// Return true iff the net is not the current version. -bool NetNeedsUpgrade(const NetParameter& net_param); - -// Check for deprecations and upgrade the NetParameter as needed. -bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param); - -// Read parameters from a file into a NetParameter proto message. -void ReadNetParamsFromTextFileOrDie(const string& param_file, - NetParameter* param); -void ReadNetParamsFromBinaryFileOrDie(const string& param_file, - NetParameter* param); - -// Return true iff any layer contains parameters specified using -// deprecated V0LayerParameter. -bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param); - -// Perform all necessary transformations to upgrade a V0NetParameter into a -// NetParameter (including upgrading padding layers and LayerParameters). -bool UpgradeV0Net(const NetParameter& v0_net_param, NetParameter* net_param); - -// Upgrade NetParameter with padding layers to pad-aware conv layers. -// For any padding layer, remove it and put its pad parameter in any layers -// taking its top blob as input. -// Error if any of these above layers are not-conv layers. -void UpgradeV0PaddingLayers(const NetParameter& param, - NetParameter* param_upgraded_pad); - -// Upgrade a single V0LayerConnection to the V1LayerParameter format. -bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, - V1LayerParameter* layer_param); - -V1LayerParameter_LayerType UpgradeV0LayerType(const string& type); - -// Return true iff any layer contains deprecated data transformation parameters. -bool NetNeedsDataUpgrade(const NetParameter& net_param); - -// Perform all necessary transformations to upgrade old transformation fields -// into a TransformationParameter. -void UpgradeNetDataTransformation(NetParameter* net_param); - -// Return true iff the Net contains any layers specified as V1LayerParameters. -bool NetNeedsV1ToV2Upgrade(const NetParameter& net_param); - -// Perform all necessary transformations to upgrade a NetParameter with -// deprecated V1LayerParameters. -bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param); - -bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param, - LayerParameter* layer_param); - -const char* UpgradeV1LayerType(const V1LayerParameter_LayerType type); - -// Return true iff the Net contains input fields. -bool NetNeedsInputUpgrade(const NetParameter& net_param); - -// Perform all necessary transformations to upgrade input fields into layers. -void UpgradeNetInput(NetParameter* net_param); - -// Return true iff the Net contains batch norm layers with manual local LRs. -bool NetNeedsBatchNormUpgrade(const NetParameter& net_param); - -// Perform all necessary transformations to upgrade batch norm layers. -void UpgradeNetBatchNorm(NetParameter* net_param); - -// Return true iff the solver contains any old solver_type specified as enums -bool SolverNeedsTypeUpgrade(const SolverParameter& solver_param); - -bool UpgradeSolverType(SolverParameter* solver_param); - -// Check for deprecations and upgrade the SolverParameter as needed. -bool UpgradeSolverAsNeeded(const string& param_file, SolverParameter* param); - -// Read parameters from a file into a SolverParameter proto message. 
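Most callers go through ReadNetParamsFromTextFileOrDie / ReadNetParamsFromBinaryFileOrDie declared above, which in stock Caffe parse the file and then run the upgrade path for deprecated formats. A hedged usage sketch with a hypothetical file name; the explicit two-step variant is shown in comments:

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/upgrade_proto.hpp"

void LoadNetParam() {
  caffe::NetParameter net_param;
  caffe::ReadNetParamsFromTextFileOrDie("deploy.prototxt", &net_param);

  // Roughly equivalent manual flow using the finer-grained helpers:
  //   caffe::ReadProtoFromTextFileOrDie("deploy.prototxt", &net_param);
  //   if (caffe::NetNeedsUpgrade(net_param)) {
  //     caffe::UpgradeNetAsNeeded("deploy.prototxt", &net_param);
  //   }
}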
-void ReadSolverParamsFromTextFileOrDie(const string& param_file, - SolverParameter* param); - -} // namespace caffe - -#endif // CAFFE_UTIL_UPGRADE_PROTO_H_ diff --git a/patch/diff.py b/patch/diff.py deleted file mode 100755 index b48d51a..0000000 --- a/patch/diff.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python -import os -caffe_dir='caffe' - -# @brief Diff -def dir_diff(left_dir, right_dir, output_dir): - for root, dirs, files in os.walk(right_dir): - for f in files: - right_file = os.path.join(root, f) - if f.find('hpp') != -1 or f.find('cpp') != -1 or f.find('proto') != -1: - relpath = os.path.relpath(right_file, right_dir) - left_file = os.path.join(left_dir, relpath) - output_file = os.path.join(output_dir, relpath) + '.patch' - d = os.path.dirname(output_file) - if not os.path.isdir(d): - os.makedirs(d) - os.system("diff -u %s %s > %s" % (left_file, right_file, output_file)) - if os.path.getsize(output_file) == 0: - os.remove(output_file) - -dir_diff(caffe_dir + '/include', '../include', 'include') -dir_diff(caffe_dir + '/src', '../src', 'src') -dir_diff(caffe_dir + '/tools', '../tools', 'tools') diff --git a/patch/include/caffe/caffe.hpp.patch b/patch/include/caffe/caffe.hpp.patch deleted file mode 100644 index 3baa3b6..0000000 --- a/patch/include/caffe/caffe.hpp.patch +++ /dev/null @@ -1,17 +0,0 @@ ---- caffe/include/caffe/caffe.hpp 2017-01-27 09:51:55.335623700 +0800 -+++ ../include/caffe/caffe.hpp 2017-02-04 22:35:34.594153197 +0800 -@@ -10,10 +10,14 @@ - #include "caffe/layer.hpp" - #include "caffe/layer_factory.hpp" - #include "caffe/net.hpp" -+#ifdef NO_CAFFE_MOBILE - #include "caffe/parallel.hpp" -+#endif - #include "caffe/proto/caffe.pb.h" -+#ifdef NO_CAFFE_MOBILE - #include "caffe/solver.hpp" - #include "caffe/solver_factory.hpp" -+#endif - #include "caffe/util/benchmark.hpp" - #include "caffe/util/io.hpp" - #include "caffe/util/upgrade_proto.hpp" diff --git a/patch/include/caffe/common.hpp.patch b/patch/include/caffe/common.hpp.patch deleted file mode 100644 index ca53538..0000000 --- a/patch/include/caffe/common.hpp.patch +++ /dev/null @@ -1,52 +0,0 @@ ---- caffe/include/caffe/common.hpp 2017-01-27 09:51:55.340623900 +0800 -+++ ../include/caffe/common.hpp 2017-02-04 22:42:22.849568507 +0800 -@@ -1,9 +1,19 @@ - #ifndef CAFFE_COMMON_HPP_ - #define CAFFE_COMMON_HPP_ - -+#ifdef USE_BOOST - #include -+#else -+#include -+#endif -+#ifdef NO_CAFFE_MOBILE - #include -+#endif -+#ifdef USE_GLOG - #include -+#else -+#include "caffe/glog_wrapper.hpp" -+#endif - - #include - #include -@@ -22,6 +32,7 @@ - #define STRINGIFY(m) #m - #define AS_STRING(m) STRINGIFY(m) - -+#ifdef NO_CAFFE_MOBILE - // gflags 2.1 issue: namespace google was changed to gflags without warning. - // Luckily we will be able to use GFLAGS_GFLAGS_H_ to detect if it is version - // 2.1. If yes, we will add a temporary solution to redirect the namespace. -@@ -30,6 +41,7 @@ - #ifndef GFLAGS_GFLAGS_H_ - namespace gflags = google; - #endif // GFLAGS_GFLAGS_H_ -+#endif // NO_CAFFE_MOBILE - - // Disable the copy and assignment operator for a class. - #define DISABLE_COPY_AND_ASSIGN(classname) \ -@@ -74,9 +86,13 @@ - - namespace caffe { - -+#ifdef USE_BOOST - // We will use the boost shared_ptr instead of the new C++11 one mainly - // because cuda does not work (at least now) well with C++11 features. - using boost::shared_ptr; -+#else -+using std::shared_ptr; -+#endif - - // Common functions and classes from std that caffe often uses. 
- using std::fstream; diff --git a/patch/include/caffe/layers/base_data_layer.hpp.patch b/patch/include/caffe/layers/base_data_layer.hpp.patch deleted file mode 100644 index b48a1c1..0000000 --- a/patch/include/caffe/layers/base_data_layer.hpp.patch +++ /dev/null @@ -1,28 +0,0 @@ ---- caffe/include/caffe/layers/base_data_layer.hpp 2017-01-27 09:51:55.386123700 +0800 -+++ ../include/caffe/layers/base_data_layer.hpp 2017-02-06 20:20:48.220476426 +0800 -@@ -5,7 +5,9 @@ - - #include "caffe/blob.hpp" - #include "caffe/data_transformer.hpp" -+#ifdef USE_BOOST - #include "caffe/internal_thread.hpp" -+#endif - #include "caffe/layer.hpp" - #include "caffe/proto/caffe.pb.h" - #include "caffe/util/blocking_queue.hpp" -@@ -51,6 +53,7 @@ - Blob data_, label_; - }; - -+#ifdef NO_CAFFE_MOBILE - template - class BasePrefetchingDataLayer : - public BaseDataLayer, public InternalThread { -@@ -78,6 +81,7 @@ - - Blob transformed_data_; - }; -+#endif // NO_CAFFE_MOBILE - - } // namespace caffe - diff --git a/patch/include/caffe/net.hpp.patch b/patch/include/caffe/net.hpp.patch deleted file mode 100644 index cab9915..0000000 --- a/patch/include/caffe/net.hpp.patch +++ /dev/null @@ -1,45 +0,0 @@ ---- caffe/include/caffe/net.hpp 2017-01-27 09:51:55.678623300 +0800 -+++ ../include/caffe/net.hpp 2017-02-04 22:35:34.598149613 +0800 -@@ -38,8 +38,10 @@ - const vector*>& Forward(Dtype* loss = NULL); - /// @brief DEPRECATED; use Forward() instead. - const vector*>& ForwardPrefilled(Dtype* loss = NULL) { -+#ifdef USE_GLOG - LOG_EVERY_N(WARNING, 1000) << "DEPRECATED: ForwardPrefilled() " - << "will be removed in a future version. Use Forward()."; -+#endif - return Forward(loss); - } - -@@ -64,6 +66,7 @@ - */ - void ClearParamDiffs(); - -+#ifdef ENABLE_BACKWARD - /** - * The network backward should take no input and output, since it solely - * computes the gradient w.r.t the parameters, and the data has already been -@@ -73,6 +76,7 @@ - void BackwardFromTo(int start, int end); - void BackwardFrom(int start); - void BackwardTo(int end); -+#endif - - /** - * @brief Reshape all layers from bottom to top. -@@ -82,6 +86,7 @@ - */ - void Reshape(); - -+#ifdef ENABLE_BACKWARD - Dtype ForwardBackward() { - Dtype loss; - Forward(&loss); -@@ -91,6 +96,7 @@ - - /// @brief Updates the network weights based on the diff values computed. - void Update(); -+#endif - /** - * @brief Shares weight data of owner blobs with shared blobs. 
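With Backward()/Update() compiled out unless ENABLE_BACKWARD is defined, the net.hpp patched above is used strictly for inference. A minimal forward-only sketch, assuming the stock Net(param_file, phase) constructor and hypothetical model file names:

#include <vector>
#include "caffe/caffe.hpp"

void RunForwardOnce() {
  caffe::Net<float> net("deploy.prototxt", caffe::TEST);
  net.CopyTrainedLayersFrom("weights.caffemodel");
  // Input blobs are assumed to have been filled elsewhere before this call.
  float loss = 0;
  const std::vector<caffe::Blob<float>*>& out = net.Forward(&loss);
  (void)out;  // the network's output blobs
}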
- * diff --git a/patch/include/caffe/util/benchmark.hpp.patch b/patch/include/caffe/util/benchmark.hpp.patch deleted file mode 100644 index 70ba952..0000000 --- a/patch/include/caffe/util/benchmark.hpp.patch +++ /dev/null @@ -1,28 +0,0 @@ ---- caffe/include/caffe/util/benchmark.hpp 2017-01-27 09:51:55.716623000 +0800 -+++ ../include/caffe/util/benchmark.hpp 2017-02-04 22:35:34.598149613 +0800 -@@ -1,7 +1,11 @@ - #ifndef CAFFE_UTIL_BENCHMARK_H_ - #define CAFFE_UTIL_BENCHMARK_H_ - -+#ifdef USE_BOOST - #include -+#else -+#include -+#endif - - #include "caffe/util/device_alternate.hpp" - -@@ -31,8 +35,13 @@ - cudaEvent_t start_gpu_; - cudaEvent_t stop_gpu_; - #endif -+#ifdef USE_BOOST - boost::posix_time::ptime start_cpu_; - boost::posix_time::ptime stop_cpu_; -+#else -+ struct timeval start_cpu_; -+ struct timeval stop_cpu_; -+#endif - float elapsed_milliseconds_; - float elapsed_microseconds_; - }; diff --git a/patch/include/caffe/util/io.hpp.patch b/patch/include/caffe/util/io.hpp.patch deleted file mode 100644 index 7b065d5..0000000 --- a/patch/include/caffe/util/io.hpp.patch +++ /dev/null @@ -1,28 +0,0 @@ ---- caffe/include/caffe/util/io.hpp 2017-01-27 09:51:55.768122600 +0800 -+++ ../include/caffe/util/io.hpp 2017-02-04 22:35:34.598149613 +0800 -@@ -1,7 +1,9 @@ - #ifndef CAFFE_UTIL_IO_H_ - #define CAFFE_UTIL_IO_H_ - -+#ifdef USE_BOOST - #include -+#endif - #include - #include // NOLINT(readability/streams) - #include -@@ -19,6 +21,7 @@ - namespace caffe { - - using ::google::protobuf::Message; -+#ifdef USE_BOOST - using ::boost::filesystem::path; - - inline void MakeTempDir(string* temp_dirname) { -@@ -48,6 +51,7 @@ - *temp_filename = - (temp_files_subpath/caffe::format_int(next_temp_file++, 9)).string(); - } -+#endif - - bool ReadProtoFromTextFile(const char* filename, Message* proto); - diff --git a/patch/include/caffe/util/math_functions.hpp.patch b/patch/include/caffe/util/math_functions.hpp.patch deleted file mode 100644 index 8ec458a..0000000 --- a/patch/include/caffe/util/math_functions.hpp.patch +++ /dev/null @@ -1,15 +0,0 @@ ---- caffe/include/caffe/util/math_functions.hpp 2017-01-27 09:51:55.772623200 +0800 -+++ ../include/caffe/util/math_functions.hpp 2017-02-04 22:45:32.054023741 +0800 -@@ -4,7 +4,12 @@ - #include - #include // for std::fabs and std::signbit - -+#ifdef USE_GLOG - #include "glog/logging.h" -+#endif -+#ifndef NO_CAFFE_MOBILE -+#include -+#endif - - #include "caffe/common.hpp" - #include "caffe/util/device_alternate.hpp" diff --git a/patch/include/caffe/util/rng.hpp.patch b/patch/include/caffe/util/rng.hpp.patch deleted file mode 100644 index ecdcde4..0000000 --- a/patch/include/caffe/util/rng.hpp.patch +++ /dev/null @@ -1,53 +0,0 @@ ---- caffe/include/caffe/util/rng.hpp 2017-01-27 09:51:55.785123500 +0800 -+++ ../include/caffe/util/rng.hpp 2017-02-04 22:35:34.598149613 +0800 -@@ -4,14 +4,23 @@ - #include - #include - -+#ifdef USE_BOOST -+#FIXME extract random generator from boost - #include "boost/random/mersenne_twister.hpp" - #include "boost/random/uniform_int.hpp" -+#else -+#include -+#endif - - #include "caffe/common.hpp" - - namespace caffe { - -+#ifdef USE_BOOST - typedef boost::mt19937 rng_t; -+#else -+typedef std::mt19937 rng_t; -+#endif - - inline rng_t* caffe_rng() { - return static_cast(Caffe::rng_stream().generator()); -@@ -21,6 +30,7 @@ - template - inline void shuffle(RandomAccessIterator begin, RandomAccessIterator end, - RandomGenerator* gen) { -+#ifdef NO_CAFFE_MOBILE - typedef typename std::iterator_traits::difference_type - difference_type; - 
typedef typename boost::uniform_int dist_type; -@@ -32,11 +42,18 @@ - dist_type dist(0, i); - std::iter_swap(begin + i, begin + dist(*gen)); - } -+#else -+ NOT_IMPLEMENTED; -+#endif - } - - template - inline void shuffle(RandomAccessIterator begin, RandomAccessIterator end) { -+#ifdef NO_CAFFE_MOBILE - shuffle(begin, end, caffe_rng()); -+#else -+ NOT_IMPLEMENTED; -+#endif - } - } // namespace caffe - diff --git a/patch/patch.py b/patch/patch.py deleted file mode 100755 index c2c045e..0000000 --- a/patch/patch.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python -import os -import shutil -caffe_dir='caffe' - -# @brief Patch -def dir_patch(left_dir, right_dir, diff_dir): - for root, dirs, files in os.walk(right_dir): - for f in files: - right_file = os.path.join(root, f) - if f.find('hpp') != -1 or f.find('cpp') != -1 or f.find('proto') != -1: - relpath = os.path.relpath(right_file, right_dir) - left_file = os.path.join(left_dir, relpath) - diff_file = os.path.join(diff_dir, relpath) + '.patch' - if os.path.isfile(left_file): - if os.path.isfile(diff_file): - print "patch %s %s -o %s" % (left_file, diff_file, right_file) - os.system("patch %s %s -o %s" % (left_file, diff_file, right_file)) - else: - print 'cp %s %s' % (left_file, right_file) - shutil.copyfile(left_file, right_file) - -dir_patch(caffe_dir + '/include', '../include', 'include') -dir_patch(caffe_dir + '/src', '../src', 'src') -dir_patch(caffe_dir + '/tools', '../tools', 'tools') diff --git a/patch/src/caffe/common.cpp.patch b/patch/src/caffe/common.cpp.patch deleted file mode 100644 index 416ec44..0000000 --- a/patch/src/caffe/common.cpp.patch +++ /dev/null @@ -1,66 +0,0 @@ ---- caffe/src/caffe/common.cpp 2017-01-27 09:51:56.283123700 +0800 -+++ ../src/caffe/common.cpp 2017-02-04 23:04:01.367500325 +0800 -@@ -1,5 +1,9 @@ -+#ifdef USE_BOOST - #include -+#endif -+#ifdef USE_GLOG - #include -+#endif - #include - #include - #include -@@ -7,16 +11,36 @@ - #include "caffe/common.hpp" - #include "caffe/util/rng.hpp" - -+#ifndef NO_CAFFE_MOBILE -+#include -+#endif -+ - namespace caffe { - -+#ifdef USE_BOOST - // Make sure each thread can have different values. - static boost::thread_specific_ptr thread_instance_; -+#else -+thread_local static Caffe *thread_instance_ = NULL; -+#endif -+ -+#ifndef USE_GLOG -+nullstream __nullstream; -+bool LogMessage::enable = true; -+#endif - - Caffe& Caffe::Get() { -+#ifdef USE_BOOST - if (!thread_instance_.get()) { - thread_instance_.reset(new Caffe()); - } - return *(thread_instance_.get()); -+#else -+ if (thread_instance_ == NULL) { -+ thread_instance_ = new Caffe(); -+ } -+ return *thread_instance_; -+#endif - } - - // random seeding -@@ -41,12 +65,16 @@ - - - void GlobalInit(int* pargc, char*** pargv) { -+#ifdef NO_CAFFE_MOBILE - // Google flags. - ::gflags::ParseCommandLineFlags(pargc, pargv, true); -+#endif -+#ifdef USE_GLOG - // Google logging. - ::google::InitGoogleLogging(*(pargv)[0]); - // Provide a backtrace on segfault. - ::google::InstallFailureSignalHandler(); -+#endif - } - - #ifdef CPU_ONLY // CPU-only Caffe. 
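The common.cpp patch above swaps boost::thread_specific_ptr for a C++11 thread_local pointer. The pattern in isolation, with a hypothetical class name; note that unlike thread_specific_ptr nothing deletes the per-thread instance at thread exit, which is acceptable for a process-lifetime singleton:

class Context {
 public:
  static Context& Get() {
    static thread_local Context* instance = NULL;  // one slot per thread
    if (instance == NULL) {
      instance = new Context();                    // lazily constructed on first use
    }
    return *instance;
  }
 private:
  Context() {}
};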
diff --git a/patch/src/caffe/layers/base_data_layer.cpp.patch b/patch/src/caffe/layers/base_data_layer.cpp.patch deleted file mode 100644 index 9cd433e..0000000 --- a/patch/src/caffe/layers/base_data_layer.cpp.patch +++ /dev/null @@ -1,36 +0,0 @@ ---- caffe/src/caffe/layers/base_data_layer.cpp 2017-01-27 09:51:56.330123800 +0800 -+++ ../src/caffe/layers/base_data_layer.cpp 2017-02-06 20:20:48.228476426 +0800 -@@ -1,9 +1,13 @@ -+#ifdef USE_BOOST - #include -+#endif - #include - - #include "caffe/blob.hpp" - #include "caffe/data_transformer.hpp" -+#ifdef USE_BOOST - #include "caffe/internal_thread.hpp" -+#endif - #include "caffe/layer.hpp" - #include "caffe/layers/base_data_layer.hpp" - #include "caffe/proto/caffe.pb.h" -@@ -32,6 +36,7 @@ - DataLayerSetUp(bottom, top); - } - -+#ifdef NO_CAFFE_MOBILE - template - BasePrefetchingDataLayer::BasePrefetchingDataLayer( - const LayerParameter& param) -@@ -129,8 +134,11 @@ - #ifdef CPU_ONLY - STUB_GPU_FORWARD(BasePrefetchingDataLayer, Forward); - #endif -+#endif // NO_CAFFE_MOBILE - - INSTANTIATE_CLASS(BaseDataLayer); -+#ifdef NO_CAFFE_MOBILE - INSTANTIATE_CLASS(BasePrefetchingDataLayer); -+#endif // NO_CAFFE_MOBILE - - } // namespace caffe diff --git a/patch/src/caffe/net.cpp.patch b/patch/src/caffe/net.cpp.patch deleted file mode 100644 index 6490cf0..0000000 --- a/patch/src/caffe/net.cpp.patch +++ /dev/null @@ -1,157 +0,0 @@ ---- caffe/src/caffe/net.cpp 2017-01-27 09:51:56.935642700 +0800 -+++ ../src/caffe/net.cpp 2017-02-06 20:20:48.232476426 +0800 -@@ -5,19 +5,27 @@ - #include - #include - -+#ifdef USE_HDF5 - #include "hdf5.h" -+#endif - - #include "caffe/common.hpp" - #include "caffe/layer.hpp" - #include "caffe/net.hpp" -+#ifdef NO_CAFFE_MOBILE - #include "caffe/parallel.hpp" -+#endif - #include "caffe/proto/caffe.pb.h" -+#ifdef USE_HDF5 - #include "caffe/util/hdf5.hpp" -+#endif - #include "caffe/util/insert_splits.hpp" - #include "caffe/util/math_functions.hpp" - #include "caffe/util/upgrade_proto.hpp" - -+#ifdef NO_CAFFE_MOBILE - #include "caffe/test/test_caffe_main.hpp" -+#endif - - namespace caffe { - -@@ -556,8 +564,10 @@ - template - const vector*>& Net::Forward( - const vector*> & bottom, Dtype* loss) { -+#ifdef USE_GLOG - LOG_EVERY_N(WARNING, 1000) << "DEPRECATED: Forward(bottom, loss) " - << "will be removed in a future version. 
Use Forward(loss)."; -+#endif - // Copy bottom to net bottoms - for (int i = 0; i < bottom.size(); ++i) { - net_input_blobs_[i]->CopyFrom(*bottom[i]); -@@ -565,6 +575,7 @@ - return Forward(loss); - } - -+#ifdef ENABLE_BACKWARD - template - void Net::BackwardFromTo(int start, int end) { - CHECK_GE(end, 0); -@@ -583,6 +594,7 @@ - } - } - } -+#endif - - template - void Net::ForwardDebugInfo(const int layer_id) { -@@ -610,6 +622,7 @@ - } - } - -+#ifdef ENABLE_BACKWARD - template - void Net::BackwardDebugInfo(const int layer_id) { - const vector*>& bottom_vec = bottom_vecs_[layer_id]; -@@ -636,6 +649,7 @@ - << " diff: " << diff_abs_val_mean; - } - } -+#endif - - template - void Net::UpdateDebugInfo(const int param_id) { -@@ -651,6 +665,7 @@ - << ", param " << param_display_name - << " data: " << data_abs_val_mean - << "; diff: " << diff_abs_val_mean; -+# - } else { - const string& owner_layer_name = - layer_names_[param_layer_indices_[param_owner].first]; -@@ -695,6 +710,7 @@ - } - } - -+#ifdef ENABLE_BACKWARD - template - void Net::BackwardFrom(int start) { - BackwardFromTo(start, 0); -@@ -730,6 +746,7 @@ - layers_[i]->Reshape(bottom_vecs_[i], top_vecs_[i]); - } - } -+#endif - - template - void Net::CopyTrainedLayersFrom(const NetParameter& param) { -@@ -771,10 +788,14 @@ - - template - void Net::CopyTrainedLayersFrom(const string trained_filename) { -+#ifdef USE_HDF5 - if (trained_filename.size() >= 3 && - trained_filename.compare(trained_filename.size() - 3, 3, ".h5") == 0) { - CopyTrainedLayersFromHDF5(trained_filename); - } else { -+#else -+ { -+#endif - CopyTrainedLayersFromBinaryProto(trained_filename); - } - } -@@ -787,8 +808,10 @@ - CopyTrainedLayersFrom(param); - } - -+ - template - void Net::CopyTrainedLayersFromHDF5(const string trained_filename) { -+#ifdef USE_HDF5 - hid_t file_hid = H5Fopen(trained_filename.c_str(), H5F_ACC_RDONLY, - H5P_DEFAULT); - CHECK_GE(file_hid, 0) << "Couldn't open " << trained_filename; -@@ -835,6 +858,7 @@ - } - H5Gclose(data_hid); - H5Fclose(file_hid); -+#endif - } - - template -@@ -851,6 +875,7 @@ - - template - void Net::ToHDF5(const string& filename, bool write_diff) const { -+#ifdef USE_HDF5 - hid_t file_hid = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, - H5P_DEFAULT); - CHECK_GE(file_hid, 0) -@@ -904,14 +929,17 @@ - H5Gclose(diff_hid); - } - H5Fclose(file_hid); -+#endif - } - -+#ifdef ENABLE_BACKWARD - template - void Net::Update() { - for (int i = 0; i < learnable_params_.size(); ++i) { - learnable_params_[i]->Update(); - } - } -+#endif - - template - void Net::ClearParamDiffs() { diff --git a/patch/src/caffe/util/benchmark.cpp.patch b/patch/src/caffe/util/benchmark.cpp.patch deleted file mode 100644 index fbdc08c..0000000 --- a/patch/src/caffe/util/benchmark.cpp.patch +++ /dev/null @@ -1,111 +0,0 @@ ---- caffe/src/caffe/util/benchmark.cpp 2017-01-27 09:51:57.449642800 +0800 -+++ ../src/caffe/util/benchmark.cpp 2017-02-04 23:00:28.602281548 +0800 -@@ -1,4 +1,6 @@ -+#ifdef USE_BOOST - #include -+#endif - - #include "caffe/common.hpp" - #include "caffe/util/benchmark.hpp" -@@ -32,7 +34,11 @@ - NO_GPU; - #endif - } else { -+#ifdef USE_BOOST - start_cpu_ = boost::posix_time::microsec_clock::local_time(); -+#else -+ gettimeofday(&start_cpu_, NULL); -+#endif - } - running_ = true; - has_run_at_least_once_ = true; -@@ -48,7 +54,11 @@ - NO_GPU; - #endif - } else { -+#ifdef USE_BOOST - stop_cpu_ = boost::posix_time::microsec_clock::local_time(); -+#else -+ gettimeofday(&stop_cpu_, NULL); -+#endif - } - running_ = false; - } -@@ -74,7 +84,12 @@ - 
NO_GPU; - #endif - } else { -+#ifdef USE_BOOST - elapsed_microseconds_ = (stop_cpu_ - start_cpu_).total_microseconds(); -+#else -+ elapsed_microseconds_ = (stop_cpu_.tv_sec - start_cpu_.tv_sec)*1000000 -+ + (stop_cpu_.tv_usec - start_cpu_.tv_usec); -+#endif - } - return elapsed_microseconds_; - } -@@ -96,7 +111,12 @@ - NO_GPU; - #endif - } else { -+#ifdef USE_BOOST - elapsed_milliseconds_ = (stop_cpu_ - start_cpu_).total_milliseconds(); -+#else -+ elapsed_microseconds_ = (stop_cpu_.tv_sec - start_cpu_.tv_sec)*1000 -+ + (stop_cpu_.tv_usec - start_cpu_.tv_usec)/1000.0; -+#endif - } - return elapsed_milliseconds_; - } -@@ -127,7 +147,11 @@ - - void CPUTimer::Start() { - if (!running()) { -+#ifdef USE_BOOST - this->start_cpu_ = boost::posix_time::microsec_clock::local_time(); -+#else -+ gettimeofday(&start_cpu_, NULL); -+#endif - this->running_ = true; - this->has_run_at_least_once_ = true; - } -@@ -135,7 +159,11 @@ - - void CPUTimer::Stop() { - if (running()) { -+#ifdef USE_BOOST - this->stop_cpu_ = boost::posix_time::microsec_clock::local_time(); -+#else -+ gettimeofday(&stop_cpu_, NULL); -+#endif - this->running_ = false; - } - } -@@ -148,8 +176,13 @@ - if (running()) { - Stop(); - } -+#ifdef USE_BOOST - this->elapsed_milliseconds_ = (this->stop_cpu_ - - this->start_cpu_).total_milliseconds(); -+#else -+ elapsed_milliseconds_ = (stop_cpu_.tv_sec - start_cpu_.tv_sec)*1000 -+ + (stop_cpu_.tv_usec - start_cpu_.tv_usec)/1000.0; -+#endif - return this->elapsed_milliseconds_; - } - -@@ -161,8 +194,13 @@ - if (running()) { - Stop(); - } -+#ifdef USE_BOOST - this->elapsed_microseconds_ = (this->stop_cpu_ - - this->start_cpu_).total_microseconds(); -+#else -+ elapsed_microseconds_ = (stop_cpu_.tv_sec - start_cpu_.tv_sec)*1000000 -+ + (stop_cpu_.tv_usec - start_cpu_.tv_usec); -+#endif - return this->elapsed_microseconds_; - } - diff --git a/patch/src/caffe/util/io.cpp.patch b/patch/src/caffe/util/io.cpp.patch deleted file mode 100644 index 2da7aea..0000000 --- a/patch/src/caffe/util/io.cpp.patch +++ /dev/null @@ -1,12 +0,0 @@ ---- caffe/src/caffe/util/io.cpp 2017-01-27 09:51:57.498643500 +0800 -+++ ../src/caffe/util/io.cpp 2017-02-04 23:01:28.591786478 +0800 -@@ -18,6 +18,9 @@ - #include "caffe/common.hpp" - #include "caffe/proto/caffe.pb.h" - #include "caffe/util/io.hpp" -+#ifndef NO_CAFFE_MOBILE -+#include -+#endif - - const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte. 
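With the benchmark patch above applied, Timer/CPUTimer measure wall-clock time with gettimeofday instead of boost::posix_time, and their interface is unchanged. A hedged usage sketch timing a forward pass:

#include "caffe/caffe.hpp"
#include "caffe/util/benchmark.hpp"

void TimeForwardPass(caffe::Net<float>& net) {
  caffe::CPUTimer timer;
  timer.Start();
  float loss = 0;
  net.Forward(&loss);          // the work being measured
  timer.Stop();
  LOG(INFO) << "Forward: " << timer.MilliSeconds() << " ms ("
            << timer.MicroSeconds() << " us)";
}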
- diff --git a/patch/src/caffe/util/math_functions.cpp.patch b/patch/src/caffe/util/math_functions.cpp.patch deleted file mode 100644 index 8b6ba4e..0000000 --- a/patch/src/caffe/util/math_functions.cpp.patch +++ /dev/null @@ -1,104 +0,0 @@ ---- caffe/src/caffe/util/math_functions.cpp 2017-01-27 09:51:57.502642700 +0800 -+++ ../src/caffe/util/math_functions.cpp 2017-02-04 23:03:03.164175136 +0800 -@@ -1,5 +1,7 @@ -+#ifdef USE_BOOST - #include - #include -+#endif - - #include - -@@ -232,8 +234,13 @@ - - template - Dtype caffe_nextafter(const Dtype b) { -+#ifdef USE_BOOST - return boost::math::nextafter( - b, std::numeric_limits::max()); -+#else -+ return std::nextafter( -+ b, std::numeric_limits::max()); -+#endif - } - - template -@@ -247,12 +254,19 @@ - CHECK_GE(n, 0); - CHECK(r); - CHECK_LE(a, b); -+#ifdef USE_BOOST - boost::uniform_real random_distribution(a, caffe_nextafter(b)); - boost::variate_generator > - variate_generator(caffe_rng(), random_distribution); - for (int i = 0; i < n; ++i) { - r[i] = variate_generator(); - } -+#else -+ std::uniform_real_distribution random_distribution(a, caffe_nextafter(b)); -+ for (int i = 0; i < n; ++i) { -+ r[i] = random_distribution(*caffe_rng()); -+ } -+#endif - } - - template -@@ -269,12 +283,19 @@ - CHECK_GE(n, 0); - CHECK(r); - CHECK_GT(sigma, 0); -+#ifdef USE_BOOST - boost::normal_distribution random_distribution(a, sigma); - boost::variate_generator > - variate_generator(caffe_rng(), random_distribution); - for (int i = 0; i < n; ++i) { - r[i] = variate_generator(); - } -+#else -+ std::normal_distribution random_distribution(a, sigma); -+ for (int i = 0; i < n; ++i) { -+ r[i] = random_distribution(*caffe_rng()); -+ } -+#endif - } - - template -@@ -291,12 +312,19 @@ - CHECK(r); - CHECK_GE(p, 0); - CHECK_LE(p, 1); -+#ifdef USE_BOOST - boost::bernoulli_distribution random_distribution(p); - boost::variate_generator > - variate_generator(caffe_rng(), random_distribution); - for (int i = 0; i < n; ++i) { - r[i] = variate_generator(); - } -+#else -+ std::bernoulli_distribution random_distribution(p); -+ for (int i = 0; i < n; ++i) { -+ r[i] = random_distribution(*caffe_rng()); -+ } -+#endif - } - - template -@@ -311,12 +339,19 @@ - CHECK(r); - CHECK_GE(p, 0); - CHECK_LE(p, 1); -+#ifdef USE_BOOST - boost::bernoulli_distribution random_distribution(p); - boost::variate_generator > - variate_generator(caffe_rng(), random_distribution); - for (int i = 0; i < n; ++i) { - r[i] = static_cast(variate_generator()); - } -+#else -+ std::bernoulli_distribution random_distribution(p); -+ for (int i = 0; i < n; ++i) { -+ r[i] = static_cast(random_distribution(*caffe_rng())); -+ } -+#endif - } - - template diff --git a/patch/synced_version b/patch/synced_version deleted file mode 100644 index c7e9907..0000000 --- a/patch/synced_version +++ /dev/null @@ -1 +0,0 @@ -https://github.com/BVLC/caffe/commit/746a77e6d55cf16d9b2d4ccd71e49774604e86f6 diff --git a/patch/tools/caffe.cpp.patch b/patch/tools/caffe.cpp.patch deleted file mode 100644 index 650c3bb..0000000 --- a/patch/tools/caffe.cpp.patch +++ /dev/null @@ -1,157 +0,0 @@ ---- caffe/tools/caffe.cpp 2017-01-27 09:51:57.559643300 +0800 -+++ ../tools/caffe.cpp 2017-02-04 22:35:34.674081512 +0800 -@@ -3,23 +3,29 @@ - namespace bp = boost::python; - #endif - --#include --#include -- - #include - #include - #include - #include - -+#ifdef USE_BOOST - #include "boost/algorithm/string.hpp" -+#endif - #include "caffe/caffe.hpp" -+#ifdef NO_CAFFE_MOBILE - #include "caffe/util/signal_handler.h" -+#else -+#include 
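The math_functions.cpp patch above replaces boost's variate_generator with the C++11 <random> idiom: one engine, one distribution object, and samples drawn by invoking the distribution with the engine. A stand-alone sketch of the same pattern (caffe_rng_uniform itself keeps using the shared caffe_rng() engine):

#include <cmath>
#include <limits>
#include <random>

void FillUniform(const int n, const float a, const float b, float* r,
                 std::mt19937& rng) {
  // nextafter keeps b itself reachable, matching the boost-based behaviour.
  std::uniform_real_distribution<float> dist(
      a, std::nextafter(b, std::numeric_limits<float>::max()));
  for (int i = 0; i < n; ++i) {
    r[i] = dist(rng);
  }
}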
"caffe/util/benchmark.hpp" -+#include -+#endif - - using caffe::Blob; - using caffe::Caffe; - using caffe::Net; - using caffe::Layer; -+#ifdef NO_CAFFE_MOBILE - using caffe::Solver; -+#endif - using caffe::shared_ptr; - using caffe::string; - using caffe::Timer; -@@ -54,6 +60,7 @@ - DEFINE_string(sighup_effect, "snapshot", - "Optional; action to take when a SIGHUP signal is received: " - "snapshot, stop or none."); -+DEFINE_int32(alsologtostderr, 1, ""); - - // A simple registry for caffe commands. - typedef int (*BrewFunction)(); -@@ -87,6 +94,7 @@ - - // Parse GPU ids or use all available devices - static void get_gpus(vector* gpus) { -+#ifndef CPU_ONLY - if (FLAGS_gpu == "all") { - int count = 0; - #ifndef CPU_ONLY -@@ -106,6 +114,7 @@ - } else { - CHECK_EQ(gpus->size(), 0); - } -+#endif - } - - // Parse phase from flags -@@ -123,7 +132,11 @@ - // Parse stages from flags - vector get_stages_from_flags() { - vector stages; -+#ifdef USE_BOOST - boost::split(stages, FLAGS_stage, boost::is_any_of(",")); -+#else -+ stages.push_back("TEST"); -+#endif - return stages; - } - -@@ -135,6 +148,7 @@ - - // Device Query: show diagnostic information for a GPU device. - int device_query() { -+#ifdef NO_CAFFE_MOBILE - LOG(INFO) << "Querying GPUs " << FLAGS_gpu; - vector gpus; - get_gpus(&gpus); -@@ -142,10 +156,12 @@ - caffe::Caffe::SetDevice(gpus[i]); - caffe::Caffe::DeviceQuery(); - } -+#endif - return 0; - } - RegisterBrewFunction(device_query); - -+#ifdef NO_CAFFE_MOBILE - // Load the weights from the specified caffemodel(s) into the train and - // test nets. - void CopyLayers(caffe::Solver* solver, const std::string& model_list) { -@@ -160,6 +176,7 @@ - } - } - -+ - // Translate the signal effect the user specified on the command-line to the - // corresponding enumeration. - caffe::SolverAction::Enum GetRequestedAction( -@@ -175,9 +192,11 @@ - } - LOG(FATAL) << "Invalid signal effect \""<< flag_value << "\" was specified"; - } -+#endif - - // Train / Finetune a model. - int train() { -+#ifdef NO_CAFFE_MOBILE - CHECK_GT(FLAGS_solver.size(), 0) << "Need a solver definition to train."; - CHECK(!FLAGS_snapshot.size() || !FLAGS_weights.size()) - << "Give a snapshot to resume training or weights to finetune " -@@ -257,6 +276,7 @@ - solver->Solve(); - } - LOG(INFO) << "Optimization Done."; -+#endif - return 0; - } - RegisterBrewFunction(train); -@@ -337,6 +357,7 @@ - - // Time: benchmark the execution time of a model. - int time() { -+#ifdef NO_BACKWORD - CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to time."; - caffe::Phase phase = get_phase_from_flags(caffe::TRAIN); - vector stages = get_stages_from_flags(); -@@ -422,6 +443,7 @@ - FLAGS_iterations << " ms."; - LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms."; - LOG(INFO) << "*** Benchmark ends ***"; -+#endif - return 0; - } - RegisterBrewFunction(time); -@@ -435,12 +457,20 @@ - gflags::SetUsageMessage("command line brew\n" - "usage: caffe \n\n" - "commands:\n" -+#ifdef NO_CAFFE_MOBILE - " train train or finetune a model\n" -+#endif - " test score a model\n" -+#ifdef NO_CAFFE_MOBILE - " device_query show GPU diagnostic information\n" -+#endif - " time benchmark model execution time"); -+ - // Run tool or show usage. 
- caffe::GlobalInit(&argc, &argv); -+#ifndef NO_CAFFE_MOBILE -+ ::gflags::ParseCommandLineFlags(&argc, &argv, true); -+#endif - if (argc == 2) { - #ifdef WITH_PYTHON_LAYER - try { diff --git a/src/caffe/CMakeLists.txt b/src/caffe/CMakeLists.txt deleted file mode 100644 index 94ed1e2..0000000 --- a/src/caffe/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -# generate protobuf sources -file(GLOB proto_files proto/*.proto) -caffe_protobuf_generate_cpp_py(${proto_gen_folder} proto_srcs proto_hdrs proto_python ${proto_files}) - -# include python files either to force generation -add_library(proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python}) -set(Caffe_LINKER_LIBS proto ${Caffe_LINKER_LIBS}) # note, crucial to prepend! -caffe_default_properties(proto) - -# --[ Caffe library - - -file(GLOB_RECURSE srcs ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) - -add_library(caffe STATIC ${srcs}) -target_link_libraries(caffe proto ${Caffe_LINKER_LIBS}) -caffe_default_properties(caffe) - -#INSTALL(TARGETS caffe DESTINATION lib) diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp deleted file mode 100644 index 603e52f..0000000 --- a/src/caffe/blob.cpp +++ /dev/null @@ -1,561 +0,0 @@ -#include -#include - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/syncedmem.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void Blob::Reshape(const int num, const int channels, const int height, - const int width) { - vector shape(4); - shape[0] = num; - shape[1] = channels; - shape[2] = height; - shape[3] = width; - Reshape(shape); -} - -template -void Blob::Reshape(const vector& shape) { - CHECK_LE(shape.size(), kMaxBlobAxes); - count_ = 1; - shape_.resize(shape.size()); - if (!shape_data_ || shape_data_->size() < shape.size() * sizeof(int)) { - shape_data_.reset(new SyncedMemory(shape.size() * sizeof(int))); - } - int* shape_data = static_cast(shape_data_->mutable_cpu_data()); - for (int i = 0; i < shape.size(); ++i) { - CHECK_GE(shape[i], 0); - if (count_ != 0) { - CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX"; - } - count_ *= shape[i]; - shape_[i] = shape[i]; - shape_data[i] = shape[i]; - } - if (count_ > capacity_) { - capacity_ = count_; - data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype))); - diff_.reset(new SyncedMemory(capacity_ * sizeof(Dtype))); - } -} - -template -void Blob::Reshape(const BlobShape& shape) { - CHECK_LE(shape.dim_size(), kMaxBlobAxes); - vector shape_vec(shape.dim_size()); - for (int i = 0; i < shape.dim_size(); ++i) { - shape_vec[i] = shape.dim(i); - } - Reshape(shape_vec); -} - -template -void Blob::ReshapeLike(const Blob& other) { - Reshape(other.shape()); -} - -template -Blob::Blob(const int num, const int channels, const int height, - const int width) - // capacity_ must be initialized before calling Reshape - : capacity_(0) { - Reshape(num, channels, height, width); -} - -template -Blob::Blob(const vector& shape) - // capacity_ must be initialized before calling Reshape - : capacity_(0) { - Reshape(shape); -} - -template -const int* Blob::gpu_shape() const { - CHECK(shape_data_); - return (const int*)shape_data_->gpu_data(); -} - -template -const Dtype* Blob::cpu_data() const { - CHECK(data_); - return (const Dtype*)data_->cpu_data(); -} - -template -void Blob::set_cpu_data(Dtype* data) { - CHECK(data); - // Make sure CPU and GPU sizes remain equal - size_t size = count_ * sizeof(Dtype); - if (data_->size() != size) { - data_.reset(new SyncedMemory(size)); - diff_.reset(new SyncedMemory(size)); - } - 
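Blob::Reshape above only reallocates when the element count outgrows capacity_. Basic handling of a blob, using only calls visible in this file (a usage sketch, not deleted code):

#include <vector>
#include "caffe/blob.hpp"

void BlobBasics() {
  caffe::Blob<float> blob(1, 3, 4, 4);        // N x C x H x W, 48 elements
  float* data = blob.mutable_cpu_data();      // backed by lazily allocated SyncedMemory
  for (int i = 0; i < blob.count(); ++i) {
    data[i] = 0.5f;
  }
  // Doubling N exceeds capacity_, so this Reshape reallocates data_ and diff_.
  blob.Reshape(std::vector<int>{2, 3, 4, 4});
}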
data_->set_cpu_data(data); -} - -template -const Dtype* Blob::gpu_data() const { - CHECK(data_); - return (const Dtype*)data_->gpu_data(); -} - -template -void Blob::set_gpu_data(Dtype* data) { - CHECK(data); - // Make sure CPU and GPU sizes remain equal - size_t size = count_ * sizeof(Dtype); - if (data_->size() != size) { - data_.reset(new SyncedMemory(size)); - diff_.reset(new SyncedMemory(size)); - } - data_->set_gpu_data(data); -} - -template -const Dtype* Blob::cpu_diff() const { - CHECK(diff_); - return (const Dtype*)diff_->cpu_data(); -} - -template -const Dtype* Blob::gpu_diff() const { - CHECK(diff_); - return (const Dtype*)diff_->gpu_data(); -} - -template -Dtype* Blob::mutable_cpu_data() { - CHECK(data_); - return static_cast(data_->mutable_cpu_data()); -} - -template -Dtype* Blob::mutable_gpu_data() { - CHECK(data_); - return static_cast(data_->mutable_gpu_data()); -} - -template -Dtype* Blob::mutable_cpu_diff() { - CHECK(diff_); - return static_cast(diff_->mutable_cpu_data()); -} - -template -Dtype* Blob::mutable_gpu_diff() { - CHECK(diff_); - return static_cast(diff_->mutable_gpu_data()); -} - -template -void Blob::ShareData(const Blob& other) { - CHECK_EQ(count_, other.count()); - data_ = other.data(); -} - -template -void Blob::ShareDiff(const Blob& other) { - CHECK_EQ(count_, other.count()); - diff_ = other.diff(); -} - -// The "update" method is used for parameter blobs in a Net, which are stored -// as Blob or Blob -- hence we do not define it for -// Blob or Blob. -template <> void Blob::Update() { NOT_IMPLEMENTED; } -template <> void Blob::Update() { NOT_IMPLEMENTED; } - -template -void Blob::Update() { - // We will perform update based on where the data is located. - switch (data_->head()) { - case SyncedMemory::HEAD_AT_CPU: - // perform computation on CPU - caffe_axpy(count_, Dtype(-1), - static_cast(diff_->cpu_data()), - static_cast(data_->mutable_cpu_data())); - break; - case SyncedMemory::HEAD_AT_GPU: - case SyncedMemory::SYNCED: -#ifndef CPU_ONLY - // perform computation on GPU - caffe_gpu_axpy(count_, Dtype(-1), - static_cast(diff_->gpu_data()), - static_cast(data_->mutable_gpu_data())); -#else - NO_GPU; -#endif - break; - default: - LOG(FATAL) << "Syncedmem not initialized."; - } -} - -template <> unsigned int Blob::asum_data() const { - NOT_IMPLEMENTED; - return 0; -} - -template <> int Blob::asum_data() const { - NOT_IMPLEMENTED; - return 0; -} - -template -Dtype Blob::asum_data() const { - if (!data_) { return 0; } - switch (data_->head()) { - case SyncedMemory::HEAD_AT_CPU: - return caffe_cpu_asum(count_, cpu_data()); - case SyncedMemory::HEAD_AT_GPU: - case SyncedMemory::SYNCED: -#ifndef CPU_ONLY - { - Dtype asum; - caffe_gpu_asum(count_, gpu_data(), &asum); - return asum; - } -#else - NO_GPU; -#endif - case SyncedMemory::UNINITIALIZED: - return 0; - default: - LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head(); - } - return 0; -} - -template <> unsigned int Blob::asum_diff() const { - NOT_IMPLEMENTED; - return 0; -} - -template <> int Blob::asum_diff() const { - NOT_IMPLEMENTED; - return 0; -} - -template -Dtype Blob::asum_diff() const { - if (!diff_) { return 0; } - switch (diff_->head()) { - case SyncedMemory::HEAD_AT_CPU: - return caffe_cpu_asum(count_, cpu_diff()); - case SyncedMemory::HEAD_AT_GPU: - case SyncedMemory::SYNCED: -#ifndef CPU_ONLY - { - Dtype asum; - caffe_gpu_asum(count_, gpu_diff(), &asum); - return asum; - } -#else - NO_GPU; -#endif - case SyncedMemory::UNINITIALIZED: - return 0; - default: - LOG(FATAL) << "Unknown 
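Blob::Update() above is a unit-step descent update performed wherever the data currently lives: caffe_axpy with alpha = -1 on the CPU path, caffe_gpu_axpy on the GPU path. The CPU case reduces to the loop below; the solver is expected to have already folded the learning rate, momentum, and so on into diff:

template <typename Dtype>
void UpdateCpu(const int count, const Dtype* diff, Dtype* data) {
  for (int i = 0; i < count; ++i) {
    data[i] -= diff[i];          // data := data + (-1) * diff
  }
}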
SyncedMemory head state: " << diff_->head(); - } - return 0; -} - -template <> unsigned int Blob::sumsq_data() const { - NOT_IMPLEMENTED; - return 0; -} - -template <> int Blob::sumsq_data() const { - NOT_IMPLEMENTED; - return 0; -} - -template -Dtype Blob::sumsq_data() const { - Dtype sumsq; - const Dtype* data; - if (!data_) { return 0; } - switch (data_->head()) { - case SyncedMemory::HEAD_AT_CPU: - data = cpu_data(); - sumsq = caffe_cpu_dot(count_, data, data); - break; - case SyncedMemory::HEAD_AT_GPU: - case SyncedMemory::SYNCED: -#ifndef CPU_ONLY - data = gpu_data(); - caffe_gpu_dot(count_, data, data, &sumsq); -#else - NO_GPU; -#endif - break; - case SyncedMemory::UNINITIALIZED: - return 0; - default: - LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head(); - } - return sumsq; -} - -template <> unsigned int Blob::sumsq_diff() const { - NOT_IMPLEMENTED; - return 0; -} - -template <> int Blob::sumsq_diff() const { - NOT_IMPLEMENTED; - return 0; -} - -template -Dtype Blob::sumsq_diff() const { - Dtype sumsq; - const Dtype* diff; - if (!diff_) { return 0; } - switch (diff_->head()) { - case SyncedMemory::HEAD_AT_CPU: - diff = cpu_diff(); - sumsq = caffe_cpu_dot(count_, diff, diff); - break; - case SyncedMemory::HEAD_AT_GPU: - case SyncedMemory::SYNCED: -#ifndef CPU_ONLY - diff = gpu_diff(); - caffe_gpu_dot(count_, diff, diff, &sumsq); - break; -#else - NO_GPU; -#endif - case SyncedMemory::UNINITIALIZED: - return 0; - default: - LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head(); - } - return sumsq; -} - -template <> void Blob::scale_data(unsigned int scale_factor) { - NOT_IMPLEMENTED; -} - -template <> void Blob::scale_data(int scale_factor) { - NOT_IMPLEMENTED; -} - -template -void Blob::scale_data(Dtype scale_factor) { - Dtype* data; - if (!data_) { return; } - switch (data_->head()) { - case SyncedMemory::HEAD_AT_CPU: - data = mutable_cpu_data(); - caffe_scal(count_, scale_factor, data); - return; - case SyncedMemory::HEAD_AT_GPU: - case SyncedMemory::SYNCED: -#ifndef CPU_ONLY - data = mutable_gpu_data(); - caffe_gpu_scal(count_, scale_factor, data); - return; -#else - NO_GPU; -#endif - case SyncedMemory::UNINITIALIZED: - return; - default: - LOG(FATAL) << "Unknown SyncedMemory head state: " << data_->head(); - } -} - -template <> void Blob::scale_diff(unsigned int scale_factor) { - NOT_IMPLEMENTED; -} - -template <> void Blob::scale_diff(int scale_factor) { - NOT_IMPLEMENTED; -} - -template -void Blob::scale_diff(Dtype scale_factor) { - Dtype* diff; - if (!diff_) { return; } - switch (diff_->head()) { - case SyncedMemory::HEAD_AT_CPU: - diff = mutable_cpu_diff(); - caffe_scal(count_, scale_factor, diff); - return; - case SyncedMemory::HEAD_AT_GPU: - case SyncedMemory::SYNCED: -#ifndef CPU_ONLY - diff = mutable_gpu_diff(); - caffe_gpu_scal(count_, scale_factor, diff); - return; -#else - NO_GPU; -#endif - case SyncedMemory::UNINITIALIZED: - return; - default: - LOG(FATAL) << "Unknown SyncedMemory head state: " << diff_->head(); - } -} - -template -bool Blob::ShapeEquals(const BlobProto& other) { - if (other.has_num() || other.has_channels() || - other.has_height() || other.has_width()) { - // Using deprecated 4D Blob dimensions -- - // shape is (num, channels, height, width). - // Note: we do not use the normal Blob::num(), Blob::channels(), etc. 
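The reductions above map directly onto vector norms: asum_data() is the L1 norm (sum of absolute values via cblas asum) and sumsq_data() is the squared L2 norm, computed as the dot product of the blob with itself. A small check:

#include <cmath>
#include "caffe/blob.hpp"

void NormExample() {
  caffe::Blob<float> b(1, 1, 1, 3);
  float* d = b.mutable_cpu_data();
  d[0] = 3.0f; d[1] = -4.0f; d[2] = 0.0f;
  const float l1 = b.asum_data();              // |3| + |-4| + |0| = 7
  const float l2 = std::sqrt(b.sumsq_data());  // sqrt(9 + 16 + 0) = 5
  (void)l1; (void)l2;
}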
- // methods as these index from the beginning of the blob shape, where legacy - // parameter blobs were indexed from the end of the blob shape (e.g., bias - // Blob shape (1 x 1 x 1 x N), IP layer weight Blob shape (1 x 1 x M x N)). - return shape_.size() <= 4 && - LegacyShape(-4) == other.num() && - LegacyShape(-3) == other.channels() && - LegacyShape(-2) == other.height() && - LegacyShape(-1) == other.width(); - } - vector other_shape(other.shape().dim_size()); - for (int i = 0; i < other.shape().dim_size(); ++i) { - other_shape[i] = other.shape().dim(i); - } - return shape_ == other_shape; -} - -template -void Blob::CopyFrom(const Blob& source, bool copy_diff, bool reshape) { - if (source.count() != count_ || source.shape() != shape_) { - if (reshape) { - ReshapeLike(source); - } else { - LOG(FATAL) << "Trying to copy blobs of different sizes."; - } - } - switch (Caffe::mode()) { - case Caffe::GPU: - if (copy_diff) { - caffe_copy(count_, source.gpu_diff(), - static_cast(diff_->mutable_gpu_data())); - } else { - caffe_copy(count_, source.gpu_data(), - static_cast(data_->mutable_gpu_data())); - } - break; - case Caffe::CPU: - if (copy_diff) { - caffe_copy(count_, source.cpu_diff(), - static_cast(diff_->mutable_cpu_data())); - } else { - caffe_copy(count_, source.cpu_data(), - static_cast(data_->mutable_cpu_data())); - } - break; - default: - LOG(FATAL) << "Unknown caffe mode."; - } -} - -template -void Blob::FromProto(const BlobProto& proto, bool reshape) { - if (reshape) { - vector shape; - if (proto.has_num() || proto.has_channels() || - proto.has_height() || proto.has_width()) { - // Using deprecated 4D Blob dimensions -- - // shape is (num, channels, height, width). - shape.resize(4); - shape[0] = proto.num(); - shape[1] = proto.channels(); - shape[2] = proto.height(); - shape[3] = proto.width(); - } else { - shape.resize(proto.shape().dim_size()); - for (int i = 0; i < proto.shape().dim_size(); ++i) { - shape[i] = proto.shape().dim(i); - } - } - Reshape(shape); - } else { - CHECK(ShapeEquals(proto)) << "shape mismatch (reshape not set)"; - } - // copy data - Dtype* data_vec = mutable_cpu_data(); - if (proto.double_data_size() > 0) { - CHECK_EQ(count_, proto.double_data_size()); - for (int i = 0; i < count_; ++i) { - data_vec[i] = proto.double_data(i); - } - } else { - CHECK_EQ(count_, proto.data_size()); - for (int i = 0; i < count_; ++i) { - data_vec[i] = proto.data(i); - } - } - if (proto.double_diff_size() > 0) { - CHECK_EQ(count_, proto.double_diff_size()); - Dtype* diff_vec = mutable_cpu_diff(); - for (int i = 0; i < count_; ++i) { - diff_vec[i] = proto.double_diff(i); - } - } else if (proto.diff_size() > 0) { - CHECK_EQ(count_, proto.diff_size()); - Dtype* diff_vec = mutable_cpu_diff(); - for (int i = 0; i < count_; ++i) { - diff_vec[i] = proto.diff(i); - } - } -} - -template <> -void Blob::ToProto(BlobProto* proto, bool write_diff) const { - proto->clear_shape(); - for (int i = 0; i < shape_.size(); ++i) { - proto->mutable_shape()->add_dim(shape_[i]); - } - proto->clear_double_data(); - proto->clear_double_diff(); - const double* data_vec = cpu_data(); - for (int i = 0; i < count_; ++i) { - proto->add_double_data(data_vec[i]); - } - if (write_diff) { - const double* diff_vec = cpu_diff(); - for (int i = 0; i < count_; ++i) { - proto->add_double_diff(diff_vec[i]); - } - } -} - -template <> -void Blob::ToProto(BlobProto* proto, bool write_diff) const { - proto->clear_shape(); - for (int i = 0; i < shape_.size(); ++i) { - proto->mutable_shape()->add_dim(shape_[i]); - } - 
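ShapeEquals above aligns a legacy 4-D BlobProto with the end of the modern shape, so a 1-D bias blob of shape {10} still matches a proto saved as 1 x 1 x 1 x 10. A hedged illustration; it relies on LegacyShape returning 1 for axes the blob does not have, as in stock Caffe:

#include <vector>
#include "caffe/blob.hpp"
#include "caffe/proto/caffe.pb.h"

void LegacyShapeExample() {
  caffe::Blob<float> bias(std::vector<int>{10});   // modern 1-D parameter blob
  caffe::BlobProto legacy;
  legacy.set_num(1);
  legacy.set_channels(1);
  legacy.set_height(1);
  legacy.set_width(10);                            // old (1 x 1 x 1 x N) layout
  CHECK(bias.ShapeEquals(legacy));                 // passes: shapes agree from the end
}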
proto->clear_data(); - proto->clear_diff(); - const float* data_vec = cpu_data(); - for (int i = 0; i < count_; ++i) { - proto->add_data(data_vec[i]); - } - if (write_diff) { - const float* diff_vec = cpu_diff(); - for (int i = 0; i < count_; ++i) { - proto->add_diff(diff_vec[i]); - } - } -} - -INSTANTIATE_CLASS(Blob); -template class Blob; -template class Blob; - -} // namespace caffe - diff --git a/src/caffe/common.cpp b/src/caffe/common.cpp deleted file mode 100644 index 4856192..0000000 --- a/src/caffe/common.cpp +++ /dev/null @@ -1,353 +0,0 @@ -#ifdef USE_BOOST -#include -#endif -#ifdef USE_GLOG -#include -#endif -#include -#include -#include - -#include "caffe/common.hpp" -#include "caffe/util/rng.hpp" - -#ifndef NO_CAFFE_MOBILE -#include -#endif - -namespace caffe { - -#ifdef USE_BOOST -// Make sure each thread can have different values. -static boost::thread_specific_ptr thread_instance_; -#else -thread_local static Caffe *thread_instance_ = NULL; -#endif - -#ifndef USE_GLOG -nullstream __nullstream; -bool LogMessage::enable = true; -#endif - -Caffe& Caffe::Get() { -#ifdef USE_BOOST - if (!thread_instance_.get()) { - thread_instance_.reset(new Caffe()); - } - return *(thread_instance_.get()); -#else - if (thread_instance_ == NULL) { - thread_instance_ = new Caffe(); - } - return *thread_instance_; -#endif -} - -// random seeding -int64_t cluster_seedgen(void) { - int64_t s, seed, pid; - FILE* f = fopen("/dev/urandom", "rb"); - if (f && fread(&seed, 1, sizeof(seed), f) == sizeof(seed)) { - fclose(f); - return seed; - } - - LOG(INFO) << "System entropy source not available, " - "using fallback algorithm to generate seed instead."; - if (f) - fclose(f); - - pid = getpid(); - s = time(NULL); - seed = std::abs(((s * 181) * ((pid - 83) * 359)) % 104729); - return seed; -} - - -void GlobalInit(int* pargc, char*** pargv) { -#ifdef NO_CAFFE_MOBILE - // Google flags. - ::gflags::ParseCommandLineFlags(pargc, pargv, true); -#endif -#ifdef USE_GLOG - // Google logging. - ::google::InitGoogleLogging(*(pargv)[0]); - // Provide a backtrace on segfault. - ::google::InstallFailureSignalHandler(); -#endif -} - -#ifdef CPU_ONLY // CPU-only Caffe. - -Caffe::Caffe() - : random_generator_(), mode_(Caffe::CPU), - solver_count_(1), solver_rank_(0), multiprocess_(false) { } - -Caffe::~Caffe() { } - -void Caffe::set_random_seed(const unsigned int seed) { - // RNG seed - Get().random_generator_.reset(new RNG(seed)); -} - -void Caffe::SetDevice(const int device_id) { - NO_GPU; -} - -void Caffe::DeviceQuery() { - NO_GPU; -} - -bool Caffe::CheckDevice(const int device_id) { - NO_GPU; - return false; -} - -int Caffe::FindDevice(const int start_id) { - NO_GPU; - return -1; -} - -class Caffe::RNG::Generator { - public: - Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {} - explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {} - caffe::rng_t* rng() { return rng_.get(); } - private: - shared_ptr rng_; -}; - -Caffe::RNG::RNG() : generator_(new Generator()) { } - -Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { } - -Caffe::RNG& Caffe::RNG::operator=(const RNG& other) { - generator_ = other.generator_; - return *this; -} - -void* Caffe::RNG::generator() { - return static_cast(generator_->rng()); -} - -#else // Normal GPU + CPU Caffe. 
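// Threading note (usage sketch): Caffe::Get() hands back a per-thread
// instance (boost::thread_specific_ptr or C++11 thread_local above), so
// mode, device and RNG state configured on one thread do not leak into
// another; a worker thread that departs from the defaults is expected to
// call Caffe::set_mode(...) / Caffe::SetDevice(...) itself.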
- -Caffe::Caffe() - : cublas_handle_(NULL), curand_generator_(NULL), random_generator_(), - mode_(Caffe::CPU), - solver_count_(1), solver_rank_(0), multiprocess_(false) { - // Try to create a cublas handler, and report an error if failed (but we will - // keep the program running as one might just want to run CPU code). - if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) { - LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available."; - } - // Try to create a curand handler. - if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT) - != CURAND_STATUS_SUCCESS || - curandSetPseudoRandomGeneratorSeed(curand_generator_, cluster_seedgen()) - != CURAND_STATUS_SUCCESS) { - LOG(ERROR) << "Cannot create Curand generator. Curand won't be available."; - } -} - -Caffe::~Caffe() { - if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_)); - if (curand_generator_) { - CURAND_CHECK(curandDestroyGenerator(curand_generator_)); - } -} - -void Caffe::set_random_seed(const unsigned int seed) { - // Curand seed - static bool g_curand_availability_logged = false; - if (Get().curand_generator_) { - CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(curand_generator(), - seed)); - CURAND_CHECK(curandSetGeneratorOffset(curand_generator(), 0)); - } else { - if (!g_curand_availability_logged) { - LOG(ERROR) << - "Curand not available. Skipping setting the curand seed."; - g_curand_availability_logged = true; - } - } - // RNG seed - Get().random_generator_.reset(new RNG(seed)); -} - -void Caffe::SetDevice(const int device_id) { - int current_device; - CUDA_CHECK(cudaGetDevice(¤t_device)); - if (current_device == device_id) { - return; - } - // The call to cudaSetDevice must come before any calls to Get, which - // may perform initialization using the GPU. 
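// Caller-side ordering sketch (assumes a CUDA build and that device 0 exists):
//   Caffe::SetDevice(0);            // must precede anything that touches Get()
//   Caffe::set_mode(Caffe::GPU);
//   // ... only then build nets or run solver / forward code ...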
- CUDA_CHECK(cudaSetDevice(device_id)); - if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_)); - if (Get().curand_generator_) { - CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_)); - } - CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_)); - CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_, - CURAND_RNG_PSEUDO_DEFAULT)); - CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_, - cluster_seedgen())); -} - -void Caffe::DeviceQuery() { - cudaDeviceProp prop; - int device; - if (cudaSuccess != cudaGetDevice(&device)) { - printf("No cuda device present.\n"); - return; - } - CUDA_CHECK(cudaGetDeviceProperties(&prop, device)); - LOG(INFO) << "Device id: " << device; - LOG(INFO) << "Major revision number: " << prop.major; - LOG(INFO) << "Minor revision number: " << prop.minor; - LOG(INFO) << "Name: " << prop.name; - LOG(INFO) << "Total global memory: " << prop.totalGlobalMem; - LOG(INFO) << "Total shared memory per block: " << prop.sharedMemPerBlock; - LOG(INFO) << "Total registers per block: " << prop.regsPerBlock; - LOG(INFO) << "Warp size: " << prop.warpSize; - LOG(INFO) << "Maximum memory pitch: " << prop.memPitch; - LOG(INFO) << "Maximum threads per block: " << prop.maxThreadsPerBlock; - LOG(INFO) << "Maximum dimension of block: " - << prop.maxThreadsDim[0] << ", " << prop.maxThreadsDim[1] << ", " - << prop.maxThreadsDim[2]; - LOG(INFO) << "Maximum dimension of grid: " - << prop.maxGridSize[0] << ", " << prop.maxGridSize[1] << ", " - << prop.maxGridSize[2]; - LOG(INFO) << "Clock rate: " << prop.clockRate; - LOG(INFO) << "Total constant memory: " << prop.totalConstMem; - LOG(INFO) << "Texture alignment: " << prop.textureAlignment; - LOG(INFO) << "Concurrent copy and execution: " - << (prop.deviceOverlap ? "Yes" : "No"); - LOG(INFO) << "Number of multiprocessors: " << prop.multiProcessorCount; - LOG(INFO) << "Kernel execution timeout: " - << (prop.kernelExecTimeoutEnabled ? "Yes" : "No"); - return; -} - -bool Caffe::CheckDevice(const int device_id) { - // This function checks the availability of GPU #device_id. - // It attempts to create a context on the device by calling cudaFree(0). - // cudaSetDevice() alone is not sufficient to check the availability. - // It lazily records device_id, however, does not initialize a - // context. So it does not know if the host thread has the permission to use - // the device or not. - // - // In a shared environment where the devices are set to EXCLUSIVE_PROCESS - // or EXCLUSIVE_THREAD mode, cudaSetDevice() returns cudaSuccess - // even if the device is exclusively occupied by another process or thread. - // Cuda operations that initialize the context are needed to check - // the permission. cudaFree(0) is one of those with no side effect, - // except the context initialization. - bool r = ((cudaSuccess == cudaSetDevice(device_id)) && - (cudaSuccess == cudaFree(0))); - // reset any error that may have occurred. - cudaGetLastError(); - return r; -} - -int Caffe::FindDevice(const int start_id) { - // This function finds the first available device by checking devices with - // ordinal from start_id to the highest available value. In the - // EXCLUSIVE_PROCESS or EXCLUSIVE_THREAD mode, if it succeeds, it also - // claims the device due to the initialization of the context. 
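// Device-probing sketch built on the helpers documented above
// (hypothetical ordinal numbering):
//   int gpu = Caffe::FindDevice(0);          // first usable device, or -1
//   if (gpu >= 0) { Caffe::SetDevice(gpu); Caffe::set_mode(Caffe::GPU); }
//   else          { Caffe::set_mode(Caffe::CPU); }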
- int count = 0; - CUDA_CHECK(cudaGetDeviceCount(&count)); - for (int i = start_id; i < count; i++) { - if (CheckDevice(i)) return i; - } - return -1; -} - -class Caffe::RNG::Generator { - public: - Generator() : rng_(new caffe::rng_t(cluster_seedgen())) {} - explicit Generator(unsigned int seed) : rng_(new caffe::rng_t(seed)) {} - caffe::rng_t* rng() { return rng_.get(); } - private: - shared_ptr rng_; -}; - -Caffe::RNG::RNG() : generator_(new Generator()) { } - -Caffe::RNG::RNG(unsigned int seed) : generator_(new Generator(seed)) { } - -Caffe::RNG& Caffe::RNG::operator=(const RNG& other) { - generator_.reset(other.generator_.get()); - return *this; -} - -void* Caffe::RNG::generator() { - return static_cast(generator_->rng()); -} - -const char* cublasGetErrorString(cublasStatus_t error) { - switch (error) { - case CUBLAS_STATUS_SUCCESS: - return "CUBLAS_STATUS_SUCCESS"; - case CUBLAS_STATUS_NOT_INITIALIZED: - return "CUBLAS_STATUS_NOT_INITIALIZED"; - case CUBLAS_STATUS_ALLOC_FAILED: - return "CUBLAS_STATUS_ALLOC_FAILED"; - case CUBLAS_STATUS_INVALID_VALUE: - return "CUBLAS_STATUS_INVALID_VALUE"; - case CUBLAS_STATUS_ARCH_MISMATCH: - return "CUBLAS_STATUS_ARCH_MISMATCH"; - case CUBLAS_STATUS_MAPPING_ERROR: - return "CUBLAS_STATUS_MAPPING_ERROR"; - case CUBLAS_STATUS_EXECUTION_FAILED: - return "CUBLAS_STATUS_EXECUTION_FAILED"; - case CUBLAS_STATUS_INTERNAL_ERROR: - return "CUBLAS_STATUS_INTERNAL_ERROR"; -#if CUDA_VERSION >= 6000 - case CUBLAS_STATUS_NOT_SUPPORTED: - return "CUBLAS_STATUS_NOT_SUPPORTED"; -#endif -#if CUDA_VERSION >= 6050 - case CUBLAS_STATUS_LICENSE_ERROR: - return "CUBLAS_STATUS_LICENSE_ERROR"; -#endif - } - return "Unknown cublas status"; -} - -const char* curandGetErrorString(curandStatus_t error) { - switch (error) { - case CURAND_STATUS_SUCCESS: - return "CURAND_STATUS_SUCCESS"; - case CURAND_STATUS_VERSION_MISMATCH: - return "CURAND_STATUS_VERSION_MISMATCH"; - case CURAND_STATUS_NOT_INITIALIZED: - return "CURAND_STATUS_NOT_INITIALIZED"; - case CURAND_STATUS_ALLOCATION_FAILED: - return "CURAND_STATUS_ALLOCATION_FAILED"; - case CURAND_STATUS_TYPE_ERROR: - return "CURAND_STATUS_TYPE_ERROR"; - case CURAND_STATUS_OUT_OF_RANGE: - return "CURAND_STATUS_OUT_OF_RANGE"; - case CURAND_STATUS_LENGTH_NOT_MULTIPLE: - return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; - case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: - return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; - case CURAND_STATUS_LAUNCH_FAILURE: - return "CURAND_STATUS_LAUNCH_FAILURE"; - case CURAND_STATUS_PREEXISTING_FAILURE: - return "CURAND_STATUS_PREEXISTING_FAILURE"; - case CURAND_STATUS_INITIALIZATION_FAILED: - return "CURAND_STATUS_INITIALIZATION_FAILED"; - case CURAND_STATUS_ARCH_MISMATCH: - return "CURAND_STATUS_ARCH_MISMATCH"; - case CURAND_STATUS_INTERNAL_ERROR: - return "CURAND_STATUS_INTERNAL_ERROR"; - } - return "Unknown curand status"; -} - -#endif // CPU_ONLY - -} // namespace caffe diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp deleted file mode 100644 index 3012251..0000000 --- a/src/caffe/data_transformer.cpp +++ /dev/null @@ -1,545 +0,0 @@ -#ifdef USE_OPENCV -#include -#endif // USE_OPENCV - -#include -#include - -#include "caffe/data_transformer.hpp" -#include "caffe/util/io.hpp" -#include "caffe/util/math_functions.hpp" -#include "caffe/util/rng.hpp" - -namespace caffe { - -template -DataTransformer::DataTransformer(const TransformationParameter& param, - Phase phase) - : param_(param), phase_(phase) { - // check if we want to use mean_file - if (param_.has_mean_file()) { - 
CHECK_EQ(param_.mean_value_size(), 0) << - "Cannot specify mean_file and mean_value at the same time"; - const string& mean_file = param.mean_file(); - if (Caffe::root_solver()) { - LOG(INFO) << "Loading mean file from: " << mean_file; - } - BlobProto blob_proto; - ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto); - data_mean_.FromProto(blob_proto); - } - // check if we want to use mean_value - if (param_.mean_value_size() > 0) { - CHECK(param_.has_mean_file() == false) << - "Cannot specify mean_file and mean_value at the same time"; - for (int c = 0; c < param_.mean_value_size(); ++c) { - mean_values_.push_back(param_.mean_value(c)); - } - } -} - -template -void DataTransformer::Transform(const Datum& datum, - Dtype* transformed_data) { - const string& data = datum.data(); - const int datum_channels = datum.channels(); - const int datum_height = datum.height(); - const int datum_width = datum.width(); - - const int crop_size = param_.crop_size(); - const Dtype scale = param_.scale(); - const bool do_mirror = param_.mirror() && Rand(2); - const bool has_mean_file = param_.has_mean_file(); - const bool has_uint8 = data.size() > 0; - const bool has_mean_values = mean_values_.size() > 0; - - CHECK_GT(datum_channels, 0); - CHECK_GE(datum_height, crop_size); - CHECK_GE(datum_width, crop_size); - - Dtype* mean = NULL; - if (has_mean_file) { - CHECK_EQ(datum_channels, data_mean_.channels()); - CHECK_EQ(datum_height, data_mean_.height()); - CHECK_EQ(datum_width, data_mean_.width()); - mean = data_mean_.mutable_cpu_data(); - } - if (has_mean_values) { - CHECK(mean_values_.size() == 1 || mean_values_.size() == datum_channels) << - "Specify either 1 mean_value or as many as channels: " << datum_channels; - if (datum_channels > 1 && mean_values_.size() == 1) { - // Replicate the mean_value for simplicity - for (int c = 1; c < datum_channels; ++c) { - mean_values_.push_back(mean_values_[0]); - } - } - } - - int height = datum_height; - int width = datum_width; - - int h_off = 0; - int w_off = 0; - if (crop_size) { - height = crop_size; - width = crop_size; - // We only do random crop when we do training. - if (phase_ == TRAIN) { - h_off = Rand(datum_height - crop_size + 1); - w_off = Rand(datum_width - crop_size + 1); - } else { - h_off = (datum_height - crop_size) / 2; - w_off = (datum_width - crop_size) / 2; - } - } - - Dtype datum_element; - int top_index, data_index; - for (int c = 0; c < datum_channels; ++c) { - for (int h = 0; h < height; ++h) { - for (int w = 0; w < width; ++w) { - data_index = (c * datum_height + h_off + h) * datum_width + w_off + w; - if (do_mirror) { - top_index = (c * height + h) * width + (width - 1 - w); - } else { - top_index = (c * height + h) * width + w; - } - if (has_uint8) { - datum_element = - static_cast(static_cast(data[data_index])); - } else { - datum_element = datum.float_data(data_index); - } - if (has_mean_file) { - transformed_data[top_index] = - (datum_element - mean[data_index]) * scale; - } else { - if (has_mean_values) { - transformed_data[top_index] = - (datum_element - mean_values_[c]) * scale; - } else { - transformed_data[top_index] = datum_element * scale; - } - } - } - } - } -} - - -template -void DataTransformer::Transform(const Datum& datum, - Blob* transformed_blob) { - // If datum is encoded, decode and transform the cv::image. 
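// For raw (non-encoded) datums the transform below boils down to
// (value - mean) * scale per element, where the mean comes from mean_file or
// the per-channel mean_value list; crops are random in TRAIN and centered
// otherwise, and mirroring flips the width axis when enabled. Worked example
// with assumed numbers: value 128, mean 104, scale 0.0078125 (1/128) gives
// (128 - 104) * 0.0078125 = 0.1875.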
- if (datum.encoded()) { -#ifdef USE_OPENCV - CHECK(!(param_.force_color() && param_.force_gray())) - << "cannot set both force_color and force_gray"; - cv::Mat cv_img; - if (param_.force_color() || param_.force_gray()) { - // If force_color then decode in color otherwise decode in gray. - cv_img = DecodeDatumToCVMat(datum, param_.force_color()); - } else { - cv_img = DecodeDatumToCVMatNative(datum); - } - // Transform the cv::image into blob. - return Transform(cv_img, transformed_blob); -#else - LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV."; -#endif // USE_OPENCV - } else { - if (param_.force_color() || param_.force_gray()) { - LOG(ERROR) << "force_color and force_gray only for encoded datum"; - } - } - - const int crop_size = param_.crop_size(); - const int datum_channels = datum.channels(); - const int datum_height = datum.height(); - const int datum_width = datum.width(); - - // Check dimensions. - const int channels = transformed_blob->channels(); - const int height = transformed_blob->height(); - const int width = transformed_blob->width(); - const int num = transformed_blob->num(); - - CHECK_EQ(channels, datum_channels); - CHECK_LE(height, datum_height); - CHECK_LE(width, datum_width); - CHECK_GE(num, 1); - - if (crop_size) { - CHECK_EQ(crop_size, height); - CHECK_EQ(crop_size, width); - } else { - CHECK_EQ(datum_height, height); - CHECK_EQ(datum_width, width); - } - - Dtype* transformed_data = transformed_blob->mutable_cpu_data(); - Transform(datum, transformed_data); -} - -template -void DataTransformer::Transform(const vector & datum_vector, - Blob* transformed_blob) { - const int datum_num = datum_vector.size(); - const int num = transformed_blob->num(); - const int channels = transformed_blob->channels(); - const int height = transformed_blob->height(); - const int width = transformed_blob->width(); - - CHECK_GT(datum_num, 0) << "There is no datum to add"; - CHECK_LE(datum_num, num) << - "The size of datum_vector must be no greater than transformed_blob->num()"; - Blob uni_blob(1, channels, height, width); - for (int item_id = 0; item_id < datum_num; ++item_id) { - int offset = transformed_blob->offset(item_id); - uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset); - Transform(datum_vector[item_id], &uni_blob); - } -} - -#ifdef USE_OPENCV -template -void DataTransformer::Transform(const vector & mat_vector, - Blob* transformed_blob) { - const int mat_num = mat_vector.size(); - const int num = transformed_blob->num(); - const int channels = transformed_blob->channels(); - const int height = transformed_blob->height(); - const int width = transformed_blob->width(); - - CHECK_GT(mat_num, 0) << "There is no MAT to add"; - CHECK_EQ(mat_num, num) << - "The size of mat_vector must be equals to transformed_blob->num()"; - Blob uni_blob(1, channels, height, width); - for (int item_id = 0; item_id < mat_num; ++item_id) { - int offset = transformed_blob->offset(item_id); - uni_blob.set_cpu_data(transformed_blob->mutable_cpu_data() + offset); - Transform(mat_vector[item_id], &uni_blob); - } -} - -template -void DataTransformer::Transform(const cv::Mat& cv_img, - Blob* transformed_blob) { - const int crop_size = param_.crop_size(); - const int img_channels = cv_img.channels(); - const int img_height = cv_img.rows; - const int img_width = cv_img.cols; - - // Check dimensions. 
- const int channels = transformed_blob->channels(); - const int height = transformed_blob->height(); - const int width = transformed_blob->width(); - const int num = transformed_blob->num(); - - CHECK_EQ(channels, img_channels); - CHECK_LE(height, img_height); - CHECK_LE(width, img_width); - CHECK_GE(num, 1); - - CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte"; - - const Dtype scale = param_.scale(); - const bool do_mirror = param_.mirror() && Rand(2); - const bool has_mean_file = param_.has_mean_file(); - const bool has_mean_values = mean_values_.size() > 0; - - CHECK_GT(img_channels, 0); - CHECK_GE(img_height, crop_size); - CHECK_GE(img_width, crop_size); - - Dtype* mean = NULL; - if (has_mean_file) { - CHECK_EQ(img_channels, data_mean_.channels()); - CHECK_EQ(img_height, data_mean_.height()); - CHECK_EQ(img_width, data_mean_.width()); - mean = data_mean_.mutable_cpu_data(); - } - if (has_mean_values) { - CHECK(mean_values_.size() == 1 || mean_values_.size() == img_channels) << - "Specify either 1 mean_value or as many as channels: " << img_channels; - if (img_channels > 1 && mean_values_.size() == 1) { - // Replicate the mean_value for simplicity - for (int c = 1; c < img_channels; ++c) { - mean_values_.push_back(mean_values_[0]); - } - } - } - - int h_off = 0; - int w_off = 0; - cv::Mat cv_cropped_img = cv_img; - if (crop_size) { - CHECK_EQ(crop_size, height); - CHECK_EQ(crop_size, width); - // We only do random crop when we do training. - if (phase_ == TRAIN) { - h_off = Rand(img_height - crop_size + 1); - w_off = Rand(img_width - crop_size + 1); - } else { - h_off = (img_height - crop_size) / 2; - w_off = (img_width - crop_size) / 2; - } - cv::Rect roi(w_off, h_off, crop_size, crop_size); - cv_cropped_img = cv_img(roi); - } else { - CHECK_EQ(img_height, height); - CHECK_EQ(img_width, width); - } - - CHECK(cv_cropped_img.data); - - Dtype* transformed_data = transformed_blob->mutable_cpu_data(); - int top_index; - for (int h = 0; h < height; ++h) { - const uchar* ptr = cv_cropped_img.ptr(h); - int img_index = 0; - for (int w = 0; w < width; ++w) { - for (int c = 0; c < img_channels; ++c) { - if (do_mirror) { - top_index = (c * height + h) * width + (width - 1 - w); - } else { - top_index = (c * height + h) * width + w; - } - // int top_index = (c * height + h) * width + w; - Dtype pixel = static_cast(ptr[img_index++]); - if (has_mean_file) { - int mean_index = (c * img_height + h_off + h) * img_width + w_off + w; - transformed_data[top_index] = - (pixel - mean[mean_index]) * scale; - } else { - if (has_mean_values) { - transformed_data[top_index] = - (pixel - mean_values_[c]) * scale; - } else { - transformed_data[top_index] = pixel * scale; - } - } - } - } - } -} -#endif // USE_OPENCV - -template -void DataTransformer::Transform(Blob* input_blob, - Blob* transformed_blob) { - const int crop_size = param_.crop_size(); - const int input_num = input_blob->num(); - const int input_channels = input_blob->channels(); - const int input_height = input_blob->height(); - const int input_width = input_blob->width(); - - if (transformed_blob->count() == 0) { - // Initialize transformed_blob with the right shape. 
- if (crop_size) { - transformed_blob->Reshape(input_num, input_channels, - crop_size, crop_size); - } else { - transformed_blob->Reshape(input_num, input_channels, - input_height, input_width); - } - } - - const int num = transformed_blob->num(); - const int channels = transformed_blob->channels(); - const int height = transformed_blob->height(); - const int width = transformed_blob->width(); - const int size = transformed_blob->count(); - - CHECK_LE(input_num, num); - CHECK_EQ(input_channels, channels); - CHECK_GE(input_height, height); - CHECK_GE(input_width, width); - - - const Dtype scale = param_.scale(); - const bool do_mirror = param_.mirror() && Rand(2); - const bool has_mean_file = param_.has_mean_file(); - const bool has_mean_values = mean_values_.size() > 0; - - int h_off = 0; - int w_off = 0; - if (crop_size) { - CHECK_EQ(crop_size, height); - CHECK_EQ(crop_size, width); - // We only do random crop when we do training. - if (phase_ == TRAIN) { - h_off = Rand(input_height - crop_size + 1); - w_off = Rand(input_width - crop_size + 1); - } else { - h_off = (input_height - crop_size) / 2; - w_off = (input_width - crop_size) / 2; - } - } else { - CHECK_EQ(input_height, height); - CHECK_EQ(input_width, width); - } - - Dtype* input_data = input_blob->mutable_cpu_data(); - if (has_mean_file) { - CHECK_EQ(input_channels, data_mean_.channels()); - CHECK_EQ(input_height, data_mean_.height()); - CHECK_EQ(input_width, data_mean_.width()); - for (int n = 0; n < input_num; ++n) { - int offset = input_blob->offset(n); - caffe_sub(data_mean_.count(), input_data + offset, - data_mean_.cpu_data(), input_data + offset); - } - } - - if (has_mean_values) { - CHECK(mean_values_.size() == 1 || mean_values_.size() == input_channels) << - "Specify either 1 mean_value or as many as channels: " << input_channels; - if (mean_values_.size() == 1) { - caffe_add_scalar(input_blob->count(), -(mean_values_[0]), input_data); - } else { - for (int n = 0; n < input_num; ++n) { - for (int c = 0; c < input_channels; ++c) { - int offset = input_blob->offset(n, c); - caffe_add_scalar(input_height * input_width, -(mean_values_[c]), - input_data + offset); - } - } - } - } - - Dtype* transformed_data = transformed_blob->mutable_cpu_data(); - - for (int n = 0; n < input_num; ++n) { - int top_index_n = n * channels; - int data_index_n = n * channels; - for (int c = 0; c < channels; ++c) { - int top_index_c = (top_index_n + c) * height; - int data_index_c = (data_index_n + c) * input_height + h_off; - for (int h = 0; h < height; ++h) { - int top_index_h = (top_index_c + h) * width; - int data_index_h = (data_index_c + h) * input_width + w_off; - if (do_mirror) { - int top_index_w = top_index_h + width - 1; - for (int w = 0; w < width; ++w) { - transformed_data[top_index_w-w] = input_data[data_index_h + w]; - } - } else { - for (int w = 0; w < width; ++w) { - transformed_data[top_index_h + w] = input_data[data_index_h + w]; - } - } - } - } - } - if (scale != Dtype(1)) { - DLOG(INFO) << "Scale: " << scale; - caffe_scal(size, scale, transformed_data); - } -} - -template -vector DataTransformer::InferBlobShape(const Datum& datum) { - if (datum.encoded()) { -#ifdef USE_OPENCV - CHECK(!(param_.force_color() && param_.force_gray())) - << "cannot set both force_color and force_gray"; - cv::Mat cv_img; - if (param_.force_color() || param_.force_gray()) { - // If force_color then decode in color otherwise decode in gray. 
- cv_img = DecodeDatumToCVMat(datum, param_.force_color()); - } else { - cv_img = DecodeDatumToCVMatNative(datum); - } - // InferBlobShape using the cv::image. - return InferBlobShape(cv_img); -#else - LOG(FATAL) << "Encoded datum requires OpenCV; compile with USE_OPENCV."; -#endif // USE_OPENCV - } - const int crop_size = param_.crop_size(); - const int datum_channels = datum.channels(); - const int datum_height = datum.height(); - const int datum_width = datum.width(); - // Check dimensions. - CHECK_GT(datum_channels, 0); - CHECK_GE(datum_height, crop_size); - CHECK_GE(datum_width, crop_size); - // Build BlobShape. - vector shape(4); - shape[0] = 1; - shape[1] = datum_channels; - shape[2] = (crop_size)? crop_size: datum_height; - shape[3] = (crop_size)? crop_size: datum_width; - return shape; -} - -template -vector DataTransformer::InferBlobShape( - const vector & datum_vector) { - const int num = datum_vector.size(); - CHECK_GT(num, 0) << "There is no datum to in the vector"; - // Use first datum in the vector to InferBlobShape. - vector shape = InferBlobShape(datum_vector[0]); - // Adjust num to the size of the vector. - shape[0] = num; - return shape; -} - -#ifdef USE_OPENCV -template -vector DataTransformer::InferBlobShape(const cv::Mat& cv_img) { - const int crop_size = param_.crop_size(); - const int img_channels = cv_img.channels(); - const int img_height = cv_img.rows; - const int img_width = cv_img.cols; - // Check dimensions. - CHECK_GT(img_channels, 0); - CHECK_GE(img_height, crop_size); - CHECK_GE(img_width, crop_size); - // Build BlobShape. - vector shape(4); - shape[0] = 1; - shape[1] = img_channels; - shape[2] = (crop_size)? crop_size: img_height; - shape[3] = (crop_size)? crop_size: img_width; - return shape; -} - -template -vector DataTransformer::InferBlobShape( - const vector & mat_vector) { - const int num = mat_vector.size(); - CHECK_GT(num, 0) << "There is no cv_img to in the vector"; - // Use first cv_img in the vector to InferBlobShape. - vector shape = InferBlobShape(mat_vector[0]); - // Adjust num to the size of the vector. 
- shape[0] = num; - return shape; -} -#endif // USE_OPENCV - -template -void DataTransformer::InitRand() { - const bool needs_rand = param_.mirror() || - (phase_ == TRAIN && param_.crop_size()); - if (needs_rand) { - const unsigned int rng_seed = caffe_rng_rand(); - rng_.reset(new Caffe::RNG(rng_seed)); - } else { - rng_.reset(); - } -} - -template -int DataTransformer::Rand(int n) { - CHECK(rng_); - CHECK_GT(n, 0); - caffe::rng_t* rng = - static_cast(rng_->generator()); - return ((*rng)() % n); -} - -INSTANTIATE_CLASS(DataTransformer); - -} // namespace caffe diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp deleted file mode 100644 index f14253a..0000000 --- a/src/caffe/layer_factory.cpp +++ /dev/null @@ -1,267 +0,0 @@ -// Make sure we include Python.h before any system header -// to avoid _POSIX_C_SOURCE redefinition -#ifdef WITH_PYTHON_LAYER -#include -#endif -#include - -#include "caffe/layer.hpp" -#include "caffe/layer_factory.hpp" -#include "caffe/layers/conv_layer.hpp" -#include "caffe/layers/lrn_layer.hpp" -#include "caffe/layers/pooling_layer.hpp" -#include "caffe/layers/relu_layer.hpp" -#include "caffe/layers/sigmoid_layer.hpp" -#include "caffe/layers/softmax_layer.hpp" -#include "caffe/layers/tanh_layer.hpp" -#include "caffe/proto/caffe.pb.h" - -#ifdef USE_CUDNN -#include "caffe/layers/cudnn_conv_layer.hpp" -#include "caffe/layers/cudnn_lcn_layer.hpp" -#include "caffe/layers/cudnn_lrn_layer.hpp" -#include "caffe/layers/cudnn_pooling_layer.hpp" -#include "caffe/layers/cudnn_relu_layer.hpp" -#include "caffe/layers/cudnn_sigmoid_layer.hpp" -#include "caffe/layers/cudnn_softmax_layer.hpp" -#include "caffe/layers/cudnn_tanh_layer.hpp" -#endif - -#ifdef WITH_PYTHON_LAYER -#include "caffe/layers/python_layer.hpp" -#endif - -namespace caffe { - -// Get convolution layer according to engine. -template -shared_ptr > GetConvolutionLayer( - const LayerParameter& param) { - ConvolutionParameter conv_param = param.convolution_param(); - ConvolutionParameter_Engine engine = conv_param.engine(); -#ifdef USE_CUDNN - bool use_dilation = false; - for (int i = 0; i < conv_param.dilation_size(); ++i) { - if (conv_param.dilation(i) > 1) { - use_dilation = true; - } - } -#endif - if (engine == ConvolutionParameter_Engine_DEFAULT) { - engine = ConvolutionParameter_Engine_CAFFE; -#ifdef USE_CUDNN - if (!use_dilation) { - engine = ConvolutionParameter_Engine_CUDNN; - } -#endif - } - if (engine == ConvolutionParameter_Engine_CAFFE) { - return shared_ptr >(new ConvolutionLayer(param)); -#ifdef USE_CUDNN - } else if (engine == ConvolutionParameter_Engine_CUDNN) { - if (use_dilation) { - LOG(FATAL) << "CuDNN doesn't support the dilated convolution at Layer " - << param.name(); - } - return shared_ptr >(new CuDNNConvolutionLayer(param)); -#endif - } else { - LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; - throw; // Avoids missing return warning - } -} - -REGISTER_LAYER_CREATOR(Convolution, GetConvolutionLayer); - -// Get pooling layer according to engine. 
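// Factory usage sketch: creators registered with REGISTER_LAYER_CREATOR are
// resolved by type name at net-construction time, roughly
//   LayerParameter p;
//   p.set_type("Pooling");
//   shared_ptr<Layer<float> > layer = LayerRegistry<float>::CreateLayer(p);
// so the per-engine choice made below stays hidden behind the Layer API.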
-template -shared_ptr > GetPoolingLayer(const LayerParameter& param) { - PoolingParameter_Engine engine = param.pooling_param().engine(); - if (engine == PoolingParameter_Engine_DEFAULT) { - engine = PoolingParameter_Engine_CAFFE; -#ifdef USE_CUDNN - engine = PoolingParameter_Engine_CUDNN; -#endif - } - if (engine == PoolingParameter_Engine_CAFFE) { - return shared_ptr >(new PoolingLayer(param)); -#ifdef USE_CUDNN - } else if (engine == PoolingParameter_Engine_CUDNN) { - if (param.top_size() > 1) { - LOG(INFO) << "cuDNN does not support multiple tops. " - << "Using Caffe's own pooling layer."; - return shared_ptr >(new PoolingLayer(param)); - } - // CuDNN assumes layers are not being modified in place, thus - // breaking our index tracking for updates in some cases in Caffe. - // Until there is a workaround in Caffe (index management) or - // cuDNN, use Caffe layer to max pooling, or don't use in place - // layers after max pooling layers - if (param.pooling_param().pool() == PoolingParameter_PoolMethod_MAX) { - return shared_ptr >(new PoolingLayer(param)); - } else { - return shared_ptr >(new CuDNNPoolingLayer(param)); - } -#endif - } else { - LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; - throw; // Avoids missing return warning - } -} - -REGISTER_LAYER_CREATOR(Pooling, GetPoolingLayer); - -// Get LRN layer according to engine -template -shared_ptr > GetLRNLayer(const LayerParameter& param) { - LRNParameter_Engine engine = param.lrn_param().engine(); - - if (engine == LRNParameter_Engine_DEFAULT) { -#ifdef USE_CUDNN - engine = LRNParameter_Engine_CUDNN; -#else - engine = LRNParameter_Engine_CAFFE; -#endif - } - - if (engine == LRNParameter_Engine_CAFFE) { - return shared_ptr >(new LRNLayer(param)); -#ifdef USE_CUDNN - } else if (engine == LRNParameter_Engine_CUDNN) { - LRNParameter lrn_param = param.lrn_param(); - - if (lrn_param.norm_region() ==LRNParameter_NormRegion_WITHIN_CHANNEL) { - return shared_ptr >(new CuDNNLCNLayer(param)); - } else { - // local size is too big to be handled through cuDNN - if (param.lrn_param().local_size() > CUDNN_LRN_MAX_N) { - return shared_ptr >(new LRNLayer(param)); - } else { - return shared_ptr >(new CuDNNLRNLayer(param)); - } - } -#endif - } else { - LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; - throw; // Avoids missing return warning - } -} - -REGISTER_LAYER_CREATOR(LRN, GetLRNLayer); - -// Get relu layer according to engine. -template -shared_ptr > GetReLULayer(const LayerParameter& param) { - ReLUParameter_Engine engine = param.relu_param().engine(); - if (engine == ReLUParameter_Engine_DEFAULT) { - engine = ReLUParameter_Engine_CAFFE; -#ifdef USE_CUDNN - engine = ReLUParameter_Engine_CUDNN; -#endif - } - if (engine == ReLUParameter_Engine_CAFFE) { - return shared_ptr >(new ReLULayer(param)); -#ifdef USE_CUDNN - } else if (engine == ReLUParameter_Engine_CUDNN) { - return shared_ptr >(new CuDNNReLULayer(param)); -#endif - } else { - LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; - throw; // Avoids missing return warning - } -} - -REGISTER_LAYER_CREATOR(ReLU, GetReLULayer); - -// Get sigmoid layer according to engine. 
-template -shared_ptr > GetSigmoidLayer(const LayerParameter& param) { - SigmoidParameter_Engine engine = param.sigmoid_param().engine(); - if (engine == SigmoidParameter_Engine_DEFAULT) { - engine = SigmoidParameter_Engine_CAFFE; -#ifdef USE_CUDNN - engine = SigmoidParameter_Engine_CUDNN; -#endif - } - if (engine == SigmoidParameter_Engine_CAFFE) { - return shared_ptr >(new SigmoidLayer(param)); -#ifdef USE_CUDNN - } else if (engine == SigmoidParameter_Engine_CUDNN) { - return shared_ptr >(new CuDNNSigmoidLayer(param)); -#endif - } else { - LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; - throw; // Avoids missing return warning - } -} - -REGISTER_LAYER_CREATOR(Sigmoid, GetSigmoidLayer); - -// Get softmax layer according to engine. -template -shared_ptr > GetSoftmaxLayer(const LayerParameter& param) { - SoftmaxParameter_Engine engine = param.softmax_param().engine(); - if (engine == SoftmaxParameter_Engine_DEFAULT) { - engine = SoftmaxParameter_Engine_CAFFE; -#ifdef USE_CUDNN - engine = SoftmaxParameter_Engine_CUDNN; -#endif - } - if (engine == SoftmaxParameter_Engine_CAFFE) { - return shared_ptr >(new SoftmaxLayer(param)); -#ifdef USE_CUDNN - } else if (engine == SoftmaxParameter_Engine_CUDNN) { - return shared_ptr >(new CuDNNSoftmaxLayer(param)); -#endif - } else { - LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; - throw; // Avoids missing return warning - } -} - -REGISTER_LAYER_CREATOR(Softmax, GetSoftmaxLayer); - -// Get tanh layer according to engine. -template -shared_ptr > GetTanHLayer(const LayerParameter& param) { - TanHParameter_Engine engine = param.tanh_param().engine(); - if (engine == TanHParameter_Engine_DEFAULT) { - engine = TanHParameter_Engine_CAFFE; -#ifdef USE_CUDNN - engine = TanHParameter_Engine_CUDNN; -#endif - } - if (engine == TanHParameter_Engine_CAFFE) { - return shared_ptr >(new TanHLayer(param)); -#ifdef USE_CUDNN - } else if (engine == TanHParameter_Engine_CUDNN) { - return shared_ptr >(new CuDNNTanHLayer(param)); -#endif - } else { - LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; - throw; // Avoids missing return warning - } -} - -REGISTER_LAYER_CREATOR(TanH, GetTanHLayer); - -#ifdef WITH_PYTHON_LAYER -template -shared_ptr > GetPythonLayer(const LayerParameter& param) { - Py_Initialize(); - try { - bp::object module = bp::import(param.python_param().module().c_str()); - bp::object layer = module.attr(param.python_param().layer().c_str())(param); - return bp::extract > >(layer)(); - } catch (bp::error_already_set) { - PyErr_Print(); - throw; - } -} - -REGISTER_LAYER_CREATOR(Python, GetPythonLayer); -#endif - -// Layers that use their constructor as their default creator should be -// registered in their corresponding cpp files. Do not register them here. -} // namespace caffe diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp deleted file mode 100644 index 4a4c68e..0000000 --- a/src/caffe/layers/base_conv_layer.cpp +++ /dev/null @@ -1,396 +0,0 @@ -#include -#include - -#include "caffe/filler.hpp" -#include "caffe/layers/base_conv_layer.hpp" -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, - const vector*>& top) { - // Configure the kernel size, padding, stride, and inputs. 
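// Kernel-shape sketch: kernel_size may be given once (replicated across all
// spatial axes) or once per axis, while kernel_h/kernel_w is the 2-D-only
// alternative. E.g. "kernel_size: 3" on a 2-D input yields
// kernel_shape_ = {3, 3}; "kernel_h: 3 kernel_w: 5" yields {3, 5}.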
- ConvolutionParameter conv_param = this->layer_param_.convolution_param(); - force_nd_im2col_ = conv_param.force_nd_im2col(); - channel_axis_ = bottom[0]->CanonicalAxisIndex(conv_param.axis()); - const int first_spatial_axis = channel_axis_ + 1; - const int num_axes = bottom[0]->num_axes(); - num_spatial_axes_ = num_axes - first_spatial_axis; - CHECK_GE(num_spatial_axes_, 0); - vector bottom_dim_blob_shape(1, num_spatial_axes_ + 1); - vector spatial_dim_blob_shape(1, std::max(num_spatial_axes_, 1)); - // Setup filter kernel dimensions (kernel_shape_). - kernel_shape_.Reshape(spatial_dim_blob_shape); - int* kernel_shape_data = kernel_shape_.mutable_cpu_data(); - if (conv_param.has_kernel_h() || conv_param.has_kernel_w()) { - CHECK_EQ(num_spatial_axes_, 2) - << "kernel_h & kernel_w can only be used for 2D convolution."; - CHECK_EQ(0, conv_param.kernel_size_size()) - << "Either kernel_size or kernel_h/w should be specified; not both."; - kernel_shape_data[0] = conv_param.kernel_h(); - kernel_shape_data[1] = conv_param.kernel_w(); - } else { - const int num_kernel_dims = conv_param.kernel_size_size(); - CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_) - << "kernel_size must be specified once, or once per spatial dimension " - << "(kernel_size specified " << num_kernel_dims << " times; " - << num_spatial_axes_ << " spatial dims)."; - for (int i = 0; i < num_spatial_axes_; ++i) { - kernel_shape_data[i] = - conv_param.kernel_size((num_kernel_dims == 1) ? 0 : i); - } - } - for (int i = 0; i < num_spatial_axes_; ++i) { - CHECK_GT(kernel_shape_data[i], 0) << "Filter dimensions must be nonzero."; - } - // Setup stride dimensions (stride_). - stride_.Reshape(spatial_dim_blob_shape); - int* stride_data = stride_.mutable_cpu_data(); - if (conv_param.has_stride_h() || conv_param.has_stride_w()) { - CHECK_EQ(num_spatial_axes_, 2) - << "stride_h & stride_w can only be used for 2D convolution."; - CHECK_EQ(0, conv_param.stride_size()) - << "Either stride or stride_h/w should be specified; not both."; - stride_data[0] = conv_param.stride_h(); - stride_data[1] = conv_param.stride_w(); - } else { - const int num_stride_dims = conv_param.stride_size(); - CHECK(num_stride_dims == 0 || num_stride_dims == 1 || - num_stride_dims == num_spatial_axes_) - << "stride must be specified once, or once per spatial dimension " - << "(stride specified " << num_stride_dims << " times; " - << num_spatial_axes_ << " spatial dims)."; - const int kDefaultStride = 1; - for (int i = 0; i < num_spatial_axes_; ++i) { - stride_data[i] = (num_stride_dims == 0) ? kDefaultStride : - conv_param.stride((num_stride_dims == 1) ? 0 : i); - CHECK_GT(stride_data[i], 0) << "Stride dimensions must be nonzero."; - } - } - // Setup pad dimensions (pad_). 
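// Padding rule of thumb: with stride 1 and dilation 1, pad = (k - 1) / 2
// preserves the spatial size for an odd kernel k; e.g. a 3x3 kernel with
// pad 1 maps a 28x28 input to a 28x28 output (see compute_output_shape in
// conv_layer.cpp further down).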
- pad_.Reshape(spatial_dim_blob_shape); - int* pad_data = pad_.mutable_cpu_data(); - if (conv_param.has_pad_h() || conv_param.has_pad_w()) { - CHECK_EQ(num_spatial_axes_, 2) - << "pad_h & pad_w can only be used for 2D convolution."; - CHECK_EQ(0, conv_param.pad_size()) - << "Either pad or pad_h/w should be specified; not both."; - pad_data[0] = conv_param.pad_h(); - pad_data[1] = conv_param.pad_w(); - } else { - const int num_pad_dims = conv_param.pad_size(); - CHECK(num_pad_dims == 0 || num_pad_dims == 1 || - num_pad_dims == num_spatial_axes_) - << "pad must be specified once, or once per spatial dimension " - << "(pad specified " << num_pad_dims << " times; " - << num_spatial_axes_ << " spatial dims)."; - const int kDefaultPad = 0; - for (int i = 0; i < num_spatial_axes_; ++i) { - pad_data[i] = (num_pad_dims == 0) ? kDefaultPad : - conv_param.pad((num_pad_dims == 1) ? 0 : i); - } - } - // Setup dilation dimensions (dilation_). - dilation_.Reshape(spatial_dim_blob_shape); - int* dilation_data = dilation_.mutable_cpu_data(); - const int num_dilation_dims = conv_param.dilation_size(); - CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 || - num_dilation_dims == num_spatial_axes_) - << "dilation must be specified once, or once per spatial dimension " - << "(dilation specified " << num_dilation_dims << " times; " - << num_spatial_axes_ << " spatial dims)."; - const int kDefaultDilation = 1; - for (int i = 0; i < num_spatial_axes_; ++i) { - dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation : - conv_param.dilation((num_dilation_dims == 1) ? 0 : i); - } - // Special case: im2col is the identity for 1x1 convolution with stride 1 - // and no padding, so flag for skipping the buffer and transformation. - is_1x1_ = true; - for (int i = 0; i < num_spatial_axes_; ++i) { - is_1x1_ &= - kernel_shape_data[i] == 1 && stride_data[i] == 1 && pad_data[i] == 0; - if (!is_1x1_) { break; } - } - // Configure output channels and groups. - channels_ = bottom[0]->shape(channel_axis_); - num_output_ = this->layer_param_.convolution_param().num_output(); - CHECK_GT(num_output_, 0); - group_ = this->layer_param_.convolution_param().group(); - CHECK_EQ(channels_ % group_, 0); - CHECK_EQ(num_output_ % group_, 0) - << "Number of output should be multiples of group."; - if (reverse_dimensions()) { - conv_out_channels_ = channels_; - conv_in_channels_ = num_output_; - } else { - conv_out_channels_ = num_output_; - conv_in_channels_ = channels_; - } - // Handle the parameters: weights and biases. 
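// Weight-shape example (illustrative numbers): num_output 256, 96 bottom
// channels, group 2 and a 5x5 kernel give blobs_[0] of shape
// 256 x 48 x 5 x 5, plus blobs_[1] of shape 256 when bias_term is set.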
- // - blobs_[0] holds the filter weights - // - blobs_[1] holds the biases (optional) - vector weight_shape(2); - weight_shape[0] = conv_out_channels_; - weight_shape[1] = conv_in_channels_ / group_; - for (int i = 0; i < num_spatial_axes_; ++i) { - weight_shape.push_back(kernel_shape_data[i]); - } - bias_term_ = this->layer_param_.convolution_param().bias_term(); - vector bias_shape(bias_term_, num_output_); - if (this->blobs_.size() > 0) { - CHECK_EQ(1 + bias_term_, this->blobs_.size()) - << "Incorrect number of weight blobs."; - if (weight_shape != this->blobs_[0]->shape()) { - Blob weight_shaped_blob(weight_shape); - LOG(FATAL) << "Incorrect weight shape: expected shape " - << weight_shaped_blob.shape_string() << "; instead, shape was " - << this->blobs_[0]->shape_string(); - } - if (bias_term_ && bias_shape != this->blobs_[1]->shape()) { - Blob bias_shaped_blob(bias_shape); - LOG(FATAL) << "Incorrect bias shape: expected shape " - << bias_shaped_blob.shape_string() << "; instead, shape was " - << this->blobs_[1]->shape_string(); - } - LOG(INFO) << "Skipping parameter initialization"; - } else { - if (bias_term_) { - this->blobs_.resize(2); - } else { - this->blobs_.resize(1); - } - // Initialize and fill the weights: - // output channels x input channels per-group x kernel height x kernel width - this->blobs_[0].reset(new Blob(weight_shape)); - shared_ptr > weight_filler(GetFiller( - this->layer_param_.convolution_param().weight_filler())); - weight_filler->Fill(this->blobs_[0].get()); - // If necessary, initialize and fill the biases. - if (bias_term_) { - this->blobs_[1].reset(new Blob(bias_shape)); - shared_ptr > bias_filler(GetFiller( - this->layer_param_.convolution_param().bias_filler())); - bias_filler->Fill(this->blobs_[1].get()); - } - } - kernel_dim_ = this->blobs_[0]->count(1); - weight_offset_ = conv_out_channels_ * kernel_dim_ / group_; - // Propagate gradients to the parameters (as directed by backward pass). - this->param_propagate_down_.resize(this->blobs_.size(), true); -} - -template -void BaseConvolutionLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - const int first_spatial_axis = channel_axis_ + 1; - CHECK_EQ(bottom[0]->num_axes(), first_spatial_axis + num_spatial_axes_) - << "bottom num_axes may not change."; - num_ = bottom[0]->count(0, channel_axis_); - CHECK_EQ(bottom[0]->shape(channel_axis_), channels_) - << "Input size incompatible with convolution kernel."; - // TODO: generalize to handle inputs of different shapes. - for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) { - CHECK(bottom[0]->shape() == bottom[bottom_id]->shape()) - << "All inputs must have the same shape."; - } - // Shape the tops. - bottom_shape_ = &bottom[0]->shape(); - compute_output_shape(); - vector top_shape(bottom[0]->shape().begin(), - bottom[0]->shape().begin() + channel_axis_); - top_shape.push_back(num_output_); - for (int i = 0; i < num_spatial_axes_; ++i) { - top_shape.push_back(output_shape_[i]); - } - for (int top_id = 0; top_id < top.size(); ++top_id) { - top[top_id]->Reshape(top_shape); - } - if (reverse_dimensions()) { - conv_out_spatial_dim_ = bottom[0]->count(first_spatial_axis); - } else { - conv_out_spatial_dim_ = top[0]->count(first_spatial_axis); - } - col_offset_ = kernel_dim_ * conv_out_spatial_dim_; - output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_; - // Setup input dimensions (conv_input_shape_). 
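// im2col buffer sketch: per image the column buffer has
// (kernel_dim_ * group_) rows by the output spatial size, e.g. a 3x3 kernel
// over 64 input channels with a 56x56 output uses 64 * 3 * 3 = 576 rows of
// 56 * 56 = 3136 columns; the grouped GEMMs that follow operate on slices
// of this buffer.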
- vector bottom_dim_blob_shape(1, num_spatial_axes_ + 1); - conv_input_shape_.Reshape(bottom_dim_blob_shape); - int* conv_input_shape_data = conv_input_shape_.mutable_cpu_data(); - for (int i = 0; i < num_spatial_axes_ + 1; ++i) { - if (reverse_dimensions()) { - conv_input_shape_data[i] = top[0]->shape(channel_axis_ + i); - } else { - conv_input_shape_data[i] = bottom[0]->shape(channel_axis_ + i); - } - } - // The im2col result buffer will only hold one image at a time to avoid - // overly large memory usage. In the special case of 1x1 convolution - // it goes lazily unused to save memory. - col_buffer_shape_.clear(); - col_buffer_shape_.push_back(kernel_dim_ * group_); - for (int i = 0; i < num_spatial_axes_; ++i) { - if (reverse_dimensions()) { - col_buffer_shape_.push_back(input_shape(i + 1)); - } else { - col_buffer_shape_.push_back(output_shape_[i]); - } - } - col_buffer_.Reshape(col_buffer_shape_); - bottom_dim_ = bottom[0]->count(channel_axis_); - top_dim_ = top[0]->count(channel_axis_); - num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_; - num_kernels_col2im_ = reverse_dimensions() ? top_dim_ : bottom_dim_; - // Set up the all ones "bias multiplier" for adding biases by BLAS - out_spatial_dim_ = top[0]->count(first_spatial_axis); - if (bias_term_) { - vector bias_multiplier_shape(1, out_spatial_dim_); - bias_multiplier_.Reshape(bias_multiplier_shape); - caffe_set(bias_multiplier_.count(), Dtype(1), - bias_multiplier_.mutable_cpu_data()); - } -} - -template -void BaseConvolutionLayer::forward_cpu_gemm(const Dtype* input, - const Dtype* weights, Dtype* output, bool skip_im2col) { - const Dtype* col_buff = input; - if (!is_1x1_) { - if (!skip_im2col) { - conv_im2col_cpu(input, col_buffer_.mutable_cpu_data()); - } - col_buff = col_buffer_.cpu_data(); - } - for (int g = 0; g < group_; ++g) { - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, conv_out_channels_ / - group_, conv_out_spatial_dim_, kernel_dim_, - (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g, - (Dtype)0., output + output_offset_ * g); - } -} - -template -void BaseConvolutionLayer::forward_cpu_bias(Dtype* output, - const Dtype* bias) { - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, num_output_, - out_spatial_dim_, 1, (Dtype)1., bias, bias_multiplier_.cpu_data(), - (Dtype)1., output); -} - -template -void BaseConvolutionLayer::backward_cpu_gemm(const Dtype* output, - const Dtype* weights, Dtype* input) { - Dtype* col_buff = col_buffer_.mutable_cpu_data(); - if (is_1x1_) { - col_buff = input; - } - for (int g = 0; g < group_; ++g) { - caffe_cpu_gemm(CblasTrans, CblasNoTrans, kernel_dim_, - conv_out_spatial_dim_, conv_out_channels_ / group_, - (Dtype)1., weights + weight_offset_ * g, output + output_offset_ * g, - (Dtype)0., col_buff + col_offset_ * g); - } - if (!is_1x1_) { - conv_col2im_cpu(col_buff, input); - } -} - -template -void BaseConvolutionLayer::weight_cpu_gemm(const Dtype* input, - const Dtype* output, Dtype* weights) { - const Dtype* col_buff = input; - if (!is_1x1_) { - conv_im2col_cpu(input, col_buffer_.mutable_cpu_data()); - col_buff = col_buffer_.cpu_data(); - } - for (int g = 0; g < group_; ++g) { - caffe_cpu_gemm(CblasNoTrans, CblasTrans, conv_out_channels_ / group_, - kernel_dim_, conv_out_spatial_dim_, - (Dtype)1., output + output_offset_ * g, col_buff + col_offset_ * g, - (Dtype)1., weights + weight_offset_ * g); - } -} - -template -void BaseConvolutionLayer::backward_cpu_bias(Dtype* bias, - const Dtype* input) { - caffe_cpu_gemv(CblasNoTrans, num_output_, out_spatial_dim_, 1., 
- input, bias_multiplier_.cpu_data(), 1., bias); -} - -#ifndef CPU_ONLY - -template -void BaseConvolutionLayer::forward_gpu_gemm(const Dtype* input, - const Dtype* weights, Dtype* output, bool skip_im2col) { - const Dtype* col_buff = input; - if (!is_1x1_) { - if (!skip_im2col) { - conv_im2col_gpu(input, col_buffer_.mutable_gpu_data()); - } - col_buff = col_buffer_.gpu_data(); - } - for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, conv_out_channels_ / - group_, conv_out_spatial_dim_, kernel_dim_, - (Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g, - (Dtype)0., output + output_offset_ * g); - } -} - -template -void BaseConvolutionLayer::forward_gpu_bias(Dtype* output, - const Dtype* bias) { - caffe_gpu_gemm(CblasNoTrans, CblasNoTrans, num_output_, - out_spatial_dim_, 1, (Dtype)1., bias, bias_multiplier_.gpu_data(), - (Dtype)1., output); -} - -template -void BaseConvolutionLayer::backward_gpu_gemm(const Dtype* output, - const Dtype* weights, Dtype* input) { - Dtype* col_buff = col_buffer_.mutable_gpu_data(); - if (is_1x1_) { - col_buff = input; - } - for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm(CblasTrans, CblasNoTrans, kernel_dim_, - conv_out_spatial_dim_, conv_out_channels_ / group_, - (Dtype)1., weights + weight_offset_ * g, output + output_offset_ * g, - (Dtype)0., col_buff + col_offset_ * g); - } - if (!is_1x1_) { - conv_col2im_gpu(col_buff, input); - } -} - -template -void BaseConvolutionLayer::weight_gpu_gemm(const Dtype* input, - const Dtype* output, Dtype* weights) { - const Dtype* col_buff = input; - if (!is_1x1_) { - conv_im2col_gpu(input, col_buffer_.mutable_gpu_data()); - col_buff = col_buffer_.gpu_data(); - } - for (int g = 0; g < group_; ++g) { - caffe_gpu_gemm(CblasNoTrans, CblasTrans, conv_out_channels_ / group_, - kernel_dim_, conv_out_spatial_dim_, - (Dtype)1., output + output_offset_ * g, col_buff + col_offset_ * g, - (Dtype)1., weights + weight_offset_ * g); - } -} - -template -void BaseConvolutionLayer::backward_gpu_bias(Dtype* bias, - const Dtype* input) { - caffe_gpu_gemv(CblasNoTrans, num_output_, out_spatial_dim_, 1., - input, bias_multiplier_.gpu_data(), 1., bias); -} - -#endif // !CPU_ONLY - -INSTANTIATE_CLASS(BaseConvolutionLayer); - -} // namespace caffe diff --git a/src/caffe/layers/base_data_layer.cpp b/src/caffe/layers/base_data_layer.cpp deleted file mode 100644 index 04f9552..0000000 --- a/src/caffe/layers/base_data_layer.cpp +++ /dev/null @@ -1,144 +0,0 @@ -#ifdef USE_BOOST -#include -#endif -#include - -#include "caffe/blob.hpp" -#include "caffe/data_transformer.hpp" -#ifdef USE_BOOST -#include "caffe/internal_thread.hpp" -#endif -#include "caffe/layer.hpp" -#include "caffe/layers/base_data_layer.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/util/blocking_queue.hpp" - -namespace caffe { - -template -BaseDataLayer::BaseDataLayer(const LayerParameter& param) - : Layer(param), - transform_param_(param.transform_param()) { -} - -template -void BaseDataLayer::LayerSetUp(const vector*>& bottom, - const vector*>& top) { - if (top.size() == 1) { - output_labels_ = false; - } else { - output_labels_ = true; - } - data_transformer_.reset( - new DataTransformer(transform_param_, this->phase_)); - data_transformer_->InitRand(); - // The subclasses should setup the size of bottom and top - DataLayerSetUp(bottom, top); -} - -#ifdef NO_CAFFE_MOBILE -template -BasePrefetchingDataLayer::BasePrefetchingDataLayer( - const LayerParameter& param) - : BaseDataLayer(param), - 
prefetch_(param.data_param().prefetch()), - prefetch_free_(), prefetch_full_(), prefetch_current_() { - for (int i = 0; i < prefetch_.size(); ++i) { - prefetch_[i].reset(new Batch()); - prefetch_free_.push(prefetch_[i].get()); - } -} - -template -void BasePrefetchingDataLayer::LayerSetUp( - const vector*>& bottom, const vector*>& top) { - BaseDataLayer::LayerSetUp(bottom, top); - - // Before starting the prefetch thread, we make cpu_data and gpu_data - // calls so that the prefetch thread does not accidentally make simultaneous - // cudaMalloc calls when the main thread is running. In some GPUs this - // seems to cause failures if we do not so. - for (int i = 0; i < prefetch_.size(); ++i) { - prefetch_[i]->data_.mutable_cpu_data(); - if (this->output_labels_) { - prefetch_[i]->label_.mutable_cpu_data(); - } - } -#ifndef CPU_ONLY - if (Caffe::mode() == Caffe::GPU) { - for (int i = 0; i < prefetch_.size(); ++i) { - prefetch_[i]->data_.mutable_gpu_data(); - if (this->output_labels_) { - prefetch_[i]->label_.mutable_gpu_data(); - } - } - } -#endif - DLOG(INFO) << "Initializing prefetch"; - this->data_transformer_->InitRand(); - StartInternalThread(); - DLOG(INFO) << "Prefetch initialized."; -} - -template -void BasePrefetchingDataLayer::InternalThreadEntry() { -#ifndef CPU_ONLY - cudaStream_t stream; - if (Caffe::mode() == Caffe::GPU) { - CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); - } -#endif - - try { - while (!must_stop()) { - Batch* batch = prefetch_free_.pop(); - load_batch(batch); -#ifndef CPU_ONLY - if (Caffe::mode() == Caffe::GPU) { - batch->data_.data().get()->async_gpu_push(stream); - if (this->output_labels_) { - batch->label_.data().get()->async_gpu_push(stream); - } - CUDA_CHECK(cudaStreamSynchronize(stream)); - } -#endif - prefetch_full_.push(batch); - } - } catch (boost::thread_interrupted&) { - // Interrupted exception is expected on shutdown - } -#ifndef CPU_ONLY - if (Caffe::mode() == Caffe::GPU) { - CUDA_CHECK(cudaStreamDestroy(stream)); - } -#endif -} - -template -void BasePrefetchingDataLayer::Forward_cpu( - const vector*>& bottom, const vector*>& top) { - if (prefetch_current_) { - prefetch_free_.push(prefetch_current_); - } - prefetch_current_ = prefetch_full_.pop("Waiting for data"); - // Reshape to loaded data. - top[0]->ReshapeLike(prefetch_current_->data_); - top[0]->set_cpu_data(prefetch_current_->data_.mutable_cpu_data()); - if (this->output_labels_) { - // Reshape to loaded labels. - top[1]->ReshapeLike(prefetch_current_->label_); - top[1]->set_cpu_data(prefetch_current_->label_.mutable_cpu_data()); - } -} - -#ifdef CPU_ONLY -STUB_GPU_FORWARD(BasePrefetchingDataLayer, Forward); -#endif -#endif // NO_CAFFE_MOBILE - -INSTANTIATE_CLASS(BaseDataLayer); -#ifdef NO_CAFFE_MOBILE -INSTANTIATE_CLASS(BasePrefetchingDataLayer); -#endif // NO_CAFFE_MOBILE - -} // namespace caffe diff --git a/src/caffe/layers/bnll_layer.cpp b/src/caffe/layers/bnll_layer.cpp deleted file mode 100644 index 448d86d..0000000 --- a/src/caffe/layers/bnll_layer.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include - -#include "caffe/layers/bnll_layer.hpp" - -namespace caffe { - -const float kBNLL_THRESHOLD = 50.; - -template -void BNLLLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - const int count = bottom[0]->count(); - for (int i = 0; i < count; ++i) { - top_data[i] = bottom_data[i] > 0 ? - bottom_data[i] + log(1. + exp(-bottom_data[i])) : - log(1. 
+ exp(bottom_data[i])); - } -} - -template -void BNLLLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - if (propagate_down[0]) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - const int count = bottom[0]->count(); - Dtype expval; - for (int i = 0; i < count; ++i) { - expval = exp(std::min(bottom_data[i], Dtype(kBNLL_THRESHOLD))); - bottom_diff[i] = top_diff[i] * expval / (expval + 1.); - } - } -} - -#ifdef CPU_ONLY -STUB_GPU(BNLLLayer); -#endif - -INSTANTIATE_CLASS(BNLLLayer); -REGISTER_LAYER_CLASS(BNLL); - -} // namespace caffe diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp deleted file mode 100644 index 580bd47..0000000 --- a/src/caffe/layers/concat_layer.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include - -#include "caffe/layers/concat_layer.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void ConcatLayer::LayerSetUp(const vector*>& bottom, - const vector*>& top) { - const ConcatParameter& concat_param = this->layer_param_.concat_param(); - CHECK(!(concat_param.has_axis() && concat_param.has_concat_dim())) - << "Either axis or concat_dim should be specified; not both."; -} - -template -void ConcatLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - const int num_axes = bottom[0]->num_axes(); - const ConcatParameter& concat_param = this->layer_param_.concat_param(); - if (concat_param.has_concat_dim()) { - concat_axis_ = static_cast(concat_param.concat_dim()); - // Don't allow negative indexing for concat_dim, a uint32 -- almost - // certainly unintended. - CHECK_GE(concat_axis_, 0) << "casting concat_dim from uint32 to int32 " - << "produced negative result; concat_dim must satisfy " - << "0 <= concat_dim < " << kMaxBlobAxes; - CHECK_LT(concat_axis_, num_axes) << "concat_dim out of range."; - } else { - concat_axis_ = bottom[0]->CanonicalAxisIndex(concat_param.axis()); - } - // Initialize with the first blob. 
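// Concat shape example: along axis 1, bottoms shaped (N, 3, H, W) and
// (N, 5, H, W) produce a top of shape (N, 8, H, W); every non-concat axis
// must match, which the checks below enforce.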
- vector top_shape = bottom[0]->shape(); - num_concats_ = bottom[0]->count(0, concat_axis_); - concat_input_size_ = bottom[0]->count(concat_axis_ + 1); - int bottom_count_sum = bottom[0]->count(); - for (int i = 1; i < bottom.size(); ++i) { - CHECK_EQ(num_axes, bottom[i]->num_axes()) - << "All inputs must have the same #axes."; - for (int j = 0; j < num_axes; ++j) { - if (j == concat_axis_) { continue; } - CHECK_EQ(top_shape[j], bottom[i]->shape(j)) - << "All inputs must have the same shape, except at concat_axis."; - } - bottom_count_sum += bottom[i]->count(); - top_shape[concat_axis_] += bottom[i]->shape(concat_axis_); - } - top[0]->Reshape(top_shape); - CHECK_EQ(bottom_count_sum, top[0]->count()); - if (bottom.size() == 1) { - top[0]->ShareData(*bottom[0]); - top[0]->ShareDiff(*bottom[0]); - } -} - -template -void ConcatLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - if (bottom.size() == 1) { return; } - Dtype* top_data = top[0]->mutable_cpu_data(); - int offset_concat_axis = 0; - const int top_concat_axis = top[0]->shape(concat_axis_); - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->cpu_data(); - const int bottom_concat_axis = bottom[i]->shape(concat_axis_); - for (int n = 0; n < num_concats_; ++n) { - caffe_copy(bottom_concat_axis * concat_input_size_, - bottom_data + n * bottom_concat_axis * concat_input_size_, - top_data + (n * top_concat_axis + offset_concat_axis) - * concat_input_size_); - } - offset_concat_axis += bottom_concat_axis; - } -} - -template -void ConcatLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - if (bottom.size() == 1) { return; } - const Dtype* top_diff = top[0]->cpu_diff(); - int offset_concat_axis = 0; - const int top_concat_axis = top[0]->shape(concat_axis_); - for (int i = 0; i < bottom.size(); ++i) { - const int bottom_concat_axis = bottom[i]->shape(concat_axis_); - if (propagate_down[i]) { - Dtype* bottom_diff = bottom[i]->mutable_cpu_diff(); - for (int n = 0; n < num_concats_; ++n) { - caffe_copy(bottom_concat_axis * concat_input_size_, top_diff + - (n * top_concat_axis + offset_concat_axis) * concat_input_size_, - bottom_diff + n * bottom_concat_axis * concat_input_size_); - } - } - offset_concat_axis += bottom_concat_axis; - } -} - -#ifdef CPU_ONLY -STUB_GPU(ConcatLayer); -#endif - -INSTANTIATE_CLASS(ConcatLayer); -REGISTER_LAYER_CLASS(Concat); - -} // namespace caffe diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp deleted file mode 100644 index 5d522ab..0000000 --- a/src/caffe/layers/conv_layer.cpp +++ /dev/null @@ -1,81 +0,0 @@ -#include - -#include "caffe/layers/conv_layer.hpp" - -namespace caffe { - -template -void ConvolutionLayer::compute_output_shape() { - const int* kernel_shape_data = this->kernel_shape_.cpu_data(); - const int* stride_data = this->stride_.cpu_data(); - const int* pad_data = this->pad_.cpu_data(); - const int* dilation_data = this->dilation_.cpu_data(); - this->output_shape_.clear(); - for (int i = 0; i < this->num_spatial_axes_; ++i) { - // i + 1 to skip channel axis - const int input_dim = this->input_shape(i + 1); - const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1; - const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent) - / stride_data[i] + 1; - this->output_shape_.push_back(output_dim); - } -} - -template -void ConvolutionLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - const Dtype* weight = this->blobs_[0]->cpu_data(); - 
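A standalone check (not from the patch) of the output-shape rule used by the deleted ConvolutionLayer::compute_output_shape(); the function name and the sample dimensions below are invented for illustration.

#include <cstdio>

// output_dim = (input + 2*pad - dilated_kernel_extent) / stride + 1
int conv_output_dim(int input, int kernel, int pad, int stride, int dilation) {
  const int kernel_extent = dilation * (kernel - 1) + 1;
  return (input + 2 * pad - kernel_extent) / stride + 1;
}

int main() {
  // 28x28 input, 5x5 kernel, pad 2, stride 1, dilation 1 -> 28 (same size)
  std::printf("%d\n", conv_output_dim(28, 5, 2, 1, 1));   // 28
  // 224 input, 7x7 kernel, pad 3, stride 2 -> 112 (halved)
  std::printf("%d\n", conv_output_dim(224, 7, 3, 2, 1));  // 112
  return 0;
}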
for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->cpu_data(); - Dtype* top_data = top[i]->mutable_cpu_data(); - for (int n = 0; n < this->num_; ++n) { - this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight, - top_data + n * this->top_dim_); - if (this->bias_term_) { - const Dtype* bias = this->blobs_[1]->cpu_data(); - this->forward_cpu_bias(top_data + n * this->top_dim_, bias); - } - } - } -} - -template -void ConvolutionLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - const Dtype* weight = this->blobs_[0]->cpu_data(); - Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff(); - for (int i = 0; i < top.size(); ++i) { - const Dtype* top_diff = top[i]->cpu_diff(); - const Dtype* bottom_data = bottom[i]->cpu_data(); - Dtype* bottom_diff = bottom[i]->mutable_cpu_diff(); - // Bias gradient, if necessary. - if (this->bias_term_ && this->param_propagate_down_[1]) { - Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); - for (int n = 0; n < this->num_; ++n) { - this->backward_cpu_bias(bias_diff, top_diff + n * this->top_dim_); - } - } - if (this->param_propagate_down_[0] || propagate_down[i]) { - for (int n = 0; n < this->num_; ++n) { - // gradient w.r.t. weight. Note that we will accumulate diffs. - if (this->param_propagate_down_[0]) { - this->weight_cpu_gemm(bottom_data + n * this->bottom_dim_, - top_diff + n * this->top_dim_, weight_diff); - } - // gradient w.r.t. bottom data, if necessary. - if (propagate_down[i]) { - this->backward_cpu_gemm(top_diff + n * this->top_dim_, weight, - bottom_diff + n * this->bottom_dim_); - } - } - } - } -} - -#ifdef CPU_ONLY -STUB_GPU(ConvolutionLayer); -#endif - -INSTANTIATE_CLASS(ConvolutionLayer); - -} // namespace caffe diff --git a/src/caffe/layers/dropout_layer.cpp b/src/caffe/layers/dropout_layer.cpp deleted file mode 100644 index 533ab26..0000000 --- a/src/caffe/layers/dropout_layer.cpp +++ /dev/null @@ -1,75 +0,0 @@ -// TODO (sergeyk): effect should not be dependent on phase. wasted memcpy. - -#include - -#include "caffe/layers/dropout_layer.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void DropoutLayer::LayerSetUp(const vector*>& bottom, - const vector*>& top) { - NeuronLayer::LayerSetUp(bottom, top); - threshold_ = this->layer_param_.dropout_param().dropout_ratio(); - DCHECK(threshold_ > 0.); - DCHECK(threshold_ < 1.); - scale_ = 1. / (1. - threshold_); - uint_thres_ = static_cast(UINT_MAX * threshold_); -} - -template -void DropoutLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - NeuronLayer::Reshape(bottom, top); - // Set up the cache for random number generation - // ReshapeLike does not work because rand_vec_ is of Dtype uint - rand_vec_.Reshape(bottom[0]->shape()); -} - -template -void DropoutLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - unsigned int* mask = rand_vec_.mutable_cpu_data(); - const int count = bottom[0]->count(); - if (this->phase_ == TRAIN) { - // Create random numbers - caffe_rng_bernoulli(count, 1. 
- threshold_, mask); - for (int i = 0; i < count; ++i) { - top_data[i] = bottom_data[i] * mask[i] * scale_; - } - } else { - caffe_copy(bottom[0]->count(), bottom_data, top_data); - } -} - -template -void DropoutLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - if (propagate_down[0]) { - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - if (this->phase_ == TRAIN) { - const unsigned int* mask = rand_vec_.cpu_data(); - const int count = bottom[0]->count(); - for (int i = 0; i < count; ++i) { - bottom_diff[i] = top_diff[i] * mask[i] * scale_; - } - } else { - caffe_copy(top[0]->count(), top_diff, bottom_diff); - } - } -} - - -#ifdef CPU_ONLY -STUB_GPU(DropoutLayer); -#endif - -INSTANTIATE_CLASS(DropoutLayer); -REGISTER_LAYER_CLASS(Dropout); - -} // namespace caffe diff --git a/src/caffe/layers/eltwise_layer.cpp b/src/caffe/layers/eltwise_layer.cpp deleted file mode 100644 index 2125616..0000000 --- a/src/caffe/layers/eltwise_layer.cpp +++ /dev/null @@ -1,160 +0,0 @@ -#include -#include - -#include "caffe/layers/eltwise_layer.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void EltwiseLayer::LayerSetUp(const vector*>& bottom, - const vector*>& top) { - CHECK(this->layer_param().eltwise_param().coeff_size() == 0 - || this->layer_param().eltwise_param().coeff_size() == bottom.size()) << - "Eltwise Layer takes one coefficient per bottom blob."; - CHECK(!(this->layer_param().eltwise_param().operation() - == EltwiseParameter_EltwiseOp_PROD - && this->layer_param().eltwise_param().coeff_size())) << - "Eltwise layer only takes coefficients for summation."; - op_ = this->layer_param_.eltwise_param().operation(); - // Blob-wise coefficients for the elementwise operation. - coeffs_ = vector(bottom.size(), 1); - if (this->layer_param().eltwise_param().coeff_size()) { - for (int i = 0; i < bottom.size(); ++i) { - coeffs_[i] = this->layer_param().eltwise_param().coeff(i); - } - } - stable_prod_grad_ = this->layer_param_.eltwise_param().stable_prod_grad(); -} - -template -void EltwiseLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - for (int i = 1; i < bottom.size(); ++i) { - CHECK(bottom[i]->shape() == bottom[0]->shape()); - } - top[0]->ReshapeLike(*bottom[0]); - // If max operation, we will initialize the vector index part. - if (this->layer_param_.eltwise_param().operation() == - EltwiseParameter_EltwiseOp_MAX && top.size() == 1) { - max_idx_.Reshape(bottom[0]->shape()); - } -} - -template -void EltwiseLayer::Forward_cpu( - const vector*>& bottom, const vector*>& top) { - int* mask = NULL; - const Dtype* bottom_data_a = NULL; - const Dtype* bottom_data_b = NULL; - const int count = top[0]->count(); - Dtype* top_data = top[0]->mutable_cpu_data(); - switch (op_) { - case EltwiseParameter_EltwiseOp_PROD: - caffe_mul(count, bottom[0]->cpu_data(), bottom[1]->cpu_data(), top_data); - for (int i = 2; i < bottom.size(); ++i) { - caffe_mul(count, top_data, bottom[i]->cpu_data(), top_data); - } - break; - case EltwiseParameter_EltwiseOp_SUM: - caffe_set(count, Dtype(0), top_data); - // TODO(shelhamer) does BLAS optimize to sum for coeff = 1? 
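An illustrative restatement (not taken from the patch) of the inverted-dropout scaling in the deleted DropoutLayer: multiplying kept activations by 1/(1 - p) at train time keeps their expectation equal to the untouched test-time pass. The data and probability below are made up.

#include <cstdio>
#include <random>
#include <vector>

int main() {
  const float p = 0.5f;                    // dropout_ratio
  const float scale = 1.0f / (1.0f - p);   // same role as scale_
  std::vector<float> x(100000, 1.0f);
  std::mt19937 rng(0);
  std::bernoulli_distribution keep(1.0f - p);
  double sum = 0.0;
  for (float v : x) sum += keep(rng) ? v * scale : 0.0f;
  std::printf("train-time mean ~ %.3f (test-time mean = 1.000)\n",
              sum / x.size());
  return 0;
}

The printed mean should land close to 1.0, which is why the deleted layer can simply copy the input when phase_ != TRAIN.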
- for (int i = 0; i < bottom.size(); ++i) { - caffe_axpy(count, coeffs_[i], bottom[i]->cpu_data(), top_data); - } - break; - case EltwiseParameter_EltwiseOp_MAX: - // Initialize - mask = max_idx_.mutable_cpu_data(); - caffe_set(count, -1, mask); - caffe_set(count, Dtype(-FLT_MAX), top_data); - // bottom 0 & 1 - bottom_data_a = bottom[0]->cpu_data(); - bottom_data_b = bottom[1]->cpu_data(); - for (int idx = 0; idx < count; ++idx) { - if (bottom_data_a[idx] > bottom_data_b[idx]) { - top_data[idx] = bottom_data_a[idx]; // maxval - mask[idx] = 0; // maxid - } else { - top_data[idx] = bottom_data_b[idx]; // maxval - mask[idx] = 1; // maxid - } - } - // bottom 2++ - for (int blob_idx = 2; blob_idx < bottom.size(); ++blob_idx) { - bottom_data_b = bottom[blob_idx]->cpu_data(); - for (int idx = 0; idx < count; ++idx) { - if (bottom_data_b[idx] > top_data[idx]) { - top_data[idx] = bottom_data_b[idx]; // maxval - mask[idx] = blob_idx; // maxid - } - } - } - break; - default: - LOG(FATAL) << "Unknown elementwise operation."; - } -} - -template -void EltwiseLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - const int* mask = NULL; - const int count = top[0]->count(); - const Dtype* top_data = top[0]->cpu_data(); - const Dtype* top_diff = top[0]->cpu_diff(); - for (int i = 0; i < bottom.size(); ++i) { - if (propagate_down[i]) { - const Dtype* bottom_data = bottom[i]->cpu_data(); - Dtype* bottom_diff = bottom[i]->mutable_cpu_diff(); - switch (op_) { - case EltwiseParameter_EltwiseOp_PROD: - if (stable_prod_grad_) { - bool initialized = false; - for (int j = 0; j < bottom.size(); ++j) { - if (i == j) { continue; } - if (!initialized) { - caffe_copy(count, bottom[j]->cpu_data(), bottom_diff); - initialized = true; - } else { - caffe_mul(count, bottom[j]->cpu_data(), bottom_diff, - bottom_diff); - } - } - } else { - caffe_div(count, top_data, bottom_data, bottom_diff); - } - caffe_mul(count, bottom_diff, top_diff, bottom_diff); - break; - case EltwiseParameter_EltwiseOp_SUM: - if (coeffs_[i] == Dtype(1)) { - caffe_copy(count, top_diff, bottom_diff); - } else { - caffe_cpu_scale(count, coeffs_[i], top_diff, bottom_diff); - } - break; - case EltwiseParameter_EltwiseOp_MAX: - mask = max_idx_.cpu_data(); - for (int index = 0; index < count; ++index) { - Dtype gradient = 0; - if (mask[index] == i) { - gradient += top_diff[index]; - } - bottom_diff[index] = gradient; - } - break; - default: - LOG(FATAL) << "Unknown elementwise operation."; - } - } - } -} - -#ifdef CPU_ONLY -STUB_GPU(EltwiseLayer); -#endif - -INSTANTIATE_CLASS(EltwiseLayer); -REGISTER_LAYER_CLASS(Eltwise); - -} // namespace caffe diff --git a/src/caffe/layers/flatten_layer.cpp b/src/caffe/layers/flatten_layer.cpp deleted file mode 100644 index d4ab393..0000000 --- a/src/caffe/layers/flatten_layer.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include - -#include "caffe/layers/flatten_layer.hpp" - -namespace caffe { - -template -void FlattenLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - CHECK_NE(top[0], bottom[0]) << this->type() << " Layer does not " - "allow in-place computation."; - const int start_axis = bottom[0]->CanonicalAxisIndex( - this->layer_param_.flatten_param().axis()); - const int end_axis = bottom[0]->CanonicalAxisIndex( - this->layer_param_.flatten_param().end_axis()); - vector top_shape; - for (int i = 0; i < start_axis; ++i) { - top_shape.push_back(bottom[0]->shape(i)); - } - const int flattened_dim = bottom[0]->count(start_axis, end_axis + 1); - 
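A toy sketch (not part of the removed sources) of the MAX branch of the deleted EltwiseLayer: the mask records, per element, which bottom blob won, so the backward pass can route the gradient only to that input. The blob contents are arbitrary.

#include <cfloat>
#include <cstdio>
#include <vector>

int main() {
  std::vector<std::vector<float>> bottoms = {{1, 5, 2}, {4, 3, 9}, {0, 6, 1}};
  const int count = 3;
  std::vector<float> top(count, -FLT_MAX);
  std::vector<int> mask(count, -1);   // analogue of max_idx_
  for (int b = 0; b < static_cast<int>(bottoms.size()); ++b)
    for (int i = 0; i < count; ++i)
      if (bottoms[b][i] > top[i]) { top[i] = bottoms[b][i]; mask[i] = b; }
  for (int i = 0; i < count; ++i)
    std::printf("top[%d] = %g (from bottom %d)\n", i, top[i], mask[i]);
  return 0;
}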
top_shape.push_back(flattened_dim); - for (int i = end_axis + 1; i < bottom[0]->num_axes(); ++i) { - top_shape.push_back(bottom[0]->shape(i)); - } - top[0]->Reshape(top_shape); - CHECK_EQ(top[0]->count(), bottom[0]->count()); -} - -template -void FlattenLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - top[0]->ShareData(*bottom[0]); -} - -template -void FlattenLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - bottom[0]->ShareDiff(*top[0]); -} - -INSTANTIATE_CLASS(FlattenLayer); -REGISTER_LAYER_CLASS(Flatten); - -} // namespace caffe diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp deleted file mode 100644 index 2fb9b3c..0000000 --- a/src/caffe/layers/im2col_layer.cpp +++ /dev/null @@ -1,193 +0,0 @@ -#include - -#include "caffe/layers/im2col_layer.hpp" -#include "caffe/util/im2col.hpp" - -namespace caffe { - -template -void Im2colLayer::LayerSetUp(const vector*>& bottom, - const vector*>& top) { - ConvolutionParameter conv_param = this->layer_param_.convolution_param(); - force_nd_im2col_ = conv_param.force_nd_im2col(); - const int input_num_dims = bottom[0]->shape().size(); - channel_axis_ = bottom[0]->CanonicalAxisIndex(conv_param.axis()); - const int first_spatial_dim = channel_axis_ + 1; - num_spatial_axes_ = input_num_dims - first_spatial_dim; - CHECK_GE(num_spatial_axes_, 1); - vector dim_blob_shape(1, num_spatial_axes_); - // Setup filter kernel dimensions (kernel_shape_). - kernel_shape_.Reshape(dim_blob_shape); - int* kernel_shape_data = kernel_shape_.mutable_cpu_data(); - if (conv_param.has_kernel_h() || conv_param.has_kernel_w()) { - CHECK_EQ(num_spatial_axes_, 2) - << "kernel_h & kernel_w can only be used for 2D convolution."; - CHECK_EQ(0, conv_param.kernel_size_size()) - << "Either kernel_size or kernel_h/w should be specified; not both."; - kernel_shape_data[0] = conv_param.kernel_h(); - kernel_shape_data[1] = conv_param.kernel_w(); - } else { - const int num_kernel_dims = conv_param.kernel_size_size(); - CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_) - << "kernel_size must be specified once, or once per spatial dimension " - << "(kernel_size specified " << num_kernel_dims << " times; " - << num_spatial_axes_ << " spatial dims);"; - for (int i = 0; i < num_spatial_axes_; ++i) { - kernel_shape_data[i] = - conv_param.kernel_size((num_kernel_dims == 1) ? 0 : i); - } - } - for (int i = 0; i < num_spatial_axes_; ++i) { - CHECK_GT(kernel_shape_data[i], 0) << "Filter dimensions must be nonzero."; - } - // Setup stride dimensions (stride_). - stride_.Reshape(dim_blob_shape); - int* stride_data = stride_.mutable_cpu_data(); - if (conv_param.has_stride_h() || conv_param.has_stride_w()) { - CHECK_EQ(num_spatial_axes_, 2) - << "stride_h & stride_w can only be used for 2D convolution."; - CHECK_EQ(0, conv_param.stride_size()) - << "Either stride or stride_h/w should be specified; not both."; - stride_data[0] = conv_param.stride_h(); - stride_data[1] = conv_param.stride_w(); - } else { - const int num_stride_dims = conv_param.stride_size(); - CHECK(num_stride_dims == 0 || num_stride_dims == 1 || - num_stride_dims == num_spatial_axes_) - << "stride must be specified once, or once per spatial dimension " - << "(stride specified " << num_stride_dims << " times; " - << num_spatial_axes_ << " spatial dims);"; - const int kDefaultStride = 1; - for (int i = 0; i < num_spatial_axes_; ++i) { - stride_data[i] = (num_stride_dims == 0) ? 
kDefaultStride : - conv_param.stride((num_stride_dims == 1) ? 0 : i); - CHECK_GT(stride_data[i], 0) << "Stride dimensions must be nonzero."; - } - } - // Setup pad dimensions (pad_). - pad_.Reshape(dim_blob_shape); - int* pad_data = pad_.mutable_cpu_data(); - if (conv_param.has_pad_h() || conv_param.has_pad_w()) { - CHECK_EQ(num_spatial_axes_, 2) - << "pad_h & pad_w can only be used for 2D convolution."; - CHECK_EQ(0, conv_param.pad_size()) - << "Either pad or pad_h/w should be specified; not both."; - pad_data[0] = conv_param.pad_h(); - pad_data[1] = conv_param.pad_w(); - } else { - const int num_pad_dims = conv_param.pad_size(); - CHECK(num_pad_dims == 0 || num_pad_dims == 1 || - num_pad_dims == num_spatial_axes_) - << "pad must be specified once, or once per spatial dimension " - << "(pad specified " << num_pad_dims << " times; " - << num_spatial_axes_ << " spatial dims);"; - const int kDefaultPad = 0; - for (int i = 0; i < num_spatial_axes_; ++i) { - pad_data[i] = (num_pad_dims == 0) ? kDefaultPad : - conv_param.pad((num_pad_dims == 1) ? 0 : i); - } - } - // Setup dilation dimensions (dilation_). - dilation_.Reshape(dim_blob_shape); - int* dilation_data = dilation_.mutable_cpu_data(); - const int num_dilation_dims = conv_param.dilation_size(); - CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 || - num_dilation_dims == num_spatial_axes_) - << "dilation must be specified once, or once per spatial dimension " - << "(dilation specified " << num_dilation_dims << " times; " - << num_spatial_axes_ << " spatial dims)."; - const int kDefaultDilation = 1; - for (int i = 0; i < num_spatial_axes_; ++i) { - dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation : - conv_param.dilation((num_dilation_dims == 1) ? 0 : i); - } -} - -template -void Im2colLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - vector top_shape = bottom[0]->shape(); - const int* kernel_shape_data = kernel_shape_.cpu_data(); - const int* stride_data = stride_.cpu_data(); - const int* pad_data = pad_.cpu_data(); - const int* dilation_data = dilation_.cpu_data(); - for (int i = 0; i < num_spatial_axes_; ++i) { - top_shape[channel_axis_] *= kernel_shape_data[i]; - const int input_dim = bottom[0]->shape(channel_axis_ + i + 1); - const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1; - const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent) - / stride_data[i] + 1; - top_shape[channel_axis_ + i + 1] = output_dim; - } - top[0]->Reshape(top_shape); - num_ = bottom[0]->count(0, channel_axis_); - bottom_dim_ = bottom[0]->count(channel_axis_); - top_dim_ = top[0]->count(channel_axis_); - - channels_ = bottom[0]->shape(channel_axis_); -} - -template -void Im2colLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - for (int n = 0; n < num_; ++n) { - DCHECK_EQ(bottom[0]->shape().size() - channel_axis_, num_spatial_axes_ + 1); - DCHECK_EQ(top[0]->shape().size() - channel_axis_, num_spatial_axes_ + 1); - DCHECK_EQ(kernel_shape_.count(), num_spatial_axes_); - DCHECK_EQ(pad_.count(), num_spatial_axes_); - DCHECK_EQ(stride_.count(), num_spatial_axes_); - DCHECK_EQ(dilation_.count(), num_spatial_axes_); - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - im2col_cpu(bottom_data + n * bottom_dim_, channels_, - bottom[0]->shape(channel_axis_ + 1), - bottom[0]->shape(channel_axis_ + 2), - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], 
pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], - dilation_.cpu_data()[0], dilation_.cpu_data()[1], - top_data + n * top_dim_); - } else { - im2col_nd_cpu(bottom_data + n * bottom_dim_, num_spatial_axes_, - bottom[0]->shape().data() + channel_axis_, - top[0]->shape().data() + channel_axis_, - kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(), - dilation_.cpu_data(), top_data + n * top_dim_); - } - } -} - -template -void Im2colLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - for (int n = 0; n < num_; ++n) { - if (!force_nd_im2col_ && num_spatial_axes_ == 2) { - col2im_cpu(top_diff + n * top_dim_, channels_, - bottom[0]->shape(channel_axis_ + 1), - bottom[0]->shape(channel_axis_ + 2), - kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1], - pad_.cpu_data()[0], pad_.cpu_data()[1], - stride_.cpu_data()[0], stride_.cpu_data()[1], - dilation_.cpu_data()[0], dilation_.cpu_data()[1], - bottom_diff + n * bottom_dim_); - } else { - col2im_nd_cpu(top_diff + n * top_dim_, num_spatial_axes_, - bottom[0]->shape().data() + channel_axis_, - top[0]->shape().data() + channel_axis_, - kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(), - dilation_.cpu_data(), bottom_diff + n * bottom_dim_); - } - } -} - -#ifdef CPU_ONLY -STUB_GPU(Im2colLayer); -#endif - -INSTANTIATE_CLASS(Im2colLayer); -REGISTER_LAYER_CLASS(Im2col); - -} // namespace caffe diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp deleted file mode 100644 index e65349f..0000000 --- a/src/caffe/layers/inner_product_layer.cpp +++ /dev/null @@ -1,150 +0,0 @@ -#include - -#include "caffe/filler.hpp" -#include "caffe/layers/inner_product_layer.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void InnerProductLayer::LayerSetUp(const vector*>& bottom, - const vector*>& top) { - const int num_output = this->layer_param_.inner_product_param().num_output(); - bias_term_ = this->layer_param_.inner_product_param().bias_term(); - transpose_ = this->layer_param_.inner_product_param().transpose(); - N_ = num_output; - const int axis = bottom[0]->CanonicalAxisIndex( - this->layer_param_.inner_product_param().axis()); - // Dimensions starting from "axis" are "flattened" into a single - // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W), - // and axis == 1, N inner products with dimension CHW are performed. 
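A rough standalone check (not from the patch) of the bookkeeping in the deleted Im2colLayer::Reshape() for the common 2-D case: the channel axis grows by kernel_h * kernel_w while the spatial axes shrink exactly like a convolution. All sizes below are invented.

#include <cstdio>

int main() {
  const int channels = 3, height = 32, width = 32;
  const int kernel = 3, pad = 1, stride = 1, dilation = 1;
  const int extent = dilation * (kernel - 1) + 1;
  const int out_h = (height + 2 * pad - extent) / stride + 1;
  const int out_w = (width + 2 * pad - extent) / stride + 1;
  // Column-buffer shape: (channels * kernel * kernel) x out_h x out_w
  std::printf("top shape: %d x %d x %d\n",
              channels * kernel * kernel, out_h, out_w);   // 27 x 32 x 32
  return 0;
}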
- K_ = bottom[0]->count(axis); - // Check if we need to set up the weights - if (this->blobs_.size() > 0) { - LOG(INFO) << "Skipping parameter initialization"; - } else { - if (bias_term_) { - this->blobs_.resize(2); - } else { - this->blobs_.resize(1); - } - // Initialize the weights - vector weight_shape(2); - if (transpose_) { - weight_shape[0] = K_; - weight_shape[1] = N_; - } else { - weight_shape[0] = N_; - weight_shape[1] = K_; - } - this->blobs_[0].reset(new Blob(weight_shape)); - // fill the weights - shared_ptr > weight_filler(GetFiller( - this->layer_param_.inner_product_param().weight_filler())); - weight_filler->Fill(this->blobs_[0].get()); - // If necessary, intiialize and fill the bias term - if (bias_term_) { - vector bias_shape(1, N_); - this->blobs_[1].reset(new Blob(bias_shape)); - shared_ptr > bias_filler(GetFiller( - this->layer_param_.inner_product_param().bias_filler())); - bias_filler->Fill(this->blobs_[1].get()); - } - } // parameter initialization - this->param_propagate_down_.resize(this->blobs_.size(), true); -} - -template -void InnerProductLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - // Figure out the dimensions - const int axis = bottom[0]->CanonicalAxisIndex( - this->layer_param_.inner_product_param().axis()); - const int new_K = bottom[0]->count(axis); - CHECK_EQ(K_, new_K) - << "Input size incompatible with inner product parameters."; - // The first "axis" dimensions are independent inner products; the total - // number of these is M_, the product over these dimensions. - M_ = bottom[0]->count(0, axis); - // The top shape will be the bottom shape with the flattened axes dropped, - // and replaced by a single axis with dimension num_output (N_). - vector top_shape = bottom[0]->shape(); - top_shape.resize(axis + 1); - top_shape[axis] = N_; - top[0]->Reshape(top_shape); - // Set up the bias multiplier - if (bias_term_) { - vector bias_shape(1, M_); - bias_multiplier_.Reshape(bias_shape); - caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data()); - } -} - -template -void InnerProductLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - const Dtype* weight = this->blobs_[0]->cpu_data(); - caffe_cpu_gemm(CblasNoTrans, transpose_ ? 
CblasNoTrans : CblasTrans, - M_, N_, K_, (Dtype)1., - bottom_data, weight, (Dtype)0., top_data); - if (bias_term_) { - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1., - bias_multiplier_.cpu_data(), - this->blobs_[1]->cpu_data(), (Dtype)1., top_data); - } -} - -template -void InnerProductLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - if (this->param_propagate_down_[0]) { - const Dtype* top_diff = top[0]->cpu_diff(); - const Dtype* bottom_data = bottom[0]->cpu_data(); - // Gradient with respect to weight - if (transpose_) { - caffe_cpu_gemm(CblasTrans, CblasNoTrans, - K_, N_, M_, - (Dtype)1., bottom_data, top_diff, - (Dtype)1., this->blobs_[0]->mutable_cpu_diff()); - } else { - caffe_cpu_gemm(CblasTrans, CblasNoTrans, - N_, K_, M_, - (Dtype)1., top_diff, bottom_data, - (Dtype)1., this->blobs_[0]->mutable_cpu_diff()); - } - } - if (bias_term_ && this->param_propagate_down_[1]) { - const Dtype* top_diff = top[0]->cpu_diff(); - // Gradient with respect to bias - caffe_cpu_gemv(CblasTrans, M_, N_, (Dtype)1., top_diff, - bias_multiplier_.cpu_data(), (Dtype)1., - this->blobs_[1]->mutable_cpu_diff()); - } - if (propagate_down[0]) { - const Dtype* top_diff = top[0]->cpu_diff(); - // Gradient with respect to bottom data - if (transpose_) { - caffe_cpu_gemm(CblasNoTrans, CblasTrans, - M_, K_, N_, - (Dtype)1., top_diff, this->blobs_[0]->cpu_data(), - (Dtype)0., bottom[0]->mutable_cpu_diff()); - } else { - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, - M_, K_, N_, - (Dtype)1., top_diff, this->blobs_[0]->cpu_data(), - (Dtype)0., bottom[0]->mutable_cpu_diff()); - } - } -} - -#ifdef CPU_ONLY -STUB_GPU(InnerProductLayer); -#endif - -INSTANTIATE_CLASS(InnerProductLayer); -REGISTER_LAYER_CLASS(InnerProduct); - -} // namespace caffe diff --git a/src/caffe/layers/input_layer.cpp b/src/caffe/layers/input_layer.cpp deleted file mode 100644 index 667d8ad..0000000 --- a/src/caffe/layers/input_layer.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include - -#include "caffe/layers/input_layer.hpp" - -namespace caffe { - -template -void InputLayer::LayerSetUp(const vector*>& bottom, - const vector*>& top) { - const int num_top = top.size(); - const InputParameter& param = this->layer_param_.input_param(); - const int num_shape = param.shape_size(); - CHECK(num_shape == 0 || num_shape == 1 || num_shape == num_top) - << "Must specify 'shape' once, once per top blob, or not at all: " - << num_top << " tops vs. " << num_shape << " shapes."; - if (num_shape > 0) { - for (int i = 0; i < num_top; ++i) { - const int shape_index = (param.shape_size() == 1) ? 0 : i; - top[i]->Reshape(param.shape(shape_index)); - } - } -} - -INSTANTIATE_CLASS(InputLayer); -REGISTER_LAYER_CLASS(Input); - -} // namespace caffe diff --git a/src/caffe/layers/loss_layer.cpp b/src/caffe/layers/loss_layer.cpp deleted file mode 100644 index afb1ce9..0000000 --- a/src/caffe/layers/loss_layer.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include - -#include "caffe/layers/loss_layer.hpp" - -namespace caffe { - -template -void LossLayer::LayerSetUp( - const vector*>& bottom, const vector*>& top) { - // LossLayers have a non-zero (1) loss by default. 
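A plain triple-loop equivalent (not part of the removed sources) of the gemm call in the deleted InnerProductLayer::Forward_cpu() for the default transpose_ == false layout: weights are stored as an (N_, K_) matrix, the bottom as (M_, K_), and the bias is broadcast over the batch. Sizes and data are invented.

#include <cstdio>
#include <vector>

int main() {
  const int M = 2, K = 3, N = 4;              // batch, inputs, outputs
  std::vector<float> x = {1, 2, 3, 4, 5, 6};  // M x K bottom data
  std::vector<float> w(N * K, 0.5f);          // N x K weights
  std::vector<float> b(N, 1.0f);              // bias
  std::vector<float> y(M * N, 0.0f);          // M x N top data
  for (int m = 0; m < M; ++m)
    for (int n = 0; n < N; ++n) {
      float acc = b[n];
      for (int k = 0; k < K; ++k) acc += x[m * K + k] * w[n * K + k];
      y[m * N + n] = acc;
    }
  for (int m = 0; m < M; ++m, std::printf("\n"))
    for (int n = 0; n < N; ++n) std::printf("%6.2f ", y[m * N + n]);
  return 0;
}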
- if (this->layer_param_.loss_weight_size() == 0) { - this->layer_param_.add_loss_weight(Dtype(1)); - } -} - -template -void LossLayer::Reshape( - const vector*>& bottom, const vector*>& top) { - CHECK_EQ(bottom[0]->shape(0), bottom[1]->shape(0)) - << "The data and label should have the same first dimension."; - vector loss_shape(0); // Loss layers output a scalar; 0 axes. - top[0]->Reshape(loss_shape); -} - -INSTANTIATE_CLASS(LossLayer); - -} // namespace caffe diff --git a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp deleted file mode 100644 index 210525e..0000000 --- a/src/caffe/layers/lrn_layer.cpp +++ /dev/null @@ -1,257 +0,0 @@ -#include - -#include "caffe/layers/lrn_layer.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void LRNLayer::LayerSetUp(const vector*>& bottom, - const vector*>& top) { - size_ = this->layer_param_.lrn_param().local_size(); - CHECK_EQ(size_ % 2, 1) << "LRN only supports odd values for local_size"; - pre_pad_ = (size_ - 1) / 2; - alpha_ = this->layer_param_.lrn_param().alpha(); - beta_ = this->layer_param_.lrn_param().beta(); - k_ = this->layer_param_.lrn_param().k(); - if (this->layer_param_.lrn_param().norm_region() == - LRNParameter_NormRegion_WITHIN_CHANNEL) { - // Set up split_layer_ to use inputs in the numerator and denominator. - split_top_vec_.clear(); - split_top_vec_.push_back(&product_input_); - split_top_vec_.push_back(&square_input_); - LayerParameter split_param; - split_layer_.reset(new SplitLayer(split_param)); - split_layer_->SetUp(bottom, split_top_vec_); - // Set up square_layer_ to square the inputs. - square_bottom_vec_.clear(); - square_top_vec_.clear(); - square_bottom_vec_.push_back(&square_input_); - square_top_vec_.push_back(&square_output_); - LayerParameter square_param; - square_param.mutable_power_param()->set_power(Dtype(2)); - square_layer_.reset(new PowerLayer(square_param)); - square_layer_->SetUp(square_bottom_vec_, square_top_vec_); - // Set up pool_layer_ to sum over square neighborhoods of the input. - pool_top_vec_.clear(); - pool_top_vec_.push_back(&pool_output_); - LayerParameter pool_param; - pool_param.mutable_pooling_param()->set_pool( - PoolingParameter_PoolMethod_AVE); - pool_param.mutable_pooling_param()->set_pad(pre_pad_); - pool_param.mutable_pooling_param()->set_kernel_size(size_); - pool_layer_.reset(new PoolingLayer(pool_param)); - pool_layer_->SetUp(square_top_vec_, pool_top_vec_); - // Set up power_layer_ to compute (1 + alpha_/N^2 s)^-beta_, where s is - // the sum of a squared neighborhood (the output of pool_layer_). - power_top_vec_.clear(); - power_top_vec_.push_back(&power_output_); - LayerParameter power_param; - power_param.mutable_power_param()->set_power(-beta_); - power_param.mutable_power_param()->set_scale(alpha_); - power_param.mutable_power_param()->set_shift(Dtype(1)); - power_layer_.reset(new PowerLayer(power_param)); - power_layer_->SetUp(pool_top_vec_, power_top_vec_); - // Set up a product_layer_ to compute outputs by multiplying inputs by the - // inverse demoninator computed by the power layer. 
- product_bottom_vec_.clear(); - product_bottom_vec_.push_back(&product_input_); - product_bottom_vec_.push_back(&power_output_); - LayerParameter product_param; - EltwiseParameter* eltwise_param = product_param.mutable_eltwise_param(); - eltwise_param->set_operation(EltwiseParameter_EltwiseOp_PROD); - product_layer_.reset(new EltwiseLayer(product_param)); - product_layer_->SetUp(product_bottom_vec_, top); - } -} - -template -void LRNLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " - << "corresponding to (num, channels, height, width)"; - num_ = bottom[0]->num(); - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); - switch (this->layer_param_.lrn_param().norm_region()) { - case LRNParameter_NormRegion_ACROSS_CHANNELS: - top[0]->Reshape(num_, channels_, height_, width_); - scale_.Reshape(num_, channels_, height_, width_); - break; - case LRNParameter_NormRegion_WITHIN_CHANNEL: - split_layer_->Reshape(bottom, split_top_vec_); - square_layer_->Reshape(square_bottom_vec_, square_top_vec_); - pool_layer_->Reshape(square_top_vec_, pool_top_vec_); - power_layer_->Reshape(pool_top_vec_, power_top_vec_); - product_layer_->Reshape(product_bottom_vec_, top); - break; - } -} - -template -void LRNLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - switch (this->layer_param_.lrn_param().norm_region()) { - case LRNParameter_NormRegion_ACROSS_CHANNELS: - CrossChannelForward_cpu(bottom, top); - break; - case LRNParameter_NormRegion_WITHIN_CHANNEL: - WithinChannelForward(bottom, top); - break; - default: - LOG(FATAL) << "Unknown normalization region."; - } -} - -template -void LRNLayer::CrossChannelForward_cpu( - const vector*>& bottom, const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - Dtype* scale_data = scale_.mutable_cpu_data(); - // start with the constant value - for (int i = 0; i < scale_.count(); ++i) { - scale_data[i] = k_; - } - Blob padded_square(1, channels_ + size_ - 1, height_, width_); - Dtype* padded_square_data = padded_square.mutable_cpu_data(); - caffe_set(padded_square.count(), Dtype(0), padded_square_data); - Dtype alpha_over_size = alpha_ / size_; - // go through the images - for (int n = 0; n < num_; ++n) { - // compute the padded square - caffe_sqr(channels_ * height_ * width_, - bottom_data + bottom[0]->offset(n), - padded_square_data + padded_square.offset(0, pre_pad_)); - // Create the first channel scale - for (int c = 0; c < size_; ++c) { - caffe_axpy(height_ * width_, alpha_over_size, - padded_square_data + padded_square.offset(0, c), - scale_data + scale_.offset(n, 0)); - } - for (int c = 1; c < channels_; ++c) { - // copy previous scale - caffe_copy(height_ * width_, - scale_data + scale_.offset(n, c - 1), - scale_data + scale_.offset(n, c)); - // add head - caffe_axpy(height_ * width_, alpha_over_size, - padded_square_data + padded_square.offset(0, c + size_ - 1), - scale_data + scale_.offset(n, c)); - // subtract tail - caffe_axpy(height_ * width_, -alpha_over_size, - padded_square_data + padded_square.offset(0, c - 1), - scale_data + scale_.offset(n, c)); - } - } - - // In the end, compute output - caffe_powx(scale_.count(), scale_data, -beta_, top_data); - caffe_mul(scale_.count(), top_data, bottom_data, top_data); -} - -template -void LRNLayer::WithinChannelForward( - const vector*>& bottom, const vector*>& top) { - split_layer_->Forward(bottom, 
split_top_vec_); - square_layer_->Forward(square_bottom_vec_, square_top_vec_); - pool_layer_->Forward(square_top_vec_, pool_top_vec_); - power_layer_->Forward(pool_top_vec_, power_top_vec_); - product_layer_->Forward(product_bottom_vec_, top); -} - -template -void LRNLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - switch (this->layer_param_.lrn_param().norm_region()) { - case LRNParameter_NormRegion_ACROSS_CHANNELS: - CrossChannelBackward_cpu(top, propagate_down, bottom); - break; - case LRNParameter_NormRegion_WITHIN_CHANNEL: - WithinChannelBackward(top, propagate_down, bottom); - break; - default: - LOG(FATAL) << "Unknown normalization region."; - } -} - -template -void LRNLayer::CrossChannelBackward_cpu( - const vector*>& top, const vector& propagate_down, - const vector*>& bottom) { - const Dtype* top_diff = top[0]->cpu_diff(); - const Dtype* top_data = top[0]->cpu_data(); - const Dtype* bottom_data = bottom[0]->cpu_data(); - const Dtype* scale_data = scale_.cpu_data(); - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - Blob padded_ratio(1, channels_ + size_ - 1, height_, width_); - Blob accum_ratio(1, 1, height_, width_); - Dtype* padded_ratio_data = padded_ratio.mutable_cpu_data(); - Dtype* accum_ratio_data = accum_ratio.mutable_cpu_data(); - // We hack a little bit by using the diff() to store an additional result - Dtype* accum_ratio_times_bottom = accum_ratio.mutable_cpu_diff(); - caffe_set(padded_ratio.count(), Dtype(0), padded_ratio_data); - Dtype cache_ratio_value = 2. * alpha_ * beta_ / size_; - - caffe_powx(scale_.count(), scale_data, -beta_, bottom_diff); - caffe_mul(scale_.count(), top_diff, bottom_diff, bottom_diff); - - // go through individual data - int inverse_pre_pad = size_ - (size_ + 1) / 2; - for (int n = 0; n < num_; ++n) { - int block_offset = scale_.offset(n); - // first, compute diff_i * y_i / s_i - caffe_mul(channels_ * height_ * width_, - top_diff + block_offset, top_data + block_offset, - padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad)); - caffe_div(channels_ * height_ * width_, - padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad), - scale_data + block_offset, - padded_ratio_data + padded_ratio.offset(0, inverse_pre_pad)); - // Now, compute the accumulated ratios and the bottom diff - caffe_set(accum_ratio.count(), Dtype(0), accum_ratio_data); - for (int c = 0; c < size_ - 1; ++c) { - caffe_axpy(height_ * width_, 1., - padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data); - } - for (int c = 0; c < channels_; ++c) { - caffe_axpy(height_ * width_, 1., - padded_ratio_data + padded_ratio.offset(0, c + size_ - 1), - accum_ratio_data); - // compute bottom diff - caffe_mul(height_ * width_, - bottom_data + top[0]->offset(n, c), - accum_ratio_data, accum_ratio_times_bottom); - caffe_axpy(height_ * width_, -cache_ratio_value, - accum_ratio_times_bottom, bottom_diff + top[0]->offset(n, c)); - caffe_axpy(height_ * width_, -1., - padded_ratio_data + padded_ratio.offset(0, c), accum_ratio_data); - } - } -} - -template -void LRNLayer::WithinChannelBackward( - const vector*>& top, const vector& propagate_down, - const vector*>& bottom) { - if (propagate_down[0]) { - vector product_propagate_down(2, true); - product_layer_->Backward(top, product_propagate_down, product_bottom_vec_); - power_layer_->Backward(power_top_vec_, propagate_down, pool_top_vec_); - pool_layer_->Backward(pool_top_vec_, propagate_down, square_top_vec_); - square_layer_->Backward(square_top_vec_, propagate_down, 
- square_bottom_vec_); - split_layer_->Backward(split_top_vec_, propagate_down, bottom); - } -} - -#ifdef CPU_ONLY -STUB_GPU(LRNLayer); -STUB_GPU_FORWARD(LRNLayer, CrossChannelForward); -STUB_GPU_BACKWARD(LRNLayer, CrossChannelBackward); -#endif - -INSTANTIATE_CLASS(LRNLayer); - -} // namespace caffe diff --git a/src/caffe/layers/memory_data_layer.cpp b/src/caffe/layers/memory_data_layer.cpp deleted file mode 100644 index 975f484..0000000 --- a/src/caffe/layers/memory_data_layer.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#ifdef USE_OPENCV -#include -#endif // USE_OPENCV - -#include - -#include "caffe/layers/memory_data_layer.hpp" - -namespace caffe { - -template -void MemoryDataLayer::DataLayerSetUp(const vector*>& bottom, - const vector*>& top) { - batch_size_ = this->layer_param_.memory_data_param().batch_size(); - channels_ = this->layer_param_.memory_data_param().channels(); - height_ = this->layer_param_.memory_data_param().height(); - width_ = this->layer_param_.memory_data_param().width(); - size_ = channels_ * height_ * width_; - CHECK_GT(batch_size_ * size_, 0) << - "batch_size, channels, height, and width must be specified and" - " positive in memory_data_param"; - vector label_shape(1, batch_size_); - top[0]->Reshape(batch_size_, channels_, height_, width_); - top[1]->Reshape(label_shape); - added_data_.Reshape(batch_size_, channels_, height_, width_); - added_label_.Reshape(label_shape); - data_ = NULL; - labels_ = NULL; - added_data_.cpu_data(); - added_label_.cpu_data(); -} - -template -void MemoryDataLayer::AddDatumVector(const vector& datum_vector) { - CHECK(!has_new_data_) << - "Can't add data until current data has been consumed."; - size_t num = datum_vector.size(); - CHECK_GT(num, 0) << "There is no datum to add."; - CHECK_EQ(num % batch_size_, 0) << - "The added data must be a multiple of the batch size."; - added_data_.Reshape(num, channels_, height_, width_); - added_label_.Reshape(num, 1, 1, 1); - // Apply data transformations (mirror, scale, crop...) - this->data_transformer_->Transform(datum_vector, &added_data_); - // Copy Labels - Dtype* top_label = added_label_.mutable_cpu_data(); - for (int item_id = 0; item_id < num; ++item_id) { - top_label[item_id] = datum_vector[item_id].label(); - } - // num_images == batch_size_ - Dtype* top_data = added_data_.mutable_cpu_data(); - Reset(top_data, top_label, num); - has_new_data_ = true; -} - -#ifdef USE_OPENCV -template -void MemoryDataLayer::AddMatVector(const vector& mat_vector, - const vector& labels) { - size_t num = mat_vector.size(); - CHECK(!has_new_data_) << - "Can't add mat until current data has been consumed."; - CHECK_GT(num, 0) << "There is no mat to add"; - CHECK_EQ(num % batch_size_, 0) << - "The added data must be a multiple of the batch size."; - added_data_.Reshape(num, channels_, height_, width_); - added_label_.Reshape(num, 1, 1, 1); - // Apply data transformations (mirror, scale, crop...) 
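A scalar restatement (not taken from the patch) of the across-channel path of the deleted LRNLayer shown above: each channel is divided by (k + alpha/n * sum of squares over a window of n neighbouring channels) raised to beta. One pixel, toy values only.

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const std::vector<float> x = {1, 2, 3, 4, 5};  // activations per channel
  const int n = 3;                               // local_size (odd)
  const float alpha = 1e-4f, beta = 0.75f, k = 1.0f;
  const int pre_pad = (n - 1) / 2;
  for (int c = 0; c < static_cast<int>(x.size()); ++c) {
    float ss = 0.0f;
    for (int j = c - pre_pad; j <= c + pre_pad; ++j)
      if (j >= 0 && j < static_cast<int>(x.size())) ss += x[j] * x[j];
    const float scale = k + alpha / n * ss;      // analogue of scale_
    std::printf("y[%d] = %.6f\n", c, x[c] * std::pow(scale, -beta));
  }
  return 0;
}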
- this->data_transformer_->Transform(mat_vector, &added_data_); - // Copy Labels - Dtype* top_label = added_label_.mutable_cpu_data(); - for (int item_id = 0; item_id < num; ++item_id) { - top_label[item_id] = labels[item_id]; - } - // num_images == batch_size_ - Dtype* top_data = added_data_.mutable_cpu_data(); - Reset(top_data, top_label, num); - has_new_data_ = true; -} -#endif // USE_OPENCV - -template -void MemoryDataLayer::Reset(Dtype* data, Dtype* labels, int n) { - CHECK(data); - CHECK(labels); - CHECK_EQ(n % batch_size_, 0) << "n must be a multiple of batch size"; - // Warn with transformation parameters since a memory array is meant to - // be generic and no transformations are done with Reset(). - if (this->layer_param_.has_transform_param()) { - LOG(WARNING) << this->type() << " does not transform array data on Reset()"; - } - data_ = data; - labels_ = labels; - n_ = n; - pos_ = 0; -} - -template -void MemoryDataLayer::set_batch_size(int new_size) { - CHECK(!has_new_data_) << - "Can't change batch_size until current data has been consumed."; - batch_size_ = new_size; - added_data_.Reshape(batch_size_, channels_, height_, width_); - added_label_.Reshape(batch_size_, 1, 1, 1); -} - -template -void MemoryDataLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - CHECK(data_) << "MemoryDataLayer needs to be initialized by calling Reset"; - top[0]->Reshape(batch_size_, channels_, height_, width_); - top[1]->Reshape(batch_size_, 1, 1, 1); - top[0]->set_cpu_data(data_ + pos_ * size_); - top[1]->set_cpu_data(labels_ + pos_); - pos_ = (pos_ + batch_size_) % n_; - if (pos_ == 0) - has_new_data_ = false; -} - -INSTANTIATE_CLASS(MemoryDataLayer); -REGISTER_LAYER_CLASS(MemoryData); - -} // namespace caffe diff --git a/src/caffe/layers/neuron_layer.cpp b/src/caffe/layers/neuron_layer.cpp deleted file mode 100644 index d7b5f38..0000000 --- a/src/caffe/layers/neuron_layer.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include - -#include "caffe/layers/neuron_layer.hpp" - -namespace caffe { - -template -void NeuronLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - top[0]->ReshapeLike(*bottom[0]); -} - -INSTANTIATE_CLASS(NeuronLayer); - -} // namespace caffe diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp deleted file mode 100644 index 90897db..0000000 --- a/src/caffe/layers/pooling_layer.cpp +++ /dev/null @@ -1,316 +0,0 @@ -#include -#include -#include - -#include "caffe/layers/pooling_layer.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -using std::min; -using std::max; - -template -void PoolingLayer::LayerSetUp(const vector*>& bottom, - const vector*>& top) { - PoolingParameter pool_param = this->layer_param_.pooling_param(); - if (pool_param.global_pooling()) { - CHECK(!(pool_param.has_kernel_size() || - pool_param.has_kernel_h() || pool_param.has_kernel_w())) - << "With Global_pooling: true Filter size cannot specified"; - } else { - CHECK(!pool_param.has_kernel_size() != - !(pool_param.has_kernel_h() && pool_param.has_kernel_w())) - << "Filter size is kernel_size OR kernel_h and kernel_w; not both"; - CHECK(pool_param.has_kernel_size() || - (pool_param.has_kernel_h() && pool_param.has_kernel_w())) - << "For non-square filters both kernel_h and kernel_w are required."; - } - CHECK((!pool_param.has_pad() && pool_param.has_pad_h() - && pool_param.has_pad_w()) - || (!pool_param.has_pad_h() && !pool_param.has_pad_w())) - << "pad is pad OR pad_h and pad_w are required."; - CHECK((!pool_param.has_stride() && 
pool_param.has_stride_h() - && pool_param.has_stride_w()) - || (!pool_param.has_stride_h() && !pool_param.has_stride_w())) - << "Stride is stride OR stride_h and stride_w are required."; - global_pooling_ = pool_param.global_pooling(); - if (global_pooling_) { - kernel_h_ = bottom[0]->height(); - kernel_w_ = bottom[0]->width(); - } else { - if (pool_param.has_kernel_size()) { - kernel_h_ = kernel_w_ = pool_param.kernel_size(); - } else { - kernel_h_ = pool_param.kernel_h(); - kernel_w_ = pool_param.kernel_w(); - } - } - CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero."; - CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero."; - if (!pool_param.has_pad_h()) { - pad_h_ = pad_w_ = pool_param.pad(); - } else { - pad_h_ = pool_param.pad_h(); - pad_w_ = pool_param.pad_w(); - } - if (!pool_param.has_stride_h()) { - stride_h_ = stride_w_ = pool_param.stride(); - } else { - stride_h_ = pool_param.stride_h(); - stride_w_ = pool_param.stride_w(); - } - if (global_pooling_) { - CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1) - << "With Global_pooling: true; only pad = 0 and stride = 1"; - } - if (pad_h_ != 0 || pad_w_ != 0) { - CHECK(this->layer_param_.pooling_param().pool() - == PoolingParameter_PoolMethod_AVE - || this->layer_param_.pooling_param().pool() - == PoolingParameter_PoolMethod_MAX) - << "Padding implemented only for average and max pooling."; - CHECK_LT(pad_h_, kernel_h_); - CHECK_LT(pad_w_, kernel_w_); - } -} - -template -void PoolingLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " - << "corresponding to (num, channels, height, width)"; - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); - if (global_pooling_) { - kernel_h_ = bottom[0]->height(); - kernel_w_ = bottom[0]->width(); - } - pooled_height_ = static_cast(ceil(static_cast( - height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1; - pooled_width_ = static_cast(ceil(static_cast( - width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1; - if (pad_h_ || pad_w_) { - // If we have padding, ensure that the last pooling starts strictly - // inside the image (instead of at the padding); otherwise clip the last. - if ((pooled_height_ - 1) * stride_h_ >= height_ + pad_h_) { - --pooled_height_; - } - if ((pooled_width_ - 1) * stride_w_ >= width_ + pad_w_) { - --pooled_width_; - } - CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h_); - CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w_); - } - top[0]->Reshape(bottom[0]->num(), channels_, pooled_height_, - pooled_width_); - if (top.size() > 1) { - top[1]->ReshapeLike(*top[0]); - } - // If max pooling, we will initialize the vector index part. - if (this->layer_param_.pooling_param().pool() == - PoolingParameter_PoolMethod_MAX && top.size() == 1) { - max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_, - pooled_width_); - } - // If stochastic pooling, we will initialize the random index part. - if (this->layer_param_.pooling_param().pool() == - PoolingParameter_PoolMethod_STOCHASTIC) { - rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_, - pooled_width_); - } -} - -// TODO(Yangqing): Is there a faster way to do pooling in the channel-first -// case? 
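A standalone version (not from the patch) of the pooled-size arithmetic in the deleted PoolingLayer::Reshape(), including the clip that keeps the last window from starting inside the padding; the example numbers are made up.

#include <cmath>
#include <cstdio>

int pooled_dim(int input, int kernel, int pad, int stride) {
  int pooled = static_cast<int>(
      std::ceil(static_cast<float>(input + 2 * pad - kernel) / stride)) + 1;
  // If padded, make sure the last pooling window starts inside the image.
  if (pad && (pooled - 1) * stride >= input + pad) --pooled;
  return pooled;
}

int main() {
  std::printf("%d\n", pooled_dim(224, 3, 0, 2));  // 112
  std::printf("%d\n", pooled_dim(13, 3, 1, 2));   // 7
  return 0;
}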
-template -void PoolingLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - const int top_count = top[0]->count(); - // We'll output the mask to top[1] if it's of size >1. - const bool use_top_mask = top.size() > 1; - int* mask = NULL; // suppress warnings about uninitalized variables - Dtype* top_mask = NULL; - // Different pooling methods. We explicitly do the switch outside the for - // loop to save time, although this results in more code. - switch (this->layer_param_.pooling_param().pool()) { - case PoolingParameter_PoolMethod_MAX: - // Initialize - if (use_top_mask) { - top_mask = top[1]->mutable_cpu_data(); - caffe_set(top_count, Dtype(-1), top_mask); - } else { - mask = max_idx_.mutable_cpu_data(); - caffe_set(top_count, -1, mask); - } - caffe_set(top_count, Dtype(-FLT_MAX), top_data); - // The main loop - for (int n = 0; n < bottom[0]->num(); ++n) { - for (int c = 0; c < channels_; ++c) { - for (int ph = 0; ph < pooled_height_; ++ph) { - for (int pw = 0; pw < pooled_width_; ++pw) { - int hstart = ph * stride_h_ - pad_h_; - int wstart = pw * stride_w_ - pad_w_; - int hend = min(hstart + kernel_h_, height_); - int wend = min(wstart + kernel_w_, width_); - hstart = max(hstart, 0); - wstart = max(wstart, 0); - const int pool_index = ph * pooled_width_ + pw; - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - const int index = h * width_ + w; - if (bottom_data[index] > top_data[pool_index]) { - top_data[pool_index] = bottom_data[index]; - if (use_top_mask) { - top_mask[pool_index] = static_cast(index); - } else { - mask[pool_index] = index; - } - } - } - } - } - } - // compute offset - bottom_data += bottom[0]->offset(0, 1); - top_data += top[0]->offset(0, 1); - if (use_top_mask) { - top_mask += top[0]->offset(0, 1); - } else { - mask += top[0]->offset(0, 1); - } - } - } - break; - case PoolingParameter_PoolMethod_AVE: - for (int i = 0; i < top_count; ++i) { - top_data[i] = 0; - } - // The main loop - for (int n = 0; n < bottom[0]->num(); ++n) { - for (int c = 0; c < channels_; ++c) { - for (int ph = 0; ph < pooled_height_; ++ph) { - for (int pw = 0; pw < pooled_width_; ++pw) { - int hstart = ph * stride_h_ - pad_h_; - int wstart = pw * stride_w_ - pad_w_; - int hend = min(hstart + kernel_h_, height_ + pad_h_); - int wend = min(wstart + kernel_w_, width_ + pad_w_); - int pool_size = (hend - hstart) * (wend - wstart); - hstart = max(hstart, 0); - wstart = max(wstart, 0); - hend = min(hend, height_); - wend = min(wend, width_); - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - top_data[ph * pooled_width_ + pw] += - bottom_data[h * width_ + w]; - } - } - top_data[ph * pooled_width_ + pw] /= pool_size; - } - } - // compute offset - bottom_data += bottom[0]->offset(0, 1); - top_data += top[0]->offset(0, 1); - } - } - break; - case PoolingParameter_PoolMethod_STOCHASTIC: - NOT_IMPLEMENTED; - break; - default: - LOG(FATAL) << "Unknown pooling method."; - } -} - -template -void PoolingLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - if (!propagate_down[0]) { - return; - } - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - // Different pooling methods. We explicitly do the switch outside the for - // loop to save time, although this results in more codes. 
- caffe_set(bottom[0]->count(), Dtype(0), bottom_diff); - // We'll output the mask to top[1] if it's of size >1. - const bool use_top_mask = top.size() > 1; - const int* mask = NULL; // suppress warnings about uninitialized variables - const Dtype* top_mask = NULL; - switch (this->layer_param_.pooling_param().pool()) { - case PoolingParameter_PoolMethod_MAX: - // The main loop - if (use_top_mask) { - top_mask = top[1]->cpu_data(); - } else { - mask = max_idx_.cpu_data(); - } - for (int n = 0; n < top[0]->num(); ++n) { - for (int c = 0; c < channels_; ++c) { - for (int ph = 0; ph < pooled_height_; ++ph) { - for (int pw = 0; pw < pooled_width_; ++pw) { - const int index = ph * pooled_width_ + pw; - const int bottom_index = - use_top_mask ? top_mask[index] : mask[index]; - bottom_diff[bottom_index] += top_diff[index]; - } - } - bottom_diff += bottom[0]->offset(0, 1); - top_diff += top[0]->offset(0, 1); - if (use_top_mask) { - top_mask += top[0]->offset(0, 1); - } else { - mask += top[0]->offset(0, 1); - } - } - } - break; - case PoolingParameter_PoolMethod_AVE: - // The main loop - for (int n = 0; n < top[0]->num(); ++n) { - for (int c = 0; c < channels_; ++c) { - for (int ph = 0; ph < pooled_height_; ++ph) { - for (int pw = 0; pw < pooled_width_; ++pw) { - int hstart = ph * stride_h_ - pad_h_; - int wstart = pw * stride_w_ - pad_w_; - int hend = min(hstart + kernel_h_, height_ + pad_h_); - int wend = min(wstart + kernel_w_, width_ + pad_w_); - int pool_size = (hend - hstart) * (wend - wstart); - hstart = max(hstart, 0); - wstart = max(wstart, 0); - hend = min(hend, height_); - wend = min(wend, width_); - for (int h = hstart; h < hend; ++h) { - for (int w = wstart; w < wend; ++w) { - bottom_diff[h * width_ + w] += - top_diff[ph * pooled_width_ + pw] / pool_size; - } - } - } - } - // offset - bottom_diff += bottom[0]->offset(0, 1); - top_diff += top[0]->offset(0, 1); - } - } - break; - case PoolingParameter_PoolMethod_STOCHASTIC: - NOT_IMPLEMENTED; - break; - default: - LOG(FATAL) << "Unknown pooling method."; - } -} - - -#ifdef CPU_ONLY -STUB_GPU(PoolingLayer); -#endif - -INSTANTIATE_CLASS(PoolingLayer); - -} // namespace caffe diff --git a/src/caffe/layers/power_layer.cpp b/src/caffe/layers/power_layer.cpp deleted file mode 100644 index d99b77c..0000000 --- a/src/caffe/layers/power_layer.cpp +++ /dev/null @@ -1,102 +0,0 @@ -#include - -#include "caffe/layers/power_layer.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void PowerLayer::LayerSetUp(const vector*>& bottom, - const vector*>& top) { - NeuronLayer::LayerSetUp(bottom, top); - power_ = this->layer_param_.power_param().power(); - scale_ = this->layer_param_.power_param().scale(); - shift_ = this->layer_param_.power_param().shift(); - diff_scale_ = power_ * scale_; -} - -// Compute y = (shift + scale * x)^power -template -void PowerLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - Dtype* top_data = top[0]->mutable_cpu_data(); - const int count = bottom[0]->count(); - // Special case where we can ignore the input: scale or power is 0. - if (diff_scale_ == Dtype(0)) { - Dtype value = (power_ == 0) ? 
Dtype(1) : pow(shift_, power_); - caffe_set(count, value, top_data); - return; - } - const Dtype* bottom_data = bottom[0]->cpu_data(); - caffe_copy(count, bottom_data, top_data); - if (scale_ != Dtype(1)) { - caffe_scal(count, scale_, top_data); - } - if (shift_ != Dtype(0)) { - caffe_add_scalar(count, shift_, top_data); - } - if (power_ != Dtype(1)) { - caffe_powx(count, top_data, power_, top_data); - } -} - -template -void PowerLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - if (propagate_down[0]) { - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - const int count = bottom[0]->count(); - const Dtype* top_diff = top[0]->cpu_diff(); - if (diff_scale_ == Dtype(0) || power_ == Dtype(1)) { - caffe_set(count, diff_scale_, bottom_diff); - } else { - const Dtype* bottom_data = bottom[0]->cpu_data(); - // Compute dy/dx = scale * power * (shift + scale * x)^(power - 1) - // = diff_scale * y / (shift + scale * x) - if (power_ == Dtype(2)) { - // Special case for y = (shift + scale * x)^2 - // -> dy/dx = 2 * scale * (shift + scale * x) - // = diff_scale * shift + diff_scale * scale * x - caffe_cpu_axpby(count, diff_scale_ * scale_, bottom_data, - Dtype(0), bottom_diff); - if (shift_ != Dtype(0)) { - caffe_add_scalar(count, diff_scale_ * shift_, bottom_diff); - } - } else if (shift_ == Dtype(0)) { - // Special case for y = (scale * x)^power - // -> dy/dx = scale * power * (scale * x)^(power - 1) - // = scale * power * (scale * x)^power * (scale * x)^(-1) - // = power * y / x - const Dtype* top_data = top[0]->cpu_data(); - caffe_div(count, top_data, bottom_data, bottom_diff); - caffe_scal(count, power_, bottom_diff); - } else { - caffe_copy(count, bottom_data, bottom_diff); - if (scale_ != Dtype(1)) { - caffe_scal(count, scale_, bottom_diff); - } - if (shift_ != Dtype(0)) { - caffe_add_scalar(count, shift_, bottom_diff); - } - const Dtype* top_data = top[0]->cpu_data(); - caffe_div(count, top_data, bottom_diff, bottom_diff); - if (diff_scale_ != Dtype(1)) { - caffe_scal(count, diff_scale_, bottom_diff); - } - } - } - if (diff_scale_ != Dtype(0)) { - caffe_mul(count, top_diff, bottom_diff, bottom_diff); - } - } -} - -#ifdef CPU_ONLY -STUB_GPU(PowerLayer); -#endif - -INSTANTIATE_CLASS(PowerLayer); -REGISTER_LAYER_CLASS(Power); - -} // namespace caffe diff --git a/src/caffe/layers/relu_layer.cpp b/src/caffe/layers/relu_layer.cpp deleted file mode 100644 index 92a729c..0000000 --- a/src/caffe/layers/relu_layer.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include -#include - -#include "caffe/layers/relu_layer.hpp" - -namespace caffe { - -template -void ReLULayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - const int count = bottom[0]->count(); - Dtype negative_slope = this->layer_param_.relu_param().negative_slope(); - for (int i = 0; i < count; ++i) { - top_data[i] = std::max(bottom_data[i], Dtype(0)) - + negative_slope * std::min(bottom_data[i], Dtype(0)); - } -} - -template -void ReLULayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - if (propagate_down[0]) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - const int count = bottom[0]->count(); - Dtype negative_slope = this->layer_param_.relu_param().negative_slope(); - for (int i = 0; i < count; ++i) { - bottom_diff[i] = 
top_diff[i] * ((bottom_data[i] > 0) - + negative_slope * (bottom_data[i] <= 0)); - } - } -} - - -#ifdef CPU_ONLY -STUB_GPU(ReLULayer); -#endif - -INSTANTIATE_CLASS(ReLULayer); - -} // namespace caffe diff --git a/src/caffe/layers/sigmoid_layer.cpp b/src/caffe/layers/sigmoid_layer.cpp deleted file mode 100644 index 85fd967..0000000 --- a/src/caffe/layers/sigmoid_layer.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include - -#include "caffe/layers/sigmoid_layer.hpp" - -namespace caffe { - -template -inline Dtype sigmoid(Dtype x) { - return 1. / (1. + exp(-x)); -} - -template -void SigmoidLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - const int count = bottom[0]->count(); - for (int i = 0; i < count; ++i) { - top_data[i] = sigmoid(bottom_data[i]); - } -} - -template -void SigmoidLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - if (propagate_down[0]) { - const Dtype* top_data = top[0]->cpu_data(); - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - const int count = bottom[0]->count(); - for (int i = 0; i < count; ++i) { - const Dtype sigmoid_x = top_data[i]; - bottom_diff[i] = top_diff[i] * sigmoid_x * (1. - sigmoid_x); - } - } -} - -#ifdef CPU_ONLY -STUB_GPU(SigmoidLayer); -#endif - -INSTANTIATE_CLASS(SigmoidLayer); - - -} // namespace caffe diff --git a/src/caffe/layers/softmax_layer.cpp b/src/caffe/layers/softmax_layer.cpp deleted file mode 100644 index f60e9b0..0000000 --- a/src/caffe/layers/softmax_layer.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#include -#include - -#include "caffe/layers/softmax_layer.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void SoftmaxLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - softmax_axis_ = - bottom[0]->CanonicalAxisIndex(this->layer_param_.softmax_param().axis()); - top[0]->ReshapeLike(*bottom[0]); - vector mult_dims(1, bottom[0]->shape(softmax_axis_)); - sum_multiplier_.Reshape(mult_dims); - Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data(); - caffe_set(sum_multiplier_.count(), Dtype(1), multiplier_data); - outer_num_ = bottom[0]->count(0, softmax_axis_); - inner_num_ = bottom[0]->count(softmax_axis_ + 1); - vector scale_dims = bottom[0]->shape(); - scale_dims[softmax_axis_] = 1; - scale_.Reshape(scale_dims); -} - -template -void SoftmaxLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - Dtype* scale_data = scale_.mutable_cpu_data(); - int channels = bottom[0]->shape(softmax_axis_); - int dim = bottom[0]->count() / outer_num_; - caffe_copy(bottom[0]->count(), bottom_data, top_data); - // We need to subtract the max to avoid numerical issues, compute the exp, - // and then normalize. 
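For reference, the max-subtraction in the removed SoftmaxLayer::Forward_cpu is the standard numerical-stability trick: shifting the logits by their maximum leaves the softmax unchanged but keeps exp() from overflowing. A minimal standalone sketch of the same computation for one channel vector (plain C++, no Blob or axis machinery, illustrative only):

    #include <algorithm>
    #include <cmath>
    #include <vector>

    // softmax(z)_j = exp(z_j - max(z)) / sum_k exp(z_k - max(z))
    std::vector<float> softmax(const std::vector<float>& z) {
      const float m = *std::max_element(z.begin(), z.end());
      std::vector<float> p(z.size());
      float sum = 0.f;
      for (size_t j = 0; j < z.size(); ++j) {
        p[j] = std::exp(z[j] - m);   // subtract the max before exponentiating
        sum += p[j];
      }
      for (size_t j = 0; j < z.size(); ++j) {
        p[j] /= sum;                 // normalize so the outputs sum to 1
      }
      return p;
    }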
- for (int i = 0; i < outer_num_; ++i) { - // initialize scale_data to the first plane - caffe_copy(inner_num_, bottom_data + i * dim, scale_data); - for (int j = 0; j < channels; j++) { - for (int k = 0; k < inner_num_; k++) { - scale_data[k] = std::max(scale_data[k], - bottom_data[i * dim + j * inner_num_ + k]); - } - } - // subtraction - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, inner_num_, - 1, -1., sum_multiplier_.cpu_data(), scale_data, 1., top_data); - // exponentiation - caffe_exp(dim, top_data, top_data); - // sum after exp - caffe_cpu_gemv(CblasTrans, channels, inner_num_, 1., - top_data, sum_multiplier_.cpu_data(), 0., scale_data); - // division - for (int j = 0; j < channels; j++) { - caffe_div(inner_num_, top_data, scale_data, top_data); - top_data += inner_num_; - } - } -} - -template -void SoftmaxLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - const Dtype* top_diff = top[0]->cpu_diff(); - const Dtype* top_data = top[0]->cpu_data(); - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - Dtype* scale_data = scale_.mutable_cpu_data(); - int channels = top[0]->shape(softmax_axis_); - int dim = top[0]->count() / outer_num_; - caffe_copy(top[0]->count(), top_diff, bottom_diff); - for (int i = 0; i < outer_num_; ++i) { - // compute dot(top_diff, top_data) and subtract them from the bottom diff - for (int k = 0; k < inner_num_; ++k) { - scale_data[k] = caffe_cpu_strided_dot(channels, - bottom_diff + i * dim + k, inner_num_, - top_data + i * dim + k, inner_num_); - } - // subtraction - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, inner_num_, 1, - -1., sum_multiplier_.cpu_data(), scale_data, 1., bottom_diff + i * dim); - } - // elementwise multiplication - caffe_mul(top[0]->count(), bottom_diff, top_data, bottom_diff); -} - - -#ifdef CPU_ONLY -STUB_GPU(SoftmaxLayer); -#endif - -INSTANTIATE_CLASS(SoftmaxLayer); - -} // namespace caffe diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp deleted file mode 100644 index dddb760..0000000 --- a/src/caffe/layers/softmax_loss_layer.cpp +++ /dev/null @@ -1,158 +0,0 @@ -#include -#include -#include - -#include "caffe/layers/softmax_loss_layer.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void SoftmaxWithLossLayer::LayerSetUp( - const vector*>& bottom, const vector*>& top) { - LossLayer::LayerSetUp(bottom, top); - LayerParameter softmax_param(this->layer_param_); - softmax_param.set_type("Softmax"); - softmax_layer_ = LayerRegistry::CreateLayer(softmax_param); - softmax_bottom_vec_.clear(); - softmax_bottom_vec_.push_back(bottom[0]); - softmax_top_vec_.clear(); - softmax_top_vec_.push_back(&prob_); - softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_); - - has_ignore_label_ = - this->layer_param_.loss_param().has_ignore_label(); - if (has_ignore_label_) { - ignore_label_ = this->layer_param_.loss_param().ignore_label(); - } - if (!this->layer_param_.loss_param().has_normalization() && - this->layer_param_.loss_param().has_normalize()) { - normalization_ = this->layer_param_.loss_param().normalize() ? 
- LossParameter_NormalizationMode_VALID : - LossParameter_NormalizationMode_BATCH_SIZE; - } else { - normalization_ = this->layer_param_.loss_param().normalization(); - } -} - -template -void SoftmaxWithLossLayer::Reshape( - const vector*>& bottom, const vector*>& top) { - LossLayer::Reshape(bottom, top); - softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_); - softmax_axis_ = - bottom[0]->CanonicalAxisIndex(this->layer_param_.softmax_param().axis()); - outer_num_ = bottom[0]->count(0, softmax_axis_); - inner_num_ = bottom[0]->count(softmax_axis_ + 1); - CHECK_EQ(outer_num_ * inner_num_, bottom[1]->count()) - << "Number of labels must match number of predictions; " - << "e.g., if softmax axis == 1 and prediction shape is (N, C, H, W), " - << "label count (number of labels) must be N*H*W, " - << "with integer values in {0, 1, ..., C-1}."; - if (top.size() >= 2) { - // softmax output - top[1]->ReshapeLike(*bottom[0]); - } -} - -template -Dtype SoftmaxWithLossLayer::get_normalizer( - LossParameter_NormalizationMode normalization_mode, int valid_count) { - Dtype normalizer; - switch (normalization_mode) { - case LossParameter_NormalizationMode_FULL: - normalizer = Dtype(outer_num_ * inner_num_); - break; - case LossParameter_NormalizationMode_VALID: - if (valid_count == -1) { - normalizer = Dtype(outer_num_ * inner_num_); - } else { - normalizer = Dtype(valid_count); - } - break; - case LossParameter_NormalizationMode_BATCH_SIZE: - normalizer = Dtype(outer_num_); - break; - case LossParameter_NormalizationMode_NONE: - normalizer = Dtype(1); - break; - default: - LOG(FATAL) << "Unknown normalization mode: " - << LossParameter_NormalizationMode_Name(normalization_mode); - } - // Some users will have no labels for some examples in order to 'turn off' a - // particular loss in a multi-task setup. The max prevents NaNs in that case. - return std::max(Dtype(1.0), normalizer); -} - -template -void SoftmaxWithLossLayer::Forward_cpu( - const vector*>& bottom, const vector*>& top) { - // The forward pass computes the softmax prob values. 
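The normalization modes above differ only in the denominator applied to the summed loss. As a concrete illustration (numbers are hypothetical): with outer_num_ = 2 images, inner_num_ = 4 spatial positions, and 3 of the 8 positions carrying ignore_label, get_normalizer returns 8 for FULL, 5 for VALID (only the counted positions), 2 for BATCH_SIZE, and 1 for NONE; the final std::max guard only matters when every position is ignored.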
- softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_); - const Dtype* prob_data = prob_.cpu_data(); - const Dtype* label = bottom[1]->cpu_data(); - int dim = prob_.count() / outer_num_; - int count = 0; - Dtype loss = 0; - for (int i = 0; i < outer_num_; ++i) { - for (int j = 0; j < inner_num_; j++) { - const int label_value = static_cast(label[i * inner_num_ + j]); - if (has_ignore_label_ && label_value == ignore_label_) { - continue; - } - DCHECK_GE(label_value, 0); - DCHECK_LT(label_value, prob_.shape(softmax_axis_)); - loss -= log(std::max(prob_data[i * dim + label_value * inner_num_ + j], - Dtype(FLT_MIN))); - ++count; - } - } - top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count); - if (top.size() == 2) { - top[1]->ShareData(prob_); - } -} - -template -void SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - if (propagate_down[1]) { - LOG(FATAL) << this->type() - << " Layer cannot backpropagate to label inputs."; - } - if (propagate_down[0]) { - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - const Dtype* prob_data = prob_.cpu_data(); - caffe_copy(prob_.count(), prob_data, bottom_diff); - const Dtype* label = bottom[1]->cpu_data(); - int dim = prob_.count() / outer_num_; - int count = 0; - for (int i = 0; i < outer_num_; ++i) { - for (int j = 0; j < inner_num_; ++j) { - const int label_value = static_cast(label[i * inner_num_ + j]); - if (has_ignore_label_ && label_value == ignore_label_) { - for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) { - bottom_diff[i * dim + c * inner_num_ + j] = 0; - } - } else { - bottom_diff[i * dim + label_value * inner_num_ + j] -= 1; - ++count; - } - } - } - // Scale gradient - Dtype loss_weight = top[0]->cpu_diff()[0] / - get_normalizer(normalization_, count); - caffe_scal(prob_.count(), loss_weight, bottom_diff); - } -} - -#ifdef CPU_ONLY -STUB_GPU(SoftmaxWithLossLayer); -#endif - -INSTANTIATE_CLASS(SoftmaxWithLossLayer); -REGISTER_LAYER_CLASS(SoftmaxWithLoss); - -} // namespace caffe diff --git a/src/caffe/layers/split_layer.cpp b/src/caffe/layers/split_layer.cpp deleted file mode 100644 index 1a27a9a..0000000 --- a/src/caffe/layers/split_layer.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include - -#include "caffe/layers/split_layer.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -template -void SplitLayer::Reshape(const vector*>& bottom, - const vector*>& top) { - count_ = bottom[0]->count(); - for (int i = 0; i < top.size(); ++i) { - // Do not allow in-place computation in the SplitLayer. Instead, share data - // by reference in the forward pass, and keep separate diff allocations in - // the backward pass. (Technically, it should be possible to share the diff - // blob of the first split output with the input, but this seems to cause - // some strange effects in practice...) 
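The backward pass of the removed SoftmaxWithLossLayer relies on the standard identity for softmax followed by multinomial cross-entropy: with p = softmax(z) and target class y, dL/dz_j = p_j - 1[j == y] per spatial position (before dividing by the normalizer and multiplying by the top loss weight). That is why Backward_cpu simply copies prob_data into bottom_diff, subtracts 1 at the label index, zeroes the gradient at ignored positions, and then rescales everything with caffe_scal.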
- CHECK_NE(top[i], bottom[0]) << this->type() << " Layer does not " - "allow in-place computation."; - top[i]->ReshapeLike(*bottom[0]); - CHECK_EQ(count_, top[i]->count()); - } -} - -template -void SplitLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - for (int i = 0; i < top.size(); ++i) { - top[i]->ShareData(*bottom[0]); - } -} - -template -void SplitLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - if (!propagate_down[0]) { return; } - if (top.size() == 1) { - caffe_copy(count_, top[0]->cpu_diff(), bottom[0]->mutable_cpu_diff()); - return; - } - caffe_add(count_, top[0]->cpu_diff(), top[1]->cpu_diff(), - bottom[0]->mutable_cpu_diff()); - // Add remaining top blob diffs. - for (int i = 2; i < top.size(); ++i) { - const Dtype* top_diff = top[i]->cpu_diff(); - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - caffe_axpy(count_, Dtype(1.), top_diff, bottom_diff); - } -} - - -#ifdef CPU_ONLY -STUB_GPU(SplitLayer); -#endif - -INSTANTIATE_CLASS(SplitLayer); -REGISTER_LAYER_CLASS(Split); - -} // namespace caffe diff --git a/src/caffe/layers/tanh_layer.cpp b/src/caffe/layers/tanh_layer.cpp deleted file mode 100644 index 184e926..0000000 --- a/src/caffe/layers/tanh_layer.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// TanH neuron activation function layer. -// Adapted from ReLU layer code written by Yangqing Jia - -#include - -#include "caffe/layers/tanh_layer.hpp" - -namespace caffe { - -template -void TanHLayer::Forward_cpu(const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - const int count = bottom[0]->count(); - for (int i = 0; i < count; ++i) { - top_data[i] = tanh(bottom_data[i]); - } -} - -template -void TanHLayer::Backward_cpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - if (propagate_down[0]) { - const Dtype* top_data = top[0]->cpu_data(); - const Dtype* top_diff = top[0]->cpu_diff(); - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - const int count = bottom[0]->count(); - Dtype tanhx; - for (int i = 0; i < count; ++i) { - tanhx = top_data[i]; - bottom_diff[i] = top_diff[i] * (1 - tanhx * tanhx); - } - } -} - -#ifdef CPU_ONLY -STUB_GPU(TanHLayer); -#endif - -INSTANTIATE_CLASS(TanHLayer); - -} // namespace caffe diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp deleted file mode 100644 index 6a1637d..0000000 --- a/src/caffe/net.cpp +++ /dev/null @@ -1,1012 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#ifdef USE_HDF5 -#include "hdf5.h" -#endif - -#include "caffe/common.hpp" -#include "caffe/layer.hpp" -#include "caffe/net.hpp" -#ifdef NO_CAFFE_MOBILE -#include "caffe/parallel.hpp" -#endif -#include "caffe/proto/caffe.pb.h" -#ifdef USE_HDF5 -#include "caffe/util/hdf5.hpp" -#endif -#include "caffe/util/insert_splits.hpp" -#include "caffe/util/math_functions.hpp" -#include "caffe/util/upgrade_proto.hpp" - -#ifdef NO_CAFFE_MOBILE -#include "caffe/test/test_caffe_main.hpp" -#endif - -namespace caffe { - -template -Net::Net(const NetParameter& param) { - Init(param); -} - -template -Net::Net(const string& param_file, Phase phase, - const int level, const vector* stages) { - NetParameter param; - ReadNetParamsFromTextFileOrDie(param_file, ¶m); - // Set phase, stages and level - param.mutable_state()->set_phase(phase); - if (stages != NULL) { - for (int i = 0; i < stages->size(); i++) { - param.mutable_state()->add_stage((*stages)[i]); - } - } - 
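The removed Net constructor above accepts an optional NetState level and stage list in addition to the phase. A minimal usage sketch (the file name and stage string are hypothetical; the constructor is called exactly as defined here):

    #include <string>
    #include <vector>
    #include "caffe/net.hpp"

    void load_deploy_net() {
      std::vector<std::string> stages;
      stages.push_back("deploy");  // matched against each layer's include/exclude rules
      caffe::Net<float> net("lenet_deploy.prototxt", caffe::TEST, /*level=*/0, &stages);
    }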
param.mutable_state()->set_level(level); - Init(param); -} - -template -void Net::Init(const NetParameter& in_param) { - // Set phase from the state. - phase_ = in_param.state().phase(); - // Filter layers based on their include/exclude rules and - // the current NetState. - NetParameter filtered_param; - FilterNet(in_param, &filtered_param); - LOG_IF(INFO, Caffe::root_solver()) - << "Initializing net from parameters: " << std::endl - << filtered_param.DebugString(); - // Create a copy of filtered_param with splits added where necessary. - NetParameter param; - InsertSplits(filtered_param, ¶m); - // Basically, build all the layers and set up their connections. - name_ = param.name(); - map blob_name_to_idx; - set available_blobs; - memory_used_ = 0; - // For each layer, set up its input and output - bottom_vecs_.resize(param.layer_size()); - top_vecs_.resize(param.layer_size()); - bottom_id_vecs_.resize(param.layer_size()); - param_id_vecs_.resize(param.layer_size()); - top_id_vecs_.resize(param.layer_size()); - bottom_need_backward_.resize(param.layer_size()); - for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) { - // Inherit phase from net if unset. - if (!param.layer(layer_id).has_phase()) { - param.mutable_layer(layer_id)->set_phase(phase_); - } - // Setup layer. - const LayerParameter& layer_param = param.layer(layer_id); - if (layer_param.propagate_down_size() > 0) { - CHECK_EQ(layer_param.propagate_down_size(), - layer_param.bottom_size()) - << "propagate_down param must be specified " - << "either 0 or bottom_size times "; - } - layers_.push_back(LayerRegistry::CreateLayer(layer_param)); - layer_names_.push_back(layer_param.name()); - LOG_IF(INFO, Caffe::root_solver()) - << "Creating Layer " << layer_param.name(); - bool need_backward = false; - - // Figure out this layer's input and output - for (int bottom_id = 0; bottom_id < layer_param.bottom_size(); - ++bottom_id) { - const int blob_id = AppendBottom(param, layer_id, bottom_id, - &available_blobs, &blob_name_to_idx); - // If a blob needs backward, this layer should provide it. - need_backward |= blob_need_backward_[blob_id]; - } - int num_top = layer_param.top_size(); - for (int top_id = 0; top_id < num_top; ++top_id) { - AppendTop(param, layer_id, top_id, &available_blobs, &blob_name_to_idx); - // Collect Input layer tops as Net inputs. - if (layer_param.type() == "Input") { - const int blob_id = blobs_.size() - 1; - net_input_blob_indices_.push_back(blob_id); - net_input_blobs_.push_back(blobs_[blob_id].get()); - } - } - // If the layer specifies that AutoTopBlobs() -> true and the LayerParameter - // specified fewer than the required number (as specified by - // ExactNumTopBlobs() or MinTopBlobs()), allocate them here. - Layer* layer = layers_[layer_id].get(); - if (layer->AutoTopBlobs()) { - const int needed_num_top = - std::max(layer->MinTopBlobs(), layer->ExactNumTopBlobs()); - for (; num_top < needed_num_top; ++num_top) { - // Add "anonymous" top blobs -- do not modify available_blobs or - // blob_name_to_idx as we don't want these blobs to be usable as input - // to other layers. - AppendTop(param, layer_id, num_top, NULL, NULL); - } - } - // After this layer is connected, set it up. 
- layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]); - LOG_IF(INFO, Caffe::root_solver()) - << "Setting up " << layer_names_[layer_id]; - for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) { - if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) { - blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0)); - } - blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id); - LOG_IF(INFO, Caffe::root_solver()) - << "Top shape: " << top_vecs_[layer_id][top_id]->shape_string(); - if (layer->loss(top_id)) { - LOG_IF(INFO, Caffe::root_solver()) - << " with loss weight " << layer->loss(top_id); - } - memory_used_ += top_vecs_[layer_id][top_id]->count(); - } - LOG_IF(INFO, Caffe::root_solver()) - << "Memory required for data: " << memory_used_ * sizeof(Dtype); - const int param_size = layer_param.param_size(); - const int num_param_blobs = layers_[layer_id]->blobs().size(); - CHECK_LE(param_size, num_param_blobs) - << "Too many params specified for layer " << layer_param.name(); - ParamSpec default_param_spec; - for (int param_id = 0; param_id < num_param_blobs; ++param_id) { - const ParamSpec* param_spec = (param_id < param_size) ? - &layer_param.param(param_id) : &default_param_spec; - const bool param_need_backward = param_spec->lr_mult() != 0; - need_backward |= param_need_backward; - layers_[layer_id]->set_param_propagate_down(param_id, - param_need_backward); - } - for (int param_id = 0; param_id < num_param_blobs; ++param_id) { - AppendParam(param, layer_id, param_id); - } - // Finally, set the backward flag - layer_need_backward_.push_back(need_backward); - if (need_backward) { - for (int top_id = 0; top_id < top_id_vecs_[layer_id].size(); ++top_id) { - blob_need_backward_[top_id_vecs_[layer_id][top_id]] = true; - } - } - } - // Go through the net backwards to determine which blobs contribute to the - // loss. We can skip backward computation for blobs that don't contribute - // to the loss. 
- // Also checks if all bottom blobs don't need backward computation (possible - // because the skip_propagate_down param) and so we can skip bacward - // computation for the entire layer - set blobs_under_loss; - set blobs_skip_backp; - for (int layer_id = layers_.size() - 1; layer_id >= 0; --layer_id) { - bool layer_contributes_loss = false; - bool layer_skip_propagate_down = true; - for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) { - const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]]; - if (layers_[layer_id]->loss(top_id) || - (blobs_under_loss.find(blob_name) != blobs_under_loss.end())) { - layer_contributes_loss = true; - } - if (blobs_skip_backp.find(blob_name) == blobs_skip_backp.end()) { - layer_skip_propagate_down = false; - } - if (layer_contributes_loss && !layer_skip_propagate_down) - break; - } - // If this layer can skip backward computation, also all his bottom blobs - // don't need backpropagation - if (layer_need_backward_[layer_id] && layer_skip_propagate_down) { - layer_need_backward_[layer_id] = false; - for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size(); - ++bottom_id) { - bottom_need_backward_[layer_id][bottom_id] = false; - } - } - if (!layer_contributes_loss) { layer_need_backward_[layer_id] = false; } - if (Caffe::root_solver()) { - if (layer_need_backward_[layer_id]) { - LOG(INFO) << layer_names_[layer_id] << " needs backward computation."; - } else { - LOG(INFO) << layer_names_[layer_id] - << " does not need backward computation."; - } - } - for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size(); - ++bottom_id) { - if (layer_contributes_loss) { - const string& blob_name = - blob_names_[bottom_id_vecs_[layer_id][bottom_id]]; - blobs_under_loss.insert(blob_name); - } else { - bottom_need_backward_[layer_id][bottom_id] = false; - } - if (!bottom_need_backward_[layer_id][bottom_id]) { - const string& blob_name = - blob_names_[bottom_id_vecs_[layer_id][bottom_id]]; - blobs_skip_backp.insert(blob_name); - } - } - } - // Handle force_backward if needed. - if (param.force_backward()) { - for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) { - layer_need_backward_[layer_id] = true; - for (int bottom_id = 0; - bottom_id < bottom_need_backward_[layer_id].size(); ++bottom_id) { - bottom_need_backward_[layer_id][bottom_id] = - bottom_need_backward_[layer_id][bottom_id] || - layers_[layer_id]->AllowForceBackward(bottom_id); - blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] = - blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] || - bottom_need_backward_[layer_id][bottom_id]; - } - for (int param_id = 0; param_id < layers_[layer_id]->blobs().size(); - ++param_id) { - layers_[layer_id]->set_param_propagate_down(param_id, true); - } - } - } - // In the end, all remaining blobs are considered output blobs. 
- for (set::iterator it = available_blobs.begin(); - it != available_blobs.end(); ++it) { - LOG_IF(INFO, Caffe::root_solver()) - << "This network produces output " << *it; - net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get()); - net_output_blob_indices_.push_back(blob_name_to_idx[*it]); - } - for (size_t blob_id = 0; blob_id < blob_names_.size(); ++blob_id) { - blob_names_index_[blob_names_[blob_id]] = blob_id; - } - for (size_t layer_id = 0; layer_id < layer_names_.size(); ++layer_id) { - layer_names_index_[layer_names_[layer_id]] = layer_id; - } - ShareWeights(); - debug_info_ = param.debug_info(); - LOG_IF(INFO, Caffe::root_solver()) << "Network initialization done."; -} - -template -void Net::FilterNet(const NetParameter& param, - NetParameter* param_filtered) { - NetState net_state(param.state()); - param_filtered->CopyFrom(param); - param_filtered->clear_layer(); - for (int i = 0; i < param.layer_size(); ++i) { - const LayerParameter& layer_param = param.layer(i); - const string& layer_name = layer_param.name(); - CHECK(layer_param.include_size() == 0 || layer_param.exclude_size() == 0) - << "Specify either include rules or exclude rules; not both."; - // If no include rules are specified, the layer is included by default and - // only excluded if it meets one of the exclude rules. - bool layer_included = (layer_param.include_size() == 0); - for (int j = 0; layer_included && j < layer_param.exclude_size(); ++j) { - if (StateMeetsRule(net_state, layer_param.exclude(j), layer_name)) { - layer_included = false; - } - } - for (int j = 0; !layer_included && j < layer_param.include_size(); ++j) { - if (StateMeetsRule(net_state, layer_param.include(j), layer_name)) { - layer_included = true; - } - } - if (layer_included) { - param_filtered->add_layer()->CopyFrom(layer_param); - } - } -} - -template -bool Net::StateMeetsRule(const NetState& state, - const NetStateRule& rule, const string& layer_name) { - // Check whether the rule is broken due to phase. - if (rule.has_phase()) { - if (rule.phase() != state.phase()) { - LOG_IF(INFO, Caffe::root_solver()) - << "The NetState phase (" << state.phase() - << ") differed from the phase (" << rule.phase() - << ") specified by a rule in layer " << layer_name; - return false; - } - } - // Check whether the rule is broken due to min level. - if (rule.has_min_level()) { - if (state.level() < rule.min_level()) { - LOG_IF(INFO, Caffe::root_solver()) - << "The NetState level (" << state.level() - << ") is above the min_level (" << rule.min_level() - << ") specified by a rule in layer " << layer_name; - return false; - } - } - // Check whether the rule is broken due to max level. - if (rule.has_max_level()) { - if (state.level() > rule.max_level()) { - LOG_IF(INFO, Caffe::root_solver()) - << "The NetState level (" << state.level() - << ") is above the max_level (" << rule.max_level() - << ") specified by a rule in layer " << layer_name; - return false; - } - } - // Check whether the rule is broken due to stage. The NetState must - // contain ALL of the rule's stages to meet it. - for (int i = 0; i < rule.stage_size(); ++i) { - // Check that the NetState contains the rule's ith stage. 
- bool has_stage = false; - for (int j = 0; !has_stage && j < state.stage_size(); ++j) { - if (rule.stage(i) == state.stage(j)) { has_stage = true; } - } - if (!has_stage) { - LOG_IF(INFO, Caffe::root_solver()) - << "The NetState did not contain stage '" << rule.stage(i) - << "' specified by a rule in layer " << layer_name; - return false; - } - } - // Check whether the rule is broken due to not_stage. The NetState must - // contain NONE of the rule's not_stages to meet it. - for (int i = 0; i < rule.not_stage_size(); ++i) { - // Check that the NetState contains the rule's ith not_stage. - bool has_stage = false; - for (int j = 0; !has_stage && j < state.stage_size(); ++j) { - if (rule.not_stage(i) == state.stage(j)) { has_stage = true; } - } - if (has_stage) { - LOG_IF(INFO, Caffe::root_solver()) - << "The NetState contained a not_stage '" << rule.not_stage(i) - << "' specified by a rule in layer " << layer_name; - return false; - } - } - return true; -} - -// Helper for Net::Init: add a new top blob to the net. -template -void Net::AppendTop(const NetParameter& param, const int layer_id, - const int top_id, set* available_blobs, - map* blob_name_to_idx) { - shared_ptr layer_param( - new LayerParameter(param.layer(layer_id))); - const string& blob_name = (layer_param->top_size() > top_id) ? - layer_param->top(top_id) : "(automatic)"; - // Check if we are doing in-place computation - if (blob_name_to_idx && layer_param->bottom_size() > top_id && - blob_name == layer_param->bottom(top_id)) { - // In-place computation - LOG_IF(INFO, Caffe::root_solver()) - << layer_param->name() << " -> " << blob_name << " (in-place)"; - top_vecs_[layer_id].push_back(blobs_[(*blob_name_to_idx)[blob_name]].get()); - top_id_vecs_[layer_id].push_back((*blob_name_to_idx)[blob_name]); - } else if (blob_name_to_idx && - blob_name_to_idx->find(blob_name) != blob_name_to_idx->end()) { - // If we are not doing in-place computation but have duplicated blobs, - // raise an error. - LOG(FATAL) << "Top blob '" << blob_name - << "' produced by multiple sources."; - } else { - // Normal output. - if (Caffe::root_solver()) { - LOG(INFO) << layer_param->name() << " -> " << blob_name; - } - shared_ptr > blob_pointer(new Blob()); - const int blob_id = blobs_.size(); - blobs_.push_back(blob_pointer); - blob_names_.push_back(blob_name); - blob_need_backward_.push_back(false); - if (blob_name_to_idx) { (*blob_name_to_idx)[blob_name] = blob_id; } - top_id_vecs_[layer_id].push_back(blob_id); - top_vecs_[layer_id].push_back(blob_pointer.get()); - } - if (available_blobs) { available_blobs->insert(blob_name); } -} - -// Helper for Net::Init: add a new bottom blob to the net. 
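StateMeetsRule above is the single predicate behind include/exclude filtering: the phase must agree, the level must fall inside [min_level, max_level], every listed stage must be present in the NetState, and no not_stage may be present. A small sketch of evaluating one rule by hand (stage and layer names are hypothetical; this assumes StateMeetsRule is a public static member of Net, as it is in upstream Caffe):

    #include "caffe/net.hpp"
    #include "caffe/proto/caffe.pb.h"

    bool rule_matches_example() {
      caffe::NetState state;
      state.set_phase(caffe::TEST);
      state.set_level(1);
      state.add_stage("deploy");

      caffe::NetStateRule rule;
      rule.set_phase(caffe::TEST);       // equals state.phase(), ok
      rule.set_min_level(0);             // 0 <= 1, ok
      rule.add_stage("deploy");          // present in the state, ok
      rule.add_not_stage("train-aug");   // absent from the state, ok

      return caffe::Net<float>::StateMeetsRule(state, rule, "fc8");  // all checks pass -> true
    }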
-template -int Net::AppendBottom(const NetParameter& param, const int layer_id, - const int bottom_id, set* available_blobs, - map* blob_name_to_idx) { - const LayerParameter& layer_param = param.layer(layer_id); - const string& blob_name = layer_param.bottom(bottom_id); - if (available_blobs->find(blob_name) == available_blobs->end()) { - LOG(FATAL) << "Unknown bottom blob '" << blob_name << "' (layer '" - << layer_param.name() << "', bottom index " << bottom_id << ")"; - } - const int blob_id = (*blob_name_to_idx)[blob_name]; - LOG_IF(INFO, Caffe::root_solver()) - << layer_names_[layer_id] << " <- " << blob_name; - bottom_vecs_[layer_id].push_back(blobs_[blob_id].get()); - bottom_id_vecs_[layer_id].push_back(blob_id); - available_blobs->erase(blob_name); - bool need_backward = blob_need_backward_[blob_id]; - // Check if the backpropagation on bottom_id should be skipped - if (layer_param.propagate_down_size() > 0) { - need_backward = layer_param.propagate_down(bottom_id); - } - bottom_need_backward_[layer_id].push_back(need_backward); - return blob_id; -} - -template -void Net::AppendParam(const NetParameter& param, const int layer_id, - const int param_id) { - const LayerParameter& layer_param = layers_[layer_id]->layer_param(); - const int param_size = layer_param.param_size(); - string param_name = - (param_size > param_id) ? layer_param.param(param_id).name() : ""; - if (param_name.size()) { - param_display_names_.push_back(param_name); - } else { - ostringstream param_display_name; - param_display_name << param_id; - param_display_names_.push_back(param_display_name.str()); - } - const int net_param_id = params_.size(); - params_.push_back(layers_[layer_id]->blobs()[param_id]); - param_id_vecs_[layer_id].push_back(net_param_id); - param_layer_indices_.push_back(make_pair(layer_id, param_id)); - ParamSpec default_param_spec; - const ParamSpec* param_spec = (layer_param.param_size() > param_id) ? - &layer_param.param(param_id) : &default_param_spec; - if (!param_size || !param_name.size() || (param_name.size() && - param_names_index_.find(param_name) == param_names_index_.end())) { - // This layer "owns" this parameter blob -- it is either anonymous - // (i.e., not given a param_name) or explicitly given a name that we - // haven't already seen. 
- param_owners_.push_back(-1); - if (param_name.size()) { - param_names_index_[param_name] = net_param_id; - } - const int learnable_param_id = learnable_params_.size(); - learnable_params_.push_back(params_[net_param_id].get()); - learnable_param_ids_.push_back(learnable_param_id); - has_params_lr_.push_back(param_spec->has_lr_mult()); - has_params_decay_.push_back(param_spec->has_decay_mult()); - params_lr_.push_back(param_spec->lr_mult()); - params_weight_decay_.push_back(param_spec->decay_mult()); - } else { - // Named param blob with name we've seen before: share params - const int owner_net_param_id = param_names_index_[param_name]; - param_owners_.push_back(owner_net_param_id); - const pair& owner_index = - param_layer_indices_[owner_net_param_id]; - const int owner_layer_id = owner_index.first; - const int owner_param_id = owner_index.second; - LOG_IF(INFO, Caffe::root_solver()) << "Sharing parameters '" << param_name - << "' owned by " - << "layer '" << layer_names_[owner_layer_id] << "', param " - << "index " << owner_param_id; - Blob* this_blob = layers_[layer_id]->blobs()[param_id].get(); - Blob* owner_blob = - layers_[owner_layer_id]->blobs()[owner_param_id].get(); - const int param_size = layer_param.param_size(); - if (param_size > param_id && (layer_param.param(param_id).share_mode() == - ParamSpec_DimCheckMode_PERMISSIVE)) { - // Permissive dimension checking -- only check counts are the same. - CHECK_EQ(this_blob->count(), owner_blob->count()) - << "Cannot share param '" << param_name << "' owned by layer '" - << layer_names_[owner_layer_id] << "' with layer '" - << layer_names_[layer_id] << "'; count mismatch. Owner layer param " - << "shape is " << owner_blob->shape_string() << "; sharing layer " - << "shape is " << this_blob->shape_string(); - } else { - // Strict dimension checking -- all dims must be the same. - CHECK(this_blob->shape() == owner_blob->shape()) - << "Cannot share param '" << param_name << "' owned by layer '" - << layer_names_[owner_layer_id] << "' with layer '" - << layer_names_[layer_id] << "'; shape mismatch. 
Owner layer param " - << "shape is " << owner_blob->shape_string() << "; sharing layer " - << "expects shape " << this_blob->shape_string(); - } - const int learnable_param_id = learnable_param_ids_[owner_net_param_id]; - learnable_param_ids_.push_back(learnable_param_id); - if (param_spec->has_lr_mult()) { - if (has_params_lr_[learnable_param_id]) { - CHECK_EQ(param_spec->lr_mult(), params_lr_[learnable_param_id]) - << "Shared param '" << param_name << "' has mismatched lr_mult."; - } else { - has_params_lr_[learnable_param_id] = true; - params_lr_[learnable_param_id] = param_spec->lr_mult(); - } - } - if (param_spec->has_decay_mult()) { - if (has_params_decay_[learnable_param_id]) { - CHECK_EQ(param_spec->decay_mult(), - params_weight_decay_[learnable_param_id]) - << "Shared param '" << param_name << "' has mismatched decay_mult."; - } else { - has_params_decay_[learnable_param_id] = true; - params_weight_decay_[learnable_param_id] = param_spec->decay_mult(); - } - } - } -} - -template -Dtype Net::ForwardFromTo(int start, int end) { - CHECK_GE(start, 0); - CHECK_LT(end, layers_.size()); - Dtype loss = 0; - for (int i = start; i <= end; ++i) { - for (int c = 0; c < before_forward_.size(); ++c) { - before_forward_[c]->run(i); - } - Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]); - loss += layer_loss; - if (debug_info_) { ForwardDebugInfo(i); } - for (int c = 0; c < after_forward_.size(); ++c) { - after_forward_[c]->run(i); - } - } - return loss; -} - -template -Dtype Net::ForwardFrom(int start) { - return ForwardFromTo(start, layers_.size() - 1); -} - -template -Dtype Net::ForwardTo(int end) { - return ForwardFromTo(0, end); -} - -template -const vector*>& Net::Forward(Dtype* loss) { - if (loss != NULL) { - *loss = ForwardFromTo(0, layers_.size() - 1); - } else { - ForwardFromTo(0, layers_.size() - 1); - } - return net_output_blobs_; -} - -template -const vector*>& Net::Forward( - const vector*> & bottom, Dtype* loss) { -#ifdef USE_GLOG - LOG_EVERY_N(WARNING, 1000) << "DEPRECATED: Forward(bottom, loss) " - << "will be removed in a future version. 
Use Forward(loss)."; -#endif - // Copy bottom to net bottoms - for (int i = 0; i < bottom.size(); ++i) { - net_input_blobs_[i]->CopyFrom(*bottom[i]); - } - return Forward(loss); -} - -#ifdef ENABLE_BACKWARD -template -void Net::BackwardFromTo(int start, int end) { - CHECK_GE(end, 0); - CHECK_LT(start, layers_.size()); - for (int i = start; i >= end; --i) { - for (int c = 0; c < before_backward_.size(); ++c) { - before_backward_[c]->run(i); - } - if (layer_need_backward_[i]) { - layers_[i]->Backward( - top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]); - if (debug_info_) { BackwardDebugInfo(i); } - } - for (int c = 0; c < after_backward_.size(); ++c) { - after_backward_[c]->run(i); - } - } -} -#endif - -template -void Net::ForwardDebugInfo(const int layer_id) { - for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) { - const Blob& blob = *top_vecs_[layer_id][top_id]; - const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]]; - const Dtype data_abs_val_mean = blob.asum_data() / blob.count(); - LOG_IF(INFO, Caffe::root_solver()) - << " [Forward] " - << "Layer " << layer_names_[layer_id] - << ", top blob " << blob_name - << " data: " << data_abs_val_mean; - } - for (int param_id = 0; param_id < layers_[layer_id]->blobs().size(); - ++param_id) { - const Blob& blob = *layers_[layer_id]->blobs()[param_id]; - const int net_param_id = param_id_vecs_[layer_id][param_id]; - const string& blob_name = param_display_names_[net_param_id]; - const Dtype data_abs_val_mean = blob.asum_data() / blob.count(); - LOG_IF(INFO, Caffe::root_solver()) - << " [Forward] " - << "Layer " << layer_names_[layer_id] - << ", param blob " << blob_name - << " data: " << data_abs_val_mean; - } -} - -#ifdef ENABLE_BACKWARD -template -void Net::BackwardDebugInfo(const int layer_id) { - const vector*>& bottom_vec = bottom_vecs_[layer_id]; - for (int bottom_id = 0; bottom_id < bottom_vec.size(); ++bottom_id) { - if (!bottom_need_backward_[layer_id][bottom_id]) { continue; } - const Blob& blob = *bottom_vec[bottom_id]; - const string& blob_name = blob_names_[bottom_id_vecs_[layer_id][bottom_id]]; - const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count(); - LOG_IF(INFO, Caffe::root_solver()) - << " [Backward] " - << "Layer " << layer_names_[layer_id] - << ", bottom blob " << blob_name - << " diff: " << diff_abs_val_mean; - } - for (int param_id = 0; param_id < layers_[layer_id]->blobs().size(); - ++param_id) { - if (!layers_[layer_id]->param_propagate_down(param_id)) { continue; } - const Blob& blob = *layers_[layer_id]->blobs()[param_id]; - const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count(); - LOG_IF(INFO, Caffe::root_solver()) - << " [Backward] " - << "Layer " << layer_names_[layer_id] - << ", param blob " << param_id - << " diff: " << diff_abs_val_mean; - } -} -#endif - -template -void Net::UpdateDebugInfo(const int param_id) { - const Blob& blob = *params_[param_id]; - const int param_owner = param_owners_[param_id]; - const string& layer_name = layer_names_[param_layer_indices_[param_id].first]; - const string& param_display_name = param_display_names_[param_id]; - const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count(); - if (param_owner < 0) { - const Dtype data_abs_val_mean = blob.asum_data() / blob.count(); - LOG_IF(INFO, Caffe::root_solver()) - << " [Update] Layer " << layer_name - << ", param " << param_display_name - << " data: " << data_abs_val_mean - << "; diff: " << diff_abs_val_mean; -# - } else { - const string& owner_layer_name = - 
layer_names_[param_layer_indices_[param_owner].first]; - LOG_IF(INFO, Caffe::root_solver()) - << " [Update] Layer " << layer_name - << ", param blob " << param_display_name - << " (owned by layer " << owner_layer_name << ", " << "param " - << param_display_names_[param_owners_[param_id]] << ")" - << " diff: " << diff_abs_val_mean; - } -} - -template -void Net::ShareTrainedLayersWith(const Net* other) { - int num_source_layers = other->layers().size(); - for (int i = 0; i < num_source_layers; ++i) { - Layer* source_layer = other->layers()[i].get(); - const string& source_layer_name = other->layer_names()[i]; - int target_layer_id = 0; - while (target_layer_id != layer_names_.size() && - layer_names_[target_layer_id] != source_layer_name) { - ++target_layer_id; - } - if (target_layer_id == layer_names_.size()) { - LOG(INFO) << "Ignoring source layer " << source_layer_name; - continue; - } - DLOG(INFO) << "Copying source layer " << source_layer_name; - vector > >& target_blobs = - layers_[target_layer_id]->blobs(); - CHECK_EQ(target_blobs.size(), source_layer->blobs().size()) - << "Incompatible number of blobs for layer " << source_layer_name; - for (int j = 0; j < target_blobs.size(); ++j) { - Blob* source_blob = source_layer->blobs()[j].get(); - CHECK(target_blobs[j]->shape() == source_blob->shape()) - << "Cannot share param " << j << " weights from layer '" - << source_layer_name << "'; shape mismatch. Source param shape is " - << source_blob->shape_string() << "; target param shape is " - << target_blobs[j]->shape_string(); - target_blobs[j]->ShareData(*source_blob); - } - } -} - -#ifdef ENABLE_BACKWARD -template -void Net::BackwardFrom(int start) { - BackwardFromTo(start, 0); -} - -template -void Net::BackwardTo(int end) { - BackwardFromTo(layers_.size() - 1, end); -} - -template -void Net::Backward() { - BackwardFromTo(layers_.size() - 1, 0); - if (debug_info_) { - Dtype asum_data = 0, asum_diff = 0, sumsq_data = 0, sumsq_diff = 0; - for (int i = 0; i < learnable_params_.size(); ++i) { - asum_data += learnable_params_[i]->asum_data(); - asum_diff += learnable_params_[i]->asum_diff(); - sumsq_data += learnable_params_[i]->sumsq_data(); - sumsq_diff += learnable_params_[i]->sumsq_diff(); - } - const Dtype l2norm_data = std::sqrt(sumsq_data); - const Dtype l2norm_diff = std::sqrt(sumsq_diff); - LOG(ERROR) << " [Backward] All net params (data, diff): " - << "L1 norm = (" << asum_data << ", " << asum_diff << "); " - << "L2 norm = (" << l2norm_data << ", " << l2norm_diff << ")"; - } -} - -template -void Net::Reshape() { - for (int i = 0; i < layers_.size(); ++i) { - layers_[i]->Reshape(bottom_vecs_[i], top_vecs_[i]); - } -} -#endif - -template -void Net::CopyTrainedLayersFrom(const NetParameter& param) { - int num_source_layers = param.layer_size(); - for (int i = 0; i < num_source_layers; ++i) { - const LayerParameter& source_layer = param.layer(i); - const string& source_layer_name = source_layer.name(); - int target_layer_id = 0; - while (target_layer_id != layer_names_.size() && - layer_names_[target_layer_id] != source_layer_name) { - ++target_layer_id; - } - if (target_layer_id == layer_names_.size()) { - LOG(INFO) << "Ignoring source layer " << source_layer_name; - continue; - } - DLOG(INFO) << "Copying source layer " << source_layer_name; - vector > >& target_blobs = - layers_[target_layer_id]->blobs(); - CHECK_EQ(target_blobs.size(), source_layer.blobs_size()) - << "Incompatible number of blobs for layer " << source_layer_name; - for (int j = 0; j < target_blobs.size(); ++j) { - if 
(!target_blobs[j]->ShapeEquals(source_layer.blobs(j))) { - Blob source_blob; - const bool kReshape = true; - source_blob.FromProto(source_layer.blobs(j), kReshape); - LOG(FATAL) << "Cannot copy param " << j << " weights from layer '" - << source_layer_name << "'; shape mismatch. Source param shape is " - << source_blob.shape_string() << "; target param shape is " - << target_blobs[j]->shape_string() << ". " - << "To learn this layer's parameters from scratch rather than " - << "copying from a saved net, rename the layer."; - } - const bool kReshape = false; - target_blobs[j]->FromProto(source_layer.blobs(j), kReshape); - } - } -} - -template -void Net::CopyTrainedLayersFrom(const string trained_filename) { -#ifdef USE_HDF5 - if (trained_filename.size() >= 3 && - trained_filename.compare(trained_filename.size() - 3, 3, ".h5") == 0) { - CopyTrainedLayersFromHDF5(trained_filename); - } else { -#else - { -#endif - CopyTrainedLayersFromBinaryProto(trained_filename); - } -} - -template -void Net::CopyTrainedLayersFromBinaryProto( - const string trained_filename) { - NetParameter param; - ReadNetParamsFromBinaryFileOrDie(trained_filename, ¶m); - CopyTrainedLayersFrom(param); -} - - -template -void Net::CopyTrainedLayersFromHDF5(const string trained_filename) { -#ifdef USE_HDF5 - hid_t file_hid = H5Fopen(trained_filename.c_str(), H5F_ACC_RDONLY, - H5P_DEFAULT); - CHECK_GE(file_hid, 0) << "Couldn't open " << trained_filename; - hid_t data_hid = H5Gopen2(file_hid, "data", H5P_DEFAULT); - CHECK_GE(data_hid, 0) << "Error reading weights from " << trained_filename; - int num_layers = hdf5_get_num_links(data_hid); - for (int i = 0; i < num_layers; ++i) { - string source_layer_name = hdf5_get_name_by_idx(data_hid, i); - if (!layer_names_index_.count(source_layer_name)) { - LOG(INFO) << "Ignoring source layer " << source_layer_name; - continue; - } - int target_layer_id = layer_names_index_[source_layer_name]; - DLOG(INFO) << "Copying source layer " << source_layer_name; - vector > >& target_blobs = - layers_[target_layer_id]->blobs(); - hid_t layer_hid = H5Gopen2(data_hid, source_layer_name.c_str(), - H5P_DEFAULT); - CHECK_GE(layer_hid, 0) - << "Error reading weights from " << trained_filename; - // Check that source layer doesn't have more params than target layer - int num_source_params = hdf5_get_num_links(layer_hid); - CHECK_LE(num_source_params, target_blobs.size()) - << "Incompatible number of blobs for layer " << source_layer_name; - for (int j = 0; j < target_blobs.size(); ++j) { - ostringstream oss; - oss << j; - string dataset_name = oss.str(); - int target_net_param_id = param_id_vecs_[target_layer_id][j]; - if (!H5Lexists(layer_hid, dataset_name.c_str(), H5P_DEFAULT)) { - // Target param doesn't exist in source weights... - if (param_owners_[target_net_param_id] != -1) { - // ...but it's weight-shared in target, so that's fine. 
- continue; - } else { - LOG(FATAL) << "Incompatible number of blobs for layer " - << source_layer_name; - } - } - hdf5_load_nd_dataset(layer_hid, dataset_name.c_str(), 0, kMaxBlobAxes, - target_blobs[j].get()); - } - H5Gclose(layer_hid); - } - H5Gclose(data_hid); - H5Fclose(file_hid); -#endif -} - -template -void Net::ToProto(NetParameter* param, bool write_diff) const { - param->Clear(); - param->set_name(name_); - // Add bottom and top - DLOG(INFO) << "Serializing " << layers_.size() << " layers"; - for (int i = 0; i < layers_.size(); ++i) { - LayerParameter* layer_param = param->add_layer(); - layers_[i]->ToProto(layer_param, write_diff); - } -} - -template -void Net::ToHDF5(const string& filename, bool write_diff) const { -#ifdef USE_HDF5 - hid_t file_hid = H5Fcreate(filename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, - H5P_DEFAULT); - CHECK_GE(file_hid, 0) - << "Couldn't open " << filename << " to save weights."; - hid_t data_hid = H5Gcreate2(file_hid, "data", H5P_DEFAULT, H5P_DEFAULT, - H5P_DEFAULT); - CHECK_GE(data_hid, 0) << "Error saving weights to " << filename << "."; - hid_t diff_hid = -1; - if (write_diff) { - diff_hid = H5Gcreate2(file_hid, "diff", H5P_DEFAULT, H5P_DEFAULT, - H5P_DEFAULT); - CHECK_GE(diff_hid, 0) << "Error saving weights to " << filename << "."; - } - for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) { - const LayerParameter& layer_param = layers_[layer_id]->layer_param(); - string layer_name = layer_param.name(); - hid_t layer_data_hid = H5Gcreate2(data_hid, layer_name.c_str(), - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK_GE(layer_data_hid, 0) - << "Error saving weights to " << filename << "."; - hid_t layer_diff_hid = -1; - if (write_diff) { - layer_diff_hid = H5Gcreate2(diff_hid, layer_name.c_str(), - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - CHECK_GE(layer_diff_hid, 0) - << "Error saving weights to " << filename << "."; - } - int num_params = layers_[layer_id]->blobs().size(); - for (int param_id = 0; param_id < num_params; ++param_id) { - ostringstream dataset_name; - dataset_name << param_id; - const int net_param_id = param_id_vecs_[layer_id][param_id]; - if (param_owners_[net_param_id] == -1) { - // Only save params that own themselves - hdf5_save_nd_dataset(layer_data_hid, dataset_name.str(), - *params_[net_param_id]); - } - if (write_diff) { - // Write diffs regardless of weight-sharing - hdf5_save_nd_dataset(layer_diff_hid, dataset_name.str(), - *params_[net_param_id], true); - } - } - H5Gclose(layer_data_hid); - if (write_diff) { - H5Gclose(layer_diff_hid); - } - } - H5Gclose(data_hid); - if (write_diff) { - H5Gclose(diff_hid); - } - H5Fclose(file_hid); -#endif -} - -#ifdef ENABLE_BACKWARD -template -void Net::Update() { - for (int i = 0; i < learnable_params_.size(); ++i) { - learnable_params_[i]->Update(); - } -} -#endif - -template -void Net::ClearParamDiffs() { - for (int i = 0; i < learnable_params_.size(); ++i) { - Blob* blob = learnable_params_[i]; - switch (Caffe::mode()) { - case Caffe::CPU: - caffe_set(blob->count(), static_cast(0), - blob->mutable_cpu_diff()); - break; - case Caffe::GPU: -#ifndef CPU_ONLY - caffe_gpu_set(blob->count(), static_cast(0), - blob->mutable_gpu_diff()); -#else - NO_GPU; -#endif - break; - } - } -} - -template -void Net::ShareWeights() { - for (int i = 0; i < params_.size(); ++i) { - if (param_owners_[i] < 0) { continue; } - params_[i]->ShareData(*params_[param_owners_[i]]); - params_[i]->ShareDiff(*params_[param_owners_[i]]); - } -} - -template -bool Net::has_blob(const string& blob_name) const { - 
return blob_names_index_.find(blob_name) != blob_names_index_.end(); -} - -template -const shared_ptr > Net::blob_by_name( - const string& blob_name) const { - shared_ptr > blob_ptr; - if (has_blob(blob_name)) { - blob_ptr = blobs_[blob_names_index_.find(blob_name)->second]; - } else { - blob_ptr.reset((Blob*)(NULL)); - LOG(WARNING) << "Unknown blob name " << blob_name; - } - return blob_ptr; -} - -template -bool Net::has_layer(const string& layer_name) const { - return layer_names_index_.find(layer_name) != layer_names_index_.end(); -} - -template -const shared_ptr > Net::layer_by_name( - const string& layer_name) const { - shared_ptr > layer_ptr; - if (has_layer(layer_name)) { - layer_ptr = layers_[layer_names_index_.find(layer_name)->second]; - } else { - layer_ptr.reset((Layer*)(NULL)); - LOG(WARNING) << "Unknown layer name " << layer_name; - } - return layer_ptr; -} - -INSTANTIATE_CLASS(Net); - -} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto deleted file mode 100644 index 1c85f69..0000000 --- a/src/caffe/proto/caffe.proto +++ /dev/null @@ -1,1404 +0,0 @@ -syntax = "proto2"; - -package caffe; - -// Specifies the shape (dimensions) of a Blob. -message BlobShape { - repeated int64 dim = 1 [packed = true]; -} - -message BlobProto { - optional BlobShape shape = 7; - repeated float data = 5 [packed = true]; - repeated float diff = 6 [packed = true]; - repeated double double_data = 8 [packed = true]; - repeated double double_diff = 9 [packed = true]; - - // 4D dimensions -- deprecated. Use "shape" instead. - optional int32 num = 1 [default = 0]; - optional int32 channels = 2 [default = 0]; - optional int32 height = 3 [default = 0]; - optional int32 width = 4 [default = 0]; -} - -// The BlobProtoVector is simply a way to pass multiple blobproto instances -// around. -message BlobProtoVector { - repeated BlobProto blobs = 1; -} - -message Datum { - optional int32 channels = 1; - optional int32 height = 2; - optional int32 width = 3; - // the actual image data, in bytes - optional bytes data = 4; - optional int32 label = 5; - // Optionally, the datum could also hold float data. - repeated float float_data = 6; - // If true data contains an encoded image that need to be decoded - optional bool encoded = 7 [default = false]; -} - -message FillerParameter { - // The filler type. - optional string type = 1 [default = 'constant']; - optional float value = 2 [default = 0]; // the value in constant filler - optional float min = 3 [default = 0]; // the min value in uniform filler - optional float max = 4 [default = 1]; // the max value in uniform filler - optional float mean = 5 [default = 0]; // the mean value in Gaussian filler - optional float std = 6 [default = 1]; // the std value in Gaussian filler - // The expected number of non-zero output weights for a given input in - // Gaussian filler -- the default -1 means don't perform sparsification. - optional int32 sparse = 7 [default = -1]; - // Normalize the filler variance by fan_in, fan_out, or their average. - // Applies to 'xavier' and 'msra' fillers. - enum VarianceNorm { - FAN_IN = 0; - FAN_OUT = 1; - AVERAGE = 2; - } - optional VarianceNorm variance_norm = 8 [default = FAN_IN]; -} - -message NetParameter { - optional string name = 1; // consider giving the network a name - // DEPRECATED. See InputParameter. The input blobs to the network. - repeated string input = 3; - // DEPRECATED. See InputParameter. The shape of the input blobs. 
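Taken together, the removed lookup helpers (has_blob, blob_by_name) and the loading and forward methods earlier in net.cpp form the usual CPU inference path of this port. A hedged sketch of that flow (file and blob names are hypothetical; the calls follow the definitions shown above):

    #include <string>
    #include "caffe/net.hpp"

    float first_score(const std::string& model, const std::string& weights) {
      caffe::Net<float> net(model, caffe::TEST, 0, NULL);
      net.CopyTrainedLayersFrom(weights);   // binary proto, or HDF5 when built with USE_HDF5
      float loss = 0;
      net.Forward(&loss);                   // runs ForwardFromTo over every layer
      if (!net.has_blob("prob")) return -1.f;
      return net.blob_by_name("prob")->cpu_data()[0];
    }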
- repeated BlobShape input_shape = 8; - - // 4D input dimensions -- deprecated. Use "input_shape" instead. - // If specified, for each input blob there should be four - // values specifying the num, channels, height and width of the input blob. - // Thus, there should be a total of (4 * #input) numbers. - repeated int32 input_dim = 4; - - // Whether the network will force every layer to carry out backward operation. - // If set False, then whether to carry out backward is determined - // automatically according to the net structure and learning rates. - optional bool force_backward = 5 [default = false]; - // The current "state" of the network, including the phase, level, and stage. - // Some layers may be included/excluded depending on this state and the states - // specified in the layers' include and exclude fields. - optional NetState state = 6; - - // Print debugging information about results while running Net::Forward, - // Net::Backward, and Net::Update. - optional bool debug_info = 7 [default = false]; - - // The layers that make up the net. Each of their configurations, including - // connectivity and behavior, is specified as a LayerParameter. - repeated LayerParameter layer = 100; // ID 100 so layers are printed last. - - // DEPRECATED: use 'layer' instead. - repeated V1LayerParameter layers = 2; -} - -// NOTE -// Update the next available ID when you add a new SolverParameter field. -// -// SolverParameter next available ID: 42 (last added: layer_wise_reduce) -message SolverParameter { - ////////////////////////////////////////////////////////////////////////////// - // Specifying the train and test networks - // - // Exactly one train net must be specified using one of the following fields: - // train_net_param, train_net, net_param, net - // One or more test nets may be specified using any of the following fields: - // test_net_param, test_net, net_param, net - // If more than one test net field is specified (e.g., both net and - // test_net are specified), they will be evaluated in the field order given - // above: (1) test_net_param, (2) test_net, (3) net_param/net. - // A test_iter must be specified for each test_net. - // A test_level and/or a test_stage may also be specified for each test_net. - ////////////////////////////////////////////////////////////////////////////// - - // Proto filename for the train net, possibly combined with one or more - // test nets. - optional string net = 24; - // Inline train net param, possibly combined with one or more test nets. - optional NetParameter net_param = 25; - - optional string train_net = 1; // Proto filename for the train net. - repeated string test_net = 2; // Proto filenames for the test nets. - optional NetParameter train_net_param = 21; // Inline train net params. - repeated NetParameter test_net_param = 22; // Inline test net params. - - // The states for the train/test nets. Must be unspecified or - // specified once per net. - // - // By default, all states will have solver = true; - // train_state will have phase = TRAIN, - // and all test_state's will have phase = TEST. - // Other defaults are set according to the NetState defaults. - optional NetState train_state = 26; - repeated NetState test_state = 27; - - // The number of iterations for each test net. - repeated int32 test_iter = 3; - - // The number of iterations between two testing phases. 
- optional int32 test_interval = 4 [default = 0]; - optional bool test_compute_loss = 19 [default = false]; - // If true, run an initial test pass before the first iteration, - // ensuring memory availability and printing the starting value of the loss. - optional bool test_initialization = 32 [default = true]; - optional float base_lr = 5; // The base learning rate - // the number of iterations between displaying info. If display = 0, no info - // will be displayed. - optional int32 display = 6; - // Display the loss averaged over the last average_loss iterations - optional int32 average_loss = 33 [default = 1]; - optional int32 max_iter = 7; // the maximum number of iterations - // accumulate gradients over `iter_size` x `batch_size` instances - optional int32 iter_size = 36 [default = 1]; - - // The learning rate decay policy. The currently implemented learning rate - // policies are as follows: - // - fixed: always return base_lr. - // - step: return base_lr * gamma ^ (floor(iter / step)) - // - exp: return base_lr * gamma ^ iter - // - inv: return base_lr * (1 + gamma * iter) ^ (- power) - // - multistep: similar to step but it allows non uniform steps defined by - // stepvalue - // - poly: the effective learning rate follows a polynomial decay, to be - // zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power) - // - sigmoid: the effective learning rate follows a sigmod decay - // return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize)))) - // - // where base_lr, max_iter, gamma, step, stepvalue and power are defined - // in the solver parameter protocol buffer, and iter is the current iteration. - optional string lr_policy = 8; - optional float gamma = 9; // The parameter to compute the learning rate. - optional float power = 10; // The parameter to compute the learning rate. - optional float momentum = 11; // The momentum value. - optional float weight_decay = 12; // The weight decay. - // regularization types supported: L1 and L2 - // controlled by weight_decay - optional string regularization_type = 29 [default = "L2"]; - // the stepsize for learning rate policy "step" - optional int32 stepsize = 13; - // the stepsize for learning rate policy "multistep" - repeated int32 stepvalue = 34; - - // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm, - // whenever their actual L2 norm is larger. - optional float clip_gradients = 35 [default = -1]; - - optional int32 snapshot = 14 [default = 0]; // The snapshot interval - optional string snapshot_prefix = 15; // The prefix for the snapshot. - // whether to snapshot diff in the results or not. Snapshotting diff will help - // debugging but the final protocol buffer size will be much larger. - optional bool snapshot_diff = 16 [default = false]; - enum SnapshotFormat { - HDF5 = 0; - BINARYPROTO = 1; - } - optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO]; - // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. - enum SolverMode { - CPU = 0; - GPU = 1; - } - optional SolverMode solver_mode = 17 [default = GPU]; - // the device_id will that be used in GPU mode. Use device_id = 0 in default. - optional int32 device_id = 18 [default = 0]; - // If non-negative, the seed with which the Solver will initialize the Caffe - // random number generator -- useful for reproducible results. Otherwise, - // (and by default) initialize using a seed derived from the system clock. 
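The lr_policy comment block above effectively specifies the decay schedules. A standalone sketch of a few of them, following those formulas as written (solver code is not part of this port, so this is purely illustrative, and the helper name is made up):

    #include <cmath>
    #include <string>

    // Learning rate at iteration `iter` for some of the policies documented in SolverParameter.
    float learning_rate(const std::string& policy, float base_lr, float gamma,
                        float power, int stepsize, int iter, int max_iter) {
      if (policy == "fixed") return base_lr;
      if (policy == "step")  return base_lr * std::pow(gamma, iter / stepsize);  // floor via integer division
      if (policy == "exp")   return base_lr * std::pow(gamma, iter);
      if (policy == "inv")   return base_lr * std::pow(1.0f + gamma * iter, -power);
      if (policy == "poly")  return base_lr * std::pow(1.0f - float(iter) / max_iter, power);
      return base_lr;  // multistep and sigmoid omitted for brevity
    }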
- optional int64 random_seed = 20 [default = -1]; - - // type of the solver - optional string type = 40 [default = "SGD"]; - - // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam - optional float delta = 31 [default = 1e-8]; - // parameters for the Adam solver - optional float momentum2 = 39 [default = 0.999]; - - // RMSProp decay value - // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) - optional float rms_decay = 38 [default = 0.99]; - - // If true, print information about the state of the net that may help with - // debugging learning problems. - optional bool debug_info = 23 [default = false]; - - // If false, don't save a snapshot after training finishes. - optional bool snapshot_after_train = 28 [default = true]; - - // DEPRECATED: old solver enum types, use string instead - enum SolverType { - SGD = 0; - NESTEROV = 1; - ADAGRAD = 2; - RMSPROP = 3; - ADADELTA = 4; - ADAM = 5; - } - // DEPRECATED: use type instead of solver_type - optional SolverType solver_type = 30 [default = SGD]; - - // Overlap compute and communication for data parallel training - optional bool layer_wise_reduce = 41 [default = true]; -} - -// A message that stores the solver snapshots -message SolverState { - optional int32 iter = 1; // The current iteration - optional string learned_net = 2; // The file that stores the learned net. - repeated BlobProto history = 3; // The history for sgd solvers - optional int32 current_step = 4 [default = 0]; // The current step for learning rate -} - -enum Phase { - TRAIN = 0; - TEST = 1; -} - -message NetState { - optional Phase phase = 1 [default = TEST]; - optional int32 level = 2 [default = 0]; - repeated string stage = 3; -} - -message NetStateRule { - // Set phase to require the NetState have a particular phase (TRAIN or TEST) - // to meet this rule. - optional Phase phase = 1; - - // Set the minimum and/or maximum levels in which the layer should be used. - // Leave undefined to meet the rule regardless of level. - optional int32 min_level = 2; - optional int32 max_level = 3; - - // Customizable sets of stages to include or exclude. - // The net must have ALL of the specified stages and NONE of the specified - // "not_stage"s to meet the rule. - // (Use multiple NetStateRules to specify conjunctions of stages.) - repeated string stage = 4; - repeated string not_stage = 5; -} - -// Specifies training parameters (multipliers on global learning constants, -// and the name and other settings used for weight sharing). -message ParamSpec { - // The names of the parameter blobs -- useful for sharing parameters among - // layers, but never required otherwise. To share a parameter between two - // layers, give it a (non-empty) name. - optional string name = 1; - - // Whether to require shared weights to have the same shape, or just the same - // count -- defaults to STRICT if unspecified. - optional DimCheckMode share_mode = 2; - enum DimCheckMode { - // STRICT (default) requires that num, channels, height, width each match. - STRICT = 0; - // PERMISSIVE requires only the count (num*channels*height*width) to match. - PERMISSIVE = 1; - } - - // The multiplier on the global learning rate for this parameter. - optional float lr_mult = 3 [default = 1.0]; - - // The multiplier on the global weight decay for this parameter. - optional float decay_mult = 4 [default = 1.0]; -} - -// NOTE -// Update the next available ID when you add a new LayerParameter field. 
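Two of the quantities described above are easy to make concrete: the per-parameter multipliers of ParamSpec, and the RMSProp running mean-square given by the rms_decay comment. The sketch below is illustrative; in particular, applying delta outside the square root is an assumption of this sketch, not something the comments specify.

#include <cmath>

// Per-parameter hyper-parameters, as described for ParamSpec above: the
// global base_lr and weight_decay are scaled by lr_mult and decay_mult.
struct EffectiveHyperParams { float lr; float decay; };

EffectiveHyperParams ApplyParamSpec(float base_lr, float weight_decay,
                                    float lr_mult, float decay_mult) {
  return {base_lr * lr_mult, weight_decay * decay_mult};
}

// RMSProp accumulator, following the rms_decay comment:
// MeanSquare(t) = rms_decay * MeanSquare(t-1) + (1 - rms_decay) * SquareGradient(t).
// The returned scaled step uses the textbook form with delta as the
// numerical-stability term documented above.
float RmsPropScaledStep(float* mean_square, float gradient,
                        float rms_decay, float delta, float effective_lr) {
  *mean_square = rms_decay * (*mean_square)
      + (1.f - rms_decay) * gradient * gradient;
  return effective_lr * gradient / (std::sqrt(*mean_square) + delta);
}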
-// -// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param) -message LayerParameter { - optional string name = 1; // the layer name - optional string type = 2; // the layer type - repeated string bottom = 3; // the name of each bottom blob - repeated string top = 4; // the name of each top blob - - // The train / test phase for computation. - optional Phase phase = 10; - - // The amount of weight to assign each top blob in the objective. - // Each layer assigns a default value, usually of either 0 or 1, - // to each top blob. - repeated float loss_weight = 5; - - // Specifies training parameters (multipliers on global learning constants, - // and the name and other settings used for weight sharing). - repeated ParamSpec param = 6; - - // The blobs containing the numeric parameters of the layer. - repeated BlobProto blobs = 7; - - // Specifies whether to backpropagate to each bottom. If unspecified, - // Caffe will automatically infer whether each input needs backpropagation - // to compute parameter gradients. If set to true for some inputs, - // backpropagation to those inputs is forced; if set false for some inputs, - // backpropagation to those inputs is skipped. - // - // The size must be either 0 or equal to the number of bottoms. - repeated bool propagate_down = 11; - - // Rules controlling whether and when a layer is included in the network, - // based on the current NetState. You may specify a non-zero number of rules - // to include OR exclude, but not both. If no include or exclude rules are - // specified, the layer is always included. If the current NetState meets - // ANY (i.e., one or more) of the specified rules, the layer is - // included/excluded. - repeated NetStateRule include = 8; - repeated NetStateRule exclude = 9; - - // Parameters for data pre-processing. - optional TransformationParameter transform_param = 100; - - // Parameters shared by loss layers. - optional LossParameter loss_param = 101; - - // Layer type-specific parameters. - // - // Note: certain layers may have more than one computational engine - // for their implementation. These layers include an Engine type and - // engine parameter for selecting the implementation. - // The default for the engine is set by the ENGINE switch at compile-time. 
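The include/exclude semantics above, combined with the NetStateRule fields defined earlier, amount to a small predicate. The structs and function names below are invented for illustration and are not the original net-filtering code.

#include <algorithm>
#include <string>
#include <vector>

enum Phase { TRAIN = 0, TEST = 1 };

// Plain-struct mirrors of NetState and NetStateRule, for illustration only.
struct State { Phase phase; int level; std::vector<std::string> stages; };
struct Rule {
  bool has_phase = false;     Phase phase = TEST;
  bool has_min_level = false; int min_level = 0;
  bool has_max_level = false; int max_level = 0;
  std::vector<std::string> stage, not_stage;
};

bool HasStage(const State& s, const std::string& name) {
  return std::find(s.stages.begin(), s.stages.end(), name) != s.stages.end();
}

// A NetState meets a rule only if every specified constraint holds: the phase
// matches, the level lies in [min_level, max_level], ALL "stage" entries are
// present, and NONE of the "not_stage" entries are present.
bool Meets(const State& s, const Rule& r) {
  if (r.has_phase && s.phase != r.phase) return false;
  if (r.has_min_level && s.level < r.min_level) return false;
  if (r.has_max_level && s.level > r.max_level) return false;
  for (const auto& st : r.stage)     if (!HasStage(s, st)) return false;
  for (const auto& st : r.not_stage) if (HasStage(s, st))  return false;
  return true;
}

// Include and exclude rules may not be mixed. With include rules the layer is
// kept iff ANY rule matches; with exclude rules it is dropped iff ANY rule
// matches; with neither it is always kept.
bool LayerIsIncluded(const State& s, const std::vector<Rule>& include,
                     const std::vector<Rule>& exclude) {
  if (!include.empty()) {
    for (const auto& r : include) if (Meets(s, r)) return true;
    return false;
  }
  for (const auto& r : exclude) if (Meets(s, r)) return false;
  return true;
}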
- optional AccuracyParameter accuracy_param = 102; - optional ArgMaxParameter argmax_param = 103; - optional BatchNormParameter batch_norm_param = 139; - optional BiasParameter bias_param = 141; - optional ConcatParameter concat_param = 104; - optional ContrastiveLossParameter contrastive_loss_param = 105; - optional ConvolutionParameter convolution_param = 106; - optional CropParameter crop_param = 144; - optional DataParameter data_param = 107; - optional DropoutParameter dropout_param = 108; - optional DummyDataParameter dummy_data_param = 109; - optional EltwiseParameter eltwise_param = 110; - optional ELUParameter elu_param = 140; - optional EmbedParameter embed_param = 137; - optional ExpParameter exp_param = 111; - optional FlattenParameter flatten_param = 135; - optional HDF5DataParameter hdf5_data_param = 112; - optional HDF5OutputParameter hdf5_output_param = 113; - optional HingeLossParameter hinge_loss_param = 114; - optional ImageDataParameter image_data_param = 115; - optional InfogainLossParameter infogain_loss_param = 116; - optional InnerProductParameter inner_product_param = 117; - optional InputParameter input_param = 143; - optional LogParameter log_param = 134; - optional LRNParameter lrn_param = 118; - optional MemoryDataParameter memory_data_param = 119; - optional MVNParameter mvn_param = 120; - optional ParameterParameter parameter_param = 145; - optional PoolingParameter pooling_param = 121; - optional PowerParameter power_param = 122; - optional PReLUParameter prelu_param = 131; - optional PythonParameter python_param = 130; - optional RecurrentParameter recurrent_param = 146; - optional ReductionParameter reduction_param = 136; - optional ReLUParameter relu_param = 123; - optional ReshapeParameter reshape_param = 133; - optional ScaleParameter scale_param = 142; - optional SigmoidParameter sigmoid_param = 124; - optional SoftmaxParameter softmax_param = 125; - optional SPPParameter spp_param = 132; - optional SliceParameter slice_param = 126; - optional TanHParameter tanh_param = 127; - optional ThresholdParameter threshold_param = 128; - optional TileParameter tile_param = 138; - optional WindowDataParameter window_data_param = 129; -} - -// Message that stores parameters used to apply transformation -// to the data layer's data -message TransformationParameter { - // For data pre-processing, we can do simple scaling and subtracting the - // data mean, if provided. Note that the mean subtraction is always carried - // out before scaling. - optional float scale = 1 [default = 1]; - // Specify if we want to randomly mirror data. - optional bool mirror = 2 [default = false]; - // Specify if we would like to randomly crop an image. - optional uint32 crop_size = 3 [default = 0]; - // mean_file and mean_value cannot be specified at the same time - optional string mean_file = 4; - // if specified can be repeated once (would subtract it from all the channels) - // or can be repeated the same number of times as channels - // (would subtract them from the corresponding channel) - repeated float mean_value = 5; - // Force the decoded image to have 3 color channels. - optional bool force_color = 6 [default = false]; - // Force the decoded image to have 1 color channels. - optional bool force_gray = 7 [default = false]; -} - -// Message that stores parameters shared by loss layers -message LossParameter { - // If specified, ignore instances with the given label. 
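The ordering constraint documented for TransformationParameter (mean subtraction always precedes scaling) and the mirroring option can be summarized per value; the helper names below are illustrative, not the original DataTransformer code.

// Per-value transform: subtract the mean (from mean_file or the per-channel
// mean_value entries) first, then apply the multiplicative scale.
float TransformValue(float input, float mean, float scale) {
  return (input - mean) * scale;
}

// Horizontal mirroring changes which source column is read for output
// column w; cropping would similarly offset the source row/column indices.
int MirroredColumn(int w, int width, bool mirror) {
  return mirror ? (width - 1 - w) : w;
}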
- optional int32 ignore_label = 1; - // How to normalize the loss for loss layers that aggregate across batches, - // spatial dimensions, or other dimensions. Currently only implemented in - // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers. - enum NormalizationMode { - // Divide by the number of examples in the batch times spatial dimensions. - // Outputs that receive the ignore label will NOT be ignored in computing - // the normalization factor. - FULL = 0; - // Divide by the total number of output locations that do not take the - // ignore_label. If ignore_label is not set, this behaves like FULL. - VALID = 1; - // Divide by the batch size. - BATCH_SIZE = 2; - // Do not normalize the loss. - NONE = 3; - } - // For historical reasons, the default normalization for - // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID. - optional NormalizationMode normalization = 3 [default = VALID]; - // Deprecated. Ignored if normalization is specified. If normalization - // is not specified, then setting this to false will be equivalent to - // normalization = BATCH_SIZE to be consistent with previous behavior. - optional bool normalize = 2; -} - -// Messages that store parameters used by individual layer types follow, in -// alphabetical order. - -message AccuracyParameter { - // When computing accuracy, count as correct by comparing the true label to - // the top k scoring classes. By default, only compare to the top scoring - // class (i.e. argmax). - optional uint32 top_k = 1 [default = 1]; - - // The "label" axis of the prediction blob, whose argmax corresponds to the - // predicted label -- may be negative to index from the end (e.g., -1 for the - // last axis). For example, if axis == 1 and the predictions are - // (N x C x H x W), the label blob is expected to contain N*H*W ground truth - // labels with integer values in {0, 1, ..., C-1}. - optional int32 axis = 2 [default = 1]; - - // If specified, ignore instances with the given label. - optional int32 ignore_label = 3; -} - -message ArgMaxParameter { - // If true produce pairs (argmax, maxval) - optional bool out_max_val = 1 [default = false]; - optional uint32 top_k = 2 [default = 1]; - // The axis along which to maximise -- may be negative to index from the - // end (e.g., -1 for the last axis). - // By default ArgMaxLayer maximizes over the flattened trailing dimensions - // for each index of the first / num dimension. - optional int32 axis = 3; -} - -message ConcatParameter { - // The axis along which to concatenate -- may be negative to index from the - // end (e.g., -1 for the last axis). Other axes must have the - // same dimension for all the bottom blobs. - // By default, ConcatLayer concatenates blobs along the "channels" axis (1). - optional int32 axis = 2 [default = 1]; - - // DEPRECATED: alias for "axis" -- does not support negative indexing. - optional uint32 concat_dim = 1 [default = 1]; -} - -message BatchNormParameter { - // If false, accumulate global mean/variance values via a moving average. If - // true, use those accumulated values instead of computing mean/variance - // across the batch. - optional bool use_global_stats = 1; - // How much does the moving average decay each iteration? - optional float moving_average_fraction = 2 [default = .999]; - // Small value to add to the variance estimate so that we don't divide by - // zero. 
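The four NormalizationMode values above translate into a divisor for the raw loss. An illustrative sketch follows (names invented; the real layers count valid outputs while accumulating the loss).

enum NormalizationMode { FULL = 0, VALID = 1, BATCH_SIZE = 2, NONE = 3 };

// `valid_count` is the number of outputs that did not receive ignore_label;
// pass a negative value when no ignore_label is set, in which case VALID
// behaves like FULL, as documented above.
float LossNormalizer(NormalizationMode mode, int batch_size,
                     int spatial_dim, int valid_count) {
  switch (mode) {
    case FULL:       return static_cast<float>(batch_size * spatial_dim);
    case VALID:      return valid_count >= 0
                         ? static_cast<float>(valid_count)
                         : static_cast<float>(batch_size * spatial_dim);
    case BATCH_SIZE: return static_cast<float>(batch_size);
    case NONE:       return 1.f;
  }
  return 1.f;
}
// The normalized loss is then raw_loss / LossNormalizer(...).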
- optional float eps = 3 [default = 1e-5]; -} - -message BiasParameter { - // The first axis of bottom[0] (the first input Blob) along which to apply - // bottom[1] (the second input Blob). May be negative to index from the end - // (e.g., -1 for the last axis). - // - // For example, if bottom[0] is 4D with shape 100x3x40x60, the output - // top[0] will have the same shape, and bottom[1] may have any of the - // following shapes (for the given value of axis): - // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 - // (axis == 1 == -3) 3; 3x40; 3x40x60 - // (axis == 2 == -2) 40; 40x60 - // (axis == 3 == -1) 60 - // Furthermore, bottom[1] may have the empty shape (regardless of the value of - // "axis") -- a scalar bias. - optional int32 axis = 1 [default = 1]; - - // (num_axes is ignored unless just one bottom is given and the bias is - // a learned parameter of the layer. Otherwise, num_axes is determined by the - // number of axes by the second bottom.) - // The number of axes of the input (bottom[0]) covered by the bias - // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. - // Set num_axes := 0, to add a zero-axis Blob: a scalar. - optional int32 num_axes = 2 [default = 1]; - - // (filler is ignored unless just one bottom is given and the bias is - // a learned parameter of the layer.) - // The initialization for the learned bias parameter. - // Default is the zero (0) initialization, resulting in the BiasLayer - // initially performing the identity operation. - optional FillerParameter filler = 3; -} - -message ContrastiveLossParameter { - // margin for dissimilar pair - optional float margin = 1 [default = 1.0]; - // The first implementation of this cost did not exactly match the cost of - // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. - // legacy_version = false (the default) uses (margin - d)^2 as proposed in the - // Hadsell paper. New models should probably use this version. - // legacy_version = true uses (margin - d^2). This is kept to support / - // reproduce existing models and results - optional bool legacy_version = 2 [default = false]; -} - -message ConvolutionParameter { - optional uint32 num_output = 1; // The number of outputs for the layer - optional bool bias_term = 2 [default = true]; // whether to have bias terms - - // Pad, kernel size, and stride are all given as a single value for equal - // dimensions in all spatial dimensions, or once per spatial dimension. - repeated uint32 pad = 3; // The padding size; defaults to 0 - repeated uint32 kernel_size = 4; // The kernel size - repeated uint32 stride = 6; // The stride; defaults to 1 - // Factor used to dilate the kernel, (implicitly) zero-filling the resulting - // holes. (Kernel dilation is sometimes referred to by its use in the - // algorithme à trous from Holschneider et al. 1987.) - repeated uint32 dilation = 18; // The dilation; defaults to 1 - - // For 2D convolution only, the *_h and *_w versions may also be used to - // specify both spatial dimensions. 
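For the pad / kernel_size / stride / dilation fields above (and their 2-D *_h/*_w variants that follow), the resulting spatial output size per dimension is the same expression used by im2col_cpu() later in this patch; an illustrative helper:

// Effective kernel extent of dilation * (kernel - 1) + 1 accounts for the
// zero-filled holes introduced by dilation.
int ConvOutputDim(int input, int kernel, int pad, int stride, int dilation) {
  const int kernel_extent = dilation * (kernel - 1) + 1;
  return (input + 2 * pad - kernel_extent) / stride + 1;
}
// Example: input 224, kernel 3, pad 1, stride 2, dilation 1
// gives (224 + 2 - 3) / 2 + 1 = 112.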
- optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) - optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) - optional uint32 kernel_h = 11; // The kernel height (2D only) - optional uint32 kernel_w = 12; // The kernel width (2D only) - optional uint32 stride_h = 13; // The stride height (2D only) - optional uint32 stride_w = 14; // The stride width (2D only) - - optional uint32 group = 5 [default = 1]; // The group size for group conv - - optional FillerParameter weight_filler = 7; // The filler for the weight - optional FillerParameter bias_filler = 8; // The filler for the bias - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 15 [default = DEFAULT]; - - // The axis to interpret as "channels" when performing convolution. - // Preceding dimensions are treated as independent inputs; - // succeeding dimensions are treated as "spatial". - // With (N, C, H, W) inputs, and axis == 1 (the default), we perform - // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for - // groups g>1) filters across the spatial axes (H, W) of the input. - // With (N, C, D, H, W) inputs, and axis == 1, we perform - // N independent 3D convolutions, sliding (C/g)-channels - // filters across the spatial axes (D, H, W) of the input. - optional int32 axis = 16 [default = 1]; - - // Whether to force use of the general ND convolution, even if a specific - // implementation for blobs of the appropriate number of spatial dimensions - // is available. (Currently, there is only a 2D-specific convolution - // implementation; for input blobs with num_axes != 2, this option is - // ignored and the ND implementation will be used.) - optional bool force_nd_im2col = 17 [default = false]; -} - -message CropParameter { - // To crop, elements of the first bottom are selected to fit the dimensions - // of the second, reference bottom. The crop is configured by - // - the crop `axis` to pick the dimensions for cropping - // - the crop `offset` to set the shift for all/each dimension - // to align the cropped bottom with the reference bottom. - // All dimensions up to but excluding `axis` are preserved, while - // the dimensions including and trailing `axis` are cropped. - // If only one `offset` is set, then all dimensions are offset by this amount. - // Otherwise, the number of offsets must equal the number of cropped axes to - // shift the crop in each dimension accordingly. - // Note: standard dimensions are N,C,H,W so the default is a spatial crop, - // and `axis` may be negative to index from the end (e.g., -1 for the last - // axis). - optional int32 axis = 1 [default = 2]; - repeated uint32 offset = 2; -} - -message DataParameter { - enum DB { - LEVELDB = 0; - LMDB = 1; - } - // Specify the data source. - optional string source = 1; - // Specify the batch size. - optional uint32 batch_size = 4; - // The rand_skip variable is for the data layer to skip a few data points - // to avoid all asynchronous sgd clients to start at the same point. The skip - // point would be set as rand_skip * rand(0,1). Note that rand_skip should not - // be larger than the number of keys in the database. - // DEPRECATED. Each solver accesses a different subset of the database. - optional uint32 rand_skip = 7 [default = 0]; - optional DB backend = 8 [default = LEVELDB]; - // DEPRECATED. See TransformationParameter. For data pre-processing, we can do - // simple scaling and subtracting the data mean, if provided. 
Note that the - // mean subtraction is always carried out before scaling. - optional float scale = 2 [default = 1]; - optional string mean_file = 3; - // DEPRECATED. See TransformationParameter. Specify if we would like to randomly - // crop an image. - optional uint32 crop_size = 5 [default = 0]; - // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror - // data. - optional bool mirror = 6 [default = false]; - // Force the encoded image to have 3 color channels - optional bool force_encoded_color = 9 [default = false]; - // Prefetch queue (Increase if data feeding bandwidth varies, within the - // limit of device memory for GPU training) - optional uint32 prefetch = 10 [default = 4]; -} - -message DropoutParameter { - optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio -} - -// DummyDataLayer fills any number of arbitrarily shaped blobs with random -// (or constant) data generated by "Fillers" (see "message FillerParameter"). -message DummyDataParameter { - // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N - // shape fields, and 0, 1 or N data_fillers. - // - // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. - // If 1 data_filler is specified, it is applied to all top blobs. If N are - // specified, the ith is applied to the ith top blob. - repeated FillerParameter data_filler = 1; - repeated BlobShape shape = 6; - - // 4D dimensions -- deprecated. Use "shape" instead. - repeated uint32 num = 2; - repeated uint32 channels = 3; - repeated uint32 height = 4; - repeated uint32 width = 5; -} - -message EltwiseParameter { - enum EltwiseOp { - PROD = 0; - SUM = 1; - MAX = 2; - } - optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation - repeated float coeff = 2; // blob-wise coefficient for SUM operation - - // Whether to use an asymptotically slower (for >2 inputs) but stabler method - // of computing the gradient for the PROD operation. (No effect for SUM op.) - optional bool stable_prod_grad = 3 [default = true]; -} - -// Message that stores parameters used by ELULayer -message ELUParameter { - // Described in: - // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate - // Deep Network Learning by Exponential Linear Units (ELUs). arXiv - optional float alpha = 1 [default = 1]; -} - -// Message that stores parameters used by EmbedLayer -message EmbedParameter { - optional uint32 num_output = 1; // The number of outputs for the layer - // The input is given as integers to be interpreted as one-hot - // vector indices with dimension num_input. Hence num_input should be - // 1 greater than the maximum possible input value. - optional uint32 input_dim = 2; - - optional bool bias_term = 3 [default = true]; // Whether to use a bias term - optional FillerParameter weight_filler = 4; // The filler for the weight - optional FillerParameter bias_filler = 5; // The filler for the bias - -} - -// Message that stores parameters used by ExpLayer -message ExpParameter { - // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. - // Or if base is set to the default (-1), base is set to e, - // so y = exp(shift + scale * x). - optional float base = 1 [default = -1.0]; - optional float scale = 2 [default = 1.0]; - optional float shift = 3 [default = 0.0]; -} - -/// Message that stores parameters used by FlattenLayer -message FlattenParameter { - // The first axis to flatten: all preceding axes are retained in the output. 
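The three EltwiseOp values documented above, including the SUM-only coeff weighting, can be sketched per output position (illustrative names, a non-empty input list assumed).

#include <algorithm>
#include <vector>

enum EltwiseOp { PROD = 0, SUM = 1, MAX = 2 };

// Combine the values of N input blobs at one output position.
float EltwiseForwardAt(EltwiseOp op, const std::vector<float>& inputs,
                       const std::vector<float>& coeff) {
  float result = (op == PROD) ? 1.f : (op == MAX ? inputs[0] : 0.f);
  for (size_t i = 0; i < inputs.size(); ++i) {
    switch (op) {
      case PROD: result *= inputs[i]; break;
      case SUM:  result += (i < coeff.size() ? coeff[i] : 1.f) * inputs[i]; break;
      case MAX:  result = std::max(result, inputs[i]); break;
    }
  }
  return result;
}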
- // May be negative to index from the end (e.g., -1 for the last axis). - optional int32 axis = 1 [default = 1]; - - // The last axis to flatten: all following axes are retained in the output. - // May be negative to index from the end (e.g., the default -1 for the last - // axis). - optional int32 end_axis = 2 [default = -1]; -} - -// Message that stores parameters used by HDF5DataLayer -message HDF5DataParameter { - // Specify the data source. - optional string source = 1; - // Specify the batch size. - optional uint32 batch_size = 2; - - // Specify whether to shuffle the data. - // If shuffle == true, the ordering of the HDF5 files is shuffled, - // and the ordering of data within any given HDF5 file is shuffled, - // but data between different files are not interleaved; all of a file's - // data are output (in a random order) before moving onto another file. - optional bool shuffle = 3 [default = false]; -} - -message HDF5OutputParameter { - optional string file_name = 1; -} - -message HingeLossParameter { - enum Norm { - L1 = 1; - L2 = 2; - } - // Specify the Norm to use L1 or L2 - optional Norm norm = 1 [default = L1]; -} - -message ImageDataParameter { - // Specify the data source. - optional string source = 1; - // Specify the batch size. - optional uint32 batch_size = 4 [default = 1]; - // The rand_skip variable is for the data layer to skip a few data points - // to avoid all asynchronous sgd clients to start at the same point. The skip - // point would be set as rand_skip * rand(0,1). Note that rand_skip should not - // be larger than the number of keys in the database. - optional uint32 rand_skip = 7 [default = 0]; - // Whether or not ImageLayer should shuffle the list of files at every epoch. - optional bool shuffle = 8 [default = false]; - // It will also resize images if new_height or new_width are not zero. - optional uint32 new_height = 9 [default = 0]; - optional uint32 new_width = 10 [default = 0]; - // Specify if the images are color or gray - optional bool is_color = 11 [default = true]; - // DEPRECATED. See TransformationParameter. For data pre-processing, we can do - // simple scaling and subtracting the data mean, if provided. Note that the - // mean subtraction is always carried out before scaling. - optional float scale = 2 [default = 1]; - optional string mean_file = 3; - // DEPRECATED. See TransformationParameter. Specify if we would like to randomly - // crop an image. - optional uint32 crop_size = 5 [default = 0]; - // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror - // data. - optional bool mirror = 6 [default = false]; - optional string root_folder = 12 [default = ""]; -} - -message InfogainLossParameter { - // Specify the infogain matrix source. - optional string source = 1; -} - -message InnerProductParameter { - optional uint32 num_output = 1; // The number of outputs for the layer - optional bool bias_term = 2 [default = true]; // whether to have bias terms - optional FillerParameter weight_filler = 3; // The filler for the weight - optional FillerParameter bias_filler = 4; // The filler for the bias - - // The first axis to be lumped into a single inner product computation; - // all preceding axes are retained in the output. - // May be negative to index from the end (e.g., -1 for the last axis). - optional int32 axis = 5 [default = 1]; - // Specify whether to transpose the weight matrix or not. - // If transpose == true, any operations will be performed on the transpose - // of the weight matrix. 
The weight matrix itself is not going to be transposed - // but rather the transfer flag of operations will be toggled accordingly. - optional bool transpose = 6 [default = false]; -} - -message InputParameter { - // This layer produces N >= 1 top blob(s) to be assigned manually. - // Define N shapes to set a shape for each top. - // Define 1 shape to set the same shape for every top. - // Define no shape to defer to reshaping manually. - repeated BlobShape shape = 1; -} - -// Message that stores parameters used by LogLayer -message LogParameter { - // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. - // Or if base is set to the default (-1), base is set to e, - // so y = ln(shift + scale * x) = log_e(shift + scale * x) - optional float base = 1 [default = -1.0]; - optional float scale = 2 [default = 1.0]; - optional float shift = 3 [default = 0.0]; -} - -// Message that stores parameters used by LRNLayer -message LRNParameter { - optional uint32 local_size = 1 [default = 5]; - optional float alpha = 2 [default = 1.]; - optional float beta = 3 [default = 0.75]; - enum NormRegion { - ACROSS_CHANNELS = 0; - WITHIN_CHANNEL = 1; - } - optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; - optional float k = 5 [default = 1.]; - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 6 [default = DEFAULT]; -} - -message MemoryDataParameter { - optional uint32 batch_size = 1; - optional uint32 channels = 2; - optional uint32 height = 3; - optional uint32 width = 4; -} - -message MVNParameter { - // This parameter can be set to false to normalize mean only - optional bool normalize_variance = 1 [default = true]; - - // This parameter can be set to true to perform DNN-like MVN - optional bool across_channels = 2 [default = false]; - - // Epsilon for not dividing by zero while normalizing variance - optional float eps = 3 [default = 1e-9]; -} - -message ParameterParameter { - optional BlobShape shape = 1; -} - -message PoolingParameter { - enum PoolMethod { - MAX = 0; - AVE = 1; - STOCHASTIC = 2; - } - optional PoolMethod pool = 1 [default = MAX]; // The pooling method - // Pad, kernel size, and stride are all given as a single value for equal - // dimensions in height and width or as Y, X pairs. - optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) - optional uint32 pad_h = 9 [default = 0]; // The padding height - optional uint32 pad_w = 10 [default = 0]; // The padding width - optional uint32 kernel_size = 2; // The kernel size (square) - optional uint32 kernel_h = 5; // The kernel height - optional uint32 kernel_w = 6; // The kernel width - optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) - optional uint32 stride_h = 7; // The stride height - optional uint32 stride_w = 8; // The stride width - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 11 [default = DEFAULT]; - // If global_pooling then it will pool over the size of the bottom by doing - // kernel_h = bottom->height and kernel_w = bottom->width - optional bool global_pooling = 12 [default = false]; -} - -message PowerParameter { - // PowerLayer computes outputs y = (shift + scale * x) ^ power. 
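The LogLayer formula above and the PowerLayer formula stated just before these fields are simple element-wise maps; illustrative scalar versions follow (the real layers operate on whole blobs).

#include <cmath>

// y = log_base(shift + scale * x); base == -1 means the natural logarithm.
float LogForward(float x, float base, float scale, float shift) {
  const float inner = shift + scale * x;
  return base > 0.f ? std::log(inner) / std::log(base) : std::log(inner);
}

// y = (shift + scale * x) ^ power.
float PowerForward(float x, float power, float scale, float shift) {
  return std::pow(shift + scale * x, power);
}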
- optional float power = 1 [default = 1.0]; - optional float scale = 2 [default = 1.0]; - optional float shift = 3 [default = 0.0]; -} - -message PythonParameter { - optional string module = 1; - optional string layer = 2; - // This value is set to the attribute `param_str` of the `PythonLayer` object - // in Python before calling the `setup()` method. This could be a number, - // string, dictionary in Python dict format, JSON, etc. You may parse this - // string in `setup` method and use it in `forward` and `backward`. - optional string param_str = 3 [default = '']; - // Whether this PythonLayer is shared among worker solvers during data parallelism. - // If true, each worker solver sequentially run forward from this layer. - // This value should be set true if you are using it as a data layer. - optional bool share_in_parallel = 4 [default = false]; -} - -// Message that stores parameters used by RecurrentLayer -message RecurrentParameter { - // The dimension of the output (and usually hidden state) representation -- - // must be explicitly set to non-zero. - optional uint32 num_output = 1 [default = 0]; - - optional FillerParameter weight_filler = 2; // The filler for the weight - optional FillerParameter bias_filler = 3; // The filler for the bias - - // Whether to enable displaying debug_info in the unrolled recurrent net. - optional bool debug_info = 4 [default = false]; - - // Whether to add as additional inputs (bottoms) the initial hidden state - // blobs, and add as additional outputs (tops) the final timestep hidden state - // blobs. The number of additional bottom/top blobs required depends on the - // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs. - optional bool expose_hidden = 5 [default = false]; -} - -// Message that stores parameters used by ReductionLayer -message ReductionParameter { - enum ReductionOp { - SUM = 1; - ASUM = 2; - SUMSQ = 3; - MEAN = 4; - } - - optional ReductionOp operation = 1 [default = SUM]; // reduction operation - - // The first axis to reduce to a scalar -- may be negative to index from the - // end (e.g., -1 for the last axis). - // (Currently, only reduction along ALL "tail" axes is supported; reduction - // of axis M through N, where N < num_axes - 1, is unsupported.) - // Suppose we have an n-axis bottom Blob with shape: - // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). - // If axis == m, the output Blob will have shape - // (d0, d1, d2, ..., d(m-1)), - // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) - // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. - // If axis == 0 (the default), the output Blob always has the empty shape - // (count 1), performing reduction across the entire input -- - // often useful for creating new loss functions. - optional int32 axis = 2 [default = 0]; - - optional float coeff = 3 [default = 1.0]; // coefficient for output -} - -// Message that stores parameters used by ReLULayer -message ReLUParameter { - // Allow non-zero slope for negative inputs to speed up optimization - // Described in: - // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities - // improve neural network acoustic models. In ICML Workshop on Deep Learning - // for Audio, Speech, and Language Processing. - optional float negative_slope = 1 [default = 0]; - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 2 [default = DEFAULT]; -} - -message ReshapeParameter { - // Specify the output dimensions. 
If some of the dimensions are set to 0, - // the corresponding dimension from the bottom layer is used (unchanged). - // Exactly one dimension may be set to -1, in which case its value is - // inferred from the count of the bottom blob and the remaining dimensions. - // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: - // - // layer { - // type: "Reshape" bottom: "input" top: "output" - // reshape_param { ... } - // } - // - // If "input" is 2D with shape 2 x 8, then the following reshape_param - // specifications are all equivalent, producing a 3D blob "output" with shape - // 2 x 2 x 4: - // - // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } - // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } - // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } - // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } - // - optional BlobShape shape = 1; - - // axis and num_axes control the portion of the bottom blob's shape that are - // replaced by (included in) the reshape. By default (axis == 0 and - // num_axes == -1), the entire bottom blob shape is included in the reshape, - // and hence the shape field must specify the entire output shape. - // - // axis may be non-zero to retain some portion of the beginning of the input - // shape (and may be negative to index from the end; e.g., -1 to begin the - // reshape after the last axis, including nothing in the reshape, - // -2 to include only the last axis, etc.). - // - // For example, suppose "input" is a 2D blob with shape 2 x 8. - // Then the following ReshapeLayer specifications are all equivalent, - // producing a blob "output" with shape 2 x 2 x 4: - // - // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } - // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } - // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } - // - // num_axes specifies the extent of the reshape. - // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on - // input axes in the range [axis, axis+num_axes]. - // num_axes may also be -1, the default, to include all remaining axes - // (starting from axis). - // - // For example, suppose "input" is a 2D blob with shape 2 x 8. - // Then the following ReshapeLayer specifications are equivalent, - // producing a blob "output" with shape 1 x 2 x 8. - // - // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } - // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } - // reshape_param { shape { dim: 1 } num_axes: 0 } - // - // On the other hand, these would produce output blob shape 2 x 1 x 8: - // - // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } - // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } - // - optional int32 axis = 2 [default = 0]; - optional int32 num_axes = 3 [default = -1]; -} - -message ScaleParameter { - // The first axis of bottom[0] (the first input Blob) along which to apply - // bottom[1] (the second input Blob). May be negative to index from the end - // (e.g., -1 for the last axis). - // - // For example, if bottom[0] is 4D with shape 100x3x40x60, the output - // top[0] will have the same shape, and bottom[1] may have any of the - // following shapes (for the given value of axis): - // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 - // (axis == 1 == -3) 3; 3x40; 3x40x60 - // (axis == 2 == -2) 40; 40x60 - // (axis == 3 == -1) 60 - // Furthermore, bottom[1] may have the empty shape (regardless of the value of - // "axis") -- a scalar multiplier. 
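The dim: 0 / dim: -1 conventions documented above, restricted to the default axis == 0, num_axes == -1 case, amount to the following shape inference (illustrative sketch, not the original ReshapeLayer code).

#include <vector>

// dim 0 copies the corresponding bottom dimension; a single -1 is inferred
// from the remaining element count of the bottom blob.
std::vector<int> InferReshape(const std::vector<int>& bottom_shape,
                              const std::vector<int>& shape_param) {
  int known_product = 1, inferred_axis = -1;
  std::vector<int> top_shape(shape_param.size());
  for (size_t i = 0; i < shape_param.size(); ++i) {
    int d = shape_param[i];
    if (d == 0) d = bottom_shape[i];  // copy dimension from the bottom
    if (d == -1) { inferred_axis = static_cast<int>(i); continue; }
    top_shape[i] = d;
    known_product *= d;
  }
  int bottom_count = 1;
  for (int d : bottom_shape) bottom_count *= d;
  if (inferred_axis >= 0) top_shape[inferred_axis] = bottom_count / known_product;
  return top_shape;
}
// Example from the comments above: bottom 2x8 with
// shape { dim: 0 dim: 2 dim: -1 } produces 2x2x4.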
- optional int32 axis = 1 [default = 1]; - - // (num_axes is ignored unless just one bottom is given and the scale is - // a learned parameter of the layer. Otherwise, num_axes is determined by the - // number of axes by the second bottom.) - // The number of axes of the input (bottom[0]) covered by the scale - // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. - // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. - optional int32 num_axes = 2 [default = 1]; - - // (filler is ignored unless just one bottom is given and the scale is - // a learned parameter of the layer.) - // The initialization for the learned scale parameter. - // Default is the unit (1) initialization, resulting in the ScaleLayer - // initially performing the identity operation. - optional FillerParameter filler = 3; - - // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but - // may be more efficient). Initialized with bias_filler (defaults to 0). - optional bool bias_term = 4 [default = false]; - optional FillerParameter bias_filler = 5; -} - -message SigmoidParameter { - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 1 [default = DEFAULT]; -} - -message SliceParameter { - // The axis along which to slice -- may be negative to index from the end - // (e.g., -1 for the last axis). - // By default, SliceLayer concatenates blobs along the "channels" axis (1). - optional int32 axis = 3 [default = 1]; - repeated uint32 slice_point = 2; - - // DEPRECATED: alias for "axis" -- does not support negative indexing. - optional uint32 slice_dim = 1 [default = 1]; -} - -// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer -message SoftmaxParameter { - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 1 [default = DEFAULT]; - - // The axis along which to perform the softmax -- may be negative to index - // from the end (e.g., -1 for the last axis). - // Any other axes will be evaluated as independent softmaxes. - optional int32 axis = 2 [default = 1]; -} - -message TanHParameter { - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 1 [default = DEFAULT]; -} - -// Message that stores parameters used by TileLayer -message TileParameter { - // The index of the axis to tile. - optional int32 axis = 1 [default = 1]; - - // The number of copies (tiles) of the blob to output. - optional int32 tiles = 2; -} - -// Message that stores parameters used by ThresholdLayer -message ThresholdParameter { - optional float threshold = 1 [default = 0]; // Strictly positive values -} - -message WindowDataParameter { - // Specify the data source. - optional string source = 1; - // For data pre-processing, we can do simple scaling and subtracting the - // data mean, if provided. Note that the mean subtraction is always carried - // out before scaling. - optional float scale = 2 [default = 1]; - optional string mean_file = 3; - // Specify the batch size. - optional uint32 batch_size = 4; - // Specify if we would like to randomly crop an image. - optional uint32 crop_size = 5 [default = 0]; - // Specify if we want to randomly mirror data. 
- optional bool mirror = 6 [default = false]; - // Foreground (object) overlap threshold - optional float fg_threshold = 7 [default = 0.5]; - // Background (non-object) overlap threshold - optional float bg_threshold = 8 [default = 0.5]; - // Fraction of batch that should be foreground objects - optional float fg_fraction = 9 [default = 0.25]; - // Amount of contextual padding to add around a window - // (used only by the window_data_layer) - optional uint32 context_pad = 10 [default = 0]; - // Mode for cropping out a detection window - // warp: cropped window is warped to a fixed size and aspect ratio - // square: the tightest square around the window is cropped - optional string crop_mode = 11 [default = "warp"]; - // cache_images: will load all images in memory for faster access - optional bool cache_images = 12 [default = false]; - // append root_folder to locate images - optional string root_folder = 13 [default = ""]; -} - -message SPPParameter { - enum PoolMethod { - MAX = 0; - AVE = 1; - STOCHASTIC = 2; - } - optional uint32 pyramid_height = 1; - optional PoolMethod pool = 2 [default = MAX]; // The pooling method - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 6 [default = DEFAULT]; -} - -// DEPRECATED: use LayerParameter. -message V1LayerParameter { - repeated string bottom = 2; - repeated string top = 3; - optional string name = 4; - repeated NetStateRule include = 32; - repeated NetStateRule exclude = 33; - enum LayerType { - NONE = 0; - ABSVAL = 35; - ACCURACY = 1; - ARGMAX = 30; - BNLL = 2; - CONCAT = 3; - CONTRASTIVE_LOSS = 37; - CONVOLUTION = 4; - DATA = 5; - DECONVOLUTION = 39; - DROPOUT = 6; - DUMMY_DATA = 32; - EUCLIDEAN_LOSS = 7; - ELTWISE = 25; - EXP = 38; - FLATTEN = 8; - HDF5_DATA = 9; - HDF5_OUTPUT = 10; - HINGE_LOSS = 28; - IM2COL = 11; - IMAGE_DATA = 12; - INFOGAIN_LOSS = 13; - INNER_PRODUCT = 14; - LRN = 15; - MEMORY_DATA = 29; - MULTINOMIAL_LOGISTIC_LOSS = 16; - MVN = 34; - POOLING = 17; - POWER = 26; - RELU = 18; - SIGMOID = 19; - SIGMOID_CROSS_ENTROPY_LOSS = 27; - SILENCE = 36; - SOFTMAX = 20; - SOFTMAX_LOSS = 21; - SPLIT = 22; - SLICE = 33; - TANH = 23; - WINDOW_DATA = 24; - THRESHOLD = 31; - } - optional LayerType type = 5; - repeated BlobProto blobs = 6; - repeated string param = 1001; - repeated DimCheckMode blob_share_mode = 1002; - enum DimCheckMode { - STRICT = 0; - PERMISSIVE = 1; - } - repeated float blobs_lr = 7; - repeated float weight_decay = 8; - repeated float loss_weight = 35; - optional AccuracyParameter accuracy_param = 27; - optional ArgMaxParameter argmax_param = 23; - optional ConcatParameter concat_param = 9; - optional ContrastiveLossParameter contrastive_loss_param = 40; - optional ConvolutionParameter convolution_param = 10; - optional DataParameter data_param = 11; - optional DropoutParameter dropout_param = 12; - optional DummyDataParameter dummy_data_param = 26; - optional EltwiseParameter eltwise_param = 24; - optional ExpParameter exp_param = 41; - optional HDF5DataParameter hdf5_data_param = 13; - optional HDF5OutputParameter hdf5_output_param = 14; - optional HingeLossParameter hinge_loss_param = 29; - optional ImageDataParameter image_data_param = 15; - optional InfogainLossParameter infogain_loss_param = 16; - optional InnerProductParameter inner_product_param = 17; - optional LRNParameter lrn_param = 18; - optional MemoryDataParameter memory_data_param = 22; - optional MVNParameter mvn_param = 34; - optional PoolingParameter pooling_param = 19; - optional PowerParameter power_param = 21; 
- optional ReLUParameter relu_param = 30; - optional SigmoidParameter sigmoid_param = 38; - optional SoftmaxParameter softmax_param = 39; - optional SliceParameter slice_param = 31; - optional TanHParameter tanh_param = 37; - optional ThresholdParameter threshold_param = 25; - optional WindowDataParameter window_data_param = 20; - optional TransformationParameter transform_param = 36; - optional LossParameter loss_param = 42; - optional V0LayerParameter layer = 1; -} - -// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters -// in Caffe. We keep this message type around for legacy support. -message V0LayerParameter { - optional string name = 1; // the layer name - optional string type = 2; // the string to specify the layer type - - // Parameters to specify layers with inner products. - optional uint32 num_output = 3; // The number of outputs for the layer - optional bool biasterm = 4 [default = true]; // whether to have bias terms - optional FillerParameter weight_filler = 5; // The filler for the weight - optional FillerParameter bias_filler = 6; // The filler for the bias - - optional uint32 pad = 7 [default = 0]; // The padding size - optional uint32 kernelsize = 8; // The kernel size - optional uint32 group = 9 [default = 1]; // The group size for group conv - optional uint32 stride = 10 [default = 1]; // The stride - enum PoolMethod { - MAX = 0; - AVE = 1; - STOCHASTIC = 2; - } - optional PoolMethod pool = 11 [default = MAX]; // The pooling method - optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio - - optional uint32 local_size = 13 [default = 5]; // for local response norm - optional float alpha = 14 [default = 1.]; // for local response norm - optional float beta = 15 [default = 0.75]; // for local response norm - optional float k = 22 [default = 1.]; - - // For data layers, specify the data source - optional string source = 16; - // For data pre-processing, we can do simple scaling and subtracting the - // data mean, if provided. Note that the mean subtraction is always carried - // out before scaling. - optional float scale = 17 [default = 1]; - optional string meanfile = 18; - // For data layers, specify the batch size. - optional uint32 batchsize = 19; - // For data layers, specify if we would like to randomly crop an image. - optional uint32 cropsize = 20 [default = 0]; - // For data layers, specify if we want to randomly mirror data. - optional bool mirror = 21 [default = false]; - - // The blobs containing the numeric parameters of the layer - repeated BlobProto blobs = 50; - // The ratio that is multiplied on the global learning rate. If you want to - // set the learning ratio for one blob, you need to set it for all blobs. - repeated float blobs_lr = 51; - // The weight decay that is multiplied on the global weight decay. - repeated float weight_decay = 52; - - // The rand_skip variable is for the data layer to skip a few data points - // to avoid all asynchronous sgd clients to start at the same point. The skip - // point would be set as rand_skip * rand(0,1). Note that rand_skip should not - // be larger than the number of keys in the database. 
- optional uint32 rand_skip = 53 [default = 0]; - - // Fields related to detection (det_*) - // foreground (object) overlap threshold - optional float det_fg_threshold = 54 [default = 0.5]; - // background (non-object) overlap threshold - optional float det_bg_threshold = 55 [default = 0.5]; - // Fraction of batch that should be foreground objects - optional float det_fg_fraction = 56 [default = 0.25]; - - // optional bool OBSOLETE_can_clobber = 57 [default = true]; - - // Amount of contextual padding to add around a window - // (used only by the window_data_layer) - optional uint32 det_context_pad = 58 [default = 0]; - - // Mode for cropping out a detection window - // warp: cropped window is warped to a fixed size and aspect ratio - // square: the tightest square around the window is cropped - optional string det_crop_mode = 59 [default = "warp"]; - - // For ReshapeLayer, one needs to specify the new dimensions. - optional int32 new_num = 60 [default = 0]; - optional int32 new_channels = 61 [default = 0]; - optional int32 new_height = 62 [default = 0]; - optional int32 new_width = 63 [default = 0]; - - // Whether or not ImageLayer should shuffle the list of files at every epoch. - // It will also resize images if new_height or new_width are not zero. - optional bool shuffle_images = 64 [default = false]; - - // For ConcatLayer, one needs to specify the dimension for concatenation, and - // the other dimensions must be the same for all the bottom blobs. - // By default it will concatenate blobs along the channels dimension. - optional uint32 concat_dim = 65 [default = 1]; - - optional HDF5OutputParameter hdf5_output_param = 1001; -} - -message PReLUParameter { - // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers: - // Surpassing Human-Level Performance on ImageNet Classification, 2015. - - // Initial value of a_i. Default is a_i=0.25 for all i. - optional FillerParameter filler = 1; - // Whether or not slope parameters are shared across channels. 
- optional bool channel_shared = 2 [default = false]; -} diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp deleted file mode 100644 index 88d9b78..0000000 --- a/src/caffe/syncedmem.cpp +++ /dev/null @@ -1,186 +0,0 @@ -#include "caffe/common.hpp" -#include "caffe/syncedmem.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { -SyncedMemory::SyncedMemory() - : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED), - own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false) { -#ifndef CPU_ONLY -#ifdef DEBUG - CUDA_CHECK(cudaGetDevice(&device_)); -#endif -#endif -} - -SyncedMemory::SyncedMemory(size_t size) - : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED), - own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false) { -#ifndef CPU_ONLY -#ifdef DEBUG - CUDA_CHECK(cudaGetDevice(&device_)); -#endif -#endif -} - -SyncedMemory::~SyncedMemory() { - check_device(); - if (cpu_ptr_ && own_cpu_data_) { - CaffeFreeHost(cpu_ptr_, cpu_malloc_use_cuda_); - } - -#ifndef CPU_ONLY - if (gpu_ptr_ && own_gpu_data_) { - CUDA_CHECK(cudaFree(gpu_ptr_)); - } -#endif // CPU_ONLY -} - -inline void SyncedMemory::to_cpu() { - check_device(); - switch (head_) { - case UNINITIALIZED: - CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_); - caffe_memset(size_, 0, cpu_ptr_); - head_ = HEAD_AT_CPU; - own_cpu_data_ = true; - break; - case HEAD_AT_GPU: -#ifndef CPU_ONLY - if (cpu_ptr_ == NULL) { - CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_); - own_cpu_data_ = true; - } - caffe_gpu_memcpy(size_, gpu_ptr_, cpu_ptr_); - head_ = SYNCED; -#else - NO_GPU; -#endif - break; - case HEAD_AT_CPU: - case SYNCED: - break; - } -} - -inline void SyncedMemory::to_gpu() { - check_device(); -#ifndef CPU_ONLY - switch (head_) { - case UNINITIALIZED: - CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); - caffe_gpu_memset(size_, 0, gpu_ptr_); - head_ = HEAD_AT_GPU; - own_gpu_data_ = true; - break; - case HEAD_AT_CPU: - if (gpu_ptr_ == NULL) { - CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); - own_gpu_data_ = true; - } - caffe_gpu_memcpy(size_, cpu_ptr_, gpu_ptr_); - head_ = SYNCED; - break; - case HEAD_AT_GPU: - case SYNCED: - break; - } -#else - NO_GPU; -#endif -} - -const void* SyncedMemory::cpu_data() { - check_device(); - to_cpu(); - return (const void*)cpu_ptr_; -} - -void SyncedMemory::set_cpu_data(void* data) { - check_device(); - CHECK(data); - if (own_cpu_data_) { - CaffeFreeHost(cpu_ptr_, cpu_malloc_use_cuda_); - } - cpu_ptr_ = data; - head_ = HEAD_AT_CPU; - own_cpu_data_ = false; -} - -const void* SyncedMemory::gpu_data() { - check_device(); -#ifndef CPU_ONLY - to_gpu(); - return (const void*)gpu_ptr_; -#else - NO_GPU; - return NULL; -#endif -} - -void SyncedMemory::set_gpu_data(void* data) { - check_device(); -#ifndef CPU_ONLY - CHECK(data); - if (own_gpu_data_) { - CUDA_CHECK(cudaFree(gpu_ptr_)); - } - gpu_ptr_ = data; - head_ = HEAD_AT_GPU; - own_gpu_data_ = false; -#else - NO_GPU; -#endif -} - -void* SyncedMemory::mutable_cpu_data() { - check_device(); - to_cpu(); - head_ = HEAD_AT_CPU; - return cpu_ptr_; -} - -void* SyncedMemory::mutable_gpu_data() { - check_device(); -#ifndef CPU_ONLY - to_gpu(); - head_ = HEAD_AT_GPU; - return gpu_ptr_; -#else - NO_GPU; - return NULL; -#endif -} - -#ifndef CPU_ONLY -void SyncedMemory::async_gpu_push(const cudaStream_t& stream) { - check_device(); - CHECK(head_ == HEAD_AT_CPU); - if (gpu_ptr_ == NULL) { - CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); - own_gpu_data_ = true; - } - const cudaMemcpyKind put = 
cudaMemcpyHostToDevice; - CUDA_CHECK(cudaMemcpyAsync(gpu_ptr_, cpu_ptr_, size_, put, stream)); - // Assume caller will synchronize on the stream before use - head_ = SYNCED; -} -#endif - -void SyncedMemory::check_device() { -#ifndef CPU_ONLY -#ifdef DEBUG - int device; - cudaGetDevice(&device); - CHECK(device == device_); - if (gpu_ptr_ && own_gpu_data_) { - cudaPointerAttributes attributes; - CUDA_CHECK(cudaPointerGetAttributes(&attributes, gpu_ptr_)); - CHECK(attributes.device == device_); - } -#endif -#endif -} - -} // namespace caffe - diff --git a/src/caffe/util/benchmark.cpp b/src/caffe/util/benchmark.cpp deleted file mode 100644 index 8f46c88..0000000 --- a/src/caffe/util/benchmark.cpp +++ /dev/null @@ -1,207 +0,0 @@ -#ifdef USE_BOOST -#include -#endif - -#include "caffe/common.hpp" -#include "caffe/util/benchmark.hpp" - -namespace caffe { - -Timer::Timer() - : initted_(false), - running_(false), - has_run_at_least_once_(false) { - Init(); -} - -Timer::~Timer() { - if (Caffe::mode() == Caffe::GPU) { -#ifndef CPU_ONLY - CUDA_CHECK(cudaEventDestroy(start_gpu_)); - CUDA_CHECK(cudaEventDestroy(stop_gpu_)); -#else - NO_GPU; -#endif - } -} - -void Timer::Start() { - if (!running()) { - if (Caffe::mode() == Caffe::GPU) { -#ifndef CPU_ONLY - CUDA_CHECK(cudaEventRecord(start_gpu_, 0)); -#else - NO_GPU; -#endif - } else { -#ifdef USE_BOOST - start_cpu_ = boost::posix_time::microsec_clock::local_time(); -#else - gettimeofday(&start_cpu_, NULL); -#endif - } - running_ = true; - has_run_at_least_once_ = true; - } -} - -void Timer::Stop() { - if (running()) { - if (Caffe::mode() == Caffe::GPU) { -#ifndef CPU_ONLY - CUDA_CHECK(cudaEventRecord(stop_gpu_, 0)); -#else - NO_GPU; -#endif - } else { -#ifdef USE_BOOST - stop_cpu_ = boost::posix_time::microsec_clock::local_time(); -#else - gettimeofday(&stop_cpu_, NULL); -#endif - } - running_ = false; - } -} - - -float Timer::MicroSeconds() { - if (!has_run_at_least_once()) { - LOG(WARNING) << "Timer has never been run before reading time."; - return 0; - } - if (running()) { - Stop(); - } - if (Caffe::mode() == Caffe::GPU) { -#ifndef CPU_ONLY - CUDA_CHECK(cudaEventSynchronize(stop_gpu_)); - CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_, - stop_gpu_)); - // Cuda only measure milliseconds - elapsed_microseconds_ = elapsed_milliseconds_ * 1000; -#else - NO_GPU; -#endif - } else { -#ifdef USE_BOOST - elapsed_microseconds_ = (stop_cpu_ - start_cpu_).total_microseconds(); -#else - elapsed_microseconds_ = (stop_cpu_.tv_sec - start_cpu_.tv_sec)*1000000 - + (stop_cpu_.tv_usec - start_cpu_.tv_usec); -#endif - } - return elapsed_microseconds_; -} - -float Timer::MilliSeconds() { - if (!has_run_at_least_once()) { - LOG(WARNING) << "Timer has never been run before reading time."; - return 0; - } - if (running()) { - Stop(); - } - if (Caffe::mode() == Caffe::GPU) { -#ifndef CPU_ONLY - CUDA_CHECK(cudaEventSynchronize(stop_gpu_)); - CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_, - stop_gpu_)); -#else - NO_GPU; -#endif - } else { -#ifdef USE_BOOST - elapsed_milliseconds_ = (stop_cpu_ - start_cpu_).total_milliseconds(); -#else - elapsed_microseconds_ = (stop_cpu_.tv_sec - start_cpu_.tv_sec)*1000 - + (stop_cpu_.tv_usec - start_cpu_.tv_usec)/1000.0; -#endif - } - return elapsed_milliseconds_; -} - -float Timer::Seconds() { - return MilliSeconds() / 1000.; -} - -void Timer::Init() { - if (!initted()) { - if (Caffe::mode() == Caffe::GPU) { -#ifndef CPU_ONLY - CUDA_CHECK(cudaEventCreate(&start_gpu_)); - 
CUDA_CHECK(cudaEventCreate(&stop_gpu_)); -#else - NO_GPU; -#endif - } - initted_ = true; - } -} - -CPUTimer::CPUTimer() { - this->initted_ = true; - this->running_ = false; - this->has_run_at_least_once_ = false; -} - -void CPUTimer::Start() { - if (!running()) { -#ifdef USE_BOOST - this->start_cpu_ = boost::posix_time::microsec_clock::local_time(); -#else - gettimeofday(&start_cpu_, NULL); -#endif - this->running_ = true; - this->has_run_at_least_once_ = true; - } -} - -void CPUTimer::Stop() { - if (running()) { -#ifdef USE_BOOST - this->stop_cpu_ = boost::posix_time::microsec_clock::local_time(); -#else - gettimeofday(&stop_cpu_, NULL); -#endif - this->running_ = false; - } -} - -float CPUTimer::MilliSeconds() { - if (!has_run_at_least_once()) { - LOG(WARNING) << "Timer has never been run before reading time."; - return 0; - } - if (running()) { - Stop(); - } -#ifdef USE_BOOST - this->elapsed_milliseconds_ = (this->stop_cpu_ - - this->start_cpu_).total_milliseconds(); -#else - elapsed_milliseconds_ = (stop_cpu_.tv_sec - start_cpu_.tv_sec)*1000 - + (stop_cpu_.tv_usec - start_cpu_.tv_usec)/1000.0; -#endif - return this->elapsed_milliseconds_; -} - -float CPUTimer::MicroSeconds() { - if (!has_run_at_least_once()) { - LOG(WARNING) << "Timer has never been run before reading time."; - return 0; - } - if (running()) { - Stop(); - } -#ifdef USE_BOOST - this->elapsed_microseconds_ = (this->stop_cpu_ - - this->start_cpu_).total_microseconds(); -#else - elapsed_microseconds_ = (stop_cpu_.tv_sec - start_cpu_.tv_sec)*1000000 - + (stop_cpu_.tv_usec - start_cpu_.tv_usec); -#endif - return this->elapsed_microseconds_; -} - -} // namespace caffe diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp deleted file mode 100644 index 114a86c..0000000 --- a/src/caffe/util/im2col.cpp +++ /dev/null @@ -1,234 +0,0 @@ -#include - -#include "caffe/util/im2col.hpp" -#include "caffe/util/math_functions.hpp" - -namespace caffe { - -// Function uses casting from int to unsigned to compare if value of -// parameter a is greater or equal to zero and lower than value of -// parameter b. The b parameter is of type signed and is always positive, -// therefore its value is always lower than 0x800... where casting -// negative value of a parameter converts it to value higher than 0x800... -// The casting allows to use one condition instead of two. 
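The casting trick explained in the comment above can be sanity-checked in isolation; a small standalone demo (not part of the original file):

#include <cassert>

// Casting a negative int to unsigned yields a huge value, so one comparison
// covers both "a >= 0" and "a < b" when b is known to be positive.
inline bool in_range(int a, int b) {
  return static_cast<unsigned int>(a) < static_cast<unsigned int>(b);
}

int main() {
  assert(in_range(0, 5));    // 0 is in [0, 5)
  assert(in_range(4, 5));    // 4 is in [0, 5)
  assert(!in_range(-1, 5));  // negative a wraps to a huge unsigned value
  assert(!in_range(5, 5));   // upper bound is exclusive
  return 0;
}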
-inline bool is_a_ge_zero_and_a_lt_b(int a, int b) { - return static_cast(a) < static_cast(b); -} - -template -void im2col_cpu(const Dtype* data_im, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - Dtype* data_col) { - const int output_h = (height + 2 * pad_h - - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int output_w = (width + 2 * pad_w - - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - const int channel_size = height * width; - for (int channel = channels; channel--; data_im += channel_size) { - for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) { - for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) { - int input_row = -pad_h + kernel_row * dilation_h; - for (int output_rows = output_h; output_rows; output_rows--) { - if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { - for (int output_cols = output_w; output_cols; output_cols--) { - *(data_col++) = 0; - } - } else { - int input_col = -pad_w + kernel_col * dilation_w; - for (int output_col = output_w; output_col; output_col--) { - if (is_a_ge_zero_and_a_lt_b(input_col, width)) { - *(data_col++) = data_im[input_row * width + input_col]; - } else { - *(data_col++) = 0; - } - input_col += stride_w; - } - } - input_row += stride_h; - } - } - } - } -} - -// Explicit instantiation -template void im2col_cpu(const float* data_im, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, - const int stride_w, const int dilation_h, const int dilation_w, - float* data_col); -template void im2col_cpu(const double* data_im, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, - const int stride_w, const int dilation_h, const int dilation_w, - double* data_col); - -template -inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col, - const int num_spatial_axes, const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - const int* dilation, Dtype* data_output) { - if (!im2col) { - int im_size = im_shape[0]; - for (int i = 0; i < num_spatial_axes; ++i) { - im_size *= im_shape[1 + i]; - } - caffe_set(im_size, Dtype(0), data_output); - } - int kernel_size = 1; - for (int i = 0; i < num_spatial_axes; ++i) { - kernel_size *= kernel_shape[i]; - } - const int channels_col = col_shape[0]; - vector d_offset(num_spatial_axes, 0); - vector d_iter(num_spatial_axes, 0); - for (int c_col = 0; c_col < channels_col; ++c_col) { - // Loop over spatial axes in reverse order to compute a per-axis offset. - int offset = c_col; - for (int d_i = num_spatial_axes - 1; d_i >= 0; --d_i) { - if (d_i < num_spatial_axes - 1) { - offset /= kernel_shape[d_i + 1]; - } - d_offset[d_i] = offset % kernel_shape[d_i]; - } - for (bool incremented = true; incremented; ) { - // Loop over spatial axes in forward order to compute the indices in the - // image and column, and whether the index lies in the padding. 
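As a quick sanity check of the output-size arithmetic the removed im2col_cpu above relies on, here is a small self-contained sketch (conv_out_size is a hypothetical helper, not part of the original sources); with dilation the effective kernel extent is dilation * (kernel - 1) + 1.

#include <cstdio>

// Number of output positions along one spatial axis, as computed in the
// removed im2col_cpu/col2im_cpu code.
static int conv_out_size(int in, int kernel, int pad, int stride, int dilation) {
  return (in + 2 * pad - (dilation * (kernel - 1) + 1)) / stride + 1;
}

int main() {
  // 5x5 input, 3x3 kernel, pad 1, stride 1, no dilation -> output stays 5 ("same" size).
  printf("%d\n", conv_out_size(5, 3, 1, 1, 1));  // prints 5
  // Same setup with dilation 2: effective kernel extent is 5, output shrinks to 3.
  printf("%d\n", conv_out_size(5, 3, 1, 1, 2));  // prints 3
  return 0;
}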
- int index_col = c_col; - int index_im = c_col / kernel_size; - bool is_padding = false; - for (int d_i = 0; d_i < num_spatial_axes; ++d_i) { - const int d = d_iter[d_i]; - const int d_im = d * stride[d_i] - pad[d_i] + - d_offset[d_i] * dilation[d_i]; - is_padding |= d_im < 0 || d_im >= im_shape[d_i + 1]; - index_col *= col_shape[d_i + 1]; - index_col += d; - index_im *= im_shape[d_i + 1]; - index_im += d_im; - } - if (im2col) { - if (is_padding) { - data_output[index_col] = 0; - } else { - data_output[index_col] = data_input[index_im]; - } - } else if (!is_padding) { // col2im - data_output[index_im] += data_input[index_col]; - } - // Loop over spatial axes in reverse order to choose an index, - // like counting. - incremented = false; - for (int d_i = num_spatial_axes - 1; d_i >= 0; --d_i) { - const int d_max = col_shape[d_i + 1]; - DCHECK_LT(d_iter[d_i], d_max); - if (d_iter[d_i] == d_max - 1) { - d_iter[d_i] = 0; - } else { // d_iter[d_i] < d_max - 1 - ++d_iter[d_i]; - incremented = true; - break; - } - } - } // while(incremented) { - } // for (int c = 0; c < channels_col; ++c) { -} - -template -void im2col_nd_cpu(const Dtype* data_im, const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - const int* dilation, Dtype* data_col) { - const bool kIm2Col = true; - im2col_nd_core_cpu(data_im, kIm2Col, num_spatial_axes, im_shape, col_shape, - kernel_shape, pad, stride, dilation, data_col); -} - -// Explicit instantiation -template void im2col_nd_cpu(const float* data_im, - const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - const int* dilation, float* data_col); -template void im2col_nd_cpu(const double* data_im, - const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - const int* dilation, double* data_col); - -template -void col2im_cpu(const Dtype* data_col, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - Dtype* data_im) { - caffe_set(height * width * channels, Dtype(0), data_im); - const int output_h = (height + 2 * pad_h - - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int output_w = (width + 2 * pad_w - - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - const int channel_size = height * width; - for (int channel = channels; channel--; data_im += channel_size) { - for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) { - for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) { - int input_row = -pad_h + kernel_row * dilation_h; - for (int output_rows = output_h; output_rows; output_rows--) { - if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { - data_col += output_w; - } else { - int input_col = -pad_w + kernel_col * dilation_w; - for (int output_col = output_w; output_col; output_col--) { - if (is_a_ge_zero_and_a_lt_b(input_col, width)) { - data_im[input_row * width + input_col] += *data_col; - } - data_col++; - input_col += stride_w; - } - } - input_row += stride_h; - } - } - } - } -} - -// Explicit instantiation -template void col2im_cpu(const float* data_col, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, - const int stride_w, const 
int dilation_h, const int dilation_w, - float* data_im); -template void col2im_cpu(const double* data_col, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, - const int stride_w, const int dilation_h, const int dilation_w, - double* data_im); - -template -void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - const int* dilation, Dtype* data_im) { - const bool kIm2Col = false; - im2col_nd_core_cpu(data_col, kIm2Col, num_spatial_axes, im_shape, col_shape, - kernel_shape, pad, stride, dilation, data_im); -} - -// Explicit instantiation -template void col2im_nd_cpu(const float* data_col, - const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - const int* dilation, float* data_im); -template void col2im_nd_cpu(const double* data_col, - const int num_spatial_axes, - const int* im_shape, const int* col_shape, - const int* kernel_shape, const int* pad, const int* stride, - const int* dilation, double* data_im); - - -} // namespace caffe diff --git a/src/caffe/util/insert_splits.cpp b/src/caffe/util/insert_splits.cpp deleted file mode 100644 index 7a899c6..0000000 --- a/src/caffe/util/insert_splits.cpp +++ /dev/null @@ -1,126 +0,0 @@ -#include -#include -#include -#include -#include - -#include "caffe/common.hpp" -#include "caffe/util/insert_splits.hpp" - -namespace caffe { - -void InsertSplits(const NetParameter& param, NetParameter* param_split) { - // Initialize by copying from the input NetParameter. - param_split->CopyFrom(param); - param_split->clear_layer(); - map > blob_name_to_last_top_idx; - map, pair > bottom_idx_to_source_top_idx; - map, int> top_idx_to_bottom_count; - map, float> top_idx_to_loss_weight; - map, int> top_idx_to_bottom_split_idx; - map layer_idx_to_layer_name; - for (int i = 0; i < param.layer_size(); ++i) { - const LayerParameter& layer_param = param.layer(i); - layer_idx_to_layer_name[i] = layer_param.name(); - for (int j = 0; j < layer_param.bottom_size(); ++j) { - const string& blob_name = layer_param.bottom(j); - if (blob_name_to_last_top_idx.find(blob_name) == - blob_name_to_last_top_idx.end()) { - LOG(FATAL) << "Unknown bottom blob '" << blob_name << "' (layer '" - << layer_param.name() << "', bottom index " << j << ")"; - } - const pair& bottom_idx = make_pair(i, j); - const pair& top_idx = blob_name_to_last_top_idx[blob_name]; - bottom_idx_to_source_top_idx[bottom_idx] = top_idx; - ++top_idx_to_bottom_count[top_idx]; - } - for (int j = 0; j < layer_param.top_size(); ++j) { - const string& blob_name = layer_param.top(j); - blob_name_to_last_top_idx[blob_name] = make_pair(i, j); - } - // A use of a top blob as a loss should be handled similarly to the use of - // a top blob as a bottom blob to another layer. 
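For orientation, a small sketch (split_blob_name is a hypothetical stand-in, not the original API) of the naming scheme the removed SplitLayerName/SplitBlobName helpers further down in this hunk implement: a top blob consumed more than once gets one split output per consumer.

#include <iostream>
#include <sstream>
#include <string>

// Mirrors the format produced by the removed SplitBlobName helper.
static std::string split_blob_name(const std::string& layer, const std::string& blob,
                                   int blob_idx, int split_idx) {
  std::ostringstream name;
  name << blob << "_" << layer << "_" << blob_idx << "_split_" << split_idx;
  return name.str();
}

int main() {
  // Blob "data" produced by layer "conv1" (top index 0) and consumed by two layers:
  std::cout << split_blob_name("conv1", "data", 0, 0) << "\n";  // data_conv1_0_split_0
  std::cout << split_blob_name("conv1", "data", 0, 1) << "\n";  // data_conv1_0_split_1
  return 0;
}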
- const int last_loss = - std::min(layer_param.loss_weight_size(), layer_param.top_size()); - for (int j = 0; j < last_loss; ++j) { - const string& blob_name = layer_param.top(j); - const pair& top_idx = blob_name_to_last_top_idx[blob_name]; - top_idx_to_loss_weight[top_idx] = layer_param.loss_weight(j); - if (top_idx_to_loss_weight[top_idx]) { - ++top_idx_to_bottom_count[top_idx]; - } - } - } - for (int i = 0; i < param.layer_size(); ++i) { - LayerParameter* layer_param = param_split->add_layer(); - layer_param->CopyFrom(param.layer(i)); - // Replace any shared bottom blobs with split layer outputs. - for (int j = 0; j < layer_param->bottom_size(); ++j) { - const pair& top_idx = - bottom_idx_to_source_top_idx[make_pair(i, j)]; - const int split_count = top_idx_to_bottom_count[top_idx]; - if (split_count > 1) { - const string& layer_name = layer_idx_to_layer_name[top_idx.first]; - const string& blob_name = layer_param->bottom(j); - layer_param->set_bottom(j, SplitBlobName(layer_name, - blob_name, top_idx.second, top_idx_to_bottom_split_idx[top_idx]++)); - } - } - // Create split layer for any top blobs used by other layer as bottom - // blobs more than once. - for (int j = 0; j < layer_param->top_size(); ++j) { - const pair& top_idx = make_pair(i, j); - const int split_count = top_idx_to_bottom_count[top_idx]; - if (split_count > 1) { - const string& layer_name = layer_idx_to_layer_name[i]; - const string& blob_name = layer_param->top(j); - LayerParameter* split_layer_param = param_split->add_layer(); - const float loss_weight = top_idx_to_loss_weight[top_idx]; - ConfigureSplitLayer(layer_name, blob_name, j, split_count, - loss_weight, split_layer_param); - if (loss_weight) { - layer_param->clear_loss_weight(); - top_idx_to_bottom_split_idx[top_idx]++; - } - } - } - } -} - -void ConfigureSplitLayer(const string& layer_name, const string& blob_name, - const int blob_idx, const int split_count, const float loss_weight, - LayerParameter* split_layer_param) { - split_layer_param->Clear(); - split_layer_param->add_bottom(blob_name); - split_layer_param->set_name(SplitLayerName(layer_name, blob_name, blob_idx)); - split_layer_param->set_type("Split"); - for (int k = 0; k < split_count; ++k) { - split_layer_param->add_top( - SplitBlobName(layer_name, blob_name, blob_idx, k)); - if (loss_weight) { - if (k == 0) { - split_layer_param->add_loss_weight(loss_weight); - } else { - split_layer_param->add_loss_weight(0); - } - } - } -} - -string SplitLayerName(const string& layer_name, const string& blob_name, - const int blob_idx) { - ostringstream split_layer_name; - split_layer_name << blob_name << "_" << layer_name << "_" << blob_idx - << "_split"; - return split_layer_name.str(); -} - -string SplitBlobName(const string& layer_name, const string& blob_name, - const int blob_idx, const int split_idx) { - ostringstream split_blob_name; - split_blob_name << blob_name << "_" << layer_name << "_" << blob_idx - << "_split_" << split_idx; - return split_blob_name.str(); -} - -} // namespace caffe diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp deleted file mode 100644 index e65fd4c..0000000 --- a/src/caffe/util/io.cpp +++ /dev/null @@ -1,241 +0,0 @@ -#include -#include -#include -#include -#ifdef USE_OPENCV -#include -#include -#include -#include -#endif // USE_OPENCV -#include - -#include -#include // NOLINT(readability/streams) -#include -#include - -#include "caffe/common.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/util/io.hpp" -#ifndef NO_CAFFE_MOBILE -#include -#endif - -const 
int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte. - -namespace caffe { - -using google::protobuf::io::FileInputStream; -using google::protobuf::io::FileOutputStream; -using google::protobuf::io::ZeroCopyInputStream; -using google::protobuf::io::CodedInputStream; -using google::protobuf::io::ZeroCopyOutputStream; -using google::protobuf::io::CodedOutputStream; -using google::protobuf::Message; - -bool ReadProtoFromTextFile(const char* filename, Message* proto) { - int fd = open(filename, O_RDONLY); - CHECK_NE(fd, -1) << "File not found: " << filename; - FileInputStream* input = new FileInputStream(fd); - bool success = google::protobuf::TextFormat::Parse(input, proto); - delete input; - close(fd); - return success; -} - -void WriteProtoToTextFile(const Message& proto, const char* filename) { - int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); - FileOutputStream* output = new FileOutputStream(fd); - CHECK(google::protobuf::TextFormat::Print(proto, output)); - delete output; - close(fd); -} - -bool ReadProtoFromBinaryFile(const char* filename, Message* proto) { - int fd = open(filename, O_RDONLY); - CHECK_NE(fd, -1) << "File not found: " << filename; - ZeroCopyInputStream* raw_input = new FileInputStream(fd); - CodedInputStream* coded_input = new CodedInputStream(raw_input); - coded_input->SetTotalBytesLimit(kProtoReadBytesLimit, 536870912); - - bool success = proto->ParseFromCodedStream(coded_input); - - delete coded_input; - delete raw_input; - close(fd); - return success; -} - -void WriteProtoToBinaryFile(const Message& proto, const char* filename) { - fstream output(filename, ios::out | ios::trunc | ios::binary); - CHECK(proto.SerializeToOstream(&output)); -} - -#ifdef USE_OPENCV -cv::Mat ReadImageToCVMat(const string& filename, - const int height, const int width, const bool is_color) { - cv::Mat cv_img; - int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR : - CV_LOAD_IMAGE_GRAYSCALE); - cv::Mat cv_img_origin = cv::imread(filename, cv_read_flag); - if (!cv_img_origin.data) { - LOG(ERROR) << "Could not open or find file " << filename; - return cv_img_origin; - } - if (height > 0 && width > 0) { - cv::resize(cv_img_origin, cv_img, cv::Size(width, height)); - } else { - cv_img = cv_img_origin; - } - return cv_img; -} - -cv::Mat ReadImageToCVMat(const string& filename, - const int height, const int width) { - return ReadImageToCVMat(filename, height, width, true); -} - -cv::Mat ReadImageToCVMat(const string& filename, - const bool is_color) { - return ReadImageToCVMat(filename, 0, 0, is_color); -} - -cv::Mat ReadImageToCVMat(const string& filename) { - return ReadImageToCVMat(filename, 0, 0, true); -} - -// Do the file extension and encoding match? -static bool matchExt(const std::string & fn, - std::string en) { - size_t p = fn.rfind('.'); - std::string ext = p != fn.npos ? 
fn.substr(p) : fn; - std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower); - std::transform(en.begin(), en.end(), en.begin(), ::tolower); - if ( ext == en ) - return true; - if ( en == "jpg" && ext == "jpeg" ) - return true; - return false; -} - -bool ReadImageToDatum(const string& filename, const int label, - const int height, const int width, const bool is_color, - const std::string & encoding, Datum* datum) { - cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color); - if (cv_img.data) { - if (encoding.size()) { - if ( (cv_img.channels() == 3) == is_color && !height && !width && - matchExt(filename, encoding) ) - return ReadFileToDatum(filename, label, datum); - std::vector buf; - cv::imencode("."+encoding, cv_img, buf); - datum->set_data(std::string(reinterpret_cast(&buf[0]), - buf.size())); - datum->set_label(label); - datum->set_encoded(true); - return true; - } - CVMatToDatum(cv_img, datum); - datum->set_label(label); - return true; - } else { - return false; - } -} -#endif // USE_OPENCV - -bool ReadFileToDatum(const string& filename, const int label, - Datum* datum) { - std::streampos size; - - fstream file(filename.c_str(), ios::in|ios::binary|ios::ate); - if (file.is_open()) { - size = file.tellg(); - std::string buffer(size, ' '); - file.seekg(0, ios::beg); - file.read(&buffer[0], size); - file.close(); - datum->set_data(buffer); - datum->set_label(label); - datum->set_encoded(true); - return true; - } else { - return false; - } -} - -#ifdef USE_OPENCV -cv::Mat DecodeDatumToCVMatNative(const Datum& datum) { - cv::Mat cv_img; - CHECK(datum.encoded()) << "Datum not encoded"; - const string& data = datum.data(); - std::vector vec_data(data.c_str(), data.c_str() + data.size()); - cv_img = cv::imdecode(vec_data, -1); - if (!cv_img.data) { - LOG(ERROR) << "Could not decode datum "; - } - return cv_img; -} -cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color) { - cv::Mat cv_img; - CHECK(datum.encoded()) << "Datum not encoded"; - const string& data = datum.data(); - std::vector vec_data(data.c_str(), data.c_str() + data.size()); - int cv_read_flag = (is_color ? 
CV_LOAD_IMAGE_COLOR : - CV_LOAD_IMAGE_GRAYSCALE); - cv_img = cv::imdecode(vec_data, cv_read_flag); - if (!cv_img.data) { - LOG(ERROR) << "Could not decode datum "; - } - return cv_img; -} - -// If Datum is encoded will decoded using DecodeDatumToCVMat and CVMatToDatum -// If Datum is not encoded will do nothing -bool DecodeDatumNative(Datum* datum) { - if (datum->encoded()) { - cv::Mat cv_img = DecodeDatumToCVMatNative((*datum)); - CVMatToDatum(cv_img, datum); - return true; - } else { - return false; - } -} -bool DecodeDatum(Datum* datum, bool is_color) { - if (datum->encoded()) { - cv::Mat cv_img = DecodeDatumToCVMat((*datum), is_color); - CVMatToDatum(cv_img, datum); - return true; - } else { - return false; - } -} - -void CVMatToDatum(const cv::Mat& cv_img, Datum* datum) { - CHECK(cv_img.depth() == CV_8U) << "Image data type must be unsigned byte"; - datum->set_channels(cv_img.channels()); - datum->set_height(cv_img.rows); - datum->set_width(cv_img.cols); - datum->clear_data(); - datum->clear_float_data(); - datum->set_encoded(false); - int datum_channels = datum->channels(); - int datum_height = datum->height(); - int datum_width = datum->width(); - int datum_size = datum_channels * datum_height * datum_width; - std::string buffer(datum_size, ' '); - for (int h = 0; h < datum_height; ++h) { - const uchar* ptr = cv_img.ptr(h); - int img_index = 0; - for (int w = 0; w < datum_width; ++w) { - for (int c = 0; c < datum_channels; ++c) { - int datum_index = (c * datum_height + h) * datum_width + w; - buffer[datum_index] = static_cast(ptr[img_index++]); - } - } - } - datum->set_data(buffer); -} -#endif // USE_OPENCV -} // namespace caffe diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp deleted file mode 100644 index 66e5c98..0000000 --- a/src/caffe/util/math_functions.cpp +++ /dev/null @@ -1,418 +0,0 @@ -#ifdef USE_BOOST -#include -#include -#else -#include -#endif // USE_BOOST - -#include - -#include "caffe/common.hpp" -#include "caffe/util/math_functions.hpp" -#include "caffe/util/rng.hpp" - -namespace caffe { - -template<> -void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA, - const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, - const float alpha, const float* A, const float* B, const float beta, - float* C) { - int lda = (TransA == CblasNoTrans) ? K : M; - int ldb = (TransB == CblasNoTrans) ? N : K; - cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, - ldb, beta, C, N); -} - -template<> -void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA, - const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, - const double alpha, const double* A, const double* B, const double beta, - double* C) { - int lda = (TransA == CblasNoTrans) ? K : M; - int ldb = (TransB == CblasNoTrans) ? 
N : K; - cblas_dgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B, - ldb, beta, C, N); -} - -template <> -void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, - const int N, const float alpha, const float* A, const float* x, - const float beta, float* y) { - cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); -} - -template <> -void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, - const int N, const double alpha, const double* A, const double* x, - const double beta, double* y) { - cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); -} - -template <> -void caffe_axpy(const int N, const float alpha, const float* X, - float* Y) { cblas_saxpy(N, alpha, X, 1, Y, 1); } - -template <> -void caffe_axpy(const int N, const double alpha, const double* X, - double* Y) { cblas_daxpy(N, alpha, X, 1, Y, 1); } - -template -void caffe_set(const int N, const Dtype alpha, Dtype* Y) { - if (alpha == 0) { - memset(Y, 0, sizeof(Dtype) * N); // NOLINT(caffe/alt_fn) - return; - } - for (int i = 0; i < N; ++i) { - Y[i] = alpha; - } -} - -template void caffe_set(const int N, const int alpha, int* Y); -template void caffe_set(const int N, const float alpha, float* Y); -template void caffe_set(const int N, const double alpha, double* Y); - -template <> -void caffe_add_scalar(const int N, const float alpha, float* Y) { - for (int i = 0; i < N; ++i) { - Y[i] += alpha; - } -} - -template <> -void caffe_add_scalar(const int N, const double alpha, double* Y) { - for (int i = 0; i < N; ++i) { - Y[i] += alpha; - } -} - -template -void caffe_copy(const int N, const Dtype* X, Dtype* Y) { - if (X != Y) { - if (Caffe::mode() == Caffe::GPU) { -#ifndef CPU_ONLY - // NOLINT_NEXT_LINE(caffe/alt_fn) - CUDA_CHECK(cudaMemcpy(Y, X, sizeof(Dtype) * N, cudaMemcpyDefault)); -#else - NO_GPU; -#endif - } else { - memcpy(Y, X, sizeof(Dtype) * N); // NOLINT(caffe/alt_fn) - } - } -} - -template void caffe_copy(const int N, const int* X, int* Y); -template void caffe_copy(const int N, const unsigned int* X, - unsigned int* Y); -template void caffe_copy(const int N, const float* X, float* Y); -template void caffe_copy(const int N, const double* X, double* Y); - -template <> -void caffe_scal(const int N, const float alpha, float *X) { - cblas_sscal(N, alpha, X, 1); -} - -template <> -void caffe_scal(const int N, const double alpha, double *X) { - cblas_dscal(N, alpha, X, 1); -} - -template <> -void caffe_cpu_axpby(const int N, const float alpha, const float* X, - const float beta, float* Y) { - cblas_saxpby(N, alpha, X, 1, beta, Y, 1); -} - -template <> -void caffe_cpu_axpby(const int N, const double alpha, const double* X, - const double beta, double* Y) { - cblas_daxpby(N, alpha, X, 1, beta, Y, 1); -} - -template <> -void caffe_add(const int n, const float* a, const float* b, - float* y) { - vsAdd(n, a, b, y); -} - -template <> -void caffe_add(const int n, const double* a, const double* b, - double* y) { - vdAdd(n, a, b, y); -} - -template <> -void caffe_sub(const int n, const float* a, const float* b, - float* y) { - vsSub(n, a, b, y); -} - -template <> -void caffe_sub(const int n, const double* a, const double* b, - double* y) { - vdSub(n, a, b, y); -} - -template <> -void caffe_mul(const int n, const float* a, const float* b, - float* y) { - vsMul(n, a, b, y); -} - -template <> -void caffe_mul(const int n, const double* a, const double* b, - double* y) { - vdMul(n, a, b, y); -} - -template <> -void caffe_div(const int n, const float* a, const float* b, - float* y) { 
- vsDiv(n, a, b, y); -} - -template <> -void caffe_div(const int n, const double* a, const double* b, - double* y) { - vdDiv(n, a, b, y); -} - -template <> -void caffe_powx(const int n, const float* a, const float b, - float* y) { - vsPowx(n, a, b, y); -} - -template <> -void caffe_powx(const int n, const double* a, const double b, - double* y) { - vdPowx(n, a, b, y); -} - -template <> -void caffe_sqr(const int n, const float* a, float* y) { - vsSqr(n, a, y); -} - -template <> -void caffe_sqr(const int n, const double* a, double* y) { - vdSqr(n, a, y); -} - -template <> -void caffe_exp(const int n, const float* a, float* y) { - vsExp(n, a, y); -} - -template <> -void caffe_exp(const int n, const double* a, double* y) { - vdExp(n, a, y); -} - -template <> -void caffe_log(const int n, const float* a, float* y) { - vsLn(n, a, y); -} - -template <> -void caffe_log(const int n, const double* a, double* y) { - vdLn(n, a, y); -} - -template <> -void caffe_abs(const int n, const float* a, float* y) { - vsAbs(n, a, y); -} - -template <> -void caffe_abs(const int n, const double* a, double* y) { - vdAbs(n, a, y); -} - -unsigned int caffe_rng_rand() { - return (*caffe_rng())(); -} - -#ifdef USE_BOOST -template -Dtype caffe_nextafter(const Dtype b) { - return boost::math::nextafter( - b, std::numeric_limits::max()); -} - -template -float caffe_nextafter(const float b); - -template -double caffe_nextafter(const double b); -#else -// std::nextafter has some problems with tr1 & _GLIBCXX_USE_C99_MATH_TR1 -// when using android ndk -float caffe_nextafter(const float b) { - return ::nextafterf(b, std::numeric_limits::max()); -} -double caffe_nextafter(const double b) { - return ::nextafter(b, std::numeric_limits::max()); -} -#endif - -template -void caffe_rng_uniform(const int n, const Dtype a, const Dtype b, Dtype* r) { - CHECK_GE(n, 0); - CHECK(r); - CHECK_LE(a, b); -#ifdef USE_BOOST - boost::uniform_real random_distribution(a, caffe_nextafter(b)); - boost::variate_generator > - variate_generator(caffe_rng(), random_distribution); - for (int i = 0; i < n; ++i) { - r[i] = variate_generator(); - } -#else - std::uniform_real_distribution random_distribution(a, caffe_nextafter(b)); - for (int i = 0; i < n; ++i) { - r[i] = random_distribution(*caffe_rng()); - } -#endif -} - -template -void caffe_rng_uniform(const int n, const float a, const float b, - float* r); - -template -void caffe_rng_uniform(const int n, const double a, const double b, - double* r); - -template -void caffe_rng_gaussian(const int n, const Dtype a, - const Dtype sigma, Dtype* r) { - CHECK_GE(n, 0); - CHECK(r); - CHECK_GT(sigma, 0); -#ifdef USE_BOOST - boost::normal_distribution random_distribution(a, sigma); - boost::variate_generator > - variate_generator(caffe_rng(), random_distribution); - for (int i = 0; i < n; ++i) { - r[i] = variate_generator(); - } -#else - std::normal_distribution random_distribution(a, sigma); - for (int i = 0; i < n; ++i) { - r[i] = random_distribution(*caffe_rng()); - } -#endif -} - -template -void caffe_rng_gaussian(const int n, const float mu, - const float sigma, float* r); - -template -void caffe_rng_gaussian(const int n, const double mu, - const double sigma, double* r); - -template -void caffe_rng_bernoulli(const int n, const Dtype p, int* r) { - CHECK_GE(n, 0); - CHECK(r); - CHECK_GE(p, 0); - CHECK_LE(p, 1); -#ifdef USE_BOOST - boost::bernoulli_distribution random_distribution(p); - boost::variate_generator > - variate_generator(caffe_rng(), random_distribution); - for (int i = 0; i < n; ++i) { - r[i] = 
variate_generator(); - } -#else - std::bernoulli_distribution random_distribution(p); - for (int i = 0; i < n; ++i) { - r[i] = random_distribution(*caffe_rng()); - } -#endif -} - -template -void caffe_rng_bernoulli(const int n, const double p, int* r); - -template -void caffe_rng_bernoulli(const int n, const float p, int* r); - -template -void caffe_rng_bernoulli(const int n, const Dtype p, unsigned int* r) { - CHECK_GE(n, 0); - CHECK(r); - CHECK_GE(p, 0); - CHECK_LE(p, 1); -#ifdef USE_BOOST - boost::bernoulli_distribution random_distribution(p); - boost::variate_generator > - variate_generator(caffe_rng(), random_distribution); - for (int i = 0; i < n; ++i) { - r[i] = static_cast(variate_generator()); - } -#else - std::bernoulli_distribution random_distribution(p); - for (int i = 0; i < n; ++i) { - r[i] = static_cast(random_distribution(*caffe_rng())); - } -#endif -} - -template -void caffe_rng_bernoulli(const int n, const double p, unsigned int* r); - -template -void caffe_rng_bernoulli(const int n, const float p, unsigned int* r); - -template <> -float caffe_cpu_strided_dot(const int n, const float* x, const int incx, - const float* y, const int incy) { - return cblas_sdot(n, x, incx, y, incy); -} - -template <> -double caffe_cpu_strided_dot(const int n, const double* x, - const int incx, const double* y, const int incy) { - return cblas_ddot(n, x, incx, y, incy); -} - -template -Dtype caffe_cpu_dot(const int n, const Dtype* x, const Dtype* y) { - return caffe_cpu_strided_dot(n, x, 1, y, 1); -} - -template -float caffe_cpu_dot(const int n, const float* x, const float* y); - -template -double caffe_cpu_dot(const int n, const double* x, const double* y); - -template <> -float caffe_cpu_asum(const int n, const float* x) { - return cblas_sasum(n, x, 1); -} - -template <> -double caffe_cpu_asum(const int n, const double* x) { - return cblas_dasum(n, x, 1); -} - -template <> -void caffe_cpu_scale(const int n, const float alpha, const float *x, - float* y) { - cblas_scopy(n, x, 1, y, 1); - cblas_sscal(n, alpha, y, 1); -} - -template <> -void caffe_cpu_scale(const int n, const double alpha, const double *x, - double* y) { - cblas_dcopy(n, x, 1, y, 1); - cblas_dscal(n, alpha, y, 1); -} - -} // namespace caffe diff --git a/src/caffe/util/upgrade_proto.cpp b/src/caffe/util/upgrade_proto.cpp deleted file mode 100644 index 94771c8..0000000 --- a/src/caffe/util/upgrade_proto.cpp +++ /dev/null @@ -1,1106 +0,0 @@ -#include -#include -#include - -#include -#include - -#include "caffe/common.hpp" -#include "caffe/proto/caffe.pb.h" -#include "caffe/util/io.hpp" -#include "caffe/util/upgrade_proto.hpp" - -namespace caffe { - -bool NetNeedsUpgrade(const NetParameter& net_param) { - return NetNeedsV0ToV1Upgrade(net_param) || NetNeedsV1ToV2Upgrade(net_param) - || NetNeedsDataUpgrade(net_param) || NetNeedsInputUpgrade(net_param) - || NetNeedsBatchNormUpgrade(net_param); -} - -bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) { - bool success = true; - if (NetNeedsV0ToV1Upgrade(*param)) { - // NetParameter was specified using the old style (V0LayerParameter); try to - // upgrade it. 
- LOG(INFO) << "Attempting to upgrade input file specified using deprecated " - << "V0LayerParameter: " << param_file; - NetParameter original_param(*param); - if (!UpgradeV0Net(original_param, param)) { - success = false; - LOG(ERROR) << "Warning: had one or more problems upgrading " - << "V0NetParameter to NetParameter (see above); continuing anyway."; - } else { - LOG(INFO) << "Successfully upgraded file specified using deprecated " - << "V0LayerParameter"; - } - LOG(WARNING) << "Note that future Caffe releases will not support " - << "V0NetParameter; use ./build/tools/upgrade_net_proto_text for " - << "prototxt and ./build/tools/upgrade_net_proto_binary for model " - << "weights upgrade this and any other net protos to the new format."; - } - // NetParameter uses old style data transformation fields; try to upgrade it. - if (NetNeedsDataUpgrade(*param)) { - LOG(INFO) << "Attempting to upgrade input file specified using deprecated " - << "transformation parameters: " << param_file; - UpgradeNetDataTransformation(param); - LOG(INFO) << "Successfully upgraded file specified using deprecated " - << "data transformation parameters."; - LOG(WARNING) << "Note that future Caffe releases will only support " - << "transform_param messages for transformation fields."; - } - if (NetNeedsV1ToV2Upgrade(*param)) { - LOG(INFO) << "Attempting to upgrade input file specified using deprecated " - << "V1LayerParameter: " << param_file; - NetParameter original_param(*param); - if (!UpgradeV1Net(original_param, param)) { - success = false; - LOG(ERROR) << "Warning: had one or more problems upgrading " - << "V1LayerParameter (see above); continuing anyway."; - } else { - LOG(INFO) << "Successfully upgraded file specified using deprecated " - << "V1LayerParameter"; - } - } - // NetParameter uses old style input fields; try to upgrade it. - if (NetNeedsInputUpgrade(*param)) { - LOG(INFO) << "Attempting to upgrade input file specified using deprecated " - << "input fields: " << param_file; - UpgradeNetInput(param); - LOG(INFO) << "Successfully upgraded file specified using deprecated " - << "input fields."; - LOG(WARNING) << "Note that future Caffe releases will only support " - << "input layers and not input fields."; - } - // NetParameter uses old style batch norm layers; try to upgrade it. - if (NetNeedsBatchNormUpgrade(*param)) { - LOG(INFO) << "Attempting to upgrade batch norm layers using deprecated " - << "params: " << param_file; - UpgradeNetBatchNorm(param); - LOG(INFO) << "Successfully upgraded batch norm layers using deprecated " - << "params."; - } - return success; -} - -void ReadNetParamsFromTextFileOrDie(const string& param_file, - NetParameter* param) { - CHECK(ReadProtoFromTextFile(param_file, param)) - << "Failed to parse NetParameter file: " << param_file; - UpgradeNetAsNeeded(param_file, param); -} - -void ReadNetParamsFromBinaryFileOrDie(const string& param_file, - NetParameter* param) { - CHECK(ReadProtoFromBinaryFile(param_file, param)) - << "Failed to parse NetParameter file: " << param_file; - UpgradeNetAsNeeded(param_file, param); -} - -bool NetNeedsV0ToV1Upgrade(const NetParameter& net_param) { - for (int i = 0; i < net_param.layers_size(); ++i) { - if (net_param.layers(i).has_layer()) { - return true; - } - } - return false; -} - -bool NetNeedsV1ToV2Upgrade(const NetParameter& net_param) { - return net_param.layers_size() > 0; -} - -bool UpgradeV0Net(const NetParameter& v0_net_param_padding_layers, - NetParameter* net_param) { - // First upgrade padding layers to padded conv layers. 
- NetParameter v0_net_param; - UpgradeV0PaddingLayers(v0_net_param_padding_layers, &v0_net_param); - // Now upgrade layer parameters. - bool is_fully_compatible = true; - net_param->Clear(); - if (v0_net_param.has_name()) { - net_param->set_name(v0_net_param.name()); - } - for (int i = 0; i < v0_net_param.layers_size(); ++i) { - is_fully_compatible &= UpgradeV0LayerParameter(v0_net_param.layers(i), - net_param->add_layers()); - } - for (int i = 0; i < v0_net_param.input_size(); ++i) { - net_param->add_input(v0_net_param.input(i)); - } - for (int i = 0; i < v0_net_param.input_dim_size(); ++i) { - net_param->add_input_dim(v0_net_param.input_dim(i)); - } - if (v0_net_param.has_force_backward()) { - net_param->set_force_backward(v0_net_param.force_backward()); - } - return is_fully_compatible; -} - -void UpgradeV0PaddingLayers(const NetParameter& param, - NetParameter* param_upgraded_pad) { - // Copy everything other than the layers from the original param. - param_upgraded_pad->Clear(); - param_upgraded_pad->CopyFrom(param); - param_upgraded_pad->clear_layers(); - // Figure out which layer each bottom blob comes from. - map blob_name_to_last_top_idx; - for (int i = 0; i < param.input_size(); ++i) { - const string& blob_name = param.input(i); - blob_name_to_last_top_idx[blob_name] = -1; - } - for (int i = 0; i < param.layers_size(); ++i) { - const V1LayerParameter& layer_connection = param.layers(i); - const V0LayerParameter& layer_param = layer_connection.layer(); - // Add the layer to the new net, unless it's a padding layer. - if (layer_param.type() != "padding") { - param_upgraded_pad->add_layers()->CopyFrom(layer_connection); - } - for (int j = 0; j < layer_connection.bottom_size(); ++j) { - const string& blob_name = layer_connection.bottom(j); - if (blob_name_to_last_top_idx.find(blob_name) == - blob_name_to_last_top_idx.end()) { - LOG(FATAL) << "Unknown blob input " << blob_name << " to layer " << j; - } - const int top_idx = blob_name_to_last_top_idx[blob_name]; - if (top_idx == -1) { - continue; - } - const V1LayerParameter& source_layer = param.layers(top_idx); - if (source_layer.layer().type() == "padding") { - // This layer has a padding layer as input -- check that it is a conv - // layer or a pooling layer and takes only one input. Also check that - // the padding layer input has only one input and one output. Other - // cases have undefined behavior in Caffe. 
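To make the padding fold concrete, a self-contained sketch using plain stand-in structs (hypothetical; not the caffe proto API) of the rewiring the removed code below performs: the standalone padding layer is dropped, its pad value moves onto the consuming conv/pool layer, and that layer's bottom is rewired to the padding layer's own input.

#include <cassert>
#include <string>

// Simplified stand-in for a V0-style layer definition.
struct OldLayer {
  std::string type;
  std::string bottom;
  std::string top;
  int pad;
};

int main() {
  OldLayer padding{"padding", "data", "data_pad", 2};
  OldLayer conv{"conv", "data_pad", "conv1", 0};

  // Fold: drop the padding layer, keep its pad on the conv, rewire the bottom.
  conv.pad = padding.pad;
  conv.bottom = padding.bottom;

  assert(conv.pad == 2);
  assert(conv.bottom == "data");
  return 0;
}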
- CHECK((layer_param.type() == "conv") || (layer_param.type() == "pool")) - << "Padding layer input to " - "non-convolutional / non-pooling layer type " - << layer_param.type(); - CHECK_EQ(layer_connection.bottom_size(), 1) - << "Conv Layer takes a single blob as input."; - CHECK_EQ(source_layer.bottom_size(), 1) - << "Padding Layer takes a single blob as input."; - CHECK_EQ(source_layer.top_size(), 1) - << "Padding Layer produces a single blob as output."; - int layer_index = param_upgraded_pad->layers_size() - 1; - param_upgraded_pad->mutable_layers(layer_index)->mutable_layer() - ->set_pad(source_layer.layer().pad()); - param_upgraded_pad->mutable_layers(layer_index) - ->set_bottom(j, source_layer.bottom(0)); - } - } - for (int j = 0; j < layer_connection.top_size(); ++j) { - const string& blob_name = layer_connection.top(j); - blob_name_to_last_top_idx[blob_name] = i; - } - } -} - -bool UpgradeV0LayerParameter(const V1LayerParameter& v0_layer_connection, - V1LayerParameter* layer_param) { - bool is_fully_compatible = true; - layer_param->Clear(); - for (int i = 0; i < v0_layer_connection.bottom_size(); ++i) { - layer_param->add_bottom(v0_layer_connection.bottom(i)); - } - for (int i = 0; i < v0_layer_connection.top_size(); ++i) { - layer_param->add_top(v0_layer_connection.top(i)); - } - if (v0_layer_connection.has_layer()) { - const V0LayerParameter& v0_layer_param = v0_layer_connection.layer(); - if (v0_layer_param.has_name()) { - layer_param->set_name(v0_layer_param.name()); - } - const string& type = v0_layer_param.type(); - if (v0_layer_param.has_type()) { - layer_param->set_type(UpgradeV0LayerType(type)); - } - for (int i = 0; i < v0_layer_param.blobs_size(); ++i) { - layer_param->add_blobs()->CopyFrom(v0_layer_param.blobs(i)); - } - for (int i = 0; i < v0_layer_param.blobs_lr_size(); ++i) { - layer_param->add_blobs_lr(v0_layer_param.blobs_lr(i)); - } - for (int i = 0; i < v0_layer_param.weight_decay_size(); ++i) { - layer_param->add_weight_decay(v0_layer_param.weight_decay(i)); - } - if (v0_layer_param.has_num_output()) { - if (type == "conv") { - layer_param->mutable_convolution_param()->set_num_output( - v0_layer_param.num_output()); - } else if (type == "innerproduct") { - layer_param->mutable_inner_product_param()->set_num_output( - v0_layer_param.num_output()); - } else { - LOG(ERROR) << "Unknown parameter num_output for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_biasterm()) { - if (type == "conv") { - layer_param->mutable_convolution_param()->set_bias_term( - v0_layer_param.biasterm()); - } else if (type == "innerproduct") { - layer_param->mutable_inner_product_param()->set_bias_term( - v0_layer_param.biasterm()); - } else { - LOG(ERROR) << "Unknown parameter biasterm for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_weight_filler()) { - if (type == "conv") { - layer_param->mutable_convolution_param()-> - mutable_weight_filler()->CopyFrom(v0_layer_param.weight_filler()); - } else if (type == "innerproduct") { - layer_param->mutable_inner_product_param()-> - mutable_weight_filler()->CopyFrom(v0_layer_param.weight_filler()); - } else { - LOG(ERROR) << "Unknown parameter weight_filler for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_bias_filler()) { - if (type == "conv") { - layer_param->mutable_convolution_param()-> - mutable_bias_filler()->CopyFrom(v0_layer_param.bias_filler()); - } else if (type == "innerproduct") { - 
layer_param->mutable_inner_product_param()-> - mutable_bias_filler()->CopyFrom(v0_layer_param.bias_filler()); - } else { - LOG(ERROR) << "Unknown parameter bias_filler for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_pad()) { - if (type == "conv") { - layer_param->mutable_convolution_param()->add_pad(v0_layer_param.pad()); - } else if (type == "pool") { - layer_param->mutable_pooling_param()->set_pad(v0_layer_param.pad()); - } else { - LOG(ERROR) << "Unknown parameter pad for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_kernelsize()) { - if (type == "conv") { - layer_param->mutable_convolution_param()->add_kernel_size( - v0_layer_param.kernelsize()); - } else if (type == "pool") { - layer_param->mutable_pooling_param()->set_kernel_size( - v0_layer_param.kernelsize()); - } else { - LOG(ERROR) << "Unknown parameter kernelsize for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_group()) { - if (type == "conv") { - layer_param->mutable_convolution_param()->set_group( - v0_layer_param.group()); - } else { - LOG(ERROR) << "Unknown parameter group for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_stride()) { - if (type == "conv") { - layer_param->mutable_convolution_param()->add_stride( - v0_layer_param.stride()); - } else if (type == "pool") { - layer_param->mutable_pooling_param()->set_stride( - v0_layer_param.stride()); - } else { - LOG(ERROR) << "Unknown parameter stride for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_pool()) { - if (type == "pool") { - V0LayerParameter_PoolMethod pool = v0_layer_param.pool(); - switch (pool) { - case V0LayerParameter_PoolMethod_MAX: - layer_param->mutable_pooling_param()->set_pool( - PoolingParameter_PoolMethod_MAX); - break; - case V0LayerParameter_PoolMethod_AVE: - layer_param->mutable_pooling_param()->set_pool( - PoolingParameter_PoolMethod_AVE); - break; - case V0LayerParameter_PoolMethod_STOCHASTIC: - layer_param->mutable_pooling_param()->set_pool( - PoolingParameter_PoolMethod_STOCHASTIC); - break; - default: - LOG(ERROR) << "Unknown pool method " << pool; - is_fully_compatible = false; - } - } else { - LOG(ERROR) << "Unknown parameter pool for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_dropout_ratio()) { - if (type == "dropout") { - layer_param->mutable_dropout_param()->set_dropout_ratio( - v0_layer_param.dropout_ratio()); - } else { - LOG(ERROR) << "Unknown parameter dropout_ratio for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_local_size()) { - if (type == "lrn") { - layer_param->mutable_lrn_param()->set_local_size( - v0_layer_param.local_size()); - } else { - LOG(ERROR) << "Unknown parameter local_size for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_alpha()) { - if (type == "lrn") { - layer_param->mutable_lrn_param()->set_alpha(v0_layer_param.alpha()); - } else { - LOG(ERROR) << "Unknown parameter alpha for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_beta()) { - if (type == "lrn") { - layer_param->mutable_lrn_param()->set_beta(v0_layer_param.beta()); - } else { - LOG(ERROR) << "Unknown parameter beta for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_k()) { - if (type == "lrn") { - layer_param->mutable_lrn_param()->set_k(v0_layer_param.k()); - } 
else { - LOG(ERROR) << "Unknown parameter k for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_source()) { - if (type == "data") { - layer_param->mutable_data_param()->set_source(v0_layer_param.source()); - } else if (type == "hdf5_data") { - layer_param->mutable_hdf5_data_param()->set_source( - v0_layer_param.source()); - } else if (type == "images") { - layer_param->mutable_image_data_param()->set_source( - v0_layer_param.source()); - } else if (type == "window_data") { - layer_param->mutable_window_data_param()->set_source( - v0_layer_param.source()); - } else if (type == "infogain_loss") { - layer_param->mutable_infogain_loss_param()->set_source( - v0_layer_param.source()); - } else { - LOG(ERROR) << "Unknown parameter source for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_scale()) { - layer_param->mutable_transform_param()-> - set_scale(v0_layer_param.scale()); - } - if (v0_layer_param.has_meanfile()) { - layer_param->mutable_transform_param()-> - set_mean_file(v0_layer_param.meanfile()); - } - if (v0_layer_param.has_batchsize()) { - if (type == "data") { - layer_param->mutable_data_param()->set_batch_size( - v0_layer_param.batchsize()); - } else if (type == "hdf5_data") { - layer_param->mutable_hdf5_data_param()->set_batch_size( - v0_layer_param.batchsize()); - } else if (type == "images") { - layer_param->mutable_image_data_param()->set_batch_size( - v0_layer_param.batchsize()); - } else if (type == "window_data") { - layer_param->mutable_window_data_param()->set_batch_size( - v0_layer_param.batchsize()); - } else { - LOG(ERROR) << "Unknown parameter batchsize for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_cropsize()) { - layer_param->mutable_transform_param()-> - set_crop_size(v0_layer_param.cropsize()); - } - if (v0_layer_param.has_mirror()) { - layer_param->mutable_transform_param()-> - set_mirror(v0_layer_param.mirror()); - } - if (v0_layer_param.has_rand_skip()) { - if (type == "data") { - layer_param->mutable_data_param()->set_rand_skip( - v0_layer_param.rand_skip()); - } else if (type == "images") { - layer_param->mutable_image_data_param()->set_rand_skip( - v0_layer_param.rand_skip()); - } else { - LOG(ERROR) << "Unknown parameter rand_skip for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_shuffle_images()) { - if (type == "images") { - layer_param->mutable_image_data_param()->set_shuffle( - v0_layer_param.shuffle_images()); - } else { - LOG(ERROR) << "Unknown parameter shuffle for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_new_height()) { - if (type == "images") { - layer_param->mutable_image_data_param()->set_new_height( - v0_layer_param.new_height()); - } else { - LOG(ERROR) << "Unknown parameter new_height for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_new_width()) { - if (type == "images") { - layer_param->mutable_image_data_param()->set_new_width( - v0_layer_param.new_width()); - } else { - LOG(ERROR) << "Unknown parameter new_width for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_concat_dim()) { - if (type == "concat") { - layer_param->mutable_concat_param()->set_concat_dim( - v0_layer_param.concat_dim()); - } else { - LOG(ERROR) << "Unknown parameter concat_dim for layer type " << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_det_fg_threshold()) { - if (type 
== "window_data") { - layer_param->mutable_window_data_param()->set_fg_threshold( - v0_layer_param.det_fg_threshold()); - } else { - LOG(ERROR) << "Unknown parameter det_fg_threshold for layer type " - << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_det_bg_threshold()) { - if (type == "window_data") { - layer_param->mutable_window_data_param()->set_bg_threshold( - v0_layer_param.det_bg_threshold()); - } else { - LOG(ERROR) << "Unknown parameter det_bg_threshold for layer type " - << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_det_fg_fraction()) { - if (type == "window_data") { - layer_param->mutable_window_data_param()->set_fg_fraction( - v0_layer_param.det_fg_fraction()); - } else { - LOG(ERROR) << "Unknown parameter det_fg_fraction for layer type " - << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_det_context_pad()) { - if (type == "window_data") { - layer_param->mutable_window_data_param()->set_context_pad( - v0_layer_param.det_context_pad()); - } else { - LOG(ERROR) << "Unknown parameter det_context_pad for layer type " - << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_det_crop_mode()) { - if (type == "window_data") { - layer_param->mutable_window_data_param()->set_crop_mode( - v0_layer_param.det_crop_mode()); - } else { - LOG(ERROR) << "Unknown parameter det_crop_mode for layer type " - << type; - is_fully_compatible = false; - } - } - if (v0_layer_param.has_hdf5_output_param()) { - if (type == "hdf5_output") { - layer_param->mutable_hdf5_output_param()->CopyFrom( - v0_layer_param.hdf5_output_param()); - } else { - LOG(ERROR) << "Unknown parameter hdf5_output_param for layer type " - << type; - is_fully_compatible = false; - } - } - } - return is_fully_compatible; -} - -V1LayerParameter_LayerType UpgradeV0LayerType(const string& type) { - if (type == "accuracy") { - return V1LayerParameter_LayerType_ACCURACY; - } else if (type == "bnll") { - return V1LayerParameter_LayerType_BNLL; - } else if (type == "concat") { - return V1LayerParameter_LayerType_CONCAT; - } else if (type == "conv") { - return V1LayerParameter_LayerType_CONVOLUTION; - } else if (type == "data") { - return V1LayerParameter_LayerType_DATA; - } else if (type == "dropout") { - return V1LayerParameter_LayerType_DROPOUT; - } else if (type == "euclidean_loss") { - return V1LayerParameter_LayerType_EUCLIDEAN_LOSS; - } else if (type == "flatten") { - return V1LayerParameter_LayerType_FLATTEN; - } else if (type == "hdf5_data") { - return V1LayerParameter_LayerType_HDF5_DATA; - } else if (type == "hdf5_output") { - return V1LayerParameter_LayerType_HDF5_OUTPUT; - } else if (type == "im2col") { - return V1LayerParameter_LayerType_IM2COL; - } else if (type == "images") { - return V1LayerParameter_LayerType_IMAGE_DATA; - } else if (type == "infogain_loss") { - return V1LayerParameter_LayerType_INFOGAIN_LOSS; - } else if (type == "innerproduct") { - return V1LayerParameter_LayerType_INNER_PRODUCT; - } else if (type == "lrn") { - return V1LayerParameter_LayerType_LRN; - } else if (type == "multinomial_logistic_loss") { - return V1LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS; - } else if (type == "pool") { - return V1LayerParameter_LayerType_POOLING; - } else if (type == "relu") { - return V1LayerParameter_LayerType_RELU; - } else if (type == "sigmoid") { - return V1LayerParameter_LayerType_SIGMOID; - } else if (type == "softmax") { - return V1LayerParameter_LayerType_SOFTMAX; - } else if (type == "softmax_loss") { - return 
V1LayerParameter_LayerType_SOFTMAX_LOSS; - } else if (type == "split") { - return V1LayerParameter_LayerType_SPLIT; - } else if (type == "tanh") { - return V1LayerParameter_LayerType_TANH; - } else if (type == "window_data") { - return V1LayerParameter_LayerType_WINDOW_DATA; - } else { - LOG(FATAL) << "Unknown layer name: " << type; - return V1LayerParameter_LayerType_NONE; - } -} - -bool NetNeedsDataUpgrade(const NetParameter& net_param) { - for (int i = 0; i < net_param.layers_size(); ++i) { - if (net_param.layers(i).type() == V1LayerParameter_LayerType_DATA) { - DataParameter layer_param = net_param.layers(i).data_param(); - if (layer_param.has_scale()) { return true; } - if (layer_param.has_mean_file()) { return true; } - if (layer_param.has_crop_size()) { return true; } - if (layer_param.has_mirror()) { return true; } - } - if (net_param.layers(i).type() == V1LayerParameter_LayerType_IMAGE_DATA) { - ImageDataParameter layer_param = net_param.layers(i).image_data_param(); - if (layer_param.has_scale()) { return true; } - if (layer_param.has_mean_file()) { return true; } - if (layer_param.has_crop_size()) { return true; } - if (layer_param.has_mirror()) { return true; } - } - if (net_param.layers(i).type() == V1LayerParameter_LayerType_WINDOW_DATA) { - WindowDataParameter layer_param = net_param.layers(i).window_data_param(); - if (layer_param.has_scale()) { return true; } - if (layer_param.has_mean_file()) { return true; } - if (layer_param.has_crop_size()) { return true; } - if (layer_param.has_mirror()) { return true; } - } - } - return false; -} - -#define CONVERT_LAYER_TRANSFORM_PARAM(TYPE, Name, param_name) \ - do { \ - if (net_param->layers(i).type() == V1LayerParameter_LayerType_##TYPE) { \ - Name##Parameter* layer_param = \ - net_param->mutable_layers(i)->mutable_##param_name##_param(); \ - TransformationParameter* transform_param = \ - net_param->mutable_layers(i)->mutable_transform_param(); \ - if (layer_param->has_scale()) { \ - transform_param->set_scale(layer_param->scale()); \ - layer_param->clear_scale(); \ - } \ - if (layer_param->has_mean_file()) { \ - transform_param->set_mean_file(layer_param->mean_file()); \ - layer_param->clear_mean_file(); \ - } \ - if (layer_param->has_crop_size()) { \ - transform_param->set_crop_size(layer_param->crop_size()); \ - layer_param->clear_crop_size(); \ - } \ - if (layer_param->has_mirror()) { \ - transform_param->set_mirror(layer_param->mirror()); \ - layer_param->clear_mirror(); \ - } \ - } \ - } while (0) - -void UpgradeNetDataTransformation(NetParameter* net_param) { - for (int i = 0; i < net_param->layers_size(); ++i) { - CONVERT_LAYER_TRANSFORM_PARAM(DATA, Data, data); - CONVERT_LAYER_TRANSFORM_PARAM(IMAGE_DATA, ImageData, image_data); - CONVERT_LAYER_TRANSFORM_PARAM(WINDOW_DATA, WindowData, window_data); - } -} - -bool UpgradeV1Net(const NetParameter& v1_net_param, NetParameter* net_param) { - if (v1_net_param.layer_size() > 0) { - LOG(FATAL) << "Refusing to upgrade inconsistent NetParameter input; " - << "the definition includes both 'layer' and 'layers' fields. " - << "The current format defines 'layer' fields with string type like " - << "layer { type: 'Layer' ... } and not layers { type: LAYER ... }. 
" - << "Manually switch the definition to 'layer' format to continue."; - } - bool is_fully_compatible = true; - net_param->CopyFrom(v1_net_param); - net_param->clear_layers(); - net_param->clear_layer(); - for (int i = 0; i < v1_net_param.layers_size(); ++i) { - if (!UpgradeV1LayerParameter(v1_net_param.layers(i), - net_param->add_layer())) { - LOG(ERROR) << "Upgrade of input layer " << i << " failed."; - is_fully_compatible = false; - } - } - return is_fully_compatible; -} - -bool UpgradeV1LayerParameter(const V1LayerParameter& v1_layer_param, - LayerParameter* layer_param) { - layer_param->Clear(); - bool is_fully_compatible = true; - for (int i = 0; i < v1_layer_param.bottom_size(); ++i) { - layer_param->add_bottom(v1_layer_param.bottom(i)); - } - for (int i = 0; i < v1_layer_param.top_size(); ++i) { - layer_param->add_top(v1_layer_param.top(i)); - } - if (v1_layer_param.has_name()) { - layer_param->set_name(v1_layer_param.name()); - } - for (int i = 0; i < v1_layer_param.include_size(); ++i) { - layer_param->add_include()->CopyFrom(v1_layer_param.include(i)); - } - for (int i = 0; i < v1_layer_param.exclude_size(); ++i) { - layer_param->add_exclude()->CopyFrom(v1_layer_param.exclude(i)); - } - if (v1_layer_param.has_type()) { - layer_param->set_type(UpgradeV1LayerType(v1_layer_param.type())); - } - for (int i = 0; i < v1_layer_param.blobs_size(); ++i) { - layer_param->add_blobs()->CopyFrom(v1_layer_param.blobs(i)); - } - for (int i = 0; i < v1_layer_param.param_size(); ++i) { - while (layer_param->param_size() <= i) { layer_param->add_param(); } - layer_param->mutable_param(i)->set_name(v1_layer_param.param(i)); - } - ParamSpec_DimCheckMode mode; - for (int i = 0; i < v1_layer_param.blob_share_mode_size(); ++i) { - while (layer_param->param_size() <= i) { layer_param->add_param(); } - switch (v1_layer_param.blob_share_mode(i)) { - case V1LayerParameter_DimCheckMode_STRICT: - mode = ParamSpec_DimCheckMode_STRICT; - break; - case V1LayerParameter_DimCheckMode_PERMISSIVE: - mode = ParamSpec_DimCheckMode_PERMISSIVE; - break; - default: - LOG(FATAL) << "Unknown blob_share_mode: " - << v1_layer_param.blob_share_mode(i); - break; - } - layer_param->mutable_param(i)->set_share_mode(mode); - } - for (int i = 0; i < v1_layer_param.blobs_lr_size(); ++i) { - while (layer_param->param_size() <= i) { layer_param->add_param(); } - layer_param->mutable_param(i)->set_lr_mult(v1_layer_param.blobs_lr(i)); - } - for (int i = 0; i < v1_layer_param.weight_decay_size(); ++i) { - while (layer_param->param_size() <= i) { layer_param->add_param(); } - layer_param->mutable_param(i)->set_decay_mult( - v1_layer_param.weight_decay(i)); - } - for (int i = 0; i < v1_layer_param.loss_weight_size(); ++i) { - layer_param->add_loss_weight(v1_layer_param.loss_weight(i)); - } - if (v1_layer_param.has_accuracy_param()) { - layer_param->mutable_accuracy_param()->CopyFrom( - v1_layer_param.accuracy_param()); - } - if (v1_layer_param.has_argmax_param()) { - layer_param->mutable_argmax_param()->CopyFrom( - v1_layer_param.argmax_param()); - } - if (v1_layer_param.has_concat_param()) { - layer_param->mutable_concat_param()->CopyFrom( - v1_layer_param.concat_param()); - } - if (v1_layer_param.has_contrastive_loss_param()) { - layer_param->mutable_contrastive_loss_param()->CopyFrom( - v1_layer_param.contrastive_loss_param()); - } - if (v1_layer_param.has_convolution_param()) { - layer_param->mutable_convolution_param()->CopyFrom( - v1_layer_param.convolution_param()); - } - if (v1_layer_param.has_data_param()) { - 
layer_param->mutable_data_param()->CopyFrom( - v1_layer_param.data_param()); - } - if (v1_layer_param.has_dropout_param()) { - layer_param->mutable_dropout_param()->CopyFrom( - v1_layer_param.dropout_param()); - } - if (v1_layer_param.has_dummy_data_param()) { - layer_param->mutable_dummy_data_param()->CopyFrom( - v1_layer_param.dummy_data_param()); - } - if (v1_layer_param.has_eltwise_param()) { - layer_param->mutable_eltwise_param()->CopyFrom( - v1_layer_param.eltwise_param()); - } - if (v1_layer_param.has_exp_param()) { - layer_param->mutable_exp_param()->CopyFrom( - v1_layer_param.exp_param()); - } - if (v1_layer_param.has_hdf5_data_param()) { - layer_param->mutable_hdf5_data_param()->CopyFrom( - v1_layer_param.hdf5_data_param()); - } - if (v1_layer_param.has_hdf5_output_param()) { - layer_param->mutable_hdf5_output_param()->CopyFrom( - v1_layer_param.hdf5_output_param()); - } - if (v1_layer_param.has_hinge_loss_param()) { - layer_param->mutable_hinge_loss_param()->CopyFrom( - v1_layer_param.hinge_loss_param()); - } - if (v1_layer_param.has_image_data_param()) { - layer_param->mutable_image_data_param()->CopyFrom( - v1_layer_param.image_data_param()); - } - if (v1_layer_param.has_infogain_loss_param()) { - layer_param->mutable_infogain_loss_param()->CopyFrom( - v1_layer_param.infogain_loss_param()); - } - if (v1_layer_param.has_inner_product_param()) { - layer_param->mutable_inner_product_param()->CopyFrom( - v1_layer_param.inner_product_param()); - } - if (v1_layer_param.has_lrn_param()) { - layer_param->mutable_lrn_param()->CopyFrom( - v1_layer_param.lrn_param()); - } - if (v1_layer_param.has_memory_data_param()) { - layer_param->mutable_memory_data_param()->CopyFrom( - v1_layer_param.memory_data_param()); - } - if (v1_layer_param.has_mvn_param()) { - layer_param->mutable_mvn_param()->CopyFrom( - v1_layer_param.mvn_param()); - } - if (v1_layer_param.has_pooling_param()) { - layer_param->mutable_pooling_param()->CopyFrom( - v1_layer_param.pooling_param()); - } - if (v1_layer_param.has_power_param()) { - layer_param->mutable_power_param()->CopyFrom( - v1_layer_param.power_param()); - } - if (v1_layer_param.has_relu_param()) { - layer_param->mutable_relu_param()->CopyFrom( - v1_layer_param.relu_param()); - } - if (v1_layer_param.has_sigmoid_param()) { - layer_param->mutable_sigmoid_param()->CopyFrom( - v1_layer_param.sigmoid_param()); - } - if (v1_layer_param.has_softmax_param()) { - layer_param->mutable_softmax_param()->CopyFrom( - v1_layer_param.softmax_param()); - } - if (v1_layer_param.has_slice_param()) { - layer_param->mutable_slice_param()->CopyFrom( - v1_layer_param.slice_param()); - } - if (v1_layer_param.has_tanh_param()) { - layer_param->mutable_tanh_param()->CopyFrom( - v1_layer_param.tanh_param()); - } - if (v1_layer_param.has_threshold_param()) { - layer_param->mutable_threshold_param()->CopyFrom( - v1_layer_param.threshold_param()); - } - if (v1_layer_param.has_window_data_param()) { - layer_param->mutable_window_data_param()->CopyFrom( - v1_layer_param.window_data_param()); - } - if (v1_layer_param.has_transform_param()) { - layer_param->mutable_transform_param()->CopyFrom( - v1_layer_param.transform_param()); - } - if (v1_layer_param.has_loss_param()) { - layer_param->mutable_loss_param()->CopyFrom( - v1_layer_param.loss_param()); - } - if (v1_layer_param.has_layer()) { - LOG(ERROR) << "Input NetParameter has V0 layer -- ignoring."; - is_fully_compatible = false; - } - return is_fully_compatible; -} - -const char* UpgradeV1LayerType(const V1LayerParameter_LayerType 
type) { - switch (type) { - case V1LayerParameter_LayerType_NONE: - return ""; - case V1LayerParameter_LayerType_ABSVAL: - return "AbsVal"; - case V1LayerParameter_LayerType_ACCURACY: - return "Accuracy"; - case V1LayerParameter_LayerType_ARGMAX: - return "ArgMax"; - case V1LayerParameter_LayerType_BNLL: - return "BNLL"; - case V1LayerParameter_LayerType_CONCAT: - return "Concat"; - case V1LayerParameter_LayerType_CONTRASTIVE_LOSS: - return "ContrastiveLoss"; - case V1LayerParameter_LayerType_CONVOLUTION: - return "Convolution"; - case V1LayerParameter_LayerType_DECONVOLUTION: - return "Deconvolution"; - case V1LayerParameter_LayerType_DATA: - return "Data"; - case V1LayerParameter_LayerType_DROPOUT: - return "Dropout"; - case V1LayerParameter_LayerType_DUMMY_DATA: - return "DummyData"; - case V1LayerParameter_LayerType_EUCLIDEAN_LOSS: - return "EuclideanLoss"; - case V1LayerParameter_LayerType_ELTWISE: - return "Eltwise"; - case V1LayerParameter_LayerType_EXP: - return "Exp"; - case V1LayerParameter_LayerType_FLATTEN: - return "Flatten"; - case V1LayerParameter_LayerType_HDF5_DATA: - return "HDF5Data"; - case V1LayerParameter_LayerType_HDF5_OUTPUT: - return "HDF5Output"; - case V1LayerParameter_LayerType_HINGE_LOSS: - return "HingeLoss"; - case V1LayerParameter_LayerType_IM2COL: - return "Im2col"; - case V1LayerParameter_LayerType_IMAGE_DATA: - return "ImageData"; - case V1LayerParameter_LayerType_INFOGAIN_LOSS: - return "InfogainLoss"; - case V1LayerParameter_LayerType_INNER_PRODUCT: - return "InnerProduct"; - case V1LayerParameter_LayerType_LRN: - return "LRN"; - case V1LayerParameter_LayerType_MEMORY_DATA: - return "MemoryData"; - case V1LayerParameter_LayerType_MULTINOMIAL_LOGISTIC_LOSS: - return "MultinomialLogisticLoss"; - case V1LayerParameter_LayerType_MVN: - return "MVN"; - case V1LayerParameter_LayerType_POOLING: - return "Pooling"; - case V1LayerParameter_LayerType_POWER: - return "Power"; - case V1LayerParameter_LayerType_RELU: - return "ReLU"; - case V1LayerParameter_LayerType_SIGMOID: - return "Sigmoid"; - case V1LayerParameter_LayerType_SIGMOID_CROSS_ENTROPY_LOSS: - return "SigmoidCrossEntropyLoss"; - case V1LayerParameter_LayerType_SILENCE: - return "Silence"; - case V1LayerParameter_LayerType_SOFTMAX: - return "Softmax"; - case V1LayerParameter_LayerType_SOFTMAX_LOSS: - return "SoftmaxWithLoss"; - case V1LayerParameter_LayerType_SPLIT: - return "Split"; - case V1LayerParameter_LayerType_SLICE: - return "Slice"; - case V1LayerParameter_LayerType_TANH: - return "TanH"; - case V1LayerParameter_LayerType_WINDOW_DATA: - return "WindowData"; - case V1LayerParameter_LayerType_THRESHOLD: - return "Threshold"; - default: - LOG(FATAL) << "Unknown V1LayerParameter layer type: " << type; - return ""; - } -} - -bool NetNeedsInputUpgrade(const NetParameter& net_param) { - return net_param.input_size() > 0; -} - -void UpgradeNetInput(NetParameter* net_param) { - // Collect inputs and convert to Input layer definitions. - // If the NetParameter holds an input alone, without shape/dim, then - // it's a legacy caffemodel and simply stripping the input field is enough. - bool has_shape = net_param->input_shape_size() > 0; - bool has_dim = net_param->input_dim_size() > 0; - if (has_shape || has_dim) { - LayerParameter* layer_param = net_param->add_layer(); - layer_param->set_name("input"); - layer_param->set_type("Input"); - InputParameter* input_param = layer_param->mutable_input_param(); - // Convert input fields into a layer. 
- for (int i = 0; i < net_param->input_size(); ++i) { - layer_param->add_top(net_param->input(i)); - if (has_shape) { - input_param->add_shape()->CopyFrom(net_param->input_shape(i)); - } else { - // Turn legacy input dimensions into shape. - BlobShape* shape = input_param->add_shape(); - int first_dim = i*4; - int last_dim = first_dim + 4; - for (int j = first_dim; j < last_dim; j++) { - shape->add_dim(net_param->input_dim(j)); - } - } - } - // Swap input layer to beginning of net to satisfy layer dependencies. - for (int i = net_param->layer_size() - 1; i > 0; --i) { - net_param->mutable_layer(i-1)->Swap(net_param->mutable_layer(i)); - } - } - // Clear inputs. - net_param->clear_input(); - net_param->clear_input_shape(); - net_param->clear_input_dim(); -} - -bool NetNeedsBatchNormUpgrade(const NetParameter& net_param) { - for (int i = 0; i < net_param.layer_size(); ++i) { - // Check if BatchNorm layers declare three parameters, as required by - // the previous BatchNorm layer definition. - if (net_param.layer(i).type() == "BatchNorm" - && net_param.layer(i).param_size() == 3) { - return true; - } - } - return false; -} - -void UpgradeNetBatchNorm(NetParameter* net_param) { - for (int i = 0; i < net_param->layer_size(); ++i) { - // Check if BatchNorm layers declare three parameters, as required by - // the previous BatchNorm layer definition. - if (net_param->layer(i).type() == "BatchNorm" - && net_param->layer(i).param_size() == 3) { - // set lr_mult and decay_mult to zero. leave all other param intact. - for (int ip = 0; ip < net_param->layer(i).param_size(); ip++) { - ParamSpec* fixed_param_spec = - net_param->mutable_layer(i)->mutable_param(ip); - fixed_param_spec->set_lr_mult(0.f); - fixed_param_spec->set_decay_mult(0.f); - } - } - } -} - -// Return true iff the solver contains any old solver_type specified as enums -bool SolverNeedsTypeUpgrade(const SolverParameter& solver_param) { - if (solver_param.has_solver_type()) { - return true; - } - return false; -} - -bool UpgradeSolverType(SolverParameter* solver_param) { - CHECK(!solver_param->has_solver_type() || !solver_param->has_type()) - << "Failed to upgrade solver: old solver_type field (enum) and new type " - << "field (string) cannot be both specified in solver proto text."; - if (solver_param->has_solver_type()) { - string type; - switch (solver_param->solver_type()) { - case SolverParameter_SolverType_SGD: - type = "SGD"; - break; - case SolverParameter_SolverType_NESTEROV: - type = "Nesterov"; - break; - case SolverParameter_SolverType_ADAGRAD: - type = "AdaGrad"; - break; - case SolverParameter_SolverType_RMSPROP: - type = "RMSProp"; - break; - case SolverParameter_SolverType_ADADELTA: - type = "AdaDelta"; - break; - case SolverParameter_SolverType_ADAM: - type = "Adam"; - break; - default: - LOG(FATAL) << "Unknown SolverParameter solver_type: " << type; - } - solver_param->set_type(type); - solver_param->clear_solver_type(); - } else { - LOG(ERROR) << "Warning: solver type already up to date. "; - return false; - } - return true; -} - -// Check for deprecations and upgrade the SolverParameter as needed. 
-bool UpgradeSolverAsNeeded(const string& param_file, SolverParameter* param) { - bool success = true; - // Try to upgrade old style solver_type enum fields into new string type - if (SolverNeedsTypeUpgrade(*param)) { - LOG(INFO) << "Attempting to upgrade input file specified using deprecated " - << "'solver_type' field (enum)': " << param_file; - if (!UpgradeSolverType(param)) { - success = false; - LOG(ERROR) << "Warning: had one or more problems upgrading " - << "SolverType (see above)."; - } else { - LOG(INFO) << "Successfully upgraded file specified using deprecated " - << "'solver_type' field (enum) to 'type' field (string)."; - LOG(WARNING) << "Note that future Caffe releases will only support " - << "'type' field (string) for a solver's type."; - } - } - return success; -} - -// Read parameters from a file into a SolverParameter proto message. -void ReadSolverParamsFromTextFileOrDie(const string& param_file, - SolverParameter* param) { - CHECK(ReadProtoFromTextFile(param_file, param)) - << "Failed to parse SolverParameter file: " << param_file; - UpgradeSolverAsNeeded(param_file, param); -} - -} // namespace caffe diff --git a/third_party/android-cmake b/third_party/android-cmake deleted file mode 160000 index 556cc14..0000000 --- a/third_party/android-cmake +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 556cc14296c226f753a3778d99d8b60778b7df4f diff --git a/third_party/build-openblas.sh b/third_party/build-openblas.sh deleted file mode 100755 index 03b3f11..0000000 --- a/third_party/build-openblas.sh +++ /dev/null @@ -1,119 +0,0 @@ -#!/bin/bash - -PLATFORM=Android - -# Options for All -OPENBLAS_VERSION=0.2.19 -MAKE_FLAGS="$MAKE_FLAGS -j 4" -BUILD_DIR=".cbuild" - -# Options for Android -ANDROID_NATIVE_API_LEVEL=21 -# Options: "arm64-v8a" "armeabi-v7a with NEON" -ANDROID_ABI="armeabi-v7a with NEON" - -# Build Environment -if [ "$(uname)" = "Darwin" ]; then - OS=darwin -elif [ "$(expr substr $(uname -s) 1 5)" = "Linux" ]; then - OS=linux -elif [ "$(expr substr $(uname -s) 1 10)" = "MINGW32_NT" || - "$(expr substr $(uname -s) 1 9)" = "CYGWIN_NT" ]; then - OS=windows -else - echo "Unknown OS" - exit 1 -fi - -if [ "$(uname -m)" = "x86_64" ]; then - BIT=x86_64 -else - BIT=x86 -fi - -echo "$(tput setaf 2)" -echo Building Openblas for $PLATFORM -echo "$(tput sgr0)" - -RUN_DIR=$PWD - -function fetch-OpenBLAS { - echo "$(tput setaf 2)" - echo "##########################################" - echo " Fetch Openblas $OPENBLAS_VERSION from source." - echo "##########################################" - echo "$(tput sgr0)" - - if [ ! -f OpenBLAS-${OPENBLAS_VERSION}.tar.gz ]; then - curl -L https://github.com/xianyi/OpenBLAS/archive/v${OPENBLAS_VERSION}.tar.gz --output OpenBLAS-${OPENBLAS_VERSION}.tar.gz - fi - if [ -d OpenBLAS-${OPENBLAS_VERSION} ]; then - rm -rf OpenBLAS-${OPENBLAS_VERSION} - fi - tar -xzf OpenBLAS-${OPENBLAS_VERSION}.tar.gz -} - -function build-Android { - echo "$(tput setaf 2)" - echo "#####################" - echo " Building OpenBLAS for $PLATFORM" - echo "#####################" - echo "$(tput sgr0)" - - # Test ENV NDK_HOME - if [ ! -d "$NDK_HOME" ]; then - echo "$(tput setaf 2)" - echo "###########################################################" - echo " ERROR: Invalid NDK_HOME=\"$NDK_HOME\" env variable, exit. 
" - echo "###########################################################" - echo "$(tput sgr0)" - exit 1 - fi - - if [ "${ANDROID_ABI}" = "armeabi-v7a with NEON" ]; then - CROSS_SUFFIX=$NDK_HOME/toolchains/arm-linux-androideabi-4.9/prebuilt/${OS}-${BIT}/bin/arm-linux-androideabi- - SYSROOT=$NDK_HOME/platforms/android-$ANDROID_NATIVE_API_LEVEL/arch-arm - TARGET=ARMV7 - BINARY=32 - elif [ "${ANDROID_ABI}" = "arm64-v8a" ]; then - CROSS_SUFFIX=$NDK_HOME/toolchains/aarch64-linux-android-4.9/prebuilt/${OS}-${BIT}/bin/aarch64-linux-android- - SYSROOT=$NDK_HOME/platforms/android-$ANDROID_NATIVE_API_LEVEL/arch-arm64 - TARGET=ARMV8 - BINARY=64 - else - echo "Error: not support OpenBLAS for ABI: ${ANDROID_ABI}" - exit 1 - fi - - mkdir -p OpenBLAS-$TARGET - cd OpenBLAS-$OPENBLAS_VERSION - make ${MAKE_FLAGS} \ - NOFORTRAN=1 \ - NO_NOLAPACKE=1 \ - SMP=1 \ - USE_THREAD=1 \ - NUM_THREAD=4 \ - CROSS_SUFFIX="$CROSS_SUFFIX" \ - CC="${CROSS_SUFFIX}gcc --sysroot=$SYSROOT" \ - HOSTCC=gcc \ - TARGET=$TARGET \ - BINARY=$BINARY \ - PREFIX="../OpenBLAS-$TARGET" - make ${MAKE_FLAGS} \ - NOFORTRAN=1 \ - NO_NOLAPACKE=1 \ - SMP=1 \ - USE_THREAD=1 \ - NUM_THREAD=4 \ - CROSS_SUFFIX="$CROSS_SUFFIX" \ - CC="${CROSS_SUFFIX}gcc --sysroot=$SYSROOT" \ - HOSTCC=gcc \ - TARGET=$TARGET \ - BINARY=$BINARY \ - PREFIX="../OpenBLAS-$TARGET" \ - install - cd .. -} - -fetch-OpenBLAS -build-$PLATFORM diff --git a/third_party/build-protobuf-3.1.0.sh b/third_party/build-protobuf-3.1.0.sh deleted file mode 100755 index caf9a84..0000000 --- a/third_party/build-protobuf-3.1.0.sh +++ /dev/null @@ -1,184 +0,0 @@ -#!/bin/bash - -# TARGET: Linux, Android, iPhoneOS, iPhoneSimulator, MacOSX -if [ "$1" = "" ]; then - TARGET=Linux -else - TARGET=$1 -fi - -# Options for All -PB_VERSION=3.1.0 -MAKE_FLAGS="$MAKE_FLAGS -j 4" -BUILD_DIR=".cbuild" - -# Options for Android -ANDROID_ABI="armeabi-v7a with NEON" -ANDROID_NATIVE_API_LEVEL=21 -BUILD_PROTOC=OFF - -echo "$(tput setaf 2)" -echo Building Google Protobuf for $TARGET -echo "$(tput sgr0)" - -RUN_DIR=$PWD - -function fetch-protobuf { - echo "$(tput setaf 2)" - echo "##########################################" - echo " Fetch Google Protobuf $PB_VERSION from source." - echo "##########################################" - echo "$(tput sgr0)" - - if [ ! -f protobuf-${PB_VERSION}.tar.gz ]; then - curl -L https://github.com/google/protobuf/archive/v${PB_VERSION}.tar.gz --output protobuf-${PB_VERSION}.tar.gz - fi - if [ -d protobuf-${PB_VERSION} ]; then - rm -rf protobuf-${PB_VERSION} - fi - tar -xzf protobuf-${PB_VERSION}.tar.gz -} - -function build-Linux { - echo "$(tput setaf 2)" - echo "#####################" - echo " Building protobuf for $TARGET" - echo "#####################" - echo "$(tput sgr0)" - - mkdir -p protobuf-$PB_VERSION/$BUILD_DIR - rm -rf protobuf-$PB_VERSION/$BUILD_DIR/* - cd protobuf-$PB_VERSION/$BUILD_DIR - cmake ../cmake -DCMAKE_INSTALL_PREFIX=../../protobuf-$TARGET \ - -Dprotobuf_BUILD_TESTS=OFF \ - -Dprotobuf_BUILD_SHARED_LIBS=OFF \ - -Dprotobuf_WITH_ZLIB=OFF - make ${MAKE_FLAGS} - make install - mkdir -p ../../protobuf - cd ../../protobuf - rm -f lib include bin - ln -s ../protobuf-$TARGET/lib lib - ln -s ../protobuf-$TARGET/include include - ln -s ../protobuf-$TARGET/bin bin - cd .. -} - -function build-MacOSX { - build-Linux -} - -function build-Android { - echo "$(tput setaf 2)" - echo "#####################" - echo " Building protobuf for $TARGET" - echo "#####################" - echo "$(tput sgr0)" - - # Test ENV NDK_HOME - if [ ! 
-d "$NDK_HOME" ]; then - echo "$(tput setaf 2)" - echo "###########################################################" - echo " ERROR: Invalid NDK_HOME=\"$NDK_HOME\" env variable, exit. " - echo "###########################################################" - echo "$(tput sgr0)" - exit 1 - fi - - mkdir -p protobuf-$PB_VERSION/$BUILD_DIR - rm -rf protobuf-$PB_VERSION/$BUILD_DIR/* - cd protobuf-$PB_VERSION/$BUILD_DIR - # if [ "$BUILD_PROTOC" = "OFF" ]; then - # # Do not cross build protoc - # sed -i "s/include(libprotoc.cmake)/#include(libprotoc.cmake)/" ../cmake/CMakeLists.txt - # sed -i "s/include(protoc.cmake)/#include(protoc.cmake)/" ../cmake/CMakeLists.txt - # fi - cmake ../cmake -DCMAKE_INSTALL_PREFIX=../../protobuf-$TARGET\ - -DCMAKE_TOOLCHAIN_FILE="../../android-cmake/android.toolchain.cmake" \ - -DANDROID_NDK="$NDK_HOME" \ - -DANDROID_ABI="$ANDROID_ABI" \ - -DANDROID_NATIVE_API_LEVEL="$ANDROID_NATIVE_API_LEVEL" \ - -Dprotobuf_BUILD_TESTS=OFF \ - -Dprotobuf_BUILD_SHARED_LIBS=OFF \ - -Dprotobuf_WITH_ZLIB=OFF - make ${MAKE_FLAGS} - make install - mkdir -p ../../protobuf - cd ../../protobuf - rm -f lib include - ln -s ../protobuf-$TARGET/lib lib - ln -s ../protobuf-$TARGET/include include - cd .. -} - -function build-iPhoneSimulator { - echo "$(tput setaf 2)" - echo "#####################" - echo " Building protobuf for $TARGET" - echo "#####################" - echo "$(tput sgr0)" - - mkdir -p protobuf-$PB_VERSION/$BUILD_DIR - rm -rf protobuf-$PB_VERSION/$BUILD_DIR/* - cd protobuf-$PB_VERSION/$BUILD_DIR - # if [ "$BUILD_PROTOC" = "OFF" ]; then - # # Do not cross build protoc - # sed -i "s/include(libprotoc.cmake)/#include(libprotoc.cmake)/" ../cmake/CMakeLists.txt - # sed -i "s/include(protoc.cmake)/#include(protoc.cmake)/" ../cmake/CMakeLists.txt - # fi - cmake ../cmake -DCMAKE_INSTALL_PREFIX=../../protobuf-$TARGET\ - -DCMAKE_TOOLCHAIN_FILE="../../ios-cmake/toolchain/iOS.cmake" \ - -DIOS_PLATFORM=SIMULATOR \ - -Dprotobuf_BUILD_TESTS=OFF \ - -Dprotobuf_BUILD_SHARED_LIBS=OFF \ - -Dprotobuf_WITH_ZLIB=OFF - make ${MAKE_FLAGS} - make install - mkdir -p ../../protobuf - cd ../../protobuf - rm -f lib include - ln -s ../protobuf-$TARGET/lib lib - ln -s ../protobuf-$TARGET/include include - cd .. -} - -function build-iPhoneOS { - echo "$(tput setaf 2)" - echo "#####################" - echo " Building protobuf for $TARGET" - echo "#####################" - echo "$(tput sgr0)" - - mkdir -p protobuf-$PB_VERSION/$BUILD_DIR - rm -rf protobuf-$PB_VERSION/$BUILD_DIR/* - cd protobuf-$PB_VERSION/$BUILD_DIR - # if [ "$BUILD_PROTOC" = "OFF" ]; then - # # Do not cross build protoc - # sed -i "s/include(libprotoc.cmake)/#include(libprotoc.cmake)/" ../cmake/CMakeLists.txt - # sed -i "s/include(protoc.cmake)/#include(protoc.cmake)/" ../cmake/CMakeLists.txt - # fi - cmake ../cmake -DCMAKE_INSTALL_PREFIX=../../protobuf-$TARGET\ - -DCMAKE_TOOLCHAIN_FILE="../../ios-cmake/toolchain/iOS.cmake" \ - -DIOS_PLATFORM=OS \ - -DCMAKE_CXX_FLAGS="-fembed-bitcode" \ - -Dprotobuf_BUILD_TESTS=OFF \ - -Dprotobuf_BUILD_SHARED_LIBS=OFF \ - -Dprotobuf_WITH_ZLIB=OFF - make ${MAKE_FLAGS} - make install - mkdir -p ../../protobuf - cd ../../protobuf - rm -f lib include - ln -s ../protobuf-$TARGET/lib lib - ln -s ../protobuf-$TARGET/include include - cd .. 
-} - -fetch-protobuf -if [ "$TARGET" != "Linux" -a "$TARGET" != "MacOSX" ]; then - TARGET_SAVE=$TARGET - TARGET=Linux - build-$TARGET - TARGET=$TARGET_SAVE -fi -build-$TARGET diff --git a/third_party/ios-cmake b/third_party/ios-cmake deleted file mode 160000 index e3a7695..0000000 --- a/third_party/ios-cmake +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e3a7695d1d68ef4eca716031f94e8475b1589b1f diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt deleted file mode 100644 index b101bf2..0000000 --- a/tools/CMakeLists.txt +++ /dev/null @@ -1,35 +0,0 @@ -# Collect source files -file(GLOB_RECURSE srcs ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) - -include(../cmake/Modules/FindGFlags.cmake) -if(GFLAGS_FOUND) - include_directories(${GFLAGS_INCLUDE_DIRS}) - list(APPEND Caffe_LINK ${GFLAGS_LIBRARIES}) -endif() - -# Build each source file independently -foreach(source ${srcs}) - get_filename_component(name ${source} NAME_WE) - - # caffe target already exits - if(name MATCHES "caffe") - set(name ${name}.bin) - endif() - - # target - add_executable(${name} ${source}) - target_link_libraries(${name} ${Caffe_LINK}) - caffe_default_properties(${name}) - - # set back RUNTIME_OUTPUT_DIRECTORY - caffe_set_runtime_directory(${name} "${PROJECT_BINARY_DIR}/tools") - caffe_set_solution_folder(${name} tools) - - # restore output name without suffix - if(name MATCHES "caffe.bin") - set_target_properties(${name} PROPERTIES OUTPUT_NAME caffe) - endif() - - # Install - install(TARGETS ${name} DESTINATION bin) -endforeach(source) diff --git a/tools/caffe.cpp b/tools/caffe.cpp deleted file mode 100644 index 99411a0..0000000 --- a/tools/caffe.cpp +++ /dev/null @@ -1,488 +0,0 @@ -#ifdef WITH_PYTHON_LAYER -#include "boost/python.hpp" -namespace bp = boost::python; -#endif - -#include -#include -#include -#include - -#ifdef USE_BOOST -#include "boost/algorithm/string.hpp" -#endif -#include "caffe/caffe.hpp" -#ifdef NO_CAFFE_MOBILE -#include "caffe/util/signal_handler.h" -#else -#include "caffe/util/benchmark.hpp" -#include -#endif - -using caffe::Blob; -using caffe::Caffe; -using caffe::Net; -using caffe::Layer; -#ifdef NO_CAFFE_MOBILE -using caffe::Solver; -#endif -using caffe::shared_ptr; -using caffe::string; -using caffe::Timer; -using caffe::vector; -using std::ostringstream; - -DEFINE_string(gpu, "", - "Optional; run in GPU mode on given device IDs separated by ','." - "Use '-gpu all' to run on all available GPUs. The effective training " - "batch size is multiplied by the number of devices."); -DEFINE_string(solver, "", - "The solver definition protocol buffer text file."); -DEFINE_string(model, "", - "The model definition protocol buffer text file."); -DEFINE_string(phase, "", - "Optional; network phase (TRAIN or TEST). Only used for 'time'."); -DEFINE_int32(level, 0, - "Optional; network level."); -DEFINE_string(stage, "", - "Optional; network stages (not to be confused with phase), " - "separated by ','."); -DEFINE_string(snapshot, "", - "Optional; the snapshot solver state to resume training."); -DEFINE_string(weights, "", - "Optional; the pretrained weights to initialize finetuning, " - "separated by ','. 
Cannot be set simultaneously with snapshot."); -DEFINE_int32(iterations, 50, - "The number of iterations to run."); -DEFINE_string(sigint_effect, "stop", - "Optional; action to take when a SIGINT signal is received: " - "snapshot, stop or none."); -DEFINE_string(sighup_effect, "snapshot", - "Optional; action to take when a SIGHUP signal is received: " - "snapshot, stop or none."); -DEFINE_int32(alsologtostderr, 1, ""); - -// A simple registry for caffe commands. -typedef int (*BrewFunction)(); -typedef std::map BrewMap; -BrewMap g_brew_map; - -#define RegisterBrewFunction(func) \ -namespace { \ -class __Registerer_##func { \ - public: /* NOLINT */ \ - __Registerer_##func() { \ - g_brew_map[#func] = &func; \ - } \ -}; \ -__Registerer_##func g_registerer_##func; \ -} - -static BrewFunction GetBrewFunction(const caffe::string& name) { - if (g_brew_map.count(name)) { - return g_brew_map[name]; - } else { - LOG(ERROR) << "Available caffe actions:"; - for (BrewMap::iterator it = g_brew_map.begin(); - it != g_brew_map.end(); ++it) { - LOG(ERROR) << "\t" << it->first; - } - LOG(FATAL) << "Unknown action: " << name; - return NULL; // not reachable, just to suppress old compiler warnings. - } -} - -// Parse GPU ids or use all available devices -static void get_gpus(vector* gpus) { -#ifndef CPU_ONLY - if (FLAGS_gpu == "all") { - int count = 0; -#ifndef CPU_ONLY - CUDA_CHECK(cudaGetDeviceCount(&count)); -#else - NO_GPU; -#endif - for (int i = 0; i < count; ++i) { - gpus->push_back(i); - } - } else if (FLAGS_gpu.size()) { - vector strings; - boost::split(strings, FLAGS_gpu, boost::is_any_of(",")); - for (int i = 0; i < strings.size(); ++i) { - gpus->push_back(boost::lexical_cast(strings[i])); - } - } else { - CHECK_EQ(gpus->size(), 0); - } -#endif -} - -// Parse phase from flags -caffe::Phase get_phase_from_flags(caffe::Phase default_value) { - if (FLAGS_phase == "") - return default_value; - if (FLAGS_phase == "TRAIN") - return caffe::TRAIN; - if (FLAGS_phase == "TEST") - return caffe::TEST; - LOG(FATAL) << "phase must be \"TRAIN\" or \"TEST\""; - return caffe::TRAIN; // Avoid warning -} - -// Parse stages from flags -vector get_stages_from_flags() { - vector stages; -#ifdef USE_BOOST - boost::split(stages, FLAGS_stage, boost::is_any_of(",")); -#else - stages.push_back("TEST"); -#endif - return stages; -} - -// caffe commands to call by -// caffe -// -// To add a command, define a function "int command()" and register it with -// RegisterBrewFunction(action); - -// Device Query: show diagnostic information for a GPU device. -int device_query() { -#ifdef NO_CAFFE_MOBILE - LOG(INFO) << "Querying GPUs " << FLAGS_gpu; - vector gpus; - get_gpus(&gpus); - for (int i = 0; i < gpus.size(); ++i) { - caffe::Caffe::SetDevice(gpus[i]); - caffe::Caffe::DeviceQuery(); - } -#endif - return 0; -} -RegisterBrewFunction(device_query); - -#ifdef NO_CAFFE_MOBILE -// Load the weights from the specified caffemodel(s) into the train and -// test nets. -void CopyLayers(caffe::Solver* solver, const std::string& model_list) { - std::vector model_names; - boost::split(model_names, model_list, boost::is_any_of(",") ); - for (int i = 0; i < model_names.size(); ++i) { - LOG(INFO) << "Finetuning from " << model_names[i]; - solver->net()->CopyTrainedLayersFrom(model_names[i]); - for (int j = 0; j < solver->test_nets().size(); ++j) { - solver->test_nets()[j]->CopyTrainedLayersFrom(model_names[i]); - } - } -} - - -// Translate the signal effect the user specified on the command-line to the -// corresponding enumeration. 
-caffe::SolverAction::Enum GetRequestedAction( - const std::string& flag_value) { - if (flag_value == "stop") { - return caffe::SolverAction::STOP; - } - if (flag_value == "snapshot") { - return caffe::SolverAction::SNAPSHOT; - } - if (flag_value == "none") { - return caffe::SolverAction::NONE; - } - LOG(FATAL) << "Invalid signal effect \""<< flag_value << "\" was specified"; -} -#endif - -// Train / Finetune a model. -int train() { -#ifdef NO_CAFFE_MOBILE - CHECK_GT(FLAGS_solver.size(), 0) << "Need a solver definition to train."; - CHECK(!FLAGS_snapshot.size() || !FLAGS_weights.size()) - << "Give a snapshot to resume training or weights to finetune " - "but not both."; - vector stages = get_stages_from_flags(); - - caffe::SolverParameter solver_param; - caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param); - - solver_param.mutable_train_state()->set_level(FLAGS_level); - for (int i = 0; i < stages.size(); i++) { - solver_param.mutable_train_state()->add_stage(stages[i]); - } - - // If the gpus flag is not provided, allow the mode and device to be set - // in the solver prototxt. - if (FLAGS_gpu.size() == 0 - && solver_param.has_solver_mode() - && solver_param.solver_mode() == caffe::SolverParameter_SolverMode_GPU) { - if (solver_param.has_device_id()) { - FLAGS_gpu = "" + - boost::lexical_cast(solver_param.device_id()); - } else { // Set default GPU if unspecified - FLAGS_gpu = "" + boost::lexical_cast(0); - } - } - - vector gpus; - get_gpus(&gpus); - if (gpus.size() == 0) { - LOG(INFO) << "Use CPU."; - Caffe::set_mode(Caffe::CPU); - } else { - ostringstream s; - for (int i = 0; i < gpus.size(); ++i) { - s << (i ? ", " : "") << gpus[i]; - } - LOG(INFO) << "Using GPUs " << s.str(); -#ifndef CPU_ONLY - cudaDeviceProp device_prop; - for (int i = 0; i < gpus.size(); ++i) { - cudaGetDeviceProperties(&device_prop, gpus[i]); - LOG(INFO) << "GPU " << gpus[i] << ": " << device_prop.name; - } -#endif - solver_param.set_device_id(gpus[0]); - Caffe::SetDevice(gpus[0]); - Caffe::set_mode(Caffe::GPU); - Caffe::set_solver_count(gpus.size()); - } - - caffe::SignalHandler signal_handler( - GetRequestedAction(FLAGS_sigint_effect), - GetRequestedAction(FLAGS_sighup_effect)); - - shared_ptr > - solver(caffe::SolverRegistry::CreateSolver(solver_param)); - - solver->SetActionFunction(signal_handler.GetActionFunction()); - - if (FLAGS_snapshot.size()) { - LOG(INFO) << "Resuming from " << FLAGS_snapshot; - solver->Restore(FLAGS_snapshot.c_str()); - } else if (FLAGS_weights.size()) { - CopyLayers(solver.get(), FLAGS_weights); - } - - LOG(INFO) << "Starting Optimization"; - if (gpus.size() > 1) { -#ifdef USE_NCCL - caffe::NCCL nccl(solver); - nccl.Run(gpus, FLAGS_snapshot.size() > 0 ? FLAGS_snapshot.c_str() : NULL); -#else - LOG(FATAL) << "Multi-GPU execution not available - rebuild with USE_NCCL"; -#endif - } else { - solver->Solve(); - } - LOG(INFO) << "Optimization Done."; -#endif - return 0; -} -RegisterBrewFunction(train); - - -// Test: score a model. 
-int test() { - CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to score."; - CHECK_GT(FLAGS_weights.size(), 0) << "Need model weights to score."; - vector stages = get_stages_from_flags(); - - // Set device id and mode - vector gpus; - get_gpus(&gpus); - if (gpus.size() != 0) { - LOG(INFO) << "Use GPU with device ID " << gpus[0]; -#ifndef CPU_ONLY - cudaDeviceProp device_prop; - cudaGetDeviceProperties(&device_prop, gpus[0]); - LOG(INFO) << "GPU device name: " << device_prop.name; -#endif - Caffe::SetDevice(gpus[0]); - Caffe::set_mode(Caffe::GPU); - } else { - LOG(INFO) << "Use CPU."; - Caffe::set_mode(Caffe::CPU); - } - // Instantiate the caffe net. - Net caffe_net(FLAGS_model, caffe::TEST, FLAGS_level, &stages); - caffe_net.CopyTrainedLayersFrom(FLAGS_weights); - LOG(INFO) << "Running for " << FLAGS_iterations << " iterations."; - - vector test_score_output_id; - vector test_score; - float loss = 0; - for (int i = 0; i < FLAGS_iterations; ++i) { - float iter_loss; - const vector*>& result = - caffe_net.Forward(&iter_loss); - loss += iter_loss; - int idx = 0; - for (int j = 0; j < result.size(); ++j) { - const float* result_vec = result[j]->cpu_data(); - for (int k = 0; k < result[j]->count(); ++k, ++idx) { - const float score = result_vec[k]; - if (i == 0) { - test_score.push_back(score); - test_score_output_id.push_back(j); - } else { - test_score[idx] += score; - } - const std::string& output_name = caffe_net.blob_names()[ - caffe_net.output_blob_indices()[j]]; - LOG(INFO) << "Batch " << i << ", " << output_name << " = " << score; - } - } - } - loss /= FLAGS_iterations; - LOG(INFO) << "Loss: " << loss; - for (int i = 0; i < test_score.size(); ++i) { - const std::string& output_name = caffe_net.blob_names()[ - caffe_net.output_blob_indices()[test_score_output_id[i]]]; - const float loss_weight = caffe_net.blob_loss_weights()[ - caffe_net.output_blob_indices()[test_score_output_id[i]]]; - std::ostringstream loss_msg_stream; - const float mean_score = test_score[i] / FLAGS_iterations; - if (loss_weight) { - loss_msg_stream << " (* " << loss_weight - << " = " << loss_weight * mean_score << " loss)"; - } - LOG(INFO) << output_name << " = " << mean_score << loss_msg_stream.str(); - } - - return 0; -} -RegisterBrewFunction(test); - - -// Time: benchmark the execution time of a model. -int time() { -#ifdef NO_BACKWORD - CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to time."; - caffe::Phase phase = get_phase_from_flags(caffe::TRAIN); - vector stages = get_stages_from_flags(); - - // Set device id and mode - vector gpus; - get_gpus(&gpus); - if (gpus.size() != 0) { - LOG(INFO) << "Use GPU with device ID " << gpus[0]; - Caffe::SetDevice(gpus[0]); - Caffe::set_mode(Caffe::GPU); - } else { - LOG(INFO) << "Use CPU."; - Caffe::set_mode(Caffe::CPU); - } - // Instantiate the caffe net. - Net caffe_net(FLAGS_model, phase, FLAGS_level, &stages); - - // Do a clean forward and backward pass, so that memory allocation are done - // and future iterations will be more stable. - LOG(INFO) << "Performing Forward"; - // Note that for the speed benchmark, we will assume that the network does - // not take any input blobs. 
- float initial_loss; - caffe_net.Forward(&initial_loss); - LOG(INFO) << "Initial loss: " << initial_loss; - LOG(INFO) << "Performing Backward"; - caffe_net.Backward(); - - const vector > >& layers = caffe_net.layers(); - const vector*> >& bottom_vecs = caffe_net.bottom_vecs(); - const vector*> >& top_vecs = caffe_net.top_vecs(); - const vector >& bottom_need_backward = - caffe_net.bottom_need_backward(); - LOG(INFO) << "*** Benchmark begins ***"; - LOG(INFO) << "Testing for " << FLAGS_iterations << " iterations."; - Timer total_timer; - total_timer.Start(); - Timer forward_timer; - Timer backward_timer; - Timer timer; - std::vector forward_time_per_layer(layers.size(), 0.0); - std::vector backward_time_per_layer(layers.size(), 0.0); - double forward_time = 0.0; - double backward_time = 0.0; - for (int j = 0; j < FLAGS_iterations; ++j) { - Timer iter_timer; - iter_timer.Start(); - forward_timer.Start(); - for (int i = 0; i < layers.size(); ++i) { - timer.Start(); - layers[i]->Forward(bottom_vecs[i], top_vecs[i]); - forward_time_per_layer[i] += timer.MicroSeconds(); - } - forward_time += forward_timer.MicroSeconds(); - backward_timer.Start(); - for (int i = layers.size() - 1; i >= 0; --i) { - timer.Start(); - layers[i]->Backward(top_vecs[i], bottom_need_backward[i], - bottom_vecs[i]); - backward_time_per_layer[i] += timer.MicroSeconds(); - } - backward_time += backward_timer.MicroSeconds(); - LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: " - << iter_timer.MilliSeconds() << " ms."; - } - LOG(INFO) << "Average time per layer: "; - for (int i = 0; i < layers.size(); ++i) { - const caffe::string& layername = layers[i]->layer_param().name(); - LOG(INFO) << std::setfill(' ') << std::setw(10) << layername << - "\tforward: " << forward_time_per_layer[i] / 1000 / - FLAGS_iterations << " ms."; - LOG(INFO) << std::setfill(' ') << std::setw(10) << layername << - "\tbackward: " << backward_time_per_layer[i] / 1000 / - FLAGS_iterations << " ms."; - } - total_timer.Stop(); - LOG(INFO) << "Average Forward pass: " << forward_time / 1000 / - FLAGS_iterations << " ms."; - LOG(INFO) << "Average Backward pass: " << backward_time / 1000 / - FLAGS_iterations << " ms."; - LOG(INFO) << "Average Forward-Backward: " << total_timer.MilliSeconds() / - FLAGS_iterations << " ms."; - LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms."; - LOG(INFO) << "*** Benchmark ends ***"; -#endif - return 0; -} -RegisterBrewFunction(time); - -int main(int argc, char** argv) { - // Print output to stderr (while still logging). - FLAGS_alsologtostderr = 1; - // Set version - gflags::SetVersionString(AS_STRING(CAFFE_VERSION)); - // Usage message. - gflags::SetUsageMessage("command line brew\n" - "usage: caffe \n\n" - "commands:\n" -#ifdef NO_CAFFE_MOBILE - " train train or finetune a model\n" -#endif - " test score a model\n" -#ifdef NO_CAFFE_MOBILE - " device_query show GPU diagnostic information\n" -#endif - " time benchmark model execution time"); - - // Run tool or show usage. - caffe::GlobalInit(&argc, &argv); -#ifndef NO_CAFFE_MOBILE - ::gflags::ParseCommandLineFlags(&argc, &argv, true); -#endif - if (argc == 2) { -#ifdef WITH_PYTHON_LAYER - try { -#endif - return GetBrewFunction(caffe::string(argv[1]))(); -#ifdef WITH_PYTHON_LAYER - } catch (bp::error_already_set) { - PyErr_Print(); - return 1; - } -#endif - } else { - gflags::ShowUsageWithFlagsRestrict(argv[0], "tools/caffe"); - } -}
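
For reference, a minimal sketch of what the removed UpgradeNetInput()/NetNeedsInputUpgrade() pair did: it rewrote a legacy NetParameter that still used bare "input"/"input_dim" fields into an explicit Input layer placed at the front of the net. This sketch assumes the generated caffe.pb.h and the upgrade_proto header from the removed tree; the main() wrapper, the "legacy_net" name and the "data" blob name are illustrative only, not taken from the deleted sources.

    // Sketch: exercising the removed legacy-input upgrade path.
    #include <iostream>
    #include "caffe/proto/caffe.pb.h"        // generated from the removed caffe.proto
    #include "caffe/util/upgrade_proto.hpp"  // declared the Upgrade* helpers

    int main() {
      caffe::NetParameter net;
      net.set_name("legacy_net");
      // Legacy form: one named input plus four input_dim values (N, C, H, W).
      net.add_input("data");
      net.add_input_dim(1);
      net.add_input_dim(3);
      net.add_input_dim(224);
      net.add_input_dim(224);

      if (caffe::NetNeedsInputUpgrade(net)) {
        caffe::UpgradeNetInput(&net);
      }

      // After the upgrade the net begins with an Input layer whose shape
      // carries the four legacy dimensions; input/input_dim are cleared.
      const caffe::LayerParameter& input_layer = net.layer(0);
      std::cout << input_layer.type() << " -> " << input_layer.top(0) << std::endl;
      return 0;
    }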