summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYe Joey <joeyye@openailab.com>2017-07-06 14:39:37 +0800
committerYe Joey <joeyye@openailab.com>2017-07-06 14:39:37 +0800
commit70eb0cc1f22c629882e0ce4425035f043350d024 (patch)
tree81d606b255f6312d09b094292c19f861df533b48
parent0c199e3577a0a968ba4a0c07ae5a35bce5348eba (diff)
downloadcaffeonacl-70eb0cc1f22c629882e0ce4425035f043350d024.tar.gz
caffeonacl-70eb0cc1f22c629882e0ce4425035f043350d024.tar.bz2
caffeonacl-70eb0cc1f22c629882e0ce4425035f043350d024.zip
1. Refine Makefile
2. Rewrite the release notes with markdown format 3. Add the missed files in SqueezeNet
-rw-r--r--README.md2
-rw-r--r--acl_openailab/Makefile.config.acl147
-rw-r--r--acl_openailab/README.md83
-rw-r--r--acl_openailab/Reversion.md113
-rw-r--r--acl_openailab/pics/openailab.pngbin0 -> 376527 bytes
-rw-r--r--models/SqueezeNet/README.md4
-rw-r--r--models/SqueezeNet/SqueezeNet_v1.1/squeezenet.1.1.deploy.prototxt556
-rw-r--r--unit_tests/Makefile80
8 files changed, 936 insertions, 49 deletions
diff --git a/README.md b/README.md
index a20099a9..bf7d480b 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
CaffeOnACL is a project to use ARM Compute Library (NEON+GPU) to speed up caffe and provide utilities to debug, profile and tune application performance.
Check out the documents for the details like
-- [release notes](https://github.com/OAID/caffeOnACL/tree/master/docs/caffeOnACL_release_notes_0_2_0.docx)
+- [release notes](https://github.com/OAID/caffeOnACL/tree/master/acl_openailab/README.md)
- [user guide](https://github.com/OAID/caffeOnACL/tree/master/docs/caffeOnACL_user_guide_0_2_0.docx)
diff --git a/acl_openailab/Makefile.config.acl b/acl_openailab/Makefile.config.acl
new file mode 100644
index 00000000..c909a424
--- /dev/null
+++ b/acl_openailab/Makefile.config.acl
@@ -0,0 +1,147 @@
+## Refer to http://caffe.berkeleyvision.org/installation.html
+# Contributions simplifying and improving our build system are welcome!
+
+# cuDNN acceleration switch (uncomment to build with cuDNN).
+# USE_CUDNN := 1
+
+# CPU-only switch (uncomment to build without GPU support).
+CPU_ONLY := 1
+
+# Enable ACL (ARM Compute Library)
+USE_ACL :=1
+
+USE_PROFILING := 0
+
+# ACL (ARM Compute Library) settings. Only evaluated when USE_ACL is 1.
+# ACL_ROOT is not assigned in this file; it is expected to be exported in the
+# environment (or passed on the make command line) and to point at the ACL
+# source tree.
+ifeq ($(strip $(USE_ACL)),1)
+  # Fail at parse time rather than silently building with empty paths.
+  ifeq ($(strip $(ACL_ROOT)),)
+    $(error ACL_ROOT is not specified. Use "export ACL_ROOT='path of ACL source code'")
+  endif
+
+  # Header search paths: the include/ directory and the tree root itself.
+  ACL_INCS     := $(ACL_ROOT)/include
+  ACL_INCS     += $(ACL_ROOT)
+  # Library search paths inside the ACL build directory.
+  ACL_LIBS_DIR := $(ACL_ROOT)/build
+  ACL_LIBS_DIR += $(ACL_ROOT)/build/arm_compute
+  # Libraries to link against (bare names, without the -l prefix).
+  ACL_LIBS     := arm_compute OpenCL
+endif
+
+# uncomment to disable IO dependencies and corresponding data layers
+# USE_OPENCV := 0
+# USE_LEVELDB := 0
+# USE_LMDB := 0
+
+# uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary)
+# You should not set this flag if you will be reading LMDBs with any
+# possibility of simultaneous read and write
+# ALLOW_LMDB_NOLOCK := 1
+
+# Uncomment if you're using OpenCV 3
+# OPENCV_VERSION := 3
+
+# To customize your choice of compiler, uncomment and set the following.
+# N.B. the default for Linux is g++ and the default for OSX is clang++
+# CUSTOM_CXX := g++
+#CUSTOM_CXX := aarch64-linux-gnu-g++
+#os :=linux
+#arch :=arm64-v8a
+
+# CUDA directory contains bin/ and lib/ directories that we need.
+CUDA_DIR := /usr/local/cuda
+# On Ubuntu 14.04, if cuda tools are installed via
+# "sudo apt-get install nvidia-cuda-toolkit" then use this instead:
+# CUDA_DIR := /usr
+
+# CUDA architecture setting: going with all of them.
+# For CUDA < 6.0, comment the *_50 through *_61 lines for compatibility.
+# For CUDA < 8.0, comment the *_60 and *_61 lines for compatibility.
+CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
+ -gencode arch=compute_20,code=sm_21 \
+ -gencode arch=compute_30,code=sm_30 \
+ -gencode arch=compute_35,code=sm_35 \
+ -gencode arch=compute_50,code=sm_50 \
+ -gencode arch=compute_52,code=sm_52 \
+ -gencode arch=compute_60,code=sm_60 \
+ -gencode arch=compute_61,code=sm_61 \
+ -gencode arch=compute_61,code=compute_61
+
+# BLAS choice:
+# atlas for ATLAS (default)
+# mkl for MKL
+# open for OpenBlas
+#BLAS := atlas
+BLAS := open
+# Custom (MKL/ATLAS/OpenBLAS) include and lib directories.
+# Leave commented to accept the defaults for your choice of BLAS
+# (which should work)!
+# BLAS_INCLUDE := /path/to/your/blas
+# BLAS_LIB := /path/to/your/blas
+
+# Homebrew puts openblas in a directory that is not on the standard search path
+# BLAS_INCLUDE := $(shell brew --prefix openblas)/include
+# BLAS_LIB := $(shell brew --prefix openblas)/lib
+
+# This is required only if you will compile the matlab interface.
+# MATLAB directory should contain the mex binary in /bin.
+# MATLAB_DIR := /usr/local
+# MATLAB_DIR := /Applications/MATLAB_R2012b.app
+
+# NOTE: this is required only if you will compile the python interface.
+# We need to be able to find Python.h and numpy/arrayobject.h.
+PYTHON_INCLUDE := /usr/include/python2.7 \
+ /usr/lib/python2.7/dist-packages/numpy/core/include
+# Anaconda Python distribution is quite popular. Include path:
+# Verify anaconda location, sometimes it's in root.
+# ANACONDA_HOME := $(HOME)/anaconda
+# PYTHON_INCLUDE := $(ANACONDA_HOME)/include \
+ # $(ANACONDA_HOME)/include/python2.7 \
+ # $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include
+
+# Uncomment to use Python 3 (default is Python 2)
+# PYTHON_LIBRARIES := boost_python3 python3.5m
+# PYTHON_INCLUDE := /usr/include/python3.5m \
+# /usr/lib/python3.5/dist-packages/numpy/core/include
+
+# We need to be able to find libpythonX.X.so or .dylib.
+PYTHON_LIB := /usr/lib
+# PYTHON_LIB := $(ANACONDA_HOME)/lib
+
+# Homebrew installs numpy in a non standard path (keg only)
+# PYTHON_INCLUDE += $(dir $(shell python -c 'import numpy.core; print(numpy.core.__file__)'))/include
+# PYTHON_LIB += $(shell brew --prefix numpy)/lib
+
+# Uncomment to support layers written in Python (will link against Python libs)
+# WITH_PYTHON_LAYER := 1
+
+# Whatever else you find you need goes here.
+INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include
+LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib
+
+# If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies
+# INCLUDE_DIRS += $(shell brew --prefix)/include
+# LIBRARY_DIRS += $(shell brew --prefix)/lib
+
+# NCCL acceleration switch (uncomment to build with NCCL)
+# https://github.com/NVIDIA/nccl (last tested version: v1.2.3-1+cuda8.0)
+# USE_NCCL := 1
+
+# Uncomment to use `pkg-config` to specify OpenCV library paths.
+# (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.)
+# USE_PKG_CONFIG := 1
+
+# N.B. both build and distribute dirs are cleared on `make clean`
+BUILD_DIR := build
+DISTRIBUTE_DIR := distribute
+
+# HDF5 support: switch plus the header/library locations and library names.
+# NOTE(review): the library directory is specific to aarch64 and, presumably,
+# to the Debian/Ubuntu serial HDF5 package layout -- adjust these paths when
+# building for another architecture or distribution.
+USE_HDF5 := 1
+HDF5_INCLUDE_DIRS := /usr/include/hdf5/serial
+HDF5_LIBRARY_DIRS := /usr/lib/aarch64-linux-gnu/hdf5/serial
+HDF5_LIBRARIES :=hdf5_hl hdf5
+
+# Uncomment for debugging. Does not work on OSX due to https://github.com/BVLC/caffe/issues/171
+# DEBUG := 1
+
+# The ID of the GPU that 'make runtest' will use to run unit tests.
+TEST_GPUID := 0
+
+# enable pretty build (comment to see full commands)
+Q ?= @
diff --git a/acl_openailab/README.md b/acl_openailab/README.md
new file mode 100644
index 00000000..6eae5df2
--- /dev/null
+++ b/acl_openailab/README.md
@@ -0,0 +1,83 @@
+![OPEN AI LAB](https://github.com/OAID/caffeOnACL/blob/master/acl_openailab/pics/openailab.png)
+
+# 1. Release Notes
+[![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE)
+
+Please refer to [CaffeOnACL Release NOTE](https://github.com/OAID/caffeOnACL/blob/master/acl_openailab/Reversion.md) for details
+
+# 2. Preparation
+## 2.1 General dependencies installation
+ sudo apt-get -y update
+ sudo apt-get -y upgrade
+ sudo apt-get install -y build-essential pkg-config automake autoconf protobuf-compiler cmake cmake-gui
+ sudo apt-get install -y libprotobuf-dev libleveldb-dev libsnappy-dev libhdf5-serial-dev
+ sudo apt-get install -y libatlas-base-dev libgflags-dev libgoogle-glog-dev liblmdb-dev libopenblas-dev
+ sudo apt-get install -y libopencv-dev python-dev
+ sudo apt-get install -y python-numpy python-scipy python-yaml python-six python-pip
+ sudo apt-get install -y scons git
+ sudo apt-get install -y --no-install-recommends libboost-all-dev
+ pip install --upgrade pip
+
+## 2.2 Download source code
+We recommend creating a new directory in your work directory to execute the following steps. For example, you can create a directory named "oaid" in your home directory with the following commands.<br>
+
+ cd ~
+ mkdir oaid
+ cd oaid
+
+#### Download "ACL" (arm_compute : v17.05):
+ git clone https://github.com/ARM-software/ComputeLibrary.git
+#### Download "CaffeOnACL" :
+ git clone https://github.com/OAID/caffeOnACL.git
+#### Download "Googletest" :
+ git clone https://github.com/google/googletest.git
+
+# 3. Build CaffeOnACL
+## 3.1 Build ACL :
+ cd ~/oaid/ComputeLibrary
+ scons Werror=1 -j8 debug=0 asserts=1 neon=1 opencl=1 embed_kernels=1 os=linux arch=arm64-v8a
+
+## 3.2 Build Caffe :
+ export ACL_ROOT=~/oaid/ComputeLibrary
+ cd ~/oaid/caffeOnACL
+ cp acl_openailab/Makefile.config.acl Makefile.config
+ make all distribute
+
+## 3.3 Build Unit tests
+##### Build the gtest libraries
+ cd ~/oaid/googletest
+ cmake CMakeLists.txt
+ make
+ sudo make install
+
+##### Build Caffe Unit tests
+ export CAFFE_ROOT=~/oaid/caffeOnACL
+ cd ~/oaid/caffeOnACL/unit_tests
+ make clean
+ make
+
+## 3.4 Run tests
+If the output of the following two tests matches the examples, the port was successful.
+
+ export LD_LIBRARY_PATH=~/oaid/caffeOnACL/distribute/lib:~/oaid/ComputeLibrary/build
+
+#### Reference Caffenet
+ cd ~/oaid/caffeOnACL/data/ilsvrc12
+ sudo chmod +x get_ilsvrc_aux.sh
+ ./get_ilsvrc_aux.sh
+ cd ../..
+ ./scripts/download_model_binary.py ./models/bvlc_reference_caffenet
+ ./distribute/bin/classification.bin models/bvlc_reference_caffenet/deploy.prototxt models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel data/ilsvrc12/imagenet_mean.binaryproto data/ilsvrc12/synset_words.txt examples/images/cat.jpg
+ output message --
+ ---------- Prediction for examples/images/cat.jpg ----------
+ 0.3094 - "n02124075 Egyptian cat"
+ 0.1761 - "n02123159 tiger cat"
+ 0.1221 - "n02123045 tabby, tabby cat"
+ 0.1132 - "n02119022 red fox, Vulpes vulpes"
+ 0.0421 - "n02085620 Chihuahua"
+
+#### Unit test
+ cd ~/oaid/caffeOnACL/unit_tests
+ ./test_caffe_main
+ output message:
+ [==========] 29 tests from 6 test cases ran. (1236 ms total) [ PASSED ] 29 tests.
diff --git a/acl_openailab/Reversion.md b/acl_openailab/Reversion.md
new file mode 100644
index 00000000..f7c934b0
--- /dev/null
+++ b/acl_openailab/Reversion.md
@@ -0,0 +1,113 @@
+# Release Note
+[![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE)
+
+The release version is 0.2.0. You can download the source code from [OAID/caffeOnACL](https://github.com/OAID/caffeOnACL)
+
+## Verified Platform :
+
+The release is verified on a 64-bit ARMv8 processor<br>
+* Hardware platform : Rockchip RK3399 (firefly RK3399 board)<br>
+* Software platform : Ubuntu 16.04<br>
+
+## 10 Layers accelerated by ACL layers :
+* ConvolutionLayer
+* PoolingLayer
+* LRNLayer
+* ReLULayer
+* SigmoidLayer
+* SoftmaxLayer
+* TanHLayer
+* AbsValLayer
+* BNLLLayer
+* InnerProductLayer
+
+## ACL compatibility issues :
+There are some compatibility issues between ACL and Caffe layers. As a workaround for the issues below, we fall back to Caffe's original layer classes:
+* Normalization in-channel issue
+* Tanh issue
+* Even Kernel size
+* Softmax supporting multi-dimension issue
+* Group issue
+* Performance needs to be fine-tuned in the future
+
+# Changelist
+This release is based on Caffe commit `793bd96351749cb8df16f1581baf3e7d8036ac37`.
+## New Files :
+ Makefile.config.acl
+ cmake/Modules/FindACL.cmake
+ examples/cpp_classification/classification_profiling.cpp
+ examples/cpp_classification/classification_profiling_gpu.cpp
+ include/caffe/acl_layer.hpp
+ include/caffe/layers/acl_absval_layer.hpp
+ include/caffe/layers/acl_base_activation_layer.hpp
+ include/caffe/layers/acl_bnll_layer.hpp
+ include/caffe/layers/acl_conv_layer.hpp
+ include/caffe/layers/acl_inner_product_layer.hpp
+ include/caffe/layers/acl_lrn_layer.hpp
+ include/caffe/layers/acl_pooling_layer.hpp
+ include/caffe/layers/acl_relu_layer.hpp
+ include/caffe/layers/acl_sigmoid_layer.hpp
+ include/caffe/layers/acl_softmax_layer.hpp
+ include/caffe/layers/acl_tanh_layer.hpp
+ models/SqueezeNet/README.md
+ models/SqueezeNet/SqueezeNet_v1.1/squeezenet.1.1.deploy.prototxt
+ src/caffe/acl_layer.cpp
+ src/caffe/layers/acl_absval_layer.cpp
+ src/caffe/layers/acl_base_activation_layer.cpp
+ src/caffe/layers/acl_bnll_layer.cpp
+ src/caffe/layers/acl_conv_layer.cpp
+ src/caffe/layers/acl_inner_product_layer.cpp
+ src/caffe/layers/acl_lrn_layer.cpp
+ src/caffe/layers/acl_pooling_layer.cpp
+ src/caffe/layers/acl_relu_layer.cpp
+ src/caffe/layers/acl_sigmoid_layer.cpp
+ src/caffe/layers/acl_softmax_layer.cpp
+ src/caffe/layers/acl_tanh_layer.cpp
+ unit_tests/Makefile
+ unit_tests/pmu.c
+ unit_tests/pmu.h
+ unit_tests/prof_convolution_layer.cpp
+ unit_tests/sgemm.cpp
+ unit_tests/test.cpp
+ unit_tests/test_caffe_main.cpp
+ unit_tests/test_common.cpp
+ unit_tests/test_convolution_layer.cpp
+ unit_tests/test_fail.cpp
+ unit_tests/test_inner_product_layer.cpp
+ unit_tests/test_lrn_layer.cpp
+ unit_tests/test_neuron_layer.cpp
+ unit_tests/test_pooling_layer.cpp
+ unit_tests/test_softmax_layer.cpp
+ unit_tests/testbed.c
+ unit_tests/testbed.h
+
+## Change Files :
+ Makefile
+ cmake/Dependencies.cmake
+ include/caffe/caffe.hpp
+ include/caffe/common.hpp
+ include/caffe/layer.hpp
+ include/caffe/util/device_alternate.hpp
+ include/caffe/util/hdf5.hpp
+ src/caffe/common.cpp
+ src/caffe/layer.cpp
+ src/caffe/layer_factory.cpp
+ src/caffe/layers/absval_layer.cpp
+ src/caffe/layers/bnll_layer.cpp
+ src/caffe/layers/hdf5_data_layer.cpp
+ src/caffe/layers/hdf5_data_layer.cu
+ src/caffe/layers/hdf5_output_layer.cpp
+ src/caffe/layers/hdf5_output_layer.cu
+ src/caffe/layers/inner_product_layer.cpp
+ src/caffe/net.cpp
+ src/caffe/solvers/sgd_solver.cpp
+ src/caffe/syncedmem.cpp
+ src/caffe/test/test_hdf5_output_layer.cpp
+ src/caffe/test/test_hdf5data_layer.cpp
+ src/caffe/util/hdf5.cpp
+ src/caffe/util/math_functions.cpp
+
+# Issue report
+If you encounter any issue, please report it on the [issue tracker](https://github.com/OAID/caffeOnACL/issues). An issue report should contain the following information:
+* The exact description of the steps that are needed to reproduce the issue
+* The exact description of what happens and what you think is wrong
diff --git a/acl_openailab/pics/openailab.png b/acl_openailab/pics/openailab.png
new file mode 100644
index 00000000..e8dfeef2
--- /dev/null
+++ b/acl_openailab/pics/openailab.png
Binary files differ
diff --git a/models/SqueezeNet/README.md b/models/SqueezeNet/README.md
new file mode 100644
index 00000000..c11e77dd
--- /dev/null
+++ b/models/SqueezeNet/README.md
@@ -0,0 +1,4 @@
+
+Download the model files from https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1
+The model architecture file for deploying is:
+ SqueezeNet_v1.1/squeezenet.1.1.deploy.prototxt
diff --git a/models/SqueezeNet/SqueezeNet_v1.1/squeezenet.1.1.deploy.prototxt b/models/SqueezeNet/SqueezeNet_v1.1/squeezenet.1.1.deploy.prototxt
new file mode 100644
index 00000000..e62fd5d2
--- /dev/null
+++ b/models/SqueezeNet/SqueezeNet_v1.1/squeezenet.1.1.deploy.prototxt
@@ -0,0 +1,556 @@
+# please cite:
+# @article{SqueezeNet,
+# Author = {Forrest N. Iandola and Matthew W. Moskewicz and Khalid Ashraf and Song Han and William J. Dally and Kurt Keutzer},
+# Title = {SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and $<$1MB model size},
+# Journal = {arXiv:1602.07360},
+# Year = {2016}
+# }
+input: "data"
+input_shape {
+ dim: 10
+ dim: 3
+ dim: 227
+ dim: 227
+}
+
+layer {
+ name: "conv1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1"
+ convolution_param {
+ num_output: 64
+ kernel_size: 3
+ stride: 2
+ }
+}
+
+layer {
+ name: "relu_conv1"
+ type: "ReLU"
+ bottom: "conv1"
+ top: "conv1"
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "fire2/squeeze1x1"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "fire2/squeeze1x1"
+ convolution_param {
+ num_output: 16
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire2/relu_squeeze1x1"
+ type: "ReLU"
+ bottom: "fire2/squeeze1x1"
+ top: "fire2/squeeze1x1"
+}
+layer {
+ name: "fire2/expand1x1"
+ type: "Convolution"
+ bottom: "fire2/squeeze1x1"
+ top: "fire2/expand1x1"
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire2/relu_expand1x1"
+ type: "ReLU"
+ bottom: "fire2/expand1x1"
+ top: "fire2/expand1x1"
+}
+layer {
+ name: "fire2/expand3x3"
+ type: "Convolution"
+ bottom: "fire2/squeeze1x1"
+ top: "fire2/expand3x3"
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ }
+}
+layer {
+ name: "fire2/relu_expand3x3"
+ type: "ReLU"
+ bottom: "fire2/expand3x3"
+ top: "fire2/expand3x3"
+}
+layer {
+ name: "fire2/concat"
+ type: "Concat"
+ bottom: "fire2/expand1x1"
+ bottom: "fire2/expand3x3"
+ top: "fire2/concat"
+}
+layer {
+ name: "fire3/squeeze1x1"
+ type: "Convolution"
+ bottom: "fire2/concat"
+ top: "fire3/squeeze1x1"
+ convolution_param {
+ num_output: 16
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire3/relu_squeeze1x1"
+ type: "ReLU"
+ bottom: "fire3/squeeze1x1"
+ top: "fire3/squeeze1x1"
+}
+layer {
+ name: "fire3/expand1x1"
+ type: "Convolution"
+ bottom: "fire3/squeeze1x1"
+ top: "fire3/expand1x1"
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire3/relu_expand1x1"
+ type: "ReLU"
+ bottom: "fire3/expand1x1"
+ top: "fire3/expand1x1"
+}
+layer {
+ name: "fire3/expand3x3"
+ type: "Convolution"
+ bottom: "fire3/squeeze1x1"
+ top: "fire3/expand3x3"
+ convolution_param {
+ num_output: 64
+ pad: 1
+ kernel_size: 3
+ }
+}
+layer {
+ name: "fire3/relu_expand3x3"
+ type: "ReLU"
+ bottom: "fire3/expand3x3"
+ top: "fire3/expand3x3"
+}
+layer {
+ name: "fire3/concat"
+ type: "Concat"
+ bottom: "fire3/expand1x1"
+ bottom: "fire3/expand3x3"
+ top: "fire3/concat"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "fire3/concat"
+ top: "pool3"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "fire4/squeeze1x1"
+ type: "Convolution"
+ bottom: "pool3"
+ top: "fire4/squeeze1x1"
+ convolution_param {
+ num_output: 32
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire4/relu_squeeze1x1"
+ type: "ReLU"
+ bottom: "fire4/squeeze1x1"
+ top: "fire4/squeeze1x1"
+}
+layer {
+ name: "fire4/expand1x1"
+ type: "Convolution"
+ bottom: "fire4/squeeze1x1"
+ top: "fire4/expand1x1"
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire4/relu_expand1x1"
+ type: "ReLU"
+ bottom: "fire4/expand1x1"
+ top: "fire4/expand1x1"
+}
+layer {
+ name: "fire4/expand3x3"
+ type: "Convolution"
+ bottom: "fire4/squeeze1x1"
+ top: "fire4/expand3x3"
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ }
+}
+layer {
+ name: "fire4/relu_expand3x3"
+ type: "ReLU"
+ bottom: "fire4/expand3x3"
+ top: "fire4/expand3x3"
+}
+layer {
+ name: "fire4/concat"
+ type: "Concat"
+ bottom: "fire4/expand1x1"
+ bottom: "fire4/expand3x3"
+ top: "fire4/concat"
+}
+layer {
+ name: "fire5/squeeze1x1"
+ type: "Convolution"
+ bottom: "fire4/concat"
+ top: "fire5/squeeze1x1"
+ convolution_param {
+ num_output: 32
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire5/relu_squeeze1x1"
+ type: "ReLU"
+ bottom: "fire5/squeeze1x1"
+ top: "fire5/squeeze1x1"
+}
+layer {
+ name: "fire5/expand1x1"
+ type: "Convolution"
+ bottom: "fire5/squeeze1x1"
+ top: "fire5/expand1x1"
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire5/relu_expand1x1"
+ type: "ReLU"
+ bottom: "fire5/expand1x1"
+ top: "fire5/expand1x1"
+}
+layer {
+ name: "fire5/expand3x3"
+ type: "Convolution"
+ bottom: "fire5/squeeze1x1"
+ top: "fire5/expand3x3"
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ }
+}
+layer {
+ name: "fire5/relu_expand3x3"
+ type: "ReLU"
+ bottom: "fire5/expand3x3"
+ top: "fire5/expand3x3"
+}
+layer {
+ name: "fire5/concat"
+ type: "Concat"
+ bottom: "fire5/expand1x1"
+ bottom: "fire5/expand3x3"
+ top: "fire5/concat"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "fire5/concat"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "fire6/squeeze1x1"
+ type: "Convolution"
+ bottom: "pool5"
+ top: "fire6/squeeze1x1"
+ convolution_param {
+ num_output: 48
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire6/relu_squeeze1x1"
+ type: "ReLU"
+ bottom: "fire6/squeeze1x1"
+ top: "fire6/squeeze1x1"
+}
+layer {
+ name: "fire6/expand1x1"
+ type: "Convolution"
+ bottom: "fire6/squeeze1x1"
+ top: "fire6/expand1x1"
+ convolution_param {
+ num_output: 192
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire6/relu_expand1x1"
+ type: "ReLU"
+ bottom: "fire6/expand1x1"
+ top: "fire6/expand1x1"
+}
+layer {
+ name: "fire6/expand3x3"
+ type: "Convolution"
+ bottom: "fire6/squeeze1x1"
+ top: "fire6/expand3x3"
+ convolution_param {
+ num_output: 192
+ pad: 1
+ kernel_size: 3
+ }
+}
+layer {
+ name: "fire6/relu_expand3x3"
+ type: "ReLU"
+ bottom: "fire6/expand3x3"
+ top: "fire6/expand3x3"
+}
+layer {
+ name: "fire6/concat"
+ type: "Concat"
+ bottom: "fire6/expand1x1"
+ bottom: "fire6/expand3x3"
+ top: "fire6/concat"
+}
+layer {
+ name: "fire7/squeeze1x1"
+ type: "Convolution"
+ bottom: "fire6/concat"
+ top: "fire7/squeeze1x1"
+ convolution_param {
+ num_output: 48
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire7/relu_squeeze1x1"
+ type: "ReLU"
+ bottom: "fire7/squeeze1x1"
+ top: "fire7/squeeze1x1"
+}
+layer {
+ name: "fire7/expand1x1"
+ type: "Convolution"
+ bottom: "fire7/squeeze1x1"
+ top: "fire7/expand1x1"
+ convolution_param {
+ num_output: 192
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire7/relu_expand1x1"
+ type: "ReLU"
+ bottom: "fire7/expand1x1"
+ top: "fire7/expand1x1"
+}
+layer {
+ name: "fire7/expand3x3"
+ type: "Convolution"
+ bottom: "fire7/squeeze1x1"
+ top: "fire7/expand3x3"
+ convolution_param {
+ num_output: 192
+ pad: 1
+ kernel_size: 3
+ }
+}
+layer {
+ name: "fire7/relu_expand3x3"
+ type: "ReLU"
+ bottom: "fire7/expand3x3"
+ top: "fire7/expand3x3"
+}
+layer {
+ name: "fire7/concat"
+ type: "Concat"
+ bottom: "fire7/expand1x1"
+ bottom: "fire7/expand3x3"
+ top: "fire7/concat"
+}
+layer {
+ name: "fire8/squeeze1x1"
+ type: "Convolution"
+ bottom: "fire7/concat"
+ top: "fire8/squeeze1x1"
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire8/relu_squeeze1x1"
+ type: "ReLU"
+ bottom: "fire8/squeeze1x1"
+ top: "fire8/squeeze1x1"
+}
+layer {
+ name: "fire8/expand1x1"
+ type: "Convolution"
+ bottom: "fire8/squeeze1x1"
+ top: "fire8/expand1x1"
+ convolution_param {
+ num_output: 256
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire8/relu_expand1x1"
+ type: "ReLU"
+ bottom: "fire8/expand1x1"
+ top: "fire8/expand1x1"
+}
+layer {
+ name: "fire8/expand3x3"
+ type: "Convolution"
+ bottom: "fire8/squeeze1x1"
+ top: "fire8/expand3x3"
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ }
+}
+layer {
+ name: "fire8/relu_expand3x3"
+ type: "ReLU"
+ bottom: "fire8/expand3x3"
+ top: "fire8/expand3x3"
+}
+layer {
+ name: "fire8/concat"
+ type: "Concat"
+ bottom: "fire8/expand1x1"
+ bottom: "fire8/expand3x3"
+ top: "fire8/concat"
+}
+layer {
+ name: "fire9/squeeze1x1"
+ type: "Convolution"
+ bottom: "fire8/concat"
+ top: "fire9/squeeze1x1"
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire9/relu_squeeze1x1"
+ type: "ReLU"
+ bottom: "fire9/squeeze1x1"
+ top: "fire9/squeeze1x1"
+}
+layer {
+ name: "fire9/expand1x1"
+ type: "Convolution"
+ bottom: "fire9/squeeze1x1"
+ top: "fire9/expand1x1"
+ convolution_param {
+ num_output: 256
+ kernel_size: 1
+ }
+}
+layer {
+ name: "fire9/relu_expand1x1"
+ type: "ReLU"
+ bottom: "fire9/expand1x1"
+ top: "fire9/expand1x1"
+}
+layer {
+ name: "fire9/expand3x3"
+ type: "Convolution"
+ bottom: "fire9/squeeze1x1"
+ top: "fire9/expand3x3"
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ }
+}
+layer {
+ name: "fire9/relu_expand3x3"
+ type: "ReLU"
+ bottom: "fire9/expand3x3"
+ top: "fire9/expand3x3"
+}
+layer {
+ name: "fire9/concat"
+ type: "Concat"
+ bottom: "fire9/expand1x1"
+ bottom: "fire9/expand3x3"
+ top: "fire9/concat"
+}
+layer {
+ name: "drop9"
+ type: "Dropout"
+ bottom: "fire9/concat"
+ top: "fire9/concat"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "conv10"
+ type: "Convolution"
+ bottom: "fire9/concat"
+ top: "conv10"
+ convolution_param {
+ num_output: 1000
+ kernel_size: 1
+ }
+}
+layer {
+ name: "relu_conv10"
+ type: "ReLU"
+ bottom: "conv10"
+ top: "conv10"
+}
+layer {
+ name: "pool10"
+ type: "Pooling"
+ bottom: "conv10"
+ top: "pool10"
+ pooling_param {
+ pool: AVE
+ global_pooling: true
+ }
+}
+
+layer {
+ name: "prob"
+ type: "Softmax"
+ bottom: "pool10"
+ top: "prob"
+}
diff --git a/unit_tests/Makefile b/unit_tests/Makefile
index 034ee02f..657d088d 100644
--- a/unit_tests/Makefile
+++ b/unit_tests/Makefile
@@ -1,87 +1,71 @@
+CC = gcc
+CXX = g++
-include ../Makefile.config
+# ACL_ROOT and CAFFE_ROOT are not assigned in this Makefile; they must be
+# supplied by the caller (environment or make command line). Abort at parse
+# time with a clear message instead of building with empty paths.
+ifeq ($(strip $(ACL_ROOT)),)
+$(error ACL_ROOT is not specified. Use "export ACL_ROOT='path of ACL source directory'")
+endif
+
+ifeq ($(strip $(CAFFE_ROOT)),)
+$(error CAFFE_ROOT is not specified. Use "export CAFFE_ROOT='path of Caffe source directory'")
+endif
-CXX_SRCS+=test_pooling_layer.cpp
-CXX_SRCS+=test_softmax_layer.cpp
-CXX_SRCS+= test_inner_product_layer.cpp
-CXX_SRCS+=test_neuron_layer.cpp
-CXX_SRCS+=test_lrn_layer.cpp #failed on single channel LRN
-#C_SRCS=pmu.c testbed.c
-CXX_SRCS+= test_convolution_layer.cpp
-#CXX_SRCS= test_fail.cpp
-BIN_SRCS=test_caffe_main.cpp test.cpp
+include $(CAFFE_ROOT)/Makefile.config
-HOME=/home/firefly
#
# caffe related stuff
#
-
-CAFFE_ROOT=$(HOME)/caffeOnACL
CAFFE_INCS = -I$(CAFFE_ROOT)/include -I$(CAFFE_ROOT)/distribute/include/
CAFFE_LIBS = -L$(CAFFE_ROOT)/distribute/lib -lcaffe -lglog -lgflags -lprotobuf -lboost_system -lboost_filesystem
-CAFFE_RPATH =$(CAFFE_ROOT)/distribute/lib
-
#
# google test related stuff
#
-
-GTEST_ROOT=/usr/local
-GTEST_LIBS=$(GTEST_ROOT)/lib/libgtest_main.a $(GTEST_ROOT)/lib/libgtest.a
-GTEST_INCS=-I$(GTEST_ROOT)/include/
-
+GTEST_ROOT = /usr/local
+GTEST_INCS = -I$(GTEST_ROOT)/include/
+GTEST_LIBS = -L$(GTEST_ROOT)/lib/ -lgtest_main -lgtest
#
# arm compute
#
+ACL_LIBS = -L$(ACL_ROOT)/build -L$(ACL_ROOT)/build/arm_compute -larm_compute -lOpenCL
-ACL_ROOT=$(HOME)/ComputeLibrary
-ACL_LIBS=-L$(ACL_ROOT)/build -L$(ACL_ROOT)/build/arm_compute -larm_compute -lOpenCL
-ACL_RPATH=$(ACL_ROOT)/build:$(ACL_ROOT)/build/arm_compute
+COMM_CFLAGS = -Wall -g -DCPU_ONLY -DUSE_ACL $(GTEST_INCS) $(CAFFE_INCS)
-COMM_CFLAGS=$(GTEST_INCS) $(CAFFE_INCS) -Wall -g -DCPU_ONLY -DUSE_ACL
#USE_PROFILING -- get profiling informations, is controled by LOGACL
#LAYER_PERF_STAT -- haitao's net profiling information
ifeq ($(USE_PROFILING), 1)
COMM_CFLAGS += -DUSE_PROFILING -DLAYER_PERF_STAT
endif
-CXXFLAGS=$(COMM_CFLAGS) -Wno-sign-compare
-CFLAGS=$(COMM_CFLAGS)
+CXXFLAGS = $(COMM_CFLAGS) -Wno-sign-compare
+CFLAGS = $(COMM_CFLAGS)
-CC=gcc
-CXX=g++
+LDFLAGS += $(CAFFE_LIBS) $(GTEST_LIBS) $(ACL_LIBS) -lpthread -lopenblas
-COMM_OBJS=$(CXX_SRCS:.cpp=.o) $(C_SRCS:.c=.o)
-BIN_OBJS+=$(BIN_SRCS:.cpp=.o)
-BIN_EXES=$(BIN_OBJS:.o=)
-
-LIBS+=$(CAFFE_LIBS) $(GTEST_LIBS) -lpthread -lopenblas $(ACL_LIBS)
-
-RT_PATH=-Wl,-rpath,$(CAFFE_RPATH):$(ACL_RPATH)
-
-LDFLAGS+=$(RT_PATH)
+# Layer test objects that are linked into every test executable.
+COMM_OBJS += test_pooling_layer.o \
+             test_softmax_layer.o \
+             test_inner_product_layer.o \
+             test_neuron_layer.o \
+             test_lrn_layer.o \
+             test_convolution_layer.o
+# Executables produced by the default `all` target.
+BIN_EXES = test test_caffe_main
all : $(BIN_EXES)
-$(BIN_EXES):%:%.o
-
-$(BIN_EXES):$(COMM_OBJS)
-
+# Link each test executable from its own <name>.o plus the shared layer test
+# objects. $(LDFLAGS) carries the -L/-l options and therefore must come after
+# the object files on the command line.
+$(BIN_EXES) : % : %.o $(COMM_OBJS)
+	$(CXX) $^ -o $@ $(LDFLAGS)
clean:
- rm -f $(BIN_EXES) *.o *.so
+ $(RM) $(BIN_EXES) *.o *.so
.PHONY : all clean
%.o : %.c
- $(CC) $(CFLAGS) -c $< -o $@
+ $(CC) -c $(CFLAGS) $< -o $@
%.o : %.cpp
- $(CXX) $(CXXFLAGS) -c $< -o $@
-
-%: %.o
- $(CXX) $(LDFLAGS) $< $(COMM_OBJS) -o $@ $(LIBS)
-
+ $(CXX) -c $(CXXFLAGS) $< -o $@