diff options
author | Ye Joey <joeyye@openailab.com> | 2017-07-06 14:39:37 +0800 |
---|---|---|
committer | Ye Joey <joeyye@openailab.com> | 2017-07-06 14:39:37 +0800 |
commit | 70eb0cc1f22c629882e0ce4425035f043350d024 (patch) | |
tree | 81d606b255f6312d09b094292c19f861df533b48 | |
parent | 0c199e3577a0a968ba4a0c07ae5a35bce5348eba (diff) | |
download | caffeonacl-70eb0cc1f22c629882e0ce4425035f043350d024.tar.gz caffeonacl-70eb0cc1f22c629882e0ce4425035f043350d024.tar.bz2 caffeonacl-70eb0cc1f22c629882e0ce4425035f043350d024.zip |
1. Refine Makefile
2. Rewrite the release notes with markdown format
3. Add the missing files in SqueezeNet
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | acl_openailab/Makefile.config.acl | 147 | ||||
-rw-r--r-- | acl_openailab/README.md | 83 | ||||
-rw-r--r-- | acl_openailab/Reversion.md | 113 | ||||
-rw-r--r-- | acl_openailab/pics/openailab.png | bin | 0 -> 376527 bytes | |||
-rw-r--r-- | models/SqueezeNet/README.md | 4 | ||||
-rw-r--r-- | models/SqueezeNet/SqueezeNet_v1.1/squeezenet.1.1.deploy.prototxt | 556 | ||||
-rw-r--r-- | unit_tests/Makefile | 80 |
8 files changed, 936 insertions, 49 deletions
@@ -4,7 +4,7 @@ CaffeOnACL is a project to use ARM Compute Library (NEON+GPU) to speed up caffe and provide utilities to debug, profile and tune application performance. Check out the documents for the details like -- [release notes](https://github.com/OAID/caffeOnACL/tree/master/docs/caffeOnACL_release_notes_0_2_0.docx) +- [release notes](https://github.com/OAID/caffeOnACL/tree/master/acl_openailab/README.md) - [user guide](https://github.com/OAID/caffeOnACL/tree/master/docs/caffeOnACL_user_guide_0_2_0.docx) diff --git a/acl_openailab/Makefile.config.acl b/acl_openailab/Makefile.config.acl new file mode 100644 index 00000000..c909a424 --- /dev/null +++ b/acl_openailab/Makefile.config.acl @@ -0,0 +1,147 @@ +## Refer to http://caffe.berkeleyvision.org/installation.html +# Contributions simplifying and improving our build system are welcome! + +# cuDNN acceleration switch (uncomment to build with cuDNN). +# USE_CUDNN := 1 + +# CPU-only switch (uncomment to build without GPU support). +CPU_ONLY := 1 + +# Enable ACL (ARM Compute Library) +USE_ACL :=1 + +USE_PROFILING := 0 + +ifeq ($(USE_ACL), 1) + ifeq ($(ACL_ROOT),) + $(error ACL_ROOT does not specified. use "export ACL_ROOT='path of acl soure code'") + endif + +ACL_INCS :=$(ACL_ROOT)/include +ACL_INCS +=$(ACL_ROOT) +ACL_LIBS_DIR :=$(ACL_ROOT)/build +ACL_LIBS_DIR +=$(ACL_ROOT)/build/arm_compute +ACL_LIBS :=arm_compute OpenCL +endif + +# uncomment to disable IO dependencies and corresponding data layers +# USE_OPENCV := 0 +# USE_LEVELDB := 0 +# USE_LMDB := 0 + +# uncomment to allow MDB_NOLOCK when reading LMDB files (only if necessary) +# You should not set this flag if you will be reading LMDBs with any +# possibility of simultaneous read and write +# ALLOW_LMDB_NOLOCK := 1 + +# Uncomment if you're using OpenCV 3 +# OPENCV_VERSION := 3 + +# To customize your choice of compiler, uncomment and set the following. +# N.B. 
the default for Linux is g++ and the default for OSX is clang++ +# CUSTOM_CXX := g++ +#CUSTOM_CXX := aarch64-linux-gnu-g++ +#os :=linux +#arch :=arm64-v8a + +# CUDA directory contains bin/ and lib/ directories that we need. +CUDA_DIR := /usr/local/cuda +# On Ubuntu 14.04, if cuda tools are installed via +# "sudo apt-get install nvidia-cuda-toolkit" then use this instead: +# CUDA_DIR := /usr + +# CUDA architecture setting: going with all of them. +# For CUDA < 6.0, comment the *_50 through *_61 lines for compatibility. +# For CUDA < 8.0, comment the *_60 and *_61 lines for compatibility. +CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \ + -gencode arch=compute_20,code=sm_21 \ + -gencode arch=compute_30,code=sm_30 \ + -gencode arch=compute_35,code=sm_35 \ + -gencode arch=compute_50,code=sm_50 \ + -gencode arch=compute_52,code=sm_52 \ + -gencode arch=compute_60,code=sm_60 \ + -gencode arch=compute_61,code=sm_61 \ + -gencode arch=compute_61,code=compute_61 + +# BLAS choice: +# atlas for ATLAS (default) +# mkl for MKL +# open for OpenBlas +#BLAS := atlas +BLAS := open +# Custom (MKL/ATLAS/OpenBLAS) include and lib directories. +# Leave commented to accept the defaults for your choice of BLAS +# (which should work)! +# BLAS_INCLUDE := /path/to/your/blas +# BLAS_LIB := /path/to/your/blas + +# Homebrew puts openblas in a directory that is not on the standard search path +# BLAS_INCLUDE := $(shell brew --prefix openblas)/include +# BLAS_LIB := $(shell brew --prefix openblas)/lib + +# This is required only if you will compile the matlab interface. +# MATLAB directory should contain the mex binary in /bin. +# MATLAB_DIR := /usr/local +# MATLAB_DIR := /Applications/MATLAB_R2012b.app + +# NOTE: this is required only if you will compile the python interface. +# We need to be able to find Python.h and numpy/arrayobject.h. +PYTHON_INCLUDE := /usr/include/python2.7 \ + /usr/lib/python2.7/dist-packages/numpy/core/include +# Anaconda Python distribution is quite popular. 
Include path: +# Verify anaconda location, sometimes it's in root. +# ANACONDA_HOME := $(HOME)/anaconda +# PYTHON_INCLUDE := $(ANACONDA_HOME)/include \ + # $(ANACONDA_HOME)/include/python2.7 \ + # $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include + +# Uncomment to use Python 3 (default is Python 2) +# PYTHON_LIBRARIES := boost_python3 python3.5m +# PYTHON_INCLUDE := /usr/include/python3.5m \ +# /usr/lib/python3.5/dist-packages/numpy/core/include + +# We need to be able to find libpythonX.X.so or .dylib. +PYTHON_LIB := /usr/lib +# PYTHON_LIB := $(ANACONDA_HOME)/lib + +# Homebrew installs numpy in a non standard path (keg only) +# PYTHON_INCLUDE += $(dir $(shell python -c 'import numpy.core; print(numpy.core.__file__)'))/include +# PYTHON_LIB += $(shell brew --prefix numpy)/lib + +# Uncomment to support layers written in Python (will link against Python libs) +# WITH_PYTHON_LAYER := 1 + +# Whatever else you find you need goes here. +INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include +LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib + +# If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies +# INCLUDE_DIRS += $(shell brew --prefix)/include +# LIBRARY_DIRS += $(shell brew --prefix)/lib + +# NCCL acceleration switch (uncomment to build with NCCL) +# https://github.com/NVIDIA/nccl (last tested version: v1.2.3-1+cuda8.0) +# USE_NCCL := 1 + +# Uncomment to use `pkg-config` to specify OpenCV library paths. +# (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.) +# USE_PKG_CONFIG := 1 + +# N.B. both build and distribute dirs are cleared on `make clean` +BUILD_DIR := build +DISTRIBUTE_DIR := distribute + +#HDF5 +USE_HDF5 := 1 +HDF5_INCLUDE_DIRS := /usr/include/hdf5/serial +HDF5_LIBRARY_DIRS := /usr/lib/aarch64-linux-gnu/hdf5/serial +HDF5_LIBRARIES :=hdf5_hl hdf5 + +# Uncomment for debugging. 
Does not work on OSX due to https://github.com/BVLC/caffe/issues/171 +# DEBUG := 1 + +# The ID of the GPU that 'make runtest' will use to run unit tests. +TEST_GPUID := 0 + +# enable pretty build (comment to see full commands) +Q ?= @ diff --git a/acl_openailab/README.md b/acl_openailab/README.md new file mode 100644 index 00000000..6eae5df2 --- /dev/null +++ b/acl_openailab/README.md @@ -0,0 +1,83 @@ +![OPEN AI LAB](https://github.com/OAID/caffeOnACL/blob/master/acl_openailab/pics/openailab.png) + +# 1. Release Notes +[![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE) + +Please refer to [CaffeOnACL Release NOTE](https://github.com/OAID/caffeOnACL/blob/master/acl_openailab/Reversion.md) for details + +# 2. Preparation +## 2.1 General dependencies installation + sudo apt-get -y update + sudo apt-get -y upgrade + sudo apt-get install -y build-essential pkg-config automake autoconf protobuf-compiler cmake cmake-gui + sudo apt-get install -y libprotobuf-dev libleveldb-dev libsnappy-dev libhdf5-serial-dev + sudo apt-get install -y libatlas-base-dev libgflags-dev libgoogle-glog-dev liblmdb-dev libopenblas-dev + sudo apt-get install -y libopencv-dev python-dev + sudo apt-get install -y python-numpy python-scipy python-yaml python-six python-pip + sudo apt-get install -y scons git + sudo apt-get install -y --no-install-recommends libboost-all-dev + pip install --upgrade pip + +## 2.2 Download source code +We recommend creating a new directory in your work directory to execute the following steps. For example, you can create a directory named "oaid" in your home directory with the following commands.<br> + + cd ~ + mkdir oaid + cd oaid + +#### Download "ACL" (arm_compute : v17.05): + git clone https://github.com/ARM-software/ComputeLibrary.git +#### Download "CaffeOnACL" : + git clone https://github.com/OAID/caffeOnACL.git +#### Download "Googletest" : + git clone https://github.com/google/googletest.git + +# 3.
Build CaffeOnACL +## 3.1 Build ACL : + cd ~/oaid/ComputeLibrary + scons Werror=1 -j8 debug=0 asserts=1 neon=1 opencl=1 embed_kernels=1 os=linux arch=arm64-v8a + +## 3.2 Build Caffe : + export ACL_ROOT=~/oaid/ComputeLibrary + cd ~/oaid/caffeOnACL + cp acl_openailab/Makefile.config.acl Makefile.config + make all distribute + +## 3.3 Build Unit tests +##### Build the gtest libraries + cd ~/oaid/googletest + cmake CMakeLists.txt + make + sudo make install + +##### Build Caffe Unit tests + export CAFFE_ROOT=~/oaid/caffeOnACL + cd ~/oaid/caffeOnACL/unit_tests + make clean + make + +## 3.4 Run tests +If the output of the following two tests matches the examples, the port was successful. + + export LD_LIBRARY_PATH=~/oaid/caffeOnACL/distribute/lib:~/oaid/ComputeLibrary/build + +#### Reference Caffenet + cd ~/oaid/caffeOnACL/data/ilsvrc12 + sudo chmod +x get_ilsvrc_aux.sh + ./get_ilsvrc_aux.sh + cd ../.. + ./scripts/download_model_binary.py ./models/bvlc_reference_caffenet + ./distribute/bin/classification.bin models/bvlc_reference_caffenet/deploy.prototxt models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel data/ilsvrc12/imagenet_mean.binaryproto data/ilsvrc12/synset_words.txt examples/images/cat.jpg + output message -- + ---------- Prediction for examples/images/cat.jpg ---------- + 0.3094 - "n02124075 Egyptian cat" + 0.1761 - "n02123159 tiger cat" + 0.1221 - "n02123045 tabby, tabby cat" + 0.1132 - "n02119022 red fox, Vulpes vulpes" + 0.0421 - "n02085620 Chihuahua" + +#### Unit test + cd ~/oaid/caffeOnACL/unit_tests + ./test_caffe_main + output message: + [==========] 29 tests from 6 test cases ran. (1236 ms total) [ PASSED ] 29 tests. diff --git a/acl_openailab/Reversion.md b/acl_openailab/Reversion.md new file mode 100644 index 00000000..f7c934b0 --- /dev/null +++ b/acl_openailab/Reversion.md @@ -0,0 +1,113 @@ +# Release Note +[![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE) + +The release version is 0.2.0.
You can download the source code from [OAID/caffeOnACL](https://github.com/OAID/caffeOnACL) + +## Verified Platform : + +The release is verified on 64bits ARMv8 processor<br> +* Hardware platform : Rockchip RK3399 (firefly RK3399 board)<br> +* Software platform : Ubuntu 16.04<br> + +## 10 Layers accelerated by ACL layers : +* ConvolutionLayer +* PoolingLayer +* LRNLayer +* ReLULayer +* SigmoidLayer +* SoftmaxLayer +* TanHLayer +* AbsValLayer +* BNLLLayer +* InnerProductLayer + +## ACL compatibility issues : +There are some compatibility issues between ACL and caffe Layers, we bypass it to Caffe's original layer class as the workaround solution for the below issues +* Normalization in-channel issue +* Tanh issue +* Even Kernel size +* Softmax supporting multi-dimension issue +* Group issue +* Performance need be fine turned in the future + +# Changelist +The caffe based version is `793bd96351749cb8df16f1581baf3e7d8036ac37`. +## New Files : + Makefile.config.acl + cmake/Modules/FindACL.cmake + examples/cpp_classification/classification_profiling.cpp + examples/cpp_classification/classification_profiling_gpu.cpp + include/caffe/acl_layer.hpp + include/caffe/layers/acl_absval_layer.hpp + include/caffe/layers/acl_base_activation_layer.hpp + include/caffe/layers/acl_bnll_layer.hpp + include/caffe/layers/acl_conv_layer.hpp + include/caffe/layers/acl_inner_product_layer.hpp + include/caffe/layers/acl_lrn_layer.hpp + include/caffe/layers/acl_pooling_layer.hpp + include/caffe/layers/acl_relu_layer.hpp + include/caffe/layers/acl_sigmoid_layer.hpp + include/caffe/layers/acl_softmax_layer.hpp + include/caffe/layers/acl_tanh_layer.hpp + models/SqueezeNet/README.md + models/SqueezeNet/SqueezeNet_v1.1/squeezenet.1.1.deploy.prototxt + src/caffe/acl_layer.cpp + src/caffe/layers/acl_absval_layer.cpp + src/caffe/layers/acl_base_activation_layer.cpp + src/caffe/layers/acl_bnll_layer.cpp + src/caffe/layers/acl_conv_layer.cpp + src/caffe/layers/acl_inner_product_layer.cpp + 
src/caffe/layers/acl_lrn_layer.cpp + src/caffe/layers/acl_pooling_layer.cpp + src/caffe/layers/acl_relu_layer.cpp + src/caffe/layers/acl_sigmoid_layer.cpp + src/caffe/layers/acl_softmax_layer.cpp + src/caffe/layers/acl_tanh_layer.cpp + unit_tests/Makefile + unit_tests/pmu.c + unit_tests/pmu.h + unit_tests/prof_convolution_layer.cpp + unit_tests/sgemm.cpp + unit_tests/test.cpp + unit_tests/test_caffe_main.cpp + unit_tests/test_common.cpp + unit_tests/test_convolution_layer.cpp + unit_tests/test_fail.cpp + unit_tests/test_inner_product_layer.cpp + unit_tests/test_lrn_layer.cpp + unit_tests/test_neuron_layer.cpp + unit_tests/test_pooling_layer.cpp + unit_tests/test_softmax_layer.cpp + unit_tests/testbed.c + unit_tests/testbed.h + +## Change Files : + Makefile + cmake/Dependencies.cmake + include/caffe/caffe.hpp + include/caffe/common.hpp + include/caffe/layer.hpp + include/caffe/util/device_alternate.hpp + include/caffe/util/hdf5.hpp + src/caffe/common.cpp + src/caffe/layer.cpp + src/caffe/layer_factory.cpp + src/caffe/layers/absval_layer.cpp + src/caffe/layers/bnll_layer.cpp + src/caffe/layers/hdf5_data_layer.cpp + src/caffe/layers/hdf5_data_layer.cu + src/caffe/layers/hdf5_output_layer.cpp + src/caffe/layers/hdf5_output_layer.cu + src/caffe/layers/inner_product_layer.cpp + src/caffe/net.cpp + src/caffe/solvers/sgd_solver.cpp + src/caffe/syncedmem.cpp + src/caffe/test/test_hdf5_output_layer.cpp + src/caffe/test/test_hdf5data_layer.cpp + src/caffe/util/hdf5.cpp + src/caffe/util/math_functions.cpp + +# Issue report +Encounter any issue, please report on [issue report](https://github.com/OAID/caffeOnACL/issues). 
Issue report should contain the following information: +* The exact description of the steps that are needed to reproduce the issue +* The exact description of what happens and what you think is wrong diff --git a/acl_openailab/pics/openailab.png b/acl_openailab/pics/openailab.png Binary files differ new file mode 100644 index 00000000..e8dfeef2 --- /dev/null +++ b/acl_openailab/pics/openailab.png diff --git a/models/SqueezeNet/README.md b/models/SqueezeNet/README.md new file mode 100644 index 00000000..c11e77dd --- /dev/null +++ b/models/SqueezeNet/README.md @@ -0,0 +1,4 @@ + +Download the model files from https://github.com/DeepScale/SqueezeNet/tree/master/SqueezeNet_v1.1 +The model architecture file for deployment is: + SqueezeNet_v1.1/squeezenet.1.1.deploy.prototxt diff --git a/models/SqueezeNet/SqueezeNet_v1.1/squeezenet.1.1.deploy.prototxt b/models/SqueezeNet/SqueezeNet_v1.1/squeezenet.1.1.deploy.prototxt new file mode 100644 index 00000000..e62fd5d2 --- /dev/null +++ b/models/SqueezeNet/SqueezeNet_v1.1/squeezenet.1.1.deploy.prototxt @@ -0,0 +1,556 @@ +# please cite: +# @article{SqueezeNet, +# Author = {Forrest N. Iandola and Matthew W. Moskewicz and Khalid Ashraf and Song Han and William J.
Dally and Kurt Keutzer}, +# Title = {SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and $<$1MB model size}, +# Journal = {arXiv:1602.07360}, +# Year = {2016} +# } +input: "data" +input_shape { + dim: 10 + dim: 3 + dim: 227 + dim: 227 +} + +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + convolution_param { + num_output: 64 + kernel_size: 3 + stride: 2 + } +} + +layer { + name: "relu_conv1" + type: "ReLU" + bottom: "conv1" + top: "conv1" +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "fire2/squeeze1x1" + type: "Convolution" + bottom: "pool1" + top: "fire2/squeeze1x1" + convolution_param { + num_output: 16 + kernel_size: 1 + } +} +layer { + name: "fire2/relu_squeeze1x1" + type: "ReLU" + bottom: "fire2/squeeze1x1" + top: "fire2/squeeze1x1" +} +layer { + name: "fire2/expand1x1" + type: "Convolution" + bottom: "fire2/squeeze1x1" + top: "fire2/expand1x1" + convolution_param { + num_output: 64 + kernel_size: 1 + } +} +layer { + name: "fire2/relu_expand1x1" + type: "ReLU" + bottom: "fire2/expand1x1" + top: "fire2/expand1x1" +} +layer { + name: "fire2/expand3x3" + type: "Convolution" + bottom: "fire2/squeeze1x1" + top: "fire2/expand3x3" + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire2/relu_expand3x3" + type: "ReLU" + bottom: "fire2/expand3x3" + top: "fire2/expand3x3" +} +layer { + name: "fire2/concat" + type: "Concat" + bottom: "fire2/expand1x1" + bottom: "fire2/expand3x3" + top: "fire2/concat" +} +layer { + name: "fire3/squeeze1x1" + type: "Convolution" + bottom: "fire2/concat" + top: "fire3/squeeze1x1" + convolution_param { + num_output: 16 + kernel_size: 1 + } +} +layer { + name: "fire3/relu_squeeze1x1" + type: "ReLU" + bottom: "fire3/squeeze1x1" + top: "fire3/squeeze1x1" +} +layer { + name: "fire3/expand1x1" + type: "Convolution" + bottom: "fire3/squeeze1x1" + top: 
"fire3/expand1x1" + convolution_param { + num_output: 64 + kernel_size: 1 + } +} +layer { + name: "fire3/relu_expand1x1" + type: "ReLU" + bottom: "fire3/expand1x1" + top: "fire3/expand1x1" +} +layer { + name: "fire3/expand3x3" + type: "Convolution" + bottom: "fire3/squeeze1x1" + top: "fire3/expand3x3" + convolution_param { + num_output: 64 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire3/relu_expand3x3" + type: "ReLU" + bottom: "fire3/expand3x3" + top: "fire3/expand3x3" +} +layer { + name: "fire3/concat" + type: "Concat" + bottom: "fire3/expand1x1" + bottom: "fire3/expand3x3" + top: "fire3/concat" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "fire3/concat" + top: "pool3" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "fire4/squeeze1x1" + type: "Convolution" + bottom: "pool3" + top: "fire4/squeeze1x1" + convolution_param { + num_output: 32 + kernel_size: 1 + } +} +layer { + name: "fire4/relu_squeeze1x1" + type: "ReLU" + bottom: "fire4/squeeze1x1" + top: "fire4/squeeze1x1" +} +layer { + name: "fire4/expand1x1" + type: "Convolution" + bottom: "fire4/squeeze1x1" + top: "fire4/expand1x1" + convolution_param { + num_output: 128 + kernel_size: 1 + } +} +layer { + name: "fire4/relu_expand1x1" + type: "ReLU" + bottom: "fire4/expand1x1" + top: "fire4/expand1x1" +} +layer { + name: "fire4/expand3x3" + type: "Convolution" + bottom: "fire4/squeeze1x1" + top: "fire4/expand3x3" + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire4/relu_expand3x3" + type: "ReLU" + bottom: "fire4/expand3x3" + top: "fire4/expand3x3" +} +layer { + name: "fire4/concat" + type: "Concat" + bottom: "fire4/expand1x1" + bottom: "fire4/expand3x3" + top: "fire4/concat" +} +layer { + name: "fire5/squeeze1x1" + type: "Convolution" + bottom: "fire4/concat" + top: "fire5/squeeze1x1" + convolution_param { + num_output: 32 + kernel_size: 1 + } +} +layer { + name: "fire5/relu_squeeze1x1" + type: "ReLU" + bottom: 
"fire5/squeeze1x1" + top: "fire5/squeeze1x1" +} +layer { + name: "fire5/expand1x1" + type: "Convolution" + bottom: "fire5/squeeze1x1" + top: "fire5/expand1x1" + convolution_param { + num_output: 128 + kernel_size: 1 + } +} +layer { + name: "fire5/relu_expand1x1" + type: "ReLU" + bottom: "fire5/expand1x1" + top: "fire5/expand1x1" +} +layer { + name: "fire5/expand3x3" + type: "Convolution" + bottom: "fire5/squeeze1x1" + top: "fire5/expand3x3" + convolution_param { + num_output: 128 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire5/relu_expand3x3" + type: "ReLU" + bottom: "fire5/expand3x3" + top: "fire5/expand3x3" +} +layer { + name: "fire5/concat" + type: "Concat" + bottom: "fire5/expand1x1" + bottom: "fire5/expand3x3" + top: "fire5/concat" +} +layer { + name: "pool5" + type: "Pooling" + bottom: "fire5/concat" + top: "pool5" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} +layer { + name: "fire6/squeeze1x1" + type: "Convolution" + bottom: "pool5" + top: "fire6/squeeze1x1" + convolution_param { + num_output: 48 + kernel_size: 1 + } +} +layer { + name: "fire6/relu_squeeze1x1" + type: "ReLU" + bottom: "fire6/squeeze1x1" + top: "fire6/squeeze1x1" +} +layer { + name: "fire6/expand1x1" + type: "Convolution" + bottom: "fire6/squeeze1x1" + top: "fire6/expand1x1" + convolution_param { + num_output: 192 + kernel_size: 1 + } +} +layer { + name: "fire6/relu_expand1x1" + type: "ReLU" + bottom: "fire6/expand1x1" + top: "fire6/expand1x1" +} +layer { + name: "fire6/expand3x3" + type: "Convolution" + bottom: "fire6/squeeze1x1" + top: "fire6/expand3x3" + convolution_param { + num_output: 192 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire6/relu_expand3x3" + type: "ReLU" + bottom: "fire6/expand3x3" + top: "fire6/expand3x3" +} +layer { + name: "fire6/concat" + type: "Concat" + bottom: "fire6/expand1x1" + bottom: "fire6/expand3x3" + top: "fire6/concat" +} +layer { + name: "fire7/squeeze1x1" + type: "Convolution" + bottom: "fire6/concat" + top: 
"fire7/squeeze1x1" + convolution_param { + num_output: 48 + kernel_size: 1 + } +} +layer { + name: "fire7/relu_squeeze1x1" + type: "ReLU" + bottom: "fire7/squeeze1x1" + top: "fire7/squeeze1x1" +} +layer { + name: "fire7/expand1x1" + type: "Convolution" + bottom: "fire7/squeeze1x1" + top: "fire7/expand1x1" + convolution_param { + num_output: 192 + kernel_size: 1 + } +} +layer { + name: "fire7/relu_expand1x1" + type: "ReLU" + bottom: "fire7/expand1x1" + top: "fire7/expand1x1" +} +layer { + name: "fire7/expand3x3" + type: "Convolution" + bottom: "fire7/squeeze1x1" + top: "fire7/expand3x3" + convolution_param { + num_output: 192 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire7/relu_expand3x3" + type: "ReLU" + bottom: "fire7/expand3x3" + top: "fire7/expand3x3" +} +layer { + name: "fire7/concat" + type: "Concat" + bottom: "fire7/expand1x1" + bottom: "fire7/expand3x3" + top: "fire7/concat" +} +layer { + name: "fire8/squeeze1x1" + type: "Convolution" + bottom: "fire7/concat" + top: "fire8/squeeze1x1" + convolution_param { + num_output: 64 + kernel_size: 1 + } +} +layer { + name: "fire8/relu_squeeze1x1" + type: "ReLU" + bottom: "fire8/squeeze1x1" + top: "fire8/squeeze1x1" +} +layer { + name: "fire8/expand1x1" + type: "Convolution" + bottom: "fire8/squeeze1x1" + top: "fire8/expand1x1" + convolution_param { + num_output: 256 + kernel_size: 1 + } +} +layer { + name: "fire8/relu_expand1x1" + type: "ReLU" + bottom: "fire8/expand1x1" + top: "fire8/expand1x1" +} +layer { + name: "fire8/expand3x3" + type: "Convolution" + bottom: "fire8/squeeze1x1" + top: "fire8/expand3x3" + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire8/relu_expand3x3" + type: "ReLU" + bottom: "fire8/expand3x3" + top: "fire8/expand3x3" +} +layer { + name: "fire8/concat" + type: "Concat" + bottom: "fire8/expand1x1" + bottom: "fire8/expand3x3" + top: "fire8/concat" +} +layer { + name: "fire9/squeeze1x1" + type: "Convolution" + bottom: "fire8/concat" + top: 
"fire9/squeeze1x1" + convolution_param { + num_output: 64 + kernel_size: 1 + } +} +layer { + name: "fire9/relu_squeeze1x1" + type: "ReLU" + bottom: "fire9/squeeze1x1" + top: "fire9/squeeze1x1" +} +layer { + name: "fire9/expand1x1" + type: "Convolution" + bottom: "fire9/squeeze1x1" + top: "fire9/expand1x1" + convolution_param { + num_output: 256 + kernel_size: 1 + } +} +layer { + name: "fire9/relu_expand1x1" + type: "ReLU" + bottom: "fire9/expand1x1" + top: "fire9/expand1x1" +} +layer { + name: "fire9/expand3x3" + type: "Convolution" + bottom: "fire9/squeeze1x1" + top: "fire9/expand3x3" + convolution_param { + num_output: 256 + pad: 1 + kernel_size: 3 + } +} +layer { + name: "fire9/relu_expand3x3" + type: "ReLU" + bottom: "fire9/expand3x3" + top: "fire9/expand3x3" +} +layer { + name: "fire9/concat" + type: "Concat" + bottom: "fire9/expand1x1" + bottom: "fire9/expand3x3" + top: "fire9/concat" +} +layer { + name: "drop9" + type: "Dropout" + bottom: "fire9/concat" + top: "fire9/concat" + dropout_param { + dropout_ratio: 0.5 + } +} +layer { + name: "conv10" + type: "Convolution" + bottom: "fire9/concat" + top: "conv10" + convolution_param { + num_output: 1000 + kernel_size: 1 + } +} +layer { + name: "relu_conv10" + type: "ReLU" + bottom: "conv10" + top: "conv10" +} +layer { + name: "pool10" + type: "Pooling" + bottom: "conv10" + top: "pool10" + pooling_param { + pool: AVE + global_pooling: true + } +} + +layer { + name: "prob" + type: "Softmax" + bottom: "pool10" + top: "prob" +} diff --git a/unit_tests/Makefile b/unit_tests/Makefile index 034ee02f..657d088d 100644 --- a/unit_tests/Makefile +++ b/unit_tests/Makefile @@ -1,87 +1,71 @@ +CC = gcc +CXX = g++ -include ../Makefile.config +ifeq ($(ACL_ROOT),) +$(error ACL_ROOT does not specified. use "export ACL_ROOT='path of acl soure directory'") +endif + +ifeq ($(CAFFE_ROOT),) +$(error CAFFE_ROOT does not specified. 
use "export CAFFE_ROOT='path of caffe soure directory'") +endif -CXX_SRCS+=test_pooling_layer.cpp -CXX_SRCS+=test_softmax_layer.cpp -CXX_SRCS+= test_inner_product_layer.cpp -CXX_SRCS+=test_neuron_layer.cpp -CXX_SRCS+=test_lrn_layer.cpp #failed on single channel LRN -#C_SRCS=pmu.c testbed.c -CXX_SRCS+= test_convolution_layer.cpp -#CXX_SRCS= test_fail.cpp -BIN_SRCS=test_caffe_main.cpp test.cpp +include $(CAFFE_ROOT)/Makefile.config -HOME=/home/firefly # # caffe related stuff # - -CAFFE_ROOT=$(HOME)/caffeOnACL CAFFE_INCS = -I$(CAFFE_ROOT)/include -I$(CAFFE_ROOT)/distribute/include/ CAFFE_LIBS = -L$(CAFFE_ROOT)/distribute/lib -lcaffe -lglog -lgflags -lprotobuf -lboost_system -lboost_filesystem -CAFFE_RPATH =$(CAFFE_ROOT)/distribute/lib - # # google test related stuff # - -GTEST_ROOT=/usr/local -GTEST_LIBS=$(GTEST_ROOT)/lib/libgtest_main.a $(GTEST_ROOT)/lib/libgtest.a -GTEST_INCS=-I$(GTEST_ROOT)/include/ - +GTEST_ROOT = /usr/local +GTEST_INCS = -I$(GTEST_ROOT)/include/ +GTEST_LIBS = -L$(GTEST_ROOT)/lib/ -lgtest_main -lgtest # # arm compute # +ACL_LIBS = -L$(ACL_ROOT)/build -L$(ACL_ROOT)/build/arm_compute -larm_compute -lOpenCL -ACL_ROOT=$(HOME)/ComputeLibrary -ACL_LIBS=-L$(ACL_ROOT)/build -L$(ACL_ROOT)/build/arm_compute -larm_compute -lOpenCL -ACL_RPATH=$(ACL_ROOT)/build:$(ACL_ROOT)/build/arm_compute +COMM_CFLAGS = -Wall -g -DCPU_ONLY -DUSE_ACL $(GTEST_INCS) $(CAFFE_INCS) -COMM_CFLAGS=$(GTEST_INCS) $(CAFFE_INCS) -Wall -g -DCPU_ONLY -DUSE_ACL #USE_PROFILING -- get profiling informations, is controled by LOGACL #LAYER_PERF_STAT -- haitao's net profiling information ifeq ($(USE_PROFILING), 1) COMM_CFLAGS += -DUSE_PROFILING -DLAYER_PERF_STAT endif -CXXFLAGS=$(COMM_CFLAGS) -Wno-sign-compare -CFLAGS=$(COMM_CFLAGS) +CXXFLAGS = $(COMM_CFLAGS) -Wno-sign-compare +CFLAGS = $(COMM_CFLAGS) -CC=gcc -CXX=g++ +LDFLAGS += $(CAFFE_LIBS) $(GTEST_LIBS) $(ACL_LIBS) -lpthread -lopenblas -COMM_OBJS=$(CXX_SRCS:.cpp=.o) $(C_SRCS:.c=.o) -BIN_OBJS+=$(BIN_SRCS:.cpp=.o) -BIN_EXES=$(BIN_OBJS:.o=) - 
-LIBS+=$(CAFFE_LIBS) $(GTEST_LIBS) -lpthread -lopenblas $(ACL_LIBS) - -RT_PATH=-Wl,-rpath,$(CAFFE_RPATH):$(ACL_RPATH) - -LDFLAGS+=$(RT_PATH) +COMM_OBJS += test_pooling_layer.o +COMM_OBJS += test_softmax_layer.o +COMM_OBJS += test_inner_product_layer.o +COMM_OBJS += test_neuron_layer.o +COMM_OBJS += test_lrn_layer.o +COMM_OBJS += test_convolution_layer.o +BIN_EXES = test test_caffe_main all : $(BIN_EXES) -$(BIN_EXES):%:%.o - -$(BIN_EXES):$(COMM_OBJS) - +test : test.o $(COMM_OBJS) + $(CXX) $< $(COMM_OBJS) -o $@ $(LDFLAGS) +test_caffe_main : test_caffe_main.o $(COMM_OBJS) + $(CXX) $< $(COMM_OBJS) -o $@ $(LDFLAGS) clean: - rm -f $(BIN_EXES) *.o *.so + $(RM) $(BIN_EXES) *.o *.so .PHONY : all clean %.o : %.c - $(CC) $(CFLAGS) -c $< -o $@ + $(CC) -c $(CFLAGS) $< -o $@ %.o : %.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - -%: %.o - $(CXX) $(LDFLAGS) $< $(COMM_OBJS) -o $@ $(LIBS) - + $(CXX) -c $(CXXFLAGS) $< -o $@ |