diff options
author | honggui <hongguiyao@msn.com> | 2017-10-20 14:34:09 +0800 |
---|---|---|
committer | honggui <hongguiyao@msn.com> | 2017-10-20 14:34:09 +0800 |
commit | d06352537c5f6f38897fb30119bad9bacb37ce79 (patch) | |
tree | bb5ca293640f379dbff3852609806273b0059031 | |
parent | bba197c0b071b94ad632cf98de472bbaad0dea52 (diff) | |
download | caffeonacl-d06352537c5f6f38897fb30119bad9bacb37ce79.tar.gz caffeonacl-d06352537c5f6f38897fb30119bad9bacb37ce79.tar.bz2 caffeonacl-d06352537c5f6f38897fb30119bad9bacb37ce79.zip |
1. Support ACL v17.10
2. Pooling performance increased 30x
3. Remove the COARSE timer (use CLOCK_MONOTONIC instead of CLOCK_MONOTONIC_COARSE)
-rw-r--r-- | Makefile.config.acl | 2 | ||||
-rw-r--r-- | src/caffe/layer.cpp | 2 | ||||
-rw-r--r-- | src/caffe/layers/acl_pooling_layer.cpp | 16 | ||||
-rw-r--r-- | unit_tests/Makefile | 2 |
4 files changed, 9 insertions, 13 deletions
diff --git a/Makefile.config.acl b/Makefile.config.acl index b30759fb..30669c2d 100644 --- a/Makefile.config.acl +++ b/Makefile.config.acl @@ -15,7 +15,7 @@ ACL_INCS :=$(ACL_ROOT)/include ACL_INCS +=$(ACL_ROOT) ACL_LIBS_DIR :=$(ACL_ROOT)/build ACL_LIBS_DIR +=$(ACL_ROOT)/build/arm_compute -ACL_LIBS :=arm_compute OpenCL +ACL_LIBS :=arm_compute arm_compute_core OpenCL # uncomment to disable IO dependencies and corresponding data layers # USE_OPENCV := 0 diff --git a/src/caffe/layer.cpp b/src/caffe/layer.cpp index 677ed100..882b86aa 100644 --- a/src/caffe/layer.cpp +++ b/src/caffe/layer.cpp @@ -20,7 +20,7 @@ unsigned long get_cur_time(void) { struct timespec tm; - clock_gettime(CLOCK_MONOTONIC_COARSE, &tm); + clock_gettime(CLOCK_MONOTONIC, &tm); return (tm.tv_sec*1000000+tm.tv_nsec/1000); } diff --git a/src/caffe/layers/acl_pooling_layer.cpp b/src/caffe/layers/acl_pooling_layer.cpp index eb1c5daa..f72b2235 100644 --- a/src/caffe/layers/acl_pooling_layer.cpp +++ b/src/caffe/layers/acl_pooling_layer.cpp @@ -15,8 +15,8 @@ template <typename Dtype> void ACLPoolingLayer<Dtype>::SetupACLLayer(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top){ - TensorShape in_shape ((unsigned int)this->width_, (unsigned int)this->height_); - TensorShape out_shape((unsigned int)this->pooled_width_, (unsigned int)this->pooled_height_); + TensorShape in_shape ((unsigned int)this->width_, (unsigned int)this->height_,(unsigned int)this->channels_); + TensorShape out_shape((unsigned int)this->pooled_width_, (unsigned int)this->pooled_height_,(unsigned int)this->channels_); checkreshape(in_shape,Caffe::arm_gpu_mode()); if (!this->init_layer_) return; this->init_layer_=false; @@ -93,13 +93,11 @@ void ACLPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, } SetupACLLayer(bottom,top); for (int n = 0; n < bottom[0]->num(); ++n) { - for (int c = 0; c < this->channels_; ++c) { tensor_mem(this->cpu().input,(void*)(bottom_data)); cpu_run(); 
tensor_mem((void*)(top_data),this->cpu().output); - bottom_data += bottom[0]->offset(0, 1); - top_data += top[0]->offset(0, 1); - } + bottom_data += bottom[0]->offset(1); + top_data += top[0]->offset(1); } } @@ -130,13 +128,11 @@ void ACLPoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, } SetupACLLayer(bottom,top); for (int n = 0; n < bottom[0]->num(); ++n) { - for (int c = 0; c < this->channels_; ++c) { tensor_mem(this->gpu().input,(void*)(bottom_data)); gpu_run(); tensor_mem((void*)(top_data),this->gpu().output); - bottom_data += bottom[0]->offset(0, 1); - top_data += top[0]->offset(0, 1); - } + bottom_data += bottom[0]->offset(1); + top_data += top[0]->offset(1); } } diff --git a/unit_tests/Makefile b/unit_tests/Makefile index 034ee02f..4e64c0cb 100644 --- a/unit_tests/Makefile +++ b/unit_tests/Makefile @@ -16,7 +16,7 @@ HOME=/home/firefly # caffe related stuff # -CAFFE_ROOT=$(HOME)/caffeOnACL +CAFFE_ROOT=$(HOME)/CaffeOnACL CAFFE_INCS = -I$(CAFFE_ROOT)/include -I$(CAFFE_ROOT)/distribute/include/ CAFFE_LIBS = -L$(CAFFE_ROOT)/distribute/lib -lcaffe -lglog -lgflags -lprotobuf -lboost_system -lboost_filesystem CAFFE_RPATH =$(CAFFE_ROOT)/distribute/lib |