summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorhonggui <hongguiyao@msn.com>2017-10-20 14:34:09 +0800
committerhonggui <hongguiyao@msn.com>2017-10-20 14:34:09 +0800
commitd06352537c5f6f38897fb30119bad9bacb37ce79 (patch)
treebb5ca293640f379dbff3852609806273b0059031
parentbba197c0b071b94ad632cf98de472bbaad0dea52 (diff)
downloadcaffeonacl-d06352537c5f6f38897fb30119bad9bacb37ce79.tar.gz
caffeonacl-d06352537c5f6f38897fb30119bad9bacb37ce79.tar.bz2
caffeonacl-d06352537c5f6f38897fb30119bad9bacb37ce79.zip
1. support ACL v17.10
2. the performance of pooling was increased 30X 3. Replacing the CLOCK_MONOTONIC_COARSE timer with CLOCK_MONOTONIC
-rw-r--r--Makefile.config.acl2
-rw-r--r--src/caffe/layer.cpp2
-rw-r--r--src/caffe/layers/acl_pooling_layer.cpp16
-rw-r--r--unit_tests/Makefile2
4 files changed, 9 insertions, 13 deletions
diff --git a/Makefile.config.acl b/Makefile.config.acl
index b30759fb..30669c2d 100644
--- a/Makefile.config.acl
+++ b/Makefile.config.acl
@@ -15,7 +15,7 @@ ACL_INCS :=$(ACL_ROOT)/include
ACL_INCS +=$(ACL_ROOT)
ACL_LIBS_DIR :=$(ACL_ROOT)/build
ACL_LIBS_DIR +=$(ACL_ROOT)/build/arm_compute
-ACL_LIBS :=arm_compute OpenCL
+ACL_LIBS :=arm_compute arm_compute_core OpenCL
# uncomment to disable IO dependencies and corresponding data layers
# USE_OPENCV := 0
diff --git a/src/caffe/layer.cpp b/src/caffe/layer.cpp
index 677ed100..882b86aa 100644
--- a/src/caffe/layer.cpp
+++ b/src/caffe/layer.cpp
@@ -20,7 +20,7 @@ unsigned long get_cur_time(void)
{
struct timespec tm;
- clock_gettime(CLOCK_MONOTONIC_COARSE, &tm);
+ clock_gettime(CLOCK_MONOTONIC, &tm);
return (tm.tv_sec*1000000+tm.tv_nsec/1000);
}
diff --git a/src/caffe/layers/acl_pooling_layer.cpp b/src/caffe/layers/acl_pooling_layer.cpp
index eb1c5daa..f72b2235 100644
--- a/src/caffe/layers/acl_pooling_layer.cpp
+++ b/src/caffe/layers/acl_pooling_layer.cpp
@@ -15,8 +15,8 @@ template <typename Dtype>
void ACLPoolingLayer<Dtype>::SetupACLLayer(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top){
- TensorShape in_shape ((unsigned int)this->width_, (unsigned int)this->height_);
- TensorShape out_shape((unsigned int)this->pooled_width_, (unsigned int)this->pooled_height_);
+ TensorShape in_shape ((unsigned int)this->width_, (unsigned int)this->height_,(unsigned int)this->channels_);
+ TensorShape out_shape((unsigned int)this->pooled_width_, (unsigned int)this->pooled_height_,(unsigned int)this->channels_);
checkreshape(in_shape,Caffe::arm_gpu_mode());
if (!this->init_layer_) return;
this->init_layer_=false;
@@ -93,13 +93,11 @@ void ACLPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
}
SetupACLLayer(bottom,top);
for (int n = 0; n < bottom[0]->num(); ++n) {
- for (int c = 0; c < this->channels_; ++c) {
tensor_mem(this->cpu().input,(void*)(bottom_data));
cpu_run();
tensor_mem((void*)(top_data),this->cpu().output);
- bottom_data += bottom[0]->offset(0, 1);
- top_data += top[0]->offset(0, 1);
- }
+ bottom_data += bottom[0]->offset(1);
+ top_data += top[0]->offset(1);
}
}
@@ -130,13 +128,11 @@ void ACLPoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
}
SetupACLLayer(bottom,top);
for (int n = 0; n < bottom[0]->num(); ++n) {
- for (int c = 0; c < this->channels_; ++c) {
tensor_mem(this->gpu().input,(void*)(bottom_data));
gpu_run();
tensor_mem((void*)(top_data),this->gpu().output);
- bottom_data += bottom[0]->offset(0, 1);
- top_data += top[0]->offset(0, 1);
- }
+ bottom_data += bottom[0]->offset(1);
+ top_data += top[0]->offset(1);
}
}
diff --git a/unit_tests/Makefile b/unit_tests/Makefile
index 034ee02f..4e64c0cb 100644
--- a/unit_tests/Makefile
+++ b/unit_tests/Makefile
@@ -16,7 +16,7 @@ HOME=/home/firefly
# caffe related stuff
#
-CAFFE_ROOT=$(HOME)/caffeOnACL
+CAFFE_ROOT=$(HOME)/CaffeOnACL
CAFFE_INCS = -I$(CAFFE_ROOT)/include -I$(CAFFE_ROOT)/distribute/include/
CAFFE_LIBS = -L$(CAFFE_ROOT)/distribute/lib -lcaffe -lglog -lgflags -lprotobuf -lboost_system -lboost_filesystem
CAFFE_RPATH =$(CAFFE_ROOT)/distribute/lib