3 files changed, 5 insertions, 4 deletions
diff --git a/src/caffe/acl_layer.cpp b/src/caffe/acl_layer.cpp
index 84f201d3..879b6701 100644
--- a/src/caffe/acl_layer.cpp
+++ b/src/caffe/acl_layer.cpp
@@ -116,7 +116,7 @@ int BaseTensor<ACLTensor>::tensor_copy(void * mem,bool toTensor)
 #endif //USE_PROFILING
     arm_compute::Window window;
     ACLTensor* tensor=this;
-    window.use_tensor_dimensions(tensor->info(), /* first_dimension =*/Window::DimY); // Iterate through the rows (not each element)
+    window.use_tensor_dimensions(tensor->info()->tensor_shape(), /* first_dimension =*/Window::DimY); // Iterate through the rows (not each element)
     int width = tensor->info()->tensor_shape()[0]; //->dimension(0); //window.x().end() - window.x().start(); // + 1;
     int height = tensor->info()->tensor_shape()[1]; //->dimension(1); //window.y().end() - window.y().start(); // + 1;
     int deepth = tensor->info()->tensor_shape()[2];
diff --git a/src/caffe/layers/acl_batch_norm_layer.cpp b/src/caffe/layers/acl_batch_norm_layer.cpp
index b117343c..a6bc16d1 100644
--- a/src/caffe/layers/acl_batch_norm_layer.cpp
+++ b/src/caffe/layers/acl_batch_norm_layer.cpp
@@ -115,13 +115,13 @@ void ACLBatchNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       Forward_gpu(bottom, top);
       return;
   }         
+#ifdef USE_PROFILING
+  logtime_util log_time(ACL_BN_INFO);
+#endif //USE_PROFILING
   if (this->force_bypass_acl_path_||!this->use_global_stats_) {
         BatchNormLayer<Dtype>::Forward_cpu(bottom,top);
         return;
   }
-#ifdef USE_PROFILING
-  logtime_util log_time(ACL_BN_INFO);
-#endif //USE_PROFILING
   const Dtype* bottom_data = bottom[0]->cpu_data();
   Dtype* top_data = top[0]->mutable_cpu_data();
   SetupACLLayer(bottom,top);
diff --git a/src/caffe/layers/acl_concat_layer.cpp b/src/caffe/layers/acl_concat_layer.cpp
index 4750cb4b..57a14126 100644
--- a/src/caffe/layers/acl_concat_layer.cpp
+++ b/src/caffe/layers/acl_concat_layer.cpp
@@ -113,6 +113,7 @@ void ACLConcatLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 
 template <typename Dtype>
 ACLConcatLayer<Dtype>::~ACLConcatLayer() {
+    if(this->force_bypass_acl_path_)return;
     for (int i =0; i < cpu_vectors.size(); i ++) {
         delete cpu_vectors[i];
     }