Diffstat (limited to 'libs/ARMComputeEx/src/runtime/CL/functions')
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp  35
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp  120
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp  46
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp  28
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp  39
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp  1
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp  40
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp  28
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp  29
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp  28
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp  4
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp  29
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp  28
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp  50
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp  39
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp  28
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp  36
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp  3
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp  121
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp  123
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLReductionMean.cpp  51
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp  29
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp  28
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp  39
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSlice.cpp  307
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp  30
-rw-r--r--  libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp  5
27 files changed, 853 insertions, 491 deletions
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp
new file mode 100644
index 000000000..1e52fc429
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLActivationLayerEx.h"
+
+#include "arm_compute/core/CL/kernels/CLActivationLayerExKernel.h"
+
+using namespace arm_compute;
+
+void CLActivationLayerEx::configure(ICLTensor *input, ICLTensor *output,
+ ActivationLayerInfoEx act_info)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerExKernel>();
+ k->configure(input, output, act_info);
+ _kernel = std::move(k);
+}
+
+Status CLActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ActivationLayerInfoEx &act_info)
+{
+ return CLActivationLayerExKernel::validate(input, output, act_info);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp
new file mode 100644
index 000000000..dff743e89
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLArgMinMax.h"
+
+#include "arm_compute/core/CL/kernels/CLArgMinMaxKernel.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+namespace arm_compute
+{
+
+CLArgMinMax::CLArgMinMax()
+ : _input(nullptr), _output(nullptr), _argminmax_axis(), _interm_tensors(), _argminmax_kernels(),
+ _num_of_kernels()
+{
+}
+
+void CLArgMinMax::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis,
+ ArgOperation op)
+{
+ ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, op));
+ _input = input;
+ _output = output;
+ _argminmax_axis = axis;
+ _arg_op = op;
+ // NOTE The argminmax_axis must have no duplication.
+ _num_of_kernels = axis.size();
+ const size_t num_of_interm_tensors = _num_of_kernels - 1;
+
+ _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
+ _argminmax_kernels =
+ arm_compute::support::cpp14::make_unique<CLArgMinMaxKernel[]>(_num_of_kernels);
+
+ TensorShape shape{input->info()->tensor_shape()};
+ for (size_t i = 0; i < num_of_interm_tensors; i++)
+ {
+ shape.set(_argminmax_axis[i], 1);
+ _interm_tensors[i].allocator()->init(
+ TensorInfo(shape, input->info()->num_channels(), input->info()->data_type()));
+ _interm_tensors[i].allocator()->allocate();
+ }
+
+ // Set a vector that is ordered ICLTensors sequentially.
+ std::vector<ICLTensor *> tensors;
+ tensors.emplace_back(input);
+ for (size_t i = 0; i < num_of_interm_tensors; i++)
+ {
+ tensors.emplace_back(_interm_tensors.get() + i);
+ }
+ tensors.emplace_back(output);
+
+ // Apply ArgMinMax on all kernels
+ for (size_t i = 0; i < _num_of_kernels; i++)
+ {
+ _argminmax_kernels[i].configure(tensors[i], tensors[i + 1], _argminmax_axis[i], op);
+ }
+}
+
+Status CLArgMinMax::validate(const ITensorInfo *input, const std::vector<uint32_t> &argminmax_axis,
+ const ITensorInfo *output, ArgOperation op)
+{
+ const size_t num_of_kernels = argminmax_axis.size();
+ const size_t num_of_interm_tensors = num_of_kernels - 1;
+
+ // Create temporary tensor infos
+ auto interm_tensors =
+ arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
+
+ // Create intermediate tensor info
+ TensorShape shape{input->tensor_shape()};
+
+ for (size_t i = 0; i < num_of_interm_tensors; i++)
+ {
+ shape.set(argminmax_axis[i], 1);
+ interm_tensors[i].set_data_type(input->data_type());
+ interm_tensors[i].set_tensor_shape(shape);
+ interm_tensors[i].set_num_channels(input->num_channels());
+ }
+
+ // Set a vector that is ordered ITensorInfo sequentially.
+ std::vector<const ITensorInfo *> tensors;
+ tensors.emplace_back(input);
+ for (size_t i = 0; i < num_of_interm_tensors; i++)
+ {
+ tensors.emplace_back(interm_tensors.get() + i);
+ }
+ tensors.emplace_back(output);
+
+ // Validate argminmax only on all kernels
+ for (size_t i = 0; i < num_of_kernels; i++)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLArgMinMaxKernel::validate(tensors[i], tensors[i + 1], argminmax_axis[i], op));
+ }
+
+ return Status{};
+}
+
+void CLArgMinMax::run()
+{
+ for (size_t i = 0; i < _num_of_kernels; ++i)
+ {
+ CLScheduler::get().enqueue(_argminmax_kernels[i]);
+ }
+}
+
+} // namespace arm_compute
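
The new CLArgMinMax chains one CLArgMinMaxKernel per requested axis, threading the data through axis.size() - 1 intermediate CLTensors before writing the user-supplied output. A minimal sketch of that tensor/kernel pairing, in plain C++ with hypothetical axes {0, 2} rather than the ARMComputeEx types themselves:

    #include <cstddef>
    #include <cstdio>
    #include <string>
    #include <vector>

    int main()
    {
      // Hypothetical axes for a 3-D input; mirrors the pairing built in
      // CLArgMinMax::configure() above.
      std::vector<unsigned> axis{0, 2};

      std::vector<std::string> tensors{"input"};
      for (std::size_t i = 0; i + 1 < axis.size(); ++i) // num_of_interm_tensors = axis.size() - 1
        tensors.push_back("interm[" + std::to_string(i) + "]");
      tensors.push_back("output");

      for (std::size_t i = 0; i < axis.size(); ++i) // one CLArgMinMaxKernel per axis
        std::printf("kernel %zu: %s -> %s (reduces axis %u)\n", i, tensors[i].c_str(),
                    tensors[i + 1].c_str(), axis[i]);
      return 0;
    }

Run, this prints kernel 0: input -> interm[0] and kernel 1: interm[0] -> output, which is exactly the order CLArgMinMax::run() enqueues the kernels in.
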
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp
new file mode 100644
index 000000000..3f403c80a
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h"
+
+using namespace arm_compute;
+
+void CLArithmeticSubtractionEx::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
+ ConvertPolicy policy)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLArithmeticSubtractionExKernel>();
+ k->configure(input1, input2, output, policy);
+ _kernel = std::move(k);
+
+ if (output->info()->dimension(0) > 1)
+ {
+ ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
+
+ if (broadcasted_info->info()->dimension(0) == 1)
+ {
+ _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ }
+ }
+}
+
+Status CLArithmeticSubtractionEx::validate(const ITensorInfo *input1, const ITensorInfo *input2,
+ const ITensorInfo *output, ConvertPolicy policy)
+{
+ return CLArithmeticSubtractionExKernel::validate(input1, input2, output, policy);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp
new file mode 100644
index 000000000..26e3798cc
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLBatchToSpaceND.h"
+
+#include "arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h"
+
+using namespace arm_compute;
+
+void CLBatchToSpaceND::configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLBatchToSpaceNDKernel>();
+ k->configure(input, output, block_size);
+ _kernel = std::move(k);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
new file mode 100644
index 000000000..7c5fe5eda
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h"
+
+#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+
+using namespace arm_compute;
+
+void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
+ BinaryLogicalOperation op)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
+ k->configure(input1, input2, output, op);
+ _kernel = std::move(k);
+
+ if (output->info()->dimension(0) > 1)
+ {
+ ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
+ if (broadcasted_info->info()->dimension(0) == 1)
+ {
+ _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ }
+ }
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
index e1059ab53..8e106737c 100644
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
@@ -17,7 +17,6 @@
#include "arm_compute/runtime/CL/functions/CLCast.h"
#include "arm_compute/core/CL/kernels/CLCastKernel.h"
-#include "support/ToolchainSupport.h"
using namespace arm_compute;
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp
new file mode 100644
index 000000000..f6a745a25
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLComparisonOp.h"
+
+#include "arm_compute/core/CL/kernels/CLComparisonOpKernel.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+
+using namespace arm_compute;
+
+void CLComparisonOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
+ const ComparisonOperation &op)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLComparisonOpKernel>();
+ k->configure(input1, input2, output, op);
+ _kernel = std::move(k);
+
+ if (output->info()->dimension(0) > 1)
+ {
+ ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
+
+ if (broadcasted_info->info()->dimension(0) == 1)
+ {
+ _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ }
+ }
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
new file mode 100644
index 000000000..c2e4ca9ff
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h"
+
+#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
+
+using namespace arm_compute;
+
+void CLDepthToSpace::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceKernel>();
+ k->configure(input, output, block_size);
+ _kernel = std::move(k);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
new file mode 100644
index 000000000..2781784ca
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h"
+
+#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
+
+using namespace arm_compute;
+
+void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output,
+ const ICLTensor *lookups)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLEmbeddingLookupKernel>();
+ k->configure(input, output, lookups);
+ _kernel = std::move(k);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp
new file mode 100644
index 000000000..411fa8700
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLExp.h"
+
+#include "arm_compute/core/CL/kernels/CLExpKernel.h"
+
+using namespace arm_compute;
+
+void CLExp::configure(const ICLTensor *input, ICLTensor *output)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLExpKernel>();
+ k->configure(input, output);
+ _kernel = std::move(k);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp
index 5552cbc6f..fb056fe45 100644
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp
@@ -16,11 +16,7 @@
*/
#include "arm_compute/runtime/CL/functions/CLGather.h"
-#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
using namespace arm_compute;
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
new file mode 100644
index 000000000..7180e9356
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLHashtableLookup.h"
+
+#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h"
+
+using namespace arm_compute;
+
+void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys,
+ const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLHashtableLookupKernel>();
+ k->configure(lookups, keys, input, output, hits);
+ _kernel = std::move(k);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
new file mode 100644
index 000000000..be35ea732
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLNeg.h"
+
+#include "arm_compute/core/CL/kernels/CLNegKernel.h"
+
+using namespace arm_compute;
+
+void CLNeg::configure(ICLTensor *input, ICLTensor *output)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLNegKernel>();
+ k->configure(input, output);
+ _kernel = std::move(k);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp
new file mode 100644
index 000000000..276c4557a
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h"
+
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+using namespace arm_compute;
+
+CLNormalizationLayerEx::CLNormalizationLayerEx() : _norm_kernel(), _border_handler() {}
+
+void CLNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
+ const NormalizationLayerInfo &norm_info)
+{
+ ARM_COMPUTE_ERROR_ON(input == nullptr);
+
+ // Configure normalization kernel
+ _norm_kernel.configure(input, output, norm_info);
+
+ // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel
+ _border_handler.configure(input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0));
+}
+
+Status CLNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const NormalizationLayerInfo &norm_info)
+{
+ return CLNormalizationLayerExKernel::validate(input, output, norm_info);
+}
+
+void CLNormalizationLayerEx::run()
+{
+ // Run border handler
+ CLScheduler::get().enqueue(_border_handler, false);
+
+ // Run normalization kernel
+ CLScheduler::get().enqueue(_norm_kernel);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
new file mode 100644
index 000000000..38adedd10
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLPReLU.h"
+
+#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+
+using namespace arm_compute;
+
+void CLPReLU::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLPReLUKernel>();
+ k->configure(input, alpha, output);
+ _kernel = std::move(k);
+
+ if (output->info()->dimension(0) > 1)
+ {
+ ICLTensor *broadcasted_info = (input->info()->dimension(0) == 1) ? input : alpha;
+
+ if (broadcasted_info->info()->dimension(0) == 1)
+ {
+ _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ }
+ }
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
new file mode 100644
index 000000000..5265b6c34
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
@@ -0,0 +1,28 @@
+/*
+* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+* Copyright (c) 2016-2018 ARM Limited.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h"
+
+#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
+
+using namespace arm_compute;
+
+void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLPadLayerKernel>();
+ k->configure(input, output, pad_size);
+ _kernel = std::move(k);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp
new file mode 100644
index 000000000..fb363270d
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLPermuteEx.h"
+
+#include "arm_compute/core/CL/kernels/CLPermuteExKernel.h"
+
+using namespace arm_compute;
+
+void CLPermuteEx::configure(const ICLTensor *input, ICLTensor *output,
+ const PermutationVector &perm)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLPermuteExKernel>();
+ k->configure(input, output, perm);
+ _kernel = std::move(k);
+}
+
+Status CLPermuteEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const PermutationVector &perm)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteExKernel::validate(input, output, perm));
+ return Status{};
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp
index e1add5e90..dc0baa8dd 100644
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp
@@ -18,9 +18,6 @@
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
using namespace arm_compute;
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp
deleted file mode 100644
index 3382058db..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLReduceMax.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "support/ToolchainSupport.h"
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/kernels/CLReduceMaxKernel.h"
-
-#include <vector>
-#include <algorithm>
-
-#include <utility>
-
-#define REDUCE_MAX_RUN_ON_CPU 1
-
-namespace arm_compute
-{
-
-CLReduceMax::CLReduceMax() : _axis(0), _input(nullptr), _output(nullptr), _kernel(nullptr) {}
-
-void CLReduceMax::configure(ICLTensor *input, int axis, ICLTensor *output)
-{
- _axis = axis;
-
- _input = input;
- _output = output;
-
- auto k = arm_compute::support::cpp14::make_unique<CLReduceMaxKernel>();
- k->configure(input, axis, output);
- _kernel = std::move(k);
-
- // We can handle for simple case only
- // Output rank: 1
- // Axis: one axis value, restrict to 1
- ARM_COMPUTE_ERROR_ON(input->info()->tensor_shape().num_dimensions() != 2);
- ARM_COMPUTE_ERROR_ON(output->info()->tensor_shape().num_dimensions() != 1);
- ARM_COMPUTE_ERROR_ON(axis != 1);
-}
-
-Status CLReduceMax::validate(const ITensorInfo *input, int32_t axis, const ITensorInfo *output)
-{
- return CLReduceMaxKernel::validate(input, axis, output);
-}
-
-void CLReduceMax::run()
-{
-#if REDUCE_MAX_RUN_ON_CPU
- run_on_cpu();
-
- arm_compute::CLScheduler::get().sync();
-#else
- arm_compute::CLScheduler::get().enqueue(*_kernel);
-#endif
-}
-
-void CLReduceMax::run_on_cpu()
-{
- cl::CommandQueue q = CLScheduler::get().queue();
-
- _input->map(q);
- _output->map(q);
-
- // Compute by CPU for simple case
- // Input rank: 2
- // Output rank: 1
- // Axis: one axis value, restrict to 1
-
- float *input_data = (float *)_input->buffer();
- float *output_data = (float *)_output->buffer();
-
- std::vector<float> container_max;
- int cols = _input->info()->tensor_shape()[0];
- int rows = _input->info()->tensor_shape()[1];
- container_max.resize(rows);
-
- // Initialize as 1st element in row
- float *input_pointer = input_data;
- for (int i = 0; i < rows; i++)
- {
- container_max[i] = *input_pointer;
- input_pointer += cols;
- }
-
- // Update max value in row
- for (int i = 0; i < rows; i++)
- {
- float max_in_row = container_max[i];
- for (int j = 1; j < cols; j++)
- {
- if (max_in_row < input_data[i * cols + j])
- {
- max_in_row = input_data[i * cols + j];
- }
- }
- container_max[i] = max_in_row;
- }
-
- for (int i = 0; i < rows; i++)
- {
- output_data[i] = container_max[i];
- }
-
- _input->unmap(q);
- _output->unmap(q);
-}
-} // namespace arm_compute
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
new file mode 100644
index 000000000..2b8d82706
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLReduceOperation.h"
+
+#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+using namespace arm_compute;
+
+CLReduceOperation::CLReduceOperation()
+ : _input(nullptr), _output(nullptr), _axis(), _interm_tensors(), _reduce_kernels()
+{
+}
+
+Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const std::set<uint32_t> &axis, const ReduceOperation &op)
+{
+ const size_t num_of_kernels = axis.size();
+ const size_t num_of_interm_tensors = num_of_kernels - 1;
+
+ // Create temporary tensor infos
+ auto interm_tensors =
+ arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
+
+ // Create intermediate tensor info
+ TensorShape shape{input->tensor_shape()};
+
+ auto it = axis.begin();
+ for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it)
+ {
+ shape.set(*it, 1);
+ interm_tensors[i].set_data_type(input->data_type());
+ interm_tensors[i].set_tensor_shape(shape);
+ interm_tensors[i].set_num_channels(input->num_channels());
+ }
+
+ // Set a vector that is ordered ITensorInfo sequentially.
+ std::vector<const ITensorInfo *> tensors;
+ tensors.emplace_back(input);
+ for (size_t i = 0; i < num_of_interm_tensors; ++i)
+ {
+ tensors.emplace_back(interm_tensors.get() + i);
+ }
+ tensors.emplace_back(output);
+
+ // Validate ReduceOperation only on all kernels
+ it = axis.begin();
+ for (size_t i = 0; i < num_of_kernels; ++i, ++it)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op));
+ }
+
+ return Status{};
+}
+
+void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output,
+ const std::set<uint32_t> &axis, ReduceOperation op)
+{
+ ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, op));
+
+ _axis = axis;
+
+ _input = input;
+ _output = output;
+
+ // NOTE The axis must have no duplication.
+ const size_t num_of_kernels = axis.size();
+ const size_t num_of_interm_tensors = num_of_kernels - 1;
+
+ _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
+ _reduce_kernels =
+ arm_compute::support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels);
+
+ TensorShape shape{input->info()->tensor_shape()};
+ auto it = axis.begin();
+ for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it)
+ {
+ shape.set(*it, 1);
+ _interm_tensors[i].allocator()->init(
+ TensorInfo(shape, input->info()->num_channels(), input->info()->data_type()));
+ _interm_tensors[i].allocator()->allocate();
+ }
+
+ // Set a vector that is ordered ICLTensors sequentially.
+ std::vector<ICLTensor *> tensors;
+ tensors.emplace_back(input);
+ for (size_t i = 0; i < num_of_interm_tensors; ++i)
+ {
+ tensors.emplace_back(_interm_tensors.get() + i);
+ }
+ tensors.emplace_back(output);
+
+ // Apply ReduceOperation on all kernels
+ it = axis.begin();
+ for (size_t i = 0; i < num_of_kernels; ++i, ++it)
+ {
+ _reduce_kernels[i].configure(tensors[i], tensors[i + 1], *it, op);
+ }
+}
+
+void CLReduceOperation::run()
+{
+ const size_t num_of_kernels = _axis.size();
+ for (size_t i = 0; i < num_of_kernels; ++i)
+ {
+ CLScheduler::get().enqueue(_reduce_kernels[i]);
+ }
+}
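
Like CLArgMinMax, CLReduceOperation reduces one axis per kernel; because the axes arrive in a std::set<uint32_t>, they are visited in ascending order and each stage sets that dimension to 1 in the next tensor's shape. A short sketch of the shape progression, using plain std::vector shapes (hypothetical input shape, not the ACL TensorShape type):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <set>
    #include <vector>

    int main()
    {
      std::vector<std::size_t> shape{4, 3, 2, 5}; // hypothetical input shape
      const std::set<uint32_t> axis{1, 3};        // reduce over dimensions 1 and 3

      // Each CLReduceOperationKernel collapses one axis; the earlier stages write
      // the intermediate tensors, the last one writes the user-supplied output.
      for (uint32_t a : axis)
      {
        shape[a] = 1;
        std::printf("after reducing axis %u:", a);
        for (std::size_t d : shape)
          std::printf(" %zu", d);
        std::printf("\n"); // {4,1,2,5}, then {4,1,2,1}
      }
      return 0;
    }
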
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLReductionMean.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLReductionMean.cpp
deleted file mode 100644
index ab724e752..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLReductionMean.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLReductionMean.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLReductionMeanKernel.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-CLReductionMean::CLReductionMean() : _reduction_mean_kernel(), _fill_border_kernel() {}
-
-Status CLReductionMean::validate(const ITensorInfo *input, const ITensorInfo *output,
- std::vector<uint32_t> axis)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(CLReductionMeanKernel::validate(input, output, axis));
- return Status{};
-}
-
-void CLReductionMean::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis)
-{
- _reduction_mean_kernel.configure(input, output, axis);
- _fill_border_kernel.configure(input, _reduction_mean_kernel.border_size(), BorderMode::CONSTANT,
- PixelValue(0));
-}
-
-void CLReductionMean::run()
-{
- CLScheduler::get().enqueue(_fill_border_kernel);
- CLScheduler::get().enqueue(_reduction_mean_kernel);
-}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp
new file mode 100644
index 000000000..c03826891
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLSpaceToBatchND.h"
+
+#include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h"
+
+using namespace arm_compute;
+
+void CLSpaceToBatchND::configure(const ICLTensor *input, const ICLTensor *block_size,
+ const ICLTensor *padding_size, ICLTensor *output)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLSpaceToBatchNDKernel>();
+ k->configure(input, block_size, padding_size, output);
+ _kernel = std::move(k);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp
new file mode 100644
index 000000000..0f455f96f
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h"
+
+#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h"
+
+using namespace arm_compute;
+
+void CLSpaceToDepth::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLSpaceToDepthKernel>();
+ k->configure(input, output, block_size);
+ _kernel = std::move(k);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp
new file mode 100644
index 000000000..dc6e4af44
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLSquaredDifference.h"
+
+#include "arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+
+using namespace arm_compute;
+
+void CLSquaredDifference::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLSquaredDifferenceKernel>();
+ k->configure(input1, input2, output);
+ _kernel = std::move(k);
+
+ if (output->info()->dimension(0) > 1)
+ {
+ ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
+
+ if (broadcasted_info->info()->dimension(0) == 1)
+ {
+ _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ }
+ }
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSlice.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSlice.cpp
deleted file mode 100644
index cd576cec1..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSlice.cpp
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLStridedSlice.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
-#include "arm_compute/core/utils/misc/Utility.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
-#include <vector>
-
-using namespace arm_compute;
-
-static const int32_t maxDims = 4;
-
-// Return the index for the first element along that axis. This index will be a
-// positive integer between [0, axisSize - 1] that can be used to index
-// directly into the data.
-inline int32_t StartForAxis(int32_t beginMask, std::vector<int32_t> const &startIndices,
- std::vector<int32_t> const &strides, const TensorShape &inputShape,
- int32_t axis)
-{
- // Begin with the specified index
- int32_t start = startIndices[axis];
-
- // beginMask override
- if (beginMask & 1 << axis)
- {
- if (strides[axis] > 0)
- {
- // Forward iteration - use the first element. These values will get
- // clamped below (Note: We could have set them to 0 and axisSize-1, but
- // use lowest() and max() to maintain symmetry with StopForAxis())
- start = std::numeric_limits<int32_t>::lowest();
- }
- else
- {
- // Backward iteration - use the last element.
- start = std::numeric_limits<int32_t>::max();
- }
- }
-
- // Handle negative indices
- int32_t axisSize = inputShape[axis];
- if (start < 0)
- {
- start += axisSize;
- }
-
- // Clamping
- start = arm_compute::utility::clamp(start, 0, axisSize - 1);
-
- return start;
-}
-
-// Return the "real" index for the end of iteration along that axis. This is an
-// "end" in the traditional C sense, in that it points to one past the last
-// element. ie. So if you were iterating through all elements of a 1D array of
-// size 4, this function would return 4 as the stop, because it is one past the
-// "real" indices of 0, 1, 2 & 3.
-inline int32_t StopForAxis(int32_t endMask, std::vector<int32_t> const &stopIndices,
- std::vector<int32_t> const &strides, const TensorShape &inputShape,
- int32_t axis)
-{
- // Begin with the specified index
- int32_t stop = stopIndices[axis];
-
- // endMask override
- if (endMask & (1 << axis))
- {
- if (strides[axis] > 0)
- {
- // Forward iteration - use the last element. These values will get
- // clamped below
- stop = std::numeric_limits<int32_t>::max();
- }
- else
- {
- // Backward iteration - use the first element.
- stop = std::numeric_limits<int32_t>::lowest();
- }
- }
-
- // Handle negative indices
- int32_t axisSize = inputShape[axis];
- if (stop < 0)
- {
- stop += axisSize;
- }
-
- // Clamping
- // Because the end index points one past the last element, we need slightly
- // different clamping ranges depending on the direction.
- if (strides[axis] > 0)
- {
- // Forward iteration
- stop = arm_compute::utility::clamp(stop, 0, axisSize);
- }
- else
- {
- // Backward iteration
- stop = arm_compute::utility::clamp(stop, -1, axisSize - 1);
- }
-
- return stop;
-}
-
-inline int32_t offset4D(const TensorShape &shape, int32_t b, int32_t d, int32_t h, int32_t w)
-{
- int32_t offset = b * shape[2] * shape[1] * shape[0];
- offset += d * shape[1] * shape[0];
- offset += h * shape[0];
- offset += w;
- return offset;
-}
-
-void CLStridedSlice::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
- ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask,
- int32_t endMask, int32_t shrinkAxisMask)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceKernel>();
- k->configure(input, output, beginData, endData, stridesData, beginMask, endMask, shrinkAxisMask);
- _kernel = std::move(k);
-}
-
-void CLStridedSliceCPU::configure(ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
- ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask,
- int32_t endMask, int32_t shrinkAxisMask)
-{
- ARM_COMPUTE_ERROR_THROW_ON(CLStridedSliceKernel::validate(
- input->info(), output->info(), beginData->info(), endData->info(), stridesData->info(),
- beginMask, endMask, shrinkAxisMask));
-
- _input = input;
- _output = output;
- _beginData = beginData;
- _endData = endData;
- _stridesData = stridesData;
- _beginMask = beginMask;
- _endMask = endMask;
- _shrinkAxisMask = shrinkAxisMask;
-}
-
-void CLStridedSliceCPU::run()
-{
- run_on_cpu();
-
- arm_compute::CLScheduler::get().sync();
-}
-
-inline int32_t getOutDim(int32_t start, int32_t stop, int32_t stride)
-{
- if (stride > 0)
- {
- return ((stop - start - 1) / stride) + 1;
- }
- else
- {
- return ((stop - start + 1) / stride) + 1;
- }
-}
-
-template <typename T>
-inline void StridedSlice(const T *inputData, const TensorShape &inputShape, int32_t beginMask,
- int32_t endMask, const std::vector<int32_t> &startIndices,
- const std::vector<int32_t> &stopIndices,
- const std::vector<int32_t> &strides, T *outputData)
-{
- ARM_COMPUTE_ERROR_ON(startIndices.size() != maxDims);
- ARM_COMPUTE_ERROR_ON(stopIndices.size() != maxDims);
- ARM_COMPUTE_ERROR_ON(strides.size() != maxDims);
-
- const int32_t start_b = StartForAxis(beginMask, startIndices, strides, inputShape, 3);
- const int32_t stop_b = StopForAxis(endMask, stopIndices, strides, inputShape, 3);
- const int32_t start_d = StartForAxis(beginMask, startIndices, strides, inputShape, 2);
- const int32_t stop_d = StopForAxis(endMask, stopIndices, strides, inputShape, 2);
- const int32_t start_h = StartForAxis(beginMask, startIndices, strides, inputShape, 1);
- const int32_t stop_h = StopForAxis(endMask, stopIndices, strides, inputShape, 1);
- const int32_t start_w = StartForAxis(beginMask, startIndices, strides, inputShape, 0);
- const int32_t stop_w = StopForAxis(endMask, stopIndices, strides, inputShape, 0);
-
- // The shape of outputData may collapse in one-dimension.
- // Therefore, it is necessary to create a shape that matches the result of the outputData.
- TensorShape outputShape(
- getOutDim(start_w, stop_w, strides[0]), getOutDim(start_h, stop_h, strides[1]),
- getOutDim(start_d, stop_d, strides[2]), getOutDim(start_b, stop_b, strides[3]));
- for (int32_t in_b = start_b, b = 0; strides[3] > 0 ? in_b < stop_b : in_b > stop_b;
- in_b += strides[3], b++)
- {
- for (int32_t in_d = start_d, d = 0; strides[2] > 0 ? in_d < stop_d : in_d > stop_d;
- in_d += strides[2], d++)
- {
- for (int32_t in_h = start_h, h = 0; strides[1] > 0 ? in_h < stop_h : in_h > stop_h;
- in_h += strides[1], h++)
- {
- for (int32_t in_w = start_w, w = 0; strides[0] > 0 ? in_w < stop_w : in_w > stop_w;
- in_w += strides[0], w++)
- {
- outputData[offset4D(outputShape, b, d, h, w)] =
- inputData[offset4D(inputShape, in_b, in_d, in_h, in_w)];
- }
- }
- }
- }
-}
-
-void CLStridedSliceCPU::run_on_cpu()
-{
- // TODO: Support shrinkAxisMask
- cl::CommandQueue q = CLScheduler::get().queue();
-
- _input->map(q);
- _output->map(q);
- _beginData->map(q);
- _endData->map(q);
- _stridesData->map(q);
-
- TensorShape inputShape = _input->info()->tensor_shape();
- TensorShape outputShape = _output->info()->tensor_shape();
-
- std::vector<int32_t> starts;
- std::vector<int32_t> stops;
- std::vector<int32_t> strides;
-
- for (uint32_t idx = 0; idx <= _input->info()->num_dimensions() - 1; ++idx)
- {
- starts.emplace_back(reinterpret_cast<int32_t *>(_beginData->buffer())[idx]);
- stops.emplace_back(reinterpret_cast<int32_t *>(_endData->buffer())[idx]);
- strides.emplace_back(reinterpret_cast<int32_t *>(_stridesData->buffer())[idx]);
- }
-
- for (uint32_t i = _input->info()->num_dimensions(); i < maxDims; i++)
- {
- starts.emplace_back(0);
- stops.emplace_back(1);
- strides.emplace_back(1);
- }
-
- switch (_input->info()->data_type())
- {
- case DataType::U8:
- case DataType::QASYMM8:
- StridedSlice(reinterpret_cast<const uint8_t *>(_input->buffer()), inputShape, _beginMask,
- _endMask, starts, stops, strides,
- reinterpret_cast<uint8_t *>(_output->buffer()));
- break;
- case DataType::S8:
- case DataType::QS8:
- StridedSlice(reinterpret_cast<const int8_t *>(_input->buffer()), inputShape, _beginMask,
- _endMask, starts, stops, strides, reinterpret_cast<int8_t *>(_output->buffer()));
- break;
- case DataType::U16:
- StridedSlice(reinterpret_cast<const uint16_t *>(_input->buffer()), inputShape, _beginMask,
- _endMask, starts, stops, strides,
- reinterpret_cast<uint16_t *>(_output->buffer()));
- break;
- case DataType::S16:
- case DataType::QS16:
- StridedSlice(reinterpret_cast<const int16_t *>(_input->buffer()), inputShape, _beginMask,
- _endMask, starts, stops, strides,
- reinterpret_cast<int16_t *>(_output->buffer()));
- break;
- case DataType::F16:
- // Not sure this works.
- StridedSlice(reinterpret_cast<const half *>(_input->buffer()), inputShape, _beginMask,
- _endMask, starts, stops, strides, reinterpret_cast<half *>(_output->buffer()));
- break;
- case DataType::U32:
- StridedSlice(reinterpret_cast<const uint32_t *>(_input->buffer()), inputShape, _beginMask,
- _endMask, starts, stops, strides,
- reinterpret_cast<uint32_t *>(_output->buffer()));
- break;
- case DataType::S32:
- StridedSlice(reinterpret_cast<const int32_t *>(_input->buffer()), inputShape, _beginMask,
- _endMask, starts, stops, strides,
- reinterpret_cast<int32_t *>(_output->buffer()));
- break;
- case DataType::F32:
- StridedSlice(reinterpret_cast<const float *>(_input->buffer()), inputShape, _beginMask,
- _endMask, starts, stops, strides, reinterpret_cast<float *>(_output->buffer()));
- break;
- default:
- ARM_COMPUTE_ERROR("DataType not supported");
- break;
- }
-
- _input->unmap(q);
- _output->unmap(q);
- _beginData->unmap(q);
- _endData->unmap(q);
- _stridesData->unmap(q);
-}
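
The removed CLStridedSliceCPU helpers resolve each axis in three steps: apply the begin/end mask, wrap negative indices, then clamp (the stop index clamps to one past the last element for forward strides). A worked single-axis example under assumed inputs (axis size 10, begin = -4, end bit set in endMask, stride = 2), written as a standalone sketch rather than the removed functions themselves:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    int main()
    {
      const int32_t axisSize = 10, stride = 2;

      int32_t start = -4;                                  // negative index counts from the end
      int32_t stop = std::numeric_limits<int32_t>::max();  // endMask bit set and stride > 0

      if (start < 0)
        start += axisSize;                                 // -4 -> 6
      start = std::min(std::max(start, 0), axisSize - 1);  // clamp to [0, 9] -> 6
      stop = std::min(std::max(stop, 0), axisSize);        // forward clamp to [0, 10] -> 10

      const int32_t outDim = ((stop - start - 1) / stride) + 1; // picks elements 6 and 8 -> 2
      std::printf("start=%d stop=%d outDim=%d\n", start, stop, outDim);
      return 0;
    }
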
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp
new file mode 100644
index 000000000..be7353493
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLStridedSliceEx.h"
+
+#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h"
+
+using namespace arm_compute;
+
+void CLStridedSliceEx::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
+ ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask,
+ int32_t endMask, int32_t shrinkAxisMask)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceExKernel>();
+ k->configure(input, output, beginData, endData, stridesData, beginMask, endMask, shrinkAxisMask);
+ _kernel = std::move(k);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
index 6426364c9..19177497c 100644
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
@@ -15,12 +15,9 @@
* limitations under the License.
*/
#include "arm_compute/runtime/CL/functions/CLTopKV2.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/CLHelpers.h"
-
-#include <vector>
-#include <algorithm>
#include "../../topk_v2.h"