diff options
Diffstat (limited to 'libs/ARMComputeEx/src/runtime/CL/functions')
27 files changed, 853 insertions, 491 deletions
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp
new file mode 100644
index 000000000..1e52fc429
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLActivationLayerEx.h"
+
+#include "arm_compute/core/CL/kernels/CLActivationLayerExKernel.h"
+
+using namespace arm_compute;
+
+// Builds a CLActivationLayerExKernel, configures it with the given tensors and
+// activation info, and hands ownership of it to _kernel.
+void CLActivationLayerEx::configure(ICLTensor *input, ICLTensor *output,
+                                    ActivationLayerInfoEx act_info)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLActivationLayerExKernel>();
+  kernel->configure(input, output, act_info);
+  _kernel = std::move(kernel);
+}
+
+// Delegates the check to CLActivationLayerExKernel::validate and returns its status.
+Status CLActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+                                     const ActivationLayerInfoEx &act_info)
+{
+  return CLActivationLayerExKernel::validate(input, output, act_info);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp
new file mode 100644
index 000000000..dff743e89
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLArgMinMax.h"
+
+#include "arm_compute/core/CL/kernels/CLArgMinMaxKernel.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+namespace arm_compute
+{
+
+CLArgMinMax::CLArgMinMax()
+    : _input(nullptr), _output(nullptr), _argminmax_axis(), _interm_tensors(), _argminmax_kernels(),
+      _num_of_kernels()
+{
+}
+
+// Configures one CLArgMinMaxKernel per entry of `axis`, chained through
+// intermediate tensors: kernel i reads tensors[i] and writes tensors[i + 1],
+// where tensors = { input, intermediates..., output }.
+//
+// input  : source tensor of the first kernel.
+// output : destination tensor of the last kernel.
+// axis   : axes to process, one kernel per entry; must be non-empty and free of
+//          duplicates.
+// op     : operation forwarded unchanged to every kernel.
+void CLArgMinMax::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis,
+                            ArgOperation op)
+{
+  // With an empty axis list "axis.size() - 1" below would wrap around in size_t
+  // and request an enormous array, so reject it explicitly.
+  if (axis.empty())
+  {
+    ARM_COMPUTE_ERROR("CLArgMinMax requires at least one axis");
+  }
+
+  // validate() takes the axis list as its second argument and the output info as
+  // its third; the arguments are passed in that order.
+  ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), axis, output->info(), op));
+  _input = input;
+  _output = output;
+  _argminmax_axis = axis;
+  _arg_op = op;
+  // NOTE The argminmax_axis must have no duplication.
+  _num_of_kernels = axis.size();
+  const size_t num_of_interm_tensors = _num_of_kernels - 1;
+
+  _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
+  _argminmax_kernels =
+      arm_compute::support::cpp14::make_unique<CLArgMinMaxKernel[]>(_num_of_kernels);
+
+  // Describe each intermediate tensor: the input shape with every axis handled
+  // so far collapsed to 1, keeping the input's channel count and data type.
+  TensorShape shape{input->info()->tensor_shape()};
+  for (size_t i = 0; i < num_of_interm_tensors; i++)
+  {
+    shape.set(_argminmax_axis[i], 1);
+    _interm_tensors[i].allocator()->init(
+        TensorInfo(shape, input->info()->num_channels(), input->info()->data_type()));
+  }
+
+  // Set a vector that is ordered ICLTensors sequentially.
+  std::vector<ICLTensor *> tensors;
+  tensors.reserve(_num_of_kernels + 1);
+  tensors.emplace_back(input);
+  for (size_t i = 0; i < num_of_interm_tensors; i++)
+  {
+    tensors.emplace_back(_interm_tensors.get() + i);
+  }
+  tensors.emplace_back(output);
+
+  // Apply ArgMinMax on all kernels
+  for (size_t i = 0; i < _num_of_kernels; i++)
+  {
+    _argminmax_kernels[i].configure(tensors[i], tensors[i + 1], _argminmax_axis[i], op);
+  }
+
+  // Allocate the intermediates only once every kernel has been configured.
+  // NOTE(review): moved after the configure() calls so that a kernel can still
+  // adjust the tensor info of an intermediate; confirm against the kernel.
+  for (size_t i = 0; i < num_of_interm_tensors; i++)
+  {
+    _interm_tensors[i].allocator()->allocate();
+  }
+}
+
+// Checks the same kernel chain that configure() builds, using temporary
+// TensorInfo objects in place of the intermediate tensors.
+//
+// Returns an error status when `argminmax_axis` is empty, the first error
+// reported by CLArgMinMaxKernel::validate otherwise, or an empty Status when
+// every kernel validates.
+Status CLArgMinMax::validate(const ITensorInfo *input, const std::vector<uint32_t> &argminmax_axis,
+                             const ITensorInfo *output, ArgOperation op)
+{
+  // Guard the "size() - 1" computation below against unsigned wrap-around.
+  ARM_COMPUTE_RETURN_ERROR_ON_MSG(argminmax_axis.empty(),
+                                  "CLArgMinMax requires at least one axis");
+
+  const size_t num_of_kernels = argminmax_axis.size();
+  const size_t num_of_interm_tensors = num_of_kernels - 1;
+
+  // Create temporary tensor infos
+  auto interm_tensors =
+      arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
+
+  // Create intermediate tensor info
+  TensorShape shape{input->tensor_shape()};
+
+  for (size_t i = 0; i < num_of_interm_tensors; i++)
+  {
+    shape.set(argminmax_axis[i], 1);
+    interm_tensors[i].set_data_type(input->data_type());
+    interm_tensors[i].set_tensor_shape(shape);
+    interm_tensors[i].set_num_channels(input->num_channels());
+  }
+
+  // Set a vector that is ordered ITensorInfo sequentially.
+  std::vector<const ITensorInfo *> tensors;
+  tensors.reserve(num_of_kernels + 1);
+  tensors.emplace_back(input);
+  for (size_t i = 0; i < num_of_interm_tensors; i++)
+  {
+    tensors.emplace_back(interm_tensors.get() + i);
+  }
+  tensors.emplace_back(output);
+
+  // Validate argminmax only on all kernels
+  for (size_t i = 0; i < num_of_kernels; i++)
+  {
+    ARM_COMPUTE_RETURN_ON_ERROR(
+        CLArgMinMaxKernel::validate(tensors[i], tensors[i + 1], argminmax_axis[i], op));
+  }
+
+  return Status{};
+}
+
+// Enqueues the configured kernels on the CLScheduler in axis order.
+void CLArgMinMax::run()
+{
+  for (size_t i = 0; i < _num_of_kernels; ++i)
+  {
+    CLScheduler::get().enqueue(_argminmax_kernels[i]);
+  }
+}
+
+} // namespace arm_compute
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp
new file mode 100644
index 000000000..3f403c80a
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+#include "arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h"
+
+using namespace arm_compute;
+
+// Configures a CLArithmeticSubtractionExKernel for (input1, input2) -> output with
+// the given policy, then sets up a REPLICATE border handler on whichever input
+// has size 1 in dimension 0, provided the output is wider than 1 there.
+void CLArithmeticSubtractionEx::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
+                                          ConvertPolicy policy)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLArithmeticSubtractionExKernel>();
+  kernel->configure(input1, input2, output, policy);
+  _kernel = std::move(kernel);
+
+  // Nothing more to do unless the output is wider than 1 in dimension 0.
+  if (output->info()->dimension(0) <= 1)
+  {
+    return;
+  }
+
+  // input1 is chosen when its dimension 0 is 1; otherwise input2 is examined.
+  ICLTensor *const border_target = (input1->info()->dimension(0) == 1) ? input1 : input2;
+  if (border_target->info()->dimension(0) == 1)
+  {
+    _border_handler.configure(border_target, _kernel->border_size(), BorderMode::REPLICATE);
+  }
+}
+
+// Delegates the check to CLArithmeticSubtractionExKernel::validate and returns its status.
+Status CLArithmeticSubtractionEx::validate(const ITensorInfo *input1, const ITensorInfo *input2,
+                                           const ITensorInfo *output, ConvertPolicy policy)
+{
+  return CLArithmeticSubtractionExKernel::validate(input1, input2, output, policy);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp
new file mode 100644
index 000000000..26e3798cc
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLBatchToSpaceND.h"
+
+#include "arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h"
+
+using namespace arm_compute;
+
+// Builds a CLBatchToSpaceNDKernel, configures it with the tensors and the
+// block_size pointer as given, and hands ownership of it to _kernel.
+void CLBatchToSpaceND::configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLBatchToSpaceNDKernel>();
+  kernel->configure(input, output, block_size);
+  _kernel = std::move(kernel);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
new file mode 100644
index 000000000..7c5fe5eda
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+#include "arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h"
+
+#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+
+using namespace arm_compute;
+
+// Configures a CLBinaryLogicalOpKernel for (input1, input2) -> output with the
+// given operation, then sets up a REPLICATE border handler on whichever input
+// has size 1 in dimension 0, provided the output is wider than 1 there.
+void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
+                                  BinaryLogicalOperation op)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLBinaryLogicalOpKernel>();
+  kernel->configure(input1, input2, output, op);
+  _kernel = std::move(kernel);
+
+  // Nothing more to do unless the output is wider than 1 in dimension 0.
+  if (output->info()->dimension(0) <= 1)
+  {
+    return;
+  }
+
+  // input1 is chosen when its dimension 0 is 1; otherwise input2 is examined.
+  ICLTensor *const border_target = (input1->info()->dimension(0) == 1) ? input1 : input2;
+  if (border_target->info()->dimension(0) == 1)
+  {
+    _border_handler.configure(border_target, _kernel->border_size(), BorderMode::REPLICATE);
+  }
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
index e1059ab53..8e106737c 100644
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
@@ -17,7 +17,6 @@
 #include "arm_compute/runtime/CL/functions/CLCast.h"
 
 #include "arm_compute/core/CL/kernels/CLCastKernel.h"
-#include "support/ToolchainSupport.h"
 
 using namespace arm_compute;
 
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp
new file mode 100644
index 000000000..f6a745a25
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLComparisonOp.h"
+
+#include "arm_compute/core/CL/kernels/CLComparisonOpKernel.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+
+using namespace arm_compute;
+
+// Configures a CLComparisonOpKernel for (input1, input2) -> output with the given
+// comparison, then sets up a REPLICATE border handler on whichever input has
+// size 1 in dimension 0, provided the output is wider than 1 there.
+void CLComparisonOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
+                               const ComparisonOperation &op)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLComparisonOpKernel>();
+  kernel->configure(input1, input2, output, op);
+  _kernel = std::move(kernel);
+
+  // Nothing more to do unless the output is wider than 1 in dimension 0.
+  if (output->info()->dimension(0) <= 1)
+  {
+    return;
+  }
+
+  // input1 is chosen when its dimension 0 is 1; otherwise input2 is examined.
+  ICLTensor *const border_target = (input1->info()->dimension(0) == 1) ? input1 : input2;
+  if (border_target->info()->dimension(0) == 1)
+  {
+    _border_handler.configure(border_target, _kernel->border_size(), BorderMode::REPLICATE);
+  }
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
new file mode 100644
index 000000000..c2e4ca9ff
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h"
+
+#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
+
+using namespace arm_compute;
+
+// Builds a CLDepthToSpaceKernel, configures it with the tensors and block size,
+// and hands ownership of it to _kernel.
+void CLDepthToSpace::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLDepthToSpaceKernel>();
+  kernel->configure(input, output, block_size);
+  _kernel = std::move(kernel);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
new file mode 100644
index 000000000..2781784ca
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h"
+
+#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
+
+using namespace arm_compute;
+
+// Builds a CLEmbeddingLookupKernel, configures it with the input, output and
+// lookups tensors, and hands ownership of it to _kernel.
+void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output,
+                                  const ICLTensor *lookups)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLEmbeddingLookupKernel>();
+  kernel->configure(input, output, lookups);
+  _kernel = std::move(kernel);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp
new file mode 100644
index 000000000..411fa8700
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+#include "arm_compute/runtime/CL/functions/CLExp.h"
+
+#include "arm_compute/core/CL/kernels/CLExpKernel.h"
+
+using namespace arm_compute;
+
+// Builds a CLExpKernel, configures it with the input and output tensors, and
+// hands ownership of it to _kernel.
+void CLExp::configure(const ICLTensor *input, ICLTensor *output)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLExpKernel>();
+  kernel->configure(input, output);
+  _kernel = std::move(kernel);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp
index 5552cbc6f..fb056fe45 100644
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp
@@ -16,11 +16,7 @@
  */
 #include "arm_compute/runtime/CL/functions/CLGather.h"
 
-#include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/CL/kernels/CLGatherKernel.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
 
 using namespace arm_compute;
 
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
new file mode 100644
index 000000000..7180e9356
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+#include "arm_compute/runtime/CL/functions/CLHashtableLookup.h"
+
+#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h"
+
+using namespace arm_compute;
+
+// Builds a CLHashtableLookupKernel, configures it with the lookups, keys, input,
+// output and hits tensors, and hands ownership of it to _kernel.
+void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys,
+                                  const ICLTensor *input, ICLTensor *output, ICLTensor *hits)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLHashtableLookupKernel>();
+  kernel->configure(lookups, keys, input, output, hits);
+  _kernel = std::move(kernel);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
new file mode 100644
index 000000000..be35ea732
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+#include "arm_compute/runtime/CL/functions/CLNeg.h"
+
+#include "arm_compute/core/CL/kernels/CLNegKernel.h"
+
+using namespace arm_compute;
+
+// Builds a CLNegKernel, configures it with the input and output tensors, and
+// hands ownership of it to _kernel.
+void CLNeg::configure(ICLTensor *input, ICLTensor *output)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLNegKernel>();
+  kernel->configure(input, output);
+  _kernel = std::move(kernel);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp
new file mode 100644
index 000000000..276c4557a
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+#include "arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h"
+
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+using namespace arm_compute;
+
+CLNormalizationLayerEx::CLNormalizationLayerEx() : _norm_kernel(), _border_handler() {}
+
+// Configures the normalization kernel for input -> output, then configures the
+// border handler to fill the input's border with constant zeros.
+void CLNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
+                                       const NormalizationLayerInfo &norm_info)
+{
+  ARM_COMPUTE_ERROR_ON(input == nullptr);
+
+  // The normalization kernel goes first: its border size drives the handler below.
+  _norm_kernel.configure(input, output, norm_info);
+
+  // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel
+  const auto border = _norm_kernel.border_size();
+  _border_handler.configure(input, border, BorderMode::CONSTANT, PixelValue(0));
+}
+
+// Delegates the check to CLNormalizationLayerExKernel::validate and returns its status.
+Status CLNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+                                        const NormalizationLayerInfo &norm_info)
+{
+  return CLNormalizationLayerExKernel::validate(input, output, norm_info);
+}
+
+// Enqueues the border handler (second enqueue argument false) followed by the
+// normalization kernel.
+void CLNormalizationLayerEx::run()
+{
+  auto &scheduler = CLScheduler::get();
+
+  // Run border handler
+  scheduler.enqueue(_border_handler, false);
+
+  // Run normalization kernel
+  scheduler.enqueue(_norm_kernel);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
new file mode 100644
index 000000000..38adedd10
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "arm_compute/runtime/CL/functions/CLPReLU.h"
+
+#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+
+using namespace arm_compute;
+
+// Configures a CLPReLUKernel for (input, alpha) -> output, then sets up a
+// REPLICATE border handler on whichever of input/alpha has size 1 in
+// dimension 0, provided the output is wider than 1 there.
+void CLPReLU::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLPReLUKernel>();
+  kernel->configure(input, alpha, output);
+  _kernel = std::move(kernel);
+
+  // Nothing more to do unless the output is wider than 1 in dimension 0.
+  if (output->info()->dimension(0) <= 1)
+  {
+    return;
+  }
+
+  // input is chosen when its dimension 0 is 1; otherwise alpha is examined.
+  ICLTensor *const border_target = (input->info()->dimension(0) == 1) ? input : alpha;
+  if (border_target->info()->dimension(0) == 1)
+  {
+    _border_handler.configure(border_target, _kernel->border_size(), BorderMode::REPLICATE);
+  }
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
new file mode 100644
index 000000000..5265b6c34
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp
@@ -0,0 +1,28 @@
+/*
+* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+* Copyright (c) 2016-2018 ARM Limited.
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*      http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. 
+*/
+#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h"
+
+#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
+
+using namespace arm_compute;
+
+// Builds a CLPadLayerKernel, configures it with the input, output and pad_size
+// tensors, and hands ownership of it to _kernel.
+void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLPadLayerKernel>();
+  kernel->configure(input, output, pad_size);
+  _kernel = std::move(kernel);
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp
new file mode 100644
index 000000000..fb363270d
--- /dev/null
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+#include "arm_compute/runtime/CL/functions/CLPermuteEx.h"
+
+#include "arm_compute/core/CL/kernels/CLPermuteExKernel.h"
+
+using namespace arm_compute;
+
+// Builds a CLPermuteExKernel, configures it with the tensors and permutation
+// vector, and hands ownership of it to _kernel.
+void CLPermuteEx::configure(const ICLTensor *input, ICLTensor *output,
+                            const PermutationVector &perm)
+{
+  auto kernel = arm_compute::support::cpp14::make_unique<CLPermuteExKernel>();
+  kernel->configure(input, output, perm);
+  _kernel = std::move(kernel);
+}
+
+// Returns the kernel's error status if CLPermuteExKernel::validate reports one,
+// otherwise an empty Status.
+Status CLPermuteEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+                             const PermutationVector &perm)
+{
+  ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteExKernel::validate(input, output, perm));
+  return Status{};
+}
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp
index e1add5e90..dc0baa8dd 100644
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp
@@ -18,9 +18,6 @@
 
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
 
 using namespace arm_compute;
 
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp
deleted file mode 100644
index 3382058db..000000000
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLReduceMax.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "support/ToolchainSupport.h" -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/kernels/CLReduceMaxKernel.h" - -#include <vector> -#include <algorithm> - -#include <utility> - -#define REDUCE_MAX_RUN_ON_CPU 1 - -namespace arm_compute -{ - -CLReduceMax::CLReduceMax() : _axis(0), _input(nullptr), _output(nullptr), _kernel(nullptr) {} - -void CLReduceMax::configure(ICLTensor *input, int axis, ICLTensor *output) -{ - _axis = axis; - - _input = input; - _output = output; - - auto k = arm_compute::support::cpp14::make_unique<CLReduceMaxKernel>(); - k->configure(input, axis, output); - _kernel = std::move(k); - - // We can handle for simple case only - // Output rank: 1 - // Axis: one axis value, restrict to 1 - ARM_COMPUTE_ERROR_ON(input->info()->tensor_shape().num_dimensions() != 2); - ARM_COMPUTE_ERROR_ON(output->info()->tensor_shape().num_dimensions() != 1); - ARM_COMPUTE_ERROR_ON(axis != 1); -} - -Status CLReduceMax::validate(const ITensorInfo *input, int32_t axis, const ITensorInfo *output) -{ - return CLReduceMaxKernel::validate(input, axis, output); -} - -void CLReduceMax::run() -{ -#if REDUCE_MAX_RUN_ON_CPU - run_on_cpu(); - - arm_compute::CLScheduler::get().sync(); -#else - arm_compute::CLScheduler::get().enqueue(*_kernel); -#endif -} - -void CLReduceMax::run_on_cpu() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - - _input->map(q); - _output->map(q); - - // Compute by CPU for simple case - // 
Input rank: 2 - // Output rank: 1 - // Axis: one axis value, restrict to 1 - - float *input_data = (float *)_input->buffer(); - float *output_data = (float *)_output->buffer(); - - std::vector<float> container_max; - int cols = _input->info()->tensor_shape()[0]; - int rows = _input->info()->tensor_shape()[1]; - container_max.resize(rows); - - // Initialize as 1st element in row - float *input_pointer = input_data; - for (int i = 0; i < rows; i++) - { - container_max[i] = *input_pointer; - input_pointer += cols; - } - - // Update max value in row - for (int i = 0; i < rows; i++) - { - float max_in_row = container_max[i]; - for (int j = 1; j < cols; j++) - { - if (max_in_row < input_data[i * cols + j]) - { - max_in_row = input_data[i * cols + j]; - } - } - container_max[i] = max_in_row; - } - - for (int i = 0; i < rows; i++) - { - output_data[i] = container_max[i]; - } - - _input->unmap(q); - _output->unmap(q); -} -} // namespace arm_compute diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp new file mode 100644 index 000000000..2b8d82706 --- /dev/null +++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "arm_compute/runtime/CL/functions/CLReduceOperation.h" + +#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" +#include "arm_compute/runtime/CL/CLScheduler.h" + +using namespace arm_compute; + +CLReduceOperation::CLReduceOperation() + : _input(nullptr), _output(nullptr), _axis(), _interm_tensors(), _reduce_kernels() +{ +} + +Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *output, + const std::set<uint32_t> &axis, const ReduceOperation &op) +{ + const size_t num_of_kernels = axis.size(); + const size_t num_of_interm_tensors = num_of_kernels - 1; + + // Create temporary tensor infos + auto interm_tensors = + arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); + + // Create intermediate tensor info + TensorShape shape{input->tensor_shape()}; + + auto it = axis.begin(); + for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it) + { + shape.set(*it, 1); + interm_tensors[i].set_data_type(input->data_type()); + interm_tensors[i].set_tensor_shape(shape); + interm_tensors[i].set_num_channels(input->num_channels()); + } + + // Set a vector that is ordered ITensorInfo sequentially. + std::vector<const ITensorInfo *> tensors; + tensors.emplace_back(input); + for (size_t i = 0; i < num_of_interm_tensors; ++i) + { + tensors.emplace_back(interm_tensors.get() + i); + } + tensors.emplace_back(output); + + // Validate ReduceOperation only on all kernels + it = axis.begin(); + for (size_t i = 0; i < num_of_kernels; ++i, ++it) + { + ARM_COMPUTE_RETURN_ON_ERROR( + CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op)); + } + + return Status{}; +} + +void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output, + const std::set<uint32_t> &axis, ReduceOperation op) +{ + ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, op)); + + _axis = axis; + + _input = input; + _output = output; + + // NOTE The axis must have no duplication. 
+ const size_t num_of_kernels = axis.size(); + const size_t num_of_interm_tensors = num_of_kernels - 1; + + _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); + _reduce_kernels = + arm_compute::support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels); + + TensorShape shape{input->info()->tensor_shape()}; + auto it = axis.begin(); + for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it) + { + shape.set(*it, 1); + _interm_tensors[i].allocator()->init( + TensorInfo(shape, input->info()->num_channels(), input->info()->data_type())); + _interm_tensors[i].allocator()->allocate(); + } + + // Set a vector that is ordered ICLTensors sequentially. + std::vector<ICLTensor *> tensors; + tensors.emplace_back(input); + for (size_t i = 0; i < num_of_interm_tensors; ++i) + { + tensors.emplace_back(_interm_tensors.get() + i); + } + tensors.emplace_back(output); + + // Apply ReduceOperation on all kernels + it = axis.begin(); + for (size_t i = 0; i < num_of_kernels; ++i, ++it) + { + _reduce_kernels[i].configure(tensors[i], tensors[i + 1], *it, op); + } +} + +void CLReduceOperation::run() +{ + const size_t num_of_kernels = _axis.size(); + for (size_t i = 0; i < num_of_kernels; ++i) + { + CLScheduler::get().enqueue(_reduce_kernels[i]); + } +} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLReductionMean.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLReductionMean.cpp deleted file mode 100644 index ab724e752..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLReductionMean.cpp +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLReductionMean.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLReductionMeanKernel.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/Tensor.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -CLReductionMean::CLReductionMean() : _reduction_mean_kernel(), _fill_border_kernel() {} - -Status CLReductionMean::validate(const ITensorInfo *input, const ITensorInfo *output, - std::vector<uint32_t> axis) -{ - ARM_COMPUTE_RETURN_ON_ERROR(CLReductionMeanKernel::validate(input, output, axis)); - return Status{}; -} - -void CLReductionMean::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis) -{ - _reduction_mean_kernel.configure(input, output, axis); - _fill_border_kernel.configure(input, _reduction_mean_kernel.border_size(), BorderMode::CONSTANT, - PixelValue(0)); -} - -void CLReductionMean::run() -{ - CLScheduler::get().enqueue(_fill_border_kernel); - CLScheduler::get().enqueue(_reduction_mean_kernel); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp new file mode 100644 index 000000000..c03826891 --- /dev/null +++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2018 Samsung 
Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "arm_compute/runtime/CL/functions/CLSpaceToBatchND.h" + +#include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h" + +using namespace arm_compute; + +void CLSpaceToBatchND::configure(const ICLTensor *input, const ICLTensor *block_size, + const ICLTensor *padding_size, ICLTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique<CLSpaceToBatchNDKernel>(); + k->configure(input, block_size, padding_size, output); + _kernel = std::move(k); +} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp new file mode 100644 index 000000000..0f455f96f --- /dev/null +++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h" + +#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h" + +using namespace arm_compute; + +void CLSpaceToDepth::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size) +{ + auto k = arm_compute::support::cpp14::make_unique<CLSpaceToDepthKernel>(); + k->configure(input, output, block_size); + _kernel = std::move(k); +} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp new file mode 100644 index 000000000..dc6e4af44 --- /dev/null +++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2016-2018 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "arm_compute/runtime/CL/functions/CLSquaredDifference.h" + +#include "arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h" +#include "arm_compute/core/CL/ICLTensor.h" + +using namespace arm_compute; + +void CLSquaredDifference::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique<CLSquaredDifferenceKernel>(); + k->configure(input1, input2, output); + _kernel = std::move(k); + + if (output->info()->dimension(0) > 1) + { + ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; + + if (broadcasted_info->info()->dimension(0) == 1) + { + _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); + } + } +} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSlice.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSlice.cpp deleted file mode 100644 index cd576cec1..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSlice.cpp +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * Copyright 2018 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLStridedSlice.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h" -#include "arm_compute/core/utils/misc/Utility.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "support/ToolchainSupport.h" -#include <vector> - -using namespace arm_compute; - -static const int32_t maxDims = 4; - -// Return the index for the first element along that axis. This index will be a -// positive integer between [0, axisSize - 1] that can be used to index -// directly into the data. -inline int32_t StartForAxis(int32_t beginMask, std::vector<int32_t> const &startIndices, - std::vector<int32_t> const &strides, const TensorShape &inputShape, - int32_t axis) -{ - // Begin with the specified index - int32_t start = startIndices[axis]; - - // beginMask override - if (beginMask & 1 << axis) - { - if (strides[axis] > 0) - { - // Forward iteration - use the first element. These values will get - // clamped below (Note: We could have set them to 0 and axisSize-1, but - // use lowest() and max() to maintain symmetry with StopForAxis()) - start = std::numeric_limits<int32_t>::lowest(); - } - else - { - // Backward iteration - use the last element. - start = std::numeric_limits<int32_t>::max(); - } - } - - // Handle negative indices - int32_t axisSize = inputShape[axis]; - if (start < 0) - { - start += axisSize; - } - - // Clamping - start = arm_compute::utility::clamp(start, 0, axisSize - 1); - - return start; -} - -// Return the "real" index for the end of iteration along that axis. This is an -// "end" in the traditional C sense, in that it points to one past the last -// element. ie. So if you were iterating through all elements of a 1D array of -// size 4, this function would return 4 as the stop, because it is one past the -// "real" indices of 0, 1, 2 & 3. 
-inline int32_t StopForAxis(int32_t endMask, std::vector<int32_t> const &stopIndices, - std::vector<int32_t> const &strides, const TensorShape &inputShape, - int32_t axis) -{ - // Begin with the specified index - int32_t stop = stopIndices[axis]; - - // endMask override - if (endMask & (1 << axis)) - { - if (strides[axis] > 0) - { - // Forward iteration - use the last element. These values will get - // clamped below - stop = std::numeric_limits<int32_t>::max(); - } - else - { - // Backward iteration - use the first element. - stop = std::numeric_limits<int32_t>::lowest(); - } - } - - // Handle negative indices - int32_t axisSize = inputShape[axis]; - if (stop < 0) - { - stop += axisSize; - } - - // Clamping - // Because the end index points one past the last element, we need slightly - // different clamping ranges depending on the direction. - if (strides[axis] > 0) - { - // Forward iteration - stop = arm_compute::utility::clamp(stop, 0, axisSize); - } - else - { - // Backward iteration - stop = arm_compute::utility::clamp(stop, -1, axisSize - 1); - } - - return stop; -} - -inline int32_t offset4D(const TensorShape &shape, int32_t b, int32_t d, int32_t h, int32_t w) -{ - int32_t offset = b * shape[2] * shape[1] * shape[0]; - offset += d * shape[1] * shape[0]; - offset += h * shape[0]; - offset += w; - return offset; -} - -void CLStridedSlice::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, - ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, - int32_t endMask, int32_t shrinkAxisMask) -{ - auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceKernel>(); - k->configure(input, output, beginData, endData, stridesData, beginMask, endMask, shrinkAxisMask); - _kernel = std::move(k); -} - -void CLStridedSliceCPU::configure(ICLTensor *input, ICLTensor *output, ICLTensor *beginData, - ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, - int32_t endMask, int32_t shrinkAxisMask) -{ - 
ARM_COMPUTE_ERROR_THROW_ON(CLStridedSliceKernel::validate( - input->info(), output->info(), beginData->info(), endData->info(), stridesData->info(), - beginMask, endMask, shrinkAxisMask)); - - _input = input; - _output = output; - _beginData = beginData; - _endData = endData; - _stridesData = stridesData; - _beginMask = beginMask; - _endMask = endMask; - _shrinkAxisMask = shrinkAxisMask; -} - -void CLStridedSliceCPU::run() -{ - run_on_cpu(); - - arm_compute::CLScheduler::get().sync(); -} - -inline int32_t getOutDim(int32_t start, int32_t stop, int32_t stride) -{ - if (stride > 0) - { - return ((stop - start - 1) / stride) + 1; - } - else - { - return ((stop - start + 1) / stride) + 1; - } -} - -template <typename T> -inline void StridedSlice(const T *inputData, const TensorShape &inputShape, int32_t beginMask, - int32_t endMask, const std::vector<int32_t> &startIndices, - const std::vector<int32_t> &stopIndices, - const std::vector<int32_t> &strides, T *outputData) -{ - ARM_COMPUTE_ERROR_ON(startIndices.size() != maxDims); - ARM_COMPUTE_ERROR_ON(stopIndices.size() != maxDims); - ARM_COMPUTE_ERROR_ON(strides.size() != maxDims); - - const int32_t start_b = StartForAxis(beginMask, startIndices, strides, inputShape, 3); - const int32_t stop_b = StopForAxis(endMask, stopIndices, strides, inputShape, 3); - const int32_t start_d = StartForAxis(beginMask, startIndices, strides, inputShape, 2); - const int32_t stop_d = StopForAxis(endMask, stopIndices, strides, inputShape, 2); - const int32_t start_h = StartForAxis(beginMask, startIndices, strides, inputShape, 1); - const int32_t stop_h = StopForAxis(endMask, stopIndices, strides, inputShape, 1); - const int32_t start_w = StartForAxis(beginMask, startIndices, strides, inputShape, 0); - const int32_t stop_w = StopForAxis(endMask, stopIndices, strides, inputShape, 0); - - // The shape of outputData may collapse in one-dimension. - // Therefore, it is necessary to create a shape that matches the result of the outputData. 
- TensorShape outputShape( - getOutDim(start_w, stop_w, strides[0]), getOutDim(start_h, stop_h, strides[1]), - getOutDim(start_d, stop_d, strides[2]), getOutDim(start_b, stop_b, strides[3])); - for (int32_t in_b = start_b, b = 0; strides[3] > 0 ? in_b < stop_b : in_b > stop_b; - in_b += strides[3], b++) - { - for (int32_t in_d = start_d, d = 0; strides[2] > 0 ? in_d < stop_d : in_d > stop_d; - in_d += strides[2], d++) - { - for (int32_t in_h = start_h, h = 0; strides[1] > 0 ? in_h < stop_h : in_h > stop_h; - in_h += strides[1], h++) - { - for (int32_t in_w = start_w, w = 0; strides[0] > 0 ? in_w < stop_w : in_w > stop_w; - in_w += strides[0], w++) - { - outputData[offset4D(outputShape, b, d, h, w)] = - inputData[offset4D(inputShape, in_b, in_d, in_h, in_w)]; - } - } - } - } -} - -void CLStridedSliceCPU::run_on_cpu() -{ - // TODO: Support shrinkAxisMask - cl::CommandQueue q = CLScheduler::get().queue(); - - _input->map(q); - _output->map(q); - _beginData->map(q); - _endData->map(q); - _stridesData->map(q); - - TensorShape inputShape = _input->info()->tensor_shape(); - TensorShape outputShape = _output->info()->tensor_shape(); - - std::vector<int32_t> starts; - std::vector<int32_t> stops; - std::vector<int32_t> strides; - - for (uint32_t idx = 0; idx <= _input->info()->num_dimensions() - 1; ++idx) - { - starts.emplace_back(reinterpret_cast<int32_t *>(_beginData->buffer())[idx]); - stops.emplace_back(reinterpret_cast<int32_t *>(_endData->buffer())[idx]); - strides.emplace_back(reinterpret_cast<int32_t *>(_stridesData->buffer())[idx]); - } - - for (uint32_t i = _input->info()->num_dimensions(); i < maxDims; i++) - { - starts.emplace_back(0); - stops.emplace_back(1); - strides.emplace_back(1); - } - - switch (_input->info()->data_type()) - { - case DataType::U8: - case DataType::QASYMM8: - StridedSlice(reinterpret_cast<const uint8_t *>(_input->buffer()), inputShape, _beginMask, - _endMask, starts, stops, strides, - reinterpret_cast<uint8_t *>(_output->buffer())); - 
break; - case DataType::S8: - case DataType::QS8: - StridedSlice(reinterpret_cast<const int8_t *>(_input->buffer()), inputShape, _beginMask, - _endMask, starts, stops, strides, reinterpret_cast<int8_t *>(_output->buffer())); - break; - case DataType::U16: - StridedSlice(reinterpret_cast<const uint16_t *>(_input->buffer()), inputShape, _beginMask, - _endMask, starts, stops, strides, - reinterpret_cast<uint16_t *>(_output->buffer())); - break; - case DataType::S16: - case DataType::QS16: - StridedSlice(reinterpret_cast<const int16_t *>(_input->buffer()), inputShape, _beginMask, - _endMask, starts, stops, strides, - reinterpret_cast<int16_t *>(_output->buffer())); - break; - case DataType::F16: - // Not sure this works. - StridedSlice(reinterpret_cast<const half *>(_input->buffer()), inputShape, _beginMask, - _endMask, starts, stops, strides, reinterpret_cast<half *>(_output->buffer())); - break; - case DataType::U32: - StridedSlice(reinterpret_cast<const uint32_t *>(_input->buffer()), inputShape, _beginMask, - _endMask, starts, stops, strides, - reinterpret_cast<uint32_t *>(_output->buffer())); - break; - case DataType::S32: - StridedSlice(reinterpret_cast<const int32_t *>(_input->buffer()), inputShape, _beginMask, - _endMask, starts, stops, strides, - reinterpret_cast<int32_t *>(_output->buffer())); - break; - case DataType::F32: - StridedSlice(reinterpret_cast<const float *>(_input->buffer()), inputShape, _beginMask, - _endMask, starts, stops, strides, reinterpret_cast<float *>(_output->buffer())); - break; - default: - ARM_COMPUTE_ERROR("DataType not supported"); - break; - } - - _input->unmap(q); - _output->unmap(q); - _beginData->unmap(q); - _endData->unmap(q); - _stridesData->unmap(q); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp new file mode 100644 index 000000000..be7353493 --- /dev/null +++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp @@ 
-0,0 +1,30 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (c) 2017 ARM Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "arm_compute/runtime/CL/functions/CLStridedSliceEx.h" + +#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h" + +using namespace arm_compute; + +void CLStridedSliceEx::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, + ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, + int32_t endMask, int32_t shrinkAxisMask) +{ + auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceExKernel>(); + k->configure(input, output, beginData, endData, stridesData, beginMask, endMask, shrinkAxisMask); + _kernel = std::move(k); +} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp index 6426364c9..19177497c 100644 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp +++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp @@ -15,12 +15,9 @@ * limitations under the License. */ #include "arm_compute/runtime/CL/functions/CLTopKV2.h" +#include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/CLHelpers.h" - -#include <vector> -#include <algorithm> #include "../../topk_v2.h" |