diff options
Diffstat (limited to 'libs/ARMComputeEx/src/runtime')
26 files changed, 0 insertions, 1530 deletions
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp deleted file mode 100644 index 1e52fc429..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLActivationLayerEx.h" - -#include "arm_compute/core/CL/kernels/CLActivationLayerExKernel.h" - -using namespace arm_compute; - -void CLActivationLayerEx::configure(ICLTensor *input, ICLTensor *output, - ActivationLayerInfoEx act_info) -{ - auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerExKernel>(); - k->configure(input, output, act_info); - _kernel = std::move(k); -} - -Status CLActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfoEx &act_info) -{ - return CLActivationLayerExKernel::validate(input, output, act_info); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp deleted file mode 100644 index dff743e89..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLArgMinMax.h" - -#include "arm_compute/core/CL/kernels/CLArgMinMaxKernel.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -namespace arm_compute -{ - -CLArgMinMax::CLArgMinMax() - : _input(nullptr), _output(nullptr), _argminmax_axis(), _interm_tensors(), _argminmax_kernels(), - _num_of_kernels() -{ -} - -void CLArgMinMax::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis, - ArgOperation op) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, op)); - _input = input; - _output = output; - _argminmax_axis = axis; - _arg_op = op; - // NOTE The argminmax_axis must have no duplication. - _num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = _num_of_kernels - 1; - - _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); - _argminmax_kernels = - arm_compute::support::cpp14::make_unique<CLArgMinMaxKernel[]>(_num_of_kernels); - - TensorShape shape{input->info()->tensor_shape()}; - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - shape.set(_argminmax_axis[i], 1); - _interm_tensors[i].allocator()->init( - TensorInfo(shape, input->info()->num_channels(), input->info()->data_type())); - _interm_tensors[i].allocator()->allocate(); - } - - // Set a vector that is ordered ICLTensors sequentially. - std::vector<ICLTensor *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - tensors.emplace_back(_interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Apply ArgMinMax on all kernels - for (size_t i = 0; i < _num_of_kernels; i++) - { - _argminmax_kernels[i].configure(tensors[i], tensors[i + 1], _argminmax_axis[i], op); - } -} - -Status CLArgMinMax::validate(const ITensorInfo *input, const std::vector<uint32_t> &argminmax_axis, - const ITensorInfo *output, ArgOperation op) -{ - const size_t num_of_kernels = argminmax_axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - 1; - - // Create temporary tensor infos - auto interm_tensors = - arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); - - // Create intermediate tensor info - TensorShape shape{input->tensor_shape()}; - - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - shape.set(argminmax_axis[i], 1); - interm_tensors[i].set_data_type(input->data_type()); - interm_tensors[i].set_tensor_shape(shape); - interm_tensors[i].set_num_channels(input->num_channels()); - } - - // Set a vector that is ordered ITensorInfo sequentially. - std::vector<const ITensorInfo *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - tensors.emplace_back(interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Validate argminmax only on all kernels - for (size_t i = 0; i < num_of_kernels; i++) - { - ARM_COMPUTE_RETURN_ON_ERROR( - CLArgMinMaxKernel::validate(tensors[i], tensors[i + 1], argminmax_axis[i], op)); - } - - return Status{}; -} - -void CLArgMinMax::run() -{ - for (size_t i = 0; i < _num_of_kernels; ++i) - { - CLScheduler::get().enqueue(_argminmax_kernels[i]); - } -} - -} // namespace arm_compute diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp deleted file mode 100644 index 3f403c80a..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h" - -using namespace arm_compute; - -void CLArithmeticSubtractionEx::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - ConvertPolicy policy) -{ - auto k = arm_compute::support::cpp14::make_unique<CLArithmeticSubtractionExKernel>(); - k->configure(input1, input2, output, policy); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} - -Status CLArithmeticSubtractionEx::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, ConvertPolicy policy) -{ - return CLArithmeticSubtractionExKernel::validate(input1, input2, output, policy); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp deleted file mode 100644 index 26e3798cc..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLBatchToSpaceND.h" - -#include "arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h" - -using namespace arm_compute; - -void CLBatchToSpaceND::configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLBatchToSpaceNDKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp deleted file mode 100644 index 7c5fe5eda..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h" - -#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - BinaryLogicalOperation op) -{ - auto k = arm_compute::support::cpp14::make_unique<CLBinaryLogicalOpKernel>(); - k->configure(input1, input2, output, op); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp deleted file mode 100644 index 8e106737c..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLCast.h" - -#include "arm_compute/core/CL/kernels/CLCastKernel.h" - -using namespace arm_compute; - -void CLCast::configure(ICLTensor *input, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLCastKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp deleted file mode 100644 index f6a745a25..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLComparisonOp.h" - -#include "arm_compute/core/CL/kernels/CLComparisonOpKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLComparisonOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - const ComparisonOperation &op) -{ - auto k = arm_compute::support::cpp14::make_unique<CLComparisonOpKernel>(); - k->configure(input1, input2, output, op); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp deleted file mode 100644 index c2e4ca9ff..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h" - -#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h" - -using namespace arm_compute; - -void CLDepthToSpace::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp deleted file mode 100644 index 2781784ca..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h" - -#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h" - -using namespace arm_compute; - -void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output, - const ICLTensor *lookups) -{ - auto k = arm_compute::support::cpp14::make_unique<CLEmbeddingLookupKernel>(); - k->configure(input, output, lookups); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp deleted file mode 100644 index 411fa8700..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLExp.h" - -#include "arm_compute/core/CL/kernels/CLExpKernel.h" - -using namespace arm_compute; - -void CLExp::configure(const ICLTensor *input, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLExpKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp deleted file mode 100644 index fb056fe45..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLGather.h" - -#include "arm_compute/core/CL/kernels/CLGatherKernel.h" - -using namespace arm_compute; - -void CLGather::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLGatherKernel>(); - k->configure(input1, input2, output); - _kernel = std::move(k); -} - -Status CLGather::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output) -{ - return CLGatherKernel::validate(input1, input2, output); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp deleted file mode 100644 index 7180e9356..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLHashtableLookup.h" - -#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h" - -using namespace arm_compute; - -void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys, - const ICLTensor *input, ICLTensor *output, ICLTensor *hits) -{ - auto k = arm_compute::support::cpp14::make_unique<CLHashtableLookupKernel>(); - k->configure(lookups, keys, input, output, hits); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp deleted file mode 100644 index be35ea732..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLNeg.h" - -#include "arm_compute/core/CL/kernels/CLNegKernel.h" - -using namespace arm_compute; - -void CLNeg::configure(ICLTensor *input, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLNegKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp deleted file mode 100644 index 276c4557a..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h" - -#include "arm_compute/runtime/CL/CLScheduler.h" - -using namespace arm_compute; - -CLNormalizationLayerEx::CLNormalizationLayerEx() : _norm_kernel(), _border_handler() {} - -void CLNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output, - const NormalizationLayerInfo &norm_info) -{ - ARM_COMPUTE_ERROR_ON(input == nullptr); - - // Configure normalization kernel - _norm_kernel.configure(input, output, norm_info); - - // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel - _border_handler.configure(input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0)); -} - -Status CLNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const NormalizationLayerInfo &norm_info) -{ - return CLNormalizationLayerExKernel::validate(input, output, norm_info); -} - -void CLNormalizationLayerEx::run() -{ - // Run border handler - CLScheduler::get().enqueue(_border_handler, false); - - // Run normalization kernel - CLScheduler::get().enqueue(_norm_kernel); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp deleted file mode 100644 index 38adedd10..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLPReLU.h" - -#include "arm_compute/core/CL/kernels/CLPReLUKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLPReLU::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPReLUKernel>(); - k->configure(input, alpha, output); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input->info()->dimension(0) == 1) ? input : alpha; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp deleted file mode 100644 index 5265b6c34..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* -* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved -* Copyright (c) 2016-2018 ARM Limited. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ -#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h" - -#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h" - -using namespace arm_compute; - -void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPadLayerKernel>(); - k->configure(input, output, pad_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp deleted file mode 100644 index fb363270d..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLPermuteEx.h" - -#include "arm_compute/core/CL/kernels/CLPermuteExKernel.h" - -using namespace arm_compute; - -void CLPermuteEx::configure(const ICLTensor *input, ICLTensor *output, - const PermutationVector &perm) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPermuteExKernel>(); - k->configure(input, output, perm); - _kernel = std::move(k); -} - -Status CLPermuteEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const PermutationVector &perm) -{ - ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteExKernel::validate(input, output, perm)); - return Status{}; -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp deleted file mode 100644 index dc0baa8dd..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLPixelWiseDivision.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h" - -using namespace arm_compute; - -void CLPixelWiseDivision::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - float scale, ConvertPolicy overflow_policy, - RoundingPolicy rounding_policy) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPixelWiseDivisionKernel>(); - k->configure(input1, input2, output, scale, overflow_policy, rounding_policy); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} - -Status CLPixelWiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy) -{ - return CLPixelWiseDivisionKernel::validate(input1, input2, output, scale, overflow_policy, - rounding_policy); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp deleted file mode 100644 index 2b8d82706..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLReduceOperation.h" - -#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -using namespace arm_compute; - -CLReduceOperation::CLReduceOperation() - : _input(nullptr), _output(nullptr), _axis(), _interm_tensors(), _reduce_kernels() -{ -} - -Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *output, - const std::set<uint32_t> &axis, const ReduceOperation &op) -{ - const size_t num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - 1; - - // Create temporary tensor infos - auto interm_tensors = - arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); - - // Create intermediate tensor info - TensorShape shape{input->tensor_shape()}; - - auto it = axis.begin(); - for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it) - { - shape.set(*it, 1); - interm_tensors[i].set_data_type(input->data_type()); - interm_tensors[i].set_tensor_shape(shape); - interm_tensors[i].set_num_channels(input->num_channels()); - } - - // Set a vector that is ordered ITensorInfo sequentially. - std::vector<const ITensorInfo *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; ++i) - { - tensors.emplace_back(interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Validate ReduceOperation only on all kernels - it = axis.begin(); - for (size_t i = 0; i < num_of_kernels; ++i, ++it) - { - ARM_COMPUTE_RETURN_ON_ERROR( - CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op)); - } - - return Status{}; -} - -void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output, - const std::set<uint32_t> &axis, ReduceOperation op) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, op)); - - _axis = axis; - - _input = input; - _output = output; - - // NOTE The axis must have no duplication. - const size_t num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - 1; - - _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); - _reduce_kernels = - arm_compute::support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels); - - TensorShape shape{input->info()->tensor_shape()}; - auto it = axis.begin(); - for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it) - { - shape.set(*it, 1); - _interm_tensors[i].allocator()->init( - TensorInfo(shape, input->info()->num_channels(), input->info()->data_type())); - _interm_tensors[i].allocator()->allocate(); - } - - // Set a vector that is ordered ICLTensors sequentially. - std::vector<ICLTensor *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; ++i) - { - tensors.emplace_back(_interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Apply ReduceOperation on all kernels - it = axis.begin(); - for (size_t i = 0; i < num_of_kernels; ++i, ++it) - { - _reduce_kernels[i].configure(tensors[i], tensors[i + 1], *it, op); - } -} - -void CLReduceOperation::run() -{ - const size_t num_of_kernels = _axis.size(); - for (size_t i = 0; i < num_of_kernels; ++i) - { - CLScheduler::get().enqueue(_reduce_kernels[i]); - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp deleted file mode 100644 index c03826891..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLSpaceToBatchND.h" - -#include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h" - -using namespace arm_compute; - -void CLSpaceToBatchND::configure(const ICLTensor *input, const ICLTensor *block_size, - const ICLTensor *padding_size, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLSpaceToBatchNDKernel>(); - k->configure(input, block_size, padding_size, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp deleted file mode 100644 index 0f455f96f..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h" - -#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h" - -using namespace arm_compute; - -void CLSpaceToDepth::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLSpaceToDepthKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp deleted file mode 100644 index dc6e4af44..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLSquaredDifference.h" - -#include "arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLSquaredDifference::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLSquaredDifferenceKernel>(); - k->configure(input1, input2, output); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp deleted file mode 100644 index be7353493..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLStridedSliceEx.h" - -#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h" - -using namespace arm_compute; - -void CLStridedSliceEx::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, - ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, - int32_t endMask, int32_t shrinkAxisMask) -{ - auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceExKernel>(); - k->configure(input, output, beginData, endData, stridesData, beginMask, endMask, shrinkAxisMask); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp deleted file mode 100644 index 19177497c..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLTopKV2.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include "arm_compute/core/CL/ICLTensor.h" - -#include "../../topk_v2.h" - -namespace arm_compute -{ - -CLTopKV2::CLTopKV2() - : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0), - _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(), - _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(), - _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr), - _p_out_key_buf(nullptr), _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr), _qs_kernel(), - _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(), - _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(), - _reorder_negatives_kernel(), _store_kernel() -{ -} - -void CLTopKV2::configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices, - int total_bits, int bits) -{ - _total_bits = total_bits; - _bits = bits; - _n = input->info()->tensor_shape()[0]; - - // _total_bits should be divided by _bits. - ARM_COMPUTE_ERROR_ON((_total_bits % _bits) != 0); - - _k = k; - _radix = 1 << bits; - - _input = input; - _values = values; - _indices = indices; - - std::string topk_env; - - char *env = getenv("ACL_TOPKV2"); - if (env) - topk_env = env; - - if (topk_env == "GPU_SINGLE") - { - _qs_idx_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - _qs_temp_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - - _qs_kernel.configure(input, values, indices, &_qs_idx_buf, &_qs_temp_buf, k, _n); - } - else if (topk_env == "GPU") - { - // n should be divided by (_GROUPS * _ITEMS) - ARM_COMPUTE_ERROR_ON((_n % (_GROUPS * _ITEMS)) != 0); - - _hist_buf_size = _radix * _GROUPS * _ITEMS; - _glob_sum_buf_size = _HISTOSPLIT; - - _hist_buf = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, - sizeof(cl_int) * _hist_buf_size); - _glob_sum_buf = - cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, - sizeof(cl_int) * _glob_sum_buf_size); - _temp_buf = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, - sizeof(cl_int) * _glob_sum_buf_size); - _first_negative_idx_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int)); - _in_key_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_float) * _n); - _out_key_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_float) * _n); - _in_ind_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - _out_ind_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - - _p_in_key_buf = &_in_key_buf; - _p_out_key_buf = &_out_key_buf; - _p_in_ind_buf = &_in_ind_buf; - _p_out_ind_buf = &_out_ind_buf; - - _init_kernel.configure(input, _p_in_key_buf, _p_in_ind_buf, _n); - _hist_kernel.configure(&_hist_buf, bits, _n); - _scan_hist_kernel.configure(&_hist_buf, &_glob_sum_buf, bits); - _glob_scan_hist_kernel.configure(&_glob_sum_buf, &_temp_buf, bits); - _paste_hist_kernel.configure(&_hist_buf, &_glob_sum_buf, bits); - _reorder_kernel.configure(&_hist_buf, bits, _n); - _find_first_negative_kernel.configure(&_first_negative_idx_buf, _n); - _reorder_negatives_kernel.configure(&_first_negative_idx_buf, _n); - _store_kernel.configure(values, indices, k, _n); - } - else - { - // DO NOTHING for CPU. - } -} - -void CLTopKV2::run() -{ - std::string topk_env; - - char *env = getenv("ACL_TOPKV2"); - if (env) - topk_env = env; - - if (topk_env == "GPU_SINGLE") - { - run_on_gpu_single_quicksort(); - } - else if (topk_env == "GPU") - { - run_on_gpu(); - } - else - { - run_on_cpu(); - } -} - -void CLTopKV2::run_on_gpu_single_quicksort() -{ - // This is a single threaded quick sort implementation. - CLScheduler::get().enqueue(_qs_kernel, false); - - arm_compute::CLScheduler::get().sync(); -} - -void CLTopKV2::run_on_gpu() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - - // 1. CLTopKV2Init set key buffer and index buffer. - // - Key buffer is set as the same value of the layer's input - // - Values in the index buffer are set as their indices. - CLScheduler::get().enqueue(_init_kernel, false); - - int n_passes = _total_bits / _bits; - - // 2. Repeat (total_bits/bits) times. - // - total_bits is the number of bits of the data type (e.g., 32 for float) - // - bits defines number of buckets (e.g. 16 buckets where bit is 4) - for (int pass = 0; pass < n_passes; ++pass) - { - arm_compute::CLScheduler::get().sync(); - - // 2.1. Calculate histogram with _GROUPS * _ITEMS threads - _hist_kernel.setPass(pass, _p_in_key_buf); - CLScheduler::get().enqueue(_hist_kernel, false); - - // 2.2. Calculate prefix sum locally with multiple threads - CLScheduler::get().enqueue(_scan_hist_kernel, false); - // 2.3. Calculate prefix sum within a work group - CLScheduler::get().enqueue(_glob_scan_hist_kernel, false); - // 2.4. Calculate global prefix sum - CLScheduler::get().enqueue(_paste_hist_kernel, false); - - // 2.5. Reorder keys and indices based on the global prefix sum - _reorder_kernel.setPass(pass, _p_in_key_buf, _p_out_key_buf, _p_in_ind_buf, _p_out_ind_buf); - CLScheduler::get().enqueue(_reorder_kernel, false); - - cl::Buffer *tmp; - // swap key buffers - tmp = _p_in_key_buf; - _p_in_key_buf = _p_out_key_buf; - _p_out_key_buf = tmp; - - // swap index buffers - tmp = _p_in_ind_buf; - _p_in_ind_buf = _p_out_ind_buf; - _p_out_ind_buf = tmp; - } - - // 3. Get the first negative index - // Because we swap in_buf and out_buf at the end of the above for loop, - // the output buffers are in bufs. - _find_first_negative_kernel.setOutputBuffer(_p_in_key_buf); - CLScheduler::get().enqueue(_find_first_negative_kernel, false); - - // 4. Correct odering of negatives - // - Since radix sort does not consider negatives, negatives are considered as bigger values - // than positives. - // reordered data will be stored in _p_out_key_buf and _p_out_ind_buf - _reorder_negatives_kernel.setBuffers(_p_in_key_buf, _p_out_key_buf, _p_in_ind_buf, - _p_out_ind_buf); - CLScheduler::get().enqueue(_reorder_negatives_kernel, false); - - // 5. Extract top k values from sorted keys and indices. - _store_kernel.setOutputBuffers(_p_out_key_buf, _p_out_ind_buf); - CLScheduler::get().enqueue(_store_kernel, false); - - arm_compute::CLScheduler::get().sync(); - -#if 0 - // below code is left for debugging. - int first_neg; - q.enqueueReadBuffer(_first_negative_idx_buf, CL_TRUE, 0, sizeof(cl_int), &first_neg); - std::cout << "first neg = " << first_neg << std::endl; - - float in_key[_n]; - q.enqueueReadBuffer(*_p_in_key_buf, CL_TRUE, 0, sizeof(cl_float)*_n, in_key); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "in_key[" << i << "] = " << in_key[i] << std::endl; - } - - float out_key[_n]; - q.enqueueReadBuffer(*_p_out_key_buf, CL_TRUE, 0, sizeof(cl_float)*_n, out_key); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "out_key[" << i << "] = " << out_key[i] << std::endl; - } - - int in_ind[_n]; - q.enqueueReadBuffer(*_p_in_ind_buf, CL_TRUE, 0, sizeof(cl_int)*_n, in_ind); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "in_ind[" << i << "] = " << in_ind[i] << std::endl; - } - - int out_ind[_n]; - q.enqueueReadBuffer(*_p_out_ind_buf, CL_TRUE, 0, sizeof(cl_int)*_n, out_ind); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "out_ind[" << i << "] = " << out_ind[i] << std::endl; - } - - int hist_buf[_hist_buf_size]; - q.enqueueReadBuffer(_hist_buf, CL_TRUE, 0, sizeof(cl_int)*_hist_buf_size, hist_buf); - for(uint32_t i = 0 ; i < _hist_buf_size; ++i) { - std::cout << "hist_buf[" << i << "] = " << hist_buf[i] << std::endl; - } - - int glob_sum_buf[_glob_sum_buf_size]; - q.enqueueReadBuffer(_glob_sum_buf, CL_TRUE, 0, sizeof(cl_int)*_glob_sum_buf_size, glob_sum_buf); - for(uint32_t i = 0 ; i < _glob_sum_buf_size; ++i) { - std::cout << "glob_sum_buf[" << i << "] = " << glob_sum_buf[i] << std::endl; - } - -#endif -} - -void CLTopKV2::run_on_cpu() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - // const Window& w = _topkv2_kernel.window(); - - _input->map(q); - _values->map(q); - _indices->map(q); - - // int row_size = (w[0].end() - w[0].start()) / w[0].step(); - int row_size = _input->info()->tensor_shape()[0]; - int rank = _input->info()->num_dimensions(); - - if (rank > 2) - throw std::runtime_error("Not supported type."); - - int row_num = (rank == 2 ? _input->info()->tensor_shape()[1] : 1); - - if (_input->info()->data_type() == DataType::F32) - { - nnfw::rt::optimized_ops::TopK<float>(row_size, row_num, (float *)_input->buffer(), _k, - (int32 *)_indices->buffer(), (float *)_values->buffer()); - } - else if (_input->info()->data_type() == DataType::S32) - { - nnfw::rt::optimized_ops::TopK<int32_t>(row_size, row_num, (int32_t *)_input->buffer(), _k, - (int32 *)_indices->buffer(), - (int32_t *)_values->buffer()); - } - else if (_input->info()->data_type() == DataType::QASYMM8) - { - nnfw::rt::optimized_ops::TopK<uint8_t>(row_size, row_num, (uint8_t *)_input->buffer(), _k, - (int32 *)_indices->buffer(), - (uint8_t *)_values->buffer()); - } - else - { - throw std::runtime_error("Not supported type."); - } - - _input->unmap(q); - _values->unmap(q); - _indices->unmap(q); -} -} // namespace arm_compute diff --git a/libs/ARMComputeEx/src/runtime/NEON/functions/NENormalizationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/NEON/functions/NENormalizationLayerEx.cpp deleted file mode 100644 index 988e92715..000000000 --- a/libs/ARMComputeEx/src/runtime/NEON/functions/NENormalizationLayerEx.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/NEON/functions/NENormalizationLayerEx.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -using namespace arm_compute; - -NENormalizationLayerEx::NENormalizationLayerEx(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _norm_kernel(), _multiply_kernel(), - _border_handler(), _input_squared() -{ -} - -void NENormalizationLayerEx::configure(const ITensor *input, ITensor *output, - const NormalizationLayerInfo &norm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - TensorInfo tensor_info(input->info()->tensor_shape(), 1, input->info()->data_type(), - input->info()->quantization_info()); - _input_squared.allocator()->init(tensor_info); - - // Manage intermediate buffers - _memory_group.manage(&_input_squared); - - // Configure kernels - _norm_kernel.configure(input, &_input_squared, output, norm_info); - _multiply_kernel.configure(input, input, &_input_squared, 1.0f, ConvertPolicy::SATURATE, - RoundingPolicy::TO_ZERO); - _border_handler.configure(&_input_squared, _norm_kernel.border_size(), BorderMode::CONSTANT, - PixelValue(0.0f)); - - // Allocate the tensor once the configure methods have been called - _input_squared.allocator()->allocate(); -} - -Status NENormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const NormalizationLayerInfo &norm_info) -{ - // Perform validation step - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - - ARM_COMPUTE_RETURN_ON_ERROR( - NENormalizationLayerExKernel::validate(input, input, output, norm_info)); - ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplicationKernel::validate( - input, input, output, 1.0f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO)); - - return Status{}; -} - -void NENormalizationLayerEx::run() -{ - _memory_group.acquire(); - - NEScheduler::get().schedule(&_multiply_kernel, Window::DimY); - NEScheduler::get().schedule(&_border_handler, Window::DimY); - NEScheduler::get().schedule(&_norm_kernel, Window::DimY); - - _memory_group.release(); -} diff --git a/libs/ARMComputeEx/src/runtime/topk_v2.h b/libs/ARMComputeEx/src/runtime/topk_v2.h deleted file mode 100644 index f94effea1..000000000 --- a/libs/ARMComputeEx/src/runtime/topk_v2.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright 2018 The TensorFlow Authors. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file topk_v2.h - * @brief This file contains TopK method and TopContainer class for TopK operation - * @ingroup COM_AI_RUNTIME - */ - -#ifndef __NNFW_RT_OPTIMIZED_OPS_TOPK_V2_H__ -#define __NNFW_RT_OPTIMIZED_OPS_TOPK_V2_H__ - -typedef int32_t int32; - -namespace nnfw -{ -namespace rt -{ -namespace optimized_ops -{ -/** - * @brief class to define TopK operation - * @note The follwing codes are impemented and modified while referring to TFLite topk_v2.cc file. - * TopK_v2 of NN Runtime supports TENSOR_FLOAT32, TENSOR_QUANT8_ASYMM, TENSOR_INT32 other than - * TFLite. - * (TFLite additionaly supports kTfLiteInt64.) - * - * The class that collects top indexes of k values. Based on template - * tensorflow::gtl::TopN<> but, for optimization, - * it re-uses the same container. - */ -template <typename T> class TopContainer -{ -public: - /** - * @brief Prevent default constructor of of this class - */ - TopContainer() = delete; - /** - * @brief Constructor with params - * @param [in] row_size Size of row in data - * @param [in] k The top k predictions - */ - TopContainer(int32 k, int32 row_size) : k_(k), container_(), values_(nullptr) - { - container_.reserve(std::min(k, row_size) + 1); - } - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * @param [in] topContainer To copy - */ - TopContainer(const TopContainer &) = delete; - /* - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * @param [in] topContainer To copy - * @return Reference of TopContainer - */ - TopContainer &operator=(const TopContainer &) = delete; - - /** - * @brief Start collecting - * @param [in] values To set as values - * @return N/A - */ - void start_collecting(const T *values) - { - values_ = values; - container_.clear(); - } - - /** - * @brief Push a value to be compared for topk - * @param [in] a A value to compare - * @return N/A - */ - void push(int32 a) - { - auto comparator = [this](int32 a, int32 b) { return compare_fun(a, b); }; - if (container_.size() <= (size_t)k_) - { - container_.push_back(a); - if (container_.size() == (size_t)(k_ + 1)) - { - std::make_heap(container_.begin(), container_.end(), comparator); - std::pop_heap(container_.begin(), container_.end(), comparator); - } - } - else if (comparator(a, container_.front())) - { - container_.back() = a; - std::push_heap(container_.begin(), container_.end(), comparator); - std::pop_heap(container_.begin(), container_.end(), comparator); - } - } - - /** - * @brief Get sorted result from pushed values - * @return Reference of vector with sorted values - */ - const std::vector<int32> &sorted_result() - { - auto comparator = [this](int32 a, int32 b) { return compare_fun(a, b); }; - if (container_.size() <= (size_t)(k_)) - { - std::sort(container_.begin(), container_.end(), comparator); - } - else - { - std::sort_heap(container_.begin(), container_.end() - 1, comparator); - container_.resize(k_); - } - return container_; - } - -private: - int32 k_; - std::vector<int32> container_; - const T *values_ = nullptr; - - bool compare_fun(int32 a, int32 b) const - { - if (values_[b] < values_[a]) - { - return true; - } - else if (values_[b] > values_[a]) - { - return false; - } - else - { - return a < b; - } - } -}; - -/** - * @brief Operates TopK operation with params - * @param [in] row_size Size of row in data - * @param [in] num_rows The number of rows in data - * @param [in] data To be operated in - * @param [in] k The top k predictions - * @param [out] output_indexes Indexes of targets in the top k predictions - * @param [out] output_values Values of targets in the top k predictions - * @return N/A - */ -template <typename T> -void TopK(int32 row_size, int32 num_rows, const T *data, int32 k, int32 *output_indexes, - T *output_values) -{ - TopContainer<T> topc(k, row_size); - for (int row = 0; row < num_rows; ++row) - { - const T *values_row = data + row * row_size; - topc.start_collecting(values_row); - for (int32 c = 0; c < row_size; ++c) - { - topc.push(c); - } - - // Prepare output buffers. - int32 *indexes_row = output_indexes + row * k; - T *output_row = output_values + row * k; - // We always assume that the output is sorted. - const auto &top_k = topc.sorted_result(); - std::copy(top_k.begin(), top_k.end(), indexes_row); - std::transform(top_k.begin(), top_k.end(), output_row, - [values_row](const int32 loc) { return values_row[loc]; }); - } -} - -} // namespace optimized_ops -} // namespace rt -} // namespace nnfw - -#endif // __NNFW_RT_OPTIMIZED_OPS_TOPK_V2_H__ |