diff options
Diffstat (limited to 'libs/ARMComputeEx/src/runtime/CL')
24 files changed, 0 insertions, 1265 deletions
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp deleted file mode 100644 index 1e52fc429..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLActivationLayerEx.cpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLActivationLayerEx.h" - -#include "arm_compute/core/CL/kernels/CLActivationLayerExKernel.h" - -using namespace arm_compute; - -void CLActivationLayerEx::configure(ICLTensor *input, ICLTensor *output, - ActivationLayerInfoEx act_info) -{ - auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerExKernel>(); - k->configure(input, output, act_info); - _kernel = std::move(k); -} - -Status CLActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const ActivationLayerInfoEx &act_info) -{ - return CLActivationLayerExKernel::validate(input, output, act_info); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp deleted file mode 100644 index dff743e89..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLArgMinMax.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLArgMinMax.h" - -#include "arm_compute/core/CL/kernels/CLArgMinMaxKernel.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -namespace arm_compute -{ - -CLArgMinMax::CLArgMinMax() - : _input(nullptr), _output(nullptr), _argminmax_axis(), _interm_tensors(), _argminmax_kernels(), - _num_of_kernels() -{ -} - -void CLArgMinMax::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis, - ArgOperation op) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, op)); - _input = input; - _output = output; - _argminmax_axis = axis; - _arg_op = op; - // NOTE The argminmax_axis must have no duplication. - _num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = _num_of_kernels - 1; - - _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); - _argminmax_kernels = - arm_compute::support::cpp14::make_unique<CLArgMinMaxKernel[]>(_num_of_kernels); - - TensorShape shape{input->info()->tensor_shape()}; - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - shape.set(_argminmax_axis[i], 1); - _interm_tensors[i].allocator()->init( - TensorInfo(shape, input->info()->num_channels(), input->info()->data_type())); - _interm_tensors[i].allocator()->allocate(); - } - - // Set a vector that is ordered ICLTensors sequentially. - std::vector<ICLTensor *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - tensors.emplace_back(_interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Apply ArgMinMax on all kernels - for (size_t i = 0; i < _num_of_kernels; i++) - { - _argminmax_kernels[i].configure(tensors[i], tensors[i + 1], _argminmax_axis[i], op); - } -} - -Status CLArgMinMax::validate(const ITensorInfo *input, const std::vector<uint32_t> &argminmax_axis, - const ITensorInfo *output, ArgOperation op) -{ - const size_t num_of_kernels = argminmax_axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - 1; - - // Create temporary tensor infos - auto interm_tensors = - arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); - - // Create intermediate tensor info - TensorShape shape{input->tensor_shape()}; - - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - shape.set(argminmax_axis[i], 1); - interm_tensors[i].set_data_type(input->data_type()); - interm_tensors[i].set_tensor_shape(shape); - interm_tensors[i].set_num_channels(input->num_channels()); - } - - // Set a vector that is ordered ITensorInfo sequentially. - std::vector<const ITensorInfo *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; i++) - { - tensors.emplace_back(interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Validate argminmax only on all kernels - for (size_t i = 0; i < num_of_kernels; i++) - { - ARM_COMPUTE_RETURN_ON_ERROR( - CLArgMinMaxKernel::validate(tensors[i], tensors[i + 1], argminmax_axis[i], op)); - } - - return Status{}; -} - -void CLArgMinMax::run() -{ - for (size_t i = 0; i < _num_of_kernels; ++i) - { - CLScheduler::get().enqueue(_argminmax_kernels[i]); - } -} - -} // namespace arm_compute diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp deleted file mode 100644 index 3f403c80a..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLArithmeticSubtractionEx.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLArithmeticSubtractionEx.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLArithmeticSubtractionExKernel.h" - -using namespace arm_compute; - -void CLArithmeticSubtractionEx::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - ConvertPolicy policy) -{ - auto k = arm_compute::support::cpp14::make_unique<CLArithmeticSubtractionExKernel>(); - k->configure(input1, input2, output, policy); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} - -Status CLArithmeticSubtractionEx::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, ConvertPolicy policy) -{ - return CLArithmeticSubtractionExKernel::validate(input1, input2, output, policy); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp deleted file mode 100644 index 26e3798cc..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLBatchToSpaceND.h" - -#include "arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h" - -using namespace arm_compute; - -void CLBatchToSpaceND::configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLBatchToSpaceNDKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp deleted file mode 100644 index 7c5fe5eda..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h" - -#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLBinaryLogicalOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - BinaryLogicalOperation op) -{ - auto k = arm_compute::support::cpp14::make_unique<CLBinaryLogicalOpKernel>(); - k->configure(input1, input2, output, op); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp deleted file mode 100644 index 8e106737c..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLCast.h" - -#include "arm_compute/core/CL/kernels/CLCastKernel.h" - -using namespace arm_compute; - -void CLCast::configure(ICLTensor *input, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLCastKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp deleted file mode 100644 index f6a745a25..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLComparisonOp.cpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLComparisonOp.h" - -#include "arm_compute/core/CL/kernels/CLComparisonOpKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLComparisonOp::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - const ComparisonOperation &op) -{ - auto k = arm_compute::support::cpp14::make_unique<CLComparisonOpKernel>(); - k->configure(input1, input2, output, op); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp deleted file mode 100644 index c2e4ca9ff..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h" - -#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h" - -using namespace arm_compute; - -void CLDepthToSpace::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLDepthToSpaceKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp deleted file mode 100644 index 2781784ca..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h" - -#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h" - -using namespace arm_compute; - -void CLEmbeddingLookup::configure(const ICLTensor *input, ICLTensor *output, - const ICLTensor *lookups) -{ - auto k = arm_compute::support::cpp14::make_unique<CLEmbeddingLookupKernel>(); - k->configure(input, output, lookups); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp deleted file mode 100644 index 411fa8700..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLExp.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLExp.h" - -#include "arm_compute/core/CL/kernels/CLExpKernel.h" - -using namespace arm_compute; - -void CLExp::configure(const ICLTensor *input, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLExpKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp deleted file mode 100644 index fb056fe45..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLGather.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLGather.h" - -#include "arm_compute/core/CL/kernels/CLGatherKernel.h" - -using namespace arm_compute; - -void CLGather::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLGatherKernel>(); - k->configure(input1, input2, output); - _kernel = std::move(k); -} - -Status CLGather::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output) -{ - return CLGatherKernel::validate(input1, input2, output); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp deleted file mode 100644 index 7180e9356..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLHashtableLookup.h" - -#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h" - -using namespace arm_compute; - -void CLHashtableLookup::configure(const ICLTensor *lookups, const ICLTensor *keys, - const ICLTensor *input, ICLTensor *output, ICLTensor *hits) -{ - auto k = arm_compute::support::cpp14::make_unique<CLHashtableLookupKernel>(); - k->configure(lookups, keys, input, output, hits); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp deleted file mode 100644 index be35ea732..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLNeg.h" - -#include "arm_compute/core/CL/kernels/CLNegKernel.h" - -using namespace arm_compute; - -void CLNeg::configure(ICLTensor *input, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLNegKernel>(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp deleted file mode 100644 index 276c4557a..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h" - -#include "arm_compute/runtime/CL/CLScheduler.h" - -using namespace arm_compute; - -CLNormalizationLayerEx::CLNormalizationLayerEx() : _norm_kernel(), _border_handler() {} - -void CLNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output, - const NormalizationLayerInfo &norm_info) -{ - ARM_COMPUTE_ERROR_ON(input == nullptr); - - // Configure normalization kernel - _norm_kernel.configure(input, output, norm_info); - - // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel - _border_handler.configure(input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0)); -} - -Status CLNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const NormalizationLayerInfo &norm_info) -{ - return CLNormalizationLayerExKernel::validate(input, output, norm_info); -} - -void CLNormalizationLayerEx::run() -{ - // Run border handler - CLScheduler::get().enqueue(_border_handler, false); - - // Run normalization kernel - CLScheduler::get().enqueue(_norm_kernel); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp deleted file mode 100644 index 38adedd10..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLPReLU.h" - -#include "arm_compute/core/CL/kernels/CLPReLUKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLPReLU::configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPReLUKernel>(); - k->configure(input, alpha, output); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input->info()->dimension(0) == 1) ? input : alpha; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp deleted file mode 100644 index 5265b6c34..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPadLayerEx.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* -* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved -* Copyright (c) 2016-2018 ARM Limited. -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ -#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h" - -#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h" - -using namespace arm_compute; - -void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPadLayerKernel>(); - k->configure(input, output, pad_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp deleted file mode 100644 index fb363270d..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPermuteEx.cpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLPermuteEx.h" - -#include "arm_compute/core/CL/kernels/CLPermuteExKernel.h" - -using namespace arm_compute; - -void CLPermuteEx::configure(const ICLTensor *input, ICLTensor *output, - const PermutationVector &perm) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPermuteExKernel>(); - k->configure(input, output, perm); - _kernel = std::move(k); -} - -Status CLPermuteEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const PermutationVector &perm) -{ - ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteExKernel::validate(input, output, perm)); - return Status{}; -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp deleted file mode 100644 index dc0baa8dd..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLPixelWiseDivision.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h" - -using namespace arm_compute; - -void CLPixelWiseDivision::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - float scale, ConvertPolicy overflow_policy, - RoundingPolicy rounding_policy) -{ - auto k = arm_compute::support::cpp14::make_unique<CLPixelWiseDivisionKernel>(); - k->configure(input1, input2, output, scale, overflow_policy, rounding_policy); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} - -Status CLPixelWiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy) -{ - return CLPixelWiseDivisionKernel::validate(input1, input2, output, scale, overflow_policy, - rounding_policy); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp deleted file mode 100644 index 2b8d82706..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLReduceOperation.h" - -#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -using namespace arm_compute; - -CLReduceOperation::CLReduceOperation() - : _input(nullptr), _output(nullptr), _axis(), _interm_tensors(), _reduce_kernels() -{ -} - -Status CLReduceOperation::validate(const ITensorInfo *input, const ITensorInfo *output, - const std::set<uint32_t> &axis, const ReduceOperation &op) -{ - const size_t num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - 1; - - // Create temporary tensor infos - auto interm_tensors = - arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors); - - // Create intermediate tensor info - TensorShape shape{input->tensor_shape()}; - - auto it = axis.begin(); - for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it) - { - shape.set(*it, 1); - interm_tensors[i].set_data_type(input->data_type()); - interm_tensors[i].set_tensor_shape(shape); - interm_tensors[i].set_num_channels(input->num_channels()); - } - - // Set a vector that is ordered ITensorInfo sequentially. - std::vector<const ITensorInfo *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; ++i) - { - tensors.emplace_back(interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Validate ReduceOperation only on all kernels - it = axis.begin(); - for (size_t i = 0; i < num_of_kernels; ++i, ++it) - { - ARM_COMPUTE_RETURN_ON_ERROR( - CLReduceOperationKernel::validate(tensors[i], tensors[i + 1], *it, op)); - } - - return Status{}; -} - -void CLReduceOperation::configure(ICLTensor *input, ICLTensor *output, - const std::set<uint32_t> &axis, ReduceOperation op) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), axis, op)); - - _axis = axis; - - _input = input; - _output = output; - - // NOTE The axis must have no duplication. - const size_t num_of_kernels = axis.size(); - const size_t num_of_interm_tensors = num_of_kernels - 1; - - _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors); - _reduce_kernels = - arm_compute::support::cpp14::make_unique<CLReduceOperationKernel[]>(num_of_kernels); - - TensorShape shape{input->info()->tensor_shape()}; - auto it = axis.begin(); - for (size_t i = 0; i < num_of_interm_tensors; ++i, ++it) - { - shape.set(*it, 1); - _interm_tensors[i].allocator()->init( - TensorInfo(shape, input->info()->num_channels(), input->info()->data_type())); - _interm_tensors[i].allocator()->allocate(); - } - - // Set a vector that is ordered ICLTensors sequentially. - std::vector<ICLTensor *> tensors; - tensors.emplace_back(input); - for (size_t i = 0; i < num_of_interm_tensors; ++i) - { - tensors.emplace_back(_interm_tensors.get() + i); - } - tensors.emplace_back(output); - - // Apply ReduceOperation on all kernels - it = axis.begin(); - for (size_t i = 0; i < num_of_kernels; ++i, ++it) - { - _reduce_kernels[i].configure(tensors[i], tensors[i + 1], *it, op); - } -} - -void CLReduceOperation::run() -{ - const size_t num_of_kernels = _axis.size(); - for (size_t i = 0; i < num_of_kernels; ++i) - { - CLScheduler::get().enqueue(_reduce_kernels[i]); - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp deleted file mode 100644 index c03826891..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLSpaceToBatchND.h" - -#include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h" - -using namespace arm_compute; - -void CLSpaceToBatchND::configure(const ICLTensor *input, const ICLTensor *block_size, - const ICLTensor *padding_size, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLSpaceToBatchNDKernel>(); - k->configure(input, block_size, padding_size, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp deleted file mode 100644 index 0f455f96f..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h" - -#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h" - -using namespace arm_compute; - -void CLSpaceToDepth::configure(ICLTensor *input, ICLTensor *output, const int32_t block_size) -{ - auto k = arm_compute::support::cpp14::make_unique<CLSpaceToDepthKernel>(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp deleted file mode 100644 index dc6e4af44..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLSquaredDifference.cpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLSquaredDifference.h" - -#include "arm_compute/core/CL/kernels/CLSquaredDifferenceKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLSquaredDifference::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique<CLSquaredDifferenceKernel>(); - k->configure(input1, input2, output); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp deleted file mode 100644 index be7353493..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLStridedSliceEx.h" - -#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h" - -using namespace arm_compute; - -void CLStridedSliceEx::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, - ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, - int32_t endMask, int32_t shrinkAxisMask) -{ - auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceExKernel>(); - k->configure(input, output, beginData, endData, stridesData, beginMask, endMask, shrinkAxisMask); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp deleted file mode 100644 index 19177497c..000000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLTopKV2.h" -#include "arm_compute/runtime/CL/CLScheduler.h" - -#include "arm_compute/core/CL/ICLTensor.h" - -#include "../../topk_v2.h" - -namespace arm_compute -{ - -CLTopKV2::CLTopKV2() - : _k(0), _total_bits(0), _bits(0), _radix(0), _hist_buf_size(0), _glob_sum_buf_size(0), _n(0), - _input(nullptr), _values(nullptr), _indices(nullptr), _qs_idx_buf(), _qs_temp_buf(), - _hist_buf(), _glob_sum_buf(), _temp_buf(), _first_negative_idx_buf(), _in_key_buf(), - _out_key_buf(), _in_ind_buf(), _out_ind_buf(), _p_in_key_buf(nullptr), - _p_out_key_buf(nullptr), _p_in_ind_buf(nullptr), _p_out_ind_buf(nullptr), _qs_kernel(), - _init_kernel(), _hist_kernel(), _scan_hist_kernel(), _glob_scan_hist_kernel(), - _paste_hist_kernel(), _reorder_kernel(), _find_first_negative_kernel(), - _reorder_negatives_kernel(), _store_kernel() -{ -} - -void CLTopKV2::configure(ICLTensor *input, int k, ICLTensor *values, ICLTensor *indices, - int total_bits, int bits) -{ - _total_bits = total_bits; - _bits = bits; - _n = input->info()->tensor_shape()[0]; - - // _total_bits should be divided by _bits. - ARM_COMPUTE_ERROR_ON((_total_bits % _bits) != 0); - - _k = k; - _radix = 1 << bits; - - _input = input; - _values = values; - _indices = indices; - - std::string topk_env; - - char *env = getenv("ACL_TOPKV2"); - if (env) - topk_env = env; - - if (topk_env == "GPU_SINGLE") - { - _qs_idx_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - _qs_temp_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - - _qs_kernel.configure(input, values, indices, &_qs_idx_buf, &_qs_temp_buf, k, _n); - } - else if (topk_env == "GPU") - { - // n should be divided by (_GROUPS * _ITEMS) - ARM_COMPUTE_ERROR_ON((_n % (_GROUPS * _ITEMS)) != 0); - - _hist_buf_size = _radix * _GROUPS * _ITEMS; - _glob_sum_buf_size = _HISTOSPLIT; - - _hist_buf = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, - sizeof(cl_int) * _hist_buf_size); - _glob_sum_buf = - cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, - sizeof(cl_int) * _glob_sum_buf_size); - _temp_buf = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, - sizeof(cl_int) * _glob_sum_buf_size); - _first_negative_idx_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int)); - _in_key_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_float) * _n); - _out_key_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_float) * _n); - _in_ind_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - _out_ind_buf = cl::Buffer(CLScheduler::get().context(), - CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_int) * _n); - - _p_in_key_buf = &_in_key_buf; - _p_out_key_buf = &_out_key_buf; - _p_in_ind_buf = &_in_ind_buf; - _p_out_ind_buf = &_out_ind_buf; - - _init_kernel.configure(input, _p_in_key_buf, _p_in_ind_buf, _n); - _hist_kernel.configure(&_hist_buf, bits, _n); - _scan_hist_kernel.configure(&_hist_buf, &_glob_sum_buf, bits); - _glob_scan_hist_kernel.configure(&_glob_sum_buf, &_temp_buf, bits); - _paste_hist_kernel.configure(&_hist_buf, &_glob_sum_buf, bits); - _reorder_kernel.configure(&_hist_buf, bits, _n); - _find_first_negative_kernel.configure(&_first_negative_idx_buf, _n); - _reorder_negatives_kernel.configure(&_first_negative_idx_buf, _n); - _store_kernel.configure(values, indices, k, _n); - } - else - { - // DO NOTHING for CPU. - } -} - -void CLTopKV2::run() -{ - std::string topk_env; - - char *env = getenv("ACL_TOPKV2"); - if (env) - topk_env = env; - - if (topk_env == "GPU_SINGLE") - { - run_on_gpu_single_quicksort(); - } - else if (topk_env == "GPU") - { - run_on_gpu(); - } - else - { - run_on_cpu(); - } -} - -void CLTopKV2::run_on_gpu_single_quicksort() -{ - // This is a single threaded quick sort implementation. - CLScheduler::get().enqueue(_qs_kernel, false); - - arm_compute::CLScheduler::get().sync(); -} - -void CLTopKV2::run_on_gpu() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - - // 1. CLTopKV2Init set key buffer and index buffer. - // - Key buffer is set as the same value of the layer's input - // - Values in the index buffer are set as their indices. - CLScheduler::get().enqueue(_init_kernel, false); - - int n_passes = _total_bits / _bits; - - // 2. Repeat (total_bits/bits) times. - // - total_bits is the number of bits of the data type (e.g., 32 for float) - // - bits defines number of buckets (e.g. 16 buckets where bit is 4) - for (int pass = 0; pass < n_passes; ++pass) - { - arm_compute::CLScheduler::get().sync(); - - // 2.1. Calculate histogram with _GROUPS * _ITEMS threads - _hist_kernel.setPass(pass, _p_in_key_buf); - CLScheduler::get().enqueue(_hist_kernel, false); - - // 2.2. Calculate prefix sum locally with multiple threads - CLScheduler::get().enqueue(_scan_hist_kernel, false); - // 2.3. Calculate prefix sum within a work group - CLScheduler::get().enqueue(_glob_scan_hist_kernel, false); - // 2.4. Calculate global prefix sum - CLScheduler::get().enqueue(_paste_hist_kernel, false); - - // 2.5. Reorder keys and indices based on the global prefix sum - _reorder_kernel.setPass(pass, _p_in_key_buf, _p_out_key_buf, _p_in_ind_buf, _p_out_ind_buf); - CLScheduler::get().enqueue(_reorder_kernel, false); - - cl::Buffer *tmp; - // swap key buffers - tmp = _p_in_key_buf; - _p_in_key_buf = _p_out_key_buf; - _p_out_key_buf = tmp; - - // swap index buffers - tmp = _p_in_ind_buf; - _p_in_ind_buf = _p_out_ind_buf; - _p_out_ind_buf = tmp; - } - - // 3. Get the first negative index - // Because we swap in_buf and out_buf at the end of the above for loop, - // the output buffers are in bufs. - _find_first_negative_kernel.setOutputBuffer(_p_in_key_buf); - CLScheduler::get().enqueue(_find_first_negative_kernel, false); - - // 4. Correct odering of negatives - // - Since radix sort does not consider negatives, negatives are considered as bigger values - // than positives. - // reordered data will be stored in _p_out_key_buf and _p_out_ind_buf - _reorder_negatives_kernel.setBuffers(_p_in_key_buf, _p_out_key_buf, _p_in_ind_buf, - _p_out_ind_buf); - CLScheduler::get().enqueue(_reorder_negatives_kernel, false); - - // 5. Extract top k values from sorted keys and indices. - _store_kernel.setOutputBuffers(_p_out_key_buf, _p_out_ind_buf); - CLScheduler::get().enqueue(_store_kernel, false); - - arm_compute::CLScheduler::get().sync(); - -#if 0 - // below code is left for debugging. - int first_neg; - q.enqueueReadBuffer(_first_negative_idx_buf, CL_TRUE, 0, sizeof(cl_int), &first_neg); - std::cout << "first neg = " << first_neg << std::endl; - - float in_key[_n]; - q.enqueueReadBuffer(*_p_in_key_buf, CL_TRUE, 0, sizeof(cl_float)*_n, in_key); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "in_key[" << i << "] = " << in_key[i] << std::endl; - } - - float out_key[_n]; - q.enqueueReadBuffer(*_p_out_key_buf, CL_TRUE, 0, sizeof(cl_float)*_n, out_key); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "out_key[" << i << "] = " << out_key[i] << std::endl; - } - - int in_ind[_n]; - q.enqueueReadBuffer(*_p_in_ind_buf, CL_TRUE, 0, sizeof(cl_int)*_n, in_ind); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "in_ind[" << i << "] = " << in_ind[i] << std::endl; - } - - int out_ind[_n]; - q.enqueueReadBuffer(*_p_out_ind_buf, CL_TRUE, 0, sizeof(cl_int)*_n, out_ind); - for(uint32_t i = 0 ; i < _n; ++i) { - std::cout << "out_ind[" << i << "] = " << out_ind[i] << std::endl; - } - - int hist_buf[_hist_buf_size]; - q.enqueueReadBuffer(_hist_buf, CL_TRUE, 0, sizeof(cl_int)*_hist_buf_size, hist_buf); - for(uint32_t i = 0 ; i < _hist_buf_size; ++i) { - std::cout << "hist_buf[" << i << "] = " << hist_buf[i] << std::endl; - } - - int glob_sum_buf[_glob_sum_buf_size]; - q.enqueueReadBuffer(_glob_sum_buf, CL_TRUE, 0, sizeof(cl_int)*_glob_sum_buf_size, glob_sum_buf); - for(uint32_t i = 0 ; i < _glob_sum_buf_size; ++i) { - std::cout << "glob_sum_buf[" << i << "] = " << glob_sum_buf[i] << std::endl; - } - -#endif -} - -void CLTopKV2::run_on_cpu() -{ - cl::CommandQueue q = CLScheduler::get().queue(); - // const Window& w = _topkv2_kernel.window(); - - _input->map(q); - _values->map(q); - _indices->map(q); - - // int row_size = (w[0].end() - w[0].start()) / w[0].step(); - int row_size = _input->info()->tensor_shape()[0]; - int rank = _input->info()->num_dimensions(); - - if (rank > 2) - throw std::runtime_error("Not supported type."); - - int row_num = (rank == 2 ? _input->info()->tensor_shape()[1] : 1); - - if (_input->info()->data_type() == DataType::F32) - { - nnfw::rt::optimized_ops::TopK<float>(row_size, row_num, (float *)_input->buffer(), _k, - (int32 *)_indices->buffer(), (float *)_values->buffer()); - } - else if (_input->info()->data_type() == DataType::S32) - { - nnfw::rt::optimized_ops::TopK<int32_t>(row_size, row_num, (int32_t *)_input->buffer(), _k, - (int32 *)_indices->buffer(), - (int32_t *)_values->buffer()); - } - else if (_input->info()->data_type() == DataType::QASYMM8) - { - nnfw::rt::optimized_ops::TopK<uint8_t>(row_size, row_num, (uint8_t *)_input->buffer(), _k, - (int32 *)_indices->buffer(), - (uint8_t *)_values->buffer()); - } - else - { - throw std::runtime_error("Not supported type."); - } - - _input->unmap(q); - _values->unmap(q); - _indices->unmap(q); -} -} // namespace arm_compute |