summaryrefslogtreecommitdiff
path: root/compute/ARMComputeEx/src
diff options
context:
space:
mode:
authorChunseok Lee <chunseok.lee@samsung.com>2020-04-23 14:45:49 +0900
committerChunseok Lee <chunseok.lee@samsung.com>2020-04-23 14:45:49 +0900
commite2ef8438a24f7c56a0744eb579a6e293ee2fbf8e (patch)
tree44a1a7951d168dd4370e13593ed03f4bc6d920c5 /compute/ARMComputeEx/src
parent302e6564a7a76109e1178207e44e45a58631c477 (diff)
downloadnnfw-e2ef8438a24f7c56a0744eb579a6e293ee2fbf8e.tar.gz
nnfw-e2ef8438a24f7c56a0744eb579a6e293ee2fbf8e.tar.bz2
nnfw-e2ef8438a24f7c56a0744eb579a6e293ee2fbf8e.zip
Imported Upstream version 1.4.0upstream/1.4.0submit/tizen/20200423.054851
Diffstat (limited to 'compute/ARMComputeEx/src')
-rw-r--r--compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp44
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl354
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h17
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h19
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl16
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl122
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl136
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl108
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl26
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl25
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl25
-rw-r--r--compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl25
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp37
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp372
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp173
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp50
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp172
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp154
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp30
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp26
-rw-r--r--compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp103
-rw-r--r--compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp18
-rw-r--r--compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp78
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp730
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp30
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp18
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp22
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp19
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/core/UtilsEx.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp337
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp583
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp50
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp180
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp29
-rw-r--r--compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp (renamed from compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp)40
-rw-r--r--compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp66
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp109
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp15
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp15
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp20
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp41
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp36
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp26
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp15
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp18
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp16
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp15
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp15
-rw-r--r--compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp16
107 files changed, 5647 insertions, 278 deletions
diff --git a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
index 7d4760600..191a5bc2a 100644
--- a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
+++ b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/CLKernelLibraryEx.h"
@@ -53,13 +69,16 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map
{"gather_ex", "gather_ex.cl"},
{"gather_ex_1d", "gather_ex.cl"},
{"gather_ex_1d_out", "gather_ex.cl"},
+ {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"},
{"hashtable_lookup", "hashtable_lookup.cl"},
{"instance_normalization_ex", "instance_normalization_ex.cl"},
+ {"multiply_scale_factor", "multiply_scale_factor.cl"},
{"neg_tensor", "neg_tensor.cl"},
{"permute_generic", "permute_ex.cl"},
{"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"},
{"prelu", "prelu.cl"},
{"prelu_qasymm8", "prelu_quantized.cl"},
+ {"quantization_symm8", "quantization_symm8.cl"},
{"reduce_min_max", "reduce_operation.cl"},
{"reduce_sum_mean", "reduce_operation.cl"},
{"topkv2_init", "topkv2.cl"},
@@ -71,6 +90,7 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map
{"radixsort_pastehistograms", "topkv2_radixsort.cl"},
{"radixsort_reorder", "topkv2_radixsort.cl"},
{"topkv2_quicksort", "topkv2_quicksort.cl"},
+ {"scale_factor_symm8", "scale_factor.cl"},
{"space_to_batch_4d_nchw", "space_to_batch.cl"},
{"space_to_batch_4d_nhwc", "space_to_batch.cl"},
{"space_to_depth_nchw", "space_to_depth.cl"},
@@ -100,6 +120,10 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map
#include "./cl_kernels/gather_ex.clembed"
},
{
+ "gemmlowp_ex.cl",
+#include "./cl_kernels/gemmlowp_ex.clembed"
+ },
+ {
"hashtable_lookup.cl",
#include "./cl_kernels/hashtable_lookup.clembed"
},
@@ -120,6 +144,10 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map
#include "./cl_kernels/binary_logical_op.clembed"
},
{
+ "multiply_scale_factor.cl",
+#include "./cl_kernels/multiply_scale_factor.clembed"
+ },
+ {
"neg_tensor.cl",
#include "./cl_kernels/neg_tensor.clembed"
},
@@ -132,10 +160,18 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map
#include "./cl_kernels/prelu_quantized.clembed"
},
{
+ "quantization_symm8.cl",
+#include "./cl_kernels/quantization_symm8.clembed"
+ },
+ {
"reduce_operation.cl",
#include "./cl_kernels/reduce_operation.clembed"
},
{
+ "scale_factor.cl",
+#include "./cl_kernels/scale_factor.clembed"
+ },
+ {
"space_to_batch.cl",
#include "./cl_kernels/space_to_batch.clembed"
},
@@ -180,7 +216,7 @@ Kernel CLKernelLibraryEx::create_kernel(const std::string &kernel_name,
if (_kernel_program_map.end() == kernel_program_it)
{
- ARM_COMPUTE_ERROR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
+ ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
}
std::string concat_str;
@@ -261,7 +297,7 @@ const Program &CLKernelLibraryEx::load_program(const std::string &program_name)
if (_program_source_map.end() == program_source_it)
{
- ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
+ ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
}
program = Program(_context, program_name, program_source_it->second);
@@ -282,7 +318,7 @@ const Program &CLKernelLibraryEx::load_program(const std::string &program_name)
}
else
{
- ARM_COMPUTE_ERROR("Kernel file %s does not exist.", source_name.c_str());
+ ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str());
}
#endif /* EMBEDDED_KERNELS */
@@ -315,7 +351,7 @@ std::string CLKernelLibraryEx::get_program_source(const std::string &program_nam
if (program_source_it == _program_source_map.end())
{
- ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
+ ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
}
return program_source_it->second;
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl
index 2a6dfc91f..03717cfe9 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl
index 77e239f55..f74c1c103 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers_asymm.h"
#ifdef SATURATE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
index 8c875516d..e249663bc 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#ifndef VEC_SIZE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl
index 2342fda9f..4147a0017 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#ifndef SCALE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl
index e005322f7..0285c955b 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
index dd8cb6d93..92e5dfbee 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#ifndef VEC_SIZE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl
index 09f776156..2236021f1 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#if defined(DATA_TYPE) && defined(AXIS) && defined(INDICES_DIM)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl
new file mode 100644
index 000000000..80ba73d1d
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "helpers.h"
+
+#if defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) && \
+ defined(COLS_A)
+#define VECTOR_CHAR VEC_DATA_TYPE(char, NUM_ELEMS_PROCESSED_PER_THREAD_X)
+#define VECTOR_INT VEC_DATA_TYPE(int, NUM_ELEMS_PROCESSED_PER_THREAD_X)
+#define VECTOR_FLOAT VEC_DATA_TYPE(float, NUM_ELEMS_PROCESSED_PER_THREAD_X)
+/** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B
+ * (src1) in case both matrices have not been reshaped
+ *
+ * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A
+ *
+ * @note In case the input or output have to be reinterpreted as a 3D tensor, the following
+ * information must be passed at compile time:
+ * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D
+ * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D
+ * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D
+ * tensor.
+ * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor
+ * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped
+ *
+ * @param[in] src0_ptr Pointer to the source matrix. Supported data type:
+ * QASYMM8
+ * @param[in] src0_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src0_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src0_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[in] src1_ptr Pointer to the source matrix. Supported data type:
+ * same as @p src0_ptr
+ * @param[in] src1_stride_x Stride of the source matrix in X dimension (in
+ * bytes)
+ * @param[in] src1_step_x src_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in
+ * bytes)
+ * @param[in] src1_step_y src_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source
+ * matrix
+ * @param[out] dst_ptr Pointer to the destination matrix Supported data
+ * type: S32
+ * @param[in] dst_stride_x Stride of the destination matrix in X dimension
+ * (in bytes)
+ * @param[in] dst_step_x dst_gx_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination matrix in Y dimension
+ * (in bytes)
+ * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination
+ * matrix
+ * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in
+ * bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension
+ * (in bytes)
+ * @param[in] src_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the input tensor (only if defined REINTERPRET_INPUT_AS_3D)
+ * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements for
+ * the output tensor (only if defined REINTERPRET_OUTPUT_AS_3D)
+ */
+__kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1),
+ IMAGE_DECLARATION(dst), uint src0_stride_z, uint src1_stride_z,
+ uint dst_stride_z
+#if defined(REINTERPRET_INPUT_AS_3D)
+ ,
+ uint src_cross_plane_pad
+#endif // REINTERPRET_INPUT_AS_3D
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ ,
+ uint dst_cross_plane_pad
+#endif // REINTERPRET_OUTPUT_AS_3D
+ )
+{
+ int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X;
+
+ // Compute starting address for matrix A and Matrix B
+ int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes));
+
+ // Update address for the matrix A
+ src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y;
+
+ // Update address for the matrix B
+ src_addr.s1 += idx;
+
+#if defined(REINTERPRET_INPUT_AS_3D)
+ // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across
+ // the z dimension
+ // in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zin) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint4)HEIGHT_GEMM3D;
+ zin = min(DEPTH_GEMM3D - 1, zin);
+
+ // Add offset due to the cross plane paddings
+ zin *= (src_cross_plane_pad * src0_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply src0_stride_z by DEPTH_GEMM3D
+ src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D;
+
+#else // defined(REINTERPRET_INPUT_AS_3D)
+
+ // Add offset for batched GEMM
+ src_addr.s0 += get_global_id(2) * src0_stride_z;
+
+#endif // defined(REINTERPRET_INPUT_AS_3D)
+
+#if defined(MATRIX_B_DEPTH)
+ // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3
+ src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z;
+#else // defined(MATRIX_B_DEPTH)
+ src_addr.s1 += get_global_id(2) * src1_stride_z;
+#endif // defined(MATRIX_B_DEPTH)
+
+ int end_row_vec_a = src_addr.s0 + COLS_A;
+
+ VECTOR_INT acc0 = 0;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ VECTOR_INT acc1 = 0;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ VECTOR_INT acc2 = 0;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ VECTOR_INT acc3 = 0;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ VECTOR_INT acc4 = 0;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+
+ for (; src_addr.s0 <= (end_row_vec_a - 2); src_addr += (int2)(2, 2 * src1_stride_y))
+ {
+ // Load values from matrix A
+ char2 a0 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ char2 a1 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ char2 a2 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ char2 a3 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ char2 a4 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 4 * src0_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ // Load values from matrix B
+ VECTOR_CHAR b0 =
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
+ VECTOR_CHAR b1 = VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(
+ 0, (__global char *)(src1_ptr + src_addr.s1 + src1_stride_y));
+
+ // Accumulate
+ acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0.s0;
+ acc0 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a0.s1;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a1.s0;
+ acc1 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a1.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a2.s0;
+ acc2 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a2.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a3.s0;
+ acc3 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a3.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ acc4 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a4.s0;
+ acc4 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a4.s1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ }
+
+ for (; src_addr.s0 < end_row_vec_a; src_addr += (int2)(1, src1_stride_y))
+ {
+ // Load values from matrix A
+ char a0 = *(__global char *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y);
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ char a1 = *(__global char *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ char a2 = *(__global char *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ char a3 = *(__global char *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ char a4 = *(__global char *)(src0_ptr + src_addr.s0 + 4 * src0_stride_y);
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ // Load values from matrix B
+ VECTOR_CHAR b0 =
+ VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1));
+
+ // Accumulate
+ acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0;
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ acc1 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a1;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ acc2 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a2;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ acc3 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a3;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ acc4 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a4;
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ }
+
+ const int z = get_global_id(2);
+
+ // Compute destination address
+ Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+
+#if defined(REINTERPRET_OUTPUT_AS_3D)
+ // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across
+ // the z dimension
+ // in order to take into account the presence of possible cross plane paddings
+ //
+ // | |
+ // | plane0 |
+ // | |
+ // |__________________|
+ // |******************|
+ // | cross_plane_pad |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | |
+ // |__________________|
+
+ // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)
+ // by HEIGHT_GEMM3D
+ uint8 zout = ((uint8)(0, 1, 2, 3, 4, 5, 6, 7) +
+ (uint8)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) /
+ (uint8)HEIGHT_GEMM3D;
+ zout = min(DEPTH_GEMM3D - 1, zout);
+
+ // Add offset due to the cross plane paddings
+ zout *= (dst_cross_plane_pad * dst_stride_y);
+
+ // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
+ // multiply dst_stride_z by DEPTH_GEMM3D
+ dst.ptr += z * dst_stride_z * DEPTH_GEMM3D;
+
+ // Store the result
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc0, VECTOR_INT), 0, (__global int *)(dst.ptr + 0 * dst_stride_y + zout.s0));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc1, VECTOR_INT), 0, (__global int *)(dst.ptr + 1 * dst_stride_y + zout.s1));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc2, VECTOR_INT), 0, (__global int *)(dst.ptr + 2 * dst_stride_y + zout.s2));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc3, VECTOR_INT), 0, (__global int *)(dst.ptr + 3 * dst_stride_y + zout.s3));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc4, VECTOR_INT), 0, (__global int *)(dst.ptr + 4 * dst_stride_y + zout.s4));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+
+#else // defined(REINTERPRET_OUTPUT_AS_3D)
+ // Add offset for batched GEMM
+ dst.ptr += z * dst_stride_z;
+
+ // Store the result
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc0, VECTOR_INT), 0, (__global int *)(dst.ptr + 0 * dst_stride_y));
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc1, VECTOR_INT), 0, (__global int *)(dst.ptr + 1 * dst_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc2, VECTOR_INT), 0, (__global int *)(dst.ptr + 2 * dst_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc3, VECTOR_INT), 0, (__global int *)(dst.ptr + 3 * dst_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3
+#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+ VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X)
+ (CONVERT(acc4, VECTOR_INT), 0, (__global int *)(dst.ptr + 4 * dst_stride_y));
+#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4
+#endif // defined(REINTERPRET_OUTPUT_AS_3D)
+}
+#endif // defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) &&
+ // defined(COLS_A)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
index 73f29e3e5..a4f7dbd48 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#ifndef VEC_SIZE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
index 0e123ae0a..2d0b6a299 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h
@@ -1,4 +1,20 @@
/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -21,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef ARM_COMPUTE_HELPER_H
#define ARM_COMPUTE_HELPER_H
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
index c39138caa..a83b1a8a5 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h
@@ -1,4 +1,20 @@
/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -21,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#ifndef ARM_COMPUTE_HELPERS_ASYMM_H
#define ARM_COMPUTE_HELPERS_ASYMM_H
@@ -403,4 +420,4 @@ ASYMM_RESCALE_IMPL(4)
ASYMM_RESCALE_IMPL(8)
ASYMM_RESCALE_IMPL(16)
-#endif // ARM_COMPUTE_HELPERS_ASYMM_H \ No newline at end of file
+#endif // ARM_COMPUTE_HELPERS_ASYMM_H
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl
index 1d96150f8..014842680 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "helpers.h"
#if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(EPSILON) && defined(DIM_X) && \
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl
new file mode 100644
index 000000000..3943fc4c2
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "helpers.h"
+
+#if defined(VEC_SIZE) && defined(DATA_TYPE)
+
+/** This kernel multiplies the input tensor by a scale factor.
+ *
+ * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g.
+ * -DDATA_TYPE=float
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
+ * -DVEC_SIZE=16
+ * @note Quantization scale of input tensor is passed in with -DSCALE=scale.
+ *
+ * @param[in] input_ptr Pointer to the source tensor. Supported data
+ * types: S8
+ * @param[in] input_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[in] scale_ptr Pointer to the source tensor. Supported data
+ * types: S32
+ * @param[in] scale_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] scale_step_x scale_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] scale_offset_first_element_in_bytes The offset of the first element in the scale
+ * tensor
+ * @param[out] output_ptr Pointer to the destination tensor. Supported
+ * data types: F16/F32
+ * @param[in] output_stride_x Stride of the destination tensor in X dimension
+ * (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the destination tensor in Y dimension
+ * (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ */
+__kernel void multiply_scale_factor(IMAGE_DECLARATION(input), VECTOR_DECLARATION(scale),
+ IMAGE_DECLARATION(output), float multiplier)
+{
+ // Get pixels pointer
+ Image input = CONVERT_TO_IMAGE_STRUCT(input);
+ Image output = CONVERT_TO_IMAGE_STRUCT(output);
+
+#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+ // Check if access on width gets out of bounds
+ // If it does shift access vector to access elements within bounds
+ const int xi = (int)(get_global_id(0) * VEC_SIZE);
+ input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x;
+ output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x;
+
+ // Load data
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ val = CONVERT(VLOAD(VEC_SIZE)(0, (__global int *)input.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE));
+
+ // Create scale vector
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ vscale = *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1));
+
+ // Dequantize
+ vscale *= (DATA_TYPE)(multiplier);
+ val *= vscale;
+
+ // Store result
+ VSTORE(VEC_SIZE)
+ (val, 0, (__global DATA_TYPE *)output.ptr);
+#else // !defined(VEC_SIZE) || !defined(LAST_ACCESSED_X)
+ *((__global DATA_TYPE *)(output.ptr)) =
+ ((DATA_TYPE)(*((__global int *)(input.ptr)))) *
+ *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)) * (DATA_TYPE)(multiplier);
+#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+}
+
+#endif // defined(VEC_SIZE) && defined(DATA_TYPE)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl
index 4aa7883c3..15c16f80c 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#ifndef VEC_SIZE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
index 2074d3ceb..76fda9041 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers_asymm.h"
#ifdef SATURATE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl
index 62a8901f6..12c8eeb79 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#ifndef VEC_SIZE
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl
index 5e0abd585..a66e107d1 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#define SUB(x, y) (x) - (y)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl
new file mode 100644
index 000000000..4ae9adb0b
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "helpers.h"
+
+#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
+#define CONVERT_RTE_VEC_STR(x, type, size) (convert_##type##size##_rte((x)))
+#define CONVERT_RTE_VEC(x, type, size) CONVERT_RTE_VEC_STR(x, type, size)
+#define MIN_QUANT_VAL -127
+#define MAX_QUANT_VAL 127
+
+#if defined(VEC_SIZE) && defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
+
+/** This performs the quantization of floating point inputs to 8-bit signed integers.
+ *
+ * @note Input data type should be given as a preprocessor argument using -DDATA_TYPE_IN=type. e.g.
+ * -DDATA_TYPE_IN=short
+ * @note Output data type should be given as a preprocessor argument using -DDATA_TYPE_OUT=type.
+ * e.g. -DDATA_TYPE_OUT=char
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g.
+ * -DVEC_SIZE=16
+ * @note Quantization scale should be given as a preprocessor argument using -DSCALE=scale. e.g.
+ * -DSCALE=0.125
+ * @note Quantization offset should be given as a preprocessor argument using -DOFFSET=offset. e.g.
+ * -DOFFSET=125
+ * @note Minimum value for quantized type should be given as a preprocessor argument using
+ * -DMIN_QUANT_VAL=value. e.g. -DMIN_QUANT_VAL=0
+ * @note Maximum value for quantized type should be given as a preprocessor argument using
+ * -DMAX_QUANT_VAL=value. e.g. -DMAX_QUANT_VAL=255
+ *
+ * @param[in] input_ptr Pointer to the source tensor. Supported data
+ * types: F32
+ * @param[in] input_stride_x Stride of the source tensor in X dimension (in
+ * bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the source tensor in Y dimension (in
+ * bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
+ * tensor
+ * @param[out] output_ptr Pointer to the destination tensor. Supported
+ * data types: S8
+ * @param[in] output_stride_x Stride of the destination tensor in X dimension
+ * (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the destination tensor in Y dimension
+ * (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y
+ * processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
+ * destination tensor
+ * @param[in]  scale_ptr                             Pointer to the scale tensor. Supported data
+ *                                                   types: F32
+ * @param[in]  scale_stride_x                        Stride of the scale tensor in X dimension
+ *                                                   (in bytes)
+ * @param[in] scale_step_x scale_stride_x * number of elements along X
+ * processed per workitem(in bytes)
+ */
+__kernel void quantization_symm8(IMAGE_DECLARATION(input), VECTOR_DECLARATION(scale),
+ IMAGE_DECLARATION(output))
+{
+ // Get pixels pointer
+ Image input = CONVERT_TO_IMAGE_STRUCT(input);
+ Image output = CONVERT_TO_IMAGE_STRUCT(output);
+
+#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+ // Check if access on width gets out of bounds
+ // If it does shift access vector to access elements within bounds
+ const int xi = (int)(get_global_id(0) * VEC_SIZE);
+ input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x;
+ output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x;
+
+ // Load data
+ VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE)
+ val = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr);
+
+ // Create scale vector
+ const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) vscale =
+ *(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1));
+
+ // Quantize
+ VEC_DATA_TYPE(int, VEC_SIZE)
+ res = CLAMP(CONVERT_RTE_VEC(val / vscale, int, VEC_SIZE), MIN_QUANT_VAL, MAX_QUANT_VAL);
+
+ // Store result
+ VSTORE(VEC_SIZE)
+ (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, (__global DATA_TYPE_OUT *)output.ptr);
+#else //! defined(VEC_SIZE) || !defined(LAST_ACCESSED_X)
+ *((__global DATA_TYPE_OUT *)(output.ptr)) = (DATA_TYPE_OUT)CLAMP(
+ CONVERT_RTE((*(__global DATA_TYPE_IN *)input.ptr) /
+ (*(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1))),
+ int),
+ MIN_QUANT_VAL, MAX_QUANT_VAL);
+#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X)
+}
+#endif // defined(VEC_SIZE) && defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
index d7ea2e2c4..832ac1270 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl
new file mode 100644
index 000000000..3d5e90356
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "helpers.h"
+
+#if defined(WIDTH)
+/** This function computes the per-row symmetric quantization scale factor of an input tensor.
+ *
+ * @note The width of the input tensor must be provided at compile time using
+ * -DWIDTH (e.g. -DWIDTH=320)
+ *
+ * @param[in] src_ptr Pointer to the source tensor. Supported data types:
+ * F32
+ * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed
+ * per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[in] dst_ptr                             Pointer to the scale-factor vector: one value
+ * per row, computed as max(|min|, |max|) / 127. Supported data types: F32.
+ * @param[in] dst_stride_x Stride of the min/max vector in X dimension (in
+ * bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed
+ * per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the min/max
+ * vector
+ */
+__kernel void scale_factor_symm8(IMAGE_DECLARATION(src), VECTOR_DECLARATION(dst))
+{
+ Image src = CONVERT_TO_IMAGE_STRUCT(src);
+
+ float4 min_value = (float4)FLT_MAX;
+ float4 max_value = (float4)-FLT_MAX;
+
+ int x = 0;
+ __global float *src_addr = (__global float *)(src.ptr);
+
+ for (; x <= (int)(WIDTH - 8); x += 8)
+ {
+ float8 value = vload8(0, (__global float *)(src_addr + x));
+
+ min_value = select(value.s0123, min_value, min_value < value.s0123);
+ min_value = select(value.s4567, min_value, min_value < value.s4567);
+
+ max_value = select(value.s0123, max_value, max_value > value.s0123);
+ max_value = select(value.s4567, max_value, max_value > value.s4567);
+ }
+
+ for (; x < WIDTH; ++x)
+ {
+ float value = *(src_addr + x);
+
+ min_value.s0 = min(min_value.s0, value);
+ max_value.s0 = max(max_value.s0, value);
+ }
+
+ // Perform min/max reduction
+ min_value.s01 = min(min_value.s01, min_value.s23);
+ min_value.s0 = min(min_value.s0, min_value.s1);
+ max_value.s01 = max(max_value.s01, max_value.s23);
+ max_value.s0 = max(max_value.s0, max_value.s1);
+
+ // Extract scale
+ max_value.s0 = max(fabs(min_value.s0), fabs(max_value.s0)) / 127.0f;
+
+ // Store scale factor
+ *((__global float *)(dst_ptr) + get_global_id(1)) = max_value.s0;
+}
+#endif // defined(WIDTH)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl
index 7367da7fb..b1611043b 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_batch.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BATCH_IN) && defined(HEIGHT_IN) && \
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl
index a26e762e8..eb612f834 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl
index 50472e4f9..3eb1a4ce7 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
__kernel void topkv2_init(VECTOR_DECLARATION(input), __global float *in_key_buf,
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl
index 9594daf19..460de790b 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "helpers.h"
__global inline float *get_vec_elem(Vector *vec, int idx)
diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
index f6830d229..e9d4696b4 100644
--- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
+++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +14,30 @@
* limitations under the License.
*/
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
// reference:
// https://code.google.com/archive/p/ocl-radix-sort/source/default/source
// OpenCL kernel sources for the CLRadixSort class
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp
index 7f4b5b0df..06eeb5b98 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
index c14e73634..bb5556888 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp
index 35f607bd0..01ea655b4 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLCastKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
@@ -52,8 +76,9 @@ void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataT
// Create kernel
if (is_data_type_quantized_asymmetric(input->info()->data_type()))
{
- const float scale_in = input->info()->quantization_info().scale;
- const int offset_in = input->info()->quantization_info().offset;
+ UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+ const float scale_in = qinfo.scale;
+ const int offset_in = qinfo.offset;
build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
@@ -62,8 +87,10 @@ void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataT
}
else if (is_data_type_quantized_asymmetric(output->info()->data_type()))
{
- const float scale_in = output->info()->quantization_info().scale;
- const int offset_in = output->info()->quantization_info().offset;
+ UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
+ const float scale_in = qinfo.scale;
+ const float offset_in = qinfo.offset;
+
build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp
index 2a3433c2b..389136817 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
index 0862b78bf..79f5ce065 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp
new file mode 100644
index 000000000..235e8975d
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp
@@ -0,0 +1,372 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/AccessWindowTranspose.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "support/ToolchainSupport.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+
+using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
+
+namespace arm_compute
+{
+class Coordinates;
+} // namespace arm_compute
+
+namespace
+{
+using ElementsProcessed = Steps;
+
+Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
+ const ITensorInfo *output, const GEMMReshapeInfo &gemm_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input0->num_dimensions() > 4,
+ "The number of dimensions for the matrix A must be <= 4");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3,
+ "The number of dimensions for the matrix B must be <= 3");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 2 &&
+ gemm_info.reinterpret_input_as_3d(),
+ "The input1 tensor cannot have more than 2 dimensions if input0 "
+ "has to be reinterpreted as 3D");
+
+ const int m = gemm_info.m();
+ const int n = gemm_info.n();
+ const int k = gemm_info.k();
+
+ ARM_COMPUTE_UNUSED(m);
+ ARM_COMPUTE_UNUSED(n);
+ ARM_COMPUTE_UNUSED(k);
+
+ ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != static_cast<unsigned int>(k));
+ ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != static_cast<unsigned int>(n));
+ ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(1) != static_cast<unsigned int>(k));
+ if (gemm_info.reinterpret_input_as_3d())
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) * input0->dimension(2) !=
+ static_cast<unsigned int>(m));
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != static_cast<unsigned int>(m));
+ }
+
+ if (output->total_size() != 0)
+ {
+ const TensorInfo tensor_info_output =
+ output->clone()->set_tensor_shape(compute_mm_shape(*input0, *input1, false, gemm_info));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32);
+ }
+
+ return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1,
+ ITensorInfo *output,
+ const GEMMReshapeInfo &gemm_info,
+ ElementsProcessed &num_elements_processed)
+{
+ unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
+ unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
+ bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
+ bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0);
+
+ Window win{};
+ Window win_out{};
+ bool window_changed = false;
+
+ // In case both input and output have to be reinterpreted as 3D tensors,
+ // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
+ if (reinterpret_input_as_3d == reinterpret_output_as_3d)
+ {
+ reinterpret_input_as_3d = false;
+ reinterpret_output_as_3d = false;
+ }
+
+ // Output tensor auto inizialitation if not yet initialized
+ auto_init_if_empty(*output,
+ input0->clone()
+ ->set_tensor_shape(compute_mm_shape(*input0, *input1, false, gemm_info))
+ .set_data_type(DataType::S32));
+
+ TensorInfo tmp_info(*output);
+
+ if (reinterpret_output_as_3d)
+ {
+ // Since the output tensor has to be reinterpreted as 3D and the execute window is based on a 2D
+ // GEMM,
+ // the window needs to be constructed on the 2D collapsed version of the tensor
+ TensorShape tmp_shape(output->tensor_shape());
+ tmp_shape.collapse(2U, 1U);
+ tmp_info.set_tensor_shape(tmp_shape);
+ }
+
+ // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor. num_elems_processed_per_iteration_x
+ // Note: if the dot product instruction is available, the 8x2 tile has to be used
+ num_elems_processed_per_iteration_x = 4;
+ num_elems_processed_per_iteration_y = std::min(static_cast<int>(output->dimension(1)), 4);
+
+ // Note: bottom paddings are calculated manually as the output can be reinterpreted as 3D tensor
+ // The only way to set properly the paddings, it is to set those explicitly through the
+ // AccessWindowStatic
+ const int m = reinterpret_input_as_3d ? input0->tensor_shape()[1] * input0->tensor_shape()[2]
+ : input0->tensor_shape()[1];
+ const int bottom_pad =
+ (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) %
+ num_elems_processed_per_iteration_y;
+
+ // Configure window
+ win = calculate_max_window(
+ tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
+ win_out = calculate_max_window(
+ *output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
+
+ AccessWindowStatic input0_access(input0, 0, 0, input0->dimension(0),
+ input0->dimension(1) + bottom_pad);
+ AccessWindowStatic input1_access(
+ input1, 0, 0, ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x),
+ input1->dimension(1));
+ AccessWindowStatic output_access(
+ output, 0, 0, ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration_x),
+ output->dimension(1) + bottom_pad);
+
+ window_changed =
+ update_window_and_padding(win, input0_access,
+ input1_access) || // window used by the execute_window_loop
+ update_window_and_padding(
+ win_out,
+ output_access); // window used to update the padding requirements of output tensor
+
+ Coordinates coord;
+ coord.set_num_dimensions(output->num_dimensions());
+ output_access.set_valid_region(win_out, ValidRegion(coord, output->tensor_shape()));
+
+ // Collapse along the Z direction
+ // This collapse needs to be here in order to tune the Z dimension of LWS
+ Window collapsed = win;
+ const unsigned int dimension_to_collapse =
+ std::min(static_cast<unsigned int>(output->num_dimensions()), 2u);
+ collapsed = win.collapse(win, dimension_to_collapse);
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_pair(err, collapsed);
+}
+} // namespace
+
+CLGEMMLowpMatrixMultiplyKernelEx::CLGEMMLowpMatrixMultiplyKernelEx()
+ : _input0(nullptr), _input1(nullptr), _output(nullptr), _slide_matrix_b(true),
+ _reinterpret_input_as_3d(false), _reinterpret_output_as_3d(false)
+{
+}
+
+void CLGEMMLowpMatrixMultiplyKernelEx::configure(const ICLTensor *input0, const ICLTensor *input1,
+ ICLTensor *output,
+ const GEMMReshapeInfo &gemm_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
+
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input0->info(), input1->info(), output->info(), gemm_info));
+
+ _input0 = input0;
+ _input1 = input1;
+ _output = output;
+ _reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
+ _reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0);
+
+ // In case both input and output have to be reinterpreted as 3D tensors,
+ // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
+ if (_reinterpret_input_as_3d == _reinterpret_output_as_3d)
+ {
+ _reinterpret_input_as_3d = false;
+ _reinterpret_output_as_3d = false;
+ }
+
+ // Check if we need to slide the matrix B
+ const unsigned int num_dimensions_input0 = _reinterpret_input_as_3d
+ ? _input0->info()->num_dimensions() - 1
+ : _input0->info()->num_dimensions();
+ _slide_matrix_b = (_input1->info()->num_dimensions() >= num_dimensions_input0);
+
+ ElementsProcessed num_elements_processed{};
+
+ // Configure kernel window
+ auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(),
+ gemm_info, num_elements_processed);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICLKernel::configure_internal(win_config.second);
+
+ // Create build options
+ std::string kernel_name(" ");
+ CLBuildOptions build_opts;
+ build_opts.add_option_if(_reinterpret_input_as_3d, "-DREINTERPRET_INPUT_AS_3D");
+ build_opts.add_option_if(_reinterpret_output_as_3d, "-DREINTERPRET_OUTPUT_AS_3D");
+ build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d,
+ "-DHEIGHT_GEMM3D=" +
+ support::cpp11::to_string(output->info()->dimension(1)));
+ build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d,
+ "-DDEPTH_GEMM3D=" +
+ support::cpp11::to_string(output->info()->dimension(2)));
+ build_opts.add_option_if(!_slide_matrix_b,
+ "-DMATRIX_B_DEPTH=" +
+ support::cpp11::to_string(input1->info()->dimension(2)));
+ build_opts.add_option("-DCOLS_A=" + support::cpp11::to_string(input0->info()->dimension(0)));
+ build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_X=" +
+ support::cpp11::to_string(num_elements_processed.x()));
+ build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_Y=" +
+ support::cpp11::to_string(num_elements_processed.y()));
+
+ kernel_name = "gemmlowp_mm_midgard_ex";
+
+ // Create kernel
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
+
+ // Set config_id for enabling LWS tuning
+ _config_id = kernel_name;
+ _config_id += "_";
+ _config_id += (_reinterpret_input_as_3d ? "3di_" : "");
+ _config_id += (_reinterpret_output_as_3d ? "3do_" : "");
+ _config_id += lower_string(string_from_data_type(input0->info()->data_type()));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(output->info()->dimension(1));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(output->info()->dimension(0));
+}
+
+Status CLGEMMLowpMatrixMultiplyKernelEx::validate(const ITensorInfo *input0,
+ const ITensorInfo *input1,
+ const ITensorInfo *output,
+ const GEMMReshapeInfo &gemm_info)
+{
+ ElementsProcessed num_elements_processed{};
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, gemm_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(input0->clone().get(), input1->clone().get(),
+ output->clone().get(), gemm_info, num_elements_processed)
+ .first);
+
+ return Status{};
+}
+
+void CLGEMMLowpMatrixMultiplyKernelEx::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ if (_input1->info()->num_dimensions() < 3)
+ {
+ // The stride_z for matrix B must be zero if we do not slice
+ ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
+ }
+
+ Window slice = window.first_slice_window_3D();
+ Window slice_matrix_b = slice;
+
+ slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1));
+ slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1));
+
+ if (_reinterpret_input_as_3d)
+ {
+ // Pass bottom paddings to the kernel if the input has to be reinterpreted as 3D tensor
+ const unsigned int idx0 = 3 * num_arguments_per_2D_tensor() + 3;
+ const unsigned int total_cross_plane_pad =
+ _input0->info()->padding().top + _input0->info()->padding().bottom;
+ _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
+ }
+
+ if (_reinterpret_output_as_3d)
+ {
+ // Pass bottom paddings to the kernel if the output has to be reinterpreted as 3D tensor
+ const unsigned int idx0 =
+ 3 * num_arguments_per_2D_tensor() + 3 + (_reinterpret_input_as_3d ? 1 : 0);
+ const unsigned int total_cross_plane_pad =
+ _output->info()->padding().top + _output->info()->padding().bottom;
+ _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
+ }
+
+ do
+ {
+ Window slice_b = slice;
+ // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A
+ // more than 2
+ // This scenario can happen when the matrix multiplication is used to perform a convolution
+ // operation
+ if (!_slide_matrix_b)
+ {
+ slice_b = slice_matrix_b;
+ }
+
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _input0, slice);
+ add_2D_tensor_argument(idx, _input1, slice_b);
+ add_2D_tensor_argument(idx, _output, slice);
+ _kernel.setArg<cl_uint>(idx++,
+ static_cast<unsigned int>(_input0->info()->strides_in_bytes()[2]));
+ _kernel.setArg<cl_uint>(idx++,
+ static_cast<unsigned int>(_input1->info()->strides_in_bytes()[2]));
+ _kernel.setArg<cl_uint>(idx++,
+ static_cast<unsigned int>(_output->info()->strides_in_bytes()[2]));
+ enqueue(queue, *this, slice, lws_hint());
+ } while (window.slide_window_slice_3D(slice));
+}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
index 718f615f9..3a25987d0 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLGatherExKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
index 31e98c9a8..7fbdcdaa7 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp
index 5db414f62..b45f6bb24 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp
new file mode 100644
index 000000000..d305896ea
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLValidate.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+using namespace arm_compute;
+
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *scale_factor,
+ const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scale_factor, 1, DataType::F16,
+ DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->num_dimensions() > 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->dimension(0) != input->dimension(1));
+ ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+
+ // Checks performed when output is configured
+ if ((output->total_size() != 0))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ }
+
+ return Status{};
+}
+
+std::tuple<Status, Window> validate_and_configure_window(const ITensorInfo *input,
+ ITensorInfo *output)
+{
+ // Configure kernel window
+ Window win = calculate_max_window(*input, Steps());
+
+ // Output tensor auto initialization if not yet initialized
+ auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32);
+
+ // CLMultiplyScaleFactorKernel doesn't need padding so update_window_and_padding() can be
+ // skipped
+ Coordinates coord;
+ coord.set_num_dimensions(output->num_dimensions());
+ output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
+
+ return std::make_tuple(Status{}, win);
+}
+} // namespace
+
+CLMultiplyScaleFactorKernel::CLMultiplyScaleFactorKernel()
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
+{
+}
+
+void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTensor *scale_factor,
+ ICLTensor *output, float multiplier)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
+
+ _input = input;
+ _scale_factor = scale_factor;
+ _output = output;
+ _multiplier = multiplier;
+
+ const int vec_size_x = 16 / output->info()->element_size();
+ const int output_width_x = output->info()->tensor_shape().x();
+ const bool multi_access_x = (output_width_x / vec_size_x > 0);
+
+ // Create and update the window (if needed)
+ Window win = calculate_max_window(*output->info());
+ if (multi_access_x)
+ {
+ win.set(Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x),
+ vec_size_x));
+ }
+ ICLKernel::configure_internal(win);
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
+ build_opts.add_option_if(
+ multi_access_x, "-DLAST_ACCESSED_X=" +
+ support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel("multiply_scale_factor", build_opts.options()));
+}
+
+Status CLMultiplyScaleFactorKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *scale_factor,
+ const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
+ return Status{};
+}
+
+void CLMultiplyScaleFactorKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+ Window slice = window_collapsed.first_slice_window_2D();
+
+ // Set scale_factor window
+ Window win_scale = calculate_max_window(*_scale_factor->info(), Steps());
+
+ do
+ {
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _input, slice);
+ add_1D_tensor_argument(idx, _scale_factor, win_scale);
+ add_2D_tensor_argument(idx, _output, slice);
+ _kernel.setArg<float>(idx++, _multiplier);
+ enqueue(queue, *this, slice, lws_hint());
+ } while (window_collapsed.slide_window_slice_2D(slice));
+}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
index ecfe05a51..74f7b4158 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLNegKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp
index e7d587029..8910a7b80 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
@@ -72,18 +96,18 @@ void CLPReLUKernel::configure(const ICLTensor *input, const ICLTensor *alpha, IC
if (is_data_type_quantized_asymmetric(input->info()->data_type()))
{
- build_opts.emplace("-DOFF_IN=" +
- support::cpp11::to_string(input->info()->quantization_info().offset));
- build_opts.emplace("-DOFF_ALPHA=" +
- support::cpp11::to_string(alpha->info()->quantization_info().offset));
- build_opts.emplace("-DOFF_OUT=" +
- support::cpp11::to_string(output->info()->quantization_info().offset));
- build_opts.emplace("-DSCALE_IN=" +
- support::cpp11::to_string(input->info()->quantization_info().scale));
- build_opts.emplace("-DSCALE_ALPHA=" +
- support::cpp11::to_string(alpha->info()->quantization_info().scale));
- build_opts.emplace("-DSCALE_OUT=" +
- support::cpp11::to_string(output->info()->quantization_info().scale));
+ build_opts.emplace("-DOFF_IN=" + support::cpp11::to_string(
+ input->info()->quantization_info().uniform().offset));
+ build_opts.emplace("-DOFF_ALPHA=" + support::cpp11::to_string(
+ alpha->info()->quantization_info().uniform().offset));
+ build_opts.emplace("-DOFF_OUT=" + support::cpp11::to_string(
+ output->info()->quantization_info().uniform().offset));
+ build_opts.emplace("-DSCALE_IN=" + support::cpp11::to_string(
+ input->info()->quantization_info().uniform().scale));
+ build_opts.emplace("-DSCALE_ALPHA=" + support::cpp11::to_string(
+ alpha->info()->quantization_info().uniform().scale));
+ build_opts.emplace("-DSCALE_OUT=" + support::cpp11::to_string(
+ output->info()->quantization_info().uniform().scale));
kernel_name += "_qasymm8";
}
_kernel =
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp
new file mode 100644
index 000000000..2d551f654
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLValidate.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+namespace arm_compute
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *scale_factor,
+ const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, scale_factor);
+ ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->num_dimensions() > 1);
+ ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->dimension(0) != input->dimension(1));
+
+ // Output must always be initialized
+ ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+
+ return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+{
+ // Configure kernel window
+ Window win = calculate_max_window(*input, Steps());
+
+ const int vec_size_x = 16 / input->element_size();
+ const int input_width_x = input->tensor_shape().x();
+ const bool multi_access_x = (input_width_x / vec_size_x > 0);
+
+ if (multi_access_x)
+ {
+ win.set(Window::DimX,
+ Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x),
+ vec_size_x));
+ }
+
+ Coordinates coord;
+ coord.set_num_dimensions(output->num_dimensions());
+ output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
+
+ return std::make_pair(Status{}, win);
+}
+} // namespace
+
+CLQuantizationSymmetricKernel::CLQuantizationSymmetricKernel()
+ : _input(nullptr), _scale_factor(nullptr), _output(nullptr)
+{
+}
+
+void CLQuantizationSymmetricKernel::configure(const ICLTensor *input, const ICLTensor *scale_factor,
+ ICLTensor *output)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, scale_factor, output);
+ ARM_COMPUTE_ERROR_THROW_ON(
+ validate_arguments(input->info(), scale_factor->info(), output->info()));
+
+ _input = input;
+ _scale_factor = scale_factor;
+ _output = output;
+
+ const int vec_size_x = 16 / input->info()->element_size();
+ const int input_width_x = input->info()->tensor_shape().x();
+ const bool multi_access_x = (input_width_x / vec_size_x > 0);
+
+ // Configure kernel window
+ auto win_config = validate_and_configure_window(input->info(), output->info());
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICLKernel::configure_internal(win_config.second);
+
+ // Create kernel
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+ build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
+ build_opts.add_option("-DDATA_TYPE_OUT=" +
+ get_cl_type_from_data_type(output->info()->data_type()));
+ build_opts.add_option_if(
+ multi_access_x, "-DLAST_ACCESSED_X=" +
+ support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
+
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel("quantization_symm8", build_opts.options()));
+}
+
+Status CLQuantizationSymmetricKernel::validate(const ITensorInfo *input,
+ const ITensorInfo *scale_factor,
+ const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(input->clone().get(), output->clone().get()).first);
+
+ return Status{};
+}
+
+void CLQuantizationSymmetricKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ // Support only 2D
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+ Window slice = window_collapsed.first_slice_window_2D();
+
+ do
+ {
+ Window scale_slice = slice.shift_dimensions(1);
+
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _input, slice);
+ add_1D_tensor_argument(idx, _scale_factor, scale_slice);
+ add_2D_tensor_argument(idx, _output, slice);
+ enqueue(queue, *this, slice, lws_hint());
+ } while (window_collapsed.slide_window_slice_2D(slice));
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
index 24e89db28..a98318323 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp
new file mode 100644
index 000000000..ff1904abd
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include <climits>
+
+using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
+
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
+
+ if (output->tensor_shape().total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+
+ TensorShape output_shape = TensorShape{input->dimension(1)};
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
+ }
+
+ return Status{};
+}
+
+std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+{
+ TensorShape output_shape = TensorShape{input->dimension(1)};
+
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(*output, output_shape, 1, input->data_type());
+
+ const unsigned int num_elems_processed_per_iteration = 1;
+
+ // Configure kernel window
+ Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+ AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
+ AccessWindowStatic output_access(output, 0, 0, output->dimension(0), 1);
+
+ bool window_changed = update_window_and_padding(win, input_access, output_access);
+
+ output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
+
+ Status err = (window_changed)
+ ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+ : Status{};
+ return std::make_tuple(err, win);
+}
+} // namespace
+
+CLScaleFactorSymm8Kernel::CLScaleFactorSymm8Kernel() : _input(nullptr), _output(nullptr) {}
+
+void CLScaleFactorSymm8Kernel::configure(const ICLTensor *input, ICLTensor *output)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
+
+ _input = input;
+ _output = output;
+
+ std::set<std::string> build_opts;
+ build_opts.emplace("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
+
+ // Create kernel
+ _kernel = static_cast<cl::Kernel>(
+ CLKernelLibraryEx::get().create_kernel("scale_factor_symm8", build_opts));
+
+ auto win_config = validate_and_configure_window(input->info(), output->info());
+
+ ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
+
+ ICLKernel::configure_internal(std::get<1>(win_config));
+}
+
+Status CLScaleFactorSymm8Kernel::validate(const ITensorInfo *input, const ITensorInfo *output)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
+
+ return Status{};
+}
+
+void CLScaleFactorSymm8Kernel::run(const Window &window, cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+
+ Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+ Window slice = window_collapsed.first_slice_window_2D();
+ slice.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ do
+ {
+ Window output_slice = slice.shift_dimensions(1);
+
+ unsigned int idx = 0;
+ // Set inputs
+ add_2D_tensor_argument(idx, _input, slice);
+ add_1D_tensor_argument(idx, _output, output_slice);
+ enqueue(queue, *this, slice, lws_hint());
+ } while (window_collapsed.slide_window_slice_2D(slice));
+}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp
index f7836b6cd..8b9b57fd8 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
@@ -147,8 +171,8 @@ void CLSpaceToBatchNDKernel::configure(const ICLTensor *input, const ICLTensor *
build_opts.emplace("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(3)));
if (input->info()->data_type() == DataType::QASYMM8)
{
- build_opts.emplace("-DZERO_VALUE=" +
- support::cpp11::to_string(input->info()->quantization_info().offset));
+ build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string(
+ input->info()->quantization_info().uniform().offset));
}
else
{
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp
index b085192a2..64fc0384e 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp
index 4f2b388c9..151d45e8d 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp
index 6cc8d9d13..61999cbd4 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2019 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
diff --git a/compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp b/compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp
new file mode 100644
index 000000000..d6c49b2b4
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h"
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/Traits.h"
+
+namespace arm_compute
+{
+CPPOneHotKernelEx::CPPOneHotKernelEx()
+ : _indices(nullptr), _output(nullptr), _depth(0), _on_value(0), _off_value(0), _axis(-1)
+{
+}
+
+void CPPOneHotKernelEx::configure(const ITensor *indices, ITensor *output, const int depth,
+ const float on_value, const float off_value, const int axis)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(indices, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate(indices, depth, on_value, off_value, axis));
+
+ _indices = indices;
+ _output = output;
+ _depth = depth;
+ _on_value = on_value;
+ _off_value = off_value;
+ _axis = axis;
+
+ ICPPKernel::configure(Window()); // Default 1 iteration window
+}
+
+Status CPPOneHotKernelEx::validate(const ITensor *indices, const int depth, const float on_value,
+ const float off_value, const int axis)
+{
+ ARM_COMPUTE_UNUSED(on_value, off_value);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(indices, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(indices->info()->num_dimensions() != 1,
+ "Only 1D indices are supported.");
+ ARM_COMPUTE_RETURN_ERROR_ON(depth <= 0);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis != -1, "Only axis = -1 is supported.");
+ return Status{};
+}
+
+bool CPPOneHotKernelEx::is_parallelisable() const { return false; }
+
+void CPPOneHotKernelEx::run(const Window &window, const ThreadInfo &info)
+{
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IKernel::window(), window);
+
+ const auto num_indices = _indices->info()->dimension(0);
+ for (size_t i = 0; i < num_indices; ++i)
+ {
+ const auto index = *reinterpret_cast<int32_t *>(_indices->ptr_to_element(Coordinates{i}));
+ for (int d = 0; d < _depth; ++d)
+ *reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(d, i))) =
+ (d == index) ? _on_value : _off_value;
+ }
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp b/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
index 8ac667ceb..648afb304 100644
--- a/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h"
#include "arm_compute/core/Error.h"
@@ -81,7 +97,7 @@ void CPPUpsampleKernelEx::run(const Window &window, const ThreadInfo &info)
// The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset
const uint8_t fill_value =
_output->info()->data_type() == DataType::QASYMM8
- ? utility::clamp<uint8_t>(_output->info()->quantization_info().offset)
+ ? utility::clamp<uint8_t>(_output->info()->quantization_info().uniform().offset)
: 0;
// Filling a value different than 0 works only for QASYMM8 datatype since we are filling 1byte
// values in a buffer of uint8_ts
diff --git a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
index 4508f5800..254c33ea9 100644
--- a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -221,8 +236,9 @@ void elementwise_op_quantized(
const auto window_end_x = static_cast<int>(window.x().end());
const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
- const float output_scale = out->info()->quantization_info().scale;
- const int output_offset = out->info()->quantization_info().offset;
+ UniformQuantizationInfo qinfo = out->info()->quantization_info().uniform();
+ const float output_scale = qinfo.scale;
+ const int output_offset = qinfo.offset;
// Output quantization info (add 0.5 to round toward the nearest integer - 0.5 rounds away from
// zero)
@@ -238,8 +254,10 @@ void elementwise_op_quantized(
const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1;
const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
- const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info();
- const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info();
+ const UniformQuantizationInfo broadcast_qinfo =
+ broadcast_tensor->info()->quantization_info().uniform();
+ const UniformQuantizationInfo non_broadcast_qinfo =
+ non_broadcast_tensor->info()->quantization_info().uniform();
const int32x4_t voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset);
const float32x4_t vscale_non_broadcast = vdupq_n_f32(non_broadcast_qinfo.scale);
@@ -269,10 +287,8 @@ void elementwise_op_quantized(
for (; x < window_end_x; ++x)
{
const float afs =
- scvt_f32_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo.scale,
- non_broadcast_qinfo.offset);
- const float bfs =
- scvt_f32_qasymm8(broadcast_value, broadcast_qinfo.scale, broadcast_qinfo.offset);
+ dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo);
+ const float bfs = dequantize_qasymm8(broadcast_value, broadcast_qinfo);
*(output_ptr + x) =
(*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs,
out->info()->quantization_info());
@@ -283,12 +299,14 @@ void elementwise_op_quantized(
else
{
// Input1 quantization info
- const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset);
- const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale);
+ UniformQuantizationInfo qinfo = in1->info()->quantization_info().uniform();
+ const int32x4_t voffset1 = vdupq_n_s32(qinfo.offset);
+ const float32x4_t vscale1 = vdupq_n_f32(qinfo.scale);
// Input2 quantization info
- const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset);
- const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale);
+ qinfo = in2->info()->quantization_info().uniform();
+ const int32x4_t voffset2 = vdupq_n_s32(qinfo.offset);
+ const float32x4_t vscale2 = vdupq_n_f32(qinfo.scale);
// Clear X Dimension on execution window as we handle manually
input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
@@ -301,26 +319,24 @@ void elementwise_op_quantized(
Iterator input2(in2, input2_win);
Iterator output(out, win);
- execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());
- const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());
- const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+ execute_window_loop(win,
+ [&](const Coordinates &) {
+ const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());
+ const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());
+ const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
- int x =
- (*neon_func)(window_start_x, window_end_x, window_step_x, input1_ptr, input2_ptr,
- output_ptr, voffset1, voffset2, vscale1, vscale2, voffseto, invvscaleo);
- for (; x < window_end_x; ++x)
- {
- const float afs =
- scvt_f32_qasymm8(*(input1_ptr + x), input1_qinfo.scale, input1_qinfo.offset);
- const float bfs =
- scvt_f32_qasymm8(*(input2_ptr + x), input2_qinfo.scale, input2_qinfo.offset);
- *(output_ptr + x) = (*scalar_func)(afs, bfs, out->info()->quantization_info());
- }
- },
- input1, input2, output);
+ int x = (*neon_func)(window_start_x, window_end_x, window_step_x,
+ input1_ptr, input2_ptr, output_ptr, voffset1,
+ voffset2, vscale1, vscale2, voffseto, invvscaleo);
+ for (; x < window_end_x; ++x)
+ {
+ const float afs = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo);
+ const float bfs = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo);
+ *(output_ptr + x) =
+ (*scalar_func)(afs, bfs, out->info()->quantization_info());
+ }
+ },
+ input1, input2, output);
}
}
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp
new file mode 100644
index 000000000..648705ba9
--- /dev/null
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp
@@ -0,0 +1,730 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h"
+
+#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/NEAsymm.h"
+#include "arm_compute/core/NEON/NEFixedPoint.h"
+#include "arm_compute/core/NEON/NEMath.h"
+#include "arm_compute/core/NEON/NESymm.h"
+#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+#include <arm_neon.h>
+#include <array>
+#include <cmath>
+#include <map>
+#include <set>
+
+using namespace arm_compute;
+namespace
+{
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
+ const ActivationLayerInfo &activation_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
+ input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
+
+ static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations = {
+ ActivationLayerInfo::ActivationFunction::RELU,
+ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::BOUNDED_RELU,
+ ActivationLayerInfo::ActivationFunction::LOGISTIC,
+ ActivationLayerInfo::ActivationFunction::TANH};
+ static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations = {
+ ActivationLayerInfo::ActivationFunction::LOGISTIC,
+ ActivationLayerInfo::ActivationFunction::TANH};
+ const DataType data_type = input->data_type();
+ const QuantizationInfo &oq_info =
+ (output != nullptr) ? output->quantization_info() : input->quantization_info();
+ const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation();
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ is_data_type_quantized_asymmetric(data_type) &&
+ (qasymm8_supported_activations.count(f_act) == 0),
+ "For QASYMM8 only tanh, logistic, relu and lower/upper bounded relu are supported");
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) &&
+ (qsymm16_supported_activations.count(f_act) == 0),
+ "For QSYMM16 only tanh and logistic are supported");
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) &&
+ (f_act == ActivationLayerInfo::ActivationFunction::TANH) &&
+ (oq_info != QuantizationInfo(1.f / 128.f, 128)));
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) &&
+ (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) &&
+ (oq_info != QuantizationInfo(1.f / 256.f, 0)));
+
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) &&
+ (f_act == ActivationLayerInfo::ActivationFunction::TANH) &&
+ (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
+ ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) &&
+ (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) &&
+ (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
+
+ // Checks performed when output is configured
+ if ((output != nullptr) && (output->total_size() != 0))
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ }
+
+ return Status{};
+}
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+{
+ // Configure kernel window
+ Window win = calculate_max_window(*input, Steps());
+
+ if (output != nullptr)
+ {
+    // Output auto initialization if not yet initialized
+ auto_init_if_empty(*output, *input->clone());
+
+ // NEActivationLayerKernelEx doesn't need padding so update_window_and_padding() can be skipped
+ Coordinates coord;
+ coord.set_num_dimensions(output->num_dimensions());
+ output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
+ }
+
+ return std::make_pair(Status{}, win);
+}
+
+inline uint32x4_t vreinterpret_unsigend_int(const float32x4_t &vec)
+{
+ return vreinterpretq_u32_f32(vec);
+}
+
+inline float32x4_t vreinterpret_floating_point(const uint32x4_t &vec)
+{
+ return vreinterpretq_f32_u32(vec);
+}
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+inline uint16x8_t vreinterpret_unsigend_int(const float16x8_t &vec)
+{
+ return vreinterpretq_u16_f16(vec);
+}
+inline float16x8_t vreinterpret_floating_point(const uint16x8_t &vec)
+{
+ return vreinterpretq_f16_u16(vec);
+}
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
+} // namespace
+
+NEActivationLayerKernelEx::NEActivationLayerKernelEx()
+ : _input(nullptr), _output(nullptr), _func(nullptr), _act_info()
+{
+}
+
+void NEActivationLayerKernelEx::configure(ITensor *input, ITensor *output,
+ ActivationLayerInfo activation_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input);
+
+ _input = input;
+ _act_info = activation_info;
+ _output = input;
+
+ // Out-of-place calculation
+ if (output != nullptr)
+ {
+ _output = output;
+ }
+
+ // Disabled activation, thus no operation needed
+ if (!activation_info.enabled())
+ {
+ _func = nullptr;
+ }
+
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(
+ input->info(), (output != nullptr) ? output->info() : nullptr, activation_info));
+
+ // Activation functions : FP32
+ static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 = {
+ {ActivationFunction::ABS,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::ABS, float>},
+ {ActivationFunction::LINEAR,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LINEAR, float>},
+ {ActivationFunction::LOGISTIC,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, float>},
+ {ActivationFunction::RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, float>},
+ {ActivationFunction::BOUNDED_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, float>},
+ {ActivationFunction::LU_BOUNDED_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, float>},
+ {ActivationFunction::LEAKY_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LEAKY_RELU, float>},
+ {ActivationFunction::SOFT_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::SOFT_RELU, float>},
+ {ActivationFunction::ELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::ELU, float>},
+ {ActivationFunction::SQRT,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::SQRT, float>},
+ {ActivationFunction::SQUARE,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::SQUARE, float>},
+ {ActivationFunction::TANH,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, float>},
+ {ActivationFunction::IDENTITY,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, float>},
+ };
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ // Activation functions : FP16
+ static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 = {
+ {ActivationFunction::ABS,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::ABS, float16_t>},
+ {ActivationFunction::LINEAR,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LINEAR, float16_t>},
+ {ActivationFunction::LOGISTIC,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, float16_t>},
+ {ActivationFunction::RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, float16_t>},
+ {ActivationFunction::BOUNDED_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, float16_t>},
+ {ActivationFunction::LU_BOUNDED_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t>},
+ {ActivationFunction::LEAKY_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LEAKY_RELU, float16_t>},
+ {ActivationFunction::SOFT_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::SOFT_RELU, float16_t>},
+ {ActivationFunction::ELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::ELU, float16_t>},
+ {ActivationFunction::SQRT,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::SQRT, float16_t>},
+ {ActivationFunction::SQUARE,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::SQUARE, float16_t>},
+ {ActivationFunction::TANH,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, float16_t>},
+ {ActivationFunction::IDENTITY,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, float16_t>},
+ };
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
+
+ // Activation functions : QASYMM8
+ static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 = {
+ {ActivationFunction::LOGISTIC,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, qasymm8_t>},
+ {ActivationFunction::BOUNDED_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, qasymm8_t>},
+ {ActivationFunction::LU_BOUNDED_RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_t>},
+ {ActivationFunction::RELU,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, qasymm8_t>},
+ {ActivationFunction::TANH,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, qasymm8_t>},
+ {ActivationFunction::IDENTITY,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, qasymm8_t>},
+ };
+
+ // Activation functions : QSYMM16
+ static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qsymm16 = {
+ {ActivationFunction::LOGISTIC,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, qsymm16_t>},
+ {ActivationFunction::TANH,
+ &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, qsymm16_t>},
+ };
+
+ switch (input->info()->data_type())
+ {
+ case DataType::QASYMM8:
+ _func = act_map_qasymm8[activation_info.activation()];
+ break;
+ case DataType::QSYMM16:
+ _func = act_map_qsymm16[activation_info.activation()];
+ break;
+ case DataType::F32:
+ _func = act_map_f32[activation_info.activation()];
+ break;
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ case DataType::F16:
+ _func = act_map_f16[activation_info.activation()];
+ break;
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ default:
+ ARM_COMPUTE_ERROR("Unsupported data type.");
+ }
+
+ // Configure kernel window
+ auto win_config =
+ validate_and_configure_window(input->info(), (output != nullptr) ? output->info() : nullptr);
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICPPKernel::configure(win_config.second);
+}
+
+template <ActivationLayerInfo::ActivationFunction F, typename T>
+typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
+NEActivationLayerKernelEx::activation(const Window &window)
+{
+ /** NEON vector tag type. */
+ using ExactTagType =
+ typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
+
+ const int window_step_x = 16 / sizeof(T);
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationFunction act = F;
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(_input, win_collapsed);
+ Iterator output(_output, win_collapsed);
+
+ const auto infinity = wrapper::vdup_n(std::numeric_limits<T>::infinity(), ExactTagType{});
+ const auto epsilon = wrapper::vdup_n(static_cast<T>(1e-24), ExactTagType{});
+ const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{});
+ const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{});
+ const auto va = wrapper::vdup_n(static_cast<T>(_act_info.a()), ExactTagType{});
+ const auto vb = wrapper::vdup_n(static_cast<T>(_act_info.b()), ExactTagType{});
+ const auto a = static_cast<T>(_act_info.a());
+ const auto b = static_cast<T>(_act_info.b());
+
+ execute_window_loop(
+ win_collapsed,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<T *>(output.ptr());
+
+ wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
+
+ // Compute S elements per iteration
+ int x = window_start_x;
+
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const auto vin = wrapper::vloadq(input_ptr + x);
+ switch (act)
+ {
+ case ActivationFunction::ABS:
+ tmp = wrapper::vabs(vin);
+ break;
+ case ActivationFunction::LINEAR:
+ tmp = wrapper::vmla(vb, va, vin);
+ break;
+ case ActivationFunction::LOGISTIC:
+ // exp(-vin)
+ tmp = wrapper::vexpq(wrapper::vneg(vin));
+
+ // NaN -> INF
+ tmp = vreinterpret_floating_point(wrapper::vorr(
+ wrapper::vand(wrapper::vnot(wrapper::vceq(tmp, tmp)),
+ vreinterpret_unsigend_int(infinity)),
+ wrapper::vand(wrapper::vceq(tmp, tmp), vreinterpret_unsigend_int(tmp))));
+
+                  // 1 / (1 + tmp)
+ tmp = wrapper::vinv(wrapper::vadd(const_1, tmp));
+ break;
+ case ActivationFunction::RELU:
+ tmp = wrapper::vmax(const_0, vin);
+ break;
+ case ActivationFunction::BOUNDED_RELU:
+ tmp = wrapper::vmin(va, wrapper::vmax(const_0, vin));
+ break;
+ case ActivationFunction::LU_BOUNDED_RELU:
+ tmp = wrapper::vmin(va, wrapper::vmax(vb, vin));
+ break;
+ case ActivationFunction::LEAKY_RELU:
+ tmp = wrapper::vbsl(wrapper::vcgt(vin, const_0), vin, wrapper::vmul(va, vin));
+ break;
+ case ActivationFunction::SOFT_RELU:
+ tmp = wrapper::vlog(wrapper::vadd(const_1, wrapper::vexpq(vin)));
+ break;
+ case ActivationFunction::ELU:
+ tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin,
+ wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1)));
+ break;
+ case ActivationFunction::SQRT:
+ tmp = wrapper::vinv(wrapper::vinvsqrt(vin + epsilon));
+ break;
+ case ActivationFunction::SQUARE:
+ tmp = wrapper::vmul(vin, vin);
+ break;
+ case ActivationFunction::TANH:
+ tmp = wrapper::vmul(va, wrapper::vtanh(wrapper::vmul(vb, vin)));
+ break;
+ case ActivationFunction::IDENTITY:
+ tmp = vin;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ wrapper::vstore(output_ptr + x, tmp);
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ const T in = *(reinterpret_cast<const T *>(input_ptr + x));
+ T tmp;
+ switch (act)
+ {
+ case ActivationFunction::ABS:
+ tmp = std::abs(in);
+ break;
+ case ActivationFunction::LINEAR:
+ tmp = a * in + b;
+ break;
+ case ActivationFunction::LOGISTIC:
+ tmp = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-in));
+ break;
+ case ActivationFunction::RELU:
+ tmp = std::max<T>(static_cast<T>(0), in);
+ break;
+ case ActivationFunction::BOUNDED_RELU:
+ tmp = std::min<T>(a, std::max(static_cast<T>(0), in));
+ break;
+ case ActivationFunction::LU_BOUNDED_RELU:
+ tmp = std::min<T>(a, std::max<T>(b, in));
+ break;
+ case ActivationFunction::LEAKY_RELU:
+ tmp = (in > 0) ? in : a * in;
+ break;
+ case ActivationFunction::SOFT_RELU:
+ tmp = std::log(static_cast<T>(1) + std::exp(in));
+ break;
+ case ActivationFunction::ELU:
+ tmp = (in >= 0) ? in : a * (std::exp(in) - 1);
+ break;
+ case ActivationFunction::SQRT:
+ tmp = std::sqrt(in);
+ break;
+ case ActivationFunction::SQUARE:
+ tmp = in * in;
+ break;
+ case ActivationFunction::TANH:
+ tmp = a * std::tanh(b * in);
+ break;
+ case ActivationFunction::IDENTITY:
+ tmp = in;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ *(output_ptr + x) = tmp;
+ }
+ },
+ input, output);
+}
+
+template <ActivationLayerInfo::ActivationFunction F, typename T>
+typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type
+NEActivationLayerKernelEx::activation(const Window &window)
+{
+ const int window_step_x = 16 / sizeof(T);
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationFunction act = F;
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(_input, win_collapsed);
+ Iterator output(_output, win_collapsed);
+
+ const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
+ const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in));
+ const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in));
+ const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in);
+ const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in);
+ const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in);
+ const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0);
+ const auto vconst_1 = vdupq_n_f32(1.f);
+ const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
+ const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
+ const float a_f32 = _act_info.a();
+ const float b_f32 = _act_info.b();
+
+ // Initialise scale/offset for re-quantization
+ float s = qi_in.scale / qi_out.scale;
+ float o = -qi_in.offset * s + qi_out.offset;
+ float32x4_t vs = vdupq_n_f32(s);
+ float32x4_t vo = vdupq_n_f32(o);
+
+ execute_window_loop(
+ win_collapsed,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<T *>(output.ptr());
+
+ wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
+
+ // Compute S elements per iteration
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const auto vin = wrapper::vloadq(input_ptr + x);
+ if (act == ActivationFunction::RELU)
+ {
+ // Perform activation
+ tmp = vmaxq_u8(vconst_0, vin);
+ // Re-quantize to new output space
+ tmp = vmlaq_qasymm8(tmp, vs, vo);
+ }
+ else if (act == ActivationFunction::BOUNDED_RELU)
+ {
+ // Perform activation
+ tmp = vminq_u8(va, vmaxq_u8(vconst_0, vin));
+ // Re-quantize to new output space
+ tmp = vmlaq_qasymm8(tmp, vs, vo);
+ }
+ else if (act == ActivationFunction::LU_BOUNDED_RELU)
+ {
+ // Perform activation
+ tmp = vminq_u8(va, vmaxq_u8(vb, vin));
+ // Re-quantize to new output space
+ tmp = vmlaq_qasymm8(tmp, vs, vo);
+ }
+ else if (act == ActivationFunction::LOGISTIC)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize(vin, qi_in);
+ // Perform activation
+ const float32x4x4_t tmp_dep = {{
+ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
+ vin_deq.val[0])))),
+ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
+ vin_deq.val[1])))),
+ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
+ vin_deq.val[2])))),
+ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
+ vin_deq.val[3])))),
+ }};
+ // Re-quantize to new output space
+ tmp = vquantize(tmp_dep, qi_out);
+ }
+ else if (act == ActivationFunction::TANH)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize(vin, qi_in);
+ // Perform activation
+ const float32x4x4_t tmp_dep = {{
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))),
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))),
+ }};
+ // Re-quantize to new output space
+ tmp = vquantize(tmp_dep, qi_out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ wrapper::vstore(output_ptr + x, tmp);
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ T in = *(reinterpret_cast<const T *>(input_ptr + x));
+ T tmp;
+ if (act == ActivationFunction::RELU)
+ {
+ tmp = std::max(const_0, in);
+ tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
+ }
+ else if (act == ActivationFunction::BOUNDED_RELU)
+ {
+ tmp = std::min(a, std::max(const_0, in));
+ tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
+ }
+ else if (act == ActivationFunction::LU_BOUNDED_RELU)
+ {
+ tmp = std::min(a, std::max(b, in));
+ tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255));
+ }
+ else if (act == ActivationFunction::LOGISTIC)
+ {
+ float tmp_f = dequantize_qasymm8(in, qi_in);
+ tmp_f = 1.f / (1.f + std::exp(-tmp_f));
+ tmp = quantize_qasymm8(tmp_f, qi_out);
+ }
+ else if (act == ActivationFunction::TANH)
+ {
+ float tmp_f = dequantize_qasymm8(in, qi_in);
+ tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
+ tmp = quantize_qasymm8(tmp_f, qi_out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ *(output_ptr + x) = tmp;
+ }
+ },
+ input, output);
+}
+
+template <ActivationLayerInfo::ActivationFunction F, typename T>
+typename std::enable_if<std::is_same<T, qsymm16_t>::value, void>::type
+NEActivationLayerKernelEx::activation(const Window &window)
+{
+ const int window_step_x = 16 / sizeof(T);
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationFunction act = F;
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(_input, win_collapsed);
+ Iterator output(_output, win_collapsed);
+
+ const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform();
+ const auto vconst_1 = vdupq_n_f32(1.f);
+ const float32x4_t va_f32 = vdupq_n_f32(_act_info.a());
+ const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b());
+ const float a_f32 = _act_info.a();
+ const float b_f32 = _act_info.b();
+
+ execute_window_loop(
+ win_collapsed,
+ [&](const Coordinates &) {
+ const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<T *>(output.ptr());
+
+ wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp;
+ ARM_COMPUTE_UNUSED(tmp);
+
+ // Compute S elements per iteration
+ int x = window_start_x;
+ for (; x <= (window_end_x - window_step_x); x += window_step_x)
+ {
+ const auto vin = wrapper::vloadq(input_ptr + x);
+ if (act == ActivationFunction::LOGISTIC)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
+ // Perform activation
+ const float32x4x2_t tmp_dep = {{
+ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
+ vin_deq.val[0])))),
+ wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg(
+ vin_deq.val[1])))),
+ }};
+ // Re-quantize to new output space
+ tmp = vquantize_int16(tmp_dep, qi_out.scale);
+ }
+ else if (act == ActivationFunction::TANH)
+ {
+ // De-quantize
+ const auto vin_deq = vdequantize_int16(vin, qi_in.scale);
+ // Perform activation
+ const float32x4x2_t tmp_dep = {{
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))),
+ wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))),
+ }};
+ // Re-quantize to new output space
+ tmp = vquantize_int16(tmp_dep, qi_out.scale);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ wrapper::vstore(output_ptr + x, tmp);
+ }
+
+ // Compute left-over elements
+ for (; x < window_end_x; ++x)
+ {
+ T in = *(reinterpret_cast<const T *>(input_ptr + x));
+ T tmp;
+ if (act == ActivationFunction::LOGISTIC)
+ {
+ float tmp_f = dequantize_qsymm16(in, qi_in.scale);
+ tmp_f = 1.f / (1.f + std::exp(-tmp_f));
+ tmp = quantize_qsymm16(tmp_f, qi_out);
+ }
+ else if (act == ActivationFunction::TANH)
+ {
+ float tmp_f = dequantize_qsymm16(in, qi_in.scale);
+ tmp_f = a_f32 * std::tanh(b_f32 * tmp_f);
+ tmp = quantize_qsymm16(tmp_f, qi_out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ *(output_ptr + x) = tmp;
+ }
+ },
+ input, output);
+}
+
+Status NEActivationLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ActivationLayerInfo &act_info)
+{
+ ARM_COMPUTE_UNUSED(act_info);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_and_configure_window(input->clone().get(),
+ (output != nullptr) ? output->clone().get() : nullptr)
+ .first);
+
+ return Status{};
+}
+
+void NEActivationLayerKernelEx::run(const Window &window, const ThreadInfo &info)
+{
+ // Early exit on disabled activation
+ if (!_act_info.enabled())
+ {
+ return;
+ }
+
+ ARM_COMPUTE_UNUSED(info);
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
+ ARM_COMPUTE_ERROR_ON(_func == nullptr);
+
+ (this->*_func)(window);
+}
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
index d2f42de53..32d7d6237 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h"
#include "arm_compute/core/Error.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp
index 7e4fc129b..fbb9dbca9 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NECastKernel.h"
#include "arm_compute/core/AccessWindowStatic.h"
@@ -394,7 +410,8 @@ template <typename FromT> void run_cast(const ITensor *input, ITensor *output, c
case DataType::QASYMM8:
{
using to_vector = typename cast_vector<float>::type;
- const QuantizationInfo &qinfo_out = output->info()->quantization_info();
+ const UniformQuantizationInfo &qinfo_out =
+ output->info()->quantization_info().uniform();
const auto vf = vcast<to_vector, from_vector>(vin);
const auto vout = vquantize(vf, qinfo_out);
store_result<qasymm8_t>(reinterpret_cast<qasymm8_t *>(out.ptr()) + x, vout);
@@ -440,7 +457,8 @@ template <typename FromT> void run_cast(const ITensor *input, ITensor *output, c
case DataType::QASYMM8:
{
const QuantizationInfo &qinfo_out = output->info()->quantization_info();
- const auto qval = qinfo_out.quantize(static_cast<float>(val), rounding_policy);
+ const auto qval =
+ quantize_qasymm8(static_cast<float>(val), qinfo_out, rounding_policy);
*(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval;
break;
}
@@ -486,8 +504,8 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo
#else //__aarch64__
constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO;
#endif //__aarch64__
- const auto &qinfo_in = input->info()->quantization_info();
- const auto &qinfo_out = output->info()->quantization_info();
+ const auto &qinfo_in = input->info()->quantization_info().uniform();
+ const auto &qinfo_out = output->info()->quantization_info().uniform();
execute_window_loop(
win_collapsed,
@@ -547,7 +565,7 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo
for (; x < window_end_x; ++x)
{
qasymm8_t qval_in = *(in_ptr + x);
- const auto val = qinfo_in.dequantize(qval_in);
+ const auto val = dequantize_qasymm8(qval_in, qinfo_in);
switch (output->info()->data_type())
{
@@ -558,7 +576,7 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo
}
case DataType::QASYMM8:
{
- const auto qval_out = qinfo_out.quantize(val, rounding_policy);
+ const auto qval_out = quantize_qasymm8(val, qinfo_out, rounding_policy);
*(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval_out;
break;
}
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp
index 8a2223c26..95e269dee 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp
index cebd614df..200fc4f87 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h"
#include "arm_compute/core/CPP/Validate.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp
index 5401afea0..091d38c56 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h"
#include "arm_compute/core/Error.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
index ce2413dc1..4c0a5e799 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
#include "arm_compute/core/CPP/Validate.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
index 391337bfb..30787c0a4 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h"
#include "arm_compute/core/Error.h"
@@ -118,7 +134,7 @@ void NEHashtableLookupKernel::run(const Window &window, const ThreadInfo &info)
const size_t lookup_dim = _output->info()->num_dimensions() - 1;
const int const_0 = _output->info()->data_type() == DataType::QASYMM8
- ? _output->info()->quantization_info().offset
+ ? _output->info()->quantization_info().uniform().offset
: 0;
std::unordered_map<int32_t, size_t> key_index_map;
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp
index 1ea77fb5c..49adf1462 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h"
#include "arm_compute/core/CPP/Validate.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp
index de218d489..b92130cec 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h"
#include "arm_compute/core/Error.h"
@@ -71,12 +87,6 @@ inline int32x4x4_t load_value(const int32_t *input_ptr)
wrapper::vloadq(input_ptr + 8), wrapper::vloadq(input_ptr + 12)};
}
-inline float32x4x4_t load_value(const float *input_ptr)
-{
- return {wrapper::vloadq(input_ptr), wrapper::vloadq(input_ptr + 4),
- wrapper::vloadq(input_ptr + 8), wrapper::vloadq(input_ptr + 12)};
-}
-
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
inline const float32x4x4_t load_value(const float16_t *input_ptr)
{
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp
index ad1bb9051..641641b5a 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEPReLUKernel.h"
#include "arm_compute/core/ITensor.h"
@@ -63,7 +79,8 @@ template <ConditionalOperation op>
inline uint8_t elementwise_conditional_op_quantized_scalar(const float &a, const float &b,
QuantizationInfo qinfo)
{
- return qinfo.quantize(elementwise_conditional_op_scalar<op>(a, b), RoundingPolicy::TO_NEAREST_UP);
+ return quantize_qasymm8(elementwise_conditional_op_scalar<op>(a, b), qinfo,
+ RoundingPolicy::TO_NEAREST_UP);
}
template <ConditionalOperation op, typename VectorType>
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp
index acf0092eb..6ba0f1fd4 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h"
#include "arm_compute/core/Error.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp
index 59e7d9beb..3b65eac10 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h"
#include "arm_compute/core/CPP/Validate.h"
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp
index 36a2f55a9..44feb200f 100644
--- a/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp
+++ b/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/ARMComputeEx/src/core/UtilsEx.cpp b/compute/ARMComputeEx/src/core/UtilsEx.cpp
index 94242b56b..863316909 100644
--- a/compute/ARMComputeEx/src/core/UtilsEx.cpp
+++ b/compute/ARMComputeEx/src/core/UtilsEx.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/core/UtilsEx.h"
#include "arm_compute/core/Error.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp
index ae64a6edd..2d379cf36 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLArgOperation.h"
#include "arm_compute/core/CL/kernels/CLArgOperationKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
index 7c5fe5eda..92ee69a36 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h"
#include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
index 742fc6f59..b3118f39e 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLCast.h"
#include "arm_compute/core/CL/kernels/CLCastKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
index c2e4ca9ff..db662505a 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLDepthToSpace.h"
#include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
index 2781784ca..3d9a28a48 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h"
#include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
new file mode 100644
index 000000000..f098832b0
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h"
+
+#include "arm_compute/core/Size2D.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/ToolchainSupport.h"
+
+#include <algorithm>
+
+using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
+
+namespace
+{
+Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output)
+{
+ ARM_COMPUTE_UNUSED(input);
+ ARM_COMPUTE_UNUSED(weights);
+ ARM_COMPUTE_UNUSED(output);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLGEMMLowpMatrixMultiplyCoreEx::validate(&input, &weights, nullptr, &output));
+
+ return Status{};
+}
+} // namespace
+
+void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
+ k->configure(input, output);
+ _kernel = std::move(k);
+}
+
+Status CLFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *input,
+ const ITensorInfo *output)
+{
+ return CLTransposeKernel::validate(input, output);
+}
+
+CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer(
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(),
+ _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(),
+ _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(),
+ _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false),
+ _original_weights(nullptr)
+{
+}
+void CLFullyConnectedHybridLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights,
+ ICLTensor *output, bool retain_internal_weights)
+{
+ ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
+
+ ARM_COMPUTE_UNUSED(output);
+ ARM_COMPUTE_UNUSED(retain_internal_weights);
+ // Configure gemmlowp function
+ _mm_gemmlowp.configure(input, weights, nullptr, output);
+}
+
+void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTensor *weights,
+ const ICLTensor *biases, ICLTensor *output,
+ FullyConnectedLayerInfo fc_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+ // Perform validate step
+ ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedHybridLayer::validate(
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
+
+ _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
+ _accumulate_biases = false;
+ _is_prepared = fc_info.retain_internal_weights;
+ _original_weights = weights;
+
+ // Configure accumulate biases kernel for non quantized asymmetric types
+ if (biases != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
+
+ _accumulate_biases = true;
+
+ // Configure accumulate biases kernel
+ _accumulate_biases_kernel.set_target(CLScheduler::get().target());
+ _accumulate_biases_kernel.configure(output, biases);
+ }
+
+ const ICLTensor *weights_to_use = weights;
+
+ // With the Fully Connected layer we can have 4 different cases:
+ // 1) Convolution layer -> Fully Connected layer without batches
+ // 2) Fully Connected layer -> Fully Connected layer without batches
+ // 3) Convolution layer -> Fully Connected layer with batches
+ // 4) Fully Connected layer -> Fully Connected layer with batches
+
+ // Check if we have a fully connected layer with batches
+ const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
+ bool is_fc_after_conv = false;
+ if (is_batched_fc_layer)
+ {
+ is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3,
+ input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
+ }
+ else
+ {
+ is_fc_after_conv = input->info()->num_dimensions() > 1 && input->info()->dimension(1) > 1;
+ }
+ ARM_COMPUTE_ERROR_ON_MSG(is_fc_after_conv,
+ "CLFullyConnectedHybridLayer does not support after conv");
+ ARM_COMPUTE_UNUSED(is_fc_after_conv);
+
+ // Reshape weights if needed
+ if (!_are_weights_reshaped)
+ {
+ // Reshape the weights
+ _reshape_weights_output.allocator()->init(
+ weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights->info())));
+ _reshape_weights_kernel.configure(weights_to_use, &_reshape_weights_output);
+ weights_to_use = &_reshape_weights_output;
+ }
+
+ // Extract scale factor
+ _scale_factor.allocator()->init(
+ TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type()));
+ _memory_group.manage(&_scale_factor);
+ _scale_factor_kernel.configure(input, &_scale_factor);
+
+ // Quantize input
+ _quantized_input.allocator()->init(
+ input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ _memory_group.manage(&_quantized_input);
+ _quant_input_kernel.configure(input, &_scale_factor, &_quantized_input);
+
+ // GEMMLowp
+ _gemmlowp_output.allocator()->init(
+ output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ _memory_group.manage(&_gemmlowp_output);
+ configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output,
+ fc_info.retain_internal_weights);
+ _quantized_input.allocator()->allocate();
+
+ // Multiply scale
+ _multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output,
+ weights->info()->quantization_info().uniform().scale);
+ _gemmlowp_output.allocator()->allocate();
+ _scale_factor.allocator()->allocate();
+
+ _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
+}
+
+Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *biases, const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
+
+ bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
+ bool is_fc_after_conv = true;
+ const GPUTarget gpu_target = CLScheduler::get().target();
+
+ const ITensorInfo &reshaped_weights =
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
+
+ // Configure accumulate biases kernel for non quantized asymmetric types
+ if (biases != nullptr)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target));
+ }
+
+ // With the Fully Connected layer we can have 4 different cases:
+ // 1) Convolution layer -> Fully Connected layer without batches
+ // 2) Fully Connected layer -> Fully Connected layer without batches
+ // 3) Convolution layer -> Fully Connected layer with batches
+ // 4) Fully Connected layer -> Fully Connected layer with batches
+
+ const ITensorInfo *weights_to_use = weights;
+
+ // Check if we have a fully connected layer with batches
+ const bool is_batched_fc_layer = output->dimension(1) > 1;
+ if (is_batched_fc_layer)
+ {
+ is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(),
+ output->tensor_shape().cbegin() + 1));
+ }
+ else
+ {
+ is_fc_after_conv = input->num_dimensions() > 1 && input->dimension(1) > 1;
+ }
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_fc_after_conv,
+ "CLFullyConnectedHybridLayer does not support after conv");
+
+ if (!weights_reshaped)
+ {
+ // Validate reshape weights kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights));
+ weights_to_use = &reshaped_weights;
+ }
+
+ // Validate Scale factor kernel
+ const ITensorInfo &scale_factor =
+ TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type());
+ ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor));
+
+ // Validate quantization symm8 kernel
+ const ITensorInfo &quantized_input = TensorInfo(
+ input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input));
+
+ // Fully Connected layer after a Fully Connected Layer without batches
+ ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
+
+ // Validate matrix multiply kernel
+ const ITensorInfo &gemmlowp_output = TensorInfo(
+ output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
+
+ // Multiply scale
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output));
+
+ return Status{};
+}
+
+void CLFullyConnectedHybridLayer::run()
+{
+ prepare();
+
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ // Extract scale_factor
+ CLScheduler::get().enqueue(_scale_factor_kernel);
+
+ // Quantize input
+ CLScheduler::get().enqueue(_quant_input_kernel);
+
+ // Run matrix multiply
+ _mm_gemmlowp.run();
+
+ // Multiply scale factor
+ CLScheduler::get().enqueue(_multiply_scale_kernel);
+
+ // Accumulate biases if provided
+ if (_accumulate_biases)
+ {
+ CLScheduler::get().enqueue(_accumulate_biases_kernel);
+ }
+}
+
+void CLFullyConnectedHybridLayer::prepare()
+{
+ if (!_is_prepared)
+ {
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+ auto release_unused = [](CLTensor *w) {
+ if (!w->is_used())
+ {
+ CLScheduler::get().queue().finish();
+ w->allocator()->free();
+ }
+ };
+
+ // Reshape of the weights if needed (happens only once)
+ if (!_are_weights_reshaped)
+ {
+ // Run reshape weights kernel and mark weights as unused
+ _reshape_weights_output.allocator()->allocate();
+ _reshape_weights_kernel.run();
+
+ _are_weights_reshaped = true;
+ // We can not release _original_weights because it can be used in other nodes
+ }
+
+    // Prepare GEMM and release unused weights
+ _mm_gemmlowp.prepare();
+
+ // Release reshaped weights if unused
+ release_unused(&_reshape_weights_output);
+
+ _is_prepared = true;
+ }
+}
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
new file mode 100644
index 000000000..63e291b36
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp
@@ -0,0 +1,583 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h"
+
+#include "arm_compute/core/Size2D.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/Cast.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/ToolchainSupport.h"
+
+#include <algorithm>
+
+namespace arm_compute
+{
+using namespace arm_compute::misc::shape_calculator;
+using namespace arm_compute::utils::cast;
+
+namespace
+{
+Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorInfo &weights,
+ const ITensorInfo &output,
+ GEMMLowpOutputStageInfo &gemmlowp_output_stage)
+{
+ gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
+ gemmlowp_output_stage.gemmlowp_offset = 0;
+ gemmlowp_output_stage.gemmlowp_multiplier = 0;
+ gemmlowp_output_stage.gemmlowp_shift = 0;
+
+ // Configure output stage for quantized case
+ if (is_data_type_quantized_asymmetric(input.data_type()))
+ {
+ const UniformQuantizationInfo iq_info = input.quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output.quantization_info().uniform();
+
+ const auto output_quant_info = (output.total_size() == 0) ? iq_info : oq_info;
+
+ const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale;
+ int output_multiplier = 0;
+ int output_shift = 0;
+ ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(
+ multiplier, &output_multiplier, &output_shift));
+
+ // Set the GEMMLowp output stage info
+ gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset;
+ gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier;
+ gemmlowp_output_stage.gemmlowp_shift = output_shift;
+ gemmlowp_output_stage.gemmlowp_min_bound = 0;
+ gemmlowp_output_stage.gemmlowp_max_bound = 255;
+ gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier);
+ gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift);
+ }
+
+ return Status{};
+}
+
+Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo *bias,
+ const ITensorInfo &output, const FullyConnectedLayerInfo &fc_info)
+{
+ GEMMLowpOutputStageInfo gemmlowp_output_stage;
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage));
+
+ const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
+ false, // is_b_reshaped
+ true, // reshape_b_only_on_first_run
+ 0, // depth_output_gemm3d
+ false, // reinterpret_input_as_3d
+ fc_info.retain_internal_weights, // retain_internal_weights
+ gemmlowp_output_stage, // gemmlowp_output_stage
+ fc_info.fp_mixed_precision, // fp_mixed_precision
+ true, // broadcast_bias
+ ActivationLayerInfo()); // activation_info
+
+ if (is_data_type_quantized_asymmetric(input.data_type()))
+ {
+ const UniformQuantizationInfo iq_info = input.quantization_info().uniform();
+ const UniformQuantizationInfo wq_info = weights.quantization_info().uniform();
+
+ // Since we need negative offsets for computing convolution, we need to change
+ // QuantizationInfo()
+ // Extract and negate input and weights offset
+ const QuantizationInfo input_quantization_info(iq_info.scale, -iq_info.offset);
+ const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset);
+
+ // Validate gemmlowp function
+ ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(
+ &input.clone()->set_quantization_info(input_quantization_info),
+ &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output,
+ gemm_info));
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info));
+ }
+
+ return Status{};
+}
+} // namespace
+
+void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output)
+{
+ auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>();
+ k->configure(input, output);
+ _kernel = std::move(k);
+}
+
+Status CLFullyConnectedLayerReshapeWeightsEx::validate(const ITensorInfo *input,
+ const ITensorInfo *output)
+{
+ return CLTransposeKernel::validate(input, output);
+}
+
+CLFullyConnectedLayerEx::CLFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager,
+ IWeightsManager *weights_manager)
+ : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(),
+ _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(),
+ _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager),
+ _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(),
+ _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true),
+ _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr)
+{
+}
+void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output,
+ const FullyConnectedLayerInfo &fc_info)
+{
+ GEMMLowpOutputStageInfo gemmlowp_output_stage;
+ construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(),
+ gemmlowp_output_stage);
+
+ const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped
+ false, // is_b_reshaped
+ true, // reshape_b_only_on_first_run
+ 0, // depth_output_gemm3d
+ false, // reinterpret_input_as_3d
+ fc_info.retain_internal_weights, // retain_internal_weights
+ gemmlowp_output_stage, // gemmlowp_output_stage
+ fc_info.fp_mixed_precision, // fp_mixed_precision
+ true, // broadcast_bias
+ ActivationLayerInfo()); // activation_info
+
+ if (_is_quantized)
+ {
+ // Since we need negative offsets for computing convolution, we need to change
+ // QuantizationInfo()
+ // Extract and negate input and weights offset
+ const QuantizationInfo input_quantization_info = input->info()->quantization_info();
+ const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
+
+ input->info()->set_quantization_info(QuantizationInfo(
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ weights->info()->set_quantization_info(QuantizationInfo(
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
+
+ // Configure gemmlowp function
+ _mm_gemmlowp.configure(input, weights, bias, output, gemm_info);
+
+    // Restore the original QuantizationInfo, as input and weights could be used in other fully
+    // connected layers
+ input->info()->set_quantization_info(input_quantization_info);
+ weights->info()->set_quantization_info(weights_quantization_info);
+ }
+ else
+ {
+ // Configure matrix multiply kernel
+ _mm_gemm.configure(input, weights, bias, output, 1.f, 1.f, gemm_info);
+ }
+}
+
+void CLFullyConnectedLayerEx::configure_conv_fc(const ICLTensor *input, const ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output,
+ const FullyConnectedLayerInfo &fc_info)
+{
+ ARM_COMPUTE_ERROR_ON(
+ (weights->info()->dimension(1) !=
+ (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))));
+
+ // If the fully connected layer is called after a convolution layer, the input tensor must be
+ // linearized
+
+ // Initialize output tensor for flatten
+ TensorShape shape_flatten = compute_flatten_shape(input->info());
+ _flatten_output.allocator()->init(input->info()
+ ->clone()
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(shape_flatten)
+ .set_data_layout(DataLayout::NCHW));
+
+ // Configure flatten kernel
+ _memory_group.manage(&_flatten_output);
+ _flatten_layer.configure(input, &_flatten_output);
+
+ // Configure matrix multiply kernel
+ configure_mm(&_flatten_output, weights, bias, output, fc_info);
+
+ // Allocate the output tensor for flatten once all the configure methods have been called
+ _flatten_output.allocator()->allocate();
+}
+
+void CLFullyConnectedLayerEx::configure_fc_fc(const ICLTensor *input, const ICLTensor *weights,
+ const ICLTensor *bias, ICLTensor *output,
+ const FullyConnectedLayerInfo &fc_info)
+{
+ ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1));
+
+ // Configure matrix multiply kernel
+ configure_mm(input, weights, bias, output, fc_info);
+}
+
+void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor *weights,
+ const ICLTensor *biases, ICLTensor *output,
+ FullyConnectedLayerInfo fc_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+ // Perform validate step
+ ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedLayerEx::validate(
+ input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(),
+ fc_info));
+
+ _are_weights_converted = true;
+ _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
+ _is_fc_after_conv = true;
+ _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
+ _is_prepared = fc_info.retain_internal_weights;
+ _original_weights = weights;
+
+ if (_weights_manager)
+ {
+ _weights_manager->manage(weights);
+ }
+
+ const ICLTensor *weights_to_use = weights;
+
+ // With the Fully Connected layer we can have 4 different cases:
+ // 1) Convolution layer -> Fully Connected layer without batches
+ // 2) Fully Connected layer -> Fully Connected layer without batches
+ // 3) Convolution layer -> Fully Connected layer with batches
+ // 4) Fully Connected layer -> Fully Connected layer with batches
+
+ // Check if we have a fully connected layer with batches
+ const bool is_batched_fc_layer = output->info()->dimension(1) > 1;
+ if (is_batched_fc_layer)
+ {
+ _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->info()->tensor_shape().cbegin() + 3,
+ input->info()->tensor_shape().cend(),
+ output->info()->tensor_shape().cbegin() + 1));
+ }
+ else
+ {
+ _is_fc_after_conv = input->info()->num_dimensions() > 1;
+ }
+
+ // Reshape weights if needed
+ if (!_are_weights_reshaped)
+ {
+ if (_weights_manager && _weights_manager->are_weights_managed(weights))
+ {
+ _reshape_weights_managed_function.configure(weights);
+ weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(
+ _weights_manager->acquire(weights, &_reshape_weights_managed_function));
+ }
+ else
+ {
+ // Reshape the weights
+ _reshape_weights_function.configure(weights, &_reshape_weights_output);
+ weights_to_use = &_reshape_weights_output;
+ }
+ }
+
+ // Convert weights if needed
+ if (_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout))
+ {
+ if (_weights_manager && _weights_manager->are_weights_managed(weights_to_use))
+ {
+ _convert_weights_managed.configure(weights_to_use, input->info()->tensor_shape(),
+ fc_info.weights_trained_layout);
+ weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>(
+ _weights_manager->acquire(weights, &_convert_weights_managed));
+ }
+ else
+ {
+ // Convert weights
+ _convert_weights.configure(weights_to_use, &_converted_weights_output,
+ input->info()->tensor_shape(), fc_info.weights_trained_layout);
+
+ weights_to_use = &_converted_weights_output;
+ }
+ _are_weights_converted = false;
+ }
+
+ if (_is_fc_after_conv)
+ {
+ // Fully Connected layer after a Convolution Layer without batches
+ configure_conv_fc(input, weights_to_use, biases, output, fc_info);
+ }
+ else
+ {
+ // Fully Connected layer after a Fully Connected Layer without batches
+ configure_fc_fc(input, weights_to_use, biases, output, fc_info);
+ }
+}
+
+Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights,
+ const ITensorInfo *biases, const ITensorInfo *output,
+ FullyConnectedLayerInfo fc_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
+ DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2);
+
+ bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
+ bool is_fc_after_conv = true;
+
+ const ITensorInfo &flatten_input = TensorInfo(input->clone()
+ ->set_is_resizable(true)
+ .reset_padding()
+ .set_tensor_shape(compute_flatten_shape(input))
+ .set_data_layout(DataLayout::NCHW));
+ const ITensorInfo &reshaped_weights =
+ TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(
+ compute_transposed_shape(*weights)));
+ const ITensorInfo &converted_weights =
+ weights_reshaped ? TensorInfo(weights->clone()->set_is_resizable(true).reset_padding())
+ : TensorInfo(*reshaped_weights.clone());
+
+ // With the Fully Connected layer we can have 4 different cases:
+ // 1) Convolution layer -> Fully Connected layer without batches
+ // 2) Fully Connected layer -> Fully Connected layer without batches
+ // 3) Convolution layer -> Fully Connected layer with batches
+ // 4) Fully Connected layer -> Fully Connected layer with batches
+
+ const ITensorInfo *input_to_use = input;
+ const ITensorInfo *weights_to_use = weights;
+
+ // Check if we have a fully connected layer with batches
+ const bool is_batched_fc_layer = output->dimension(1) > 1;
+ if (is_batched_fc_layer)
+ {
+ is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) &&
+ (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(),
+ output->tensor_shape().cbegin() + 1));
+ }
+ else
+ {
+ is_fc_after_conv = input->num_dimensions() > 1;
+ }
+
+ if (!weights_reshaped)
+ {
+ // Validate reshape weights kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights));
+ weights_to_use = &reshaped_weights;
+ }
+
+ if (is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout))
+ {
+ // Validate convert weights kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(CLConvertFullyConnectedWeights::validate(
+ weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout));
+ weights_to_use = &converted_weights;
+ }
+
+ if (is_fc_after_conv)
+ {
+ // Fully Connected layer after a Convolution Layer without batches
+ ARM_COMPUTE_RETURN_ERROR_ON(
+ (weights_to_use->dimension(1) !=
+ (input->dimension(0) * input->dimension(1) * input->dimension(2))));
+
+ // Validate flatten kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayer::validate(input, &flatten_input));
+ input_to_use = &flatten_input;
+ }
+ else
+ {
+ // Fully Connected layer after a Fully Connected Layer without batches
+ ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1));
+ }
+
+ // Validate matrix multiply kernel
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info));
+
+ return Status{};
+}
+
+void CLFullyConnectedLayerEx::run()
+{
+ if (!_is_prepared)
+ {
+ if (!_are_weights_reshaped)
+ _reshape_weights_output.allocator()->allocate();
+ if (!_are_weights_converted)
+ _converted_weights_output.allocator()->allocate();
+ _is_prepared = true;
+ }
+
+ {
+ if (!_weights_manager)
+ {
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+ }
+
+ // Pointer to current weights
+ const ICLTensor *cur_weights = _original_weights;
+ // Reshape of the weights
+ if (!_are_weights_reshaped)
+ {
+ if (_weights_manager && _weights_manager->are_weights_managed(cur_weights))
+ {
+ _original_weights = utils::cast::polymorphic_downcast<ICLTensor *>(
+ _weights_manager->run(cur_weights, &_reshape_weights_managed_function));
+ }
+ else
+ {
+ _reshape_weights_function.run();
+ cur_weights = &_reshape_weights_output;
+ }
+ }
+
+ // Convert weights if needed
+ if (!_are_weights_converted)
+ {
+ if (_weights_manager && _weights_manager->are_weights_managed(cur_weights))
+ {
+ _weights_manager->run(cur_weights, &_convert_weights_managed);
+ }
+ else
+ {
+ _convert_weights.run();
+ }
+ }
+
+    // Prepare GEMM
+ if (!_is_quantized)
+ {
+ _mm_gemm.prepare();
+ }
+ }
+
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ // Linearize input if it comes from a convolutional layer
+ if (_is_fc_after_conv)
+ {
+ _flatten_layer.run();
+ }
+
+ // Run matrix multiply
+ if (_is_quantized)
+ {
+ _mm_gemmlowp.run();
+ }
+ else
+ {
+ _mm_gemm.run();
+ }
+}
+
+void CLFullyConnectedLayerEx::prepare()
+{
+#if 0 // TODO Remove this block
+ if(!_is_prepared)
+ {
+ if(!_weights_manager)
+ {
+ ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+ }
+
+ auto release_unused = [](CLTensor * w)
+ {
+ if(!w->is_used())
+ {
+ CLScheduler::get().queue().finish();
+ w->allocator()->free();
+ }
+ };
+
+ // Pointer to current weights
+ const ICLTensor *cur_weights = _original_weights;
+
+ // Reshape of the weights if needed (happens only once)
+ if(!_are_weights_reshaped)
+ {
+ if(_weights_manager && _weights_manager->are_weights_managed(_original_weights))
+ {
+ cur_weights = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->run(cur_weights, &_reshape_weights_managed_function));
+ }
+ else
+ {
+ // Run reshape weights kernel and mark weights as unused
+ _reshape_weights_output.allocator()->allocate();
+ _reshape_weights_function.run();
+
+ cur_weights->mark_as_unused();
+ cur_weights = &_reshape_weights_output;
+ }
+ _are_weights_reshaped = true;
+ }
+
+ // Convert weights if needed (happens only once)
+ if(!_are_weights_converted)
+ {
+ if(_weights_manager && _weights_manager->are_weights_managed(cur_weights))
+ {
+ _weights_manager->run(cur_weights, &_convert_weights_managed);
+ }
+ else
+ {
+ _converted_weights_output.allocator()->allocate();
+ _convert_weights.run();
+ cur_weights->mark_as_unused();
+ }
+
+ _are_weights_converted = true;
+ }
+
+ // Release reshaped weights if unused
+ release_unused(&_reshape_weights_output);
+
+ // Prepare GEMM prepare and release unused weights
+ if(!_is_quantized)
+ {
+ _mm_gemm.prepare();
+ }
+
+ // Release converted weights if unused
+ release_unused(&_reshape_weights_output);
+ release_unused(&_converted_weights_output);
+
+ _is_prepared = true;
+ }
+#endif
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
index c6b166163..9aebc473e 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp
@@ -16,13 +16,18 @@
#include "arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h"
+#include <arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h>
+#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h>
+#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h>
+
using namespace arm_compute;
void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *input,
const arm_compute::ICLTensor *weights,
const arm_compute::ICLTensor *biases,
arm_compute::ICLTensor *output, bool needs_reshape,
- const arm_compute::TensorShape &reshape)
+ const arm_compute::TensorShape &reshape,
+ KernelType kernel_type)
{
_input = input;
_weights = weights;
@@ -30,6 +35,7 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
_output = output;
_needs_reshape = needs_reshape;
+ const ICLTensor *input_to_use = input;
if (_needs_reshape)
{
// reshape
@@ -37,16 +43,44 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp
_input->info()->clone()->set_tensor_shape(reshape).set_data_layout(
_input->info()->data_layout()));
_cl_reshape.configure(_input, &_cl_buffer);
+ input_to_use = &_cl_buffer;
+ }
+
+ _cl_fc = [&]() {
+ if (kernel_type == KernelType::GENERAL)
+ {
+ auto fc = new arm_compute::CLFullyConnectedLayerEx{_memory_manager};
+ fc->configure(input_to_use, _weights, _biases, _output);
+ return std::unique_ptr<arm_compute::IFunction>(fc);
+ }
+ else
+ {
+ assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS);
+
+ bool is_hybrid = (input->info()->data_type() == DataType::F32 ||
+ input->info()->data_type() == DataType::F16) &&
+ weights->info()->data_type() == DataType::S8;
- _cl_fc.configure(&_cl_buffer, _weights, _biases, _output);
+ if (is_hybrid)
+ {
+ auto fc = new arm_compute::CLFullyConnectedHybridLayer{_memory_manager};
+ fc->configure(input_to_use, _weights, _biases, _output);
+ return std::unique_ptr<arm_compute::IFunction>(fc);
+ }
+ else
+ {
+ auto fc = new arm_compute::CLFullyConnectedLayer{_memory_manager};
+ fc->configure(input_to_use, _weights, _biases, _output);
+ return std::unique_ptr<arm_compute::IFunction>(fc);
+ }
+ }
+ }();
+ if (_needs_reshape)
+ {
// NOTE _cl_buffer is inaccessible from outside, and thus it is safe to invoke allocate here.
_cl_buffer.allocator()->allocate();
}
- else
- {
- _cl_fc.configure(_input, _weights, _biases, _output);
- }
}
void CLFullyConnectedReshapingLayer::run(void)
@@ -54,7 +88,7 @@ void CLFullyConnectedReshapingLayer::run(void)
if (_needs_reshape)
_cl_reshape.run();
- _cl_fc.run();
+ _cl_fc->run();
}
-void CLFullyConnectedReshapingLayer::prepare(void) { _cl_fc.prepare(); }
+void CLFullyConnectedReshapingLayer::prepare(void) { _cl_fc->prepare(); }
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp
new file mode 100644
index 000000000..ca5499dfc
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+
+namespace arm_compute
+{
+using namespace arm_compute::misc::shape_calculator;
+using namespace arm_compute::cl_gemm;
+
+namespace
+{
+inline bool is_gemm_reshaped(bool reshape_b_only_on_first_run, GPUTarget gpu_target)
+{
+ return (get_arch_from_target(gpu_target) != GPUTarget::MIDGARD) && (reshape_b_only_on_first_run);
+}
+} // namespace
+
+CLGEMMLowpMatrixMultiplyCoreEx::CLGEMMLowpMatrixMultiplyCoreEx(
+ std::shared_ptr<IMemoryManager> memory_manager)
+ : _memory_group(std::move(memory_manager)), _mm_midgard_kernel(), _mtx_a_reduction_kernel(),
+ _mtx_b_reduction_kernel(), _vector_sum_col(), _vector_sum_row(), _a_offset(0), _b_offset(0),
+ _reshape_b_only_on_first_run(false), _is_prepared(false)
+{
+}
+
+void CLGEMMLowpMatrixMultiplyCoreEx::configure(const ICLTensor *a, const ICLTensor *b,
+ const ICLTensor *c, ICLTensor *output,
+ const GEMMInfo &gemm_info)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
+ ARM_COMPUTE_UNUSED(c);
+ ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCoreEx::validate(
+ a->info(), b->info(), c != nullptr ? c->info() : nullptr, output->info(), gemm_info));
+
+ _is_prepared = false;
+ _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
+ _a_offset = a->info()->quantization_info().uniform().offset;
+ _b_offset = b->info()->quantization_info().uniform().offset;
+
+ // Get the GPU target
+ const GPUTarget gpu_target = CLScheduler::get().target();
+
+ // Set the target for the kernels
+ _mm_midgard_kernel.set_target(gpu_target);
+
+ // GEMMRHSMatrixInfo rhs_info;
+ // GEMMLHSMatrixInfo lhs_info;
+
+ // Arguments used by GEMMReshapeInfo
+ // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m,
+ // n, k, mult_transpose1xW_width and mult_interleave4x4_height to CLGEMMReshapeInfo
+ // in order to know how the matrices have been reshaped
+ bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
+ const unsigned int m = reinterpret_input_as_3d
+ ? (a->info()->dimension(1) * a->info()->dimension(2))
+ : a->info()->dimension(1);
+ const unsigned int n = b->info()->dimension(0);
+ const unsigned int k = a->info()->dimension(0);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
+
+ const ICLTensor *matrix_b = b;
+ // Configure matrix multiply kernel
+ _mm_midgard_kernel.configure(
+ a, matrix_b, output,
+ GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
+}
+
+Status CLGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITensorInfo *b,
+ const ITensorInfo *c, const ITensorInfo *output,
+ const GEMMInfo &gemm_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::S8);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);
+ ARM_COMPUTE_UNUSED(c);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(),
+ "Matrix A already reshaped is not supported");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(),
+ "Matrix B already reshaped is not supported");
+
+ const ITensorInfo *matrix_a_info = a;
+
+ // Get the GPU target
+ const GPUTarget gpu_target = CLScheduler::get().target();
+
+ bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
+ const unsigned int m =
+ reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1);
+ const unsigned int n = b->dimension(0);
+ const unsigned int k = a->dimension(0);
+ const int depth_output_gemm3d = gemm_info.depth_output_gemm3d();
+
+ bool reshape_matrix_b = is_gemm_reshaped(gemm_info.reshape_b_only_on_first_run(), gpu_target);
+
+ const GEMMReshapeInfo reshape_info =
+ GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d);
+
+ TensorInfo weights_info(*b);
+ const ITensorInfo *matrix_b_info = &weights_info;
+ if (reshape_matrix_b)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(false,
+ "CLGEMMLowpMatrixMultiplyCoreEx does not support reshape_b");
+ }
+
+ // Validate matrix multiply
+ ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyKernelEx::validate(
+ matrix_a_info, matrix_b_info, output, reshape_info));
+
+ return Status{};
+}
+
+void CLGEMMLowpMatrixMultiplyCoreEx::run()
+{
+ prepare();
+
+ MemoryGroupResourceScope scope_mg(_memory_group);
+
+ // Run matrix multiply
+ CLScheduler::get().enqueue(_mm_midgard_kernel, false);
+}
+
+void CLGEMMLowpMatrixMultiplyCoreEx::prepare()
+{
+ if (!_is_prepared)
+ {
+ _is_prepared = true;
+ }
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
index 6cad9bd2e..f594d7a2e 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLGatherEx.h"
#include "arm_compute/core/CL/ICLTensor.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
index 7180e9356..27ed8e828 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLHashtableLookup.h"
#include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
index 86ea5a66d..80393e8d1 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h"
#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
index be35ea732..28e5bc0da 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLNeg.h"
#include "arm_compute/core/CL/kernels/CLNegKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
index 38adedd10..fbb15ab1d 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLPReLU.h"
#include "arm_compute/core/CL/kernels/CLPReLUKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp
index 2a34c0664..6049b7e70 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/CL/functions/CLRNNLayerEx.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
index 13a25c901..8ce2d746c 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLReduceOperation.h"
#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp
index c03826891..1f946d37b 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLSpaceToBatchND.h"
#include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp
index 0f455f96f..7d7b2264b 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h"
#include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
index 80d50ad94..3ac95a8e6 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLTopKV2.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
index 40e21671d..e61746ef2 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h"
#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
index 0ce3e6700..07feb5a64 100644
--- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
+++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,11 +13,37 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/core/CL/ICLTensor.h"
#include <cmath>
#include <memory>
@@ -54,7 +79,7 @@ void CLTransposeConvLayerUpsample::run()
_output->map(CLScheduler::get().queue(), true);
if (is_data_type_quantized_asymmetric(_output->info()->data_type()))
{
- const uint8_t quantized_zero = _output->info()->quantization_info().offset;
+ const uint8_t quantized_zero = _output->info()->quantization_info().uniform().offset;
std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero);
}
else
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp
index a95018a28..5405934ad 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp
@@ -1,6 +1,21 @@
/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,23 +37,18 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h"
-#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h"
+#include "arm_compute/runtime/CPP/functions/CPPOneHotEx.h"
+
+#include "arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h"
#include "support/ToolchainSupport.h"
-#include <utility>
+using namespace arm_compute;
-namespace arm_compute
-{
-void NENegLayer::configure(const ITensor *input, ITensor *output)
+void CPPOneHotEx::configure(const ITensor *indices, ITensor *output, const int depth,
+ const float on_value, const float off_value, const int axis)
{
- auto k = arm_compute::support::cpp14::make_unique<NEElementwiseUnaryKernelEx>();
- k->configure(ElementWiseUnaryEx::NEG, input, output);
+ auto k = arm_compute::support::cpp14::make_unique<CPPOneHotKernelEx>();
+ k->configure(indices, output, depth, on_value, off_value, axis);
_kernel = std::move(k);
}
-Status NENegLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
- return NEElementwiseUnaryKernelEx::validate(ElementWiseUnaryEx::NEG, input, output);
-}
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp
index f8e0ef8a6..6c90ef3b4 100644
--- a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h"
#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp
new file mode 100644
index 000000000..ff81ff854
--- /dev/null
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/NEON/functions/NEActivationLayerEx.h"
+
+#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h"
+#include "arm_compute/runtime/IRuntimeContext.h"
+#include "support/ToolchainSupport.h"
+
+namespace arm_compute
+{
+NEActivationLayerEx::NEActivationLayerEx(IRuntimeContext *ctx) // NOLINT
+ : INESimpleFunctionNoBorder(ctx)
+{
+}
+void NEActivationLayerEx::configure(ITensor *input, ITensor *output,
+ ActivationLayerInfo activation_info)
+{
+ auto k = arm_compute::support::cpp14::make_unique<NEActivationLayerKernelEx>();
+ k->configure(input, output, activation_info);
+ _kernel = std::move(k);
+}
+
+Status NEActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ActivationLayerInfo &act_info)
+{
+ return NEActivationLayerKernelEx::validate(input, output, act_info);
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp
deleted file mode 100644
index 5ba465b61..000000000
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEArgMinMax.h"
-
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-
-template <ReductionOperation OP>
-NEArgMinMaxStatic<OP>::NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernel(), _reduced_out(), _reshape()
-{
-}
-
-template <ReductionOperation OP>
-Status NEArgMinMaxStatic<OP>::validate(const ITensorInfo *input, int axis,
- const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
- DataType::F32);
-
- TensorShape out_shape = input->tensor_shape();
- const int input_dims = input->num_dimensions();
- int axis_local = axis;
-
- // Convert negative axis
- axis_local = wrap_around(axis_local, input_dims);
-
- ARM_COMPUTE_RETURN_ERROR_ON(axis_local > 3);
- ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local) > input->num_dimensions() - 1);
- out_shape.remove_dimension(axis_local);
-
- const TensorInfo out_info = output->clone()->set_tensor_shape(out_shape);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
-
- return Status{};
-}
-
-template <ReductionOperation OP>
-void NEArgMinMaxStatic<OP>::configure(ITensor *input, int axis, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input);
-
- int axis_local = axis;
- const int input_dims = input->info()->num_dimensions();
-
- // Convert negative axis
- axis_local = wrap_around(axis_local, input_dims);
-
- // Perform reduction for axis
- TensorShape intermediate_shape = input->info()->tensor_shape();
- intermediate_shape.set(axis_local, 1);
- auto in = input;
-
- _reduced_out.allocator()->init(TensorInfo(intermediate_shape, output->info()->num_channels(),
- output->info()->data_type(),
- output->info()->quantization_info()));
- _memory_group.manage(&_reduced_out);
- _reduction_kernel.configure(in, axis_local, &_reduced_out, OP);
-
- // Allocate intermediate tensor
- _reduced_out.allocator()->allocate();
-
- // Configure reshape layer if we want to drop the dimensions
- TensorShape out_shape = input->info()->tensor_shape();
- out_shape.remove_dimension(axis_local);
- auto_init_if_empty(*output->info(), output->info()->clone()->set_tensor_shape(out_shape));
- _reshape.configure(&_reduced_out, output);
-}
-
-template <ReductionOperation OP> void NEArgMinMaxStatic<OP>::run()
-{
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- _reduction_kernel.run();
- _reshape.run();
-}
-
-// Supported Specializations
-template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
-template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
index 7c15fc453..e42c453cf 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h"
#include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h>
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp
index f2490e4e8..dc5c62061 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp
index db419e3a8..5ec0b8677 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
index 00c3ed94f..53fb15081 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h"
#include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
index d604fedbf..f45773251 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h"
#include "arm_compute/core/Helpers.h"
@@ -154,7 +170,7 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
// Multiply scale
_multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output,
- weights->info()->quantization_info().scale);
+ weights->info()->quantization_info().uniform().scale);
_are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
@@ -220,7 +236,7 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate(
- &gemmlowp_output, &scale_factor, output, weights->quantization_info().scale));
+ &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
return Status{};
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
index a944f699a..cb7557a5a 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h"
#include "arm_compute/core/Helpers.h"
@@ -46,10 +62,10 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
// Since we need negative offsets for computing convolution, we need to change
// QuantizationInfo()
// Extract and negate input and weights offset
- const QuantizationInfo input_quantization_info(input.quantization_info().scale,
- -input.quantization_info().offset);
- const QuantizationInfo weights_quantization_info(weights.quantization_info().scale,
- -weights.quantization_info().offset);
+ const QuantizationInfo input_quantization_info(input.quantization_info().uniform().scale,
+ -input.quantization_info().uniform().offset);
+ const QuantizationInfo weights_quantization_info(weights.quantization_info().uniform().scale,
+ -weights.quantization_info().uniform().offset);
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(
@@ -88,10 +104,10 @@ void NEFullyConnectedLayerEx::configure_mm(const ITensor *input, const ITensor *
const QuantizationInfo input_quantization_info = input->info()->quantization_info();
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
- input->info()->set_quantization_info(
- QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
- weights->info()->set_quantization_info(
- QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+ input->info()->set_quantization_info(QuantizationInfo(
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ weights->info()->set_quantization_info(QuantizationInfo(
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, nullptr, output);
@@ -236,15 +252,16 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
// Configure output stage for asymmetric quantized types
if (_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale *
- weights->info()->quantization_info().scale /
- output->info()->quantization_info().scale;
+ float multiplier = input->info()->quantization_info().uniform().scale *
+ weights->info()->quantization_info().uniform().scale /
+ output->info()->quantization_info().uniform().scale;
int output_multiplier;
int output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier,
&output_shift);
_gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier,
- output_shift, output->info()->quantization_info().offset);
+ output_shift,
+ output->info()->quantization_info().uniform().offset);
_gemmlowp_output.allocator()->allocate();
}
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp
index 11794a1ea..1290cfd39 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h"
#include "arm_compute/core/Error.h"
@@ -50,7 +66,7 @@ NEGEMMLowpMatrixMultiplyCoreEx::NEGEMMLowpMatrixMultiplyCoreEx(
_tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0),
_b_offset(0), _run_vector_matrix_multiplication(false), _assembly_path(false),
_fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false),
- _fuse_output_stage(false), _run_activation(false), _flip_signedness(false)
+ _fuse_output_stage(false), _flip_signedness(false)
{
}
@@ -71,8 +87,8 @@ void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor *
_mtx_b_reshape_kernel = nullptr;
// Set internal variables
- _a_offset = a->info()->quantization_info().offset;
- _b_offset = b->info()->quantization_info().offset;
+ _a_offset = a->info()->quantization_info().uniform().offset;
+ _b_offset = b->info()->quantization_info().uniform().offset;
_run_vector_matrix_multiplication = a->info()->dimension(1) < 2;
_reshape_b_only_on_first_run = info.reshape_b_only_on_first_run();
_is_prepared = false;
@@ -91,7 +107,6 @@ void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor *
}
#ifdef __aarch64__
-#if 0 // Can use after arm compute library v19.11
switch (a->info()->data_type())
{
case DataType::QASYMM8:
@@ -119,8 +134,6 @@ void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor *
break;
}
}
-#endif // 0
- ARM_COMPUTE_ERROR("aarch64 not supported");
#endif /* __aarch64__ */
if (!(_assembly_path || _run_vector_matrix_multiplication))
{
@@ -277,8 +290,8 @@ Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITen
TensorInfo tmp_b_info{};
TensorInfo mm_result_s32_info{};
- int32_t a_offset = a->quantization_info().offset;
- int32_t b_offset = b->quantization_info().offset;
+ int32_t a_offset = a->quantization_info().uniform().offset;
+ int32_t b_offset = b->quantization_info().uniform().offset;
bool fuse_output_stage = info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE;
if (fuse_output_stage)
@@ -291,19 +304,16 @@ Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITen
// Check if we need to run the optimized assembly kernel
bool run_optimised = false;
bool run_optimised_requantized = false;
- const bool reshape_b_only_on_first_run = info.reshape_b_only_on_first_run();
if (a_to_use->data_type() == DataType::QASYMM8 &&
info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
{
- run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, output, 1.f, 0.f,
- reshape_b_only_on_first_run));
+ run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info));
run_optimised_requantized = run_optimised;
}
else
{
run_optimised = bool(NEGEMMAssemblyDispatch::validate(
- a_to_use, b, fuse_output_stage ? &mm_result_s32_info : output, 1.f, 0.f,
- reshape_b_only_on_first_run));
+ a_to_use, b, c, fuse_output_stage ? &mm_result_s32_info : output, gemm_info));
}
if (run_optimised)
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
index 90dabb35a..c8bb88aea 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEGatherEx.h"
#include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
index 624185d2c..078019f4e 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp
@@ -1,6 +1,5 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h"
#include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
index 1c2c8f027..16d74e62d 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp
index 1150cef76..dac3b849d 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEPReLU.h"
#include "arm_compute/core/NEON/kernels/NEPReLUKernel.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp
index 84411c266..0e9a5e969 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp
index c65e93570..116bba3c0 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEReduceMeanEx.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
index b36f8287a..aedb537e9 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,11 +37,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEReduceOperation.h"
#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/runtime/Tensor.h"
using namespace arm_compute;
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
index 3c18217ef..26a887912 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEReduceSum.h"
#include "arm_compute/core/CPP/Validate.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp
index c3431c418..2aa0d2d4b 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h"
#include "arm_compute/core/Helpers.h"
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp
index c9f914fb0..198bb7672 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp
index b6ae21cc0..97697e3ea 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
index fd15ef05f..df0689273 100644
--- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
+++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
@@ -1,5 +1,20 @@
/*
* Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
* Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
@@ -22,6 +37,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
#include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h"
#include "arm_compute/core/Helpers.h"