diff options
author | Chunseok Lee <chunseok.lee@samsung.com> | 2020-04-23 14:45:49 +0900 |
---|---|---|
committer | Chunseok Lee <chunseok.lee@samsung.com> | 2020-04-23 14:45:49 +0900 |
commit | e2ef8438a24f7c56a0744eb579a6e293ee2fbf8e (patch) | |
tree | 44a1a7951d168dd4370e13593ed03f4bc6d920c5 /compute/ARMComputeEx/src | |
parent | 302e6564a7a76109e1178207e44e45a58631c477 (diff) | |
download | nnfw-e2ef8438a24f7c56a0744eb579a6e293ee2fbf8e.tar.gz nnfw-e2ef8438a24f7c56a0744eb579a6e293ee2fbf8e.tar.bz2 nnfw-e2ef8438a24f7c56a0744eb579a6e293ee2fbf8e.zip |
Imported Upstream version 1.4.0 (tags: upstream/1.4.0, submit/tizen/20200423.054851)
Diffstat (limited to 'compute/ARMComputeEx/src')
107 files changed, 5647 insertions, 278 deletions
diff --git a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp index 7d4760600..191a5bc2a 100644 --- a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp +++ b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/CLKernelLibraryEx.h" @@ -53,13 +69,16 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map {"gather_ex", "gather_ex.cl"}, {"gather_ex_1d", "gather_ex.cl"}, {"gather_ex_1d_out", "gather_ex.cl"}, + {"gemmlowp_mm_midgard_ex", "gemmlowp_ex.cl"}, {"hashtable_lookup", "hashtable_lookup.cl"}, {"instance_normalization_ex", "instance_normalization_ex.cl"}, + {"multiply_scale_factor", "multiply_scale_factor.cl"}, {"neg_tensor", "neg_tensor.cl"}, {"permute_generic", "permute_ex.cl"}, {"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"}, {"prelu", "prelu.cl"}, {"prelu_qasymm8", "prelu_quantized.cl"}, + {"quantization_symm8", "quantization_symm8.cl"}, {"reduce_min_max", "reduce_operation.cl"}, {"reduce_sum_mean", "reduce_operation.cl"}, {"topkv2_init", "topkv2.cl"}, @@ -71,6 +90,7 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map {"radixsort_pastehistograms", "topkv2_radixsort.cl"}, {"radixsort_reorder", "topkv2_radixsort.cl"}, {"topkv2_quicksort", "topkv2_quicksort.cl"}, + {"scale_factor_symm8", "scale_factor.cl"}, {"space_to_batch_4d_nchw", "space_to_batch.cl"}, {"space_to_batch_4d_nhwc", "space_to_batch.cl"}, {"space_to_depth_nchw", "space_to_depth.cl"}, @@ -100,6 +120,10 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map #include "./cl_kernels/gather_ex.clembed" }, { + "gemmlowp_ex.cl", +#include "./cl_kernels/gemmlowp_ex.clembed" + }, + { "hashtable_lookup.cl", #include "./cl_kernels/hashtable_lookup.clembed" }, @@ -120,6 +144,10 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_program_source_map #include "./cl_kernels/binary_logical_op.clembed" }, { + "multiply_scale_factor.cl", +#include "./cl_kernels/multiply_scale_factor.clembed" + }, + { "neg_tensor.cl", #include "./cl_kernels/neg_tensor.clembed" }, @@ -132,10 +160,18 @@ const std::map<std::string, std::string> 
CLKernelLibraryEx::_program_source_map #include "./cl_kernels/prelu_quantized.clembed" }, { + "quantization_symm8.cl", +#include "./cl_kernels/quantization_symm8.clembed" + }, + { "reduce_operation.cl", #include "./cl_kernels/reduce_operation.clembed" }, { + "scale_factor.cl", +#include "./cl_kernels/scale_factor.clembed" + }, + { "space_to_batch.cl", #include "./cl_kernels/space_to_batch.clembed" }, @@ -180,7 +216,7 @@ Kernel CLKernelLibraryEx::create_kernel(const std::string &kernel_name, if (_kernel_program_map.end() == kernel_program_it) { - ARM_COMPUTE_ERROR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str()); + ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str()); } std::string concat_str; @@ -261,7 +297,7 @@ const Program &CLKernelLibraryEx::load_program(const std::string &program_name) if (_program_source_map.end() == program_source_it) { - ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str()); + ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str()); } program = Program(_context, program_name, program_source_it->second); @@ -282,7 +318,7 @@ const Program &CLKernelLibraryEx::load_program(const std::string &program_name) } else { - ARM_COMPUTE_ERROR("Kernel file %s does not exist.", source_name.c_str()); + ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str()); } #endif /* EMBEDDED_KERNELS */ @@ -315,7 +351,7 @@ std::string CLKernelLibraryEx::get_program_source(const std::string &program_nam if (program_source_it == _program_source_map.end()) { - ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str()); + ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str()); } return program_source_it->second; diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl index 2a6dfc91f..03717cfe9 100644 --- 
a/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/arg_operation.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + #include "helpers.h" #if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl index 77e239f55..f74c1c103 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/arithmetic_op_quantized.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "helpers_asymm.h" #ifdef SATURATE diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl index 8c875516d..e249663bc 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/binary_logical_op.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "helpers.h" #ifndef VEC_SIZE diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl index 2342fda9f..4147a0017 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/cast.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "helpers.h" #ifndef SCALE diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl index e005322f7..0285c955b 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/depth_to_space.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "helpers.h" #if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE) && defined(Z_OUT) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl index dd8cb6d93..92e5dfbee 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/embedding_lookup.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "helpers.h" #ifndef VEC_SIZE diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl index 09f776156..2236021f1 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/gather_ex.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "helpers.h" #if defined(DATA_TYPE) && defined(AXIS) && defined(INDICES_DIM) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl new file mode 100644 index 000000000..80ba73d1d --- /dev/null +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/gemmlowp_ex.cl @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "helpers.h" + +#if defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) && \ + defined(COLS_A) +#define VECTOR_CHAR VEC_DATA_TYPE(char, NUM_ELEMS_PROCESSED_PER_THREAD_X) +#define VECTOR_INT VEC_DATA_TYPE(int, NUM_ELEMS_PROCESSED_PER_THREAD_X) +#define VECTOR_FLOAT VEC_DATA_TYPE(float, NUM_ELEMS_PROCESSED_PER_THREAD_X) +/** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B + * (src1) in case both matrices have not been reshaped + * + * @attention The number of matrix A columns needs to be passed at compile time using -DCOLS_A + * + * @note In case the input or output have to be reinterpreted as a 3D tensor, the following + * information must be passed at compile time: + * -# REINTERPRET_INPUT_AS_3D: To reinterpret the input as 3D + * -# REINTERPRET_OUTPUT_AS_3D: To reinterpret the output as 3D + * -# HEIGHT_GEMM3D: The height of the output in case it has to be reinterpreted as a 3D + * tensor. + * -# DEPTH_GEMM3D: The depth of the output in case it has to be reinterpreted as a 3D tensor + * (HEIGHT_GEMM3D * DEPTH_GEMM3D) = columns matrix A NOT reshaped + * + * @param[in] src0_ptr Pointer to the source matrix.
Supported data type: + * QASYMM8 + * @param[in] src0_stride_x Stride of the source matrix in X dimension (in + * bytes) + * @param[in] src0_step_x src_stride_x * number of elements along X + * processed per workitem(in bytes) + * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in + * bytes) + * @param[in] src0_step_y src_stride_y * number of elements along Y + * processed per workitem(in bytes) + * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source + * matrix + * @param[in] src1_ptr Pointer to the source matrix. Supported data type: + * same as @p src0_ptr + * @param[in] src1_stride_x Stride of the source matrix in X dimension (in + * bytes) + * @param[in] src1_step_x src_stride_x * number of elements along X + * processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in + * bytes) + * @param[in] src1_step_y src_stride_y * number of elements along Y + * processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source + * matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data + * type: S32 + * @param[in] dst_stride_x Stride of the destination matrix in X dimension + * (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X + * processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension + * (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y + * processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination + * matrix + * @param[in] src0_stride_z Stride of the source matrix in Z dimension (in + * bytes) + * @param[in] src1_stride_z Stride of the source matrix in Z dimension (in + * bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension + * (in bytes) + * @param[in] 
src_cross_plane_pad (Optional) Bottom paddings in unit of elements for + * the input tensor (only if defined REINTERPRET_INPUT_AS_3D) + * @param[in] dst_cross_plane_pad (Optional) Bottom paddings in unit of elements for + * the output tensor (only if defined REINTERPRET_OUTPUT_AS_3D) + */ +__kernel void gemmlowp_mm_midgard_ex(IMAGE_DECLARATION(src0), IMAGE_DECLARATION(src1), + IMAGE_DECLARATION(dst), uint src0_stride_z, uint src1_stride_z, + uint dst_stride_z +#if defined(REINTERPRET_INPUT_AS_3D) + , + uint src_cross_plane_pad +#endif // REINTERPRET_INPUT_AS_3D +#if defined(REINTERPRET_OUTPUT_AS_3D) + , + uint dst_cross_plane_pad +#endif // REINTERPRET_OUTPUT_AS_3D + ) +{ + int idx = get_global_id(0) * NUM_ELEMS_PROCESSED_PER_THREAD_X; + + // Compute starting address for matrix A and Matrix B + int2 src_addr = ((int2)(src0_offset_first_element_in_bytes, src1_offset_first_element_in_bytes)); + + // Update address for the matrix A + src_addr.s0 += get_global_id(1) * src0_stride_y * NUM_ELEMS_PROCESSED_PER_THREAD_Y; + + // Update address for the matrix B + src_addr.s1 += idx; + +#if defined(REINTERPRET_INPUT_AS_3D) + // Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across + // the z dimension + // in order to take into account the presence of possible cross plane paddings + // + // | | + // | plane0 | + // | | + // |__________________| + // |******************| + // | cross_plane_pad | + // |******************| + // | | + // | plane1 | + // | | + // |__________________| + + // The plane (zin) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) + // by HEIGHT_GEMM3D + uint4 zin = ((uint4)(0, 1, 2, 3) + (uint4)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) / + (uint4)HEIGHT_GEMM3D; + zin = min(DEPTH_GEMM3D - 1, zin); + + // Add offset due to the cross plane paddings + zin *= (src_cross_plane_pad * src0_stride_y); + + // Add offset for batched GEMM. 
The batches will be in the fourth dimension and for this reason we + // multiply src0_stride_z by DEPTH_GEMM3D + src_addr.s0 += get_global_id(2) * src0_stride_z * DEPTH_GEMM3D; + +#else // defined(REINTERPRET_INPUT_AS_3D) + + // Add offset for batched GEMM + src_addr.s0 += get_global_id(2) * src0_stride_z; + +#endif // defined(REINTERPRET_INPUT_AS_3D) + +#if defined(MATRIX_B_DEPTH) + // Do not slide matrix B if the matrix B has 3 dimensions and matrix A more than 3 + src_addr.s1 += (get_global_id(2) % MATRIX_B_DEPTH) * src1_stride_z; +#else // defined(MATRIX_B_DEPTH) + src_addr.s1 += get_global_id(2) * src1_stride_z; +#endif // defined(MATRIX_B_DEPTH) + + int end_row_vec_a = src_addr.s0 + COLS_A; + + VECTOR_INT acc0 = 0; +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + VECTOR_INT acc1 = 0; +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + VECTOR_INT acc2 = 0; +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + VECTOR_INT acc3 = 0; +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + VECTOR_INT acc4 = 0; +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + + for (; src_addr.s0 <= (end_row_vec_a - 2); src_addr += (int2)(2, 2 * src1_stride_y)) + { + // Load values from matrix A + char2 a0 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y)); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + char2 a1 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y)); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + char2 a2 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y)); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + char2 a3 = vload2(0, (__global char *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y)); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + char2 a4 = vload2(0, (__global 
char *)(src0_ptr + src_addr.s0 + 4 * src0_stride_y)); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + // Load values from matrix B + VECTOR_CHAR b0 = + VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1)); + VECTOR_CHAR b1 = VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)( + 0, (__global char *)(src1_ptr + src_addr.s1 + src1_stride_y)); + + // Accumulate + acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0.s0; + acc0 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a0.s1; +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + acc1 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a1.s0; + acc1 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a1.s1; +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + acc2 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a2.s0; + acc2 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a2.s1; +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + acc3 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a3.s0; + acc3 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a3.s1; +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + acc4 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a4.s0; + acc4 += CONVERT(b1, VECTOR_INT) * (VECTOR_INT)a4.s1; +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + } + + for (; src_addr.s0 < end_row_vec_a; src_addr += (int2)(1, src1_stride_y)) + { + // Load values from matrix A + char a0 = *(__global char *)(src0_ptr + src_addr.s0 + 0 * src0_stride_y); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + char a1 = *(__global char *)(src0_ptr + src_addr.s0 + 1 * src0_stride_y); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + char a2 = *(__global char *)(src0_ptr + src_addr.s0 + 2 * src0_stride_y); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + char a3 = *(__global char *)(src0_ptr + src_addr.s0 + 3 * src0_stride_y); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 +#if 
NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + char a4 = *(__global char *)(src0_ptr + src_addr.s0 + 4 * src0_stride_y); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + // Load values from matrix B + VECTOR_CHAR b0 = + VLOAD(NUM_ELEMS_PROCESSED_PER_THREAD_X)(0, (__global char *)(src1_ptr + src_addr.s1)); + + // Accumulate + acc0 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a0; +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + acc1 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a1; +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + acc2 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a2; +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + acc3 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a3; +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + acc4 += CONVERT(b0, VECTOR_INT) * (VECTOR_INT)a4; +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + } + + const int z = get_global_id(2); + + // Compute destination address + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + +#if defined(REINTERPRET_OUTPUT_AS_3D) + // Since we store a 2D output tile in a 3D tensor, we need to check when the plane changes across + // the z dimension + // in order to take into account the presence of possible cross plane paddings + // + // | | + // | plane0 | + // | | + // |__________________| + // |******************| + // | cross_plane_pad | + // |******************| + // | | + // | plane1 | + // | | + // |__________________| + + // The plane (zout) is calculated dividing M (get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y) + // by HEIGHT_GEMM3D + uint8 zout = ((uint8)(0, 1, 2, 3, 4, 5, 6, 7) + + (uint8)(get_global_id(1) * NUM_ELEMS_PROCESSED_PER_THREAD_Y)) / + (uint8)HEIGHT_GEMM3D; + zout = min(DEPTH_GEMM3D - 1, zout); + + // Add offset due to the cross plane paddings + zout *= (dst_cross_plane_pad * dst_stride_y); + + // Add offset for batched GEMM. 
The batches will be in the fourth dimension and for this reason we + // multiply dst_stride_z by DEPTH_GEMM3D + dst.ptr += z * dst_stride_z * DEPTH_GEMM3D; + + // Store the result + VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) + (CONVERT(acc0, VECTOR_INT), 0, (__global int *)(dst.ptr + 0 * dst_stride_y + zout.s0)); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) + (CONVERT(acc1, VECTOR_INT), 0, (__global int *)(dst.ptr + 1 * dst_stride_y + zout.s1)); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) + (CONVERT(acc2, VECTOR_INT), 0, (__global int *)(dst.ptr + 2 * dst_stride_y + zout.s2)); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) + (CONVERT(acc3, VECTOR_INT), 0, (__global int *)(dst.ptr + 3 * dst_stride_y + zout.s3)); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) + (CONVERT(acc4, VECTOR_INT), 0, (__global int *)(dst.ptr + 4 * dst_stride_y + zout.s4)); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + +#else // defined(REINTERPRET_OUTPUT_AS_3D) + // Add offset for batched GEMM + dst.ptr += z * dst_stride_z; + + // Store the result + VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) + (CONVERT(acc0, VECTOR_INT), 0, (__global int *)(dst.ptr + 0 * dst_stride_y)); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) + (CONVERT(acc1, VECTOR_INT), 0, (__global int *)(dst.ptr + 1 * dst_stride_y)); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) + (CONVERT(acc2, VECTOR_INT), 0, (__global int *)(dst.ptr + 2 * dst_stride_y)); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) + (CONVERT(acc3, 
VECTOR_INT), 0, (__global int *)(dst.ptr + 3 * dst_stride_y)); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 + VSTORE(NUM_ELEMS_PROCESSED_PER_THREAD_X) + (CONVERT(acc4, VECTOR_INT), 0, (__global int *)(dst.ptr + 4 * dst_stride_y)); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 4 +#endif // defined(REINTERPRET_OUTPUT_AS_3D) +} +#endif // defined(NUM_ELEMS_PROCESSED_PER_THREAD_X) && defined(NUM_ELEMS_PROCESSED_PER_THREAD_Y) && + // defined(COLS_A) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl index 73f29e3e5..a4f7dbd48 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/hashtable_lookup.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "helpers.h" #ifndef VEC_SIZE diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h index 0e123ae0a..2d0b6a299 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers.h @@ -1,4 +1,20 @@ /* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT @@ -21,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + #ifndef ARM_COMPUTE_HELPER_H #define ARM_COMPUTE_HELPER_H diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h index c39138caa..a83b1a8a5 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/helpers_asymm.h @@ -1,4 +1,20 @@ /* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT @@ -21,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #ifndef ARM_COMPUTE_HELPERS_ASYMM_H #define ARM_COMPUTE_HELPERS_ASYMM_H @@ -403,4 +420,4 @@ ASYMM_RESCALE_IMPL(4) ASYMM_RESCALE_IMPL(8) ASYMM_RESCALE_IMPL(16) -#endif // ARM_COMPUTE_HELPERS_ASYMM_H
\ No newline at end of file +#endif // ARM_COMPUTE_HELPERS_ASYMM_H diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl index 1d96150f8..014842680 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/instance_normalization_ex.cl @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "helpers.h" #if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(EPSILON) && defined(DIM_X) && \ diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl new file mode 100644 index 000000000..3943fc4c2 --- /dev/null +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/multiply_scale_factor.cl @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "helpers.h" + +#if defined(VEC_SIZE) && defined(DATA_TYPE) + +/** This performs to multiply input by scale_factor. + * + * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. + * -DDATA_TYPE=float + * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. 
+ * -DVEC_SIZE=16 + * @note The scale is read from the scale tensor (scale_ptr), not from a -DSCALE build option. + * + * @param[in] input_ptr Pointer to the source tensor. Supported data + * types: S32 + * @param[in] input_stride_x Stride of the source tensor in X dimension (in + * bytes) + * @param[in] input_step_x input_stride_x * number of elements along X + * processed per workitem (in bytes) + * @param[in] input_stride_y Stride of the source tensor in Y dimension (in + * bytes) + * @param[in] input_step_y input_stride_y * number of elements along Y + * processed per workitem (in bytes) + * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source + * tensor + * @param[in] scale_ptr Pointer to the scale tensor. Supported data + * types: F16/F32 + * @param[in] scale_stride_x Stride of the scale tensor in X dimension (in + * bytes) + * @param[in] scale_step_x scale_stride_x * number of elements along X + * processed per workitem (in bytes) + * @param[in] scale_offset_first_element_in_bytes The offset of the first element in the scale + * tensor + * @param[out] output_ptr Pointer to the destination tensor.
Supported + * data types: F16/F32 + * @param[in] output_stride_x Stride of the destination tensor in X dimension + * (in bytes) + * @param[in] output_step_x output_stride_x * number of elements along X + * processed per workitem(in bytes) + * @param[in] output_stride_y Stride of the destination tensor in Y dimension + * (in bytes) + * @param[in] output_step_y output_stride_y * number of elements along Y + * processed per workitem(in bytes) + * @param[in] output_offset_first_element_in_bytes The offset of the first element in the + * destination tensor + */ +__kernel void multiply_scale_factor(IMAGE_DECLARATION(input), VECTOR_DECLARATION(scale), + IMAGE_DECLARATION(output), float multiplier) +{ + // Get pixels pointer + Image input = CONVERT_TO_IMAGE_STRUCT(input); + Image output = CONVERT_TO_IMAGE_STRUCT(output); + +#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X) + // Check if access on width gets out of bounds + // If it does shift access vector to access elements within bounds + const int xi = (int)(get_global_id(0) * VEC_SIZE); + input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x; + output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x; + + // Load data + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + val = CONVERT(VLOAD(VEC_SIZE)(0, (__global int *)input.ptr), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)); + + // Create scale vector + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + vscale = *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)); + + // Dequantize + vscale *= (DATA_TYPE)(multiplier); + val *= vscale; + + // Store result + VSTORE(VEC_SIZE) + (val, 0, (__global DATA_TYPE *)output.ptr); +#else // !defined(VEC_SIZE) || !defined(LAST_ACCESSED_X) + *((__global DATA_TYPE *)(output.ptr)) = + ((DATA_TYPE)(*((__global int *)(input.ptr)))) * + *(((__global DATA_TYPE *)(scale_ptr)) + get_global_id(1)) * (DATA_TYPE)(multiplier); +#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X) +} + +#endif // defined(VEC_SIZE) && defined(DATA_TYPE) diff --git 
a/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl index 4aa7883c3..15c16f80c 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/neg_tensor.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + #include "helpers.h" #ifndef VEC_SIZE diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl index 2074d3ceb..76fda9041 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_mul_quantized.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + #include "helpers_asymm.h" #ifdef SATURATE diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl index 62a8901f6..12c8eeb79 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + #include "helpers.h" #ifndef VEC_SIZE diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl index 5e0abd585..a66e107d1 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/prelu_quantized.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + #include "helpers.h" #define SUB(x, y) (x) - (y) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl new file mode 100644 index 000000000..4ae9adb0b --- /dev/null +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/quantization_symm8.cl @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "helpers.h" + +#define CONVERT_RTE(x, type) (convert_##type##_rte((x))) +#define CONVERT_RTE_VEC_STR(x, type, size) (convert_##type##size##_rte((x))) +#define CONVERT_RTE_VEC(x, type, size) CONVERT_RTE_VEC_STR(x, type, size) +#define MIN_QUANT_VAL -127 +#define MAX_QUANT_VAL 127 + +#if defined(VEC_SIZE) && defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT) + +/** This performs the symmetric quantization of floating point inputs to 8-bit signed integers. + * + * @note Input data type should be given as a preprocessor argument using -DDATA_TYPE_IN=type. e.g. + * -DDATA_TYPE_IN=float + * @note Output data type should be given as a preprocessor argument using -DDATA_TYPE_OUT=type. + * e.g. -DDATA_TYPE_OUT=char + * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. + * -DVEC_SIZE=16 + * @note Quantization scale is not a preprocessor argument; it is read from the scale tensor, + * indexed by get_global_id(1) + * @note No quantization offset is applied because the quantization is symmetric; -DOFFSET is not + * used + * @note Minimum value for quantized type is fixed by the MIN_QUANT_VAL macro defined in this file + * (-127) + * @note Maximum value for quantized type is fixed by the MAX_QUANT_VAL macro defined in this file + * (127) + * + * @param[in] input_ptr Pointer to the source tensor.
Supported data + * types: F32 + * @param[in] input_stride_x Stride of the source tensor in X dimension (in + * bytes) + * @param[in] input_step_x input_stride_x * number of elements along X + * processed per workitem (in bytes) + * @param[in] input_stride_y Stride of the source tensor in Y dimension (in + * bytes) + * @param[in] input_step_y input_stride_y * number of elements along Y + * processed per workitem (in bytes) + * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source + * tensor + * @param[out] output_ptr Pointer to the destination tensor. Supported + * data types: S8 + * @param[in] output_stride_x Stride of the destination tensor in X dimension + * (in bytes) + * @param[in] output_step_x output_stride_x * number of elements along X + * processed per workitem (in bytes) + * @param[in] output_stride_y Stride of the destination tensor in Y dimension + * (in bytes) + * @param[in] output_step_y output_stride_y * number of elements along Y + * processed per workitem (in bytes) + * @param[in] output_offset_first_element_in_bytes The offset of the first element in the + * destination tensor + * @param[in] scale_ptr Pointer to the scale tensor.
Supported data + * types: F32 + * @param[in] scale_stride_x Stride of the destination tensor in X dimension + * (in bytes) + * @param[in] scale_step_x scale_stride_x * number of elements along X + * processed per workitem(in bytes) + */ +__kernel void quantization_symm8(IMAGE_DECLARATION(input), VECTOR_DECLARATION(scale), + IMAGE_DECLARATION(output)) +{ + // Get pixels pointer + Image input = CONVERT_TO_IMAGE_STRUCT(input); + Image output = CONVERT_TO_IMAGE_STRUCT(output); + +#if defined(VEC_SIZE) && defined(LAST_ACCESSED_X) + // Check if access on width gets out of bounds + // If it does shift access vector to access elements within bounds + const int xi = (int)(get_global_id(0) * VEC_SIZE); + input.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * input_stride_x; + output.ptr -= max(xi - (int)LAST_ACCESSED_X, 0) * output_stride_x; + + // Load data + VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) + val = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE_IN *)input.ptr); + + // Create scale vector + const VEC_DATA_TYPE(DATA_TYPE_IN, VEC_SIZE) vscale = + *(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1)); + + // Quantize + VEC_DATA_TYPE(int, VEC_SIZE) + res = CLAMP(CONVERT_RTE_VEC(val / vscale, int, VEC_SIZE), MIN_QUANT_VAL, MAX_QUANT_VAL); + + // Store result + VSTORE(VEC_SIZE) + (CONVERT(res, VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)), 0, (__global DATA_TYPE_OUT *)output.ptr); +#else //! 
defined(VEC_SIZE) || !defined(LAST_ACCESSED_X) + *((__global DATA_TYPE_OUT *)(output.ptr)) = (DATA_TYPE_OUT)CLAMP( + CONVERT_RTE((*(__global DATA_TYPE_IN *)input.ptr) / + (*(((__global DATA_TYPE_IN *)(scale_ptr)) + get_global_id(1))), + int), + MIN_QUANT_VAL, MAX_QUANT_VAL); +#endif // defined(VEC_SIZE) && defined(LAST_ACCESSED_X) +} +#endif // defined(VEC_SIZE) && defined(DATA_TYPE_IN) && defined(DATA_TYPE_OUT) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl index d7ea2e2c4..832ac1270 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/reduce_operation.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "helpers.h" #if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(OP_CODE) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl new file mode 100644 index 000000000..3d5e90356 --- /dev/null +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/scale_factor.cl @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "helpers.h" + +#if defined(WIDTH) +/** This function identifies the min and maximum value of an input 3D tensor. + * + * @note The width, height and depth of the input tensor must be provided at compile time using + * -DWIDTH, -DHEIGHT and -DDEPTH (e.g. -DWIDTH=320, -DHEIGHT=240, -DDEPTH=3) + * + * @param[in] src_ptr Pointer to the source tensor. 
/** Computes one symmetric 8-bit quantization scale per row of an F32 tensor.
 *
 * Each work-item scans WIDTH consecutive floats starting at its source position, tracks their
 * minimum and maximum, and stores max(|min|, |max|) / 127 at index get_global_id(1) of the
 * destination vector. A row whose range is exactly zero gets a scale of 1 instead of 0, so that
 * a consumer dividing by the scale (as quantization_symm8 does) never computes 0 / 0.
 *
 * @note The row length must be provided at compile time using -DWIDTH (e.g. -DWIDTH=320). It is
 *       the only build option read by this kernel.
 * @note NOTE(review): the destination is addressed as dst_ptr + get_global_id(1) floats;
 *       dst_stride_x and dst_offset_first_element_in_bytes are not applied. Confirm that callers
 *       always pass a dense F32 vector with zero offset.
 *
 * @param[in]  src_ptr                           Pointer to the source tensor. Supported data
 *                                               types: F32
 * @param[in]  src_stride_x                      Stride of the source image in X dimension (in
 *                                               bytes)
 * @param[in]  src_step_x                        src_stride_x * number of elements along X
 *                                               processed per workitem (in bytes)
 * @param[in]  src_stride_y                      Stride of the source image in Y dimension (in
 *                                               bytes)
 * @param[in]  src_step_y                        src_stride_y * number of elements along Y
 *                                               processed per workitem (in bytes)
 * @param[in]  src_offset_first_element_in_bytes The offset of the first element in the source
 *                                               image
 * @param[out] dst_ptr                           Pointer to the scale vector; one scale per row.
 *                                               Supported data types: F32
 * @param[in]  dst_stride_x                      Stride of the scale vector in X dimension (in
 *                                               bytes)
 * @param[in]  dst_step_x                        dst_stride_x * number of elements along X
 *                                               processed per workitem (in bytes)
 * @param[in]  dst_offset_first_element_in_bytes The offset of the first element in the scale
 *                                               vector
 */
__kernel void scale_factor_symm8(IMAGE_DECLARATION(src), VECTOR_DECLARATION(dst))
{
  Image src = CONVERT_TO_IMAGE_STRUCT(src);

  // Four running lanes; they are folded into lane 0 after the scan
  float4 min_value = (float4)FLT_MAX;
  float4 max_value = (float4)-FLT_MAX;

  int x = 0;
  __global float *src_addr = (__global float *)(src.ptr);

  // Main loop: eight floats per iteration, folded into the four running lanes
  for (; x <= (int)(WIDTH - 8); x += 8)
  {
    float8 value = vload8(0, src_addr + x);

    min_value = select(value.s0123, min_value, min_value < value.s0123);
    min_value = select(value.s4567, min_value, min_value < value.s4567);

    max_value = select(value.s0123, max_value, max_value > value.s0123);
    max_value = select(value.s4567, max_value, max_value > value.s4567);
  }

  // Left-over elements (WIDTH not a multiple of 8) are folded into lane 0
  for (; x < WIDTH; ++x)
  {
    float value = *(src_addr + x);

    min_value.s0 = min(min_value.s0, value);
    max_value.s0 = max(max_value.s0, value);
  }

  // Perform min/max reduction across the four lanes
  min_value.s01 = min(min_value.s01, min_value.s23);
  min_value.s0 = min(min_value.s0, min_value.s1);
  max_value.s01 = max(max_value.s01, max_value.s23);
  max_value.s0 = max(max_value.s0, max_value.s1);

  // Extract scale: largest magnitude mapped onto 127 quantization steps.
  // A zero range would give a zero scale and a division by zero downstream, so use 1 instead;
  // every element of such a row is zero, hence it still quantizes to 0.
  const float range = max(fabs(min_value.s0), fabs(max_value.s0));
  const float scale = (range == 0.0f) ? 1.0f : range / 127.0f;

  // Store the scale of this row
  *((__global float *)(dst_ptr) + get_global_id(1)) = scale;
}
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "helpers.h" #if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BATCH_IN) && defined(HEIGHT_IN) && \ diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl index a26e762e8..eb612f834 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/space_to_depth.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "helpers.h" #if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE) && defined(Z_IN) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl index 50472e4f9..3eb1a4ce7 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +14,30 @@ * limitations under the License. */ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "helpers.h" __kernel void topkv2_init(VECTOR_DECLARATION(input), __global float *in_key_buf, diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl index 9594daf19..460de790b 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_quicksort.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +14,30 @@ * limitations under the License. */ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "helpers.h" __global inline float *get_vec_elem(Vector *vec, int idx) diff --git a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl index f6830d229..e9d4696b4 100644 --- a/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl +++ b/compute/ARMComputeEx/src/core/CL/cl_kernels/topkv2_radixsort.cl @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +14,30 @@ * limitations under the License. */ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + // reference: // https://code.google.com/archive/p/ocl-radix-sort/source/default/source // OpenCL kernel sources for the CLRadixSort class diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp index 7f4b5b0df..06eeb5b98 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLArgOperationKernel.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/core/CL/kernels/CLArgOperationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp index c14e73634..bb5556888 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLBinaryLogicalOpKernel.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp index 35f607bd0..01ea655b4 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + #include "arm_compute/core/CL/kernels/CLCastKernel.h" #include "arm_compute/core/CL/CLHelpers.h" @@ -52,8 +76,9 @@ void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataT // Create kernel if (is_data_type_quantized_asymmetric(input->info()->data_type())) { - const float scale_in = input->info()->quantization_info().scale; - const int offset_in = input->info()->quantization_info().offset; + UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + const float scale_in = qinfo.scale; + const int offset_in = qinfo.offset; build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in)); build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in)); @@ -62,8 +87,10 @@ void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataT } else if (is_data_type_quantized_asymmetric(output->info()->data_type())) { - const float scale_in = output->info()->quantization_info().scale; - const int offset_in = output->info()->quantization_info().offset; + UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform(); + const float scale_in = qinfo.scale; + const float offset_in = qinfo.offset; + build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in)); build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in)); diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp index 2a3433c2b..389136817 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLDepthToSpaceKernel.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp index 0862b78bf..79f5ce065 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLEmbeddingLookupKernel.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp new file mode 100644 index 000000000..235e8975d --- /dev/null +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.cpp @@ -0,0 +1,372 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernelEx.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/AccessWindowTranspose.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "support/ToolchainSupport.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <tuple>
+#include <utility>
+
+using namespace arm_compute;
+using namespace arm_compute::misc::shape_calculator;
+
+namespace arm_compute
+{
+class Coordinates;
+} // namespace arm_compute
+
+namespace
+{
+using ElementsProcessed = Steps;
+
+// Checks the operands of the low-precision matrix multiplication.
+//
+// Both inputs must be single-channel S8 tensors of the same data type, matrix A (input0) may have
+// at most 4 dimensions and matrix B (input1) at most 3 (at most 2 when input0 is reinterpreted as
+// 3D). The input shapes must agree with the m/n/k values carried by gemm_info. The output is only
+// checked when it is already initialized: it must then be S32 and have the shape returned by
+// compute_mm_shape().
+Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
+                          const ITensorInfo *output, const GEMMReshapeInfo &gemm_info)
+{
+  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input0, input1, output);
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::S8);
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1);
+  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input0->num_dimensions() > 4,
+                                  "The number of dimensions for the matrix A must be <= 4");
+  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 3,
+                                  "The number of dimensions for the matrix B must be <= 3");
+  ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->num_dimensions() > 2 &&
+                                      gemm_info.reinterpret_input_as_3d(),
+                                  "The input1 tensor cannot have more than 2 dimensions if input0 "
+                                  "has to be reinterpreted as 3D");
+
+  const int m = gemm_info.m();
+  const int n = gemm_info.n();
+  const int k = gemm_info.k();
+
+  ARM_COMPUTE_UNUSED(m);
+  ARM_COMPUTE_UNUSED(n);
+  ARM_COMPUTE_UNUSED(k);
+
+  // input0 is (k x m), input1 is (n x k) in (dimension 0 x dimension 1) order
+  ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != static_cast<unsigned int>(k));
+  ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != static_cast<unsigned int>(n));
+  ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(1) != static_cast<unsigned int>(k));
+  if (gemm_info.reinterpret_input_as_3d())
+  {
+    // When input0 is reinterpreted as 3D, its rows are spread over dimensions 1 and 2
+    ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) * input0->dimension(2) !=
+                                static_cast<unsigned int>(m));
+  }
+  else
+  {
+    ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != static_cast<unsigned int>(m));
+  }
+
+  if (output->total_size() != 0)
+  {
+    const TensorInfo tensor_info_output =
+        output->clone()->set_tensor_shape(compute_mm_shape(*input0, *input1, false, gemm_info));
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32);
+  }
+
+  return Status{};
+}
+
+// Initializes the output info if it is still empty, selects the number of elements processed per
+// work-item and builds the execution window.
+//
+// num_elements_processed receives the chosen tile: 4 along X and min(output height, 4) along Y.
+// The returned status is a RUNTIME_ERROR ("Insufficient Padding!") when
+// update_window_and_padding() reports a change; the returned window is the execution window
+// collapsed from the Z dimension (or from the last dimension for tensors with fewer than 2
+// dimensions).
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1,
+                                                        ITensorInfo *output,
+                                                        const GEMMReshapeInfo &gemm_info,
+                                                        ElementsProcessed &num_elements_processed)
+{
+  unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
+  unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
+  bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
+  bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0);
+
+  Window win{};
+  Window win_out{};
+  bool window_changed = false;
+
+  // In case both input and output have to be reinterpreted as 3D tensors,
+  // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
+  if (reinterpret_input_as_3d == reinterpret_output_as_3d)
+  {
+    reinterpret_input_as_3d = false;
+    reinterpret_output_as_3d = false;
+  }
+
+  // Output tensor auto initialization if not yet initialized
+  auto_init_if_empty(*output,
+                     input0->clone()
+                         ->set_tensor_shape(compute_mm_shape(*input0, *input1, false, gemm_info))
+                         .set_data_type(DataType::S32));
+
+  TensorInfo tmp_info(*output);
+
+  if (reinterpret_output_as_3d)
+  {
+    // Since the output tensor has to be reinterpreted as 3D and the execute window is based on a 2D
+    // GEMM,
+    // the window needs to be constructed on the 2D collapsed version of the tensor
+    TensorShape tmp_shape(output->tensor_shape());
+    tmp_shape.collapse(2U, 1U);
+    tmp_info.set_tensor_shape(tmp_shape);
+  }
+
+  // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor. num_elems_processed_per_iteration_x
+  // Note: if the dot product instruction is available, the 8x2 tile has to be used
+  num_elems_processed_per_iteration_x = 4;
+  num_elems_processed_per_iteration_y = std::min(static_cast<int>(output->dimension(1)), 4);
+
+  // Note: bottom paddings are calculated manually as the output can be reinterpreted as 3D tensor
+  // The only way to set the paddings properly is to set them explicitly through the
+  // AccessWindowStatic
+  const int m = reinterpret_input_as_3d ? input0->tensor_shape()[1] * input0->tensor_shape()[2]
+                                        : input0->tensor_shape()[1];
+  // Rows needed to round m up to a multiple of the Y tile (0 when m is already a multiple)
+  const int bottom_pad =
+      (num_elems_processed_per_iteration_y - (m % num_elems_processed_per_iteration_y)) %
+      num_elems_processed_per_iteration_y;
+
+  // Configure window
+  win = calculate_max_window(
+      tmp_info, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
+  win_out = calculate_max_window(
+      *output, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
+
+  AccessWindowStatic input0_access(input0, 0, 0, input0->dimension(0),
+                                   input0->dimension(1) + bottom_pad);
+  AccessWindowStatic input1_access(
+      input1, 0, 0, ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration_x),
+      input1->dimension(1));
+  AccessWindowStatic output_access(
+      output, 0, 0, ceil_to_multiple(output->dimension(0), num_elems_processed_per_iteration_x),
+      output->dimension(1) + bottom_pad);
+
+  window_changed =
+      update_window_and_padding(win, input0_access,
+                                input1_access) || // window used by the execute_window_loop
+      update_window_and_padding(
+          win_out,
+          output_access); // window used to update the padding requirements of output tensor
+
+  Coordinates coord;
+  coord.set_num_dimensions(output->num_dimensions());
+  output_access.set_valid_region(win_out, ValidRegion(coord, output->tensor_shape()));
+
+  // Collapse along the Z direction
+  // This collapse needs to be here in order to tune the Z dimension of LWS
+  Window collapsed = win;
+  const unsigned int dimension_to_collapse =
+      std::min(static_cast<unsigned int>(output->num_dimensions()), 2u);
+  collapsed = win.collapse(win, dimension_to_collapse);
+
+  Status err = (window_changed)
+                   ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
+                   : Status{};
+  return std::make_pair(err, collapsed);
+}
+} // namespace
+
+CLGEMMLowpMatrixMultiplyKernelEx::CLGEMMLowpMatrixMultiplyKernelEx()
+    : _input0(nullptr), _input1(nullptr), _output(nullptr), _slide_matrix_b(true),
+      _reinterpret_input_as_3d(false), _reinterpret_output_as_3d(false)
+{
+}
+
+// Validates the operands, stores them, configures the execution window and builds the
+// "gemmlowp_mm_midgard_ex" OpenCL kernel with the matching build options.
+void CLGEMMLowpMatrixMultiplyKernelEx::configure(const ICLTensor *input0, const ICLTensor *input1,
+                                                 ICLTensor *output,
+                                                 const GEMMReshapeInfo &gemm_info)
+{
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
+
+  ARM_COMPUTE_ERROR_THROW_ON(
+      validate_arguments(input0->info(), input1->info(), output->info(), gemm_info));
+
+  _input0 = input0;
+  _input1 = input1;
+  _output = output;
+  _reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d();
+  _reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d() != 0);
+
+  // In case both input and output have to be reinterpreted as 3D tensors,
+  // force reinterpret_input_as_3d and reinterpret_output_as_3d to be false.
+  if (_reinterpret_input_as_3d == _reinterpret_output_as_3d)
+  {
+    _reinterpret_input_as_3d = false;
+    _reinterpret_output_as_3d = false;
+  }
+
+  // Check if we need to slide the matrix B
+  const unsigned int num_dimensions_input0 = _reinterpret_input_as_3d
+                                                 ? _input0->info()->num_dimensions() - 1
+                                                 : _input0->info()->num_dimensions();
+  _slide_matrix_b = (_input1->info()->num_dimensions() >= num_dimensions_input0);
+
+  ElementsProcessed num_elements_processed{};
+
+  // Configure kernel window
+  auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info(),
+                                                  gemm_info, num_elements_processed);
+  ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+  ICLKernel::configure_internal(win_config.second);
+
+  // Name of the OpenCL kernel; also used as the prefix of the LWS tuning id below
+  const std::string kernel_name("gemmlowp_mm_midgard_ex");
+
+  // Create build options
+  CLBuildOptions build_opts;
+  build_opts.add_option_if(_reinterpret_input_as_3d, "-DREINTERPRET_INPUT_AS_3D");
+  build_opts.add_option_if(_reinterpret_output_as_3d, "-DREINTERPRET_OUTPUT_AS_3D");
+  build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d,
+                           "-DHEIGHT_GEMM3D=" +
+                               support::cpp11::to_string(output->info()->dimension(1)));
+  build_opts.add_option_if(_reinterpret_input_as_3d || _reinterpret_output_as_3d,
+                           "-DDEPTH_GEMM3D=" +
+                               support::cpp11::to_string(output->info()->dimension(2)));
+  build_opts.add_option_if(!_slide_matrix_b,
+                           "-DMATRIX_B_DEPTH=" +
+                               support::cpp11::to_string(input1->info()->dimension(2)));
+  build_opts.add_option("-DCOLS_A=" + support::cpp11::to_string(input0->info()->dimension(0)));
+  build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_X=" +
+                        support::cpp11::to_string(num_elements_processed.x()));
+  build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_Y=" +
+                        support::cpp11::to_string(num_elements_processed.y()));
+
+  // Create kernel
+  _kernel = static_cast<cl::Kernel>(
+      CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts.options()));
+
+  // Set config_id for enabling LWS tuning
+  _config_id = kernel_name;
+  _config_id += "_";
+  _config_id += (_reinterpret_input_as_3d ? "3di_" : "");
+  _config_id += (_reinterpret_output_as_3d ? "3do_" : "");
+  _config_id += lower_string(string_from_data_type(input0->info()->data_type()));
+  _config_id += "_";
+  _config_id += support::cpp11::to_string(output->info()->dimension(1));
+  _config_id += "_";
+  _config_id += support::cpp11::to_string(output->info()->dimension(0));
+}
+
+// Static validation: runs the argument checks and then the window configuration on clones of the
+// tensor infos, so the caller's infos are not modified.
+Status CLGEMMLowpMatrixMultiplyKernelEx::validate(const ITensorInfo *input0,
+                                                  const ITensorInfo *input1,
+                                                  const ITensorInfo *output,
+                                                  const GEMMReshapeInfo &gemm_info)
+{
+  ElementsProcessed num_elements_processed{};
+  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, gemm_info));
+  ARM_COMPUTE_RETURN_ON_ERROR(
+      validate_and_configure_window(input0->clone().get(), input1->clone().get(),
+                                    output->clone().get(), gemm_info, num_elements_processed)
+          .first);
+
+  return Status{};
+}
+
+// Enqueues the kernel once per 3D slice of the given window.
+void CLGEMMLowpMatrixMultiplyKernelEx::run(const Window &window, cl::CommandQueue &queue)
+{
+  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+  if (_input1->info()->num_dimensions() < 3)
+  {
+    // The stride_z for matrix B must be zero if we do not slice
+    // NOTE(review): the check reads strides_in_bytes()[3] although the comment refers to
+    // stride_z (index 2) - confirm which index is intended.
+    ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
+  }
+
+  Window slice = window.first_slice_window_3D();
+  Window slice_matrix_b = slice;
+
+  slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1));
+  slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1));
+
+  if (_reinterpret_input_as_3d)
+  {
+    // Pass bottom paddings to the kernel if the input has to be reinterpreted as 3D tensor
+    const unsigned int idx0 = 3 * num_arguments_per_2D_tensor() + 3;
+    const unsigned int total_cross_plane_pad =
+        _input0->info()->padding().top + _input0->info()->padding().bottom;
+    _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
+  }
+
+  if (_reinterpret_output_as_3d)
+  {
+    // Pass bottom paddings to the kernel if the output has to be reinterpreted as 3D tensor
+    const unsigned int idx0 =
+        3 * num_arguments_per_2D_tensor() + 3 + (_reinterpret_input_as_3d ? 1 : 0);
+    const unsigned int total_cross_plane_pad =
+        _output->info()->padding().top + _output->info()->padding().bottom;
+    _kernel.setArg<cl_uint>(idx0, static_cast<unsigned int>(total_cross_plane_pad));
+  }
+
+  do
+  {
+    Window slice_b = slice;
+    // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A
+    // more than 2
+    // This scenario can happen when the matrix multiplication is used to perform a convolution
+    // operation
+    if (!_slide_matrix_b)
+    {
+      slice_b = slice_matrix_b;
+    }
+
+    // Argument order: input0, input1, output (2D tensor arguments), then the three Z strides
+    unsigned int idx = 0;
+    add_2D_tensor_argument(idx, _input0, slice);
+    add_2D_tensor_argument(idx, _input1, slice_b);
+    add_2D_tensor_argument(idx, _output, slice);
+    _kernel.setArg<cl_uint>(idx++,
+                            static_cast<unsigned int>(_input0->info()->strides_in_bytes()[2]));
+    _kernel.setArg<cl_uint>(idx++,
+                            static_cast<unsigned int>(_input1->info()->strides_in_bytes()[2]));
+    _kernel.setArg<cl_uint>(idx++,
+                            static_cast<unsigned int>(_output->info()->strides_in_bytes()[2]));
+    enqueue(queue, *this, slice, lws_hint());
+  } while (window.slide_window_slice_3D(slice));
+}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
index 718f615f9..3a25987d0 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLGatherExKernel.cpp
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/core/CL/kernels/CLGatherExKernel.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp index 31e98c9a8..7fbdcdaa7 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLHashtableLookupKernel.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp index 5db414f62..b45f6bb24 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp new file mode 100644 index 000000000..d305896ea --- /dev/null +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLMultiplyScaleFactorKernel.cpp @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include "arm_compute/core/CL/kernels/CLMultiplyScaleFactorKernel.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLValidate.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+#include <algorithm>
+#include <tuple>
+
+using namespace arm_compute;
+
+namespace
+{
+// Checks the operands of the scale-factor multiplication.
+//
+// input must be a single-channel S32 tensor with at most 2 dimensions. scale_factor must be a
+// non-empty F16/F32 tensor with at most 1 dimension whose length equals input's dimension 1.
+// output must already be initialized, be F16/F32 and have the same shape as input.
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *scale_factor,
+                          const ITensorInfo *output)
+{
+  // scale_factor is dereferenced below, so it is checked together with input and output
+  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, scale_factor, output);
+  ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32);
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scale_factor, 1, DataType::F16,
+                                                       DataType::F32);
+  ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->tensor_shape().total_size() == 0);
+  ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->num_dimensions() > 1);
+  ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->dimension(0) != input->dimension(1));
+
+  // Output must always be initialized, so the shape check is unconditional
+  ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+
+  return Status{};
+}
+
+// Computes the maximum window over input and sets output's valid region to its full shape.
+// The returned status is always OK.
+std::tuple<Status, Window> validate_and_configure_window(const ITensorInfo *input,
+                                                         ITensorInfo *output)
+{
+  // Configure kernel window
+  Window win = calculate_max_window(*input, Steps());
+
+  // Output tensor auto initialization if not yet initialized
+  auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32);
+
+  // CLMultiplyScaleFactorKernel doesn't need padding so update_window_and_padding() can be
+  // skipped
+  Coordinates coord;
+  coord.set_num_dimensions(output->num_dimensions());
+  output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
+
+  return std::make_tuple(Status{}, win);
+}
+} // namespace
+
+CLMultiplyScaleFactorKernel::CLMultiplyScaleFactorKernel()
+    : _input(nullptr), _scale_factor(nullptr), _output(nullptr), _multiplier(1.f)
+{
+}
+
+// Validates and stores the operands, configures the execution window and builds the
+// "multiply_scale_factor" OpenCL kernel.
+void CLMultiplyScaleFactorKernel::configure(const ICLTensor *input, const ICLTensor *scale_factor,
+                                            ICLTensor *output, float multiplier)
+{
+  // scale_factor->info() is called right below, so scale_factor must be checked as well
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input, scale_factor, output);
+  ARM_COMPUTE_ERROR_THROW_ON(
+      validate_arguments(input->info(), scale_factor->info(), output->info()));
+
+  _input = input;
+  _scale_factor = scale_factor;
+  _output = output;
+  _multiplier = multiplier;
+
+  // Number of output elements that fit in 16 bytes
+  const int vec_size_x = 16 / output->info()->element_size();
+  const int output_width_x = output->info()->tensor_shape().x();
+  // Vectorized access along X is only used when the row holds at least one full vector
+  const bool multi_access_x = (output_width_x / vec_size_x > 0);
+
+  // Create and update the window (if needed)
+  Window win = calculate_max_window(*output->info());
+  if (multi_access_x)
+  {
+    win.set(Window::DimX,
+            Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x),
+                              vec_size_x));
+  }
+  ICLKernel::configure_internal(win);
+
+  // Create kernel
+  CLBuildOptions build_opts;
+  build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+  build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type()));
+  build_opts.add_option_if(
+      multi_access_x, "-DLAST_ACCESSED_X=" +
+                          support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
+
+  _kernel = static_cast<cl::Kernel>(
+      CLKernelLibraryEx::get().create_kernel("multiply_scale_factor", build_opts.options()));
+}
+
+// Static validation: runs the argument checks and then the window configuration on clones of the
+// tensor infos, so the caller's infos are not modified.
+Status CLMultiplyScaleFactorKernel::validate(const ITensorInfo *input,
+                                             const ITensorInfo *scale_factor,
+                                             const ITensorInfo *output)
+{
+  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output));
+  ARM_COMPUTE_RETURN_ON_ERROR(
+      std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
+  return Status{};
+}
+
+// Enqueues the kernel once per 2D slice of the (collapsed) window.
+void CLMultiplyScaleFactorKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+  Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+  Window slice = window_collapsed.first_slice_window_2D();
+
+  // Set scale_factor window
+  Window win_scale = calculate_max_window(*_scale_factor->info(), Steps());
+
+  do
+  {
+    // Argument order: input (2D), scale_factor (1D), output (2D), multiplier
+    unsigned int idx = 0;
+    add_2D_tensor_argument(idx, _input, slice);
+    add_1D_tensor_argument(idx, _scale_factor, win_scale);
+    add_2D_tensor_argument(idx, _output, slice);
+    _kernel.setArg<float>(idx++, _multiplier);
+    enqueue(queue, *this, slice, lws_hint());
+  } while (window_collapsed.slide_window_slice_2D(slice));
+}
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
index ecfe05a51..74f7b4158 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLNegKernel.cpp
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
+/*
+ * Copyright (c) 2016-2018 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/core/CL/kernels/CLNegKernel.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp index e7d587029..8910a7b80 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + #include "arm_compute/core/CL/kernels/CLPReLUKernel.h" #include "arm_compute/core/CL/CLHelpers.h" @@ -72,18 +96,18 @@ void CLPReLUKernel::configure(const ICLTensor *input, const ICLTensor *alpha, IC if (is_data_type_quantized_asymmetric(input->info()->data_type())) { - build_opts.emplace("-DOFF_IN=" + - support::cpp11::to_string(input->info()->quantization_info().offset)); - build_opts.emplace("-DOFF_ALPHA=" + - support::cpp11::to_string(alpha->info()->quantization_info().offset)); - build_opts.emplace("-DOFF_OUT=" + - support::cpp11::to_string(output->info()->quantization_info().offset)); - build_opts.emplace("-DSCALE_IN=" + - support::cpp11::to_string(input->info()->quantization_info().scale)); - build_opts.emplace("-DSCALE_ALPHA=" + - support::cpp11::to_string(alpha->info()->quantization_info().scale)); - build_opts.emplace("-DSCALE_OUT=" + - support::cpp11::to_string(output->info()->quantization_info().scale)); + build_opts.emplace("-DOFF_IN=" + support::cpp11::to_string( + input->info()->quantization_info().uniform().offset)); + build_opts.emplace("-DOFF_ALPHA=" + support::cpp11::to_string( + alpha->info()->quantization_info().uniform().offset)); + build_opts.emplace("-DOFF_OUT=" + support::cpp11::to_string( + output->info()->quantization_info().uniform().offset)); + build_opts.emplace("-DSCALE_IN=" + support::cpp11::to_string( + input->info()->quantization_info().uniform().scale)); + build_opts.emplace("-DSCALE_ALPHA=" + support::cpp11::to_string( + alpha->info()->quantization_info().uniform().scale)); + build_opts.emplace("-DSCALE_OUT=" + support::cpp11::to_string( + output->info()->quantization_info().uniform().scale)); kernel_name += "_qasymm8"; } _kernel = diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp new file mode 100644 index 000000000..2d551f654 --- /dev/null +++ 
b/compute/ARMComputeEx/src/core/CL/kernels/CLQuantizationSymmetricKernel.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include "arm_compute/core/CL/kernels/CLQuantizationSymmetricKernel.h"
+
+#include "arm_compute/core/AccessWindowStatic.h"
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibraryEx.h"
+#include "arm_compute/core/CL/CLValidate.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+#include <algorithm>
+#include <utility>
+
+namespace arm_compute
+{
+namespace
+{
+// Checks the operands of the symmetric 8-bit quantization.
+//
+// input must be a single-channel F32/F16 tensor with at most 2 dimensions. scale_factor must have
+// the same data type as input, be non-empty, have at most 1 dimension and a length equal to
+// input's dimension 1. output must already be initialized, be S8 and have the same shape as input.
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *scale_factor,
+                          const ITensorInfo *output)
+{
+  // scale_factor is dereferenced below, so it is checked together with input and output
+  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, scale_factor, output);
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16);
+  ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
+  ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
+
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, scale_factor);
+  ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->tensor_shape().total_size() == 0);
+  ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->num_dimensions() > 1);
+  ARM_COMPUTE_RETURN_ERROR_ON(scale_factor->dimension(0) != input->dimension(1));
+
+  // Output must always be initialized
+  ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S8);
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+
+  return Status{};
+}
+
+// Builds the execution window over input and sets output's valid region to its full shape.
+// When a row of input holds at least one 16-byte vector, the X dimension is stepped by the vector
+// size and its end is rounded up to a multiple of it. The returned status is always OK.
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+{
+  // Configure kernel window
+  Window win = calculate_max_window(*input, Steps());
+
+  // Number of input elements that fit in 16 bytes
+  const int vec_size_x = 16 / input->element_size();
+  const int input_width_x = input->tensor_shape().x();
+  const bool multi_access_x = (input_width_x / vec_size_x > 0);
+
+  if (multi_access_x)
+  {
+    win.set(Window::DimX,
+            Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x),
+                              vec_size_x));
+  }
+
+  Coordinates coord;
+  coord.set_num_dimensions(output->num_dimensions());
+  output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
+
+  return std::make_pair(Status{}, win);
+}
+} // namespace
+
+CLQuantizationSymmetricKernel::CLQuantizationSymmetricKernel()
+    : _input(nullptr), _scale_factor(nullptr), _output(nullptr)
+{
+}
+
+// Validates and stores the operands, configures the execution window and builds the
+// "quantization_symm8" OpenCL kernel.
+void CLQuantizationSymmetricKernel::configure(const ICLTensor *input, const ICLTensor *scale_factor,
+                                              ICLTensor *output)
+{
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input, scale_factor, output);
+  ARM_COMPUTE_ERROR_THROW_ON(
+      validate_arguments(input->info(), scale_factor->info(), output->info()));
+
+  _input = input;
+  _scale_factor = scale_factor;
+  _output = output;
+
+  // Same vector-size computation as in validate_and_configure_window(); needed here for the
+  // build options
+  const int vec_size_x = 16 / input->info()->element_size();
+  const int input_width_x = input->info()->tensor_shape().x();
+  const bool multi_access_x = (input_width_x / vec_size_x > 0);
+
+  // Configure kernel window
+  auto win_config = validate_and_configure_window(input->info(), output->info());
+  ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+  ICLKernel::configure_internal(win_config.second);
+
+  // Create kernel
+  CLBuildOptions build_opts;
+  build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
+  build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
+  build_opts.add_option("-DDATA_TYPE_OUT=" +
+                        get_cl_type_from_data_type(output->info()->data_type()));
+  build_opts.add_option_if(
+      multi_access_x, "-DLAST_ACCESSED_X=" +
+                          support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
+
+  _kernel = static_cast<cl::Kernel>(
+      CLKernelLibraryEx::get().create_kernel("quantization_symm8", build_opts.options()));
+}
+
+// Static validation: runs the argument checks and then the window configuration on clones of the
+// tensor infos, so the caller's infos are not modified.
+Status CLQuantizationSymmetricKernel::validate(const ITensorInfo *input,
+                                               const ITensorInfo *scale_factor,
+                                               const ITensorInfo *output)
+{
+  ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, scale_factor, output));
+  ARM_COMPUTE_RETURN_ON_ERROR(
+      validate_and_configure_window(input->clone().get(), output->clone().get()).first);
+
+  return Status{};
+}
+
+// Enqueues the kernel once per 2D slice of the (collapsed) window.
+void CLQuantizationSymmetricKernel::run(const Window &window, cl::CommandQueue &queue)
+{
+  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+  // Support only 2D
+  Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+  Window slice = window_collapsed.first_slice_window_2D();
+
+  do
+  {
+    // Window for the 1D scale_factor tensor, derived from the current slice by shifting its
+    // dimensions by one
+    Window scale_slice = slice.shift_dimensions(1);
+
+    // Argument order: input (2D), scale_factor (1D), output (2D)
+    unsigned int idx = 0;
+    add_2D_tensor_argument(idx, _input, slice);
+    add_1D_tensor_argument(idx, _scale_factor, scale_slice);
+    add_2D_tensor_argument(idx, _output, slice);
+    enqueue(queue, *this, slice, lws_hint());
+  } while (window_collapsed.slide_window_slice_2D(slice));
+}
+} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
index 24e89db28..a98318323 100644
--- a/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
+++ b/compute/ARMComputeEx/src/core/CL/kernels/CLReduceOperationKernel.cpp
@@ -1,6 +1,5 @@
 /*
  * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,6 +13,31 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
+/*
+ * Copyright (c) 2017-2018 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp new file mode 100644 index 000000000..ff1904abd --- /dev/null +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLScaleFactorSymm8Kernel.cpp @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/core/CL/kernels/CLScaleFactorSymm8Kernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibraryEx.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" + +#include <climits> + +using namespace arm_compute; +using namespace arm_compute::misc::shape_calculator; + +namespace +{ +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2); + + if (output->tensor_shape().total_size() > 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + + TensorShape output_shape = TensorShape{input->dimension(1)}; + + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); + } + + return Status{}; +} + +std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) +{ + TensorShape output_shape = TensorShape{input->dimension(1)}; + + // Output auto initialization if not yet initialized + auto_init_if_empty(*output, output_shape, 1, input->data_type()); + + const unsigned int num_elems_processed_per_iteration = 1; + + // Configure kernel window + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + AccessWindowStatic output_access(output, 0, 0, output->dimension(0), 1); + + bool window_changed = update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); + + Status err = (window_changed) + 
? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") + : Status{}; + return std::make_tuple(err, win); +} +} // namespace + +CLScaleFactorSymm8Kernel::CLScaleFactorSymm8Kernel() : _input(nullptr), _output(nullptr) {} + +void CLScaleFactorSymm8Kernel::configure(const ICLTensor *input, ICLTensor *output) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); + + _input = input; + _output = output; + + std::set<std::string> build_opts; + build_opts.emplace("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); + + // Create kernel + _kernel = static_cast<cl::Kernel>( + CLKernelLibraryEx::get().create_kernel("scale_factor_symm8", build_opts)); + + auto win_config = validate_and_configure_window(input->info(), output->info()); + + ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); + + ICLKernel::configure_internal(std::get<1>(win_config)); +} + +Status CLScaleFactorSymm8Kernel::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); + ARM_COMPUTE_RETURN_ON_ERROR( + std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get()))); + + return Status{}; +} + +void CLScaleFactorSymm8Kernel::run(const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); + Window slice = window_collapsed.first_slice_window_2D(); + slice.set(Window::DimX, Window::Dimension(0, 1, 1)); + + do + { + Window output_slice = slice.shift_dimensions(1); + + unsigned int idx = 0; + // Set inputs + add_2D_tensor_argument(idx, _input, slice); + add_1D_tensor_argument(idx, _output, output_slice); + enqueue(queue, *this, slice, lws_hint()); + } while 
(window_collapsed.slide_window_slice_2D(slice)); +} diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp index f7836b6cd..8b9b57fd8 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + #include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h" #include "arm_compute/core/CL/CLHelpers.h" @@ -147,8 +171,8 @@ void CLSpaceToBatchNDKernel::configure(const ICLTensor *input, const ICLTensor * build_opts.emplace("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(3))); if (input->info()->data_type() == DataType::QASYMM8) { - build_opts.emplace("-DZERO_VALUE=" + - support::cpp11::to_string(input->info()->quantization_info().offset)); + build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string( + input->info()->quantization_info().uniform().offset)); } else { diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp index b085192a2..64fc0384e 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToDepthKernel.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp index 4f2b388c9..151d45e8d 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLTopKV2Kernel.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/core/CL/kernels/CLTopKV2Kernel.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp index 6cc8d9d13..61999cbd4 100644 --- a/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp +++ b/compute/ARMComputeEx/src/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017-2019 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h" #include "arm_compute/core/CL/CLHelpers.h" diff --git a/compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp b/compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp new file mode 100644 index 000000000..d6c49b2b4 --- /dev/null +++ b/compute/ARMComputeEx/src/core/CPP/kernels/CPPOneHotKernelEx.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h" +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/Traits.h" + +namespace arm_compute +{ +CPPOneHotKernelEx::CPPOneHotKernelEx() + : _indices(nullptr), _output(nullptr), _depth(0), _on_value(0), _off_value(0), _axis(-1) +{ +} + +void CPPOneHotKernelEx::configure(const ITensor *indices, ITensor *output, const int depth, + const float on_value, const float off_value, const int axis) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(indices, output); + ARM_COMPUTE_ERROR_THROW_ON(validate(indices, depth, on_value, off_value, axis)); + + _indices = indices; + _output = output; + _depth = depth; + _on_value = on_value; + _off_value = off_value; + _axis = axis; + + ICPPKernel::configure(Window()); // Default 1 iteration window +} + +Status CPPOneHotKernelEx::validate(const ITensor *indices, const int depth, const float on_value, + const float off_value, const int axis) +{ + ARM_COMPUTE_UNUSED(on_value, off_value); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(indices, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(indices->info()->num_dimensions() != 1, + "Only 1D indices are supported."); + ARM_COMPUTE_RETURN_ERROR_ON(depth <= 0); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis != -1, "Only axis = -1 is supported."); + return Status{}; +} + +bool CPPOneHotKernelEx::is_parallelisable() const { return false; } + +void CPPOneHotKernelEx::run(const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IKernel::window(), window); + + const auto num_indices = _indices->info()->dimension(0); + for (size_t i = 0; i < num_indices; ++i) + { + const auto index = 
*reinterpret_cast<int32_t *>(_indices->ptr_to_element(Coordinates{i})); + for (int d = 0; d < _depth; ++d) + *reinterpret_cast<float *>(_output->ptr_to_element(Coordinates(d, i))) = + (d == index) ? _on_value : _off_value; + } +} +} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp b/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp index 8ac667ceb..648afb304 100644 --- a/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h" #include "arm_compute/core/Error.h" @@ -81,7 +97,7 @@ void CPPUpsampleKernelEx::run(const Window &window, const ThreadInfo &info) // The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset const uint8_t fill_value = _output->info()->data_type() == DataType::QASYMM8 - ? utility::clamp<uint8_t>(_output->info()->quantization_info().offset) + ? 
utility::clamp<uint8_t>(_output->info()->quantization_info().uniform().offset) : 0; // Filling a value different than 0 works only for QASYMM8 datatype since we are filling 1byte // values in a buffer of uint8_ts diff --git a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp index 4508f5800..254c33ea9 100644 --- a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp +++ b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2016-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT @@ -221,8 +236,9 @@ void elementwise_op_quantized( const auto window_end_x = static_cast<int>(window.x().end()); const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); - const float output_scale = out->info()->quantization_info().scale; - const int output_offset = out->info()->quantization_info().offset; + UniformQuantizationInfo qinfo = out->info()->quantization_info().uniform(); + const float output_scale = qinfo.scale; + const int output_offset = qinfo.offset; // Output quantization info (add 0.5 to round toward the nearest integer - 0.5 rounds away from // zero) @@ -238,8 +254,10 @@ void elementwise_op_quantized( const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1; - const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info(); - const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info(); + const UniformQuantizationInfo broadcast_qinfo = + broadcast_tensor->info()->quantization_info().uniform(); + const UniformQuantizationInfo non_broadcast_qinfo = + non_broadcast_tensor->info()->quantization_info().uniform(); const int32x4_t voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset); const float32x4_t vscale_non_broadcast = vdupq_n_f32(non_broadcast_qinfo.scale); @@ -269,10 +287,8 @@ void elementwise_op_quantized( for (; x < window_end_x; ++x) { const float afs = - scvt_f32_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo.scale, - non_broadcast_qinfo.offset); - const float bfs = - scvt_f32_qasymm8(broadcast_value, broadcast_qinfo.scale, broadcast_qinfo.offset); + dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo); + const float bfs = dequantize_qasymm8(broadcast_value, broadcast_qinfo); *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? 
afs : bfs, out->info()->quantization_info()); @@ -283,12 +299,14 @@ void elementwise_op_quantized( else { // Input1 quantization info - const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset); - const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale); + UniformQuantizationInfo qinfo = in1->info()->quantization_info().uniform(); + const int32x4_t voffset1 = vdupq_n_s32(qinfo.offset); + const float32x4_t vscale1 = vdupq_n_f32(qinfo.scale); // Input2 quantization info - const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset); - const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale); + qinfo = in2->info()->quantization_info().uniform(); + const int32x4_t voffset2 = vdupq_n_s32(qinfo.offset); + const float32x4_t vscale2 = vdupq_n_f32(qinfo.scale); // Clear X Dimension on execution window as we handle manually input1_win.set(Window::DimX, Window::Dimension(0, 1, 1)); @@ -301,26 +319,24 @@ void elementwise_op_quantized( Iterator input2(in2, input2_win); Iterator output(out, win); - execute_window_loop( - win, - [&](const Coordinates &) { - const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr()); - const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr()); - const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr()); + execute_window_loop(win, + [&](const Coordinates &) { + const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr()); + const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr()); + const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr()); - int x = - (*neon_func)(window_start_x, window_end_x, window_step_x, input1_ptr, input2_ptr, - output_ptr, voffset1, voffset2, vscale1, vscale2, voffseto, invvscaleo); - for (; x < window_end_x; ++x) - { - const float afs = - scvt_f32_qasymm8(*(input1_ptr + x), input1_qinfo.scale, input1_qinfo.offset); - const float bfs = - scvt_f32_qasymm8(*(input2_ptr + 
x), input2_qinfo.scale, input2_qinfo.offset); - *(output_ptr + x) = (*scalar_func)(afs, bfs, out->info()->quantization_info()); - } - }, - input1, input2, output); + int x = (*neon_func)(window_start_x, window_end_x, window_step_x, + input1_ptr, input2_ptr, output_ptr, voffset1, + voffset2, vscale1, vscale2, voffseto, invvscaleo); + for (; x < window_end_x; ++x) + { + const float afs = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo); + const float bfs = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo); + *(output_ptr + x) = + (*scalar_func)(afs, bfs, out->info()->quantization_info()); + } + }, + input1, input2, output); } } diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp new file mode 100644 index 000000000..648705ba9 --- /dev/null +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEActivationLayerKernelEx.cpp @@ -0,0 +1,730 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h" + +#include "arm_compute/core/CPP/Validate.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/NEAsymm.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/NEON/NEMath.h" +#include "arm_compute/core/NEON/NESymm.h" +#include "arm_compute/core/NEON/wrapper/wrapper.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include <arm_neon.h> +#include <array> +#include <cmath> +#include <map> +#include <set> + +using namespace arm_compute; +namespace +{ +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, + const ActivationLayerInfo &activation_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( + input, 1, DataType::U8, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32); + + static std::set<ActivationLayerInfo::ActivationFunction> qasymm8_supported_activations = { + ActivationLayerInfo::ActivationFunction::RELU, + ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, + ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, + ActivationLayerInfo::ActivationFunction::LOGISTIC, + ActivationLayerInfo::ActivationFunction::TANH}; + static std::set<ActivationLayerInfo::ActivationFunction> qsymm16_supported_activations = { + ActivationLayerInfo::ActivationFunction::LOGISTIC, + ActivationLayerInfo::ActivationFunction::TANH}; + const DataType data_type = input->data_type(); + const QuantizationInfo &oq_info = + (output != nullptr) ? 
output->quantization_info() : input->quantization_info(); + const ActivationLayerInfo::ActivationFunction f_act = activation_info.activation(); + + ARM_COMPUTE_RETURN_ERROR_ON_MSG( + is_data_type_quantized_asymmetric(data_type) && + (qasymm8_supported_activations.count(f_act) == 0), + "For QASYMM8 only tanh, logistic, relu and lower/upper bounded relu are supported"); + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_symmetric(data_type) && + (qsymm16_supported_activations.count(f_act) == 0), + "For QSYMM16 only tanh and logistic are supported"); + ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && + (f_act == ActivationLayerInfo::ActivationFunction::TANH) && + (oq_info != QuantizationInfo(1.f / 128.f, 128))); + ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(data_type) && + (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && + (oq_info != QuantizationInfo(1.f / 256.f, 0))); + + ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && + (f_act == ActivationLayerInfo::ActivationFunction::TANH) && + (oq_info != QuantizationInfo(1.f / 32768.f, 0))); + ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && + (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && + (oq_info != QuantizationInfo(1.f / 32768.f, 0))); + + // Checks performed when output is configured + if ((output != nullptr) && (output->total_size() != 0)) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + } + + return Status{}; +} + +std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) +{ + // Configure kernel window + Window win = calculate_max_window(*input, Steps()); + + if (output != nullptr) + { + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output, *input->clone()); + + // NEActivationLayerKernelEx doesn't need padding so 
update_window_and_padding() can be skipped + Coordinates coord; + coord.set_num_dimensions(output->num_dimensions()); + output->set_valid_region(ValidRegion(coord, output->tensor_shape())); + } + + return std::make_pair(Status{}, win); +} + +inline uint32x4_t vreinterpret_unsigend_int(const float32x4_t &vec) +{ + return vreinterpretq_u32_f32(vec); +} + +inline float32x4_t vreinterpret_floating_point(const uint32x4_t &vec) +{ + return vreinterpretq_f32_u32(vec); +} + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +inline uint16x8_t vreinterpret_unsigend_int(const float16x8_t &vec) +{ + return vreinterpretq_u16_f16(vec); +} +inline float16x8_t vreinterpret_floating_point(const uint16x8_t &vec) +{ + return vreinterpretq_f16_u16(vec); +} +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ +} // namespace + +NEActivationLayerKernelEx::NEActivationLayerKernelEx() + : _input(nullptr), _output(nullptr), _func(nullptr), _act_info() +{ +} + +void NEActivationLayerKernelEx::configure(ITensor *input, ITensor *output, + ActivationLayerInfo activation_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input); + + _input = input; + _act_info = activation_info; + _output = input; + + // Out-of-place calculation + if (output != nullptr) + { + _output = output; + } + + // Disabled activation, thus no operation needed + if (!activation_info.enabled()) + { + _func = nullptr; + } + + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments( + input->info(), (output != nullptr) ? 
output->info() : nullptr, activation_info)); + + // Activation functions : FP32 + static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f32 = { + {ActivationFunction::ABS, + &NEActivationLayerKernelEx::activation<ActivationFunction::ABS, float>}, + {ActivationFunction::LINEAR, + &NEActivationLayerKernelEx::activation<ActivationFunction::LINEAR, float>}, + {ActivationFunction::LOGISTIC, + &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, float>}, + {ActivationFunction::RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, float>}, + {ActivationFunction::BOUNDED_RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, float>}, + {ActivationFunction::LU_BOUNDED_RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, float>}, + {ActivationFunction::LEAKY_RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::LEAKY_RELU, float>}, + {ActivationFunction::SOFT_RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::SOFT_RELU, float>}, + {ActivationFunction::ELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::ELU, float>}, + {ActivationFunction::SQRT, + &NEActivationLayerKernelEx::activation<ActivationFunction::SQRT, float>}, + {ActivationFunction::SQUARE, + &NEActivationLayerKernelEx::activation<ActivationFunction::SQUARE, float>}, + {ActivationFunction::TANH, + &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, float>}, + {ActivationFunction::IDENTITY, + &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, float>}, + }; + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + // Activation functions : FP16 + static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_f16 = { + {ActivationFunction::ABS, + &NEActivationLayerKernelEx::activation<ActivationFunction::ABS, float16_t>}, + {ActivationFunction::LINEAR, + &NEActivationLayerKernelEx::activation<ActivationFunction::LINEAR, float16_t>}, 
+ {ActivationFunction::LOGISTIC, + &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, float16_t>}, + {ActivationFunction::RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, float16_t>}, + {ActivationFunction::BOUNDED_RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, float16_t>}, + {ActivationFunction::LU_BOUNDED_RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, float16_t>}, + {ActivationFunction::LEAKY_RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::LEAKY_RELU, float16_t>}, + {ActivationFunction::SOFT_RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::SOFT_RELU, float16_t>}, + {ActivationFunction::ELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::ELU, float16_t>}, + {ActivationFunction::SQRT, + &NEActivationLayerKernelEx::activation<ActivationFunction::SQRT, float16_t>}, + {ActivationFunction::SQUARE, + &NEActivationLayerKernelEx::activation<ActivationFunction::SQUARE, float16_t>}, + {ActivationFunction::TANH, + &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, float16_t>}, + {ActivationFunction::IDENTITY, + &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, float16_t>}, + }; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ + + // Activation functions : QASYMM8 + static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qasymm8 = { + {ActivationFunction::LOGISTIC, + &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, qasymm8_t>}, + {ActivationFunction::BOUNDED_RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::BOUNDED_RELU, qasymm8_t>}, + {ActivationFunction::LU_BOUNDED_RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::LU_BOUNDED_RELU, qasymm8_t>}, + {ActivationFunction::RELU, + &NEActivationLayerKernelEx::activation<ActivationFunction::RELU, qasymm8_t>}, + {ActivationFunction::TANH, + 
&NEActivationLayerKernelEx::activation<ActivationFunction::TANH, qasymm8_t>}, + {ActivationFunction::IDENTITY, + &NEActivationLayerKernelEx::activation<ActivationFunction::IDENTITY, qasymm8_t>}, + }; + + // Activation functions : QSYMM16 + static std::map<ActivationFunction, ActivationFunctionExecutorPtr> act_map_qsymm16 = { + {ActivationFunction::LOGISTIC, + &NEActivationLayerKernelEx::activation<ActivationFunction::LOGISTIC, qsymm16_t>}, + {ActivationFunction::TANH, + &NEActivationLayerKernelEx::activation<ActivationFunction::TANH, qsymm16_t>}, + }; + + switch (input->info()->data_type()) + { + case DataType::QASYMM8: + _func = act_map_qasymm8[activation_info.activation()]; + break; + case DataType::QSYMM16: + _func = act_map_qsymm16[activation_info.activation()]; + break; + case DataType::F32: + _func = act_map_f32[activation_info.activation()]; + break; +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + case DataType::F16: + _func = act_map_f16[activation_info.activation()]; + break; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + default: + ARM_COMPUTE_ERROR("Unsupported data type."); + } + + // Configure kernel window + auto win_config = + validate_and_configure_window(input->info(), (output != nullptr) ? output->info() : nullptr); + ARM_COMPUTE_ERROR_THROW_ON(win_config.first); + ICPPKernel::configure(win_config.second); +} + +template <ActivationLayerInfo::ActivationFunction F, typename T> +typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type +NEActivationLayerKernelEx::activation(const Window &window) +{ + /** NEON vector tag type. 
*/ + using ExactTagType = + typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>; + + const int window_step_x = 16 / sizeof(T); + const auto window_start_x = static_cast<int>(window.x().start()); + const auto window_end_x = static_cast<int>(window.x().end()); + const ActivationFunction act = F; + + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + Iterator input(_input, win_collapsed); + Iterator output(_output, win_collapsed); + + const auto infinity = wrapper::vdup_n(std::numeric_limits<T>::infinity(), ExactTagType{}); + const auto epsilon = wrapper::vdup_n(static_cast<T>(1e-24), ExactTagType{}); + const auto const_1 = wrapper::vdup_n(static_cast<T>(1.f), ExactTagType{}); + const auto const_0 = wrapper::vdup_n(static_cast<T>(0.f), ExactTagType{}); + const auto va = wrapper::vdup_n(static_cast<T>(_act_info.a()), ExactTagType{}); + const auto vb = wrapper::vdup_n(static_cast<T>(_act_info.b()), ExactTagType{}); + const auto a = static_cast<T>(_act_info.a()); + const auto b = static_cast<T>(_act_info.b()); + + execute_window_loop( + win_collapsed, + [&](const Coordinates &) { + const auto input_ptr = reinterpret_cast<const T *>(input.ptr()); + const auto output_ptr = reinterpret_cast<T *>(output.ptr()); + + wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp; + + // Compute S elements per iteration + int x = window_start_x; + + for (; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto vin = wrapper::vloadq(input_ptr + x); + switch (act) + { + case ActivationFunction::ABS: + tmp = wrapper::vabs(vin); + break; + case ActivationFunction::LINEAR: + tmp = wrapper::vmla(vb, va, vin); + break; + case ActivationFunction::LOGISTIC: + // exp(-vin) + tmp = wrapper::vexpq(wrapper::vneg(vin)); + + // NaN -> INF + tmp = vreinterpret_floating_point(wrapper::vorr( + wrapper::vand(wrapper::vnot(wrapper::vceq(tmp, tmp)), 
+ vreinterpret_unsigend_int(infinity)), + wrapper::vand(wrapper::vceq(tmp, tmp), vreinterpret_unsigend_int(tmp)))); + + // 1 / 1 + tmp + tmp = wrapper::vinv(wrapper::vadd(const_1, tmp)); + break; + case ActivationFunction::RELU: + tmp = wrapper::vmax(const_0, vin); + break; + case ActivationFunction::BOUNDED_RELU: + tmp = wrapper::vmin(va, wrapper::vmax(const_0, vin)); + break; + case ActivationFunction::LU_BOUNDED_RELU: + tmp = wrapper::vmin(va, wrapper::vmax(vb, vin)); + break; + case ActivationFunction::LEAKY_RELU: + tmp = wrapper::vbsl(wrapper::vcgt(vin, const_0), vin, wrapper::vmul(va, vin)); + break; + case ActivationFunction::SOFT_RELU: + tmp = wrapper::vlog(wrapper::vadd(const_1, wrapper::vexpq(vin))); + break; + case ActivationFunction::ELU: + tmp = wrapper::vbsl(wrapper::vcge(vin, const_0), vin, + wrapper::vmul(va, wrapper::vsub(wrapper::vexpq(vin), const_1))); + break; + case ActivationFunction::SQRT: + tmp = wrapper::vinv(wrapper::vinvsqrt(vin + epsilon)); + break; + case ActivationFunction::SQUARE: + tmp = wrapper::vmul(vin, vin); + break; + case ActivationFunction::TANH: + tmp = wrapper::vmul(va, wrapper::vtanh(wrapper::vmul(vb, vin))); + break; + case ActivationFunction::IDENTITY: + tmp = vin; + break; + default: + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + wrapper::vstore(output_ptr + x, tmp); + } + + // Compute left-over elements + for (; x < window_end_x; ++x) + { + const T in = *(reinterpret_cast<const T *>(input_ptr + x)); + T tmp; + switch (act) + { + case ActivationFunction::ABS: + tmp = std::abs(in); + break; + case ActivationFunction::LINEAR: + tmp = a * in + b; + break; + case ActivationFunction::LOGISTIC: + tmp = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-in)); + break; + case ActivationFunction::RELU: + tmp = std::max<T>(static_cast<T>(0), in); + break; + case ActivationFunction::BOUNDED_RELU: + tmp = std::min<T>(a, std::max(static_cast<T>(0), in)); + break; + case ActivationFunction::LU_BOUNDED_RELU: + tmp = 
std::min<T>(a, std::max<T>(b, in)); + break; + case ActivationFunction::LEAKY_RELU: + tmp = (in > 0) ? in : a * in; + break; + case ActivationFunction::SOFT_RELU: + tmp = std::log(static_cast<T>(1) + std::exp(in)); + break; + case ActivationFunction::ELU: + tmp = (in >= 0) ? in : a * (std::exp(in) - 1); + break; + case ActivationFunction::SQRT: + tmp = std::sqrt(in); + break; + case ActivationFunction::SQUARE: + tmp = in * in; + break; + case ActivationFunction::TANH: + tmp = a * std::tanh(b * in); + break; + case ActivationFunction::IDENTITY: + tmp = in; + break; + default: + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + *(output_ptr + x) = tmp; + } + }, + input, output); +} + +template <ActivationLayerInfo::ActivationFunction F, typename T> +typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type +NEActivationLayerKernelEx::activation(const Window &window) +{ + const int window_step_x = 16 / sizeof(T); + const auto window_start_x = static_cast<int>(window.x().start()); + const auto window_end_x = static_cast<int>(window.x().end()); + const ActivationFunction act = F; + + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + Iterator input(_input, win_collapsed); + Iterator output(_output, win_collapsed); + + const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in)); + const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in)); + const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in); + const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in); + const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in); + const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0); + const auto vconst_1 = vdupq_n_f32(1.f); + const float32x4_t va_f32 = 
vdupq_n_f32(_act_info.a()); + const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b()); + const float a_f32 = _act_info.a(); + const float b_f32 = _act_info.b(); + + // Initialise scale/offset for re-quantization + float s = qi_in.scale / qi_out.scale; + float o = -qi_in.offset * s + qi_out.offset; + float32x4_t vs = vdupq_n_f32(s); + float32x4_t vo = vdupq_n_f32(o); + + execute_window_loop( + win_collapsed, + [&](const Coordinates &) { + const auto input_ptr = reinterpret_cast<const T *>(input.ptr()); + const auto output_ptr = reinterpret_cast<T *>(output.ptr()); + + wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp; + + // Compute S elements per iteration + int x = window_start_x; + for (; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto vin = wrapper::vloadq(input_ptr + x); + if (act == ActivationFunction::RELU) + { + // Perform activation + tmp = vmaxq_u8(vconst_0, vin); + // Re-quantize to new output space + tmp = vmlaq_qasymm8(tmp, vs, vo); + } + else if (act == ActivationFunction::BOUNDED_RELU) + { + // Perform activation + tmp = vminq_u8(va, vmaxq_u8(vconst_0, vin)); + // Re-quantize to new output space + tmp = vmlaq_qasymm8(tmp, vs, vo); + } + else if (act == ActivationFunction::LU_BOUNDED_RELU) + { + // Perform activation + tmp = vminq_u8(va, vmaxq_u8(vb, vin)); + // Re-quantize to new output space + tmp = vmlaq_qasymm8(tmp, vs, vo); + } + else if (act == ActivationFunction::LOGISTIC) + { + // De-quantize + const auto vin_deq = vdequantize(vin, qi_in); + // Perform activation + const float32x4x4_t tmp_dep = {{ + wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg( + vin_deq.val[0])))), + wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg( + vin_deq.val[1])))), + wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg( + vin_deq.val[2])))), + wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg( + vin_deq.val[3])))), 
+ }}; + // Re-quantize to new output space + tmp = vquantize(tmp_dep, qi_out); + } + else if (act == ActivationFunction::TANH) + { + // De-quantize + const auto vin_deq = vdequantize(vin, qi_in); + // Perform activation + const float32x4x4_t tmp_dep = {{ + wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))), + wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))), + wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[2], vb_f32))), + wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[3], vb_f32))), + }}; + // Re-quantize to new output space + tmp = vquantize(tmp_dep, qi_out); + } + else + { + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + wrapper::vstore(output_ptr + x, tmp); + } + + // Compute left-over elements + for (; x < window_end_x; ++x) + { + T in = *(reinterpret_cast<const T *>(input_ptr + x)); + T tmp; + if (act == ActivationFunction::RELU) + { + tmp = std::max(const_0, in); + tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255)); + } + else if (act == ActivationFunction::BOUNDED_RELU) + { + tmp = std::min(a, std::max(const_0, in)); + tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255)); + } + else if (act == ActivationFunction::LU_BOUNDED_RELU) + { + tmp = std::min(a, std::max(b, in)); + tmp = std::max<int32_t>(0, std::min<int32_t>(tmp * s + o, 255)); + } + else if (act == ActivationFunction::LOGISTIC) + { + float tmp_f = dequantize_qasymm8(in, qi_in); + tmp_f = 1.f / (1.f + std::exp(-tmp_f)); + tmp = quantize_qasymm8(tmp_f, qi_out); + } + else if (act == ActivationFunction::TANH) + { + float tmp_f = dequantize_qasymm8(in, qi_in); + tmp_f = a_f32 * std::tanh(b_f32 * tmp_f); + tmp = quantize_qasymm8(tmp_f, qi_out); + } + else + { + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + *(output_ptr + x) = tmp; + } + }, + input, output); +} + +template <ActivationLayerInfo::ActivationFunction F, typename T> +typename std::enable_if<std::is_same<T, 
qsymm16_t>::value, void>::type +NEActivationLayerKernelEx::activation(const Window &window) +{ + const int window_step_x = 16 / sizeof(T); + const auto window_start_x = static_cast<int>(window.x().start()); + const auto window_end_x = static_cast<int>(window.x().end()); + const ActivationFunction act = F; + + Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); + win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); + + Iterator input(_input, win_collapsed); + Iterator output(_output, win_collapsed); + + const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const auto vconst_1 = vdupq_n_f32(1.f); + const float32x4_t va_f32 = vdupq_n_f32(_act_info.a()); + const float32x4_t vb_f32 = vdupq_n_f32(_act_info.b()); + const float a_f32 = _act_info.a(); + const float b_f32 = _act_info.b(); + + execute_window_loop( + win_collapsed, + [&](const Coordinates &) { + const auto input_ptr = reinterpret_cast<const T *>(input.ptr()); + const auto output_ptr = reinterpret_cast<T *>(output.ptr()); + + wrapper::traits::neon_bitvector_t<T, wrapper::traits::BitWidth::W128> tmp; + ARM_COMPUTE_UNUSED(tmp); + + // Compute S elements per iteration + int x = window_start_x; + for (; x <= (window_end_x - window_step_x); x += window_step_x) + { + const auto vin = wrapper::vloadq(input_ptr + x); + if (act == ActivationFunction::LOGISTIC) + { + // De-quantize + const auto vin_deq = vdequantize_int16(vin, qi_in.scale); + // Perform activation + const float32x4x2_t tmp_dep = {{ + wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg( + vin_deq.val[0])))), + wrapper::vdiv(vconst_1, wrapper::vadd(vconst_1, wrapper::vexpq(wrapper::vneg( + vin_deq.val[1])))), + }}; + // Re-quantize to new output space + tmp = vquantize_int16(tmp_dep, qi_out.scale); + } + else if (act == ActivationFunction::TANH) + { + // De-quantize + const auto 
vin_deq = vdequantize_int16(vin, qi_in.scale); + // Perform activation + const float32x4x2_t tmp_dep = {{ + wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[0], vb_f32))), + wrapper::vmul(va_f32, wrapper::vtanh(wrapper::vmul(vin_deq.val[1], vb_f32))), + }}; + // Re-quantize to new output space + tmp = vquantize_int16(tmp_dep, qi_out.scale); + } + else + { + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + wrapper::vstore(output_ptr + x, tmp); + } + + // Compute left-over elements + for (; x < window_end_x; ++x) + { + T in = *(reinterpret_cast<const T *>(input_ptr + x)); + T tmp; + if (act == ActivationFunction::LOGISTIC) + { + float tmp_f = dequantize_qsymm16(in, qi_in.scale); + tmp_f = 1.f / (1.f + std::exp(-tmp_f)); + tmp = quantize_qsymm16(tmp_f, qi_out); + } + else if (act == ActivationFunction::TANH) + { + float tmp_f = dequantize_qsymm16(in, qi_in.scale); + tmp_f = a_f32 * std::tanh(b_f32 * tmp_f); + tmp = quantize_qsymm16(tmp_f, qi_out); + } + else + { + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + *(output_ptr + x) = tmp; + } + }, + input, output); +} + +Status NEActivationLayerKernelEx::validate(const ITensorInfo *input, const ITensorInfo *output, + const ActivationLayerInfo &act_info) +{ + ARM_COMPUTE_UNUSED(act_info); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info)); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_and_configure_window(input->clone().get(), + (output != nullptr) ? 
output->clone().get() : nullptr) + .first); + + return Status{}; +} + +void NEActivationLayerKernelEx::run(const Window &window, const ThreadInfo &info) +{ + // Early exit on disabled activation + if (!_act_info.enabled()) + { + return; + } + + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + ARM_COMPUTE_ERROR_ON(_func == nullptr); + + (this->*_func)(window); +} diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp index d2f42de53..32d7d6237 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEBinaryLogicalOperationKernel.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + #include "arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h" #include "arm_compute/core/Error.h" diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp index 7e4fc129b..fbb9dbca9 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + #include "arm_compute/core/NEON/kernels/NECastKernel.h" #include "arm_compute/core/AccessWindowStatic.h" @@ -394,7 +410,8 @@ template <typename FromT> void run_cast(const ITensor *input, ITensor *output, c case DataType::QASYMM8: { using to_vector = typename cast_vector<float>::type; - const QuantizationInfo &qinfo_out = output->info()->quantization_info(); + const UniformQuantizationInfo &qinfo_out = + output->info()->quantization_info().uniform(); const auto vf = vcast<to_vector, from_vector>(vin); const auto vout = vquantize(vf, qinfo_out); store_result<qasymm8_t>(reinterpret_cast<qasymm8_t *>(out.ptr()) + x, vout); @@ -440,7 +457,8 @@ template <typename FromT> void run_cast(const ITensor *input, ITensor *output, c case DataType::QASYMM8: { const QuantizationInfo &qinfo_out = output->info()->quantization_info(); - const auto qval = qinfo_out.quantize(static_cast<float>(val), rounding_policy); + const auto qval = + quantize_qasymm8(static_cast<float>(val), qinfo_out, rounding_policy); *(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval; break; } @@ -486,8 +504,8 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo #else //__aarch64__ constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO; #endif //__aarch64__ - const auto &qinfo_in = input->info()->quantization_info(); - const auto &qinfo_out = output->info()->quantization_info(); + const auto &qinfo_in = input->info()->quantization_info().uniform(); + const auto &qinfo_out = output->info()->quantization_info().uniform(); execute_window_loop( win_collapsed, @@ -547,7 +565,7 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo for (; x < window_end_x; ++x) { qasymm8_t qval_in = *(in_ptr + x); - const auto val = qinfo_in.dequantize(qval_in); + const auto val = dequantize_qasymm8(qval_in, qinfo_in); switch (output->info()->data_type()) { @@ -558,7 +576,7 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window 
&windo } case DataType::QASYMM8: { - const auto qval_out = qinfo_out.quantize(val, rounding_policy); + const auto qval_out = quantize_qasymm8(val, qinfo_out, rounding_policy); *(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval_out; break; } diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp index 8a2223c26..95e269dee 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernelEx.h" #include "arm_compute/core/Helpers.h" diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp index cebd614df..200fc4f87 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEElementwiseUnaryKernelEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h" #include "arm_compute/core/CPP/Validate.h" diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp index 5401afea0..091d38c56 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEEmbeddingLookupKernel.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2018-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h" #include "arm_compute/core/Error.h" diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp index ce2413dc1..4c0a5e799 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEGatherKernelEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h" #include "arm_compute/core/CPP/Validate.h" diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp index 391337bfb..30787c0a4 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h" #include "arm_compute/core/Error.h" @@ -118,7 +134,7 @@ void NEHashtableLookupKernel::run(const Window &window, const ThreadInfo &info) const size_t lookup_dim = _output->info()->num_dimensions() - 1; const int const_0 = _output->info()->data_type() == DataType::QASYMM8 - ? _output->info()->quantization_info().offset + ? _output->info()->quantization_info().uniform().offset : 0; std::unordered_map<int32_t, size_t> key_index_map; diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp index 1ea77fb5c..49adf1462 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernelEx.h" #include "arm_compute/core/CPP/Validate.h" diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp index de218d489..b92130cec 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEMultiplyScaleFactorKernel.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + #include "arm_compute/core/NEON/kernels/NEMuliplyScaleFactorKernel.h" #include "arm_compute/core/Error.h" @@ -71,12 +87,6 @@ inline int32x4x4_t load_value(const int32_t *input_ptr) wrapper::vloadq(input_ptr + 8), wrapper::vloadq(input_ptr + 12)}; } -inline float32x4x4_t load_value(const float *input_ptr) -{ - return {wrapper::vloadq(input_ptr), wrapper::vloadq(input_ptr + 4), - wrapper::vloadq(input_ptr + 8), wrapper::vloadq(input_ptr + 12)}; -} - #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC inline const float32x4x4_t load_value(const float16_t *input_ptr) { diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp index ad1bb9051..641641b5a 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + #include "arm_compute/core/NEON/kernels/NEPReLUKernel.h" #include "arm_compute/core/ITensor.h" @@ -63,7 +79,8 @@ template <ConditionalOperation op> inline uint8_t elementwise_conditional_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo) { - return qinfo.quantize(elementwise_conditional_op_scalar<op>(a, b), RoundingPolicy::TO_NEAREST_UP); + return quantize_qasymm8(elementwise_conditional_op_scalar<op>(a, b), qinfo, + RoundingPolicy::TO_NEAREST_UP); } template <ConditionalOperation op, typename VectorType> diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp index acf0092eb..6ba0f1fd4 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEQuantizationSymmetricKernel.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + #include "arm_compute/core/NEON/kernels/NEQuantizationSymmetricKernel.h" #include "arm_compute/core/Error.h" diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp index 59e7d9beb..3b65eac10 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NEReductionOperationKernelEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/core/NEON/kernels/NEReductionOperationKernelEx.h" #include "arm_compute/core/CPP/Validate.h" diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp index 36a2f55a9..44feb200f 100644 --- a/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp +++ b/compute/ARMComputeEx/src/core/NEON/kernels/NESpaceToDepthLayerKernelEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernelEx.h" #include "arm_compute/core/Helpers.h" diff --git a/compute/ARMComputeEx/src/core/UtilsEx.cpp b/compute/ARMComputeEx/src/core/UtilsEx.cpp index 94242b56b..863316909 100644 --- a/compute/ARMComputeEx/src/core/UtilsEx.cpp +++ b/compute/ARMComputeEx/src/core/UtilsEx.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/core/UtilsEx.h" #include "arm_compute/core/Error.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp index ae64a6edd..2d379cf36 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLArgOperation.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/CL/functions/CLArgOperation.h" #include "arm_compute/core/CL/kernels/CLArgOperationKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp index 7c5fe5eda..92ee69a36 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLBinaryLogicalOp.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/CL/functions/CLBinaryLogicalOp.h" #include "arm_compute/core/CL/kernels/CLBinaryLogicalOpKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp index 742fc6f59..b3118f39e 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLCast.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/CL/functions/CLCast.h" #include "arm_compute/core/CL/kernels/CLCastKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp index c2e4ca9ff..db662505a 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLDepthToSpace.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/CL/functions/CLDepthToSpace.h" #include "arm_compute/core/CL/kernels/CLDepthToSpaceKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp index 2781784ca..3d9a28a48 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLEmbeddingLookup.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/CL/functions/CLEmbeddingLookup.h" #include "arm_compute/core/CL/kernels/CLEmbeddingLookupKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp new file mode 100644 index 000000000..f098832b0 --- /dev/null +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedHybridLayer.cpp @@ -0,0 +1,337 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h" + +#include "arm_compute/core/Size2D.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/ToolchainSupport.h" + +#include <algorithm> + +using namespace arm_compute; +using namespace arm_compute::misc::shape_calculator; + +namespace +{ +Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo &output) +{ + ARM_COMPUTE_UNUSED(input); + ARM_COMPUTE_UNUSED(weights); + ARM_COMPUTE_UNUSED(output); + ARM_COMPUTE_RETURN_ON_ERROR( + CLGEMMLowpMatrixMultiplyCoreEx::validate(&input, &weights, nullptr, &output)); + + return Status{}; +} +} // namespace + +void CLFullyConnectedHybridLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>(); + k->configure(input, output); + _kernel = std::move(k); +} + +Status CLFullyConnectedHybridLayerReshapeWeights::validate(const ITensorInfo *input, + const ITensorInfo *output) +{ + return CLTransposeKernel::validate(input, output); +} + +CLFullyConnectedHybridLayer::CLFullyConnectedHybridLayer( + std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(memory_manager), _reshape_weights_kernel(), _quant_input_kernel(), + _mm_gemmlowp(memory_manager), _multiply_scale_kernel(), _accumulate_biases_kernel(), + _reshape_weights_output(), _quantized_input(), _scale_factor(), _gemmlowp_output(), + _are_weights_reshaped(true), _accumulate_biases(false), _is_prepared(false), + _original_weights(nullptr) +{ +} +void CLFullyConnectedHybridLayer::configure_mm(const ICLTensor *input, const ICLTensor *weights, + ICLTensor *output, bool retain_internal_weights) +{ + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1)); + + 
ARM_COMPUTE_UNUSED(output); + ARM_COMPUTE_UNUSED(retain_internal_weights); + // Configure gemmlowp function + _mm_gemmlowp.configure(input, weights, nullptr, output); +} + +void CLFullyConnectedHybridLayer::configure(const ICLTensor *input, const ICLTensor *weights, + const ICLTensor *biases, ICLTensor *output, + FullyConnectedLayerInfo fc_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + + // Perform validate step + ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedHybridLayer::validate( + input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), + fc_info)); + + _are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; + _accumulate_biases = false; + _is_prepared = fc_info.retain_internal_weights; + _original_weights = weights; + + // Configure accumulate biases kernel for non quantized asymmetric types + if (biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + + _accumulate_biases = true; + + // Configure accumulate biases kernel + _accumulate_biases_kernel.set_target(CLScheduler::get().target()); + _accumulate_biases_kernel.configure(output, biases); + } + + const ICLTensor *weights_to_use = weights; + + // With the Fully Connected layer we can have 4 different cases: + // 1) Convolution layer -> Fully Connected layer without batches + // 2) Fully Connected layer -> Fully Connected layer without batches + // 3) Convolution layer -> Fully Connected layer with batches + // 4) Fully Connected layer -> Fully Connected layer with batches + + // Check if we have a fully connected layer with batches + const bool is_batched_fc_layer = output->info()->dimension(1) > 1; + bool is_fc_after_conv = false; + if (is_batched_fc_layer) + { + is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && + (std::equal(input->info()->tensor_shape().cbegin() + 3, + input->info()->tensor_shape().cend(), + output->info()->tensor_shape().cbegin() + 1)); + } + else + { + 
is_fc_after_conv = input->info()->num_dimensions() > 1 && input->info()->dimension(1) > 1; + } + ARM_COMPUTE_ERROR_ON_MSG(is_fc_after_conv, + "CLFullyConnectedHybridLayer does not support after conv"); + ARM_COMPUTE_UNUSED(is_fc_after_conv); + + // Reshape weights if needed + if (!_are_weights_reshaped) + { + // Reshape the weights + _reshape_weights_output.allocator()->init( + weights->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( + compute_transposed_shape(*weights->info()))); + _reshape_weights_kernel.configure(weights_to_use, &_reshape_weights_output); + weights_to_use = &_reshape_weights_output; + } + + // Extract scale factor + _scale_factor.allocator()->init( + TensorInfo(TensorShape{output->info()->dimension(1)}, 1, input->info()->data_type())); + _memory_group.manage(&_scale_factor); + _scale_factor_kernel.configure(input, &_scale_factor); + + // Quantize input + _quantized_input.allocator()->init( + input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8)); + _memory_group.manage(&_quantized_input); + _quant_input_kernel.configure(input, &_scale_factor, &_quantized_input); + + // GEMMLowp + _gemmlowp_output.allocator()->init( + output->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); + _memory_group.manage(&_gemmlowp_output); + configure_mm(&_quantized_input, weights_to_use, &_gemmlowp_output, + fc_info.retain_internal_weights); + _quantized_input.allocator()->allocate(); + + // Multiply scale + _multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output, + weights->info()->quantization_info().uniform().scale); + _gemmlowp_output.allocator()->allocate(); + _scale_factor.allocator()->allocate(); + + _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights; +} + +Status CLFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, + const ITensorInfo *biases, const ITensorInfo *output, + 
FullyConnectedLayerInfo fc_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::S8); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); + + bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; + bool is_fc_after_conv = true; + const GPUTarget gpu_target = CLScheduler::get().target(); + + const ITensorInfo &reshaped_weights = + TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( + compute_transposed_shape(*weights))); + + // Configure accumulate biases kernel for non quantized asymmetric types + if (biases != nullptr) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + ARM_COMPUTE_RETURN_ON_ERROR( + CLGEMMMatrixAccumulateBiasesKernel::validate(output, biases, gpu_target)); + } + + // With the Fully Connected layer we can have 4 different cases: + // 1) Convolution layer -> Fully Connected layer without batches + // 2) Fully Connected layer -> Fully Connected layer without batches + // 3) Convolution layer -> Fully Connected layer with batches + // 4) Fully Connected layer -> Fully Connected layer with batches + + const ITensorInfo *weights_to_use = weights; + + // Check if we have a fully connected layer with batches + const bool is_batched_fc_layer = output->dimension(1) > 1; + if (is_batched_fc_layer) + { + is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && + (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(), + output->tensor_shape().cbegin() + 1)); + } + else + { + is_fc_after_conv = input->num_dimensions() > 1 && input->dimension(1) > 1; + } + ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_fc_after_conv, + "CLFullyConnectedHybridLayer does not support after conv"); + + if 
(!weights_reshaped) + { + // Validate reshape weights kernel + ARM_COMPUTE_RETURN_ON_ERROR( + CLFullyConnectedHybridLayerReshapeWeights::validate(weights_to_use, &reshaped_weights)); + weights_to_use = &reshaped_weights; + } + + // Validate Scale factor kernel + const ITensorInfo &scale_factor = + TensorInfo(TensorShape{output->dimension(1)}, 1, input->data_type()); + ARM_COMPUTE_RETURN_ON_ERROR(CLScaleFactorSymm8Kernel::validate(input, &scale_factor)); + + // Validate quantization symm8 kernel + const ITensorInfo &quantized_input = TensorInfo( + input->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S8)); + ARM_COMPUTE_RETURN_ON_ERROR( + CLQuantizationSymmetricKernel::validate(input, &scale_factor, &quantized_input)); + + // Fully Connected layer after a Fully Connected Layer without batches + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1)); + + // Validate matrix multiply kernel + const ITensorInfo &gemmlowp_output = TensorInfo( + output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output)); + + // Multiply scale + ARM_COMPUTE_RETURN_ON_ERROR( + CLMultiplyScaleFactorKernel::validate(&gemmlowp_output, &scale_factor, output)); + + return Status{}; +} + +void CLFullyConnectedHybridLayer::run() +{ + prepare(); + + MemoryGroupResourceScope scope_mg(_memory_group); + + // Extract scale_factor + CLScheduler::get().enqueue(_scale_factor_kernel); + + // Quantize input + CLScheduler::get().enqueue(_quant_input_kernel); + + // Run matrix multiply + _mm_gemmlowp.run(); + + // Multiply scale factor + CLScheduler::get().enqueue(_multiply_scale_kernel); + + // Accumulate biases if provided + if (_accumulate_biases) + { + CLScheduler::get().enqueue(_accumulate_biases_kernel); + } +} + +void CLFullyConnectedHybridLayer::prepare() +{ + if (!_is_prepared) + { + 
ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); + + auto release_unused = [](CLTensor *w) { + if (!w->is_used()) + { + CLScheduler::get().queue().finish(); + w->allocator()->free(); + } + }; + + // Reshape of the weights if needed (happens only once) + if (!_are_weights_reshaped) + { + // Run reshape weights kernel and mark weights as unused + _reshape_weights_output.allocator()->allocate(); + _reshape_weights_kernel.run(); + + _are_weights_reshaped = true; + // We can not release _original_weights because it can be used in other nodes + } + + // Prepare GEMM prepare and release unused weights + _mm_gemmlowp.prepare(); + + // Release reshaped weights if unused + release_unused(&_reshape_weights_output); + + _is_prepared = true; + } +} diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp new file mode 100644 index 000000000..63e291b36 --- /dev/null +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedLayerEx.cpp @@ -0,0 +1,583 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h" + +#include "arm_compute/core/Size2D.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/Cast.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "support/ToolchainSupport.h" + +#include <algorithm> + +namespace arm_compute +{ +using namespace arm_compute::misc::shape_calculator; +using namespace arm_compute::utils::cast; + +namespace +{ +Status construct_gemmlowp_output_stage(const ITensorInfo &input, const ITensorInfo &weights, + const ITensorInfo &output, + GEMMLowpOutputStageInfo &gemmlowp_output_stage) +{ + gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; + gemmlowp_output_stage.gemmlowp_offset = 0; + gemmlowp_output_stage.gemmlowp_multiplier = 0; + gemmlowp_output_stage.gemmlowp_shift = 0; + + // Configure output stage for quantized case + if (is_data_type_quantized_asymmetric(input.data_type())) + { + const UniformQuantizationInfo iq_info = input.quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output.quantization_info().uniform(); + + const auto output_quant_info = (output.total_size() == 0) ? 
iq_info : oq_info; + + const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale; + int output_multiplier = 0; + int output_shift = 0; + ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one( + multiplier, &output_multiplier, &output_shift)); + + // Set the GEMMLowp output stage info + gemmlowp_output_stage.gemmlowp_offset = output_quant_info.offset; + gemmlowp_output_stage.gemmlowp_multiplier = output_multiplier; + gemmlowp_output_stage.gemmlowp_shift = output_shift; + gemmlowp_output_stage.gemmlowp_min_bound = 0; + gemmlowp_output_stage.gemmlowp_max_bound = 255; + gemmlowp_output_stage.gemmlowp_multipliers.push_back(output_multiplier); + gemmlowp_output_stage.gemmlowp_shifts.push_back(output_shift); + } + + return Status{}; +} + +Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const ITensorInfo *bias, + const ITensorInfo &output, const FullyConnectedLayerInfo &fc_info) +{ + GEMMLowpOutputStageInfo gemmlowp_output_stage; + ARM_COMPUTE_RETURN_ON_ERROR( + construct_gemmlowp_output_stage(input, weights, output, gemmlowp_output_stage)); + + const GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped + false, // is_b_reshaped + true, // reshape_b_only_on_first_run + 0, // depth_output_gemm3d + false, // reinterpret_input_as_3d + fc_info.retain_internal_weights, // retain_internal_weights + gemmlowp_output_stage, // gemmlowp_output_stage + fc_info.fp_mixed_precision, // fp_mixed_precision + true, // broadcast_bias + ActivationLayerInfo()); // activation_info + + if (is_data_type_quantized_asymmetric(input.data_type())) + { + const UniformQuantizationInfo iq_info = input.quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights.quantization_info().uniform(); + + // Since we need negative offsets for computing convolution, we need to change + // QuantizationInfo() + // Extract and negate input and weights offset + const QuantizationInfo 
input_quantization_info(iq_info.scale, -iq_info.offset); + const QuantizationInfo weights_quantization_info(wq_info.scale, -wq_info.offset); + + // Validate gemmlowp function + ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate( + &input.clone()->set_quantization_info(input_quantization_info), + &weights.clone()->set_quantization_info(weights_quantization_info), bias, &output, + gemm_info)); + } + else + { + ARM_COMPUTE_RETURN_ON_ERROR( + CLGEMM::validate(&input, &weights, bias, &output, 1.f, 1.f, gemm_info)); + } + + return Status{}; +} +} // namespace + +void CLFullyConnectedLayerReshapeWeightsEx::configure(const ICLTensor *input, ICLTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique<CLTransposeKernel>(); + k->configure(input, output); + _kernel = std::move(k); +} + +Status CLFullyConnectedLayerReshapeWeightsEx::validate(const ITensorInfo *input, + const ITensorInfo *output) +{ + return CLTransposeKernel::validate(input, output); +} + +CLFullyConnectedLayerEx::CLFullyConnectedLayerEx(std::shared_ptr<IMemoryManager> memory_manager, + IWeightsManager *weights_manager) + : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(), + _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(), + _reshape_weights_function(), _mm_gemm(memory_manager, weights_manager), + _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(), + _reshape_weights_output(), _are_weights_converted(true), _are_weights_reshaped(true), + _is_fc_after_conv(true), _is_quantized(false), _is_prepared(false), _original_weights(nullptr) +{ +} +void CLFullyConnectedLayerEx::configure_mm(const ICLTensor *input, const ICLTensor *weights, + const ICLTensor *bias, ICLTensor *output, + const FullyConnectedLayerInfo &fc_info) +{ + GEMMLowpOutputStageInfo gemmlowp_output_stage; + construct_gemmlowp_output_stage(*input->info(), *weights->info(), *output->info(), + gemmlowp_output_stage); + + const 
GEMMInfo &gemm_info = GEMMInfo(false, // is_a_reshaped + false, // is_b_reshaped + true, // reshape_b_only_on_first_run + 0, // depth_output_gemm3d + false, // reinterpret_input_as_3d + fc_info.retain_internal_weights, // retain_internal_weights + gemmlowp_output_stage, // gemmlowp_output_stage + fc_info.fp_mixed_precision, // fp_mixed_precision + true, // broadcast_bias + ActivationLayerInfo()); // activation_info + + if (_is_quantized) + { + // Since we need negative offsets for computing convolution, we need to change + // QuantizationInfo() + // Extract and negate input and weights offset + const QuantizationInfo input_quantization_info = input->info()->quantization_info(); + const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); + + input->info()->set_quantization_info(QuantizationInfo( + input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + weights->info()->set_quantization_info(QuantizationInfo( + weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); + + // Configure gemmlowp function + _mm_gemmlowp.configure(input, weights, bias, output, gemm_info); + + // Revert back QuantizatioInfo as input and weights could be used in other fully connected + // layers + input->info()->set_quantization_info(input_quantization_info); + weights->info()->set_quantization_info(weights_quantization_info); + } + else + { + // Configure matrix multiply kernel + _mm_gemm.configure(input, weights, bias, output, 1.f, 1.f, gemm_info); + } +} + +void CLFullyConnectedLayerEx::configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, + const ICLTensor *bias, ICLTensor *output, + const FullyConnectedLayerInfo &fc_info) +{ + ARM_COMPUTE_ERROR_ON( + (weights->info()->dimension(1) != + (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)))); + + // If the fully connected layer is called after a convolution layer, the input tensor must be + // 
linearized + + // Initialize output tensor for flatten + TensorShape shape_flatten = compute_flatten_shape(input->info()); + _flatten_output.allocator()->init(input->info() + ->clone() + ->set_is_resizable(true) + .reset_padding() + .set_tensor_shape(shape_flatten) + .set_data_layout(DataLayout::NCHW)); + + // Configure flatten kernel + _memory_group.manage(&_flatten_output); + _flatten_layer.configure(input, &_flatten_output); + + // Configure matrix multiply kernel + configure_mm(&_flatten_output, weights, bias, output, fc_info); + + // Allocate the output tensor for flatten once all the configure methods have been called + _flatten_output.allocator()->allocate(); +} + +void CLFullyConnectedLayerEx::configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, + const ICLTensor *bias, ICLTensor *output, + const FullyConnectedLayerInfo &fc_info) +{ + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1)); + + // Configure matrix multiply kernel + configure_mm(input, weights, bias, output, fc_info); +} + +void CLFullyConnectedLayerEx::configure(const ICLTensor *input, const ICLTensor *weights, + const ICLTensor *biases, ICLTensor *output, + FullyConnectedLayerInfo fc_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + + // Perform validate step + ARM_COMPUTE_ERROR_THROW_ON(CLFullyConnectedLayerEx::validate( + input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), + fc_info)); + + _are_weights_converted = true; + _are_weights_reshaped = fc_info.transpose_weights ? 
fc_info.are_weights_reshaped : true; + _is_fc_after_conv = true; + _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); + _is_prepared = fc_info.retain_internal_weights; + _original_weights = weights; + + if (_weights_manager) + { + _weights_manager->manage(weights); + } + + const ICLTensor *weights_to_use = weights; + + // With the Fully Connected layer we can have 4 different cases: + // 1) Convolution layer -> Fully Connected layer without batches + // 2) Fully Connected layer -> Fully Connected layer without batches + // 3) Convolution layer -> Fully Connected layer with batches + // 4) Fully Connected layer -> Fully Connected layer with batches + + // Check if we have a fully connected layer with batches + const bool is_batched_fc_layer = output->info()->dimension(1) > 1; + if (is_batched_fc_layer) + { + _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && + (std::equal(input->info()->tensor_shape().cbegin() + 3, + input->info()->tensor_shape().cend(), + output->info()->tensor_shape().cbegin() + 1)); + } + else + { + _is_fc_after_conv = input->info()->num_dimensions() > 1; + } + + // Reshape weights if needed + if (!_are_weights_reshaped) + { + if (_weights_manager && _weights_manager->are_weights_managed(weights)) + { + _reshape_weights_managed_function.configure(weights); + weights_to_use = utils::cast::polymorphic_downcast<ICLTensor *>( + _weights_manager->acquire(weights, &_reshape_weights_managed_function)); + } + else + { + // Reshape the weights + _reshape_weights_function.configure(weights, &_reshape_weights_output); + weights_to_use = &_reshape_weights_output; + } + } + + // Convert weights if needed + if (_is_fc_after_conv && (input->info()->data_layout() != fc_info.weights_trained_layout)) + { + if (_weights_manager && _weights_manager->are_weights_managed(weights_to_use)) + { + _convert_weights_managed.configure(weights_to_use, input->info()->tensor_shape(), + fc_info.weights_trained_layout); + weights_to_use = 
utils::cast::polymorphic_downcast<ICLTensor *>( + _weights_manager->acquire(weights, &_convert_weights_managed)); + } + else + { + // Convert weights + _convert_weights.configure(weights_to_use, &_converted_weights_output, + input->info()->tensor_shape(), fc_info.weights_trained_layout); + + weights_to_use = &_converted_weights_output; + } + _are_weights_converted = false; + } + + if (_is_fc_after_conv) + { + // Fully Connected layer after a Convolution Layer without batches + configure_conv_fc(input, weights_to_use, biases, output, fc_info); + } + else + { + // Fully Connected layer after a Fully Connected Layer without batches + configure_fc_fc(input, weights_to_use, biases, output, fc_info); + } +} + +Status CLFullyConnectedLayerEx::validate(const ITensorInfo *input, const ITensorInfo *weights, + const ITensorInfo *biases, const ITensorInfo *output, + FullyConnectedLayerInfo fc_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, + DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); + + bool weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true; + bool is_fc_after_conv = true; + + const ITensorInfo &flatten_input = TensorInfo(input->clone() + ->set_is_resizable(true) + .reset_padding() + .set_tensor_shape(compute_flatten_shape(input)) + .set_data_layout(DataLayout::NCHW)); + const ITensorInfo &reshaped_weights = + TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape( + compute_transposed_shape(*weights))); + const ITensorInfo &converted_weights = + weights_reshaped ? 
TensorInfo(weights->clone()->set_is_resizable(true).reset_padding()) + : TensorInfo(*reshaped_weights.clone()); + + // With the Fully Connected layer we can have 4 different cases: + // 1) Convolution layer -> Fully Connected layer without batches + // 2) Fully Connected layer -> Fully Connected layer without batches + // 3) Convolution layer -> Fully Connected layer with batches + // 4) Fully Connected layer -> Fully Connected layer with batches + + const ITensorInfo *input_to_use = input; + const ITensorInfo *weights_to_use = weights; + + // Check if we have a fully connected layer with batches + const bool is_batched_fc_layer = output->dimension(1) > 1; + if (is_batched_fc_layer) + { + is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && + (std::equal(input->tensor_shape().cbegin() + 3, input->tensor_shape().cend(), + output->tensor_shape().cbegin() + 1)); + } + else + { + is_fc_after_conv = input->num_dimensions() > 1; + } + + if (!weights_reshaped) + { + // Validate reshape weights kernel + ARM_COMPUTE_RETURN_ON_ERROR( + CLFullyConnectedLayerReshapeWeightsEx::validate(weights, &reshaped_weights)); + weights_to_use = &reshaped_weights; + } + + if (is_fc_after_conv && (input->data_layout() != fc_info.weights_trained_layout)) + { + // Validate convert weights kernel + ARM_COMPUTE_RETURN_ON_ERROR(CLConvertFullyConnectedWeights::validate( + weights_to_use, &converted_weights, input->tensor_shape(), fc_info.weights_trained_layout)); + weights_to_use = &converted_weights; + } + + if (is_fc_after_conv) + { + // Fully Connected layer after a Convolution Layer without batches + ARM_COMPUTE_RETURN_ERROR_ON( + (weights_to_use->dimension(1) != + (input->dimension(0) * input->dimension(1) * input->dimension(2)))); + + // Validate flatten kernel + ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayer::validate(input, &flatten_input)); + input_to_use = &flatten_input; + } + else + { + // Fully Connected layer after a Fully Connected Layer without batches + 
ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != weights_to_use->dimension(1)); + } + + // Validate matrix multiply kernel + ARM_COMPUTE_RETURN_ON_ERROR( + validate_mm(*input_to_use, *weights_to_use, biases, *output, fc_info)); + + return Status{}; +} + +void CLFullyConnectedLayerEx::run() +{ + if (!_is_prepared) + { + if (!_are_weights_reshaped) + _reshape_weights_output.allocator()->allocate(); + if (!_are_weights_converted) + _converted_weights_output.allocator()->allocate(); + _is_prepared = true; + } + + { + if (!_weights_manager) + { + ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); + } + + // Pointer to current weights + const ICLTensor *cur_weights = _original_weights; + // Reshape of the weights + if (!_are_weights_reshaped) + { + if (_weights_manager && _weights_manager->are_weights_managed(cur_weights)) + { + _original_weights = utils::cast::polymorphic_downcast<ICLTensor *>( + _weights_manager->run(cur_weights, &_reshape_weights_managed_function)); + } + else + { + _reshape_weights_function.run(); + cur_weights = &_reshape_weights_output; + } + } + + // Convert weights if needed + if (!_are_weights_converted) + { + if (_weights_manager && _weights_manager->are_weights_managed(cur_weights)) + { + _weights_manager->run(cur_weights, &_convert_weights_managed); + } + else + { + _convert_weights.run(); + } + } + + // Prepare GEMM prepare + if (!_is_quantized) + { + _mm_gemm.prepare(); + } + } + + MemoryGroupResourceScope scope_mg(_memory_group); + + // Linearize input if it comes from a convolutional layer + if (_is_fc_after_conv) + { + _flatten_layer.run(); + } + + // Run matrix multiply + if (_is_quantized) + { + _mm_gemmlowp.run(); + } + else + { + _mm_gemm.run(); + } +} + +void CLFullyConnectedLayerEx::prepare() +{ +#if 0 // TODO Remove this block + if(!_is_prepared) + { + if(!_weights_manager) + { + ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); + } + + auto release_unused = [](CLTensor * w) + { + if(!w->is_used()) + { + 
CLScheduler::get().queue().finish(); + w->allocator()->free(); + } + }; + + // Pointer to current weights + const ICLTensor *cur_weights = _original_weights; + + // Reshape of the weights if needed (happens only once) + if(!_are_weights_reshaped) + { + if(_weights_manager && _weights_manager->are_weights_managed(_original_weights)) + { + cur_weights = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->run(cur_weights, &_reshape_weights_managed_function)); + } + else + { + // Run reshape weights kernel and mark weights as unused + _reshape_weights_output.allocator()->allocate(); + _reshape_weights_function.run(); + + cur_weights->mark_as_unused(); + cur_weights = &_reshape_weights_output; + } + _are_weights_reshaped = true; + } + + // Convert weights if needed (happens only once) + if(!_are_weights_converted) + { + if(_weights_manager && _weights_manager->are_weights_managed(cur_weights)) + { + _weights_manager->run(cur_weights, &_convert_weights_managed); + } + else + { + _converted_weights_output.allocator()->allocate(); + _convert_weights.run(); + cur_weights->mark_as_unused(); + } + + _are_weights_converted = true; + } + + // Release reshaped weights if unused + release_unused(&_reshape_weights_output); + + // Prepare GEMM prepare and release unused weights + if(!_is_quantized) + { + _mm_gemm.prepare(); + } + + // Release converted weights if unused + release_unused(&_reshape_weights_output); + release_unused(&_converted_weights_output); + + _is_prepared = true; + } +#endif +} +} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp index c6b166163..9aebc473e 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLFullyConnectedReshapingLayer.cpp @@ -16,13 +16,18 @@ #include 
"arm_compute/runtime/CL/functions/CLFullyConnectedReshapingLayer.h" +#include <arm_compute/runtime/CL/functions/CLFullyConnectedHybridLayer.h> +#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h> +#include <arm_compute/runtime/CL/functions/CLFullyConnectedLayerEx.h> + using namespace arm_compute; void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *input, const arm_compute::ICLTensor *weights, const arm_compute::ICLTensor *biases, arm_compute::ICLTensor *output, bool needs_reshape, - const arm_compute::TensorShape &reshape) + const arm_compute::TensorShape &reshape, + KernelType kernel_type) { _input = input; _weights = weights; @@ -30,6 +35,7 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp _output = output; _needs_reshape = needs_reshape; + const ICLTensor *input_to_use = input; if (_needs_reshape) { // reshape @@ -37,16 +43,44 @@ void CLFullyConnectedReshapingLayer::configure(const arm_compute::ICLTensor *inp _input->info()->clone()->set_tensor_shape(reshape).set_data_layout( _input->info()->data_layout())); _cl_reshape.configure(_input, &_cl_buffer); + input_to_use = &_cl_buffer; + } + + _cl_fc = [&]() { + if (kernel_type == KernelType::GENERAL) + { + auto fc = new arm_compute::CLFullyConnectedLayerEx{_memory_manager}; + fc->configure(input_to_use, _weights, _biases, _output); + return std::unique_ptr<arm_compute::IFunction>(fc); + } + else + { + assert(kernel_type == KernelType::PREPROCESSED_WEIGHTS); + + bool is_hybrid = (input->info()->data_type() == DataType::F32 || + input->info()->data_type() == DataType::F16) && + weights->info()->data_type() == DataType::S8; - _cl_fc.configure(&_cl_buffer, _weights, _biases, _output); + if (is_hybrid) + { + auto fc = new arm_compute::CLFullyConnectedHybridLayer{_memory_manager}; + fc->configure(input_to_use, _weights, _biases, _output); + return std::unique_ptr<arm_compute::IFunction>(fc); + } + else + { + auto fc = new 
arm_compute::CLFullyConnectedLayer{_memory_manager}; + fc->configure(input_to_use, _weights, _biases, _output); + return std::unique_ptr<arm_compute::IFunction>(fc); + } + } + }(); + if (_needs_reshape) + { // NOTE _cl_buffer is inaccessible from outside, and thus it is safe to invoke allocate here. _cl_buffer.allocator()->allocate(); } - else - { - _cl_fc.configure(_input, _weights, _biases, _output); - } } void CLFullyConnectedReshapingLayer::run(void) @@ -54,7 +88,7 @@ void CLFullyConnectedReshapingLayer::run(void) if (_needs_reshape) _cl_reshape.run(); - _cl_fc.run(); + _cl_fc->run(); } -void CLFullyConnectedReshapingLayer::prepare(void) { _cl_fc.prepare(); } +void CLFullyConnectedReshapingLayer::prepare(void) { _cl_fc->prepare(); } diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp new file mode 100644 index 000000000..ca5499dfc --- /dev/null +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.cpp @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCoreEx.h" + +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/MemoryGroup.h" + +namespace arm_compute +{ +using namespace arm_compute::misc::shape_calculator; +using namespace arm_compute::cl_gemm; + +namespace +{ +inline bool is_gemm_reshaped(bool reshape_b_only_on_first_run, GPUTarget gpu_target) +{ + return (get_arch_from_target(gpu_target) != GPUTarget::MIDGARD) && (reshape_b_only_on_first_run); +} +} // namespace + +CLGEMMLowpMatrixMultiplyCoreEx::CLGEMMLowpMatrixMultiplyCoreEx( + std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(std::move(memory_manager)), _mm_midgard_kernel(), _mtx_a_reduction_kernel(), + _mtx_b_reduction_kernel(), _vector_sum_col(), _vector_sum_row(), _a_offset(0), _b_offset(0), + _reshape_b_only_on_first_run(false), _is_prepared(false) +{ +} + +void CLGEMMLowpMatrixMultiplyCoreEx::configure(const ICLTensor *a, const ICLTensor *b, + const ICLTensor *c, ICLTensor *output, + const GEMMInfo &gemm_info) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output); + ARM_COMPUTE_UNUSED(c); + ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCoreEx::validate( + a->info(), b->info(), c != nullptr ? 
c->info() : nullptr, output->info(), gemm_info)); + + _is_prepared = false; + _reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run(); + _a_offset = a->info()->quantization_info().uniform().offset; + _b_offset = b->info()->quantization_info().uniform().offset; + + // Get the GPU target + const GPUTarget gpu_target = CLScheduler::get().target(); + + // Set the target for the kernels + _mm_midgard_kernel.set_target(gpu_target); + + // GEMMRHSMatrixInfo rhs_info; + // GEMMLHSMatrixInfo lhs_info; + + // Arguments used by GEMMReshapeInfo + // If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m, + // n, k, mult_transpose1xW_width and mult_interleave4x4_height to CLGEMMReshapeInfo + // in order to know how the matrices have been reshaped + bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); + const unsigned int m = reinterpret_input_as_3d + ? (a->info()->dimension(1) * a->info()->dimension(2)) + : a->info()->dimension(1); + const unsigned int n = b->info()->dimension(0); + const unsigned int k = a->info()->dimension(0); + const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); + + const ICLTensor *matrix_b = b; + // Configure matrix multiply kernel + _mm_midgard_kernel.configure( + a, matrix_b, output, + GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d)); +} + +Status CLGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITensorInfo *b, + const ITensorInfo *c, const ITensorInfo *output, + const GEMMInfo &gemm_info) +{ + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::S8); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b); + ARM_COMPUTE_UNUSED(c); + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), + "Matrix A already reshaped is not supported"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), + "Matrix B already reshaped is not supported"); + + const ITensorInfo *matrix_a_info = a; + + // Get the 
GPU target + const GPUTarget gpu_target = CLScheduler::get().target(); + + bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d(); + const unsigned int m = + reinterpret_input_as_3d ? (a->dimension(1) * a->dimension(2)) : a->dimension(1); + const unsigned int n = b->dimension(0); + const unsigned int k = a->dimension(0); + const int depth_output_gemm3d = gemm_info.depth_output_gemm3d(); + + bool reshape_matrix_b = is_gemm_reshaped(gemm_info.reshape_b_only_on_first_run(), gpu_target); + + const GEMMReshapeInfo reshape_info = + GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d); + + TensorInfo weights_info(*b); + const ITensorInfo *matrix_b_info = &weights_info; + if (reshape_matrix_b) + { + ARM_COMPUTE_RETURN_ERROR_ON_MSG(false, + "CLGEMMLowpMatrixMultiplyCoreEx does not support reshape_b"); + } + + // Validate matrix multiply + ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyKernelEx::validate( + matrix_a_info, matrix_b_info, output, reshape_info)); + + return Status{}; +} + +void CLGEMMLowpMatrixMultiplyCoreEx::run() +{ + prepare(); + + MemoryGroupResourceScope scope_mg(_memory_group); + + // Run matrix multiply + CLScheduler::get().enqueue(_mm_midgard_kernel, false); +} + +void CLGEMMLowpMatrixMultiplyCoreEx::prepare() +{ + if (!_is_prepared) + { + _is_prepared = true; + } +} +} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp index 6cad9bd2e..f594d7a2e 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLGatherEx.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/CL/functions/CLGatherEx.h" #include "arm_compute/core/CL/ICLTensor.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp index 7180e9356..27ed8e828 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLHashtableLookup.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + #include "arm_compute/runtime/CL/functions/CLHashtableLookup.h" #include "arm_compute/core/CL/kernels/CLHashtableLookupKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp index 86ea5a66d..80393e8d1 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLInstanceNormalizationLayerEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/CL/functions/CLInstanceNormalizationLayerEx.h" #include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernelEx.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp index be35ea732..28e5bc0da 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLNeg.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/CL/functions/CLNeg.h" #include "arm_compute/core/CL/kernels/CLNegKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp index 38adedd10..fbb15ab1d 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLPReLU.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/CL/functions/CLPReLU.h" #include "arm_compute/core/CL/kernels/CLPReLUKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp index 2a34c0664..6049b7e70 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLRNNLayerEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2018 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/CL/functions/CLRNNLayerEx.h" #include "arm_compute/core/Helpers.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp index 13a25c901..8ce2d746c 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLReduceOperation.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017-2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/CL/functions/CLReduceOperation.h" #include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp index c03826891..1f946d37b 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToBatchND.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/CL/functions/CLSpaceToBatchND.h" #include "arm_compute/core/CL/kernels/CLSpaceToBatchNDKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp index 0f455f96f..7d7b2264b 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLSpaceToDepth.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/CL/functions/CLSpaceToDepth.h" #include "arm_compute/core/CL/kernels/CLSpaceToDepthKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp index 80d50ad94..3ac95a8e6 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTopKV2.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/CL/functions/CLTopKV2.h" #include "arm_compute/runtime/CL/CLScheduler.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp index 40e21671d..e61746ef2 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayer.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/CL/functions/CLTransposeConvLayer.h" #include "arm_compute/core/utils/misc/ShapeCalculatorEx.h" diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp index 0ce3e6700..07feb5a64 100644 --- a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp +++ b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,11 +13,37 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2017-2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + #include "arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Utils.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/core/CL/ICLTensor.h" #include <cmath> #include <memory> @@ -54,7 +79,7 @@ void CLTransposeConvLayerUpsample::run() _output->map(CLScheduler::get().queue(), true); if (is_data_type_quantized_asymmetric(_output->info()->data_type())) { - const uint8_t quantized_zero = _output->info()->quantization_info().offset; + const uint8_t quantized_zero = _output->info()->quantization_info().uniform().offset; std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero); } else diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp index a95018a28..5405934ad 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPOneHotEx.cpp @@ -1,6 +1,21 @@ /* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -22,23 +37,18 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h" -#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h" +#include "arm_compute/runtime/CPP/functions/CPPOneHotEx.h" + +#include "arm_compute/core/CPP/kernels/CPPOneHotKernelEx.h" #include "support/ToolchainSupport.h" -#include <utility> +using namespace arm_compute; -namespace arm_compute -{ -void NENegLayer::configure(const ITensor *input, ITensor *output) +void CPPOneHotEx::configure(const ITensor *indices, ITensor *output, const int depth, + const float on_value, const float off_value, const int axis) { - auto k = arm_compute::support::cpp14::make_unique<NEElementwiseUnaryKernelEx>(); - k->configure(ElementWiseUnaryEx::NEG, input, output); + auto k = arm_compute::support::cpp14::make_unique<CPPOneHotKernelEx>(); + k->configure(indices, output, depth, on_value, off_value, axis); _kernel = std::move(k); } -Status NENegLayer::validate(const ITensorInfo *input, const ITensorInfo *output) -{ - return NEElementwiseUnaryKernelEx::validate(ElementWiseUnaryEx::NEG, input, output); -} -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp index f8e0ef8a6..6c90ef3b4 100644 --- a/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp +++ b/compute/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h" #include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h" diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp new file mode 100644 index 000000000..ff81ff854 --- /dev/null +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEActivationLayerEx.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright (c) 2017-2019 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/NEON/functions/NEActivationLayerEx.h" + +#include "arm_compute/core/NEON/kernels/NEActivationLayerKernelEx.h" +#include "arm_compute/runtime/IRuntimeContext.h" +#include "support/ToolchainSupport.h" + +namespace arm_compute +{ +NEActivationLayerEx::NEActivationLayerEx(IRuntimeContext *ctx) // NOLINT + : INESimpleFunctionNoBorder(ctx) +{ +} +void NEActivationLayerEx::configure(ITensor *input, ITensor *output, + ActivationLayerInfo activation_info) +{ + auto k = arm_compute::support::cpp14::make_unique<NEActivationLayerKernelEx>(); + k->configure(input, output, activation_info); + _kernel = std::move(k); +} + +Status NEActivationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, + const ActivationLayerInfo &act_info) +{ + return NEActivationLayerKernelEx::validate(input, output, act_info); +} +} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp deleted file mode 100644 index 5ba465b61..000000000 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEArgMinMax.h" - -#include "arm_compute/core/CPP/Validate.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -namespace arm_compute -{ - -template <ReductionOperation OP> -NEArgMinMaxStatic<OP>::NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _reduction_kernel(), _reduced_out(), _reshape() -{ -} - -template <ReductionOperation OP> -Status NEArgMinMaxStatic<OP>::validate(const ITensorInfo *input, int axis, - const ITensorInfo *output) -{ - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input); - ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, - DataType::F32); - - TensorShape out_shape = input->tensor_shape(); - const int input_dims = input->num_dimensions(); - int axis_local = axis; - - // Convert negative axis - axis_local = wrap_around(axis_local, input_dims); - - ARM_COMPUTE_RETURN_ERROR_ON(axis_local > 3); - ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local) > input->num_dimensions() - 1); - out_shape.remove_dimension(axis_local); - - const TensorInfo out_info = output->clone()->set_tensor_shape(out_shape); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info); - - return Status{}; -} - -template <ReductionOperation OP> -void NEArgMinMaxStatic<OP>::configure(ITensor *input, int axis, ITensor 
*output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input); - - int axis_local = axis; - const int input_dims = input->info()->num_dimensions(); - - // Convert negative axis - axis_local = wrap_around(axis_local, input_dims); - - // Perform reduction for axis - TensorShape intermediate_shape = input->info()->tensor_shape(); - intermediate_shape.set(axis_local, 1); - auto in = input; - - _reduced_out.allocator()->init(TensorInfo(intermediate_shape, output->info()->num_channels(), - output->info()->data_type(), - output->info()->quantization_info())); - _memory_group.manage(&_reduced_out); - _reduction_kernel.configure(in, axis_local, &_reduced_out, OP); - - // Allocate intermediate tensor - _reduced_out.allocator()->allocate(); - - // Configure reshape layer if we want to drop the dimensions - TensorShape out_shape = input->info()->tensor_shape(); - out_shape.remove_dimension(axis_local); - auto_init_if_empty(*output->info(), output->info()->clone()->set_tensor_shape(out_shape)); - _reshape.configure(&_reduced_out, output); -} - -template <ReductionOperation OP> void NEArgMinMaxStatic<OP>::run() -{ - MemoryGroupResourceScope scope_mg(_memory_group); - - _reduction_kernel.run(); - _reshape.run(); -} - -// Supported Specializations -template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>; -template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>; -} // namespace arm_compute diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp index 7c15fc453..e42c453cf 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEBinaryLogicalOperation.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h" #include <arm_compute/core/NEON/kernels/NEBinaryLogicalOperationKernel.h> diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp index f2490e4e8..dc5c62061 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NECast.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp index db419e3a8..5ec0b8677 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEDepthToSpaceLayerEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp index 00c3ed94f..53fb15081 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEEmbeddingLookup.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h" #include "arm_compute/core/NEON/kernels/NEEmbeddingLookupKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp index d604fedbf..f45773251 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/NEON/functions/NEFullyConnectedHybridLayer.h" #include "arm_compute/core/Helpers.h" @@ -154,7 +170,7 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor // Multiply scale _multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output, - weights->info()->quantization_info().scale); + weights->info()->quantization_info().uniform().scale); _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights; @@ -220,7 +236,7 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output)); ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate( - &gemmlowp_output, &scale_factor, output, weights->quantization_info().scale)); + &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale)); return Status{}; } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp index a944f699a..cb7557a5a 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., 
Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayerEx.h" #include "arm_compute/core/Helpers.h" @@ -46,10 +62,10 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I // Since we need negative offsets for computing convolution, we need to change // QuantizationInfo() // Extract and negate input and weights offset - const QuantizationInfo input_quantization_info(input.quantization_info().scale, - -input.quantization_info().offset); - const QuantizationInfo weights_quantization_info(weights.quantization_info().scale, - -weights.quantization_info().offset); + const QuantizationInfo input_quantization_info(input.quantization_info().uniform().scale, + -input.quantization_info().uniform().offset); + const QuantizationInfo weights_quantization_info(weights.quantization_info().uniform().scale, + -weights.quantization_info().uniform().offset); // Validate gemmlowp function ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate( @@ -88,10 +104,10 @@ void NEFullyConnectedLayerEx::configure_mm(const ITensor *input, const ITensor * const QuantizationInfo input_quantization_info = input->info()->quantization_info(); const QuantizationInfo 
weights_quantization_info = weights->info()->quantization_info(); - input->info()->set_quantization_info( - QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights->info()->set_quantization_info( - QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input->info()->set_quantization_info(QuantizationInfo( + input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + weights->info()->set_quantization_info(QuantizationInfo( + weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); // Configure gemmlowp function _mm_gemmlowp.configure(input, weights, nullptr, output); @@ -236,15 +252,16 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei // Configure output stage for asymmetric quantized types if (_is_quantized) { - float multiplier = input->info()->quantization_info().scale * - weights->info()->quantization_info().scale / - output->info()->quantization_info().scale; + float multiplier = input->info()->quantization_info().uniform().scale * + weights->info()->quantization_info().uniform().scale / + output->info()->quantization_info().uniform().scale; int output_multiplier; int output_shift; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier, - output_shift, output->info()->quantization_info().offset); + output_shift, + output->info()->quantization_info().uniform().offset); _gemmlowp_output.allocator()->allocate(); } diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp index 11794a1ea..1290cfd39 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp +++ 
b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.h" #include "arm_compute/core/Error.h" @@ -50,7 +66,7 @@ NEGEMMLowpMatrixMultiplyCoreEx::NEGEMMLowpMatrixMultiplyCoreEx( _tmp_b(), _mm_result_s32(), _signed_a(), _signed_output(), _original_b(nullptr), _a_offset(0), _b_offset(0), _run_vector_matrix_multiplication(false), _assembly_path(false), _fused_assembly_path(false), _reshape_b_only_on_first_run(false), _is_prepared(false), - _fuse_output_stage(false), _run_activation(false), _flip_signedness(false) + _fuse_output_stage(false), _flip_signedness(false) { } @@ -71,8 +87,8 @@ void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor * _mtx_b_reshape_kernel = nullptr; // Set internal variables - _a_offset = a->info()->quantization_info().offset; - _b_offset = b->info()->quantization_info().offset; + _a_offset = a->info()->quantization_info().uniform().offset; + _b_offset = b->info()->quantization_info().uniform().offset; _run_vector_matrix_multiplication = a->info()->dimension(1) < 2; 
_reshape_b_only_on_first_run = info.reshape_b_only_on_first_run(); _is_prepared = false; @@ -91,7 +107,6 @@ void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor * } #ifdef __aarch64__ -#if 0 // Can use after arm compute library v19.11 switch (a->info()->data_type()) { case DataType::QASYMM8: @@ -119,8 +134,6 @@ void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor * break; } } -#endif // 0 - ARM_COMPUTE_ERROR("aarch64 not supported"); #endif /* __aarch64__ */ if (!(_assembly_path || _run_vector_matrix_multiplication)) { @@ -277,8 +290,8 @@ Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITen TensorInfo tmp_b_info{}; TensorInfo mm_result_s32_info{}; - int32_t a_offset = a->quantization_info().offset; - int32_t b_offset = b->quantization_info().offset; + int32_t a_offset = a->quantization_info().uniform().offset; + int32_t b_offset = b->quantization_info().uniform().offset; bool fuse_output_stage = info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE; if (fuse_output_stage) @@ -291,19 +304,16 @@ Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITen // Check if we need to run the optimized assembly kernel bool run_optimised = false; bool run_optimised_requantized = false; - const bool reshape_b_only_on_first_run = info.reshape_b_only_on_first_run(); if (a_to_use->data_type() == DataType::QASYMM8 && info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT) { - run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, output, 1.f, 0.f, - reshape_b_only_on_first_run)); + run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info)); run_optimised_requantized = run_optimised; } else { run_optimised = bool(NEGEMMAssemblyDispatch::validate( - a_to_use, b, fuse_output_stage ? &mm_result_s32_info : output, 1.f, 0.f, - reshape_b_only_on_first_run)); + a_to_use, b, c, fuse_output_stage ? 
&mm_result_s32_info : output, gemm_info)); } if (run_optimised) diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp index 90dabb35a..c8bb88aea 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGatherEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/NEON/functions/NEGatherEx.h" #include "arm_compute/core/NEON/kernels/NEGatherKernelEx.h" diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp index 624185d2c..078019f4e 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEHashtableLookup.cpp @@ -1,6 +1,5 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,6 +13,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* + * Copyright (c) 2016-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + #include "arm_compute/runtime/NEON/functions/NEHashtableLookup.h" #include "arm_compute/core/NEON/kernels/NEHashtableLookupKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp index 1c2c8f027..16d74e62d 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEInstanceNormalizationLayerEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayerEx.h" #include "arm_compute/core/Helpers.h" diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp index 1150cef76..dac3b849d 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEPReLU.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2018-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/NEON/functions/NEPReLU.h" #include "arm_compute/core/NEON/kernels/NEPReLUKernel.h" diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp index 84411c266..0e9a5e969 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NERNNLayerEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp index c65e93570..116bba3c0 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceMeanEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/NEON/functions/NEReduceMeanEx.h" #include "arm_compute/core/Helpers.h" diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp index b36f8287a..aedb537e9 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,11 +37,14 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + #include "arm_compute/runtime/NEON/functions/NEReduceOperation.h" #include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/runtime/NEON/NEScheduler.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/Tensor.h" using namespace arm_compute; diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp index 3c18217ef..26a887912 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceSum.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ + #include "arm_compute/runtime/NEON/functions/NEReduceSum.h" #include "arm_compute/core/CPP/Validate.h" diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp index c3431c418..2aa0d2d4b 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReductionOperationEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h" #include "arm_compute/core/Helpers.h" diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp index c9f914fb0..198bb7672 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToBatchLayerEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2019 ARM Limited. * * SPDX-License-Identifier: MIT diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp index b6ae21cc0..97697e3ea 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NESpaceToDepthLayerEx.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2019 ARM Limited. 
* * SPDX-License-Identifier: MIT diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp index fd15ef05f..df0689273 100644 --- a/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp +++ b/compute/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp @@ -1,5 +1,20 @@ /* * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT @@ -22,6 +37,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ + #include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h" #include "arm_compute/core/Helpers.h" |