diff options
Diffstat (limited to 'runtimes/libs/srcn/src/common.h')
-rw-r--r-- | runtimes/libs/srcn/src/common.h | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/runtimes/libs/srcn/src/common.h b/runtimes/libs/srcn/src/common.h new file mode 100644 index 000000000..e8abc1440 --- /dev/null +++ b/runtimes/libs/srcn/src/common.h @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_SRCN_COMMON_H__ +#define __NNFW_SRCN_COMMON_H__ + +#include <string.h> +#include <limits> +#include <arm_neon.h> + +#include "srcn/conv_type.h" + +namespace nnfw +{ +namespace srcn +{ + +#define sizeof_RhsScalar 4 +#define sizeof_LhsScalar 4 +#define sizeof_ResScalar 4 + +#define MIN(a, b) (a) > (b) ? (b) : (a) +#define MAX(a, b) (a) > (b) ? (a) : (b) + +enum shardType_t +{ + shardByCol = 0, + shardByRow +}; + +#ifdef TIZEN +#define L1_CACHE_SIZE (16536 * 2) +#define L2_CACHE_SIZE (524288 * 2) +#define L3_CACHE_SIZE (0) // no L3 +#define MAX_K (512) +// single-thread +#define GEN_COL (1440) +// multi-threads +#define MAX_COL (90) +#define MIN_COL (32) +#elif defined ANDROID +#define L1_CACHE_SIZE (16536 * 4) +#define L2_CACHE_SIZE (524288 * 8) +#define L3_CACHE_SIZE (0) //(524288 * 8) //no L3 +#define MAX_K (512 * 2) +// single-thread +#define GEN_COL (1440) +// multi-threads +#if __aarch64__ +#define MAX_COL (1024) +#else +#define MAX_COL (90) +#endif +#define MIN_COL (32) +#endif + +enum +{ + USE_COMMON_KENEL = 0, + USE_12BIT_KERNEL, + USE_NONZERO_KERENL +}; + +template <typename T> static T divup(const T &x, const T &y) +{ + return static_cast<T>((x + y - 1) / y); +} + +#ifdef NCNN +static inline size_t alignSize(size_t sz, int n) { return (sz + n - 1) / n * n; } + +static inline size_t alignBy2(size_t sz) { return (sz + 1) & -2; } +#endif // NCNN + +static inline int32_t BitNot(int32_t a) { return ~a; } + +static inline int32_t MaskIfNonZero(int32_t a) +{ + static int32_t zero = 0; + return a ? BitNot(zero) : zero; +} + +static inline int32_t BitAnd(int32_t a, int32_t b) { return a & b; } + +static inline int32_t ShiftRight(int32_t a, int offset) { return a >> offset; } + +static inline int32_t MaskIfLessThan(int32_t a, int32_t b) { return MaskIfNonZero(a < b); } + +static inline int32_t MaskIfGreaterThan(int32_t a, int32_t b) { return MaskIfNonZero(a > b); } + +static inline int32_t Add(int32_t a, int32_t b) { return a + b; } + +static inline int32_t RoundingDivideByPOT(int32_t x, int exponent) +{ + const int32_t mask = (1ll << exponent) - 1; + const int32_t zero = 0; + const int32_t one = 1; + const int32_t remainder = BitAnd(x, mask); + const int32_t threshold = Add(ShiftRight(mask, 1), BitAnd(MaskIfLessThan(x, zero), one)); + return Add(ShiftRight(x, exponent), BitAnd(MaskIfGreaterThan(remainder, threshold), one)); +} +static inline int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b) +{ + bool overflow = a == b && a == std::numeric_limits<int32_t>::min(); + int64_t a_64(a); + int64_t b_64(b); + int64_t ab_64 = a_64 * b_64; + int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); + int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31)); + return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32; +} + +static inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, + int shift) +{ + int left_shift = shift > 0 ? shift : 0; + int right_shift = shift > 0 ? 0 : -shift; + return RoundingDivideByPOT( + SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift); +} + +static inline int32x4_t SaturatingRoundingDoublingHighMulV(int32x4_t a, int32x4_t b) +{ + return vqrdmulhq_s32(a, b); +} + +static inline int32x4_t RoundingDivideByPOTV(int32x4_t x, int exponent) +{ + const int32x4_t shift_vec = vdupq_n_s32(-exponent); + const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31); + const int32x4_t fixed_up_x = vqaddq_s32(x, fixup); + return vrshlq_s32(fixed_up_x, shift_vec); +} + +static inline int32x4_t MultiplyByQuantizedMultiplierV(int32x4_t x, int32_t quantized_multiplier, + int shift) +{ + int left_shift = shift > 0 ? shift : 0; + int right_shift = shift > 0 ? 0 : -shift; + return RoundingDivideByPOTV( + SaturatingRoundingDoublingHighMulV(vrshlq_s32(x, vdupq_n_s32(left_shift)), + vdupq_n_s32(quantized_multiplier)), + right_shift); +} + +} // namespace srcn +} // namespace nnfw + +#endif // __NNFW_SRCN_COMMON_H__ |