diff options
Diffstat (limited to 'compute/ncnn/src/srcn/common.h')
-rw-r--r-- | compute/ncnn/src/srcn/common.h | 162 |
1 files changed, 0 insertions, 162 deletions
diff --git a/compute/ncnn/src/srcn/common.h b/compute/ncnn/src/srcn/common.h deleted file mode 100644 index 778a17a80..000000000 --- a/compute/ncnn/src/srcn/common.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __NNFW_SRCN_COMMON_H__ -#define __NNFW_SRCN_COMMON_H__ - -#include <string.h> -#include <limits> -#include <arm_neon.h> - -#include "ncnn/srcn/conv_type.h" - -namespace nnfw -{ -namespace srcn -{ - -#define sizeof_RhsScalar 4 -#define sizeof_LhsScalar 4 -#define sizeof_ResScalar 4 - -#define MIN(a, b) (a) > (b) ? (b) : (a) -#define MAX(a, b) (a) > (b) ? (a) : (b) - -enum shardType_t -{ - shardByCol = 0, - shardByRow -}; - -#ifdef TIZEN -#define L1_CACHE_SIZE (16536 * 2) -#define L2_CACHE_SIZE (524288 * 2) -#define L3_CACHE_SIZE (0) // no L3 -#define MAX_K (512) -// single-thread -#define GEN_COL (1440) -// multi-threads -#define MAX_COL (90) -#define MIN_COL (32) -#elif defined ANDROID -#define L1_CACHE_SIZE (16536 * 4) -#define L2_CACHE_SIZE (524288 * 8) -#define L3_CACHE_SIZE (0) //(524288 * 8) //no L3 -#define MAX_K (512 * 2) -// single-thread -#define GEN_COL (1440) -// multi-threads -#if __aarch64__ -#define MAX_COL (1024) -#else -#define MAX_COL (90) -#endif -#define MIN_COL (32) -#endif - -enum -{ - USE_COMMON_KENEL = 0, - USE_12BIT_KERNEL, - USE_NONZERO_KERENL -}; - -template <typename T> static T divup(const T &x, const T &y) -{ - return static_cast<T>((x + y - 1) / y); -} - -#ifdef NCNN -static inline size_t alignSize(size_t sz, int n) { return (sz + n - 1) / n * n; } - -static inline size_t alignBy2(size_t sz) { return (sz + 1) & -2; } -#endif // NCNN - -static inline int32_t BitNot(int32_t a) { return ~a; } - -static inline int32_t MaskIfNonZero(int32_t a) -{ - static int32_t zero = 0; - return a ? BitNot(zero) : zero; -} - -static inline int32_t BitAnd(int32_t a, int32_t b) { return a & b; } - -static inline int32_t ShiftRight(int32_t a, int offset) { return a >> offset; } - -static inline int32_t MaskIfLessThan(int32_t a, int32_t b) { return MaskIfNonZero(a < b); } - -static inline int32_t MaskIfGreaterThan(int32_t a, int32_t b) { return MaskIfNonZero(a > b); } - -static inline int32_t Add(int32_t a, int32_t b) { return a + b; } - -static inline int32_t RoundingDivideByPOT(int32_t x, int exponent) -{ - const int32_t mask = (1ll << exponent) - 1; - const int32_t zero = 0; - const int32_t one = 1; - const int32_t remainder = BitAnd(x, mask); - const int32_t threshold = Add(ShiftRight(mask, 1), BitAnd(MaskIfLessThan(x, zero), one)); - return Add(ShiftRight(x, exponent), BitAnd(MaskIfGreaterThan(remainder, threshold), one)); -} -static inline int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b) -{ - bool overflow = a == b && a == std::numeric_limits<int32_t>::min(); - int64_t a_64(a); - int64_t b_64(b); - int64_t ab_64 = a_64 * b_64; - int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); - int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31)); - return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32; -} - -static inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, - int shift) -{ - int left_shift = shift > 0 ? shift : 0; - int right_shift = shift > 0 ? 0 : -shift; - return RoundingDivideByPOT( - SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier), right_shift); -} - -static inline int32x4_t SaturatingRoundingDoublingHighMulV(int32x4_t a, int32x4_t b) -{ - return vqrdmulhq_s32(a, b); -} - -static inline int32x4_t RoundingDivideByPOTV(int32x4_t x, int exponent) -{ - const int32x4_t shift_vec = vdupq_n_s32(-exponent); - const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31); - const int32x4_t fixed_up_x = vqaddq_s32(x, fixup); - return vrshlq_s32(fixed_up_x, shift_vec); -} - -static inline int32x4_t MultiplyByQuantizedMultiplierV(int32x4_t x, int32_t quantized_multiplier, - int shift) -{ - int left_shift = shift > 0 ? shift : 0; - int right_shift = shift > 0 ? 0 : -shift; - return RoundingDivideByPOTV( - SaturatingRoundingDoublingHighMulV(vrshlq_s32(x, vdupq_n_s32(left_shift)), - vdupq_n_s32(quantized_multiplier)), - right_shift); -} - -} // namespace srcn -} // namespace nnfw - -#endif // __NNFW_SRCN_COMMON_H__ |