summaryrefslogtreecommitdiff
path: root/runtimes/libs/srcn/src/common.h
diff options
context:
space:
mode:
Diffstat (limited to 'runtimes/libs/srcn/src/common.h')
-rw-r--r--runtimes/libs/srcn/src/common.h162
1 files changed, 162 insertions, 0 deletions
diff --git a/runtimes/libs/srcn/src/common.h b/runtimes/libs/srcn/src/common.h
new file mode 100644
index 000000000..e8abc1440
--- /dev/null
+++ b/runtimes/libs/srcn/src/common.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SRCN_COMMON_H__
+#define __NNFW_SRCN_COMMON_H__
+
+#include <string.h>
+#include <limits>
+#include <arm_neon.h>
+
+#include "srcn/conv_type.h"
+
+namespace nnfw
+{
+namespace srcn
+{
+
+// Scalar element sizes expressed as plain integer literals (all three are 4).
+// NOTE(review): presumably the byte sizes of the right-hand-side, left-hand-side
+// and result element types of the matrix kernels -- confirm against the users of
+// these macros, which are outside this header.
+#define sizeof_RhsScalar 4
+#define sizeof_LhsScalar 4
+#define sizeof_ResScalar 4
+
+// Function-like minimum / maximum of two expressions.
+// The whole expansion is parenthesized so that the conditional expression cannot
+// be torn apart by neighbouring operators (e.g. `10 * MIN(a, b)` or
+// `MAX(a, b) + 1`).
+// Each argument may still be evaluated twice, so do not pass expressions with
+// side effects such as `i++`.
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+// Sharding mode selector with two values: shardByCol (0) and shardByRow (1).
+// NOTE(review): the names suggest column-wise vs. row-wise partitioning of work;
+// the code that consumes this enum is outside this header.
+enum shardType_t
+{
+  shardByCol = 0,
+  shardByRow = 1
+};
+
+// Per-platform tuning constants. Definitions are provided only when TIZEN or
+// ANDROID is defined; on any other platform none of these macros is defined by
+// this header.
+// NOTE(review): 16536 is neither 16384 (16 * 1024) nor 65536 (64 * 1024) -- it
+// may be a typo for one of them; confirm before relying on the exact value.
+// 524288 equals 512 * 1024.
+// NOTE(review): MAX_K, GEN_COL, MAX_COL and MIN_COL look like blocking limits for
+// the matrix kernels; their exact meaning is defined by their users elsewhere.
+#ifdef TIZEN
+#define L1_CACHE_SIZE (16536 * 2)
+#define L2_CACHE_SIZE (524288 * 2)
+#define L3_CACHE_SIZE (0) // no L3
+#define MAX_K (512)
+// single-thread
+#define GEN_COL (1440)
+// multi-threads
+#define MAX_COL (90)
+#define MIN_COL (32)
+#elif defined ANDROID
+#define L1_CACHE_SIZE (16536 * 4)
+#define L2_CACHE_SIZE (524288 * 8)
+#define L3_CACHE_SIZE (0) //(524288 * 8) //no L3
+#define MAX_K (512 * 2)
+// single-thread
+#define GEN_COL (1440)
+// multi-threads
+// MAX_COL is larger on 64-bit ARM (__aarch64__) than on other Android targets.
+#if __aarch64__
+#define MAX_COL (1024)
+#else
+#define MAX_COL (90)
+#endif
+#define MIN_COL (32)
+#endif
+
+// Kernel variant selector with three consecutive values starting at 0.
+// The enumerator spellings ("KENEL", "KERENL") are intentionally left unchanged:
+// renaming them would break any code that refers to them.
+enum
+{
+  USE_COMMON_KENEL = 0,
+  USE_12BIT_KERNEL = 1,
+  USE_NONZERO_KERENL = 2
+};
+
+// Integer division of x by y rounded up, computed as (x + y - 1) / y.
+// Intended for non-negative x and positive y; x + y - 1 must not overflow.
+template <typename T> static T divup(const T &x, const T &y)
+{
+  const auto biased = x + y - 1;
+  return static_cast<T>(biased / y);
+}
+
+#ifdef NCNN
+// Rounds sz up to the nearest multiple of n. n must be positive.
+static inline size_t alignSize(size_t sz, int n)
+{
+  const size_t blocks = (sz + n - 1) / n;
+  return blocks * n;
+}
+
+// Rounds sz up to the next even value: adds one, then clears the lowest bit.
+static inline size_t alignBy2(size_t sz)
+{
+  const size_t bumped = sz + 1;
+  // Shifting right then left by one drops bit 0, same as masking with ~1.
+  return (bumped >> 1) << 1;
+}
+#endif // NCNN
+
+// Returns the bitwise complement of a (every bit inverted).
+static inline int32_t BitNot(int32_t a)
+{
+  const int32_t inverted = ~a;
+  return inverted;
+}
+
+// Returns an all-ones mask (the complement of 0) when a is non-zero, and 0
+// otherwise.
+static inline int32_t MaskIfNonZero(int32_t a)
+{
+  const int32_t no_bits = 0;
+  if (a != 0)
+  {
+    return ~no_bits;
+  }
+  return no_bits;
+}
+
+// Returns the bitwise AND of a and b.
+static inline int32_t BitAnd(int32_t a, int32_t b)
+{
+  const int32_t common_bits = a & b;
+  return common_bits;
+}
+
+// Returns a shifted right by offset bits using the built-in >> operator.
+// offset must be in [0, 31].
+static inline int32_t ShiftRight(int32_t a, int offset)
+{
+  const int32_t shifted = a >> offset;
+  return shifted;
+}
+
+// Returns the MaskIfNonZero mask of the comparison a < b: the non-zero mask when
+// a is less than b, and 0 otherwise.
+static inline int32_t MaskIfLessThan(int32_t a, int32_t b)
+{
+  const int32_t is_less = a < b;
+  return MaskIfNonZero(is_less);
+}
+
+// Returns the MaskIfNonZero mask of the comparison a > b: the non-zero mask when
+// a is greater than b, and 0 otherwise.
+static inline int32_t MaskIfGreaterThan(int32_t a, int32_t b)
+{
+  const int32_t is_greater = a > b;
+  return MaskIfNonZero(is_greater);
+}
+
+// Returns a + b. No overflow handling is performed.
+static inline int32_t Add(int32_t a, int32_t b)
+{
+  const int32_t sum = a + b;
+  return sum;
+}
+
+// Divides x by 2^exponent, rounding to the nearest integer with ties rounded
+// away from zero (e.g. 5 / 2 -> 3, -5 / 2 -> -3).
+// exponent is expected to be in [0, 31].
+static inline int32_t RoundingDivideByPOT(int32_t x, int exponent)
+{
+  // Low `exponent` bits set: selects the part of x that the shift discards.
+  const int32_t mask = (1ll << exponent) - 1;
+  const int32_t remainder = x & mask;
+  // Half of the divisor (rounded down); one higher for negative x so that exact
+  // ties on the negative side are not rounded up towards zero.
+  const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
+  const int32_t round_up = (remainder > threshold) ? 1 : 0;
+  return (x >> exponent) + round_up;
+}
+// Returns the rounded value of (a * b) / 2^31, i.e. the high 32 bits of the
+// doubled 64-bit product. The single overflowing case, where both operands are
+// the lowest int32_t value, saturates to the highest int32_t value.
+static inline int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
+{
+  const int32_t int32_lowest = std::numeric_limits<int32_t>::min();
+  if (a == int32_lowest && b == int32_lowest)
+  {
+    return std::numeric_limits<int32_t>::max();
+  }
+
+  const int64_t product = static_cast<int64_t>(a) * static_cast<int64_t>(b);
+  // Bias by just under half the divisor, signed like the product, so the
+  // truncating division below rounds to nearest.
+  const int32_t nudge = (product < 0) ? (1 - (1 << 30)) : (1 << 30);
+  return static_cast<int32_t>((product + nudge) / (1ll << 31));
+}
+
+// Scales x by a fixed-point multiplier and a power-of-two shift.
+// A positive shift multiplies x by 2^shift before the high multiply; a zero or
+// negative shift applies a rounding division by 2^(-shift) after it.
+static inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier,
+                                                    int shift)
+{
+  int left_shift = 0;
+  int right_shift = 0;
+  if (shift > 0)
+  {
+    left_shift = shift;
+  }
+  else
+  {
+    right_shift = -shift;
+  }
+
+  const int32_t scaled = x * (1 << left_shift);
+  const int32_t high = SaturatingRoundingDoublingHighMul(scaled, quantized_multiplier);
+  return RoundingDivideByPOT(high, right_shift);
+}
+
+// Four-lane variant of the saturating rounding doubling high multiply; a thin
+// wrapper around the NEON intrinsic vqrdmulhq_s32.
+static inline int32x4_t SaturatingRoundingDoublingHighMulV(int32x4_t a, int32x4_t b)
+{
+  const int32x4_t high = vqrdmulhq_s32(a, b);
+  return high;
+}
+
+// Four-lane rounding division of x by 2^exponent, built from NEON intrinsics.
+static inline int32x4_t RoundingDivideByPOTV(int32x4_t x, int exponent)
+{
+  // Every lane holds -exponent; vrshlq_s32 is given this as its shift count.
+  const int32x4_t neg_exponent = vdupq_n_s32(-exponent);
+  // Sign bit of (x & -exponent) smeared across the lane by the shift by 31.
+  const int32x4_t masked = vandq_s32(x, neg_exponent);
+  const int32x4_t sign_fixup = vshrq_n_s32(masked, 31);
+  // Saturating add of the fixup before the rounding shift.
+  const int32x4_t adjusted = vqaddq_s32(x, sign_fixup);
+  return vrshlq_s32(adjusted, neg_exponent);
+}
+
+// Four-lane variant of MultiplyByQuantizedMultiplier: every lane of x is scaled
+// by the same quantized_multiplier and shift.
+// A positive shift is applied to x before the high multiply; a zero or negative
+// shift becomes a rounding division by 2^(-shift) after it.
+static inline int32x4_t MultiplyByQuantizedMultiplierV(int32x4_t x, int32_t quantized_multiplier,
+                                                       int shift)
+{
+  int left_shift = 0;
+  int right_shift = 0;
+  if (shift > 0)
+  {
+    left_shift = shift;
+  }
+  else
+  {
+    right_shift = -shift;
+  }
+
+  const int32x4_t scaled = vrshlq_s32(x, vdupq_n_s32(left_shift));
+  const int32x4_t multiplier = vdupq_n_s32(quantized_multiplier);
+  const int32x4_t high = SaturatingRoundingDoublingHighMulV(scaled, multiplier);
+  return RoundingDivideByPOTV(high, right_shift);
+}
+
+} // namespace srcn
+} // namespace nnfw
+
+#endif // __NNFW_SRCN_COMMON_H__