1 files changed, 162 insertions, 0 deletions
diff --git a/runtimes/libs/srcn/src/common.h b/runtimes/libs/srcn/src/common.h
new file mode 100644
index 000000000..e8abc1440
--- /dev/null
+++ b/runtimes/libs/srcn/src/common.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_SRCN_COMMON_H__
+#define __NNFW_SRCN_COMMON_H__
+
+#include <string.h>
+#include <limits>
+#include <arm_neon.h>
+
+#include "srcn/conv_type.h"
+
+namespace nnfw
+{
+namespace srcn
+{
+
+#define sizeof_RhsScalar 4 // NOTE(review): presumably bytes per RHS element (float) - confirm
+#define sizeof_LhsScalar 4 // NOTE(review): presumably bytes per LHS element (float) - confirm
+#define sizeof_ResScalar 4 // NOTE(review): presumably bytes per result element - confirm
+
+#define MIN(a, b) ((a) > (b) ? (b) : (a)) // outer parens keep the ternary intact in expressions
+#define MAX(a, b) ((a) > (b) ? (a) : (b)) // note: each argument may still be evaluated twice
+
+enum shardType_t // NOTE(review): presumably the axis work is sharded along - confirm with callers
+{
+  shardByCol = 0,
+  shardByRow // implicitly 1
+};
+
+#ifdef TIZEN // per-target tuning constants; none are defined unless TIZEN or ANDROID is set
+#define L1_CACHE_SIZE (16536 * 2) // NOTE(review): 16536 != 16384 (16 KiB); typo? confirm
+#define L2_CACHE_SIZE (524288 * 2) // 524288 == 512 * 1024
+#define L3_CACHE_SIZE (0) // no L3
+#define MAX_K (512)
+// single-thread
+#define GEN_COL (1440)
+// multi-threads
+#define MAX_COL (90)
+#define MIN_COL (32)
+#elif defined ANDROID
+#define L1_CACHE_SIZE (16536 * 4) // NOTE(review): 16536 != 16384 (16 KiB); typo? confirm
+#define L2_CACHE_SIZE (524288 * 8) // 524288 == 512 * 1024
+#define L3_CACHE_SIZE (0) //(524288 * 8)  //no L3
+#define MAX_K (512 * 2)
+// single-thread
+#define GEN_COL (1440)
+// multi-threads
+#if __aarch64__
+#define MAX_COL (1024)
+#else
+#define MAX_COL (90)
+#endif // __aarch64__
+#define MIN_COL (32)
+#endif // TIZEN / ANDROID
+
+enum // NOTE(review): presumably kernel selectors; KENEL/KERENL typos are part of the API
+{
+  USE_COMMON_KENEL = 0,
+  USE_12BIT_KERNEL,  // implicitly 1
+  USE_NONZERO_KERENL // implicitly 2
+};
+
+template <typename T> static T divup(const T &x, const T &y)
+{
+  return static_cast<T>((x + y - 1) / y); // ceil(x / y) for integral x >= 0 and y > 0
+}
+
+#ifdef NCNN // alignSize: round sz up to a multiple of n; alignBy2: round sz up to an even value
+static inline size_t alignSize(size_t sz, int n) { return (sz + n - 1) / n * n; }
+
+static inline size_t alignBy2(size_t sz) { return (sz + 1) & -2; }
+#endif // NCNN
+
+static inline int32_t BitNot(int32_t a) { return ~a; } // bitwise complement of a
+
+static inline int32_t MaskIfNonZero(int32_t a)
+{
+  // Any non-zero input yields the all-ones word BitNot(0) == -1; zero yields 0.
+  return (a != 0) ? BitNot(0) : 0;
+}
+
+static inline int32_t BitAnd(int32_t a, int32_t b) { return a & b; } // bitwise AND of a and b
+// Right-shifts a by offset bits (built-in >> on a signed value; offset is not range-checked).
+static inline int32_t ShiftRight(int32_t a, int offset) { return a >> offset; }
+// All-ones mask (-1) when a < b, otherwise 0; built on MaskIfNonZero.
+static inline int32_t MaskIfLessThan(int32_t a, int32_t b) { return MaskIfNonZero(a < b); }
+// All-ones mask (-1) when a > b, otherwise 0; built on MaskIfNonZero.
+static inline int32_t MaskIfGreaterThan(int32_t a, int32_t b) { return MaskIfNonZero(a > b); }
+
+static inline int32_t Add(int32_t a, int32_t b) { return a + b; } // plain a + b, no overflow check
+
+static inline int32_t RoundingDivideByPOT(int32_t x, int exponent)
+{
+  // Computes x / 2^exponent rounded to nearest, with exact halves rounded away from zero.
+  const int32_t low_mask = (1ll << exponent) - 1;
+  const int32_t low_bits = x & low_mask;
+  // Negative x gets a threshold one higher, so a remainder of exactly half does not round up.
+  const int32_t half = (low_mask >> 1) + ((x < 0) ? 1 : 0);
+  return (x >> exponent) + ((low_bits > half) ? 1 : 0);
+}
+static inline int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b)
+{
+  // Returns 2*a*b / 2^32 rounded to nearest; the one overflow (both INT32_MIN) saturates.
+  const int32_t int32_min = std::numeric_limits<int32_t>::min();
+  const int64_t product = static_cast<int64_t>(a) * static_cast<int64_t>(b);
+  // A sign-matched nudge of about 2^30 makes the truncating division below round to nearest.
+  const int64_t nudge = product >= 0 ? (1 << 30) : (1 - (1 << 30));
+  const int32_t high = static_cast<int32_t>((product + nudge) / (1ll << 31));
+  return (a == int32_min && b == int32_min) ? std::numeric_limits<int32_t>::max() : high;
+}
+
+static inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier,
+                                                    int shift)
+{
+  // Computes x * (quantized_multiplier / 2^31) * 2^shift in fixed point, rounding to nearest.
+  const int32_t high_mul = SaturatingRoundingDoublingHighMul(
+      x * (1 << (shift > 0 ? shift : 0)), quantized_multiplier);
+  return RoundingDivideByPOT(high_mul, shift > 0 ? 0 : -shift);
+}
+
+static inline int32x4_t SaturatingRoundingDoublingHighMulV(int32x4_t a, int32x4_t b)
+{
+  return vqrdmulhq_s32(a, b); // NEON intrinsic; presumably matches the scalar version per lane
+}
+
+static inline int32x4_t RoundingDivideByPOTV(int32x4_t x, int exponent)
+{ // per-lane x / 2^exponent, rounded; NOTE(review): expected to match RoundingDivideByPOT
+  const int32x4_t shift_vec = vdupq_n_s32(-exponent); // negative count = shift right
+  const int32x4_t fixup = vshrq_n_s32(vandq_s32(x, shift_vec), 31); // -1 if x < 0 and exponent > 0
+  const int32x4_t fixed_up_x = vqaddq_s32(x, fixup); // saturating x - 1 on those lanes
+  return vrshlq_s32(fixed_up_x, shift_vec); // rounding shift by -exponent
+}
+
+static inline int32x4_t MultiplyByQuantizedMultiplierV(int32x4_t x, int32_t quantized_multiplier,
+                                                       int shift)
+{ // vector form: mirrors MultiplyByQuantizedMultiplier, using the *V helpers on each lane
+  int left_shift = shift > 0 ? shift : 0;   // applied to x before the multiply
+  int right_shift = shift > 0 ? 0 : -shift; // applied to the product afterwards, with rounding
+  return RoundingDivideByPOTV(
+      SaturatingRoundingDoublingHighMulV(vrshlq_s32(x, vdupq_n_s32(left_shift)),
+                                         vdupq_n_s32(quantized_multiplier)),
+      right_shift);
+}
+
+} // namespace srcn
+} // namespace nnfw
+
+#endif // __NNFW_SRCN_COMMON_H__