path: root/compute/cker/include/cker/Utils.h
/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __NNFW_CKER_UTILS_H__
#define __NNFW_CKER_UTILS_H__

#include <algorithm>
#include <cassert>
#include <cstdint>

#include "cker/gemmlowp/FixedPoint.h"
#include "Shape.h"

namespace nnfw
{
namespace cker
{

template <typename T>
inline T ActivationFunctionWithMinMax(T x, T output_activation_min, T output_activation_max)
{
  return std::min<T>(std::max<T>(x, output_activation_min), output_activation_max);
}
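
// For example, clamping to a ReLU6-style activation range:
//   ActivationFunctionWithMinMax(7.5f, 0.0f, 6.0f)  returns 6.0f
//   ActivationFunctionWithMinMax(-1.0f, 0.0f, 6.0f) returns 0.0f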

inline int32_t MultiplyByQuantizedMultiplier(int32_t x, int32_t quantized_multiplier, int shift)
{
  int left_shift = shift > 0 ? shift : 0;
  int right_shift = shift > 0 ? 0 : -shift;
  return gemmlowp::RoundingDivideByPOT(
      gemmlowp::SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier),
      right_shift);
}
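
// The quantized multiplier is interpreted as a Q31 fixed-point value, so the
// result approximates x * quantized_multiplier * 2^(shift - 31). For example,
// a multiplier of (1 << 30) with shift == 0 encodes a real factor of 0.5:
//   MultiplyByQuantizedMultiplier(100, 1 << 30, 0) returns 50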

inline int32_t MultiplyByQuantizedMultiplierGreaterThanOne(int32_t x, int32_t quantized_multiplier,
                                                           int left_shift)
{
  return gemmlowp::SaturatingRoundingDoublingHighMul(x * (1 << left_shift), quantized_multiplier);
}
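
// Same Q31 encoding, with the (positive) left shift applied before the
// multiply; e.g. (1 << 30) with left_shift == 2 encodes a real factor of 2:
//   MultiplyByQuantizedMultiplierGreaterThanOne(100, 1 << 30, 2) returns 200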

inline int NodeOffset(int b, int h, int w, int height, int width)
{
  return (b * height + h) * width + w;
}
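
// For example, in a (batch, y, x) layout with height == 4 and width == 5:
//   NodeOffset(1, 2, 3, 4, 5) returns (1 * 4 + 2) * 5 + 3 == 33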

inline int CountLeadingZeros(uint32_t integer_input)
{
  // A zero input never reaches one_in_leading_positive, so the loop below
  // would spin forever; reject it up front.
  assert(integer_input != 0);
  const uint32_t one_in_leading_positive = 1U << 31;
  int leading_zeros = 0;
  while (integer_input < one_in_leading_positive)
  {
    integer_input <<= 1;
    ++leading_zeros;
  }
  return leading_zeros;
}
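
// For example:
//   CountLeadingZeros(1U)          returns 31
//   CountLeadingZeros(0x0000FFFFU) returns 16
//   CountLeadingZeros(1U << 31)    returns 0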

// Comment from tensorflow lite:
//
// DO NOT USE THIS STRUCT FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
// BROADCASTING.
//
// NdArrayDesc<N> describes the shape and memory layout of an N-dimensional
// rectangular array of numbers.
//
// NdArrayDesc<N> is basically identical to Dims<N> defined in types.h.
// However, as Dims<N> is to be deprecated, this class exists as an adaptor
// to enable simple unoptimized implementations of element-wise broadcasting
// operations.
template <int N> struct NdArrayDesc
{
  // The "extent" of each dimension. Indices along dimension d must be in the
  // half-open interval [0, extents[d]).
  int extents[N];

  // The number of *elements* (not bytes) between consecutive indices of each
  // dimension.
  int strides[N];
};
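
// For example, a dense row-major 2 x 3 array is described by extents {2, 3}
// and strides {3, 1}, so element (i0, i1) lives at offset i0 * 3 + i1.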

// Comment from tensorflow lite:
//
// DO NOT USE THIS FUNCTION FOR NEW FUNCTIONALITY BEYOND IMPLEMENTING
// BROADCASTING.
//
// Same as Offset(), except it takes an NdArrayDesc<4> instead of a Dims<4>.
inline int SubscriptToIndex(const NdArrayDesc<4> &desc, int i0, int i1, int i2, int i3)
{
  assert(i0 >= 0 && i0 < desc.extents[0]);
  assert(i1 >= 0 && i1 < desc.extents[1]);
  assert(i2 >= 0 && i2 < desc.extents[2]);
  assert(i3 >= 0 && i3 < desc.extents[3]);
  return i0 * desc.strides[0] + i1 * desc.strides[1] + i2 * desc.strides[2] + i3 * desc.strides[3];
}
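
// For example, for a dense NHWC tensor of shape [2, 3, 4, 5] the strides are
// {60, 20, 5, 1}, so:
//   SubscriptToIndex(desc, 1, 2, 3, 4) returns 60 + 40 + 15 + 4 == 119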

template <int N>
inline void
NdArrayDescsForElementwiseBroadcast(const Shape &input0_shape, const Shape &input1_shape,
                                    NdArrayDesc<N> *desc0_out, NdArrayDesc<N> *desc1_out)
{
  assert(desc0_out != nullptr);
  assert(desc1_out != nullptr);

  auto extended_input0_shape = Shape::ExtendedShape(N, input0_shape);
  auto extended_input1_shape = Shape::ExtendedShape(N, input1_shape);

  // Copy dims to desc, calculating strides.
  int desc0_stride = 1;
  int desc1_stride = 1;
  for (int i = N - 1; i >= 0; --i)
  {
    desc0_out->extents[i] = extended_input0_shape.Dims(i);
    desc0_out->strides[i] = desc0_stride;
    desc0_stride *= extended_input0_shape.Dims(i);
    desc1_out->extents[i] = extended_input1_shape.Dims(i);
    desc1_out->strides[i] = desc1_stride;
    desc1_stride *= extended_input1_shape.Dims(i);
  }

  // Walk over each dimension. If the extents are equal do nothing.
  // Otherwise, set the desc with extent 1 to have extent equal to the other and
  // stride 0.
  for (int i = 0; i < N; ++i)
  {
    const int extent0 = extended_input0_shape.Dims(i);
    const int extent1 = extended_input1_shape.Dims(i);
    if (extent0 != extent1)
    {
      if (extent0 == 1)
      {
        desc0_out->strides[i] = 0;
        desc0_out->extents[i] = extent1;
      }
      else
      {
        assert(extent1 == 1);
        desc1_out->strides[i] = 0;
        desc1_out->extents[i] = extent0;
      }
    }
  }
}
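
// For example, broadcasting a [1, 1, 1, 3] input against a [2, 1, 4, 3] input
// (N == 4) leaves the second desc dense (strides {12, 12, 3, 1}) and rewrites
// the first to extents {2, 1, 4, 3} with strides {0, 3, 0, 1}; the zero
// strides make the broadcast dimensions reuse the same three elements.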

} // namespace cker
} // namespace nnfw

#endif // __NNFW_CKER_UTILS_H__