/*
 * Copyright (c) 2020 Samsung Electronics Co., Ltd. All Rights Reserved
 * Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LUCI_INTERPRETER_KERNELS_UTILS_H
#define LUCI_INTERPRETER_KERNELS_UTILS_H

#include "luci_interpreter/core/Tensor.h"

#include <tensorflow/lite/kernels/internal/types.h>

#include <cassert>
#include <cstdint>

namespace luci_interpreter
{
namespace kernels
{

using Activation = luci_interpreter::FusedActFunc;

#define LUCI_INTERPRETER_CHECK(cond)                 \
  if (!(cond))                                       \
  {                                                  \
    assert(false && "LUCI_INTERPRETER_CHECK fails"); \
  }

// Computes the padding on one side for SAME padding: half of the total padding
// implied by the convolution geometry, clamped at zero.
inline int32_t computePadding(int32_t stride, int32_t dilation_rate, int32_t in_size,
                              int32_t filter_size, int32_t out_size)
{
  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  const int32_t padding = ((out_size - 1) * stride + effective_filter_size - in_size) / 2;
  return padding > 0 ? padding : 0;
}

// Same as computePadding, but also reports via 'offset' whether the total
// padding is odd (i.e. whether one extra padding cell is needed at the end).
inline int32_t computePaddingWithOffset(int32_t stride, int32_t dilation_rate, int32_t in_size,
                                        int32_t filter_size, int32_t out_size, int32_t *offset)
{
  int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  int32_t total_padding = ((out_size - 1) * stride + effective_filter_size - in_size);
  total_padding = total_padding > 0 ? total_padding : 0;
  *offset = total_padding % 2;
  return total_padding / 2;
}

// Computes the output spatial size for the given padding mode, e.g. for
// image_size = 10, filter_size = 3, stride = 2: SAME -> 5, VALID -> 4.
inline int32_t computeOutputSize(Padding padding, int32_t image_size, int32_t filter_size,
                                 int32_t stride, int32_t dilation_rate = 1)
{
  const int32_t effective_filter_size = (filter_size - 1) * dilation_rate + 1;
  switch (padding)
  {
    case Padding::SAME:
      return (image_size + stride - 1) / stride;
    case Padding::VALID:
      return (image_size + stride - effective_filter_size) / stride;
    default:
      assert(false);
      return 0;
  }
}

// Computes the flat offset of element (d0, d1, d2, d3) in a 4-D tensor.
inline int32_t calcOffset(const circle::Tensor *tensor, int32_t d0, int32_t d1, int32_t d2,
                          int32_t d3)
{
  return ((d0 * Tensor::dim(tensor, 1) + d1) * Tensor::dim(tensor, 2) + d2) *
           Tensor::dim(tensor, 3) +
         d3;
}

// Fills 'activation_min'/'activation_max' with the value range implied by the
// given fused activation function.
template <typename T>
void calculateActivationRange(Activation activation, T *activation_min, T *activation_max);

// Computes the output shape that results from broadcasting 'input1' against 'input2'.
tflite::RuntimeShape calculateShapeForBroadcast(const circle::Tensor *input1,
                                                const circle::Tensor *input2);

// Helper wrapper to hide broadcast logic: a one-element vector is indexed with
// stride 0, so every index reads the same (broadcast) element.
template <typename T> class BroadcastableWrapper
{
public:
  BroadcastableWrapper(const std::vector<T> &v) : _v(v), _stride(v.size() == 1 ? 0 : 1) {}

  T operator[](int idx) { return _v[idx * _stride]; }

private:
  const std::vector<T> &_v;
  int _stride;
};
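// Illustrative sketch (hypothetical values): a per-tensor scale stored as a
// one-element vector can be indexed as if it were per-channel, since the
// wrapper's stride of 0 maps every index back to element 0:
//
//   std::vector<float> scales{0.5f};             // one scale for all channels
//   BroadcastableWrapper<float> wrapped(scales);
//   // wrapped[0] == wrapped[1] == ... == 0.5f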
inline tflite::RuntimeShape getTensorShape(const circle::Tensor *tensor)
{
  if (tensor == nullptr)
    return tflite::RuntimeShape();

  tflite::RuntimeShape runtime_shape(Tensor::num_dims(tensor));
  for (int i = 0; i < Tensor::num_dims(tensor); ++i)
  {
    runtime_shape.SetDim(i, Tensor::dim(tensor, i));
  }
  return runtime_shape;
}

template <typename T> const T *getTensorData(const uint8_t *tensor_data)
{
  return tensor_data != nullptr ? reinterpret_cast<const T *>(tensor_data) : nullptr;
}

template <typename T> T *getTensorData(uint8_t *tensor_data)
{
  return tensor_data != nullptr ? reinterpret_cast<T *>(tensor_data) : nullptr;
}

// A list of tensors in a format that can be used by kernels like split and
// concatenation.
template <typename T, bool is_const> class VectorOfTensors
{
public:
  using ElementT = typename std::conditional<is_const, const T, T>::type;
  using TensorT = typename std::conditional<is_const, const circle::Tensor, circle::Tensor>::type;

  // Build with the tensors in 'tensor_list'.
  explicit VectorOfTensors(const std::vector<TensorT *> &tensor_list)
  {
    const int num_tensors = tensor_list.size();

    all_data_.reserve(num_tensors);
    all_shape_.reserve(num_tensors);
    all_shape_ptr_.reserve(num_tensors);

    for (TensorT *tensor : tensor_list)
    {
      all_data_.push_back(getTensorData<T>(tensor));
      all_shape_.push_back(getTensorShape(tensor));
    }

    // Taking the pointer from inside a std::vector is only OK if the vector is
    // never modified, so we populate all_shape_ in the previous loop and then
    // we are free to grab pointers here.
    for (tflite::RuntimeShape &shape : all_shape_)
    {
      all_shape_ptr_.push_back(&shape);
    }
  }

  // Return a pointer to the data pointers of all tensors in the list. For
  // example:
  //   float* const* f = v.data();
  //   f[0][1] is the second element of the first tensor.
  ElementT *const *data() const { return all_data_.data(); }

  // Return a pointer to the shape pointers of all tensors in the list. For
  // example:
  //   const RuntimeShape* const* d = v.shapes();
  //   d[1] are the dimensions of the second tensor in the list.
  const tflite::RuntimeShape *const *shapes() const { return all_shape_ptr_.data(); }

private:
  std::vector<ElementT *> all_data_;
  std::vector<tflite::RuntimeShape> all_shape_;
  std::vector<tflite::RuntimeShape *> all_shape_ptr_;
};

#ifndef DIS_QUANT
void calculateActivationRangeQuantized(Activation activation, const circle::Tensor *output,
                                       int32_t *activation_min, int32_t *activation_max);

void calculateActivationRangeQuantized(Activation activation, int32_t output_zero_point,
                                       float output_scale, DataType data_type,
                                       int32_t *activation_min, int32_t *activation_max);

template <typename T> constexpr bool one_of_types() { return false; }

// Checks if T is equal to one of the {U, Other...} types
template <typename T, typename U, typename... Other> constexpr bool one_of_types()
{
  return std::is_same<T, U>::value || one_of_types<T, Other...>();
}

// For each row of the n_row x n_col int8 matrix, accumulates the row sum
// multiplied by 'scalar' into the corresponding element of 'output'.
void matrixScalarMultiplyAccumulate(const int8_t *matrix, int32_t scalar, int32_t n_row,
                                    int32_t n_col, int32_t *output);

/**
 * Fills activation min and max parameters depending on given data type and activation
 *
 * T is a template parameter, so after optimization this code is left with only the required branch
 *
 * @tparam T data type of the arithmetic operation's output tensor
 * @param p tflite params to fill
 * @param act luci_interpreter::Activation of the arithmetic operation
 */
template <typename T>
void fillArithmeticActivationRange(tflite::ArithmeticParams &p, Activation act)
{
  static_assert(one_of_types<T, float, int32_t, int64_t>(), "Unsupported dtype");

  if (std::is_same<T, float>::value)
    calculateActivationRange(act, &p.float_activation_min, &p.float_activation_max);
  else if (std::is_same<T, int32_t>::value)
    calculateActivationRange(act, &p.quantized_activation_min, &p.quantized_activation_max);
  else
    calculateActivationRange(act, &p.int64_activation_min, &p.int64_activation_max);
}

// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and a shift representation of its exponent.
//
// Handles an arbitrary positive multiplier. The 'shift' output value is
// essentially the floating-point exponent of the multiplier:
// negative for a right-shift (when the multiplier is < 1), positive for a
// left-shift (when the multiplier is > 1).
void quantizeMultiplier(double double_multiplier, int32_t *quantized_multiplier, int *shift);

// Decompose a double multiplier into a Q0.31 int32 representation of its
// significand, and a shift representation of the NEGATION of its exponent;
// this is intended as a RIGHT-shift.
//
// Restricted to the case where the multiplier is < 1 (and non-negative).
void quantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t *quantized_multiplier,
                                         int *left_shift);
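// Illustrative decompositions, assuming the implementation follows the usual
// TFLite QuantizeMultiplier convention (round-to-nearest on the Q0.31
// significand); 'q' and 's' are hypothetical outputs:
//
//   int32_t q;
//   int s;
//   quantizeMultiplier(0.5, &q, &s);  // q = 1 << 30 (0.5 in Q0.31), s = 0
//   quantizeMultiplier(1.5, &q, &s);  // q = round(0.75 * 2^31),     s = +1
//   quantizeMultiplier(0.25, &q, &s); // q = 1 << 30 (0.5 in Q0.31), s = -1
//
// At runtime a product is then computed roughly as (x * q) >> 31, followed by
// a left-shift by s (or a right-shift when s is negative).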
inline double getQuantizedConvolutionMultipler(float input_scale, float filter_scale,
                                               float output_scale)
{
  const double input_product_scale = static_cast<double>(input_scale * filter_scale);

  LUCI_INTERPRETER_CHECK(input_product_scale >= 0);

  return input_product_scale / static_cast<double>(output_scale);
}

// TODO Rename getQuantizedConvolutionMultiplers to something more general:
// it is used for non-convolution operators too.
inline std::vector<double> getQuantizedConvolutionMultiplers(float input_scale,
                                                             const std::vector<float> &filter_scale,
                                                             float output_scale)
{
  std::vector<double> effective_output_scales;
  size_t n = filter_scale.size();
  effective_output_scales.reserve(n);
  for (size_t i = 0; i < n; ++i)
  {
    effective_output_scales.push_back(
      getQuantizedConvolutionMultipler(input_scale, filter_scale[i], output_scale));
  }
  return effective_output_scales;
}

// Fixed-point multiplier for a single channel: a Q0.31 significand plus a
// power-of-two shift, as produced by quantizeMultiplier.
struct ChannelQuantMultipliers
{
  int shift;
  int32_t multiplier;
  ChannelQuantMultipliers() = default;
};

// Converts a vector of effective output scales (e.g. the result of
// getQuantizedConvolutionMultiplers) into per-channel multiplier/shift pairs.
inline std::vector<ChannelQuantMultipliers>
quantizeMultipliers(const std::vector<double> &effective_scale)
{
  size_t n = effective_scale.size();
  std::vector<ChannelQuantMultipliers> params(n);
  for (size_t i = 0; i < n; ++i)
  {
    quantizeMultiplier(effective_scale[i], &params[i].multiplier, &params[i].shift);
  }
  return params;
}

// A list of quantized tensors in a format that can be used by kernels like
// split and concatenation.
template <typename T> class VectorOfQuantizedTensors : public VectorOfTensors<T, true>
{
public:
  using typename VectorOfTensors<T, true>::TensorT;

  // Build with the tensors in 'tensor_list'.
  explicit VectorOfQuantizedTensors(const std::vector<TensorT *> &tensor_list)
    : VectorOfTensors<T, true>(tensor_list)
  {
    for (TensorT *tensor : tensor_list)
    {
      zero_point_.push_back(tensor->zero_point());
      scale_.push_back(tensor->scale());
    }
  }

  const float *scale() const { return scale_.data(); }
  const int32_t *zero_point() const { return zero_point_.data(); }

private:
  std::vector<int32_t> zero_point_;
  std::vector<float> scale_;
};
#endif // DIS_QUANT

} // namespace kernels
} // namespace luci_interpreter

#endif // LUCI_INTERPRETER_KERNELS_UTILS_H