Diffstat (limited to 'runtimes/nn/depend/external/gemmlowp/public/output_stages.h')
-rw-r--r--  runtimes/nn/depend/external/gemmlowp/public/output_stages.h | 185
1 file changed, 0 insertions, 185 deletions
diff --git a/runtimes/nn/depend/external/gemmlowp/public/output_stages.h b/runtimes/nn/depend/external/gemmlowp/public/output_stages.h
deleted file mode 100644
index 23bcdc05f..000000000
--- a/runtimes/nn/depend/external/gemmlowp/public/output_stages.h
+++ /dev/null
@@ -1,185 +0,0 @@
-// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// output_stages.h: public definitions of the output stages that can
-// be assembled into an output pipeline, to control how internal
-// 32-bit accumulators are transformed to obtain the final uint8
-// result matrix entries.
-
-#ifndef GEMMLOWP_PUBLIC_OUTPUT_STAGES_H_
-#define GEMMLOWP_PUBLIC_OUTPUT_STAGES_H_
-
-#include <tuple>
-
-#include "../internal/common.h"
-
-namespace gemmlowp {
-
-// This output stage takes int32 values and returns int32 values that are
-// "quantized down" to the uint8 scale; in other words, its output
-// is typically what one would then clamp to [0..255] and cast to uint8
-// (see OutputStageSaturatingCastToUint8).
-//
-// This "quantization down" process depends on 3 parameters,
-// result_offset, result_mult_int, result_shift,
-// and the result is:
-// ((input + result_offset) * result_mult_int + rounding) >> result_shift
-// where
-// rounding = (result_shift < 1) ? 0 : (1 << (result_shift - 1));
-struct OutputStageQuantizeDownInt32ToUint8Scale {
- std::int32_t result_offset;
- std::int32_t result_mult_int;
- std::int32_t result_shift;
-};
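// A minimal standalone sketch of the "quantize down" arithmetic described
// above, assuming plain C++. The function name is hypothetical, and the
// 64-bit intermediate is only used here to sidestep int32 overflow in the
// product; it is not implied by the header itself.
#include <cstdint>

std::int32_t QuantizeDownInt32ToUint8ScaleSketch(std::int32_t input,
                                                 std::int32_t result_offset,
                                                 std::int32_t result_mult_int,
                                                 std::int32_t result_shift) {
  const std::int32_t rounding =
      (result_shift < 1) ? 0 : (1 << (result_shift - 1));
  const std::int64_t scaled =
      (static_cast<std::int64_t>(input) + result_offset) * result_mult_int;
  // ((input + result_offset) * result_mult_int + rounding) >> result_shift
  return static_cast<std::int32_t>((scaled + rounding) >> result_shift);
}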
-
-// This output stage takes int32 values and returns int32 values that are
-// "quantized down" to the uint8 scale; in other words, its output
-// is typically what one would then clamp to [0..255] and cast to uint8
-// (see OutputStageSaturatingCastToUint8).
-//
-// This "quantization down" process depends on 3 parameters,
-// result_offset, result_mult_int, result_shift,
-// and the result is:
-// ((input + result_offset) * result_mult_int + rounding) >> result_shift
-// where
-// rounding = (result_shift < 1) ? 0 : (1 << (result_shift - 1));
-//
-// The difference from OutputStageQuantizeDownInt32ToUint8Scale is that here,
-// each row or column of the output (depending on tShape) has its own
-// result_offset and result_mult_int values.
-template <VectorShape tShape>
-struct OutputStageQuantizeDownInt32ToUint8ScalePC {
- VectorMap<const std::int32_t, tShape> result_offset;
- VectorMap<const std::int32_t, tShape> result_mult_int;
- std::int32_t result_shift;
-};
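// A hedged construction sketch for the per-channel variant, loosely following
// gemmlowp's documentation examples. offset_data, mult_data, cols and the
// shift value are caller-provided, hypothetical inputs.
#include <cstdint>
#include "public/gemmlowp.h"

gemmlowp::OutputStageQuantizeDownInt32ToUint8ScalePC<gemmlowp::VectorShape::Row>
MakePerColumnQuantizeDownStage(const std::int32_t* offset_data,
                               const std::int32_t* mult_data, int cols) {
  typedef gemmlowp::VectorMap<const std::int32_t, gemmlowp::VectorShape::Row>
      RowVectorMap;
  gemmlowp::OutputStageQuantizeDownInt32ToUint8ScalePC<
      gemmlowp::VectorShape::Row>
      stage;
  stage.result_offset = RowVectorMap(offset_data, cols);   // one per column
  stage.result_mult_int = RowVectorMap(mult_data, cols);   // one per column
  stage.result_shift = 8;  // illustrative value
  return stage;
}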
-
-// This output stage takes int32 values and returns int32 values that are
-// "quantized down" to the uint8 scale; in other words, its output
-// is typically what one would then clamp to [0..255] and cast to uint8
-// (see OutputStageSaturatingCastToUint8).
-//
-// This "quantization down" process depends on 3 parameters,
-// result_offset, result_fixedpoint_multiplier, result_shift,
-// and the result is:
-// ((FixedPointMul(input, result_fixedpoint_multiplier) +
-// rounding) >> result_shift) + result_offset_after_shift
-// where
-// rounding = (result_shift < 1) ? 0 : (1 << (result_shift - 1));
-// and where FixedPointMul(x, y) is the nearest integer to the following
-// mathematical expression, evaluated without overflow or intermediate
-// rounding:
-// (x * y) / 2^31
-// In practice, it is expected that FixedPointMul will be implemented
-// using hardware "rounding doubling int32 multiply high" instructions,
-// such as VQRDMULH on ARM. See in fixedpoint.h the generic function,
-// SaturatingRoundingDoublingHighMul.
-//
-// Notice the other difference from
-// OutputStageQuantizeDownInt32ToUint8Scale: the result offset
-// is applied after the multiplier and shift, not before. This ensures
-// that no matter what the multiplier and shift are, the result offset
-// is effectively integral: it offsets the final result by an integer.
-// The motivation for this is to faithfully support quantization schemes
-// where the formula linking quantized values to the real mathematical
-// values that they represent is of the form
-//
-// real_value = scale * (quantized_value - zero_point)
-//
-// where scale is a real number (represented in quantized form by
-// result_fixedpoint_multiplier and result_shift) and zero_point
-// is an integer telling which quantized value corresponds to the
-// real value 0, and is represented here by (the opposite of)
-// result_offset_after_shift.
-// The motivation for such a quantization scheme, designed to
-// ensure that 0 is always a representable value, is that in
-// many applications, we need to 0-pad arrays and that can only be
-// done for quantized arrays if 0 is a representable value in
-// quantized form. In particular, convolution-like operations
-// are often implemented using 0-padding, or "im2col"-like
-// expansions that implicitly rely on 0-padding. If 0 were not
-// a representable value, such operations would have to pad
-// using a nonzero value, introducing bias in the computation.
-struct OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint {
- std::int32_t result_fixedpoint_multiplier;
- std::int32_t result_shift;
- std::int32_t result_offset_after_shift;
-};
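// A standalone sketch of the arithmetic described above. The helper mirrors
// what fixedpoint.h's SaturatingRoundingDoublingHighMul is documented to do;
// both function names here are hypothetical.
#include <cstdint>
#include <limits>

std::int32_t SaturatingRoundingDoublingHighMulSketch(std::int32_t a,
                                                     std::int32_t b) {
  // Saturate the unique overflow case, then return the nearest int32 to
  // (a * b) / 2^31, computed with a 64-bit intermediate.
  if (a == b && a == std::numeric_limits<std::int32_t>::min()) {
    return std::numeric_limits<std::int32_t>::max();
  }
  const std::int64_t ab = static_cast<std::int64_t>(a) * b;
  const std::int64_t nudge = ab >= 0 ? (1 << 30) : (1 - (1 << 30));
  return static_cast<std::int32_t>((ab + nudge) / (std::int64_t(1) << 31));
}

std::int32_t QuantizeDownByFixedPointSketch(
    std::int32_t input, std::int32_t result_fixedpoint_multiplier,
    std::int32_t result_shift, std::int32_t result_offset_after_shift) {
  const std::int32_t rounding =
      (result_shift < 1) ? 0 : (1 << (result_shift - 1));
  const std::int32_t mul = SaturatingRoundingDoublingHighMulSketch(
      input, result_fixedpoint_multiplier);
  // ((FixedPointMul(input, multiplier) + rounding) >> shift) + offset
  return ((mul + rounding) >> result_shift) + result_offset_after_shift;
}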
-
-// This output stage takes int32 values that are expected to be already
-// on the final uint8 scale, but not necessarily in the [0..255] range.
-// It clamps them to the [0..255] range and returns them cast to uint8.
-struct OutputStageSaturatingCastToUint8 {};
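// A one-line reference sketch of that clamp-and-narrow step (the function
// name is hypothetical):
#include <algorithm>
#include <cstdint>

std::uint8_t SaturatingCastToUint8Sketch(std::int32_t x) {
  return static_cast<std::uint8_t>(std::min(255, std::max(0, x)));
}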
-
-// This output stage depends on a "bias vector" that should contain int32
-// entries, and be either a row-vector of the same number of columns as the
-// result matrix, or a column-vector of the same number of rows as the
-// result matrix. This output stage takes int32 values and adds to them
-// the corresponding entry of the bias vector (broadcasted in the other
-// direction to fit the matrix's shape), outputting int32 values.
-template <typename VectorType>
-struct OutputStageBiasAddition {
- VectorType bias_vector;
-};
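// A construction sketch along the lines of gemmlowp's quantization example
// code; bias_data and rows are assumed to be caller-provided.
#include <cstdint>
#include "public/gemmlowp.h"

typedef gemmlowp::VectorMap<const std::int32_t, gemmlowp::VectorShape::Col>
    ColVectorMap;

gemmlowp::OutputStageBiasAddition<ColVectorMap> MakeBiasStage(
    const std::int32_t* bias_data, int rows) {
  gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage;
  // One bias entry per row of the result matrix, broadcast across columns.
  bias_addition_stage.bias_vector = ColVectorMap(bias_data, rows);
  return bias_addition_stage;
}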
-
-// This output stage clamps values between the specified min and max bounds.
-// It can be used to implement "rectified linear unit" activation functions
-// in neural networks.
-struct OutputStageClamp {
- std::int32_t min;
- std::int32_t max;
-};
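// Example use as a quantized ReLU: assuming a hypothetical quantization in
// which the real value 0 maps to the quantized value 128, clamping to
// [128, 255] zeroes out all negative activations.
#include "public/gemmlowp.h"

gemmlowp::OutputStageClamp MakeReluClampStage() {
  gemmlowp::OutputStageClamp clamp_stage;
  clamp_stage.min = 128;  // quantized representation of real 0 (assumed)
  clamp_stage.max = 255;
  return clamp_stage;
}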
-
-// This output stage applies an integer-arithmetic approximation of tanh.
-// real_zero_as_int32 is the int32 value representing the real value 0;
-// real_amplitude_as_int32 is the int32 increment corresponding to a real
-// amplitude of 1 in both the input and the output.
-struct OutputStageTanh {
-  std::int32_t real_zero_as_int32;
-  std::int32_t real_amplitude_as_int32;
-};
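// A float reference sketch of the mapping just described. The real stage is
// implemented in integer arithmetic; this name and the use of double are
// assumptions of the sketch.
#include <cmath>
#include <cstdint>

std::int32_t TanhStageReferenceSketch(std::int32_t input,
                                      std::int32_t real_zero_as_int32,
                                      std::int32_t real_amplitude_as_int32) {
  const double x = static_cast<double>(input - real_zero_as_int32) /
                   real_amplitude_as_int32;
  return real_zero_as_int32 +
         static_cast<std::int32_t>(
             std::round(real_amplitude_as_int32 * std::tanh(x)));
}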
-
-// An output pipeline is just a std::tuple of output stages.
-// This function generates a standard output pipeline consisting of two stages:
-// OutputStageQuantizeDownInt32ToUint8Scale, OutputStageSaturatingCastToUint8.
-inline std::tuple<OutputStageQuantizeDownInt32ToUint8Scale,
- OutputStageSaturatingCastToUint8>
-MakeStandardOutputPipeline(std::int32_t result_offset,
- std::int32_t result_mult_int,
- std::int32_t result_shift) {
- OutputStageQuantizeDownInt32ToUint8Scale quantize_down_stage;
- quantize_down_stage.result_offset = result_offset;
- quantize_down_stage.result_mult_int = result_mult_int;
- quantize_down_stage.result_shift = result_shift;
- OutputStageSaturatingCastToUint8 saturating_cast_stage;
- return std::make_tuple(quantize_down_stage, saturating_cast_stage);
-}
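// A hedged usage sketch: gemmlowp's GemmWithOutputPipeline entry point
// (declared in public/gemmlowp.h) consumes such a tuple. The matrix maps,
// offsets and scaling parameters are assumed to be set up by the caller, and
// the wrapper's name is hypothetical.
#include <cstdint>
#include "public/gemmlowp.h"

void RunQuantizedGemm(
    gemmlowp::GemmContext* context,
    const gemmlowp::MatrixMap<const std::uint8_t,
                              gemmlowp::MapOrder::RowMajor>& lhs,
    const gemmlowp::MatrixMap<const std::uint8_t,
                              gemmlowp::MapOrder::ColMajor>& rhs,
    gemmlowp::MatrixMap<std::uint8_t, gemmlowp::MapOrder::ColMajor>* result,
    int lhs_offset, int rhs_offset, std::int32_t result_offset,
    std::int32_t result_mult_int, std::int32_t result_shift) {
  const auto output_pipeline = gemmlowp::MakeStandardOutputPipeline(
      result_offset, result_mult_int, result_shift);
  gemmlowp::GemmWithOutputPipeline<std::uint8_t, std::uint8_t,
                                   gemmlowp::DefaultL8R8BitDepthParams>(
      context, lhs, rhs, result, lhs_offset, rhs_offset, output_pipeline);
}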
-
-// An output pipeline is just a std::tuple of output stages.
-// This function generates a standard output pipeline consisting of two stages:
-// OutputStageQuantizeDownInt32ToUint8ScalePC, OutputStageSaturatingCastToUint8.
-template <VectorShape tShape>
-inline std::tuple<OutputStageQuantizeDownInt32ToUint8ScalePC<tShape>,
- OutputStageSaturatingCastToUint8>
-MakeStandardOutputPipeline(
- const VectorMap<const std::int32_t, tShape>& result_offset,
- const VectorMap<const std::int32_t, tShape>& result_mult_int,
- std::int32_t result_shift) {
- OutputStageQuantizeDownInt32ToUint8ScalePC<tShape> quantize_down_stage;
- quantize_down_stage.result_offset = result_offset;
- quantize_down_stage.result_mult_int = result_mult_int;
- quantize_down_stage.result_shift = result_shift;
- OutputStageSaturatingCastToUint8 saturating_cast_stage;
- return std::make_tuple(quantize_down_stage, saturating_cast_stage);
-}
-
-} // namespace gemmlowp
-
-#endif // GEMMLOWP_PUBLIC_OUTPUT_STAGES_H_