diff options
Diffstat (limited to 'runtimes/nn/depend/external/gemmlowp/public/output_stages.h')
-rw-r--r-- | runtimes/nn/depend/external/gemmlowp/public/output_stages.h | 185 |
1 file changed, 0 insertions, 185 deletions
diff --git a/runtimes/nn/depend/external/gemmlowp/public/output_stages.h b/runtimes/nn/depend/external/gemmlowp/public/output_stages.h deleted file mode 100644 index 23bcdc05f..000000000 --- a/runtimes/nn/depend/external/gemmlowp/public/output_stages.h +++ /dev/null @@ -1,185 +0,0 @@ -// Copyright 2015 The Gemmlowp Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// output_stages.h: public definitions of the output stages that can -// be assembled into an output pipeline, to control how internal -// 32-bit accumulators are transformed to obtain the final uint8 -// result matrix entries. - -#ifndef GEMMLOWP_PUBLIC_OUTPUT_STAGES_H_ -#define GEMMLOWP_PUBLIC_OUTPUT_STAGES_H_ - -#include <tuple> - -#include "../internal/common.h" - -namespace gemmlowp { - -// This output stage takes int32 values and returns still int32 values, -// but "quantized down" to the uint8 scale; in other words, its output -// is typically what one would then clamp to [0..255] and cast to uint8 -// (see OutputStageSaturatingCastToUint8). -// -// This "quantization down" process depends on 3 parameters, -// result_offset, result_mult_int, result_shift, -// and the result is: -// ((input + result_offset) * result_mult_int + rounding) >> result_shift -// where -// rounding = (result_shift < 1) ? 
0 : (1 << (result_shift - 1)); -struct OutputStageQuantizeDownInt32ToUint8Scale { - std::int32_t result_offset; - std::int32_t result_mult_int; - std::int32_t result_shift; -}; - -// This output stage takes int32 values and returns still int32 values, -// but "quantized down" to the uint8 scale; in other words, its output -// is typically what one would then clamp to [0..255] and cast to uint8 -// (see OutputStageSaturatingCastToUint8). -// -// This "quantization down" process depends on 3 parameters, -// result_offset, result_mult_int, result_shift, -// and the result is: -// ((input + result_offset) * result_mult_int + rounding) >> result_shift -// where -// rounding = (result_shift < 1) ? 0 : (1 << (result_shift - 1)); -// -// Difference from OutputStageQuantizeDownInt32ToUint8Scale here is that each -// row or column of the output (depending on tShape) has its own result_offset -// and result_mult_int numbers. -template <VectorShape tShape> -struct OutputStageQuantizeDownInt32ToUint8ScalePC { - VectorMap<const std::int32_t, tShape> result_offset; - VectorMap<const std::int32_t, tShape> result_mult_int; - std::int32_t result_shift; -}; - -// This output stage takes int32 values and returns still int32 values, -// but "quantized down" to the uint8 scale; in other words, its output -// is typically what one would then clamp to [0..255] and cast to uint8 -// (see OutputStageSaturatingCastToUint8). -// -// This "quantization down" process depends on 3 parameters, -// result_offset, result_fixedpoint_multiplier, result_shift, -// and the result is: -// ((FixedPointMul(input, result_fixedpoint_multiplier) + -// rounding) >> result_shift) + result_offset_after_shift -// where -// rounding = (result_shift < 1) ? 
0 : (1 << (result_shift - 1)); -// and where FixedPointMul(x, y) is the nearest integer to the following -// mathematical expression, evaluated without overflow or intermediate -// rounding: -// (x * y) / 2^31 -// In practice, it is expected that FixedPointMul will be implemented -// using hardware "rounding doubling int32 multiply high" instructions, -// such as VQRDMULH on ARM. See in fixedpoint.h the generic function, -// SaturatingRoundingDoublingHighMul. -// -// Notice that the other difference from -// OutputStageQuantizeDownInt32ToUint8Scale is that the result offset -// is applied after the multiplier and shift, not before. This ensures -// that no matter what the multiplier and shift are, the result offset -// is effectively integral: offsetting the final result by an integer. -// The motivation for this is to faithfully support quantization schemes -// where the formula linking quantized values to the real mathematical -// values that they represent, is of the form -// -// real_value = scale * (quantized_value - zero_point) -// -// where scale is a real number (represented in quantized form by -// result_fixedpoint_multiplier and result_shift) and zero_point -// is an integer telling which quantized value correspond to the -// real value 0, and is represented here by (the opposite of) -// result_offset_after_shift. -// The motivation for such a quantization scheme, designed to -// ensure that 0 is always a representable value, is that in -// many applications, we need to 0-pad arrays and that can only be -// done for quantized arrays if 0 is a representable value in -// quantized form. In particular, convolution-like operations -// are often implemented using 0-padding, or "im2col"-like -// expansions that implicitly rely on 0-padding. If 0 were not -// a representable value, such operations would have to pad -// using a nonzero value, introducing bias in the computation. 
-struct OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint { - std::int32_t result_fixedpoint_multiplier; - std::int32_t result_shift; - std::int32_t result_offset_after_shift; -}; - -// This output stage takes int32 values that are expected to be already -// on the final uint8 scale, but not necessarily in the [0..255] range. -// It clamps them to the [0..255] range and returns them casted to uint8. -struct OutputStageSaturatingCastToUint8 {}; - -// This output stage depends on a "bias vector" that should contain int32 -// entries, and be either a row-vector of the same number of columns as the -// result matrix, or a column-vector of the same number of rows as the -// result matrix. This output stage takes int32 values and adds to them -// the corresponding entry of the bias vector (broadcasted in the other -// direction to fit the matrix's shape), outputting int32 values. -template <typename VectorType> -struct OutputStageBiasAddition { - VectorType bias_vector; -}; - -// This output stage clamps value between the specified min and max bounds. -// It can be used to implement "rectified linear unit" activation functions -// in neural networks. -struct OutputStageClamp { - std::int32_t min; - std::int32_t max; -}; - -struct OutputStageTanh { - std::int32_t real_zero_as_int32; - std::int32_t real_amplitude_as_int32; -}; - -// An output pipeline is just a std::tuple of output stages. -// This function generates a standard output pipeline consisting of two stages: -// OutputStageQuantizeDownInt32ToUint8Scale, OutputStageSaturatingCastToUint8. 
-inline std::tuple<OutputStageQuantizeDownInt32ToUint8Scale, - OutputStageSaturatingCastToUint8> -MakeStandardOutputPipeline(std::int32_t result_offset, - std::int32_t result_mult_int, - std::int32_t result_shift) { - OutputStageQuantizeDownInt32ToUint8Scale quantize_down_stage; - quantize_down_stage.result_offset = result_offset; - quantize_down_stage.result_mult_int = result_mult_int; - quantize_down_stage.result_shift = result_shift; - OutputStageSaturatingCastToUint8 saturating_cast_stage; - return std::make_tuple(quantize_down_stage, saturating_cast_stage); -} - -// An output pipeline is just a std::tuple of output stages. -// This function generates a standard output pipeline consisting of two stages: -// OutputStageQuantizeDownInt32ToUint8ScalePC, OutputStageSaturatingCastToUint8. -template <VectorShape tShape> -inline std::tuple<OutputStageQuantizeDownInt32ToUint8ScalePC<tShape>, - OutputStageSaturatingCastToUint8> -MakeStandardOutputPipeline( - const VectorMap<const std::int32_t, tShape>& result_offset, - const VectorMap<const std::int32_t, tShape>& result_mult_int, - std::int32_t result_shift) { - OutputStageQuantizeDownInt32ToUint8ScalePC<tShape> quantize_down_stage; - quantize_down_stage.result_offset = result_offset; - quantize_down_stage.result_mult_int = result_mult_int; - quantize_down_stage.result_shift = result_shift; - OutputStageSaturatingCastToUint8 saturating_cast_stage; - return std::make_tuple(quantize_down_stage, saturating_cast_stage); -} - -} // namespace gemmlowp - -#endif // GEMMLOWP_PUBLIC_OUTPUT_STAGES_H_ |