diff options
author | 오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com> | 2019-04-02 15:47:32 +0900 |
---|---|---|
committer | 박세희/On-Device Lab(SR)/Principal Engineer/삼성전자 <saehie.park@samsung.com> | 2019-04-02 15:47:32 +0900 |
commit | 1fa465f8b95342c332698eb61d0765914c515d4b (patch) | |
tree | 0c1fd03ef4f4c4781db642ec5a866c6327ce12da /libs | |
parent | a89b80776908ba33319cc0b1d6ad4f23e452a2b1 (diff) | |
download | nnfw-1fa465f8b95342c332698eb61d0765914c515d4b.tar.gz nnfw-1fa465f8b95342c332698eb61d0765914c515d4b.tar.bz2 nnfw-1fa465f8b95342c332698eb61d0765914c515d4b.zip |
Introduce cpu quant8 fully connected kernel (#4918)
Introduce cpu quantized uint8 fully connected kernel from tflite
Use kernel in neurun cpu backend
Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
Diffstat (limited to 'libs')
-rw-r--r-- | libs/cker/include/cker/operation/FullyConnected.h | 52 |
1 file changed, 52 insertions, 0 deletions
diff --git a/libs/cker/include/cker/operation/FullyConnected.h b/libs/cker/include/cker/operation/FullyConnected.h index b253b21fd..60218e34a 100644 --- a/libs/cker/include/cker/operation/FullyConnected.h +++ b/libs/cker/include/cker/operation/FullyConnected.h @@ -82,6 +82,58 @@ inline void FullyConnected(const FullyConnectedParams ¶ms, const Shape &inpu } } +inline void FullyConnected(const FullyConnectedParams ¶ms, const Shape &input_shape, + const uint8_t *input_data, const Shape &filter_shape, + const uint8_t *filter_data, const Shape &bias_shape, + const int32_t *bias_data, const Shape &output_shape, + uint8_t *output_data) +{ + const int32_t input_offset = params.input_offset; + const int32_t filter_offset = params.weights_offset; + const int32_t output_offset = params.output_offset; + const int32_t output_multiplier = params.output_multiplier; + const int output_shift = params.output_shift; + const int32_t output_activation_min = params.quantized_activation_min; + const int32_t output_activation_max = params.quantized_activation_max; + TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2); + TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1); + + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + // TODO(benoitjacob): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. 
+ const int output_dim_count = output_shape.DimensionsCount(); + const int filter_dim_count = filter_shape.DimensionsCount(); + const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1); + const int output_depth = + MatchingDim(filter_shape, filter_dim_count - 2, output_shape, output_dim_count - 1); + const int accum_depth = filter_shape.Dims(filter_dim_count - 1); + for (int b = 0; b < batches; ++b) + { + for (int out_c = 0; out_c < output_depth; ++out_c) + { + int32_t acc = 0; + for (int d = 0; d < accum_depth; ++d) + { + int32_t input_val = input_data[b * accum_depth + d]; + int32_t filter_val = filter_data[out_c * accum_depth + d]; + acc += (filter_val + filter_offset) * (input_val + input_offset); + } + if (bias_data) + { + acc += bias_data[out_c]; + } + acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift); + acc += output_offset; + acc = std::max(acc, output_activation_min); + acc = std::min(acc, output_activation_max); + output_data[out_c + output_depth * b] = static_cast<uint8>(acc); + } + } +} + } // namespace cker } // namespace nnfw |