summaryrefslogtreecommitdiff
path: root/libs
diff options
context:
space:
mode:
author오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>2019-04-02 15:47:32 +0900
committer박세희/On-Device Lab(SR)/Principal Engineer/삼성전자 <saehie.park@samsung.com>2019-04-02 15:47:32 +0900
commit1fa465f8b95342c332698eb61d0765914c515d4b (patch)
tree0c1fd03ef4f4c4781db642ec5a866c6327ce12da /libs
parenta89b80776908ba33319cc0b1d6ad4f23e452a2b1 (diff)
downloadnnfw-1fa465f8b95342c332698eb61d0765914c515d4b.tar.gz
nnfw-1fa465f8b95342c332698eb61d0765914c515d4b.tar.bz2
nnfw-1fa465f8b95342c332698eb61d0765914c515d4b.zip
Introduce cpu quant8 fully connected kernel (#4918)
Introduce cpu quantized int8 fully connected kernel from tflite
Use kernel in neurun cpu backend

Signed-off-by: Hyeongseok Oh <hseok82.oh@samsung.com>
Diffstat (limited to 'libs')
-rw-r--r-- libs/cker/include/cker/operation/FullyConnected.h 52
1 files changed, 52 insertions, 0 deletions
diff --git a/libs/cker/include/cker/operation/FullyConnected.h b/libs/cker/include/cker/operation/FullyConnected.h
index b253b21fd..60218e34a 100644
--- a/libs/cker/include/cker/operation/FullyConnected.h
+++ b/libs/cker/include/cker/operation/FullyConnected.h
@@ -82,6 +82,58 @@ inline void FullyConnected(const FullyConnectedParams &params, const Shape &inpu
}
}
+// Quantized (uint8) fully connected layer.
+//
+// For every batch row `b` and output channel `out_c` this accumulates, in int32,
+//   sum over d of (filter[out_c][d] + weights_offset) * (input[b][d] + input_offset),
+// adds bias_data[out_c] when bias_data is non-null, rescales the sum with
+// MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift), adds
+// output_offset, clamps to [quantized_activation_min, quantized_activation_max]
+// and stores the result as uint8_t.
+//
+// params       : offsets, output multiplier/shift and activation range
+// input_shape  : not read by this implementation
+// input_data   : input values, indexed as [b * accum_depth + d]
+// filter_shape : needs at least 2 dimensions; the last one is the accumulation depth,
+//                the one before it is paired with the last output dimension via MatchingDim
+// filter_data  : weights, indexed as [out_c * accum_depth + d]
+// bias_shape   : not read by this implementation
+// bias_data    : per-output-channel bias; may be null, in which case no bias is added
+// output_shape : needs at least 1 dimension; the last one is the output depth
+// output_data  : destination buffer, written as [out_c + output_depth * b]
+inline void FullyConnected(const FullyConnectedParams &params, const Shape &input_shape,
+                           const uint8_t *input_data, const Shape &filter_shape,
+                           const uint8_t *filter_data, const Shape &bias_shape,
+                           const int32_t *bias_data, const Shape &output_shape,
+                           uint8_t *output_data)
+{
+  // Neither shape is consulted below; the casts keep -Wunused-parameter quiet.
+  (void)input_shape;
+  (void)bias_shape;
+
+  const int32_t input_offset = params.input_offset;
+  const int32_t filter_offset = params.weights_offset;
+  const int32_t output_offset = params.output_offset;
+  const int32_t output_multiplier = params.output_multiplier;
+  const int output_shift = params.output_shift;
+  const int32_t output_activation_min = params.quantized_activation_min;
+  const int32_t output_activation_max = params.quantized_activation_max;
+  // NOTE(review): TFLITE_DCHECK_* are TFLite macros; confirm they are defined for cker builds.
+  TFLITE_DCHECK_GE(filter_shape.DimensionsCount(), 2);
+  TFLITE_DCHECK_GE(output_shape.DimensionsCount(), 1);
+
+  TFLITE_DCHECK_LE(output_activation_min, output_activation_max);
+  // TODO(benoitjacob): This really should be:
+  //     const int batches = ArraySize(output_dims, 1);
+  // but the current --variable_batch hack consists in overwriting the 3rd
+  // dimension with the runtime batch size, as we don't keep track for each
+  // array of which dimension is the batch dimension in it.
+  const int output_dim_count = output_shape.DimensionsCount();
+  const int filter_dim_count = filter_shape.DimensionsCount();
+  // Batch count is derived from the output shape with its last dimension skipped.
+  const int batches = FlatSizeSkipDim(output_shape, output_dim_count - 1);
+  const int output_depth =
+      MatchingDim(filter_shape, filter_dim_count - 2, output_shape, output_dim_count - 1);
+  const int accum_depth = filter_shape.Dims(filter_dim_count - 1);
+  for (int b = 0; b < batches; ++b)
+  {
+    for (int out_c = 0; out_c < output_depth; ++out_c)
+    {
+      // Accumulate in 32 bits: operands are widened before the offsets are applied.
+      int32_t acc = 0;
+      for (int d = 0; d < accum_depth; ++d)
+      {
+        int32_t input_val = input_data[b * accum_depth + d];
+        int32_t filter_val = filter_data[out_c * accum_depth + d];
+        acc += (filter_val + filter_offset) * (input_val + input_offset);
+      }
+      if (bias_data)
+      {
+        acc += bias_data[out_c];
+      }
+      acc = MultiplyByQuantizedMultiplier(acc, output_multiplier, output_shift);
+      acc += output_offset;
+      // Clamp to the activation range before narrowing to the 8-bit output type.
+      acc = std::max(acc, output_activation_min);
+      acc = std::min(acc, output_activation_max);
+      // Use the standard uint8_t (matching output_data's type) rather than the `uint8` alias.
+      output_data[out_c + output_depth * b] = static_cast<uint8_t>(acc);
+    }
+  }
+}
+
} // namespace cker
} // namespace nnfw