Diffstat (limited to 'compute/cker/include/cker/operation/FullyConnected.h')
-rw-r--r--  compute/cker/include/cker/operation/FullyConnected.h  138
1 file changed, 90 insertions(+), 48 deletions(-)
diff --git a/compute/cker/include/cker/operation/FullyConnected.h b/compute/cker/include/cker/operation/FullyConnected.h
index 428fb1b53..01b925efb 100644
--- a/compute/cker/include/cker/operation/FullyConnected.h
+++ b/compute/cker/include/cker/operation/FullyConnected.h
@@ -19,69 +19,66 @@
#define __NNFW_CKER_FULLY_CONNECTED_H__
#include "cker/Shape.h"
+#include "cker/Types.h"
#include "cker/Utils.h"
+#include "cker/TensorUtils.h"
namespace nnfw
{
namespace cker
{
-struct FullyConnectedParams
+class FCTempArena
{
- // uint8 inference params.
- // TODO(b/65838351): Use smaller types if appropriate.
- int32_t input_offset;
- int32_t weights_offset;
- int32_t output_offset;
- int32_t output_multiplier;
- int output_shift;
- // uint8, etc, activation params.
- int32_t quantized_activation_min;
- int32_t quantized_activation_max;
- // float activation params.
- float float_activation_min;
- float float_activation_max;
- // FullyConnectedWeightsFormat weights_format;
+public:
+ FCTempArena(void) : prepared(false), input_quantized(), scaling_factors()
+ {
+ // DO NOTHING
+ }
+
+ void prepare(const Shape &input_shape, const Shape &weights_shape)
+ {
+ auto input_size = input_shape.FlatSize();
+ input_quantized.resize(input_size);
+
+ assert(weights_shape.DimensionsCount() == 2);
+ int batch_size = input_size / weights_shape.Dims(1);
+ scaling_factors.resize(batch_size);
+ prepared = true;
+ }
+
+public:
+ bool prepared;
+ std::vector<int8_t> input_quantized;
+ std::vector<float> scaling_factors;
};
inline void FullyConnected(const FullyConnectedParams &params, const Shape &input_shape,
const float *input_data, const Shape &weights_shape,
- const float *weights_data, const Shape &bias_shape,
- const float *bias_data, const Shape &output_shape, float *output_data)
+ const float *weights_data, const Shape &, const float *bias_data,
+ const Shape &, float *output_data)
{
- UNUSED_RELEASE(input_shape);
- UNUSED_RELEASE(bias_shape);
- const float output_activation_min = params.float_activation_min;
- const float output_activation_max = params.float_activation_max;
- // TODO(benoitjacob): This really should be:
- // const int batches = ArraySize(output_dims, 1);
- // but the current --variable_batch hack consists in overwriting the 3rd
- // dimension with the runtime batch size, as we don't keep track for each
- // array of which dimension is the batch dimension in it.
- const int output_dims_count = output_shape.DimensionsCount();
- const int weights_dims_count = weights_shape.DimensionsCount();
- const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
- const int output_depth =
- MatchingDim(weights_shape, weights_dims_count - 2, output_shape, output_dims_count - 1);
- const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
- for (int b = 0; b < batches; ++b)
+ int total_input_size = input_shape.FlatSize();
+ int input_size = weights_shape.Dims(1);
+ const int batch_size = total_input_size / input_size;
+ const int num_units = weights_shape.Dims(0);
+
+ // Output = bias if bias tensor exists.
+ if (bias_data)
{
- for (int out_c = 0; out_c < output_depth; ++out_c)
- {
- float total = 0.f;
- for (int d = 0; d < accum_depth; ++d)
- {
- total += input_data[b * accum_depth + d] * weights_data[out_c * accum_depth + d];
- }
- float bias_value = 0.0f;
- if (bias_data)
- {
- bias_value = bias_data[out_c];
- }
- output_data[out_c + output_depth * b] = ActivationFunctionWithMinMax(
- total + bias_value, output_activation_min, output_activation_max);
- }
+ VectorBatchVectorAssign(bias_data, num_units, batch_size, output_data);
+ }
+ else
+ {
+ ZeroVector(output_data, batch_size * num_units);
}
+
+ // Compute output += weight * input
+ MatrixBatchVectorMultiplyAccumulate(weights_data, num_units, input_size, input_data, batch_size,
+ output_data, /*result_stride=*/1);
+
+ // Apply activation function
+ ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
}
inline void FullyConnected(const FullyConnectedParams &params, const Shape &input_shape,
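[Note: the rewritten float path in the hunk above delegates the per-batch GEMV to MatrixBatchVectorMultiplyAccumulate instead of the old hand-rolled triple loop. For reference, it is numerically equivalent to the following standalone sketch; plain C++, no cker dependencies, every name below is illustrative rather than part of the header, and a ReLU stands in for whatever params.activation selects.]

#include <algorithm>

// Reference semantics of the new float path:
//   output[b][u] = act(bias[u] + sum_i weights[u][i] * input[b][i])
// weights is row-major [num_units x input_size]; batch_size is derived from
// input_shape.FlatSize() / weights_shape.Dims(1), as in the header.
inline void FullyConnectedFloatRef(const float *input, const float *weights,
                                   const float *bias, float *output,
                                   int batch_size, int input_size, int num_units)
{
  for (int b = 0; b < batch_size; ++b)
  {
    for (int u = 0; u < num_units; ++u)
    {
      float acc = bias ? bias[u] : 0.0f;
      for (int i = 0; i < input_size; ++i)
      {
        acc += weights[u * input_size + i] * input[b * input_size + i];
      }
      output[b * num_units + u] = std::max(acc, 0.0f); // ReLU stand-in
    }
  }
}
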
@@ -138,6 +135,51 @@ inline void FullyConnected(const FullyConnectedParams &params, const Shape &inpu
}
}
+inline void FullyConnectedHybrid(const FullyConnectedParams &params, const Shape &input_shape,
+ const float *input_data, const Shape &filter_shape,
+ const int8_t *filter_data, const Shape &, const float *bias_data,
+ const Shape &, float *output_data, FCTempArena &temp_arena)
+{
+ int total_input_size = input_shape.FlatSize();
+ const int input_size = filter_shape.Dims(1);
+ const int batch_size = total_input_size / input_size;
+ const int num_units = filter_shape.Dims(0);
+
+ // Output = bias. Unlike the float path above, bias_data is used unconditionally here.
+ VectorBatchVectorAssign(bias_data, num_units, batch_size, output_data);
+
+ // Save matrix multiplication computation for all zero input.
+ if (IsZeroVector(input_data, total_input_size))
+ {
+ ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ return;
+ }
+
+ // Quantize input from float to int8 + quantization params (scaling factor).
+ float unused_min, unused_max;
+ float *scaling_factors_ptr = temp_arena.scaling_factors.data();
+ int8_t *quant_data = temp_arena.input_quantized.data();
+
+ // Quantize each batch independently.
+ for (int b = 0; b < batch_size; ++b)
+ {
+ const int offset = b * input_size;
+ SymmetricQuantizeFloats(input_data + offset, input_size, quant_data + offset, &unused_min,
+ &unused_max, &scaling_factors_ptr[b]);
+ // Incorporate scaling of the filter.
+ scaling_factors_ptr[b] *= params.weights_scale;
+ }
+
+ // Compute output += weight * quantized_input
+ MatrixBatchVectorMultiplyAccumulate(filter_data, num_units, input_size, quant_data,
+ scaling_factors_ptr, batch_size, output_data,
+ /*result_stride=*/1);
+
+ // Apply activation function to floats.
+ ApplyActivationToVector(output_data, batch_size * num_units, params.activation, output_data);
+ return;
+}
+
} // namespace cker
} // namespace nnfw
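
[Note: a hypothetical call site for the hybrid path, sketched under two assumptions: FullyConnectedParams (cker/Types.h) carries the activation and weights_scale fields referenced in the hunk above, and Shape accepts an initializer list like the TFLite RuntimeShape it mirrors. FCTempArena is prepared once and reused across calls, keeping the quantization scratch buffers out of the hot loop.]

#include <cstdint>
#include <vector>

#include "cker/Shape.h"
#include "cker/operation/FullyConnected.h"

void RunHybridFullyConnected()
{
  using namespace nnfw::cker;

  const int batch_size = 2, input_size = 4, num_units = 3;
  const Shape input_shape({batch_size, input_size});
  const Shape filter_shape({num_units, input_size}); // [num_units x input_size]

  std::vector<float> input(batch_size * input_size, 1.0f);
  std::vector<int8_t> filter(num_units * input_size, 10); // pre-quantized weights
  std::vector<float> bias(num_units, 0.5f);
  std::vector<float> output(batch_size * num_units);

  FullyConnectedParams params{};
  params.activation = FusedActivationFunctionType::kNone; // assumed field/enum
  params.weights_scale = 0.05f; // float scale of the int8 filter (assumed field)

  // Prepare scratch buffers once; later invocations reuse them.
  FCTempArena arena;
  if (!arena.prepared)
    arena.prepare(input_shape, filter_shape);

  // Bias and output shapes are unnamed and unused in the header, so empty
  // Shape objects are passed for those parameters.
  FullyConnectedHybrid(params, input_shape, input.data(), filter_shape, filter.data(),
                       Shape{}, bias.data(), Shape{}, output.data(), arena);
}

[The arena mirrors how a runtime would hoist per-invocation allocations: prepare() sizes input_quantized to the full input and scaling_factors to one entry per batch row, which is exactly what SymmetricQuantizeFloats and MatrixBatchVectorMultiplyAccumulate consume in the hunk above.]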