summary | refs | log | tree | commit | diff
path: root/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc')
-rw-r--r--  runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc | 77
1 files changed, 26 insertions, 51 deletions
diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
index 41b9afc0c..abe82db5e 100644
--- a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
@@ -44,64 +44,39 @@ FullyConnectedLayer::FullyConnectedLayer()
static std::mutex executionMutex;
bool FullyConnectedLayer::fullyConnectedFloat32()
{
- float output_activation_min, output_activation_max;
- CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
- // b/80425683, optimized implementation produces incorrect results when the
- // number of input elements is the squre of batch_size.
- uint32_t batch_size = getSizeOfDimension(_outputShape, 0);
- uint32_t input_n_elements = getNumberOfElements(_inputShape);
- if (batch_size * batch_size == input_n_elements)
+ int total_input_size = 1;
+ for (int i = 0; i < _inputShape.dimensions.size(); i++)
{
- ::tflite::reference_ops::FullyConnected(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
- reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
- reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
- }
- else
- {
- ::tflite::optimized_ops::FullyConnected(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
- reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
- reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
+ total_input_size *= _inputShape.dimensions[i];
}
+
+ int input_size = _weightsShape.dimensions[1];
+ const int batch_size = total_input_size / input_size;
+ const int num_units = _weightsShape.dimensions[0];
+
+ TfLiteFusedActivation act = convertFusedActivation(_activation);
+
+ ::tflite::tensor_utils::VectorBatchVectorAssign(reinterpret_cast<const float *>(_biasData),
+ num_units, batch_size,
+ reinterpret_cast<float *>(_outputData));
+
+ // Compute output += weight * input
+ ::tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+ reinterpret_cast<const float *>(_weightsData), num_units, input_size,
+ reinterpret_cast<const float *>(_inputData), batch_size,
+ reinterpret_cast<float *>(_outputData), /*result_stride=*/1);
+
+ // Apply activation function
+ ::tflite::tensor_utils::ApplyActivationToVector(reinterpret_cast<float *>(_outputData),
+ batch_size * num_units, act,
+ reinterpret_cast<float *>(_outputData));
+
return true;
}
bool FullyConnectedLayer::fullyConnectedQuant8()
{
- int32_t inputOffset = -_inputShape.offset;
- int32_t weightsOffset = -_weightsShape.offset;
- int32_t outputOffset = _outputShape.offset;
- float real_multiplier = 0.0;
- int32_t output_multiplier = 0;
- int32_t output_shift = 0;
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- // Caution : 'Convolution' can make misleading. It seems it is just math term.
- if (!GetQuantizedConvolutionMultipler(_inputShape, _weightsShape, _biasShape, _outputShape,
- &real_multiplier) ||
- !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift))
- {
- return false;
- }
- CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
- &output_activation_max);
- static gemmlowp::GemmContext gemm_context;
- // Prevent concurrent executions that access gemm_context.
- std::unique_lock<std::mutex> lock(executionMutex);
- // Alow gemmlowp automatically decide how many threads to use.
- gemm_context.set_max_num_threads(0);
- ::tflite::optimized_ops::FullyConnected(
- _inputData, convertShapeToDims(_inputShape), inputOffset, _weightsData,
- convertShapeToDims(_weightsShape), weightsOffset,
- reinterpret_cast<const int32_t *>(_biasData), convertShapeToDims(_biasShape), outputOffset,
- output_multiplier, output_shift, output_activation_min, output_activation_max, _outputData,
- convertShapeToDims(_outputShape), &gemm_context);
- return true;
+ throw std::runtime_error{"FullyConnectedLayer : Not tested for TENSOR_QUANT8_ASYMM"};
}
void FullyConnectedLayer::configure(uint8_t *inputData, const Shape inputShape,