diff options
Diffstat (limited to 'runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc')
-rw-r--r-- | runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc | 77 |
1 file changed, 26 insertions, 51 deletions
diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc index 41b9afc0c..abe82db5e 100644 --- a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc +++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc @@ -44,64 +44,39 @@ FullyConnectedLayer::FullyConnectedLayer() static std::mutex executionMutex; bool FullyConnectedLayer::fullyConnectedFloat32() { - float output_activation_min, output_activation_max; - CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); - // b/80425683, optimized implementation produces incorrect results when the - // number of input elements is the squre of batch_size. - uint32_t batch_size = getSizeOfDimension(_outputShape, 0); - uint32_t input_n_elements = getNumberOfElements(_inputShape); - if (batch_size * batch_size == input_n_elements) + int total_input_size = 1; + for (int i = 0; i < _inputShape.dimensions.size(); i++) { - ::tflite::reference_ops::FullyConnected( - reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), - reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape), - reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), - output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), - convertShapeToDims(_outputShape)); - } - else - { - ::tflite::optimized_ops::FullyConnected( - reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), - reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape), - reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), - output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), - convertShapeToDims(_outputShape)); + total_input_size *= _inputShape.dimensions[i]; } + + int input_size = _weightsShape.dimensions[1]; + const int batch_size = total_input_size / input_size; + const int num_units = 
_weightsShape.dimensions[0]; + + TfLiteFusedActivation act = convertFusedActivation(_activation); + + ::tflite::tensor_utils::VectorBatchVectorAssign(reinterpret_cast<const float *>(_biasData), + num_units, batch_size, + reinterpret_cast<float *>(_outputData)); + + // Compute output += weight * input + ::tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate( + reinterpret_cast<const float *>(_weightsData), num_units, input_size, + reinterpret_cast<const float *>(_inputData), batch_size, + reinterpret_cast<float *>(_outputData), /*result_stride=*/1); + + // Apply activation function + ::tflite::tensor_utils::ApplyActivationToVector(reinterpret_cast<float *>(_outputData), + batch_size * num_units, act, + reinterpret_cast<float *>(_outputData)); + return true; } bool FullyConnectedLayer::fullyConnectedQuant8() { - int32_t inputOffset = -_inputShape.offset; - int32_t weightsOffset = -_weightsShape.offset; - int32_t outputOffset = _outputShape.offset; - float real_multiplier = 0.0; - int32_t output_multiplier = 0; - int32_t output_shift = 0; - int32_t output_activation_min = 0; - int32_t output_activation_max = 0; - // Caution : 'Convolution' can make misleading. It seems it is just math term. - if (!GetQuantizedConvolutionMultipler(_inputShape, _weightsShape, _biasShape, _outputShape, - &real_multiplier) || - !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift)) - { - return false; - } - CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, - &output_activation_max); - static gemmlowp::GemmContext gemm_context; - // Prevent concurrent executions that access gemm_context. - std::unique_lock<std::mutex> lock(executionMutex); - // Alow gemmlowp automatically decide how many threads to use. 
- gemm_context.set_max_num_threads(0); - ::tflite::optimized_ops::FullyConnected( - _inputData, convertShapeToDims(_inputShape), inputOffset, _weightsData, - convertShapeToDims(_weightsShape), weightsOffset, - reinterpret_cast<const int32_t *>(_biasData), convertShapeToDims(_biasShape), outputOffset, - output_multiplier, output_shift, output_activation_min, output_activation_max, _outputData, - convertShapeToDims(_outputShape), &gemm_context); - return true; + throw std::runtime_error{"FullyConnectedLayer : Not tested for TENSOR_QUANT8_ASYMM"}; } void FullyConnectedLayer::configure(uint8_t *inputData, const Shape inputShape, |