Diffstat (limited to 'runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc')
-rw-r--r-- runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc | 139
1 file changed, 139 insertions(+), 0 deletions(-)
diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
new file mode 100644
index 000000000..41b9afc0c
--- /dev/null
+++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "FullyConnectedLayer.h"
+
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "kernel/cpu/OperationUtils.h"
+
+#include <mutex>
+
+namespace neurun
+{
+namespace kernel
+{
+namespace cpu
+{
+
+FullyConnectedLayer::FullyConnectedLayer()
+ : _inputData(nullptr), _weightsData(nullptr), _biasData(nullptr), _outputData(nullptr),
+ _inputShape(), _weightsShape(), _biasShape(), _outputShape(),
+ _activation(ANEURALNETWORKS_FUSED_NONE), _inputType(OperandType::SCALAR_FLOAT32)
+{
+ // DO NOTHING
+}
+
+// executionMutex is used to protect concurrent access to non-threadsafe resources
+// like gemmlowp::GemmContext.
+// std::mutex is safe for pthreads on Android.
+static std::mutex executionMutex;
+bool FullyConnectedLayer::fullyConnectedFloat32()
+{
+ float output_activation_min, output_activation_max;
+ CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ // b/80425683: the optimized implementation produces incorrect results when the
+ // number of input elements is the square of batch_size, so fall back to the reference op.
+ uint32_t batch_size = getSizeOfDimension(_outputShape, 0);
+ uint32_t input_n_elements = getNumberOfElements(_inputShape);
+ if (batch_size * batch_size == input_n_elements)
+ {
+ ::tflite::reference_ops::FullyConnected(
+ reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
+ reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
+ reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
+ output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+ convertShapeToDims(_outputShape));
+ }
+ else
+ {
+ ::tflite::optimized_ops::FullyConnected(
+ reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
+ reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
+ reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
+ output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
+ convertShapeToDims(_outputShape));
+ }
+ return true;
+}
+
+bool FullyConnectedLayer::fullyConnectedQuant8()
+{
+ int32_t inputOffset = -_inputShape.offset;
+ int32_t weightsOffset = -_weightsShape.offset;
+ int32_t outputOffset = _outputShape.offset;
+ float real_multiplier = 0.0;
+ int32_t output_multiplier = 0;
+ int32_t output_shift = 0;
+ int32_t output_activation_min = 0;
+ int32_t output_activation_max = 0;
+ // Note: 'Convolution' in the helper name below can be misleading; the same multiplier math applies here.
+ if (!GetQuantizedConvolutionMultipler(_inputShape, _weightsShape, _biasShape, _outputShape,
+ &real_multiplier) ||
+ !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift))
+ {
+ return false;
+ }
+ CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
+ &output_activation_max);
+ static gemmlowp::GemmContext gemm_context;
+ // Prevent concurrent executions that access gemm_context.
+ std::unique_lock<std::mutex> lock(executionMutex);
+ // Allow gemmlowp to decide automatically how many threads to use.
+ gemm_context.set_max_num_threads(0);
+ ::tflite::optimized_ops::FullyConnected(
+ _inputData, convertShapeToDims(_inputShape), inputOffset, _weightsData,
+ convertShapeToDims(_weightsShape), weightsOffset,
+ reinterpret_cast<const int32_t *>(_biasData), convertShapeToDims(_biasShape), outputOffset,
+ output_multiplier, output_shift, output_activation_min, output_activation_max, _outputData,
+ convertShapeToDims(_outputShape), &gemm_context);
+ return true;
+}
+
+void FullyConnectedLayer::configure(uint8_t *inputData, const Shape inputShape,
+ uint8_t *weightsData, const Shape weightsShape,
+ uint8_t *biasData, const Shape biasShape, FuseCode activation,
+ uint8_t *outputData, const Shape outputShape)
+{
+ _inputData = inputData;
+ _inputShape = inputShape;
+ _inputType = inputShape.type;
+ _weightsData = weightsData;
+ _weightsShape = weightsShape;
+ _biasData = biasData;
+ _biasShape = biasShape;
+ _activation = activation;
+ _outputData = outputData;
+ _outputShape = outputShape;
+}
+
+void FullyConnectedLayer::run()
+{
+ if (_inputType == OperandType::TENSOR_FLOAT32)
+ {
+ fullyConnectedFloat32();
+ }
+ else if (_inputType == OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ throw std::runtime_error{"FullyConnectedLayer : Not tested for TENSOR_QUANT8_ASYMM"};
+ // fullyConnectedQuant8();
+ }
+}
+
+} // namespace cpu
+} // namespace kernel
+} // namespace neurun
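
For reference, a minimal usage sketch of the new layer (not part of the patch). It assumes the NNAPI-style Shape struct declared in kernel/cpu/OperationUtils.h (fields: type, dimensions, scale, offset) and uses arbitrary illustrative buffer sizes; only configure() and run() are taken from the code above.

// Hypothetical example: shapes and buffer contents are made up for illustration.
#include "kernel/cpu/FullyConnectedLayer.h"
#include "kernel/cpu/OperationUtils.h"

#include <cstdint>
#include <vector>

using namespace neurun::kernel::cpu;

void runFullyConnectedExample()
{
  // One batch of 4 inputs and 3 output units: weights are 3x4, bias has 3 elements.
  Shape inputShape{OperandType::TENSOR_FLOAT32, {1, 4}, 1.0f, 0};
  Shape weightsShape{OperandType::TENSOR_FLOAT32, {3, 4}, 1.0f, 0};
  Shape biasShape{OperandType::TENSOR_FLOAT32, {3}, 1.0f, 0};
  Shape outputShape{OperandType::TENSOR_FLOAT32, {1, 3}, 1.0f, 0};

  std::vector<float> input(4, 1.0f);
  std::vector<float> weights(12, 0.5f);
  std::vector<float> bias(3, 0.0f);
  std::vector<float> output(3, 0.0f);

  FullyConnectedLayer layer;
  layer.configure(reinterpret_cast<uint8_t *>(input.data()), inputShape,
                  reinterpret_cast<uint8_t *>(weights.data()), weightsShape,
                  reinterpret_cast<uint8_t *>(bias.data()), biasShape,
                  ANEURALNETWORKS_FUSED_NONE,
                  reinterpret_cast<uint8_t *>(output.data()), outputShape);
  layer.run(); // dispatches to fullyConnectedFloat32() because the input type is TENSOR_FLOAT32
}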