Diffstat (limited to 'runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc')
-rw-r--r-- runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc | 85
1 file changed, 69 insertions(+), 16 deletions(-)
diff --git a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
index c00be64e5..05da33abf 100644
--- a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
+++ b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
@@ -18,6 +18,8 @@
#include "../Tensor.h"
#include <cker/operation/FullyConnected.h>
+#include <cker/TensorUtils.h>
+#include <misc/polymorphic_downcast.h>
namespace onert
{
@@ -31,7 +33,7 @@ namespace ops
FullyConnectedLayer::FullyConnectedLayer()
: _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
_activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
- _is_hybrid(false)
+ _external_context(nullptr), _is_hybrid(false)
{
// DO NOTHING
}
@@ -102,7 +104,8 @@ void FullyConnectedLayer::fullyConnectedHybrid()
op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena);
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
+ _external_context->ruy_context());
#else
nnfw::cker::FullyConnectedHybrid(
op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
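Both branches of the hybrid kernel now take a ruy context owned by a per-backend ExternalContext, so GEMM state (thread pool, prepacked-weight cache) is created once and shared by every layer instead of per call. A minimal sketch of that ownership pattern, using a stand-in GemmContext type and a hypothetical SharedExternalContext class rather than the actual onert/ruy API:

#include <memory>

// Stand-in for ruy::Context: in the real backend this holds the thread pool
// and the prepacked-weight cache that the kernels reuse between calls.
struct GemmContext
{
};

// Hypothetical holder mirroring the role of onert's ExternalContext: created
// once per backend and handed to each layer at configure() time.
class SharedExternalContext
{
public:
  SharedExternalContext() : _ctx(std::make_unique<GemmContext>()) {}
  GemmContext *ruy_context() const { return _ctx.get(); }

private:
  std::unique_ptr<GemmContext> _ctx;
};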
@@ -110,31 +113,67 @@ void FullyConnectedLayer::fullyConnectedHybrid()
(_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
: reinterpret_cast<const int8_t *>(_weights->buffer()),
getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena);
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
+ _external_context->ruy_context());
-// TODO Enable calling decrease_ref
-#if 0
if (_cached_weights == nullptr || _is_weights_freed)
return;
- auto weight_tensor = dynamic_cast<const Tensor *>(_weights);
- if (weight_tensor)
+ // Reaching this point with a non-null _cached_weights and _is_weights_freed == false
+ // means the weight shape satisfies the condition of the ruy kernel's prepack cache.
+ // Once we get here, we should not get here again, except for the case below where the
+ // input is a zero vector.
+
+ // If every input element is zero, the ruy kernel path is bypassed entirely,
+ // so handle that case before releasing the weights.
+ const int input_size = getTensorShape(_input).FlatSize();
+ if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
+ return;
+
+ auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);
+
+ // This weight tensor may also be used as a constant tensor by other operations.
+ // Therefore, check its reference state (via its buffer) before releasing it, as below.
+ auto tensor = const_cast<Tensor *>(weight_tensor);
+ if (tensor->buffer() == nullptr) // ref is already 0?
{
- auto tensor = const_cast<Tensor *>(weight_tensor);
+ _is_weights_freed = true;
+ return;
+ }
- tensor->decrease_ref();
- if (tensor->buffer() == nullptr) // ref == 0?
- {
- _is_weights_freed = true;
- }
+ tensor->decrease_ref();
+ if (tensor->buffer() == nullptr) // ref == 0?
+ {
+ _is_weights_freed = true;
}
-#endif // if 0
#endif
}
+void FullyConnectedLayer::fullyConnectedSparseWeight()
+{
+ float output_activation_min = 0, output_activation_max = 0;
+ CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
+
+ nnfw::cker::FullyConnectedParams op_params;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+ op_params.activation = convertActivationType(_activation);
+
+ int w0_size = getTensorShape(_weights).Dims(0);
+ const uint16_t *w1_segments = _weights->w1_segments();
+ const uint16_t *w1_indices = _weights->w1_indices();
+
+ nnfw::cker::FullyConnectedSparseWeight(
+ op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
+ getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
+ getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
+ getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w0_size, w1_segments,
+ w1_indices);
+}
+
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
const IPortableTensor *bias, ir::Activation activation,
- IPortableTensor *output)
+ IPortableTensor *output,
+ const std::shared_ptr<ExternalContext> &external_context)
{
_input = input;
_weights = weights;
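The new fullyConnectedSparseWeight() path hands w1_segments and w1_indices to the cker kernel. A hedged sketch of the row-wise, CSR-style traversal that layout implies, written as a standalone single-batch function with hypothetical names (the actual nnfw::cker::FullyConnectedSparseWeight signature also handles batching):

#include <algorithm>
#include <cstdint>

// weights_data holds only the nonzero weights, row by row; w1_segments[i] and
// w1_segments[i + 1] delimit row i's nonzeros, and w1_indices[k] is the input
// column of the k-th nonzero. num_units corresponds to w0_size in the diff.
void SparseFullyConnectedSketch(const float *input, const float *weights_data,
                                const uint16_t *w1_segments, const uint16_t *w1_indices,
                                const float *bias, float *output, int num_units,
                                float activation_min, float activation_max)
{
  for (int i = 0; i < num_units; ++i)
  {
    float acc = bias ? bias[i] : 0.0f;
    for (int k = w1_segments[i]; k < w1_segments[i + 1]; ++k)
      acc += weights_data[k] * input[w1_indices[k]];
    output[i] = std::min(std::max(acc, activation_min), activation_max);
  }
}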
@@ -143,6 +182,7 @@ void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortabl
_output = output;
_is_hybrid = input->data_type() == OperandType::FLOAT32 &&
weights->data_type() == OperandType::QUANT_INT8_SYMM;
+ _external_context = external_context;
}
void FullyConnectedLayer::run()
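With the extended configure() signature, callers now pass the backend's shared ExternalContext along with the tensors. A hedged illustration of the new call shape from a kernel generator's point of view (the caller-side variable names are hypothetical):

// ifm, weights, bias and ofm are IPortableTensor pointers resolved by the caller;
// _external_context is the backend-wide std::shared_ptr<ExternalContext>.
fn->configure(ifm, weights, bias, activation, ofm, _external_context);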
@@ -151,6 +191,10 @@ void FullyConnectedLayer::run()
{
fullyConnectedHybrid();
}
+ else if (_weights->is_sparse())
+ {
+ fullyConnectedSparseWeight();
+ }
else if (_input->data_type() == OperandType::FLOAT32)
{
fullyConnectedFloat32();
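Both the zero-input bypass above and the constant-bias pruning in the prepare() hunk below rely on nnfw::cker::IsZeroVector. A minimal sketch of what such a check does, assuming a plain linear scan (the cker implementation may be vectorized):

// Returns true when every element of the vector is exactly zero.
bool IsZeroVectorSketch(const float *vector, int size)
{
  for (int i = 0; i < size; ++i)
  {
    if (vector[i] != 0.0f)
      return false;
  }
  return true;
}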
@@ -167,7 +211,16 @@ void FullyConnectedLayer::run()
void FullyConnectedLayer::prepare()
{
-#ifdef USE_RUY_GEMV
+ if (_bias && _bias->is_constant())
+ {
+ const int bias_size = getTensorShape(_bias).FlatSize();
+ if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+ {
+ _bias = nullptr;
+ }
+ }
+
+#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(USE_RUY_GEMV)
// TODO This is workaround
// The only fc hybrid will use ruy kernel
if (_input->data_type() != OperandType::FLOAT32 ||