Diffstat (limited to 'runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc')
-rw-r--r--  runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc | 156
1 file changed, 92 insertions(+), 64 deletions(-)
diff --git a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
index 05da33abf..32cad84cb 100644
--- a/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
+++ b/runtime/onert/backend/cpu/ops/FullyConnectedLayer.cc
@@ -31,9 +31,9 @@ namespace ops
{
FullyConnectedLayer::FullyConnectedLayer()
- : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
- _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
- _external_context(nullptr), _is_hybrid(false)
+ : _input(nullptr), _weights(nullptr), _bias(nullptr), _output(nullptr),
+ _activation(ir::Activation::NONE), _temp_arena(new nnfw::cker::FCTempArena()),
+ _external_context(nullptr), _is_hybrid(false), _is_shuffled16x1float32(false)
{
// DO NOTHING
}
@@ -42,19 +42,22 @@ FullyConnectedLayer::~FullyConnectedLayer() = default;
void FullyConnectedLayer::fullyConnectedFloat32()
{
- float output_activation_min = 0, output_activation_max = 0;
+ nnfw::cker::FullyConnectedParams op_params;
+ float output_activation_min = 0;
+ float output_activation_max = 0;
CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
- nnfw::cker::FullyConnectedParams op_params;
+ op_params.activation = convertActivationType(_activation);
op_params.float_activation_min = output_activation_min;
op_params.float_activation_max = output_activation_max;
- op_params.activation = convertActivationType(_activation);
-
- nnfw::cker::FullyConnected(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()));
+ // TODO Set both cacheables to false when training
+ op_params.lhs_cacheable = _weights->is_constant();
+ op_params.rhs_cacheable = _input->is_constant();
+
+ nnfw::cker::FullyConnected(op_params, getShape(_input), getBuffer<float>(_input),
+ getShape(_weights), getBuffer<float>(_weights), getShape(_bias),
+ _bias ? getBuffer<float>(_bias) : nullptr, getShape(_output),
+ getBuffer<float>(_output));
}
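
Note: throughout this change, the old getTensorShape(...) / reinterpret_cast pairs are replaced by getShape / getBuffer<T> helpers from the backend's OperationUtils. A minimal sketch of what such helpers plausibly look like; the exact committed signatures are an assumption here, not a verbatim excerpt:

    // Hypothetical sketch of the helpers this diff migrates to.
    template <typename T> const T *getBuffer(const IPortableTensor *tensor)
    {
      return reinterpret_cast<const T *>(tensor->buffer());
    }

    template <typename T> T *getBuffer(IPortableTensor *tensor)
    {
      return reinterpret_cast<T *>(tensor->buffer());
    }

    // getShape(tensor) would similarly wrap the old getTensorShape(tensor).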
// executionMutex is used to protect concurrent access to non-thread-safe resources
@@ -68,23 +71,22 @@ void FullyConnectedLayer::fullyConnectedQuant8()
int32_t output_activation_max = 0;
GetQuantizedConvolutionMultiplier(_input, _weights, _bias, _output, &real_multiplier);
QuantizeMultiplier(real_multiplier, &output_multiplier, &output_shift);
- CalculateActivationRangeUint8(_activation, _output, &output_activation_min,
- &output_activation_max);
+ CalculateActivationRangeQuantized(_activation, _output, &output_activation_min,
+ &output_activation_max);
nnfw::cker::FullyConnectedParams op_params;
- op_params.input_offset = -_input->data_offset();
- op_params.weights_offset = -_weights->data_offset();
- op_params.output_offset = _output->data_offset();
+ op_params.input_offset = -_input->data_zero_point();
+ op_params.weights_offset = -_weights->data_zero_point();
+ op_params.output_offset = _output->data_zero_point();
op_params.output_multiplier = output_multiplier;
op_params.output_shift = output_shift;
op_params.quantized_activation_min = output_activation_min;
op_params.quantized_activation_max = output_activation_max;
- nnfw::cker::FullyConnected(
- op_params, getTensorShape(_input), reinterpret_cast<const uint8_t *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const uint8_t *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const int32_t *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<uint8_t *>(_output->buffer()));
+ nnfw::cker::FullyConnected(op_params, getShape(_input), getBuffer<uint8_t>(_input),
+ getShape(_weights), getBuffer<uint8_t>(_weights), getShape(_bias),
+ _bias ? getBuffer<int32_t>(_bias) : nullptr, getShape(_output),
+ getBuffer<uint8_t>(_output));
}
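
Note: GetQuantizedConvolutionMultiplier folds the three tensor scales into one value, real_multiplier = input_scale * weights_scale / output_scale, and QuantizeMultiplier re-expresses it as a Q31 fixed-point mantissa plus a power-of-two shift. A hedged sketch of the idea; the actual cker implementation differs in rounding and edge-case handling:

    #include <cmath>
    #include <cstdint>

    // Split real_multiplier into quantized (Q31 mantissa) and shift so that
    // real_multiplier ~= quantized * 2^(shift - 31).
    void quantizeMultiplierSketch(double real_multiplier, int32_t *quantized, int *shift)
    {
      const double q = std::frexp(real_multiplier, shift); // q in [0.5, 1)
      int64_t q31 = static_cast<int64_t>(std::round(q * (1ll << 31)));
      if (q31 == (1ll << 31)) // rounding pushed q up to 1.0; renormalize
      {
        q31 /= 2;
        ++*shift;
      }
      *quantized = static_cast<int32_t>(q31);
    }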
void FullyConnectedLayer::fullyConnectedHybrid()
@@ -92,7 +94,7 @@ void FullyConnectedLayer::fullyConnectedHybrid()
nnfw::cker::FCTempArena &temp_arena = *_temp_arena;
if (!temp_arena.prepared)
{
- temp_arena.prepare(getTensorShape(_input), getTensorShape(_weights));
+ temp_arena.prepare(getShape(_input), getShape(_weights));
}
nnfw::cker::FullyConnectedParams op_params;
@@ -101,20 +103,16 @@ void FullyConnectedLayer::fullyConnectedHybrid()
#ifndef USE_RUY_GEMV
nnfw::cker::FullyConnectedHybrid(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const int8_t *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
- _external_context->ruy_context());
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ getBuffer<int8_t>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output), temp_arena, _external_context->ruy_context());
#else
nnfw::cker::FullyConnectedHybrid(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights),
- (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
- : reinterpret_cast<const int8_t *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), temp_arena,
- _external_context->ruy_context());
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ (_cached_weights) ? reinterpret_cast<const int8_t *>(_cached_weights)
+ : getBuffer<int8_t>(_weights),
+ getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr, getShape(_output),
+ getBuffer<float>(_output), temp_arena, _external_context->ruy_context());
if (_cached_weights == nullptr || _is_weights_freed)
return;
@@ -125,8 +123,8 @@ void FullyConnectedLayer::fullyConnectedHybrid()
// If the input elements are all zero, ruy by-passes (does not enter) its kernel path,
// so handle this case here
- const int input_size = getTensorShape(_input).FlatSize();
- if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_input->buffer()), input_size))
+ const int input_size = getShape(_input).FlatSize();
+ if (nnfw::cker::IsZeroVector(getBuffer<float>(_input), input_size))
return;
auto weight_tensor = nnfw::misc::polymorphic_downcast<const Tensor *>(_weights);
@@ -143,6 +141,10 @@ void FullyConnectedLayer::fullyConnectedHybrid()
tensor->decrease_ref();
if (tensor->buffer() == nullptr) // ref == 0?
{
+#if defined(__ANDROID__) && (__ANDROID_API__ >= 26)
+ // NOTE This line forces the OS to release any unused memory immediately
+ mallopt(M_PURGE, 0);
+#endif
_is_weights_freed = true;
}
#endif
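
Note: mallopt(M_PURGE, 0) is a bionic (Android libc) extension available from API level 26 that asks the allocator to return freed pages to the OS immediately, which is why the call is compile-time guarded. A sketch of the release path this hunk extends; the helper name is hypothetical:

    #include <malloc.h> // mallopt / M_PURGE on bionic

    static void releaseWeightsIfUnused(Tensor *tensor)
    {
      tensor->decrease_ref();          // drop this layer's reference
      if (tensor->buffer() == nullptr) // last reference gone; buffer was freed
      {
    #if defined(__ANDROID__) && (__ANDROID_API__ >= 26)
        mallopt(M_PURGE, 0); // hand the freed pages back to the OS right away
    #endif
      }
    }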
@@ -150,28 +152,52 @@ void FullyConnectedLayer::fullyConnectedHybrid()
void FullyConnectedLayer::fullyConnectedSparseWeight()
{
+ nnfw::cker::FullyConnectedParams op_params;
+ op_params.activation = convertActivationType(_activation);
+
+ const uint16_t *w1_segments = _weights->sparsity()->w1_segments();
+ const uint16_t *w1_indices = _weights->sparsity()->w1_indices();
+
+ auto block_size = _weights->sparsity()->block_size();
+ if (block_size.size() == 0)
+ {
+ nnfw::cker::FullyConnectedSparseWeightRandom(
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ getBuffer<float>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output), w1_segments, w1_indices);
+ }
+ else if (block_size.size() == 2 && block_size[0] == 16 && block_size[1] == 1)
+ {
+ nnfw::cker::FullyConnectedSparseWeight16x1(
+ op_params, getShape(_input), getBuffer<float>(_input), getShape(_weights),
+ getBuffer<float>(_weights), getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output), w1_segments, w1_indices);
+ }
+ else
+ throw std::runtime_error{"FullyConnected: unsupported sparsity"};
+}
+
+void FullyConnectedLayer::fullyConnected16x1Float32()
+{
+#if defined(__aarch64__) && defined(USE_NEON)
float output_activation_min = 0, output_activation_max = 0;
CalculateActivationRange(_activation, &output_activation_min, &output_activation_max);
nnfw::cker::FullyConnectedParams op_params;
- op_params.float_activation_min = output_activation_min;
- op_params.float_activation_max = output_activation_max;
op_params.activation = convertActivationType(_activation);
- int w0_size = getTensorShape(_weights).Dims(0);
- const uint16_t *w1_segments = _weights->w1_segments();
- const uint16_t *w1_indices = _weights->w1_indices();
-
- nnfw::cker::FullyConnectedSparseWeight(
- op_params, getTensorShape(_input), reinterpret_cast<const float *>(_input->buffer()),
- getTensorShape(_weights), reinterpret_cast<const float *>(_weights->buffer()),
- getTensorShape(_bias), reinterpret_cast<const float *>(_bias ? _bias->buffer() : nullptr),
- getTensorShape(_output), reinterpret_cast<float *>(_output->buffer()), w0_size, w1_segments,
- w1_indices);
+ nnfw::cker::FullyConnected16x1Float32(op_params, getShape(_input), getBuffer<float>(_input),
+ getShape(_weights), getBuffer<float>(_weights),
+ getShape(_bias), _bias ? getBuffer<float>(_bias) : nullptr,
+ getShape(_output), getBuffer<float>(_output));
+#else
+ throw std::runtime_error{"FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
+#endif
}
void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortableTensor *weights,
const IPortableTensor *bias, ir::Activation activation,
+ ir::FullyConnectedWeightsFormat weights_format,
IPortableTensor *output,
const std::shared_ptr<ExternalContext> &external_context)
{
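
Note: in the sparse path above, w1_segments / w1_indices form a CSR-like encoding: packed weight entries w1_segments[i] .. w1_segments[i+1] belong to output row i, and w1_indices[p] names the input column of packed entry p. A scalar sketch of the random-sparsity traversal under that assumption (the 16x1 variant instead packs blocks of 16 consecutive output rows):

    for (int b = 0; b < batches; ++b)
    {
      for (int i = 0; i < output_depth; ++i)
      {
        float acc = bias_data ? bias_data[i] : 0.0f;
        for (int p = w1_segments[i]; p < w1_segments[i + 1]; ++p)
          acc += weights_data[p] * input_data[b * input_depth + w1_indices[p]];
        output_data[b * output_depth + i] = acc; // activation is applied on top of this
      }
    }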
@@ -182,6 +208,14 @@ void FullyConnectedLayer::configure(const IPortableTensor *input, const IPortabl
_output = output;
_is_hybrid = input->data_type() == OperandType::FLOAT32 &&
weights->data_type() == OperandType::QUANT_INT8_SYMM;
+ _is_shuffled16x1float32 = weights_format == ir::FullyConnectedWeightsFormat::Shuffled16x1Float32;
+#if !defined(__aarch64__) || !defined(USE_NEON)
+ if (_is_shuffled16x1float32)
+ {
+ throw std::runtime_error{
+ "FullyConnected: Shuffled16x1Float32 weights_format is not supported."};
+ }
+#endif
_external_context = external_context;
}
@@ -191,13 +225,13 @@ void FullyConnectedLayer::run()
{
fullyConnectedHybrid();
}
- else if (_weights->is_sparse())
+ else if (_weights->sparsity())
{
fullyConnectedSparseWeight();
}
else if (_input->data_type() == OperandType::FLOAT32)
{
- fullyConnectedFloat32();
+ _is_shuffled16x1float32 ? fullyConnected16x1Float32() : fullyConnectedFloat32();
}
else if (_input->data_type() == OperandType::QUANT_UINT8_ASYMM)
{
@@ -213,8 +247,8 @@ void FullyConnectedLayer::prepare()
{
if (_bias && _bias->is_constant())
{
- const int bias_size = getTensorShape(_bias).FlatSize();
- if (nnfw::cker::IsZeroVector(reinterpret_cast<float *>(_bias->buffer()), bias_size))
+ const int bias_size = getShape(_bias).FlatSize();
+ if (nnfw::cker::IsZeroVector(getBuffer<float>(_bias), bias_size))
{
_bias = nullptr;
}
@@ -236,20 +270,14 @@ void FullyConnectedLayer::prepare()
if (_input->is_dynamic() || !_weights->is_constant())
return;
- const int rows = getTensorShape(_weights).Dims(0);
+ const int rows = getShape(_weights).Dims(0);
if (rows % 4 == 0)
{
- const int total_input_size = getTensorShape(_input).FlatSize();
- const int input_size = getTensorShape(_weights).Dims(1);
- const int batch_size = total_input_size / input_size;
- if (batch_size <= 4)
- {
- // TODO If it's possible to extract precaching from ruy kernel,
- // place this instead of below code
+ // TODO If it's possible to extract precaching from ruy kernel,
+ // place this instead of below code
- // buffer will be used by ruy kernel as a cache key
- _cached_weights = _weights->buffer();
- }
+ // buffer will be used by ruy kernel as a cache key
+ _cached_weights = _weights->buffer();
}
#endif
}
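
Note: in the USE_RUY_GEMV branch, _cached_weights records _weights->buffer() so ruy can use the pointer as a prepacked-matrix cache key; dropping the old batch_size <= 4 restriction makes any constant weight matrix with a row count divisible by 4 eligible. Condensed, the precaching condition now reads as follows (a paraphrase, with the ruy-side caching behavior assumed rather than shown here):

    if (!_input->is_dynamic() && _weights->is_constant() && getShape(_weights).Dims(0) % 4 == 0)
      _cached_weights = _weights->buffer(); // pointer doubles as ruy's cache key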