Diffstat (limited to 'runtimes')
 runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc        |  38
 runtimes/neurun/src/kernel/cpu/ConcatLayer.cc         |  63
 runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc    | 146
 runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc |  77
 runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc        |  37
 runtimes/neurun/src/kernel/cpu/OperationUtils.h       |  47
 runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc        |  72
 7 files changed, 301 insertions(+), 179 deletions(-)
diff --git a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
index 2a6a84e10..f434a6dec 100644
--- a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
@@ -27,14 +27,14 @@ namespace kernel
namespace cpu
{
-#define AVGPOOLING_PARAMETERS \
- uint32_t height = getSizeOfDimension(_inputShape, 1); \
- uint32_t width = getSizeOfDimension(_inputShape, 2); \
- uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
- uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
- \
- uint32_t paddingHeight = (uint32_t)_paddingTop; \
- uint32_t paddingWidth = (uint32_t)_paddingLeft;
+#define AVGPOOLING_PARAMETERS \
+ tflite::PoolParams op_params; \
+ op_params.stride_height = _strideHeight; \
+ op_params.stride_width = _strideWidth; \
+ op_params.filter_height = _kernelHeight; \
+ op_params.filter_width = _kernelWidth; \
+ op_params.padding_values.height = (int8_t)_paddingTop; \
+ op_params.padding_values.width = (int8_t)_paddingLeft;
AvgPoolLayer::AvgPoolLayer()
: _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0),
@@ -47,31 +47,31 @@ AvgPoolLayer::AvgPoolLayer()
bool AvgPoolLayer::averagePoolFloat32()
{
-
AVGPOOLING_PARAMETERS
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
- ::tflite::optimized_ops::AveragePool(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth,
- _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight,
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::AveragePool(op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData),
+ convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData));
return true;
}
bool AvgPoolLayer::averagePoolQuant8()
{
-
AVGPOOLING_PARAMETERS
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
&output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
- ::tflite::optimized_ops::AveragePool(_inputData, convertShapeToDims(_inputShape), _strideWidth,
- _strideHeight, paddingWidth, paddingHeight, _kernelWidth,
- _kernelHeight, output_activation_min, output_activation_max,
- _outputData, convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::AveragePool(op_params, convertShapeToTFLiteShape(_inputShape),
+ _inputData, convertShapeToTFLiteShape(_outputShape),
+ _outputData);
return true;
}
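The pooling kernels now fill a tflite::PoolParams struct and call the RuntimeShape-based optimized_ops overloads instead of the legacy Dims<4> signatures; MaxPoolLayer below gets the same treatment. A minimal standalone sketch of the new-style call, assuming the contrib/lite headers this runtime vendors; the shapes and values are illustrative, not taken from the runtime:

#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
#include <limits>
#include <vector>

bool avgPoolSketch()
{
  // 1x4x4x1 input, 2x2 window, stride 2 -> 1x2x2x1 output.
  std::vector<int32_t> in_dims{1, 4, 4, 1}, out_dims{1, 2, 2, 1};
  ::tflite::RuntimeShape in_shape = ::tflite::GetTensorShape(in_dims);
  ::tflite::RuntimeShape out_shape = ::tflite::GetTensorShape(out_dims);
  std::vector<float> in(16, 1.0f), out(4, 0.0f);

  ::tflite::PoolParams op_params;
  op_params.stride_height = 2;
  op_params.stride_width = 2;
  op_params.filter_height = 2;
  op_params.filter_width = 2;
  op_params.padding_values.height = 0;
  op_params.padding_values.width = 0;
  op_params.float_activation_min = std::numeric_limits<float>::lowest();
  op_params.float_activation_max = std::numeric_limits<float>::max();

  ::tflite::optimized_ops::AveragePool(op_params, in_shape, in.data(), out_shape, out.data());
  return out[0] == 1.0f; // every 2x2 window of ones averages to 1
}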
diff --git a/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
index 5fe5e3993..be093b437 100644
--- a/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
@@ -24,6 +24,7 @@ namespace neurun
{
namespace kernel
{
+
namespace cpu
{
@@ -36,13 +37,21 @@ ConcatLayer::ConcatLayer()
bool ConcatLayer::concatenationFloat32()
{
- int num_inputs = _inputShapes.size();
- std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
- std::vector<::tflite::Dims<4>> inputDims(num_inputs);
- for (int i = 0; i < num_inputs; i++)
+ uint32_t num_inputs = _inputShapes.size();
+
+ tflite::ConcatenationParams op_params;
+ op_params.axis = _axis;
+ op_params.inputs_count = num_inputs;
+
+ std::vector<::tflite::RuntimeShape *> inputDimsPtr;
+ std::vector<::tflite::RuntimeShape> inputDims;
+ inputDimsPtr.reserve(num_inputs);
+ inputDims.reserve(num_inputs);
+
+ for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims[i] = convertShapeToDims(_inputShapes[i]);
- inputDimsPtr[i] = &inputDims[i];
+ inputDims.push_back(convertShapeToTFLiteShape(_inputShapes[i]));
+ inputDimsPtr.push_back(&inputDims[i]);
}
std::vector<const float *> inputFloatPtrs;
@@ -52,24 +61,44 @@ bool ConcatLayer::concatenationFloat32()
inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr));
}
- ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, float>(
- getNumberOfDimensions(_outputShape) - _axis - 1, inputFloatPtrs.data(), inputDimsPtr.data(),
- num_inputs, reinterpret_cast<float *>(_outputData), convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::Concatenation<float>(
+ op_params, inputDimsPtr.data(), inputFloatPtrs.data(),
+ convertShapeToTFLiteShape(_outputShape), reinterpret_cast<float *>(_outputData));
return true;
}
bool ConcatLayer::concatenationQuant8()
{
int num_inputs = _inputShapes.size();
- std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
- std::vector<::tflite::Dims<4>> inputDims(num_inputs);
- for (int i = 0; i < num_inputs; i++)
+
+ std::vector<int32_t> input_zeropoints(num_inputs);
+ std::vector<float> input_scales(num_inputs);
+ for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims[i] = convertShapeToDims(_inputShapes[i]);
- inputDimsPtr[i] = &inputDims[i];
+ input_zeropoints[i] = _inputShapes[i].offset;
+ input_scales[i] = _inputShapes[i].scale;
}
- ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, uint8_t>(
- getNumberOfDimensions(_outputShape) - _axis - 1, _inputDataPtrs.data(), inputDimsPtr.data(),
- num_inputs, _outputData, convertShapeToDims(_outputShape));
+
+ tflite::ConcatenationParams op_params;
+ op_params.axis = _axis;
+ op_params.inputs_count = num_inputs;
+ op_params.input_zeropoint = input_zeropoints.data();
+ op_params.input_scale = input_scales.data();
+ op_params.output_zeropoint = _outputShape.offset;
+ op_params.output_scale = _outputShape.scale;
+
+ std::vector<::tflite::RuntimeShape *> inputDimsPtr;
+ std::vector<::tflite::RuntimeShape> inputDims;
+ inputDimsPtr.reserve(num_inputs);
+ inputDims.reserve(num_inputs);
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ inputDims.push_back(convertShapeToTFLiteShape(_inputShapes[i]));
+ inputDimsPtr.push_back(&inputDims[i]);
+ }
+
+ ::tflite::optimized_ops::Concatenation<uint8_t>(
+ op_params, inputDimsPtr.data(), _inputDataPtrs.data(),
+ convertShapeToTFLiteShape(_outputShape), _outputData);
return true;
}
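Two things change here besides the shift to RuntimeShape: the axis is now passed directly in normal index order (the removed Dims<4> overload wanted the reversed index, hence the dropped getNumberOfDimensions(_outputShape) - _axis - 1), and the quantized overload receives per-input zero points and scales plus the output quantization through ConcatenationParams. A small standalone sketch of the float call, with illustrative shapes and names:

// Assumes the same contrib/lite includes as the pooling sketch above.
void concatSketch()
{
  std::vector<int32_t> in_dims{1, 1, 1, 2}, out_dims{1, 1, 1, 4};
  ::tflite::RuntimeShape in_shape = ::tflite::GetTensorShape(in_dims);
  ::tflite::RuntimeShape out_shape = ::tflite::GetTensorShape(out_dims);

  const float a[2] = {1.f, 2.f};
  const float b[2] = {3.f, 4.f};
  float out[4] = {};

  const ::tflite::RuntimeShape *input_shapes[2] = {&in_shape, &in_shape};
  const float *input_data[2] = {a, b};

  ::tflite::ConcatenationParams op_params;
  op_params.axis = 3; // channel axis, given directly (not reversed)
  op_params.inputs_count = 2;

  ::tflite::optimized_ops::Concatenation<float>(op_params, input_shapes, input_data,
                                                out_shape, out);
  // out is now {1, 2, 3, 4}
}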
diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
index 81e88e0f0..c694fa75f 100644
--- a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
@@ -33,55 +33,51 @@ static constexpr int kStaticBufferSize = 1605632;
static char static_scratch_buffer[kStaticBufferSize];
static std::mutex executionMutex;
-#define ANDROID_NN_CONV_PARAMETERS(Type) \
- uint32_t height = getSizeOfDimension(_inputShape, 1); \
- uint32_t width = getSizeOfDimension(_inputShape, 2); \
- uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
- uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
- uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
- uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
- uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
- \
- uint32_t paddingHeight = (uint32_t)_paddingTop; \
- uint32_t paddingWidth = (uint32_t)_paddingLeft; \
- \
- ::tflite::Dims<4> im2colDim; \
- im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \
- im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \
- im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \
- im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \
- \
- im2colDim.strides[0] = 1; \
- for (int i = 1; i < 4; i++) \
- { \
- im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \
- } \
- Type *im2colData = nullptr; \
- uint64_t im2colByteSize = sizeof(Type); \
- std::unique_ptr<Type[]> im2colGuard; \
- for (int i = 0; i < 4; i++) \
- { \
- im2colByteSize *= im2colDim.sizes[i]; \
- } \
- /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
- if (im2colByteSize >= 0x7fffffff) \
- { \
- std::cout << "Conv size is too large, not enough memory" << std::endl; \
- return false; \
- } \
- if (im2colByteSize <= kStaticBufferSize) \
- { \
- im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
- } \
- else \
- { \
- im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
- if (im2colData == nullptr) \
- { \
- std::cout << "Conv size is too large, not enough memory" << std::endl; \
- return false; \
- } \
- im2colGuard.reset(im2colData); \
+#define ANDROID_NN_CONV_PARAMETERS(Type) \
+ uint32_t height = getSizeOfDimension(_inputShape, 1); \
+ uint32_t width = getSizeOfDimension(_inputShape, 2); \
+ uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
+ uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
+ uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
+ \
+ uint32_t paddingHeight = (uint32_t)_paddingTop; \
+ uint32_t paddingWidth = (uint32_t)_paddingLeft; \
+ \
+ Shape im2colShape; \
+ im2colShape.dimensions.resize(4); \
+ im2colShape.dimensions[0] = getSizeOfDimension(_outputShape, 0); \
+ im2colShape.dimensions[1] = getSizeOfDimension(_outputShape, 1); \
+ im2colShape.dimensions[2] = getSizeOfDimension(_outputShape, 2); \
+ im2colShape.dimensions[3] = inDepth * kernelHeight * kernelWidth; \
+ \
+ Type *im2colData = nullptr; \
+ uint64_t im2colByteSize = sizeof(Type); \
+ std::unique_ptr<Type[]> im2colGuard; \
+ for (int i = 0; i < 4; i++) \
+ { \
+ im2colByteSize *= im2colShape.dimensions[i]; \
+ } \
+ /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
+ if (im2colByteSize >= 0x7fffffff) \
+ { \
+ std::cout << "Conv size is too large, not enough memory" << std::endl; \
+ return false; \
+ } \
+ if (im2colByteSize <= kStaticBufferSize) \
+ { \
+ im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
+ } \
+ else \
+ { \
+ im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
+ if (im2colData == nullptr) \
+ { \
+ std::cout << "Conv size is too large, not enough memory" << std::endl; \
+ return false; \
+ } \
+ im2colGuard.reset(im2colData); \
}
ConvolutionLayer::ConvolutionLayer()
@@ -112,19 +108,32 @@ bool ConvolutionLayer::convFloat32()
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
+
+ ::tflite::ConvParams op_params;
+ op_params.padding_type = ::tflite::PaddingType::kSame;
+ op_params.padding_values.width = paddingWidth;
+ op_params.padding_values.height = paddingHeight;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = dilationWidthFactor;
+ op_params.dilation_height_factor = dilationHeightFactor;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
::tflite::optimized_ops::Conv(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
- reinterpret_cast<const float *>(_kernelData), convertShapeToDims(_kernelShape),
- reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), _strideWidth,
- _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, paddingHeight,
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape), im2colDataToPass, im2colDim);
+ op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData), convertShapeToTFLiteShape(_kernelShape),
+ reinterpret_cast<const float *>(_kernelData), convertShapeToTFLiteShape(_biasShape),
+ reinterpret_cast<const float *>(_biasData), convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData), convertShapeToTFLiteShape(im2colShape),
+ im2colDataToPass);
return true;
}
bool ConvolutionLayer::convQuant8()
{
ANDROID_NN_CONV_PARAMETERS(uint8_t)
+
int32_t inputOffset = -_inputShape.offset;
int32_t kernelOffset = -_kernelShape.offset;
int32_t outputOffset = _outputShape.offset;
@@ -141,6 +150,24 @@ bool ConvolutionLayer::convQuant8()
}
CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
&output_activation_max);
+ int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
+
+ ::tflite::ConvParams op_params;
+ op_params.padding_type = ::tflite::PaddingType::kSame;
+ op_params.padding_values.width = paddingWidth;
+ op_params.padding_values.height = paddingHeight;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = dilationWidthFactor;
+ op_params.dilation_height_factor = dilationHeightFactor;
+ op_params.input_offset = inputOffset;
+ op_params.weights_offset = kernelOffset;
+ op_params.output_offset = outputOffset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
static gemmlowp::GemmContext gemm_context;
// Prevent concurrent executions that may access the scratch buffer and
// gemm_context.
@@ -148,11 +175,10 @@ bool ConvolutionLayer::convQuant8()
// Alow gemmlowp automatically decide how many threads to use.
gemm_context.set_max_num_threads(0);
::tflite::optimized_ops::Conv(
- _inputData, convertShapeToDims(_inputShape), inputOffset, _kernelData,
- convertShapeToDims(_kernelShape), kernelOffset, reinterpret_cast<const int32_t *>(_biasData),
- convertShapeToDims(_biasShape), _strideWidth, _strideHeight, paddingWidth, paddingHeight,
- outputOffset, output_multiplier, output_shift, output_activation_min, output_activation_max,
- _outputData, convertShapeToDims(_outputShape), im2colData, im2colDim, &gemm_context);
+ op_params, convertShapeToTFLiteShape(_inputShape), _inputData,
+ convertShapeToTFLiteShape(_kernelShape), _kernelData, convertShapeToTFLiteShape(_biasShape),
+ reinterpret_cast<const int32_t *>(_biasData), convertShapeToTFLiteShape(_outputShape),
+ _outputData, convertShapeToTFLiteShape(im2colShape), im2colData, &gemm_context);
return true;
}
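The im2col scratch tensor is now described with a neurun Shape and converted through convertShapeToTFLiteShape instead of a hand-built Dims<4>; the sizing check itself is unchanged. A worked pass through that check with illustrative numbers: for a 1x8x8x3 float input, a 3x3 kernel and a 1x8x8x16 output, im2colShape.dimensions = {1, 8, 8, 3*3*3} = {1, 8, 8, 27}, so im2colByteSize = 4 * 1 * 8 * 8 * 27 = 6912 bytes. That is well under kStaticBufferSize (1605632), so static_scratch_buffer is reused and nothing is heap-allocated; only much larger convolutions fall through to the new (std::nothrow) allocation.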
diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
index 41b9afc0c..abe82db5e 100644
--- a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
@@ -44,64 +44,39 @@ FullyConnectedLayer::FullyConnectedLayer()
static std::mutex executionMutex;
bool FullyConnectedLayer::fullyConnectedFloat32()
{
- float output_activation_min, output_activation_max;
- CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
- // b/80425683, optimized implementation produces incorrect results when the
- // number of input elements is the squre of batch_size.
- uint32_t batch_size = getSizeOfDimension(_outputShape, 0);
- uint32_t input_n_elements = getNumberOfElements(_inputShape);
- if (batch_size * batch_size == input_n_elements)
+ int total_input_size = 1;
+ for (int i = 0; i < _inputShape.dimensions.size(); i++)
{
- ::tflite::reference_ops::FullyConnected(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
- reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
- reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
- }
- else
- {
- ::tflite::optimized_ops::FullyConnected(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
- reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
- reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
+ total_input_size *= _inputShape.dimensions[i];
}
+
+ int input_size = _weightsShape.dimensions[1];
+ const int batch_size = total_input_size / input_size;
+ const int num_units = _weightsShape.dimensions[0];
+
+ TfLiteFusedActivation act = convertFusedActivation(_activation);
+
+ ::tflite::tensor_utils::VectorBatchVectorAssign(reinterpret_cast<const float *>(_biasData),
+ num_units, batch_size,
+ reinterpret_cast<float *>(_outputData));
+
+ // Compute output += weight * input
+ ::tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+ reinterpret_cast<const float *>(_weightsData), num_units, input_size,
+ reinterpret_cast<const float *>(_inputData), batch_size,
+ reinterpret_cast<float *>(_outputData), /*result_stride=*/1);
+
+ // Apply activation function
+ ::tflite::tensor_utils::ApplyActivationToVector(reinterpret_cast<float *>(_outputData),
+ batch_size * num_units, act,
+ reinterpret_cast<float *>(_outputData));
+
return true;
}
bool FullyConnectedLayer::fullyConnectedQuant8()
{
- int32_t inputOffset = -_inputShape.offset;
- int32_t weightsOffset = -_weightsShape.offset;
- int32_t outputOffset = _outputShape.offset;
- float real_multiplier = 0.0;
- int32_t output_multiplier = 0;
- int32_t output_shift = 0;
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- // Caution : 'Convolution' can make misleading. It seems it is just math term.
- if (!GetQuantizedConvolutionMultipler(_inputShape, _weightsShape, _biasShape, _outputShape,
- &real_multiplier) ||
- !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift))
- {
- return false;
- }
- CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
- &output_activation_max);
- static gemmlowp::GemmContext gemm_context;
- // Prevent concurrent executions that access gemm_context.
- std::unique_lock<std::mutex> lock(executionMutex);
- // Alow gemmlowp automatically decide how many threads to use.
- gemm_context.set_max_num_threads(0);
- ::tflite::optimized_ops::FullyConnected(
- _inputData, convertShapeToDims(_inputShape), inputOffset, _weightsData,
- convertShapeToDims(_weightsShape), weightsOffset,
- reinterpret_cast<const int32_t *>(_biasData), convertShapeToDims(_biasShape), outputOffset,
- output_multiplier, output_shift, output_activation_min, output_activation_max, _outputData,
- convertShapeToDims(_outputShape), &gemm_context);
- return true;
+ throw std::runtime_error{"FullyConnectedLayer : Not tested for TENSOR_QUANT8_ASYMM"};
}
void FullyConnectedLayer::configure(uint8_t *inputData, const Shape inputShape,
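The float path no longer dispatches between reference_ops and optimized_ops FullyConnected; it is composed from three tensor_utils primitives: seed the output with the bias, accumulate the matrix-times-batched-vector product, then apply the fused activation. A naive reference sketch of the same computation (identifiers are illustrative, not runtime members):

// Reference loops for what the tensor_utils-based float path computes.
void fullyConnectedReference(const float *input, const float *weights, const float *bias,
                             float *output, int batch_size, int input_size, int num_units)
{
  for (int b = 0; b < batch_size; b++)
  {
    for (int u = 0; u < num_units; u++)
    {
      float acc = bias[u]; // VectorBatchVectorAssign seeds each batch row with the bias
      for (int i = 0; i < input_size; i++)
      {
        // MatrixBatchVectorMultiplyAccumulate: output += weights * input,
        // weights laid out row-major as [num_units][input_size]
        acc += weights[u * input_size + i] * input[b * input_size + i];
      }
      // ApplyActivationToVector would additionally clamp/transform acc per _activation
      output[b * num_units + u] = acc;
    }
  }
}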
diff --git a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
index 3d96bb401..c4a288b07 100644
--- a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
@@ -26,14 +26,14 @@ namespace kernel
namespace cpu
{
-#define MAXPOOLING_PARAMETERS \
- uint32_t height = getSizeOfDimension(_inputShape, 1); \
- uint32_t width = getSizeOfDimension(_inputShape, 2); \
- uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
- uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
- \
- uint32_t paddingHeight = (uint32_t)_paddingTop; \
- uint32_t paddingWidth = (uint32_t)_paddingLeft;
+#define MAXPOOLING_PARAMETERS \
+ tflite::PoolParams op_params; \
+ op_params.stride_height = _strideHeight; \
+ op_params.stride_width = _strideWidth; \
+ op_params.filter_height = _kernelHeight; \
+ op_params.filter_width = _kernelWidth; \
+ op_params.padding_values.height = (int8_t)_paddingTop; \
+ op_params.padding_values.width = (int8_t)_paddingLeft;
MaxPoolLayer::MaxPoolLayer()
: _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0),
@@ -46,31 +46,30 @@ MaxPoolLayer::MaxPoolLayer()
bool MaxPoolLayer::maxPoolFloat32()
{
-
MAXPOOLING_PARAMETERS
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
- ::tflite::optimized_ops::MaxPool(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth,
- _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight,
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::MaxPool(op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData),
+ convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData));
return true;
}
bool MaxPoolLayer::maxPoolQuant8()
{
-
MAXPOOLING_PARAMETERS
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
&output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
- ::tflite::optimized_ops::MaxPool(_inputData, convertShapeToDims(_inputShape), _strideWidth,
- _strideHeight, paddingWidth, paddingHeight, _kernelWidth,
- _kernelHeight, output_activation_min, output_activation_max,
- _outputData, convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::MaxPool(op_params, convertShapeToTFLiteShape(_inputShape), _inputData,
+ convertShapeToTFLiteShape(_outputShape), _outputData);
return true;
}
diff --git a/runtimes/neurun/src/kernel/cpu/OperationUtils.h b/runtimes/neurun/src/kernel/cpu/OperationUtils.h
index 5914d04e3..066b1e573 100644
--- a/runtimes/neurun/src/kernel/cpu/OperationUtils.h
+++ b/runtimes/neurun/src/kernel/cpu/OperationUtils.h
@@ -23,7 +23,9 @@
#include <limits>
#include <vector>
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
#include "tensorflow/contrib/lite/kernels/internal/types.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
#include "graph/operand/Object.h"
#include "graph/operand/DataType.h"
@@ -75,6 +77,51 @@ inline ::tflite::Dims<4> convertShapeToDims(const Shape &shape)
return dims;
}
+inline ::tflite::RuntimeShape convertShapeToTFLiteShape(const Shape &shape)
+{
+ std::vector<int32_t> raw_shape;
+ raw_shape.resize(4);
+
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ if (i >= shape.dimensions.size())
+ {
+ raw_shape[i] = 1;
+ }
+ else
+ {
+ raw_shape[i] = shape.dimensions[i];
+ }
+ }
+
+ return ::tflite::GetTensorShape(raw_shape);
+}
+
+inline TfLiteFusedActivation convertFusedActivation(FuseCode act)
+{
+ if (act == ANEURALNETWORKS_FUSED_NONE)
+ {
+ return kTfLiteActNone;
+ }
+
+ if (act == ANEURALNETWORKS_FUSED_RELU)
+ {
+ return kTfLiteActRelu;
+ }
+
+ if (act == ANEURALNETWORKS_FUSED_RELU1)
+ {
+ return kTfLiteActRelu1;
+ }
+
+ if (act == ANEURALNETWORKS_FUSED_RELU6)
+ {
+ return kTfLiteActRelu6;
+ }
+
+ return kTfLiteActNone;
+}
+
__wur bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier,
int32_t *right_shift);
diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
index 4f5a69f2e..c998c65f6 100644
--- a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
@@ -33,45 +33,86 @@ SoftMaxLayer::SoftMaxLayer()
// DO NOTHING
}
+// Performs softmax along the input of size (input_size * batch_size).
+void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
+ float *out)
+{
+ TF_LITE_ASSERT(input_size > 0);
+
+ // For each batch
+ for (int b = 0; b < batch_size; b++)
+ {
+ // Find the max coeff.
+ float max_coeff = in[0];
+ for (int i = 1; i < input_size; i++)
+ {
+ if (in[i] > max_coeff)
+ max_coeff = in[i];
+ }
+
+ // Compute the normalized sum of exps.
+ float exp_sum = 0.0;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] = std::exp((in[i] - max_coeff) * beta);
+ exp_sum += out[i];
+ }
+
+ // Divide by the sum of exps.
+ float reciprocal_sum_exp = 1.f / exp_sum;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] *= reciprocal_sum_exp;
+ }
+
+ // Advance in and out pointers for the next batch.
+ in += input_size;
+ out += input_size;
+ }
+}
+
bool SoftMaxLayer::softmaxFloat32()
{
- ::tflite::Dims<4> dim;
+ Shape shapeIn4D;
+
if (getNumberOfDimensions(_inputShape) == 2)
{
uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
- Shape shapeIn4D;
- shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
- dim = convertShapeToDims(shapeIn4D);
+ Softmax(reinterpret_cast<const float *>(_inputData), input_size, batch_size, _beta,
+ reinterpret_cast<float *>(_outputData));
}
else if (getNumberOfDimensions(_inputShape) == 4)
{
- dim = convertShapeToDims(_inputShape);
+ ::tflite::SoftmaxParams op_params;
+ op_params.beta = _beta;
+ ::tflite::optimized_ops::Softmax(op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData),
+ convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData));
}
else
{
std::cout << "only 2D and 4D tensors supported" << std::endl;
return false;
}
- ::tflite::optimized_ops::Softmax(reinterpret_cast<const float *>(_inputData), dim, _beta,
- reinterpret_cast<float *>(_outputData), dim);
+
return true;
}
bool SoftMaxLayer::softmaxQuant8()
{
- ::tflite::Dims<4> dim;
+ Shape shapeIn4D = _inputShape;
+
if (getNumberOfDimensions(_inputShape) == 2)
{
uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
- Shape shapeIn4D;
shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
- dim = convertShapeToDims(shapeIn4D);
}
else if (getNumberOfDimensions(_inputShape) == 4)
{
- dim = convertShapeToDims(_inputShape);
+ shapeIn4D = _inputShape;
}
else
{
@@ -94,8 +135,13 @@ bool SoftMaxLayer::softmaxQuant8()
return false;
}
float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
- ::tflite::optimized_ops::Softmax(_inputData, dim, input_multiplier, input_left_shift, diff_min,
- _outputData, dim);
+
+ ::tflite::SoftmaxParams op_params;
+ op_params.input_multiplier = input_multiplier;
+ op_params.input_left_shift = input_left_shift;
+ op_params.diff_min = diff_min;
+ ::tflite::optimized_ops::Softmax(op_params, convertShapeToTFLiteShape(shapeIn4D), _inputData,
+ convertShapeToTFLiteShape(shapeIn4D), _outputData);
return true;
}
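The 2D float case now goes through the local Softmax helper above (max-subtracted exponentials scaled by beta, then normalized) instead of being reshaped to 4D for optimized_ops; the 4D case and the quantized path use the new SoftmaxParams overloads. An illustrative call of that helper:

const float in[3] = {1.0f, 2.0f, 3.0f};
float out[3];
Softmax(in, /*input_size=*/3, /*batch_size=*/1, /*beta=*/1.0f, out);
// out ~= {0.090, 0.245, 0.665}; the three entries sum to 1 up to rounding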