summaryrefslogtreecommitdiff
path: root/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
diff options
context:
space:
mode:
Diffstat (limited to 'runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc')
-rw-r--r--runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc146
1 files changed, 86 insertions, 60 deletions
diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
index 81e88e0f0..c694fa75f 100644
--- a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
@@ -33,55 +33,51 @@ static constexpr int kStaticBufferSize = 1605632;
static char static_scratch_buffer[kStaticBufferSize];
static std::mutex executionMutex;
-#define ANDROID_NN_CONV_PARAMETERS(Type) \
- uint32_t height = getSizeOfDimension(_inputShape, 1); \
- uint32_t width = getSizeOfDimension(_inputShape, 2); \
- uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
- uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
- uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
- uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
- uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
- \
- uint32_t paddingHeight = (uint32_t)_paddingTop; \
- uint32_t paddingWidth = (uint32_t)_paddingLeft; \
- \
- ::tflite::Dims<4> im2colDim; \
- im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \
- im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \
- im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \
- im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \
- \
- im2colDim.strides[0] = 1; \
- for (int i = 1; i < 4; i++) \
- { \
- im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \
- } \
- Type *im2colData = nullptr; \
- uint64_t im2colByteSize = sizeof(Type); \
- std::unique_ptr<Type[]> im2colGuard; \
- for (int i = 0; i < 4; i++) \
- { \
- im2colByteSize *= im2colDim.sizes[i]; \
- } \
- /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
- if (im2colByteSize >= 0x7fffffff) \
- { \
- std::cout << "Conv size is too large, not enough memory" << std::endl; \
- return false; \
- } \
- if (im2colByteSize <= kStaticBufferSize) \
- { \
- im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
- } \
- else \
- { \
- im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
- if (im2colData == nullptr) \
- { \
- std::cout << "Conv size is too large, not enough memory" << std::endl; \
- return false; \
- } \
- im2colGuard.reset(im2colData); \
+#define ANDROID_NN_CONV_PARAMETERS(Type) /* Declares the dimension, padding and im2col locals used by the conv kernels; Type is the im2col element type. Expands to statements that may `return false` from the enclosing function. */ \
+ uint32_t height = getSizeOfDimension(_inputShape, 1); \
+ uint32_t width = getSizeOfDimension(_inputShape, 2); \
+ uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
+ uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
+ uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
+ /* Only the top and left padding members are read here. */ \
+ uint32_t paddingHeight = (uint32_t)_paddingTop; \
+ uint32_t paddingWidth = (uint32_t)_paddingLeft; \
+ /* im2col scratch shape: output dimensions 0..2, then inDepth * kernelHeight * kernelWidth. */ \
+ Shape im2colShape; \
+ im2colShape.dimensions.resize(4); \
+ im2colShape.dimensions[0] = getSizeOfDimension(_outputShape, 0); \
+ im2colShape.dimensions[1] = getSizeOfDimension(_outputShape, 1); \
+ im2colShape.dimensions[2] = getSizeOfDimension(_outputShape, 2); \
+ im2colShape.dimensions[3] = inDepth * kernelHeight * kernelWidth; \
+ /* Byte size of the scratch area: sizeof(Type) times the product of all four im2col dimensions. */ \
+ Type *im2colData = nullptr; \
+ uint64_t im2colByteSize = sizeof(Type); \
+ std::unique_ptr<Type[]> im2colGuard; \
+ for (int i = 0; i < 4; i++) \
+ { \
+ im2colByteSize *= im2colShape.dimensions[i]; \
+ } \
+ /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
+ if (im2colByteSize >= 0x7fffffff) \
+ { \
+ std::cout << "Conv size is too large, not enough memory" << std::endl; \
+ return false; \
+ } \
+ if (im2colByteSize <= kStaticBufferSize) /* fits: reuse the file-level static scratch buffer */ \
+ { \
+ im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
+ } \
+ else /* larger than the static buffer: heap-allocate without throwing; im2colGuard owns the array */ \
+ { \
+ im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
+ if (im2colData == nullptr) \
+ { \
+ std::cout << "Conv size is too large, not enough memory" << std::endl; \
+ return false; \
+ } \
+ im2colGuard.reset(im2colData); \
}
ConvolutionLayer::ConvolutionLayer()
@@ -112,19 +108,32 @@ bool ConvolutionLayer::convFloat32()
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
+
+ ::tflite::ConvParams op_params;
+ op_params.padding_type = ::tflite::PaddingType::kSame;
+ op_params.padding_values.width = paddingWidth;
+ op_params.padding_values.height = paddingHeight;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = dilationWidthFactor;
+ op_params.dilation_height_factor = dilationHeightFactor;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
::tflite::optimized_ops::Conv(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
- reinterpret_cast<const float *>(_kernelData), convertShapeToDims(_kernelShape),
- reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), _strideWidth,
- _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, paddingHeight,
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape), im2colDataToPass, im2colDim);
+ op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData), convertShapeToTFLiteShape(_kernelShape),
+ reinterpret_cast<const float *>(_kernelData), convertShapeToTFLiteShape(_biasShape),
+ reinterpret_cast<const float *>(_biasData), convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData), convertShapeToTFLiteShape(im2colShape),
+ im2colDataToPass);
return true;
}
bool ConvolutionLayer::convQuant8()
{
ANDROID_NN_CONV_PARAMETERS(uint8_t)
+
int32_t inputOffset = -_inputShape.offset;
int32_t kernelOffset = -_kernelShape.offset;
int32_t outputOffset = _outputShape.offset;
@@ -141,6 +150,24 @@ bool ConvolutionLayer::convQuant8()
}
CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
&output_activation_max);
+ int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
+
+ ::tflite::ConvParams op_params;
+ op_params.padding_type = ::tflite::PaddingType::kSame;
+ op_params.padding_values.width = paddingWidth;
+ op_params.padding_values.height = paddingHeight;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = dilationWidthFactor;
+ op_params.dilation_height_factor = dilationHeightFactor;
+ op_params.input_offset = inputOffset;
+ op_params.weights_offset = kernelOffset;
+ op_params.output_offset = outputOffset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
static gemmlowp::GemmContext gemm_context;
// Prevent concurrent executions that may access the scratch buffer and
// gemm_context.
@@ -148,11 +175,10 @@ bool ConvolutionLayer::convQuant8()
// Allow gemmlowp to automatically decide how many threads to use.
gemm_context.set_max_num_threads(0);
::tflite::optimized_ops::Conv(
- _inputData, convertShapeToDims(_inputShape), inputOffset, _kernelData,
- convertShapeToDims(_kernelShape), kernelOffset, reinterpret_cast<const int32_t *>(_biasData),
- convertShapeToDims(_biasShape), _strideWidth, _strideHeight, paddingWidth, paddingHeight,
- outputOffset, output_multiplier, output_shift, output_activation_min, output_activation_max,
- _outputData, convertShapeToDims(_outputShape), im2colData, im2colDim, &gemm_context);
+ op_params, convertShapeToTFLiteShape(_inputShape), _inputData,
+ convertShapeToTFLiteShape(_kernelShape), _kernelData, convertShapeToTFLiteShape(_biasShape),
+ reinterpret_cast<const int32_t *>(_biasData), convertShapeToTFLiteShape(_outputShape),
+ _outputData, convertShapeToTFLiteShape(im2colShape), im2colData, &gemm_context);
return true;
}