diff options
Diffstat (limited to 'runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc')
-rw-r--r-- | runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc | 146 |
1 files changed, 86 insertions, 60 deletions
diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc index 81e88e0f0..c694fa75f 100644 --- a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc +++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc @@ -33,55 +33,51 @@ static constexpr int kStaticBufferSize = 1605632; static char static_scratch_buffer[kStaticBufferSize]; static std::mutex executionMutex; -#define ANDROID_NN_CONV_PARAMETERS(Type) \ - uint32_t height = getSizeOfDimension(_inputShape, 1); \ - uint32_t width = getSizeOfDimension(_inputShape, 2); \ - uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \ - uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \ - uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \ - uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \ - uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \ - \ - uint32_t paddingHeight = (uint32_t)_paddingTop; \ - uint32_t paddingWidth = (uint32_t)_paddingLeft; \ - \ - ::tflite::Dims<4> im2colDim; \ - im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \ - im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \ - im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \ - im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \ - \ - im2colDim.strides[0] = 1; \ - for (int i = 1; i < 4; i++) \ - { \ - im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \ - } \ - Type *im2colData = nullptr; \ - uint64_t im2colByteSize = sizeof(Type); \ - std::unique_ptr<Type[]> im2colGuard; \ - for (int i = 0; i < 4; i++) \ - { \ - im2colByteSize *= im2colDim.sizes[i]; \ - } \ - /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \ - if (im2colByteSize >= 0x7fffffff) \ - { \ - std::cout << "Conv size is too large, not enough memory" << std::endl; \ - return false; \ - } \ - if (im2colByteSize <= kStaticBufferSize) \ - { \ - im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \ - } \ - else \ - { \ - im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \ - if (im2colData == nullptr) \ - { \ - std::cout << "Conv size is too large, not enough memory" << std::endl; \ - return false; \ - } \ - im2colGuard.reset(im2colData); \ +#define ANDROID_NN_CONV_PARAMETERS(Type) \ + uint32_t height = getSizeOfDimension(_inputShape, 1); \ + uint32_t width = getSizeOfDimension(_inputShape, 2); \ + uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \ + uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \ + uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \ + uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \ + uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \ + \ + uint32_t paddingHeight = (uint32_t)_paddingTop; \ + uint32_t paddingWidth = (uint32_t)_paddingLeft; \ + \ + Shape im2colShape; \ + im2colShape.dimensions.resize(4); \ + im2colShape.dimensions[0] = getSizeOfDimension(_outputShape, 0); \ + im2colShape.dimensions[1] = getSizeOfDimension(_outputShape, 1); \ + im2colShape.dimensions[2] = getSizeOfDimension(_outputShape, 2); \ + im2colShape.dimensions[3] = inDepth * kernelHeight * kernelWidth; \ + \ + Type *im2colData = nullptr; \ + uint64_t im2colByteSize = sizeof(Type); \ + std::unique_ptr<Type[]> im2colGuard; \ + for (int i = 0; i < 4; i++) \ + { \ + im2colByteSize *= im2colShape.dimensions[i]; \ + } \ + /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \ + if (im2colByteSize >= 0x7fffffff) \ + { \ + std::cout << "Conv size is too large, not enough memory" << std::endl; \ + return false; \ + } \ + if (im2colByteSize <= kStaticBufferSize) \ + { \ + im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \ + } \ + else \ + { \ + im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \ + if (im2colData == nullptr) \ + { \ + std::cout << "Conv size is too large, not enough memory" << std::endl; \ + return false; \ + } \ + im2colGuard.reset(im2colData); \ } ConvolutionLayer::ConvolutionLayer() @@ -112,19 +108,32 @@ bool ConvolutionLayer::convFloat32() float output_activation_min, output_activation_max; CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max); int32_t dilationWidthFactor = 1, dilationHeightFactor = 1; + + ::tflite::ConvParams op_params; + op_params.padding_type = ::tflite::PaddingType::kSame; + op_params.padding_values.width = paddingWidth; + op_params.padding_values.height = paddingHeight; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = dilationWidthFactor; + op_params.dilation_height_factor = dilationHeightFactor; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + ::tflite::optimized_ops::Conv( - reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), - reinterpret_cast<const float *>(_kernelData), convertShapeToDims(_kernelShape), - reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), _strideWidth, - _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, paddingHeight, - output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData), - convertShapeToDims(_outputShape), im2colDataToPass, im2colDim); + op_params, convertShapeToTFLiteShape(_inputShape), + reinterpret_cast<const float *>(_inputData), convertShapeToTFLiteShape(_kernelShape), + reinterpret_cast<const float *>(_kernelData), convertShapeToTFLiteShape(_biasShape), + reinterpret_cast<const float *>(_biasData), convertShapeToTFLiteShape(_outputShape), + reinterpret_cast<float *>(_outputData), convertShapeToTFLiteShape(im2colShape), + im2colDataToPass); return true; } bool ConvolutionLayer::convQuant8() { ANDROID_NN_CONV_PARAMETERS(uint8_t) + int32_t inputOffset = -_inputShape.offset; int32_t kernelOffset = -_kernelShape.offset; int32_t outputOffset = _outputShape.offset; @@ -141,6 +150,24 @@ bool ConvolutionLayer::convQuant8() } CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min, &output_activation_max); + int32_t dilationWidthFactor = 1, dilationHeightFactor = 1; + + ::tflite::ConvParams op_params; + op_params.padding_type = ::tflite::PaddingType::kSame; + op_params.padding_values.width = paddingWidth; + op_params.padding_values.height = paddingHeight; + op_params.stride_width = _strideWidth; + op_params.stride_height = _strideHeight; + op_params.dilation_width_factor = dilationWidthFactor; + op_params.dilation_height_factor = dilationHeightFactor; + op_params.input_offset = inputOffset; + op_params.weights_offset = kernelOffset; + op_params.output_offset = outputOffset; + op_params.output_multiplier = output_multiplier; + op_params.output_shift = output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + static gemmlowp::GemmContext gemm_context; // Prevent concurrent executions that may access the scratch buffer and // gemm_context. @@ -148,11 +175,10 @@ bool ConvolutionLayer::convQuant8() // Alow gemmlowp automatically decide how many threads to use. gemm_context.set_max_num_threads(0); ::tflite::optimized_ops::Conv( - _inputData, convertShapeToDims(_inputShape), inputOffset, _kernelData, - convertShapeToDims(_kernelShape), kernelOffset, reinterpret_cast<const int32_t *>(_biasData), - convertShapeToDims(_biasShape), _strideWidth, _strideHeight, paddingWidth, paddingHeight, - outputOffset, output_multiplier, output_shift, output_activation_min, output_activation_max, - _outputData, convertShapeToDims(_outputShape), im2colData, im2colDim, &gemm_context); + op_params, convertShapeToTFLiteShape(_inputShape), _inputData, + convertShapeToTFLiteShape(_kernelShape), _kernelData, convertShapeToTFLiteShape(_biasShape), + reinterpret_cast<const int32_t *>(_biasData), convertShapeToTFLiteShape(_outputShape), + _outputData, convertShapeToTFLiteShape(im2colShape), im2colData, &gemm_context); return true; } |