Assembly kernel glue. More...

#include <AssemblyHelper.h>

Collaboration diagram for AssemblyKernelGlue< TypeInput, TypeOutput >:

Public Types
using	TypeOperator = TypeInput
	Operator type. More...

using	TypeResult = TypeOutput
	Result type. More...

using	AssemblyGemm = arm_gemm::GemmCommon< TypeInput, TypeOutput >
	Assembly Gemm. More...

Public Member Functions
	AssemblyKernelGlue ()
	Default constructor. More...

const AssemblyKernelGlue< TypeInput, TypeOutput > &	operator= (const AssemblyKernelGlue< TypeInput, TypeOutput > &)=delete
	Prevent instances of this class from being copy assigned. More...

	AssemblyKernelGlue (const AssemblyKernelGlue< TypeInput, TypeOutput > &)=delete
	Prevent instances of this class from being copy constructed. More...

void	run ()
	Configures the array pointers and strides in the assembly kernel and executes the assembly kernel. More...

Data Fields
std::unique_ptr< AssemblyGemm >	_gemm_kernel_asm
	Assembly Gemm kernel. More...

std::unique_ptr< INEKernel >	_optimised_kernel
	Optimised NEON kernel. More...

const ITensor *	_a
	Input A. More...

const ITensor *	_b
	Input B. More...

ITensor *	_d
	Output. More...

ITensor *	_pretranspose
	Pre-transpose tensor. More...

Detailed Description

template<typename TypeInput, typename TypeOutput>
class arm_compute::AssemblyKernelGlue< TypeInput, TypeOutput >

Assembly kernel glue.

Definition at line 45 of file AssemblyHelper.h.

Member Typedef Documentation

using AssemblyGemm = arm_gemm::GemmCommon<TypeInput, TypeOutput>

Assembly Gemm.

Definition at line 58 of file AssemblyHelper.h.

using TypeOperator = TypeInput

Operator type.

Definition at line 49 of file AssemblyHelper.h.

using TypeResult = TypeOutput

Result type.

Definition at line 51 of file AssemblyHelper.h.

Constructor & Destructor Documentation

AssemblyKernelGlue ( )

inline

Default constructor.

Definition at line 53 of file AssemblyHelper.h.

         : _gemm_kernel_asm(nullptr), _optimised_kernel(nullptr), _a(nullptr), _b(nullptr), _d(nullptr), _pretranspose(nullptr)
     {
         // Every kernel and tensor pointer starts out null. run() dereferences
         // _gemm_kernel_asm, _a, _b and _d without checking them, so they have
         // to be assigned before run() is called.
     }

AssemblyKernelGlue ( const AssemblyKernelGlue< TypeInput, TypeOutput > & )

delete

Prevent instances of this class from being copy constructed.

Member Function Documentation

const AssemblyKernelGlue<TypeInput, TypeOutput>& operator= ( const AssemblyKernelGlue< TypeInput, TypeOutput > & )

delete

Prevent instances of this class from being copy assigned.

void run ( )

inline

Configures the array pointers and strides in the assembly kernel and executes the assembly kernel.

The call to set_arrays is needed to deal with input sizes containing batches (dims > 2).

Definition at line 81 of file AssemblyHelper.h.

     {
         const int lda = _a->info()->strides_in_bytes().y() / sizeof(TypeInput);
         const int ldb = _b->info()->strides_in_bytes().y() / sizeof(TypeInput);
         const int ldd = _d->info()->strides_in_bytes().y() / sizeof(TypeOutput);
 
         // In the case of NHWC we want to interpret the output shape as 3D. Thus, the batch stride for A is
         // the relevant multiple of the row stride.
         const bool is_nhwc           = _a->info()->data_layout() == DataLayout::NHWC;
         const int  stride_in_bytes_a = is_nhwc ? _a->info()->strides_in_bytes().y() * _d->info()->dimension(1) : _a->info()->strides_in_bytes().z();
 
         const int batch_stride_a = stride_in_bytes_a / sizeof(TypeInput);
         const int batch_stride_d = _d->info()->strides_in_bytes().z() / sizeof(TypeOutput);
 
         const int multi_stride_a = _a->info()->strides_in_bytes()[3] / sizeof(TypeInput);
         const int multi_stride_b = _b->info()->strides_in_bytes().z() / sizeof(TypeInput);
         const int multi_stride_d = _d->info()->strides_in_bytes()[3] / sizeof(TypeOutput);
 
         const auto in0_ptr = reinterpret_cast<const TypeInput *>(_a->buffer());
         const auto in1_ptr = reinterpret_cast<const TypeInput *>(_b->buffer());
         auto       out_ptr = reinterpret_cast<TypeOutput *>(_d->buffer());
 
         _gemm_kernel_asm->set_arrays(in0_ptr, lda, batch_stride_a, multi_stride_a, in1_ptr, ldb, multi_stride_b, out_ptr, ldd, batch_stride_d, multi_stride_d);
         if(_gemm_kernel_asm->B_pretranspose_required())
         {
             // Forcing 128-byte alignment (required by 32-bit kernels)
             const unsigned int alignment   = 128;
             void              *raw_ptr     = reinterpret_cast<void *>(_pretranspose->buffer());
             size_t             space       = _pretranspose->info()->total_size();
             void              *aligned_ptr = support::cpp11::align(alignment, _gemm_kernel_asm->get_B_pretransposed_array_size(), raw_ptr, space);
             ARM_COMPUTE_ERROR_ON(_pretranspose == nullptr || _pretranspose->buffer() == nullptr);
             _gemm_kernel_asm->pretranspose_B_array(aligned_ptr, in1_ptr, ldb, multi_stride_b);
             _b->mark_as_unused();
         }
 
         NEScheduler::get().schedule(_optimised_kernel.get(), Window::DimX);
     }

Field Documentation

const ITensor* _a

Input A.

Definition at line 70 of file AssemblyHelper.h.

const ITensor* _b

Input B.

Definition at line 72 of file AssemblyHelper.h.

ITensor* _d

Output.

Definition at line 74 of file AssemblyHelper.h.

std::unique_ptr<AssemblyGemm> _gemm_kernel_asm

Assembly Gemm kernel.

Definition at line 66 of file AssemblyHelper.h.

std::unique_ptr<INEKernel> _optimised_kernel

Optimised NEON kernel.

Definition at line 68 of file AssemblyHelper.h.

ITensor* _pretranspose

Pre-transpose tensor.

Definition at line 76 of file AssemblyHelper.h.

The documentation for this class was generated from the following file:

arm_compute/runtime/NEON/AssemblyHelper.h

Public Types

Public Member Functions