Compute Library
18.05
|
#include "helpers.h"
Go to the source code of this file.
Macros | |
#define | MAX_OP(x, y, type, size) max((x), (y)) |
#define | ADD_OP(x, y, type, size) ((x) + (y)) |
#define | SUB_OP(x, y, type, size) ((x) - (y)) |
#define | MUL_OP(x, y, type, size) ((x) * (y)) |
#define | DIV_OP(x, y, type, size) ((x) / (y)) |
#define | EXP_OP(x, type, size) exp((x)) |
#define | MINVAL -FLT_MAX |
#define | SELECT_DATA_TYPE int |
#define | GRID_SIZE 1 |
#define | VECTOR_SIZE 16 |
#define | LOG_VECTOR_SIZE 4 |
Functions | |
__kernel void | softmax_layer_norm (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_stride_z, uint src_step_z, uint src_offset_first_element_in_bytes, __global uchar *sum_ptr, uint sum_stride_x, uint sum_step_x, uint sum_stride_y, uint sum_step_y, uint sum_stride_z, uint sum_step_z, uint sum_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_stride_z, uint dst_step_z, uint dst_offset_first_element_in_bytes) |
Divides all the values of the input tensor by the sum calculated by the softmax_layer_max_shift_exp_sum_serial or softmax_layer_max_shift_exp_sum_parallel kernel. More... | |
__kernel void | softmax_layer_max_shift_exp_sum_serial (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_stride_z, uint src_step_z, uint src_offset_first_element_in_bytes, __global uchar *maxo_ptr, uint maxo_stride_x, uint maxo_step_x, uint maxo_stride_y, uint maxo_step_y, uint maxo_stride_z, uint maxo_step_z, uint maxo_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_stride_z, uint dst_step_z, uint dst_offset_first_element_in_bytes, __global uchar *sum_ptr, uint sum_stride_x, uint sum_step_x, uint sum_stride_y, uint sum_step_y, uint sum_stride_z, uint sum_step_z, uint sum_offset_first_element_in_bytes, uint width) |
Identifies the maximum value across the 1st dimension and shifts the values of the input tensor by this maximum value, then gets the exponent of each element and sums all elements across each row. More... | |
__kernel void | softmax_layer_max_shift_exp_sum_parallel (__global uchar *src_ptr, uint src_stride_x, uint src_step_x, uint src_stride_y, uint src_step_y, uint src_stride_z, uint src_step_z, uint src_offset_first_element_in_bytes, __global uchar *maxo_ptr, uint maxo_stride_x, uint maxo_step_x, uint maxo_stride_y, uint maxo_step_y, uint maxo_stride_z, uint maxo_step_z, uint maxo_offset_first_element_in_bytes, __global uchar *dst_ptr, uint dst_stride_x, uint dst_step_x, uint dst_stride_y, uint dst_step_y, uint dst_stride_z, uint dst_step_z, uint dst_offset_first_element_in_bytes, __global uchar *sum_ptr, uint sum_stride_x, uint sum_step_x, uint sum_stride_y, uint sum_step_y, uint sum_stride_z, uint sum_step_z, uint sum_offset_first_element_in_bytes, uint width) |
Identifies the maximum value across the 1st dimension and shifts the values of the input tensor by this maximum value, then gets the exponent of each element and sums all elements across each row. More... | |
Variables | |
__constant DATA_TYPE16 | type_min_ = ( DATA_TYPE16 )( -FLT_MAX ) |
__constant uint16 | idx__ = (uint16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) |
__constant DATA_TYPE16 | type_min = ( DATA_TYPE16 )( -FLT_MAX ) |
__constant uint16 | idx16 = (uint16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) |
__constant uint4 | idx4 = (uint4)(0, 1, 2, 3) |
#define ADD_OP | ( | x, | |
y, | |||
type, | |||
size | |||
) | ((x) + (y)) |
Definition at line 44 of file softmax_layer.cl.
Referenced by softmax_layer_max_shift_exp_sum_parallel(), and softmax_layer_max_shift_exp_sum_serial().
#define DIV_OP | ( | x, | |
y, | |||
type, | |||
size | |||
) | ((x) / (y)) |
Definition at line 47 of file softmax_layer.cl.
Referenced by softmax_layer_norm().
#define EXP_OP | ( | x, | |
type, | |||
size | |||
) | exp((x)) |
Definition at line 48 of file softmax_layer.cl.
Referenced by softmax_layer_max_shift_exp_sum_parallel(), and softmax_layer_max_shift_exp_sum_serial().
#define GRID_SIZE 1 |
Definition at line 62 of file softmax_layer.cl.
Referenced by softmax_layer_max_shift_exp_sum_parallel().
#define LOG_VECTOR_SIZE 4 |
Definition at line 80 of file softmax_layer.cl.
Referenced by softmax_layer_max_shift_exp_sum_serial().
#define MAX_OP | ( | x, | |
y, | |||
type, | |||
size | |||
) | max((x), (y)) |
Definition at line 43 of file softmax_layer.cl.
Referenced by softmax_layer_max_shift_exp_sum_parallel(), and softmax_layer_max_shift_exp_sum_serial().
#define MINVAL -FLT_MAX |
Definition at line 54 of file softmax_layer.cl.
#define MUL_OP | ( | x, | |
y, | |||
type, | |||
size | |||
) | ((x) * (y)) |
Definition at line 46 of file softmax_layer.cl.
Referenced by softmax_layer_max_shift_exp_sum_parallel(), and softmax_layer_max_shift_exp_sum_serial().
#define SELECT_DATA_TYPE int |
Definition at line 55 of file softmax_layer.cl.
Referenced by softmax_layer_max_shift_exp_sum_parallel(), and softmax_layer_max_shift_exp_sum_serial().
#define SUB_OP | ( | x, | |
y, | |||
type, | |||
size | |||
) | ((x) - (y)) |
Definition at line 45 of file softmax_layer.cl.
Referenced by softmax_layer_max_shift_exp_sum_parallel(), and softmax_layer_max_shift_exp_sum_serial().
#define VECTOR_SIZE 16 |
Definition at line 79 of file softmax_layer.cl.
Referenced by softmax_layer_max_shift_exp_sum_serial().
__kernel void softmax_layer_max_shift_exp_sum_parallel | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_stride_z, | ||
uint | src_step_z, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | maxo_ptr, | ||
uint | maxo_stride_x, | ||
uint | maxo_step_x, | ||
uint | maxo_stride_y, | ||
uint | maxo_step_y, | ||
uint | maxo_stride_z, | ||
uint | maxo_step_z, | ||
uint | maxo_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_stride_z, | ||
uint | dst_step_z, | ||
uint | dst_offset_first_element_in_bytes, | ||
__global uchar * | sum_ptr, | ||
uint | sum_stride_x, | ||
uint | sum_step_x, | ||
uint | sum_stride_y, | ||
uint | sum_step_y, | ||
uint | sum_stride_z, | ||
uint | sum_step_z, | ||
uint | sum_offset_first_element_in_bytes, | ||
uint | width | ||
) |
Identifies the maximum value across the 1st dimension and shifts the values of the input tensor by this maximum value, then gets the exponent of each element and sums all elements across each row.
[in] | src_ptr | Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32 |
[in] | src_stride_x | Stride of the source tensor in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source tensor in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_stride_z | Stride of the source tensor in Z dimension (in bytes) |
[in] | src_step_z | src_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source tensor |
[in] | maxo_ptr | Pointer to the max values tensor slice. Supported data types: same as src_ptr |
[in] | maxo_stride_x | Stride of the max values tensor in X dimension (in bytes) |
[in] | maxo_step_x | maxo_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | maxo_stride_y | Stride of the max values tensor in Y dimension (in bytes) |
[in] | maxo_step_y | maxo_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | maxo_stride_z | Stride of the max values tensor in Z dimension (in bytes) |
[in] | maxo_step_z | maxo_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | maxo_offset_first_element_in_bytes | The offset of the first element in the max values tensor |
[out] | dst_ptr | Pointer to the destination tensor slice. Supported data types: same as src_ptr |
[in] | dst_stride_x | Stride of the destination tensor in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_stride_y | Stride of the destination tensor in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | dst_stride_z | Stride of the destination tensor in Z dimension (in bytes) |
[in] | dst_step_z | dst_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination tensor |
[out] | sum_ptr | Pointer to the sum values tensor slice. Supported data types: same as src_ptr |
[in] | sum_stride_x | Stride of the sum values tensor in X dimension (in bytes) |
[in] | sum_step_x | sum_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | sum_stride_y | Stride of the sum values tensor in Y dimension (in bytes) |
[in] | sum_step_y | sum_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | sum_stride_z | Stride of the sum values tensor in Z dimension (in bytes) |
[in] | sum_step_z | sum_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | sum_offset_first_element_in_bytes | The offset of the first element in the sum values tensor |
[in] | width | Input image width |
Definition at line 328 of file softmax_layer.cl.
References ADD_OP, arm_compute::test::validation::beta, CONVERT, CONVERT_TENSOR3D_TO_IMAGE_STRUCT, arm_compute::test::validation::dst, EXP_OP, GRID_SIZE, idx4, MAX_OP, MUL_OP, offset(), Image::ptr, SELECT_DATA_TYPE, arm_compute::test::validation::src, SUB_OP, sum(), type_min_, VEC_DATA_TYPE, VLOAD, and VSTORE.
__kernel void softmax_layer_max_shift_exp_sum_serial | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_stride_z, | ||
uint | src_step_z, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | maxo_ptr, | ||
uint | maxo_stride_x, | ||
uint | maxo_step_x, | ||
uint | maxo_stride_y, | ||
uint | maxo_step_y, | ||
uint | maxo_stride_z, | ||
uint | maxo_step_z, | ||
uint | maxo_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_stride_z, | ||
uint | dst_step_z, | ||
uint | dst_offset_first_element_in_bytes, | ||
__global uchar * | sum_ptr, | ||
uint | sum_stride_x, | ||
uint | sum_step_x, | ||
uint | sum_stride_y, | ||
uint | sum_step_y, | ||
uint | sum_stride_z, | ||
uint | sum_step_z, | ||
uint | sum_offset_first_element_in_bytes, | ||
uint | width | ||
) |
Identifies the maximum value across the 1st dimension and shifts the values of the input tensor by this maximum value, then gets the exponent of each element and sums all elements across each row.
[in] | src_ptr | Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32 |
[in] | src_stride_x | Stride of the source tensor in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source tensor in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_stride_z | Stride of the source tensor in Z dimension (in bytes) |
[in] | src_step_z | src_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source tensor |
[in] | maxo_ptr | Pointer to the max values tensor slice. Supported data types: same as src_ptr |
[in] | maxo_stride_x | Stride of the max values tensor in X dimension (in bytes) |
[in] | maxo_step_x | maxo_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | maxo_stride_y | Stride of the max values tensor in Y dimension (in bytes) |
[in] | maxo_step_y | maxo_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | maxo_stride_z | Stride of the max values tensor in Z dimension (in bytes) |
[in] | maxo_step_z | maxo_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | maxo_offset_first_element_in_bytes | The offset of the first element in the max values tensor |
[out] | dst_ptr | Pointer to the destination tensor slice. Supported data types: same as src_ptr |
[in] | dst_stride_x | Stride of the destination tensor in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_stride_y | Stride of the destination tensor in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | dst_stride_z | Stride of the destination tensor in Z dimension (in bytes) |
[in] | dst_step_z | dst_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination tensor |
[out] | sum_ptr | Pointer to the sum values tensor slice. Supported data types: same as src_ptr |
[in] | sum_stride_x | Stride of the sum values tensor in X dimension (in bytes) |
[in] | sum_step_x | sum_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | sum_stride_y | Stride of the sum values tensor in Y dimension (in bytes) |
[in] | sum_step_y | sum_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | sum_stride_z | Stride of the sum values tensor in Z dimension (in bytes) |
[in] | sum_step_z | sum_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | sum_offset_first_element_in_bytes | The offset of the first element in the sum values tensor |
[in] | width | Input image width |
Definition at line 178 of file softmax_layer.cl.
References ADD_OP, arm_compute::test::validation::beta, CL_VEC_DATA_TYPE, CONVERT, CONVERT_TENSOR3D_TO_IMAGE_STRUCT, arm_compute::test::validation::dst, EXP_OP, EXPAND, idx__, LOG_VECTOR_SIZE, MAX_OP, MUL_OP, offset(), Image::ptr, SELECT_DATA_TYPE, arm_compute::test::validation::src, SUB_OP, sum(), type_min_, VEC_DATA_TYPE, VECTOR_SIZE, VLOAD, and VSTORE.
__kernel void softmax_layer_norm | ( | __global uchar * | src_ptr, |
uint | src_stride_x, | ||
uint | src_step_x, | ||
uint | src_stride_y, | ||
uint | src_step_y, | ||
uint | src_stride_z, | ||
uint | src_step_z, | ||
uint | src_offset_first_element_in_bytes, | ||
__global uchar * | sum_ptr, | ||
uint | sum_stride_x, | ||
uint | sum_step_x, | ||
uint | sum_stride_y, | ||
uint | sum_step_y, | ||
uint | sum_stride_z, | ||
uint | sum_step_z, | ||
uint | sum_offset_first_element_in_bytes, | ||
__global uchar * | dst_ptr, | ||
uint | dst_stride_x, | ||
uint | dst_step_x, | ||
uint | dst_stride_y, | ||
uint | dst_step_y, | ||
uint | dst_stride_z, | ||
uint | dst_step_z, | ||
uint | dst_offset_first_element_in_bytes | ||
) |
Divides all the values of the input tensor by the sum calculated by the softmax_layer_max_shift_exp_sum_serial or softmax_layer_max_shift_exp_sum_parallel kernel.
[in] | src_ptr | Pointer to the source tensor slice. Supported data types: QS8/QS16/F16/F32 |
[in] | src_stride_x | Stride of the source tensor in X dimension (in bytes) |
[in] | src_step_x | src_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | src_stride_y | Stride of the source tensor in Y dimension (in bytes) |
[in] | src_step_y | src_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | src_stride_z | Stride of the source tensor in Z dimension (in bytes) |
[in] | src_step_z | src_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | src_offset_first_element_in_bytes | The offset of the first element in the source tensor |
[in] | sum_ptr | Pointer to the sum values tensor slice. Supported data types: same as src_ptr |
[in] | sum_stride_x | Stride of the sum values tensor in X dimension (in bytes) |
[in] | sum_step_x | sum_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | sum_stride_y | Stride of the sum values tensor in Y dimension (in bytes) |
[in] | sum_step_y | sum_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | sum_stride_z | Stride of the sum values tensor in Z dimension (in bytes) |
[in] | sum_step_z | sum_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | sum_offset_first_element_in_bytes | The offset of the first element in the sum values tensor |
[out] | dst_ptr | Pointer to the destination tensor slice. Supported data types: same as src_ptr |
[in] | dst_stride_x | Stride of the destination tensor in X dimension (in bytes) |
[in] | dst_step_x | dst_stride_x * number of elements along X processed per workitem(in bytes) |
[in] | dst_stride_y | Stride of the destination tensor in Y dimension (in bytes) |
[in] | dst_step_y | dst_stride_y * number of elements along Y processed per workitem(in bytes) |
[in] | dst_stride_z | Stride of the destination tensor in Z dimension (in bytes) |
[in] | dst_step_z | dst_stride_z * number of elements along Z processed per workitem(in bytes) |
[in] | dst_offset_first_element_in_bytes | The offset of the first element in the destination tensor |
Definition at line 120 of file softmax_layer.cl.
References CONVERT_TENSOR3D_TO_IMAGE_STRUCT, CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP, DIV_OP, offset(), and VEC_DATA_TYPE.
__constant uint16 idx16 = (uint16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) |
Definition at line 87 of file softmax_layer.cl.
__constant uint4 idx4 = (uint4)(0, 1, 2, 3) |
Definition at line 88 of file softmax_layer.cl.
Referenced by softmax_layer_max_shift_exp_sum_parallel().
__constant uint16 idx__ = (uint16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) |
Definition at line 82 of file softmax_layer.cl.
Referenced by softmax_layer_max_shift_exp_sum_serial().
__constant DATA_TYPE16 type_min = ( DATA_TYPE16 )( -FLT_MAX ) |
Definition at line 86 of file softmax_layer.cl.
__constant DATA_TYPE16 type_min_ = ( DATA_TYPE16 )( -FLT_MAX ) |
Definition at line 81 of file softmax_layer.cl.
Referenced by softmax_layer_max_shift_exp_sum_parallel(), and softmax_layer_max_shift_exp_sum_serial().