28 #define VEC_MAX vec4_max
31 #define VEC_MAX vec8_max
33 #error "Unsupported data size"
39 temp = fmax(vec.lo, vec.hi);
40 return fmax(temp.x, temp.y);
46 temp = fmax(vec.lo, vec.hi);
64 if((region_end_x <= region_start_x) || (region_end_y <= region_start_y))
70 int num_iter = (int)((region_end_x - region_start_x) / VEC_SIZE);
73 for(
int j = region_start_y; j < region_end_y; ++j)
75 int i = region_start_x;
76 for(; i < region_start_x + num_iter * VEC_SIZE; i += VEC_SIZE)
80 curr_max = fmax(val, curr_max);
82 for(; i < region_end_x; ++i)
85 curr_max = fmax(curr_max, val);
127 unsigned int input_stride_w,
unsigned int output_stride_w)
134 const int px = get_global_id(0);
135 const int py = get_global_id(1);
136 const int pw = get_global_id(2);
141 const ushort4 roi = vload4(0, (__global ushort *)
vector_offset(&rois, pw));
142 const ushort roi_batch = *((__global ushort *)
vector_offset(&rois, pw) + 4);
143 const int2 roi_anchor = convert_int2_sat(
round(convert_float2(roi.s01) * (
float)SPATIAL_SCALE));
144 const int2 roi_dims = convert_int2_sat(fmax(
round(convert_float2(roi.s23) * (
float)SPATIAL_SCALE), 1.f));
147 const float2 spatial_indx = (float2)(px, py);
148 const float2 pooled_dims = (float2)(POOLED_DIM_X, POOLED_DIM_Y);
149 const int2 max_spatial_dims = (int2)(MAX_DIM_X, MAX_DIM_Y);
150 int2 region_start = convert_int2_sat(floor(spatial_indx / pooled_dims * convert_float2(roi_dims))) + roi_anchor;
151 int2 region_end = convert_int2_sat(floor((spatial_indx + 1) / pooled_dims * convert_float2(roi_dims))) + roi_anchor;
153 region_start =
clamp(region_start, 0, max_spatial_dims);
154 region_end =
clamp(region_end, 0, max_spatial_dims);
157 input.
ptr += roi_batch * input_stride_w;
158 output.
ptr += pw * output_stride_w;
160 for(
int pz = 0; pz < MAX_DIM_Z; ++pz)
Structure to hold Vector information.
DATA_TYPE vec8_max(DATA_TYPE8 vec)
T clamp(const T &n, const T &lower, const T &upper)
Clamps a value between a lower and an upper bound.
__kernel void roi_pooling_layer(__global uchar *input_ptr, uint input_stride_x, uint input_step_x, uint input_stride_y, uint input_step_y, uint input_stride_z, uint input_step_z, uint input_offset_first_element_in_bytes, __global uchar *rois_ptr, uint rois_stride_x, uint rois_step_x, uint rois_offset_first_element_in_bytes, __global uchar *output_ptr, uint output_stride_x, uint output_step_x, uint output_stride_y, uint output_step_y, uint output_stride_z, uint output_step_z, uint output_offset_first_element_in_bytes, unsigned int input_stride_w, unsigned int output_stride_w)
Performs a roi pooling function.
Structure to hold 3D tensor information.
__global const uchar * tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
Get the pointer position of a Tensor3D.
#define VECTOR_DECLARATION(name)
#define TENSOR3D_DECLARATION(name)
#define VEC_DATA_TYPE(type, size)
T round(T value)
Rounds a floating-point value, with halfway cases rounded away from zero.
#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name)
#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name)
DATA_TYPE roi_pool_1x1(const Tensor3D *input, int region_start_x, int region_end_x, int region_start_y, int region_end_y, int pz)
Performs a roi pooling on a single output pixel.
__global const uchar * vector_offset(const Vector *vec, int x)
Get the pointer position of a Vector.
__global uchar * ptr
Pointer to the starting position of the buffer.
DATA_TYPE vec4_max(DATA_TYPE4 vec)