Compute Library  17.09
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
roi_pooling_layer.cl
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 ARM Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "helpers.h"
25 
26 #if DATA_SIZE == 32
27 #define VEC_SIZE 4
28 #define VEC_MAX vec4_max
29 #elif DATA_SIZE == 16
30 #define VEC_SIZE 8
31 #define VEC_MAX vec8_max
32 #else /* DATA_SIZE not equals 32 or 16 */
33 #error "Unsupported data size"
34 #endif /* DATA_SIZE == 32 */
35 
37 {
39  temp = fmax(vec.lo, vec.hi);
40  return fmax(temp.x, temp.y);
41 }
42 
44 {
46  temp = fmax(vec.lo, vec.hi);
47  return vec4_max(temp);
48 }
49 
61 inline DATA_TYPE roi_pool_1x1(const Tensor3D *input, int region_start_x, int region_end_x, int region_start_y, int region_end_y, int pz)
62 {
63  // Iterate through the pooling region
64  if((region_end_x <= region_start_x) || (region_end_y <= region_start_y))
65  {
66  return (DATA_TYPE)0;
67  }
68  else
69  {
70  int num_iter = (int)((region_end_x - region_start_x) / VEC_SIZE);
71  VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
72  curr_max = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))(-FLT_MAX);
73  for(int j = region_start_y; j < region_end_y; ++j)
74  {
75  int i = region_start_x;
76  for(; i < region_start_x + num_iter * VEC_SIZE; i += VEC_SIZE)
77  {
78  VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
79  val = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)tensor3D_offset(input, i, j, pz));
80  curr_max = fmax(val, curr_max);
81  }
82  for(; i < region_end_x; ++i)
83  {
84  DATA_TYPE val = *(__global DATA_TYPE *)tensor3D_offset(input, i, j, pz);
85  curr_max = fmax(curr_max, val);
86  }
87  }
88  return (DATA_TYPE)VEC_MAX(curr_max);
89  }
90 }
91 
123 __kernel void roi_pooling_layer(
124  TENSOR3D_DECLARATION(input),
125  VECTOR_DECLARATION(rois),
126  TENSOR3D_DECLARATION(output),
127  unsigned int input_stride_w, unsigned int output_stride_w)
128 {
129  // Get pixels pointer
133 
134  const int px = get_global_id(0);
135  const int py = get_global_id(1);
136  const int pw = get_global_id(2);
137 
138  // Load roi parameters
139  // roi is laid out as follows:
140  // { x, y, width, height, batch_index }
141  const ushort4 roi = vload4(0, (__global ushort *)vector_offset(&rois, pw));
142  const ushort roi_batch = *((__global ushort *)vector_offset(&rois, pw) + 4);
143  const int2 roi_anchor = convert_int2_sat(round(convert_float2(roi.s01) * (float)SPATIAL_SCALE));
144  const int2 roi_dims = convert_int2_sat(fmax(round(convert_float2(roi.s23) * (float)SPATIAL_SCALE), 1.f));
145 
146  // Calculate pooled region start and end
147  const float2 spatial_indx = (float2)(px, py);
148  const float2 pooled_dims = (float2)(POOLED_DIM_X, POOLED_DIM_Y);
149  const int2 max_spatial_dims = (int2)(MAX_DIM_X, MAX_DIM_Y);
150  int2 region_start = convert_int2_sat(floor(spatial_indx / pooled_dims * convert_float2(roi_dims))) + roi_anchor;
151  int2 region_end = convert_int2_sat(floor((spatial_indx + 1) / pooled_dims * convert_float2(roi_dims))) + roi_anchor;
152 
153  region_start = clamp(region_start, 0, max_spatial_dims);
154  region_end = clamp(region_end, 0, max_spatial_dims);
155 
156  // Move input and output pointer across the fourth dimension
157  input.ptr += roi_batch * input_stride_w;
158  output.ptr += pw * output_stride_w;
159 
160  for(int pz = 0; pz < MAX_DIM_Z; ++pz)
161  {
162  *(__global DATA_TYPE *)tensor3D_offset(&output, px, py, pz) = (__global DATA_TYPE)roi_pool_1x1(&input,
163  region_start.x,
164  region_end.x,
165  region_start.y,
166  region_end.y, pz);
167  }
168 }
Structure to hold Vector information.
Definition: helpers.h:123
#define VLOAD(size)
Definition: helpers.h:34
#define DATA_TYPE
DATA_TYPE vec8_max(DATA_TYPE8 vec)
T clamp(const T &n, const T &lower, const T &upper)
Performs clamping among a lower and upper value.
Definition: Helpers.h:201
__kernel void roi_pooling_layer(__global uchar *input_ptr, uint input_stride_x, uint input_step_x, uint input_stride_y, uint input_step_y, uint input_stride_z, uint input_step_z, uint input_offset_first_element_in_bytes, __global uchar *rois_ptr, uint rois_stride_x, uint rois_step_x, uint rois_offset_first_element_in_bytes, __global uchar *output_ptr, uint output_stride_x, uint output_step_x, uint output_stride_y, uint output_step_y, uint output_stride_z, uint output_step_z, uint output_offset_first_element_in_bytes, unsigned int input_stride_w, unsigned int output_stride_w)
Performs a roi pooling function.
Structure to hold 3D tensor information.
Definition: helpers.h:140
__global const uchar * tensor3D_offset(const Tensor3D *tensor, int x, int y, int z)
Get the pointer position of a Tensor3D.
Definition: helpers.h:304
#define VECTOR_DECLARATION(name)
Definition: helpers.h:51
#define TENSOR3D_DECLARATION(name)
Definition: helpers.h:65
#define VEC_DATA_TYPE(type, size)
Definition: fixed_point.h:93
T round(T value)
Round floating-point value with half value rounding away from zero.
#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name)
Definition: helpers.h:112
#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name)
Definition: helpers.h:90
DATA_TYPE roi_pool_1x1(const Tensor3D *input, int region_start_x, int region_end_x, int region_start_y, int region_end_y, int pz)
Performs a roi pooling on a single output pixel.
__global const uchar * vector_offset(const Vector *vec, int x)
Get the pointer position of a Vector.
Definition: helpers.h:281
__global uchar * ptr
Pointer to the starting postion of the buffer.
Definition: helpers.h:142
DATA_TYPE vec4_max(DATA_TYPE4 vec)