// inference-engine/src/cldnn_engine/cldnn_global_custom_kernels/prior_box_clustered.cl

// Copyright (c) 2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Enable the half-precision extension for this program.
// NOTE(review): presumably needed because INPUT0_TYPE / OUTPUT0_TYPE may be
// defined as `half` by the host at build time -- confirm against the layer config.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

/*
 * Emits clustered prior boxes for one grid position per work-item.
 *
 * Work-item mapping: global id 0 is the row, global id 1 is the column.
 * The row stride used for addressing is INPUT0_DIMS[3].
 *
 * For every (width_[p], height_[p]) pair the kernel stores four values
 * (xmin, ymin, xmax, ymax) centred on ((col + offset_) * step_w,
 * (row + offset_) * step_h) and scaled by 1/img_width and 1/img_height.
 * When clip_ is non-zero each value is limited to [0, 1].
 *
 * The variance_ values are copied once per prior into a second region that
 * starts OUTPUT0_DIMS[2] elements past the beginning of `output`.
 *
 * input0 / input1 are never dereferenced here; only INPUT0_DIMS and
 * INPUT1_DIMS are consulted.
 *
 * NOTE(review): width_, height_, variance_, img_w_, img_h_, step_, step_w_,
 * step_h_, offset_, clip_ and the *_TYPE / *_DIMS names are not defined in
 * this file -- presumably injected by the host when the kernel is built.
 * The kernel has no bounds check, so it assumes the global work size matches
 * the grid described by INPUT0_DIMS[2] x INPUT0_DIMS[3] -- confirm with the
 * launch configuration.
 */
__kernel void prior_box_clustered(
    const __global INPUT0_TYPE*  input0,
    const __global INPUT0_TYPE*  input1,
    __global OUTPUT0_TYPE* output)
{
    // Element counts of the build-time parameter arrays.
    const int prior_count    = sizeof(width_)/sizeof(width_[0]);
    const int variance_count = sizeof(variance_)/sizeof(variance_[0]);

    // An explicit image size of 0 means "take it from the second input's dims".
    const float image_w = (img_w_ == 0) ? INPUT1_DIMS[3] : img_w_;
    const float image_h = (img_h_ == 0) ? INPUT1_DIMS[2] : img_h_;

    const float inv_image_w = 1.f/image_w;
    const float inv_image_h = 1.f/image_h;

    // Per-axis step falls back to the common step_ when it is 0.
    float stride_x = (step_w_ == 0) ? step_ : step_w_;
    float stride_y = (step_h_ == 0) ? step_ : step_h_;

    // Only when both are still 0 is the step derived from the size ratio.
    if ((stride_x == 0) && (stride_y == 0))
    {
        stride_x = image_w / INPUT0_DIMS[3];
        stride_y = image_h / INPUT0_DIMS[2];
    }

    const int row = get_global_id(0);
    const int col = get_global_id(1);

    // Start of this work-item's box coordinates and of its variance copies.
    __global OUTPUT0_TYPE* boxes_base = output + row*INPUT0_DIMS[3]*prior_count*4 + col*prior_count*4;
    __global OUTPUT0_TYPE* vars_base  = output + OUTPUT0_DIMS[2] + row*INPUT0_DIMS[3]*prior_count*variance_count + col * prior_count * variance_count;

    const float cx = (col + offset_) * stride_x;
    const float cy = (row + offset_) * stride_y;

    for (int p = 0; p < prior_count; ++p)
    {
        const float prior_w = width_[p];
        const float prior_h = height_[p];
        const float half_w  = prior_w*0.5f;
        const float half_h  = prior_h*0.5f;

        OUTPUT0_TYPE x_lo = (cx - half_w) * inv_image_w;
        OUTPUT0_TYPE y_lo = (cy - half_h) * inv_image_h;
        OUTPUT0_TYPE x_hi = (cx + half_w) * inv_image_w;
        OUTPUT0_TYPE y_hi = (cy + half_h) * inv_image_h;

        if (clip_)
        {
            const OUTPUT0_TYPE lower = (OUTPUT0_TYPE)(0.0f);
            const OUTPUT0_TYPE upper = (OUTPUT0_TYPE)(1.0f);

            x_lo = min(max(x_lo, lower), upper);
            y_lo = min(max(y_lo, lower), upper);
            x_hi = min(max(x_hi, lower), upper);
            y_hi = min(max(y_hi, lower), upper);
        }

        // Four consecutive coordinates per prior.
        __global OUTPUT0_TYPE* box_out = boxes_base + p * 4;
        box_out[0] = x_lo;
        box_out[1] = y_lo;
        box_out[2] = x_hi;
        box_out[3] = y_hi;

        // Each prior gets its own copy of the variance values.
        __global OUTPUT0_TYPE* var_out = vars_base + p * variance_count;
        for (int v = 0; v < variance_count; v++)
        {
            var_out[v] = variance_[v];
        }
    }
}