summaryrefslogtreecommitdiff
path: root/inference-engine/thirdparty/clDNN/kernel_selector/core/kernel_selector_params.h
blob: d4351f2b42e4bc1065eedacecd6ea060336163a6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
/*
// Copyright (c) 2016 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#pragma once

#include <string>
#include <memory>
#include <cstddef>
#include "common_types.h"
#include "common_tools.h"
#include "tensor_type.h"

namespace kernel_selector
{
    // Shorthand aliases that expose the Tensor:: types directly in the
    // kernel_selector namespace.
    using DataTensor = Tensor::DataTensor;
    using WeightsTensor = Tensor::WeightsTensor;
    using DataLayout = Tensor::DataLayout;
    using WeightsLayout = Tensor::WeightsLayout;
    // A list of data tensors (used below for the inputs of base_params).
    using MultiDataTensor = std::vector<DataTensor>;
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // ParamsKey
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Bit-set style key describing a combination of features: generic
    // restrictions, machine capabilities, data/weights types and layouts.
    // A freshly constructed key has every feature bit cleared and tuning
    // enabled; the Enable*/Disable* members switch individual bits.
    // Support() and Merge() are defined out of line.
    class ParamsKey
    {
    public:
        // Clears every flag/mask and turns tuning on.
        ParamsKey()
        {
            key.restrict.raw = 0;
            key.enableTuning = 1;
            key.machineInfo.raw = 0;
            key.inputType.raw = 0;
            key.outputType.raw = 0;
            key.inputWeightsType.raw = 0;
            key.outputWeightsType.raw = 0;
            key.inputLayout = 0;
            key.outputLayout = 0;
            key.weightsInputLayout = 0;
            key.weightsOutputLayout = 0;
        }

        // Storage for all key bits. Each union pairs named one-bit flags
        // (val) with a whole-word view (raw) that is used to reset them.
        // NOTE: the order of the bitfields defines the bit layout of `raw`;
        // do not reorder them.
        struct Key
        {
            union restrict_t
            {
                struct val_t
                {
                    // Flags shared by all kernel types.
                    uint32_t different_types : 1;
                    uint32_t offset : 1;
                    uint32_t pitches : 1;
                    uint32_t batching : 1;
                    uint32_t biasPerFeatureMap : 1;
                    uint32_t biasPerOutput : 1;
                    uint32_t nonBias : 1;
                    uint32_t activationAdditionalParamsAsInput : 1;
                    uint32_t FP16Emulation : 1;
                    uint32_t gradient : 1;
                    uint32_t momentum : 1;

                    // Primitive-specific flags. All alternatives share the
                    // same storage, so only one of them is meaningful for a
                    // given key.
                    union dedicated_t
                    {
                        struct lookt_t
                        {
                            uint32_t axisX : 1;
                            uint32_t axisY : 1;
                            uint32_t axisFeature : 1;
                            uint32_t axisBatch : 1;
                            uint32_t axisXYF : 1;
                            uint32_t indicesF32 : 1;
                            uint32_t indicesOther : 1;
                        } lookt;
                        struct argm_t
                        {
                            uint32_t axisX : 1;
                            uint32_t axisY : 1;
                            uint32_t axisFeature : 1;
                            uint32_t axisBatch : 1;
                            uint32_t axisXYF : 1;
                        } argm;
                        struct idxsel_t
                        {
                            uint32_t axisX : 1;
                            uint32_t axisY : 1;
                            uint32_t axisFeature : 1;
                            uint32_t axisBatch : 1;
                        } idxsel;
                        struct norm_t
                        {
                            uint32_t across : 1;
                            uint32_t within : 1;
                            uint32_t fixedKenrelDivider : 1;
                            uint32_t dynamicKenrelDivider : 1;
                        } norm;
                        struct mvn_t
                        {
                            uint32_t across : 1;
                            uint32_t within : 1;
                            uint32_t normalize_variance : 1;
                        } mvn;
                        struct pooling_t
                        {
                            uint32_t max : 1;
                            uint32_t avg : 1;
                            uint32_t floor : 1;
                            uint32_t max_with_argmax : 1;
                            uint32_t ceil : 1;
                            uint32_t bilinear : 1;
                            uint32_t fixedKenrelDivider : 1;
                            uint32_t dynamicKenrelDivider : 1;
                            uint32_t dynamicKenrelDividerWithPadding : 1;
                        } pooling;
                        struct conv_t
                        {
                            uint32_t split : 1;
                            uint32_t dilation : 1;
                            uint32_t depthwiseSeparableOpt : 1;
                            uint32_t transposed : 1;
                            uint32_t quantization : 1;
                            uint32_t calibration : 1;
                        } conv;
                        struct fc_t {} fc;
                        struct softmax_t
                        {
                            uint32_t dimX : 1;
                            uint32_t dimY : 1;
                            uint32_t dimFeature : 1;
                        } softmax;
                        struct region_yolo_t
                        {
                            uint32_t dimX : 1;
                            uint32_t dimY : 1;
                            uint32_t dimFeature : 1;
                            uint32_t coords : 1;
                            uint32_t classes : 1;
                            uint32_t num : 1;
                        } region_yolo;
                        struct reorg_yolo_t
                        {
                            uint32_t dimX : 1;
                            uint32_t dimY : 1;
                            uint32_t dimFeature : 1;
                            uint32_t stride : 1;
                        } reorg_yolo;
                        struct concat_t
                        {
                            uint32_t axisX : 1;
                            uint32_t axisY : 1;
                            uint32_t axisFeature : 1;
                            uint32_t axisBatch : 1;
                            uint32_t kernelPerInput : 1;
                            uint32_t oneKernel : 1;
                        } concat;
                        struct upsample_t
                        {
                            uint32_t nearest : 1;
                            uint32_t bilinear : 1;
                        } upsample;
                        struct reorder_t
                        {
                            uint32_t winograd : 1;
                        } reorder;
                        struct lstm_gemm_t {
                            uint32_t bias : 1;
                            uint32_t hidden : 1;
                        } lstm_gemm;
                        struct lstm_elt_t {
                            uint32_t cell : 1;
                        } lstm_elt;
                    } dedicated;
                } val;
                uint64_t raw;
            } restrict;

            union machine_info_t
            {
                struct val_t
                {
                    uint32_t subgroup : 1;
                    uint32_t subgroupShort : 1;
                } val;
                uint32_t raw;
            } machineInfo;

            // `raw` must cover every bit of `val`, otherwise zeroing `raw`
            // would leave some flags uninitialized.
            static_assert(sizeof(restrict_t) == sizeof(uint64_t), "problem with union");

            // One flag per data type; used for both data and weights types.
            typedef union DataTypesKey_t
            {
                struct val_t
                {
                    uint32_t int8 : 1;
                    uint32_t uint8 : 1;
                    uint32_t int16 : 1;
                    uint32_t uint16 : 1;
                    uint32_t int32 : 1;
                    uint32_t uint32 : 1;
                    uint32_t int64 : 1;
                    uint32_t F16 : 1;
                    uint32_t F32 : 1;
                } val;
                uint32_t raw;
            } DataTypesKey;

            uint32_t enableTuning;
            DataTypesKey inputType;
            DataTypesKey outputType;
            DataTypesKey inputWeightsType;
            DataTypesKey outputWeightsType;
            // Layout masks: bit N corresponds to the layout enumerator with value N.
            uint32_t inputLayout;
            uint32_t outputLayout;
            uint32_t weightsInputLayout;
            uint32_t weightsOutputLayout;
        };

        // Data / weights type selection (defined out of line).
        void EnableInputDataType(Datatype dt);
        void EnableAllInputDataType();
        void EnableOutputDataType(Datatype dt);
        void EnableAllOutputDataType();
        void EnableInputWeightsType(WeightsType wt);
        void EnableAllInputWeightsType();
        void EnableOutputWeightsType(WeightsType wt);
        void EnableAllOutputWeightsType();
        void EnableFP16Emulation() { key.restrict.val.FP16Emulation = 1; }
        void EnableDifferentTypes() { key.restrict.val.different_types = 1; }

        // Layout selection. The shift is done on an unsigned operand so that
        // every bit of the 32-bit mask (including bit 31) can be set without
        // overflowing a signed int; this matches the 0xffffffff "all" masks.
        void EnableInputLayout(DataLayout l) { key.inputLayout |= (1u << l); }
        void EnableAllInputLayout() { key.inputLayout = 0xffffffff; }
        void EnableOutputLayout(DataLayout l) { key.outputLayout |= (1u << l); }
        void EnableAllOutputLayout() { key.outputLayout = 0xffffffff; }
        void EnableInputWeightsLayout(WeightsLayout l) { key.weightsInputLayout |= (1u << l); }
        void EnableAllInputWeightsLayout() { key.weightsInputLayout = 0xffffffff; }
        void EnableOutputWeightsLayout(WeightsLayout l) { key.weightsOutputLayout |= (1u << l); }
        void EnableAllOutputWeightsLayout() { key.weightsOutputLayout = 0xffffffff; }

        // Generic restriction and machine-info flags.
        void EnableTensorOffset() { key.restrict.val.offset = 1; }
        void EnableTensorPitches() { key.restrict.val.pitches = 1; }
        void EnableBatching() { key.restrict.val.batching = 1; }
        void EnableGradient() { key.restrict.val.gradient = 1; }
        void EnableSubGroup() { key.machineInfo.val.subgroup = 1; }
        void EnableSubGroupShort() { key.machineInfo.val.subgroupShort = 1; }
        void EnableNonBiasTerm() { key.restrict.val.nonBias = 1; }
        void EnableBiasPerFeature() { key.restrict.val.biasPerFeatureMap = 1; }
        void EnableBiasPerOutput() { key.restrict.val.biasPerOutput = 1; }
        void EnableActivationAdditionalParamsAsInput() { key.restrict.val.activationAdditionalParamsAsInput = 1; }
        void EnableMomentum() { key.restrict.val.momentum = 1; }

        // Primitive-specific flags (members without a body are defined out of line).
        void EnableLRNMode(LRNMode m);
        void EnableLookUpTableAxis(LookUpTableAxis m);
        void EnableNormalizeMode(NormalizeMode m);
        void EnableMVNMode(MVNMode m);
        void EnableMVNNormalizeVariance();
        void EnableLRNKernelDividerMode(KernelDividerMode m);
        void EnablePoolKernelDividerMode(KernelDividerMode m);
        void EnablePoolType(PoolType t);
        void EnablePoolRemainder(PoolRemainder r);
        void EnableSplitSupport() { key.restrict.val.dedicated.conv.split = 1; }
        void EnableDilation() { key.restrict.val.dedicated.conv.dilation = 1; }
        void EnableDepthwiseSeparableOpt() { key.restrict.val.dedicated.conv.depthwiseSeparableOpt = 1; }
        void EnableTranspose() { key.restrict.val.dedicated.conv.transposed = 1; }
        void EnableInt8Quantization() { key.restrict.val.dedicated.conv.quantization = 1; }
        void EnableOutputCalibration() { key.restrict.val.dedicated.conv.calibration = 1; }
        void EnableWinogradReorder() { key.restrict.val.dedicated.reorder.winograd = 1; }
        void EnableSoftmaxDim(SoftmaxDim d);
        void EnableConcatAxis(ConcatAxis a);
        void EnableUpSamplingSampleType(SampleType a);
        void EnableLSTMGEMMBias() { key.restrict.val.dedicated.lstm_gemm.bias = 1; }
        void EnableLSTMGEMMHidden() { key.restrict.val.dedicated.lstm_gemm.hidden = 1; }
        void EnableLSTMEltCell() { key.restrict.val.dedicated.lstm_elt.cell = 1; }
        void EnableConcatKernelPerInput() { key.restrict.val.dedicated.concat.kernelPerInput = 1; }
        void DisableTuning() { key.enableTuning = 0; }
        void EnableConcatOneKernel() { key.restrict.val.dedicated.concat.oneKernel = 1; }
        void EnableArgMaxMinAxis(ArgMaxMinAxis a);
        void EnableLookUpTableIndicesFormat(Datatype a);
        void EnableIndexSelectAxis(IndexSelectAxis a);

        bool Support(const ParamsKey& k) const;

        // True unless DisableTuning() has been called on this key.
        bool TuningSupport() const
        {
            return key.enableTuning == 1;
        }

        ParamsKey Merge(const ParamsKey& k) const;

    private:
        Key key;
    };

    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // EngineInfo
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Plain record of engine feature flags, size limits and identification
    // strings. A default-constructed instance has every flag false, every
    // limit 0 and every string empty.
    struct EngineInfo
    {
        // Feature flags.
        bool bSubGroupSupport{false};
        bool bSubGroupShortSupport{false};
        bool bFP16Support{false};
        bool bFP64Support{false};
        bool bImageSupport{false};
        bool bIMADSupport{false};
        bool bIMMADSupport{false};

        // Size limits.
        uint64_t maxWorkGroupSize{0};
        uint64_t maxLocalMemSize{0};
        uint64_t maxImage2dWidth{0};
        uint64_t maxImage2dHeight{0};

        // Identification strings.
        std::string deviceId{};
        std::string driverVersion{};
        std::string hostVersion{};
    };

    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Params
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Polymorphic base for parameter sets. It stores the kernel type, a layer
    // identifier and the engine description. Only derived types can construct
    // it, because the constructor is protected.
    struct Params
    {
        virtual ~Params() = default;

        // Kernel type supplied at construction time.
        KernelType GetType() const
        {
            return kType;
        }

        virtual ParamsKey GetParamsKey() const;

    protected:
        // kt: kernel type reported by GetType(); id: initial value of layerID.
        Params(KernelType kt, const std::string& id)
            : kType(kt)
            , layerID(id)
        {
        }

        KernelType kType;

    public:
        std::string layerID;
        EngineInfo engineInfo;

        virtual std::string to_string() const;
    };

    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // base_params
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    struct base_params : public Params
    {
        virtual ~base_params() {}

        ActivationFunction  activationFunc = ActivationFunction::NONE;
        NonLinearParams     activationParams;
        MultiDataTensor     inputs;
        DataTensor          output;
        bool                gradient = false;

        virtual std::string to_string() const;
        virtual ParamsKey GetParamsKey() const;
    protected:

        base_params(KernelType kt) : Params(kt, ""), inputs(1){}
    };

    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Auto tuner parameters
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Only a shared_ptr to the runner is stored here, so a forward
    // declaration is sufficient.
    class KernelRunnerInterface;

    // Auto-tuner settings: tuning mode, cache file path and the kernel runner.
    // Defaults to tuning disabled, an empty path and no runner.
    struct TuningParams
    {
        TuningMode mode;
        std::string cacheFilePath;
        std::shared_ptr<KernelRunnerInterface> runner;

        TuningParams()
            : mode(TuningMode::TUNING_DISABLED)
            , cacheFilePath()
            , runner()
        {
        }
    };

    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // optional_params
    ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
    // Polymorphic base for the optional settings that accompany a parameter
    // set: layout lists, naming/reordering switches and tuning settings.
    // Only derived types can construct it.
    struct optional_params
    {
        virtual ~optional_params() = default;

        // Kernel type supplied at construction time.
        KernelType GetType() const
        {
            return kType;
        }

        std::vector<DataLayout> inputLayouts;
        std::vector<DataLayout> outputLayouts;

        // Name kernels after the layer rather than the internal kernel name.
        bool meaningfulKernelsNames{false};
        // The kernel may supply a kernel that reorders static data
        // (weights, bias, tables, ...).
        bool allowStaticInputReordering{true};
        // The kernel may ask the graph compiler to reorder the input data
        // before the kernel executes.
        bool allowInputReordering{false};
        // The kernel may ask the graph compiler to reorder the output data
        // before the next kernel executes.
        bool allowOutputReordering{false};

        TuningParams tuningParams;

        virtual ParamsKey GetSupportedKey() const;

    protected:
        optional_params(KernelType kt)
            : kType(kt)
        {
        }

        KernelType kType;
    };
}