syntax = "proto2";

package caffe2;

// A few notes about Caffe2's protobuf conventions:
// (1) Most objects are registered by their types, such as operators and nets.
//     For these, we have a string-type field "type" for registration purposes.
// (2) We do not use extensions, because they used to create quite a few
//     conflicts in Caffe's protobuf design.
// (3) We have not used any proto3-specific features, such as Any or Map. This
//     is mainly for backward compatibility purposes, but we may consider using
//     them in the future.

// TensorProto stores serialized Tensor objects.
message TensorProto {
  // The dimensions in the tensor.
  repeated int64 dims = 1;
  enum DataType {
    UNDEFINED = 0;
    FLOAT = 1;  // float
    INT32 = 2;  // int
    BYTE = 3;  // BYTE, when deserialized, is going to be restored as uint8.
    STRING = 4;  // string
    // Less-commonly used data types.
    BOOL = 5;  // bool
    UINT8 = 6;  // uint8_t
    INT8 = 7;  // int8_t
    UINT16 = 8;  // uint16_t
    INT16 = 9;  // int16_t
    INT64 = 10;  // int64_t
    FLOAT16 = 12;  // at::Half
    DOUBLE = 13;  // double
  }
  optional DataType data_type = 2 [default = FLOAT];
  // For float
  repeated float float_data = 3 [packed = true];
  // For int32, uint8, int8, uint16, int16, bool, and float16
  // Note about float16: in storage we will basically convert float16 byte-wise
  // to unsigned short and then store them in the int32_data field.
  repeated int32 int32_data = 4 [packed = true];
  // For bytes
  optional bytes byte_data = 5;
  // For strings
  repeated bytes string_data = 6;
  // For double
  repeated double double_data = 9 [packed = true];
  // For int64
  repeated int64 int64_data = 10 [packed = true];
  // Optionally, a name for the tensor.
  optional string name = 7;

  // Optionally, a TensorProto can contain the details about the device that
  // it was serialized from. This is useful in cases like snapshotting a whole
  // workspace in a multi-GPU environment.
  optional DeviceOption device_detail = 8;
  // When loading from chunks, this indicates where to put the data in the
  // full array. When not used, the full data must be present.
  message Segment {
    required int64 begin = 1;
    required int64 end = 2;
  }
  optional Segment segment = 11;
}
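
// A minimal sketch (in protobuf text format) of how a small 2x3 float tensor
// might be serialized as a TensorProto; the name "W" and the values are made
// up for illustration:
//
//   dims: 2
//   dims: 3
//   data_type: FLOAT
//   float_data: 0.1
//   float_data: 0.2
//   float_data: 0.3
//   float_data: 0.4
//   float_data: 0.5
//   float_data: 0.6
//   name: "W"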

message QTensorProto {
  repeated int64 dims = 1;
  required int32 precision = 2;
  required double scale = 3;
  required double bias = 4;
  required bool is_signed = 5;
  repeated int32 data = 6 [packed = true];
  optional string name = 7;
  optional TensorProto.DataType data_type = 8 [default = INT32];
}
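
// A sketch (protobuf text format) of a hypothetical 8-bit quantized tensor;
// the name and all numeric values below are made up for illustration:
//
//   dims: 4
//   precision: 8
//   scale: 0.02
//   bias: -1.28
//   is_signed: false
//   data: 0
//   data: 64
//   data: 128
//   data: 255
//   name: "conv1_w_quantized"
//   data_type: UINT8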

// TensorProtos stores multiple TensorProto objects in a single proto. This
// is useful for small tensors; for anything big, consider using a DB for
// storage.
message TensorProtos {
  repeated TensorProto protos = 1;
}

message TensorShape {
  repeated int64 dims = 1;
  optional TensorProto.DataType data_type = 2 [default = FLOAT];
  repeated int32 unknown_dims = 3;
  optional bool unknown_shape = 4 [default = false];
  optional string name = 5;
}

message TensorShapes {
  repeated TensorShape shapes = 1;
}
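
// A sketch (protobuf text format) of a TensorShapes message holding one fully
// known shape and one blob whose shape could not be inferred; the blob names
// are hypothetical:
//
//   shapes {
//     dims: 64
//     dims: 3
//     dims: 7
//     dims: 7
//     data_type: FLOAT
//     name: "conv1_w"
//   }
//   shapes {
//     unknown_shape: true
//     name: "dynamic_blob"
//   }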

// A named argument containing either a singular float, integer or string
// value, or repeated float, int and string arrays.
message Argument {
  optional string name = 1;
  optional float f = 2;
  optional int64 i = 3;
  optional bytes s = 4;
  optional NetDef n = 8;
  repeated float floats = 5;
  repeated int64 ints = 6;
  repeated bytes strings = 7;
  repeated NetDef nets = 9;
}
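
// A sketch (protobuf text format) of Arguments as they might appear inside an
// OperatorDef for a convolution-style operator; the argument names are
// illustrative, not a statement of any particular operator's schema:
//
//   arg { name: "kernel" i: 3 }
//   arg { name: "stride" i: 2 }
//   arg { name: "pads" ints: 1 ints: 1 ints: 1 ints: 1 }
//   arg { name: "order" s: "NCHW" }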

// DeviceType that Caffe2 currently supports.
// Note: if you add a device type, make sure you add the corresponding device
// line in the DeviceTypeName() function in caffe2/utils/proto_utils.cc
// and update ATen/core/DeviceType.h
enum DeviceTypeProto {
  PROTO_CPU = 0;                    // By default, we will use CPU.
  PROTO_CUDA = 1;                   // CUDA.
  PROTO_MKLDNN = 2;                 // Reserved for explicit MKLDNN
  PROTO_OPENGL = 3;                 // OpenGL
  PROTO_OPENCL = 4;                 // OpenCL
  PROTO_IDEEP = 5;                  // IDEEP.
  PROTO_HIP = 6;                    // AMD HIP
  // Change the following number if you add more devices in the code.
  PROTO_COMPILE_TIME_MAX_DEVICE_TYPES = 7;
  PROTO_ONLY_FOR_TEST = 20901701;   // This device type is only for test.
}

// Device-specific options. We do not distinguish DeviceOption protos for
// different DeviceTypes, so currently all devices share the same DeviceOption
// proto. Fields that are specific to a device type are ignored if the type
// does not match.
// Note: if you add fields to the DeviceOption, make sure you add the
// corresponding changes to IsSameDevice() function in utils/proto_utils.{h,cc}.
message DeviceOption {
  // [general] Options that need to be set up before running the execution.
  // optional DeviceType device_type = 1 [ default = CPU ];
  optional int32 device_type = 1 [ default = 0 ]; // 0 is CPU.
  // [CUDA specific] the cuda gpu id.
  optional int32 device_id = 2;
  // [general] The random seed to start the device random number generator with.
  optional uint32 random_seed = 3;
  // [general] What node this op should execute on.
  // Used for net transformation purposes. Must be empty at execution time.
  optional string node_name = 4;
  // [CPU and Linux specific] NUMA node id
  optional int32 numa_node_id = 5;
  // [general] Extra information passed, not used at execution time currently.
  repeated string extra_info = 6;
  // [HIP specific] the hip gpu id.
  optional int32 hip_gpu_id = 7;
}
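
// A minimal sketch (protobuf text format) of a DeviceOption requesting the
// second CUDA device with a fixed random seed; the values are illustrative:
//
//   device_type: 1   # PROTO_CUDA
//   device_id: 1
//   random_seed: 1701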

// Operator Definition.
message OperatorDef {
  repeated string input = 1; // the names of the input blobs
  repeated string output = 2; // the names of the output blobs
  optional string name = 3; // the operator name. This is optional.
  // the operator type. This is needed to create the object from the operator
  // registry.
  optional string type = 4;
  repeated Argument arg = 5;

  // The device option that the operator should run under.
  optional DeviceOption device_option = 6;

  // Optionally, one can specify an engine when there are multiple
  // implementations available simultaneously for one device type.
  // If one specifies an engine but that engine does not exist in the compiled
  // Caffe2 binary, Caffe2 will fall back to the default engine of that device
  // type.
  optional string engine = 7;

  // Additional 'fake' inputs used for expressing control dependencies
  // in the operator graph. This can be used to ensure that an
  // operator does not run until another operator is ready, e.g. for
  // scheduling control. These are not passed as actual inputs to the
  // Operator implementation, and are only used by the Net class for
  // scheduling purposes.
  repeated string control_input = 8;

  // is_gradient_op argument is only used as a hint in shape inference
  // and has no runtime significance
  optional bool is_gradient_op = 9 [default = false];

  // debug information associated with the construction of the operator.
  // This is an optional string with no assumed characteristics as
  // operators can be constructed in any language.
  optional string debug_info = 10;
}
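
// A sketch (protobuf text format) of an OperatorDef for a ReLU-style operator
// running on GPU 0; the blob names and the "CUDNN" engine are examples, not a
// statement about which operators or engines are available in a given build:
//
//   input: "conv1"
//   output: "conv1_relu"
//   type: "Relu"
//   device_option { device_type: 1 device_id: 0 }
//   engine: "CUDNN"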

// Network definition.
message NetDef {
  optional string name = 1; // the network's name
  // Operators that the network contains.
  // Note: this is not named "operator" because that is a reserved word in C++.
  repeated OperatorDef op = 2;

  // The type of network that the net should be run with. This routes the
  // network instantiation to different execution modes. The default mode,
  // "simple", runs the operators in a sequential way as the original Caffe
  // implementation does.
  optional string type = 3;

  // The number of workers, if the operators in the network are to be run
  // in parallel.
  // Note: This is to be deprecated. Use the arg field with "num_workers" as
  // the key instead.
  optional int32 num_workers = 4 [deprecated=true];

  // The device option for the network. If a network has a specific device
  // option and one of its operators does not have it set, we will copy over the
  // device option to the operator. This allows us to basically avoid putting
  // device options at every operator.
  optional DeviceOption device_option = 5;

  repeated Argument arg = 6;

  // Two optional fields to declare the external inputs and outputs of a net.
  // If these two are set, when a net is created, we will sanity check for
  // every op whether its inputs are declared (either as an external input,
  // or as an intermediate blob created by one of the ops), and sanity check
  // that all blobs in external_output are produced.
  //
  // In cases of memory optimization, declaring external_input and
  // external_output also ensures that the storage of these blobs is persistent:
  // for any blob in external_input and external_output, after a network run
  // finishes, its contents are guaranteed to be correct. Any intermediate
  // blobs' contents may be overwritten.
  repeated string external_input = 7;
  repeated string external_output = 8;
}
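
// A sketch (protobuf text format) of a small sequential NetDef with declared
// external inputs and outputs; the operator types and blob names are
// illustrative:
//
//   name: "example_net"
//   type: "simple"
//   op { input: "data" input: "w" input: "b" output: "fc1" type: "FC" }
//   op { input: "fc1" output: "pred" type: "Softmax" }
//   external_input: "data"
//   external_input: "w"
//   external_input: "b"
//   external_output: "pred"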

// ExecutionStep is actually a sort-of-hacky way we simulate iteration right
// now.
message ExecutionStep {
  // ExecutionStep should either contain a set of substeps, or a set of
  // network names to run in this execution step. They should NOT both be set
  // at the same time.
  optional string name = 1;
  // An execution step could be recursive, in which case it involves a set of
  // substeps.
  repeated ExecutionStep substep = 2;
  // Alternatively, an execution step could involve one or more networks.
  // Note that you cannot have both substeps and networks. Choose one.
  // Note that an execution step refers to networks by their names. The actual
  // network definition of the same name should be included in the network field
  // of the plan. The reason is that a network object might hold internal state
  // (think of a data layer), so we want to have a single network object that
  // multiple steps could ask to run.
  repeated string network = 3;
  // Number of iterations to run this step. The substeps or the networks
  // specified will be run sequentially, and one sequential run is considered
  // one iteration. If this is not set, the number of iterations is assumed to
  // be 1.
  optional int64 num_iter = 4;

  // The criteria network specifies a single output (TensorCPU<bool>) of
  // size (1); it is run on every iteration by the executor, and
  // execution terminates when output[0] is `false`.
  optional string criteria_network = 5 [deprecated=true];

  // DEPRECATED. Use `run_every_ms`.
  optional string report_net = 7;
  optional int32 report_interval = 8;

  // If provided, execute this step at every time interval (in milliseconds)
  // while its sibling execution steps execute in parallel. This step is
  // guaranteed to run at least once after all non-interval siblings have
  // finished.
  optional int64 run_every_ms = 11;

  // If false or not set, execute substeps serially.
  // If true, execute all substeps concurrently, each one in a separate thread.
  optional bool concurrent_substeps = 6;

  // Name of a scalar boolean tensor.
  // ES checks this blob AFTER every substep/subnet.
  // If specified, and the value is true, then ES will skip the rest and return
  // immediately.
  // This means that the report_net and the first step will always be called.
  // Use cases:
  // 1) the first substep stops the rest if a data condition is not met
  // 2) the first substep decides which of the rest of the steps should be run
  // 3) external control
  //
  // ** It is the user's responsibility not to put this blob in a race condition,
  // ** for example when setting this blob in concurrent substeps.
  optional string should_stop_blob = 9;

  // If only_once is true, this step will only be executed once. This ONLY takes
  // effect when using should_stop_blob.
  optional bool only_once = 10;

  // Whether to create a child workspace for this step.
  // If yes, the workflow and nets are re-created every time this step is run.
  optional bool create_workspace = 12;

  // How many copies of the child execution steps to run concurrently.
  optional int32 num_concurrent_instances = 13;
}
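
// A sketch (protobuf text format) of an ExecutionStep that runs one network
// for a fixed number of iterations, stopping early if a boolean blob becomes
// true; all names are hypothetical:
//
//   name: "train_loop"
//   network: "train_net"
//   num_iter: 1000
//   should_stop_blob: "stop_training"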

message PlanDef {
  // The name of this plan.
  optional string name = 1;
  // All the networks that are going to be used in this plan. Note that networks
  // should be ordered in the way they are executed, i.e. for a layer in a
  // network, all its input blobs should already have been initialized by the
  // layers or networks defined before it.
  repeated NetDef network = 2;
  repeated ExecutionStep execution_step = 3;
}
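
// A sketch (protobuf text format) of a PlanDef that runs an init step once and
// then a training loop; the network contents are omitted and all names are
// hypothetical:
//
//   name: "example_plan"
//   network { name: "init_net" type: "simple" }
//   network { name: "train_net" type: "simple" }
//   execution_step { name: "init" network: "init_net" num_iter: 1 }
//   execution_step { name: "train" network: "train_net" num_iter: 1000 }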

// Protobuf format for blobs that are not Tensors. We use a key to store the
// type of the blob. For example, for a serialized DBReader, the type should
// be "DBReader" and the content should be a serialized DBReaderProto object.
message BlobProto {
  optional string name = 1;
  optional string type = 2;
  optional TensorProto tensor = 3;
  optional bytes content = 4;
  optional QTensorProto qtensor = 5;
  // If the blob is not a Tensor and is divided into chunks, content_num_chunks
  // contains the number of chunks into which the blob was divided.
  optional int32 content_num_chunks = 6;
  optional int32 content_chunk_id = 7;
}
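
// A sketch (protobuf text format) of a BlobProto wrapping a serialized
// non-tensor object, following the DBReader convention described above; the
// name is hypothetical and the content bytes are elided:
//
//   name: "train_reader"
//   type: "DBReader"
//   content: "<serialized DBReaderProto bytes>"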

// Protobuf format to serialize DBReader.
message DBReaderProto {
  // The name for the DB object in the workspace.
  optional string name = 1;
  // The source of the DB
  optional string source = 2;
  // The type of the DB
  optional string db_type = 3;
  // The current key of the DB if the DB supports seeking.
  optional string key = 4;
}
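
// A sketch (protobuf text format) of a DBReaderProto pointing at a local LMDB
// database; the path and key are hypothetical:
//
//   name: "train_reader"
//   source: "/tmp/train_data.lmdb"
//   db_type: "lmdb"
//   key: "00000042"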