syntax = "proto2";
package caffe2;
// A few notes about Caffe2's protobuf conventions:
// (1) Most objects are registered by their types, such as operators and nets.
// For these, we have a string-type field "type" for registration purposes.
// (2) We do not use extensions because they used to create quite a few
// conflicts in Caffe's protobuf design.
// (3) We have not used any proto3 specific features, such as Any or Map. This
// is mainly for backward compatibility but we may consider using
// those in the future.
// ExternalDataProto stores a pointer to the content of a TensorProto.
// The content is stored in raw format as little-endian.
message ExternalDataProto {
// The type of the external storage; can be one of the following:
enum SourceType {
// The container defined in torch/csrc/jit/serialization.h is used, and
// record_id is the tag that helps the runtime identify the data.
// This storage type is the DEFAULT and is recommended for external
// data storage.
INLINE_CONTAINER = 0;
// An external file is used to store the data, and record_id is the POSIX
// relative path to the file. This (simple) file holds only the data, which
// is stored as little-endian.
SIMPLE_FILE = 1;
}
optional SourceType source_type = 1 [default = INLINE_CONTAINER];
// Used together with source_type to locate the data.
optional string record_id = 2;
// The offset of the starting point; the content may be shared between
// multiple tensors.
optional int64 offset = 3;
// the strides of the content
repeated int64 strides = 4;
}
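// Example (illustrative only, not part of the schema): a minimal sketch of
// filling in an ExternalDataProto via the generated Python bindings,
// assuming the schema has been compiled into the caffe2_pb2 module. The
// file path and strides used here are hypothetical.
//
//   from caffe2.proto import caffe2_pb2
//
//   ext = caffe2_pb2.ExternalDataProto()
//   ext.source_type = caffe2_pb2.ExternalDataProto.SIMPLE_FILE
//   ext.record_id = "weights/fc1.bin"  # POSIX relative path to the data file
//   ext.offset = 0                     # data starts at the beginning of the file
//   ext.strides.extend([1024, 1])      # strides of the stored content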
// TensorProto stores serialized Tensor objects.
message TensorProto {
// The dimensions in the tensor.
repeated int64 dims = 1;
// Data type
enum DataType {
UNDEFINED = 0;
// Basic types
FLOAT = 1; // float
INT32 = 2; // int
BYTE = 3; // byte, when deserialized, is going to be restored as uint8
STRING = 4; // string
// Less-commonly used data types
BOOL = 5; // bool
UINT8 = 6; // uint8_t
INT8 = 7; // int8_t
UINT16 = 8; // uint16_t
INT16 = 9; // int16_t
INT64 = 10; // int64_t
FLOAT16 = 12; // at::Half
DOUBLE = 13; // double
}
optional DataType data_type = 2 [default = FLOAT];
// data storage
enum StorageType {
// the content is stored in typed field, for example, if the data_type is
// FLOAT, float_data is used to store the content.
TYPED = 1;
// the content is serialized in field raw_data as little-endian
RAW = 2;
// the pointer to the content is stored in field external_data
// the content is serialized as little-endian
EXTERNAL = 3;
// When StorageType is NO_CONTENT, the TensorProto stores only type and
// shape information. TensorProto is reused for this so that a single proto
// suffices, instead of introducing a separate ValueInfoProto.
}
optional StorageType storage_type = 12 [default = TYPED];
// For float
repeated float float_data = 3 [packed = true];
// For int32, uint8, int8, uint16, int16, bool, and float16
// Note about float16: in storage we basically convert each float16 value
// bit-wise to an unsigned short and store it in the int32_data field.
repeated int32 int32_data = 4 [packed = true];
// For bytes
optional bytes byte_data = 5;
// For strings
repeated bytes string_data = 6;
// For double
repeated double double_data = 9 [packed = true];
// For int64
repeated int64 int64_data = 10 [packed = true];
// store the raw data, contents are serialized as little-endian
optional bytes raw_data = 13;
// store the pointer to the data
optional ExternalDataProto external_data = 14;
// Optionally, a name for the tensor.
optional string name = 7;
// Optionally, a TensorProto can contain the details about the device that
// it was serialized from. This is useful in cases like snapshotting a whole
// workspace in a multi-GPU environment.
optional DeviceOption device_detail = 8;
// When loading from chunks, this indicates where to place the data in the
// full array. When not used, the full data must be present.
message Segment {
required int64 begin = 1;
required int64 end = 2;
}
optional Segment segment = 11;
}
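// Example (illustrative only, not part of the schema): a minimal sketch of
// populating a FLOAT TensorProto through the generated Python bindings,
// assuming the caffe2_pb2 module and numpy are available; names are made up.
//
//   from caffe2.proto import caffe2_pb2
//   import numpy as np
//
//   arr = np.arange(6, dtype=np.float32).reshape(2, 3)
//   t = caffe2_pb2.TensorProto()
//   t.name = "example_blob"
//   t.dims.extend(arr.shape)                  # [2, 3]
//   t.data_type = caffe2_pb2.TensorProto.FLOAT
//   t.float_data.extend(arr.ravel().tolist())
//
//   # float16 content is reinterpreted bit-wise as unsigned shorts and
//   # stored in int32_data, as described above:
//   h = np.ones(4, dtype=np.float16)
//   t16 = caffe2_pb2.TensorProto(dims=[4],
//                                data_type=caffe2_pb2.TensorProto.FLOAT16)
//   t16.int32_data.extend(h.view(np.uint16).tolist())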
message QTensorProto {
repeated int64 dims = 1;
required int32 precision = 2;
required double scale = 3;
required double bias = 4;
required bool is_signed = 5;
repeated int32 data = 6 [packed = true];
optional string name = 7;
optional TensorProto.DataType data_type = 8 [default = INT32];
}
// TensorProtos stores multiple TensorProto objects in one single proto. This
// is useful for small tensors; for anything big, consider using a DB for
// storage.
message TensorProtos {
repeated TensorProto protos = 1;
}
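// Example (illustrative only): bundling two tensors into a TensorProtos and
// round-tripping it through a byte string, assuming the caffe2_pb2 bindings
// and the TensorProto messages `t` and `t16` built in the example above.
//
//   protos = caffe2_pb2.TensorProtos()
//   protos.protos.extend([t, t16])
//   payload = protos.SerializeToString()
//
//   restored = caffe2_pb2.TensorProtos()
//   restored.ParseFromString(payload)
//   assert list(restored.protos[0].dims) == list(t.dims)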
message TensorShape {
repeated int64 dims = 1;
optional TensorProto.DataType data_type = 2 [default = FLOAT];
repeated int32 unknown_dims = 3;
optional bool unknown_shape = 4 [default = false];
optional string name = 5;
}
message TensorShapes {
repeated TensorShape shapes = 1;
}
// A named argument containing either singular float, integer and string
// values, or repeated float, int and string arrays.
message Argument {
optional string name = 1;
optional float f = 2;
optional int64 i = 3;
optional bytes s = 4;
optional TensorProto t = 10;
optional NetDef n = 8;
repeated float floats = 5;
repeated int64 ints = 6;
repeated bytes strings = 7;
repeated TensorProto tensors = 11;
repeated NetDef nets = 9;
}
// DeviceType that Caffe2 currently supports.
// Note: if you add a device type, make sure you add the corresponding device
// line in the DeviceTypeName() function in caffe2/utils/proto_utils.cc
// and update ATen/core/DeviceType.h
enum DeviceTypeProto {
PROTO_CPU = 0; // By default, we use CPU.
PROTO_CUDA = 1; // CUDA.
PROTO_MKLDNN = 2; // Reserved for explicit MKLDNN
PROTO_OPENGL = 3; // OpenGL
PROTO_OPENCL = 4; // OpenCL
PROTO_IDEEP = 5; // IDEEP.
PROTO_HIP = 6; // AMD HIP
// Change the following number if you add more devices in the code.
PROTO_COMPILE_TIME_MAX_DEVICE_TYPES = 7;
PROTO_ONLY_FOR_TEST = 20901; // This device type is only for test.
}
// Device-specific options. We do not distinguish DeviceOption protos for
// different DeviceTypes, so currently all devices share the same DeviceOption
// proto. Fields that are specific to a device type are ignored if the type does
// not match.
// Note: if you add fields to the DeviceOption, make sure you add the
// corresponding changes to IsSameDevice() function in utils/proto_utils.{h,cc}.
message DeviceOption {
// [general] Options that need to be carried out before running the execution.
// optional DeviceType device_type = 1 [ default = CPU ];
optional int32 device_type = 1 [ default = 0 ]; // 0 is CPU.
// [general] Used together with device_type to identify the exact device
optional int32 device_id = 2;
// [general] The random seed to start the device random number generator with.
optional uint32 random_seed = 3;
// [general] What node this op should execute on.
// Used for net transformation purposes. Must be empty at execution time.
optional string node_name = 4;
// [CPU and Linux specific] NUMA node id
optional int32 numa_node_id = 5;
// [general] Extra information passed, not used at execution time currently.
repeated string extra_info = 6;
}
// Operator Definition.
message OperatorDef {
repeated string input = 1; // the names of the input blobs
repeated string output = 2; // the names of the output blobs
optional string name = 3; // the operator name. This is optional.
// the operator type. This is needed to create the object from the operator
// registry.
optional string type = 4;
// arg is for the argument defined in operator schema
repeated Argument arg = 5;
// The device option that the operator should run under.
optional DeviceOption device_option = 6;
// Optionally, one can specify an engine when there are multiple
// implementations available simultaneously for one device type.
// If one specifies an engine but that engine does not exist in the compiled
// Caffe2 binary, Caffe2 will fall back to the default engine of that device
// type.
optional string engine = 7;
// Additional 'fake' inputs used for expressing control dependencies
// in the operator graph. This can be used to ensure that an
// operator does not run until another operator is ready, e.g. for
// scheduling control. These are not passed as actual inputs to the
// Operator implementation, and are only used by the Net class for
// scheduling purposes.
repeated string control_input = 8;
// is_gradient_op argument is only used as a hint in shape inference
// and has no runtime significance
optional bool is_gradient_op = 9 [default = false];
// debug information associated with the construction of the operator.
// This is an optional string with no assumed characteristics as
// operators can be constructed in any language.
optional string debug_info = 10;
// The domain of the operator helps the runtime distinguish which operator
// library this OperatorDef refers to. For example, both caffe2 and aten
// have an `Add` operator; with a domain we can easily decide which one to
// execute. To support multiple operator libraries, we use the domain to
// identify which library is being referred to:
// - "caffe2" means the Caffe2 operator library
// - "aten" means the ATen operator library
// - "c10" is for the fused library
// - if the domain is missing or empty, "caffe2" is assumed; this is for
// legacy models. New serializers should always export an OperatorDef
// with domain and op_version.
optional string domain = 11;
// Each operator has its own version number.
// Each time we change the API or semantics of the operator, we bump its
// version. The runtime system should check the op_version of each
// OperatorDef and decide whether to accept or reject the model.
optional int64 op_version = 12;
}
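// Example (illustrative only): an OperatorDef for a fully connected ("FC")
// operator, assuming the caffe2_pb2 bindings; the blob names and the "axis"
// argument are chosen for illustration.
//
//   from caffe2.proto import caffe2_pb2
//
//   op = caffe2_pb2.OperatorDef()
//   op.type = "FC"                      # looked up in the operator registry
//   op.input.extend(["X", "W", "b"])
//   op.output.append("Y")
//   arg = op.arg.add()                  # an Argument from the op's schema
//   arg.name = "axis"
//   arg.i = 1
//   op.device_option.device_type = caffe2_pb2.PROTO_CPU  # 0 == CPU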
// Network definition.
message NetDef {
optional string name = 1; // the network's name
// Operators that the network contains.
// Note: this is not named "operator" because that is a reserved word in C++.
repeated OperatorDef op = 2;
// The type of network that the net should be run with. This routes the
// network instantiation to different execution modes. The default mode,
// "simple", runs the operators in a sequential way as the original Caffe
// implementation does.
optional string type = 3;
// The number of workers, if the operators in the network are to be carried
// out in parallel.
// Note: this is to be deprecated. Use the arg field with "num_workers" as
// the key instead.
optional int32 num_workers = 4 [deprecated=true];
// The device option for the network. If a network has a specific device
// option and one of its operators does not have it set, we will copy over the
// device option to the operator. This allows us to basically avoid putting
// device options at every operator.
optional DeviceOption device_option = 5;
repeated Argument arg = 6;
// Two optional fields to declare external input and output of a net.
// If these two are set, when a net is created, we will sanity check for
// every op whether its input is declared (either as an external input,
// or as an intermediate blob created by one of the ops), and sanity check
// if all blobs in external_output are produced.
//
// In cases of memory optimization, declaring external_input and
// external_output also ensures that the storage of these blobs is
// persistent: for any blob in external_input or external_output, its
// content is guaranteed to be correct after a network run finishes. Any
// intermediate blobs' contents may be overwritten.
repeated string external_input = 7;
repeated string external_output = 8;
}
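// Example (illustrative only): a NetDef that wraps the operator `op` built
// in the OperatorDef example above and declares its external inputs and
// output, assuming the caffe2_pb2 bindings.
//
//   net = caffe2_pb2.NetDef()
//   net.name = "example_net"
//   net.type = "simple"                 # run operators sequentially
//   net.op.extend([op])
//   net.external_input.extend(["X", "W", "b"])
//   net.external_output.append("Y")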
// ExecutionStep is actually a sort-of-hacky way we simulate iteration right
// now.
message ExecutionStep {
// ExecutionStep should either contain a set of substeps, or a set of
// network names to run in this execution step. They should NOT both be set
// at the same time.
optional string name = 1;
// An execution step could be recursive, in which case it involves a set of
// substeps.
repeated ExecutionStep substep = 2;
// Alternatively, an execution step could involve one or more networks.
// Note that you cannot have both substeps and networks. Choose one.
// Note that an execution step refers to networks by their name. The actual
// network definition of the same name should be included in the network field
// of the plan. The reason is that a network object might hold internal states
// (think of a data layer), so we want to have the same network object that
// multiple steps could ask to run.
repeated string network = 3;
// Number of iterations to run this step. The substeps or the networks
// specified will be run sequentially, and one sequential run is considered
// one iteration. If this is not set, the number of iterations is assumed to
// be 1.
optional int64 num_iter = 4;
// The criteria network specifies a single output (TensorCPU<bool>) of
// size (1); it is run on every iteration by the executor, and
// execution terminates when output[0] is `false`.
optional string criteria_network = 5 [deprecated=true];
// DEPRECATED. Use `run_every_ms`.
optional string report_net = 7;
optional int32 report_interval = 8;
// If provided, execute this step at every time interval (in milliseconds)
// while its sibling execution steps execute in parallel. This step is
// guaranteed to run at least once after all non-interval siblings finish.
optional int64 run_every_ms = 11;
// If false or not set, execute sub-steps serially.
// If true, execute all substeps concurrently, each one in a separate thread.
optional bool concurrent_substeps = 6;
// Name of a scalar boolean tensor.
// The execution step checks this blob AFTER every substep/subnet.
// If specified, and the value is true, the execution step will skip the
// rest and return immediately.
// This means that the report_net and the first step will always be called.
// Use cases:
// 1) the first substep stops the rest if the data condition is not met;
// 2) the first substep decides which of the remaining steps should run;
// 3) external control.
//
// ** It is the user's responsibility not to put this blob in a race
// ** condition, for example when setting it from concurrent substeps.
optional string should_stop_blob = 9;
// If only_once is true, this step will only be executed once. This ONLY
// takes effect when should_stop_blob is used.
optional bool only_once = 10;
// Whether to create a child workspace for this step.
// If yes, the workflow and nets are re-created every time this step is run.
optional bool create_workspace = 12;
// How many copies of the children execution steps to run concurrently.
optional int32 num_concurrent_instances = 13;
}
message PlanDef {
// All the networks that are used in this execution. Note that networks should
// be ordered in the way they are executed, i.e. for a layer in a network, all
// its input blobs should already have been initialized by the layers or
// networks defined before it.
optional string name = 1;
// The networks that are going to be used in this plan.
repeated NetDef network = 2;
repeated ExecutionStep execution_step = 3;
}
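// Example (illustrative only): a PlanDef with one network and a single
// execution step that runs it for 10 iterations, assuming the caffe2_pb2
// bindings and the `net` built in the NetDef example above.
//
//   plan = caffe2_pb2.PlanDef()
//   plan.name = "example_plan"
//   plan.network.extend([net])
//   step = plan.execution_step.add()
//   step.name = "main_loop"
//   step.network.append("example_net")  # refer to the net by its name
//   step.num_iter = 10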
// Protobuf format for blobs that are not Tensors. We use a key to store the
// type of the blob. For example for a serialized DBProto, the type should
// be "DBReader" and the content should be a serialized DBProto object.
message BlobProto {
optional string name = 1;
optional string type = 2;
optional TensorProto tensor = 3;
optional bytes content = 4;
optional QTensorProto qtensor = 5;
// If the blob is not a Tensor and is divided into chunks, content_num_chunks
// contains the number of chunks into which the blob was divided.
optional int32 content_num_chunks = 6;
optional int32 content_chunk_id = 7;
}
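// Example (illustrative only): wrapping a TensorProto in a BlobProto,
// assuming the caffe2_pb2 bindings and the tensor `t` from the TensorProto
// example above; the blob name and "Tensor" type string are illustrative.
//
//   blob = caffe2_pb2.BlobProto()
//   blob.name = "example_blob"
//   blob.type = "Tensor"
//   blob.tensor.CopyFrom(t)
//   serialized = blob.SerializeToString()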
// Protobuf format to serialize DBReader.
message DBReaderProto {
// The name for the DB object in the workspace.
optional string name = 1;
// The source of the DB
optional string source = 2;
// The type of the DB
optional string db_type = 3;
// The current key of the DB if the DB supports seeking.
optional string key = 4;
}