diff options
Diffstat (limited to 'include')
62 files changed, 5555 insertions, 568 deletions
diff --git a/include/NeuralNetworks.h b/include/NeuralNetworks.h index beaf6befc..6414af688 100644 --- a/include/NeuralNetworks.h +++ b/include/NeuralNetworks.h @@ -42,8 +42,6 @@ * - DO NOT CHANGE THE LAYOUT OR SIZE OF STRUCTURES */ -#if __ANDROID_API__ >= __ANDROID_API_O_MR1__ - #include <stddef.h> #include <stdint.h> #include <sys/cdefs.h> @@ -61,27 +59,24 @@ __BEGIN_DECLS * and {@link ANEURALNETWORKS_INT32}. */ typedef enum { - /** The following entries are used to declare scalars. */ - /** A 32 bit floating point scalar value. */ - ANEURALNETWORKS_FLOAT32 = 0, + ANEURALNETWORKS_FLOAT32 = 0, /** A signed 32 bit integer scalar value. */ - ANEURALNETWORKS_INT32 = 1, + ANEURALNETWORKS_INT32 = 1, /** An unsigned 32 bit integer scalar value. */ - ANEURALNETWORKS_UINT32 = 2, - - /** The following entries are used to declare tensors. */ + ANEURALNETWORKS_UINT32 = 2, /** A tensor of 32 bit floating point values. */ - ANEURALNETWORKS_TENSOR_FLOAT32 = 3, + ANEURALNETWORKS_TENSOR_FLOAT32 = 3, /** A tensor of 32 bit integer values. */ - ANEURALNETWORKS_TENSOR_INT32 = 4, - /** A tensor of 8 bit integers that represent real numbers. + ANEURALNETWORKS_TENSOR_INT32 = 4, + /** + * A tensor of 8 bit integers that represent real numbers. * - * Attached to this tensor are two numbers that can be used to convert - * the 8 bit integer to the real value and vice versa. These two numbers are: - * - scale: a 32 bit non-negative floating point value. - * - zeroPoint: an 32 bit integer, in range [0, 255]. + * Attached to this tensor are two numbers that can be used to convert the + * 8 bit integer to the real value and vice versa. These two numbers are: + * - scale: a 32 bit floating point value greater than zero. + * - zeroPoint: a 32 bit integer, in range [0, 255]. * * The formula is: * real_value = (integer_value - zeroPoint) * scale. @@ -95,17 +90,20 @@ typedef enum { * The type of operations that can be added to a model. */ typedef enum { - /** Adds two tensors, element-wise. 
+ /** + * Adds two tensors, element-wise. * - * Takes two input tensors of identical type and compatible dimensions. The output - * is the sum of both input tensors, optionally modified by an activation function. + * Takes two input tensors of identical {@link OperandCode} and compatible + * dimensions. The output is the sum of both input tensors, optionally + * modified by an activation function. * * Two dimensions are compatible when: * 1. they are equal, or * 2. one of them is 1 * - * The size of the output is the maximum size along each dimension of the input operands. - * It starts with the trailing dimensions, and works its way forward. + * The size of the output is the maximum size along each dimension of the + * input operands. It starts with the trailing dimensions, and works its + * way forward. * * Example: * @@ -113,7 +111,7 @@ typedef enum { * input2.dimension = {5, 4, 3, 1} * output.dimension = {5, 4, 3, 2} * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * @@ -121,95 +119,119 @@ typedef enum { * * Inputs: * * 0: A tensor. - * * 1: A tensor of the same type, and compatible dimensions as input0. - * * 2: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * * 1: A tensor of the same {@link OperandCode}, and compatible dimensions + * as input0. + * * 2: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. * * Outputs: - * * 0: The sum, a tensor of the same type as input0. + * * 0: The sum, a tensor of the same {@link OperandCode} as input0. */ ANEURALNETWORKS_ADD = 0, - /** Performs a 2-D average pooling operation. + /** + * Performs a 2-D average pooling operation. * - * The output dimensions are functions of the filter dimensions, stride, and padding. 
+ * The output dimensions are functions of the filter dimensions, stride, and + * padding. * * The values in the output tensor are computed as: * * output[batch, row, col, channel] = * sum_{i, j}(input[batch, row + i, col + j, channel]) / sum(1) * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * - * Supported tensor rank: 4, with "NHWC" (i.e., Num_samples, Height, Width, and Channels) - * data layout. + * Supported tensor rank: 4, with "NHWC" (i.e., Num_samples, Height, Width, + * and Channels) data layout. * * Both explicit padding and implicit padding are supported. * * Inputs (explicit padding): - * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. - * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. - * * 2: An INT32 value, specifying the padding on the right,in the ‘width’ dimension. - * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. - * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. - * * 5: An INT32 value, specifying the stride when walking through input - * in the ‘width’ dimension. - * * 6: An INT32 value, specifying the stride when walking through input - * in the ‘height’ dimension. - * * 7: An INT32 value, specifying the filter width. - * * 8: An INT32 value, specifying the filter height. - * * 9: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying + * the input. + * * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the left, in the ‘width’ dimension. + * * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the right, in the ‘width’ dimension. 
+ * * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the top, in the ‘height’ dimension. + * * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the bottom, in the ‘height’ dimension. + * * 5: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘width’ dimension. + * * 6: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘height’ dimension. + * * 7: An {@link ANEURALNETWORKS_INT32} scalar, specifying the filter + * width. + * * 8: An {@link ANEURALNETWORKS_INT32} scalar, specifying the filter + * height. + * * 9: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. * * Inputs (implicit padding): - * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. - * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying + * the input. + * * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit + * padding scheme, has to be one of the * {@link PaddingCode} values. - * * 2: An INT32 value, specifying the stride when walking through input - * in the ‘width’ dimension. - * * 3: An INT32 value, specifying the stride when walking through input - * in the ‘height’ dimension. - * * 4: An INT32 value, specifying the filter width. - * * 5: An INT32 value, specifying the filter height. - * * 6: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘width’ dimension. + * * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘height’ dimension. 
+ * * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the filter + * width. + * * 5: An {@link ANEURALNETWORKS_INT32} scalar, specifying the filter + * height. + * * 6: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. * * Outputs: - * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth]. + * * 0: The output 4-D tensor, of shape + [batches, out_height, out_width, depth]. */ ANEURALNETWORKS_AVERAGE_POOL_2D = 1, - /** Concatenates the input tensors along the given dimension. + /** + * Concatenates the input tensors along the given dimension. * - * The input tensors must have identical type and the same dimensions except the - * dimension along the concatenation axis. + * The input tensors must have identical {@link OperandCode} and the same + * dimensions except the dimension along the concatenation axis. * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * * Supported tensor rank: up to 4 * * Inputs: - * * 0 ~ n-1: The list of n input tensors, of shape [D0, D1, ..., Daxis(i), ..., Dm]. - * For inputs of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, all - * input tensors must have the same scale and zeroPoint. - * * n: An INT32 value, specifying the concatenation axis. + * * 0 ~ n-1: The list of n input tensors, of shape + * [D0, D1, ..., Daxis(i), ..., Dm]. For inputs of + * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, all input tensors + * must have the same scale and zeroPoint. + * * n: An {@link ANEURALNETWORKS_INT32} scalar, specifying the + * concatenation axis. * * Outputs: - * * 0: The output, a tensor of the same type as the input tensors. - * The output shape is [D0, D1, ..., sum(Daxis(i)), ..., Dm]. + * * 0: The output, a tensor of the same {@link OperandCode} as the input + * tensors. 
The output shape is [D0, D1, ..., sum(Daxis(i)), ..., Dm]. */ ANEURALNETWORKS_CONCATENATION = 2, - /** Performs an 2-D convolution operation. + /** + * Performs an 2-D convolution operation. * - * The CONV_2D op sweeps a 2-D filter that can mix channels together over a batch of - * images, applying the filter to each window of each image of the appropriate size. + * The CONV_2D op sweeps a 2-D filter that can mix channels together over a + * batch of images, applying the filter to each window of each image of the + * appropriate size. * - * The output dimensions are functions of the filter dimensions, stride, and padding. + * The output dimensions are functions of the filter dimensions, stride, and + * padding. * * The values in the output tensor are computed as: * @@ -220,7 +242,7 @@ typedef enum { * bias[channel] * ) * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * @@ -229,62 +251,77 @@ typedef enum { * Both explicit padding and implicit padding are supported. * * Inputs (explicit padding): - * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. - * * 1: A 4-D tensor, of shape [depth_out, filter_height, filter_width, depth_in], - * specifying the filter. + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], + * specifying the input. + * * 1: A 4-D tensor, of shape + * [depth_out, filter_height, filter_width, depth_in], specifying the + * filter. * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. - * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should - * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. - * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias - * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and - * bias_scale == input_scale * filter_scale. 
- * * 3: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. - * * 4: An INT32 value, specifying the padding on the right,in the ‘width’ dimension. - * * 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. - * * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. - * * 7: An INT32 value, specifying the stride when walking through input - * in the ‘width’ dimension. - * * 8: An INT32 value, specifying the stride when walking through input - * in the ‘height’ dimension. - * * 9: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, the bias + * should also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. For input + * tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias + * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of + * 0 and bias_scale == input_scale * filter_scale. + * * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the left, in the ‘width’ dimension. + * * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the right, in the ‘width’ dimension. + * * 5: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the top, in the ‘height’ dimension. + * * 6: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the bottom, in the ‘height’ dimension. + * * 7: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘width’ dimension. + * * 8: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘height’ dimension. + * * 9: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. 
* * Inputs (implicit padding): - * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. - * * 1: A 4-D tensor, of shape [depth_out, filter_height, filter_width, depth_in], - * specifying the filter. - * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. - * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should - * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. - * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias - * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], + * specifying the input. + * * 1: A 4-D tensor, of shape + * [depth_out, filter_height, filter_width, depth_in], specifying the + * filter. + * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input + * tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, the bias should + * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. For input tensor + * of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias should be + * of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and * bias_scale == input_scale * filter_scale. - * * 3: An INT32 value, specifying the implicit padding scheme, has to be one of the + * * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit + * padding scheme, has to be one of the * {@link PaddingCode} values. - * * 4: An INT32 value, specifying the stride when walking through input - * in the ‘width’ dimension. - * * 5: An INT32 value, specifying the stride when walking through input - * in the ‘height’ dimension. - * * 6: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘width’ dimension. 
+ * * 5: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘height’ dimension. + * * 6: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. * * Outputs: - * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out]. - * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the following - * condition must be satisfied: output_scale > input_scale * filter_scale. + * * 0: The output 4-D tensor, of shape + * [batches, out_height, out_width, depth_out]. For output tensor of + * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the following condition + * must be satisfied: output_scale > input_scale * filter_scale. */ ANEURALNETWORKS_CONV_2D = 3, - /** Performs a depthwise 2-D convolution operation. + /** + * Performs a depthwise 2-D convolution operation. * - * Given an input tensor of shape [batches, height, width, depth_in] and a filter - * tensor of shape [1, filter_height, filter_width, depth_out] containing - * depth_out convolutional filters of depth 1, DEPTHWISE_CONV applies a different - * filter to each input channel (expanding from 1 channel to channel_multiplier channels - * for each), then concatenates the results together. + * Given an input tensor of shape [batches, height, width, depth_in] and a + * filter tensor of shape [1, filter_height, filter_width, depth_out] + * containing depth_out convolutional filters of depth 1, DEPTHWISE_CONV + * applies a different filter to each input channel (expanding from 1 + * channel to channel_multiplier channels for each), then concatenates the + * results together. * * The output has depth_out = depth_in * depth_multiplier channels. - * The output dimensions are functions of the filter dimensions, stride, and padding. + * The output dimensions are functions of the filter dimensions, stride, and + * padding. 
* * The values in the output tensor are computed as: * @@ -294,7 +331,7 @@ typedef enum { * filter[1, di, dj, k * channel_multiplier + q] * ) * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * @@ -303,105 +340,123 @@ typedef enum { * Both explicit padding and implicit padding are supported. * * Inputs (explicit padding): - * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], + * specifying the input. * * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out], * specifying the filter. - * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. - * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should - * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. - * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias - * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and + * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input + * tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, the bias should + * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. For input tensor + * of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias should be + * of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and * bias_scale == input_scale * filter_scale. - * * 3: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. - * * 4: An INT32 value, specifying the padding on the right,in the ‘width’ dimension. - * * 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. - * * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. - * * 7: An INT32 value, specifying the stride when walking through input - * in the ‘width’ dimension. 
- * * 8: An INT32 value, specifying the stride when walking through input - * in the ‘height’ dimension. - * * 9: An INT32 value, specifying the depthwise multiplier. - * * 10: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the left, in the ‘width’ dimension. + * * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the right, in the ‘width’ dimension. + * * 5: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the top, in the ‘height’ dimension. + * * 6: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the bottom, in the ‘height’ dimension. + * * 7: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘width’ dimension. + * * 8: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘height’ dimension. + * * 9: An {@link ANEURALNETWORKS_INT32} scalar, specifying the depthwise + * multiplier. + * * 10: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. * - * Inputs (explicit padding): - * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. + * Inputs (implicit padding): + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], + * specifying the input. * * 1: A 4-D tensor, of shape [1, filter_height, filter_width, depth_out], * specifying the filter. - * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. - * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should - * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. 
- * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias - * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and + * * 2: A 1-D tensor, of shape [depth_out], specifying the bias. For input + * tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, the bias should + * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. For input tensor + * of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias should be + * of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and * bias_scale == input_scale * filter_scale. - * * 3: An INT32 value, specifying the implicit padding scheme, has to be one of the + * * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit + * padding scheme, has to be one of the * {@link PaddingCode} values. - * * 4: An INT32 value, specifying the stride when walking through input - * in the ‘width’ dimension. - * * 5: An INT32 value, specifying the stride when walking through input - * in the ‘height’ dimension. - * * 6: An INT32 value, specifying the depthwise multiplier. - * * 7: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘width’ dimension. + * * 5: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘height’ dimension. + * * 6: An {@link ANEURALNETWORKS_INT32} scalar, specifying the depthwise + * multiplier. + * * 7: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. * * Outputs: - * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth_out]. - * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the following - * condition must be satisfied: output_scale > input_scale * filter_scale. 
+ * * 0: The output 4-D tensor, of shape + * [batches, out_height, out_width, depth_out]. For output tensor of + * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the following condition + * must be satisfied: output_scale > input_scale * filter_scale. */ ANEURALNETWORKS_DEPTHWISE_CONV_2D = 4, - /** Rearranges data from depth into blocks of spatial data. + /** + * Rearranges data from depth into blocks of spatial data. * - * More specifically, this op outputs a copy of the input tensor where values from - * the depth dimension are moved in spatial blocks to the height and width dimensions. - * The value block_size indicates the input block size and how the data is moved. + * More specifically, this op outputs a copy of the input tensor where + * values from the depth dimension are moved in spatial blocks to the height + * and width dimensions. The value block_size indicates the input block size + * and how the data is moved. * - * Chunks of data of size block_size * block_size from depth are rearranged into - * non-overlapping blocks of size block_size x block_size. + * Chunks of data of size block_size * block_size from depth are rearranged + * into non-overlapping blocks of size block_size x block_size. * - * The width of the output tensor is input_depth * block_size, whereas the height is - * input_height * block_size. - * The depth of the input tensor must be divisible by block_size * block_size + * The width of the output tensor is input_depth * block_size, whereas the + * height is input_height * block_size. The depth of the input tensor must + * be divisible by block_size * block_size * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * * Supported tensor rank: 4, with "NHWC" data layout. * * Inputs: - * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. - * * 1: An INT32 value, specifying the block_size. 
block_size must be >=1 and - * block_size * block_size must be a divisor of the input depth. + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], + * specifying the input. + * * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the block_size. + * block_size must be >=1 and block_size * block_size must be a divisor + * of the input depth. * * Outputs: - * * 0: The output 4-D tensor, of shape [batch, height*block_size, width*block_size, - * depth/(block_size*block_size)]. + * * 0: The output 4-D tensor, of shape [batch, height*block_size, + * width*block_size, depth/(block_size*block_size)]. */ ANEURALNETWORKS_DEPTH_TO_SPACE = 5, - /** Dequantizes the input tensor. + /** + * Dequantizes the input tensor. * * The formula is: * * output = (input - zeroPoint) * scale. * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * * Supported tensor rank: up to 4 * * Inputs: - * * 0: A tensor of type {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}. + * * 0: A tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}. * * Outputs: - * * 0: The output tensor of same shape as input0, but with type + * * 0: The output tensor of same shape as input0, but with * {@link ANEURALNETWORKS_TENSOR_FLOAT32}. */ ANEURALNETWORKS_DEQUANTIZE = 6, - /** Looks up sub-tensors in the input tensor. + /** + * Looks up sub-tensors in the input tensor. * * This operator takes for input a tensor of values (Values) and * a one-dimensional tensor of selection indices (Lookups). @@ -413,15 +468,15 @@ typedef enum { * to create the output tensor. * * For example, if Values has shape of [40, 200, 300] and - * Lookups has shape of [3], we would expect all three values - * found in Lookups to be between 0 and 39. The resulting tensor will + * Lookups has shape of [3], all three values found in Lookups are + * expected to be between 0 and 39. The resulting tensor must * have shape of [3, 200, 300]. 
* - * If a value in Lookups is out of bounds, the operation will fail - * and an error will be reported. + * If a value in Lookups is out of bounds, the operation must fail + * and an error must be reported. * * Inputs: - * * 0: Lookups. A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_INT32} type. + * * 0: Lookups. A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_INT32}. * The values are indices into the first dimension of Values. * * 1: Values. An n-D tensor, where n >= 2, from which sub-tensors are * extracted. @@ -433,9 +488,10 @@ typedef enum { */ ANEURALNETWORKS_EMBEDDING_LOOKUP = 7, - /** Computes element-wise floor() on the input tensor. + /** + * Computes element-wise floor() on the input tensor. * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * Supported tensor rank: up to 4 @@ -444,47 +500,56 @@ typedef enum { * * 0: A tensor. * * Outputs: - * * 0: The output tensor, of the same type and dimensions as the input tensor. + * * 0: The output tensor, of the same {@link OperandCode} and dimensions as + * the input tensor. */ ANEURALNETWORKS_FLOOR = 8, - /** Denotes a fully (densely) connected layer, which connects all elements in the input - * tensor with each element in the output tensor. + /** + * Denotes a fully (densely) connected layer, which connects all elements + * in the input tensor with each element in the output tensor. * * This layer implements the operation: * * outputs = activation(inputs * weights’ + bias) * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * * Supported tensor rank: up to 4. * * Inputs: - * * 0: A tensor, specifying the input. If rank is greater than 2, then it gets flattened to - * a 2-D Tensor. 
The 2-D Tensor is handled as if dimensions corresponded to shape - * [batch_size, input_size], where “batch_size” corresponds to the batching dimension, - * and “input_size” is the size of the input. - * * 1: A 2-D tensor, specifying the weights, of shape [num_units, input_size], where - * "num_units" corresponds to the number of output nodes. - * * 2: A 1-D tensor, of shape [num_units], specifying the bias. - * For input tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} type, the bias should - * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. - * For input tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the bias - * should be of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and + * * 0: A tensor of at least rank 2, specifying the input. If rank is + * greater than 2, then it gets flattened to a 2-D Tensor. The + * (flattened) 2-D Tensor is reshaped (if necessary) to + * [batch_size, input_size], where "input_size" corresponds to the + * number of inputs to the layer, matching the second dimension of + * weights, and "batch_size" is calculated by dividing the number of + * elements by "input_size". + * * 1: A 2-D tensor, specifying the weights, of shape + * [num_units, input_size], where "num_units" corresponds to the number + * of output nodes. + * * 2: A 1-D tensor, of shape [num_units], specifying the bias. For input + * tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, the bias should + * also be of {@link ANEURALNETWORKS_TENSOR_FLOAT32}. For input tensor + * of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the bias should be + * of {@link ANEURALNETWORKS_TENSOR_INT32}, with zeroPoint of 0 and * bias_scale == input_scale * filter_scale. - * * 3: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * * 3: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. 
* * Outputs: - * * 0: The output tensor, of shape [batch_size, num_units]. - * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the following - * condition must be satisfied: output_scale > input_scale * filter_scale. + * * 0: The output tensor, of shape [batch_size, num_units]. For output + * tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, the following + * condition must be satisfied: + * output_scale > input_scale * filter_scale. */ ANEURALNETWORKS_FULLY_CONNECTED = 9, - /** Looks up sub-tensors in the input tensor using a key-value map. + /** + * Looks up sub-tensors in the input tensor using a key-value map. * * This operator takes for input a tensor of values (Values), * a one-dimensional tensor of selection values (Lookups) and @@ -498,37 +563,41 @@ typedef enum { * same index as the Maps entry that matches the value in Lookups. * * For a hit, the corresponding sub-tensor of Values is included - * in the Output tensor. For a miss, the corresponding sub-tensor in - * Output will have zero values. + * in the Output tensor. For a miss, the corresponding sub-tensor in + * Output must have zero values. * * For example, if Values has shape of [40, 200, 300], * Keys should have a shape of [40]. If Lookups tensor has shape - * of [3], we're concatenating three slices, so the resulting tensor - * will have the shape of [3, 200, 300]. If the first entry in - * Lookups has the value 123456, we'll look for that value in Keys tensor. - * If the sixth entry of Keys contains 123456, we'll select the sixth - * slice of Values. If no entry in Keys has 123456, a slice of zeroes - * will be concatenated. + * of [3], three slices are being concatenated, so the resulting tensor + * must have the shape of [3, 200, 300]. If the first entry in Lookups + * has the value 123456, that value must be located in Keys tensor. + * If the sixth entry of Keys contains 123456, the sixth slice of Values + * must be selected. 
If no entry in Keys has 123456, a slice of zeroes + * must be concatenated. * * Inputs: - * * 0: Lookups. A 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor with shape [ k ]. - * * 1: Keys. A 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor with shape [ n ]; - * Keys and Values pair represent a map, i.e., the ith element - * in Keys (Keys[i]) is the key to select the ith sub-tensor - * in Values (Values[i]), where 0 <= i <= n-1. - * Keys tensor *MUST* be sorted in ascending order. - * * 2: Values. A tensor with shape of [ n, … ]; i.e., the first dimension must be n. + * * 0: Lookups. A 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor with + * shape [ k ]. + * * 1: Keys. A 1-D {@link ANEURALNETWORKS_TENSOR_INT32} tensor with shape + * [ n ]; Keys and Values pair represent a map, i.e., the ith element + * in Keys (Keys[i]) is the key to select the ith sub-tensor in Values + * (Values[i]), where 0 <= i <= n-1. Keys tensor *MUST* be sorted in + * ascending order. + * * 2: Values. A tensor with shape of [ n, … ]; i.e., the first dimension + * must be n. * * Outputs: * * 0: Output. A tensor with shape [ k …]. * * 1: Hits. A boolean tensor with shape [ k ] indicates whether the lookup * hits (True) or not (False). - * Stored as {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} with offset 0 and scale 1.0f. + * Stored as {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} with offset 0 + * and scale 1.0f. * A non-zero byte represents True, a hit. A zero indicates otherwise. */ ANEURALNETWORKS_HASHTABLE_LOOKUP = 10, - /** Applies L2 normalization along the depth dimension. + /** + * Applies L2 normalization along the depth dimension. * * The values in the output tensor are computed as: * @@ -536,31 +605,37 @@ typedef enum { * input[batch, row, col, channel] / * sqrt(sum_{c} pow(input[batch, row, col, c], 2)) * - * For input tensor with more dimensions, independently normalizes each 1-D slice along dimension dim. 
+ * For input tensor with more dimensions, independently normalizes each 1-D + * slice along dimension dim. * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * - * Supported tensor rank: 4, with "NHWC" data layout (i.e., Num_samples, Height, Width, and Channels). + * Supported tensor rank: 4, with "NHWC" data layout (i.e., Num_samples, + * Height, Width, and Channels). * * Inputs: * * 0: A 4-D tensor, of shape [batches, height, width, depth]. * * Outputs: - * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth]. + * * 0: The output 4-D tensor, of the same shape as input + * [batches, height, width, depth]. */ ANEURALNETWORKS_L2_NORMALIZATION = 11, - /** Performs an 2-D L2 pooling operation. + /** + * Performs a 2-D L2 pooling operation. * - * The output dimensions are functions of the filter dimensions, stride, and padding. + * The output dimensions are functions of the filter dimensions, stride, and + * padding. * * The values in the output tensor are computed as: * * output[batch, row, col, channel] = - * sqrt(sum_{i, j} pow(input[batch, row + i, col + j, channel], 2) / sum(1)) + * sqrt(sum_{i, j} pow(input[batch, row + i, col + j, channel], 2) / + * sum(1)) + * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * Supported tensor rank: 4, with "NHWC" data layout. @@ -568,74 +643,96 @@ typedef enum { * Both explicit padding and implicit padding are supported. * * Inputs (explicit padding): - * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. - * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. - * * 2: An INT32 value, specifying the padding on the right,in the ‘width’ dimension. - * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. 
- * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. - * * 5: An INT32 value, specifying the stride when walking through input - * in the ‘width’ dimension. - * * 6: An INT32 value, specifying the stride when walking through input - * in the ‘height’ dimension. - * * 7: An INT32 value, specifying the filter width. - * * 8: An INT32 value, specifying the filter height. - * * 9: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying + * the input. + * * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the left, in the ‘width’ dimension. + * * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the right, in the ‘width’ dimension. + * * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the top, in the ‘height’ dimension. + * * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the bottom, in the ‘height’ dimension. + * * 5: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘width’ dimension. + * * 6: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘height’ dimension. + * * 7: An {@link ANEURALNETWORKS_INT32} scalar, specifying the filter + * width. + * * 8: An {@link ANEURALNETWORKS_INT32} scalar, specifying the filter + * height. + * * 9: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. * * Inputs (implicit padding): - * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. - * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying + * the input. 
+ * * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit + * padding scheme, has to be one of the * {@link PaddingCode} values. - * * 2: An INT32 value, specifying the stride when walking through input - * in the ‘width’ dimension. - * * 3: An INT32 value, specifying the stride when walking through input - * in the ‘height’ dimension. - * * 4: An INT32 value, specifying the filter width. - * * 5: An INT32 value, specifying the filter height. - * * 6: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘width’ dimension. + * * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘height’ dimension. + * * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the filter + * width. + * * 5: An {@link ANEURALNETWORKS_INT32} scalar, specifying the filter + * height. + * * 6: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. * * Outputs: - * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth]. + * * 0: The output 4-D tensor, of shape + * [batches, out_height, out_width, depth]. */ ANEURALNETWORKS_L2_POOL_2D = 12, - /** Applies Local Response Normalization along the depth dimension. + /** + * Applies Local Response Normalization along the depth dimension. * - * The 4-D input tensor is treated as a 3-D array of 1-D vectors (along the last - * dimension), and each vector is normalized independently. Within a given vector, - * each component is divided by the weighted, squared sum of inputs within depth_radius. + * The 4-D input tensor is treated as a 3-D array of 1-D vectors (along the + * last dimension), and each vector is normalized independently. 
Within a + * given vector, each component is divided by the weighted, squared sum of + * inputs within depth_radius. * * The output is calculated using this formula: * - * sqr_sum[a, b, c, d] = - * sum(pow(input[a, b, c, d - depth_radius : d + depth_radius + 1], 2) + * sqr_sum[a, b, c, d] = sum( + * pow(input[a, b, c, d - depth_radius : d + depth_radius + 1], 2)) * output = input / pow((bias + alpha * sqr_sum), beta) * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * Supported tensor rank: 4, with "NHWC" data layout. * * Inputs: - * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. - * * 1: An INT32 value, specifying the radius of the normalization window. - * * 2: A FLOAT32 value, specifying the bias, must not be zero. - * * 3: A FLOAT32 value, specifying the scale factor, alpha. - * * 4: A FLOAT32 value, specifying the exponent, beta. + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying + * the input. + * * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the radius of + * the normalization window. + * * 2: An {@link ANEURALNETWORKS_FLOAT32} scalar, specifying the bias, must + * not be zero. + * * 3: An {@link ANEURALNETWORKS_FLOAT32} scalar, specifying the scale + * factor, alpha. + * * 4: An {@link ANEURALNETWORKS_FLOAT32} scalar, specifying the exponent, + * beta. * * Outputs: * * 0: The output tensor of same shape as input0. */ ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION = 13, - /** Computes sigmoid activation on the input tensor element-wise. + /** + * Computes sigmoid activation on the input tensor element-wise. 
* * The output is calculated using this formula: * * output = 1 / (1 + exp(-input)) * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * @@ -646,7 +743,7 @@ typedef enum { * * Outputs: * * 0: The output tensor of same shape as input0. - * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, + * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, * the scale must be 1.f / 256 and the zeroPoint must be 0. */ ANEURALNETWORKS_LOGISTIC = 14, @@ -662,18 +759,19 @@ typedef enum { * * * 1: Input. Dim.size >= 1, no restriction on DataType. * * 2: Weight. Optional. Dim.size == 1, DataType: Float. - * If not set, each input element is considered to have the same weight of - * 1.0. + * If not set, each input element is considered to have the same weight + * of 1.0. * Tensor[1].Dim[0] == Tensor[2].Dim[0] * * 3: Type: * Sparse: Value LSHProjectionType_SPARSE(=1). * Computed bit vector is considered to be sparse. - * Each output element is an int32 made up of multiple bits computed from - * hash functions. + * Each output element is an int32 made up of multiple bits + * computed from hash functions. * * Dense: Value LSHProjectionType_DENSE(=2). - * Computed bit vector is considered to be dense. Each output element - * represents a bit and can take the value of either 0 or 1. + * Computed bit vector is considered to be dense. Each output + * element represents a bit and can take the value of either + * 0 or 1. * * Outputs: * * 0: If the projection type is sparse: @@ -686,117 +784,211 @@ typedef enum { ANEURALNETWORKS_LSH_PROJECTION = 15, /** - * Long short-term memory unit (LSTM) recurrent network layer. - * - * The default non-peephole implementation is based on: - * http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf + * Performs a single time step in a Long Short-Term Memory (LSTM) layer + * + * The LSTM operation is described by the following equations. 
+ * + * \f{eqnarray*}{ + * i_t =& \sigma(W_{xi}x_t+W_{hi}h_{t-1}+W_{ci}C_{t-1}+b_i) & \\ + * f_t =& \sigma(W_{xf}x_t+W_{hf}h_{t-1}+W_{cf}C_{t-1}+b_f) & \\ + * C_t =& clip(f_t \odot C_{t-1} + i_t \odot + * g(W_{xc}x_t+W_{hc}h_{t-1}+b_c),\ t_{cell}) & \\ + * o_t =& \sigma(W_{xo}x_t+W_{ho}h_{t-1}+W_{co}C_t+b_o) & \\ + * & & \\ + * & clip(W_{proj}(o_t \odot g(C_t))+b_{proj},\ t_{proj}) + * & if\ there\ is\ a\ projection; \\ + * h_t =& & \\ + * & o_t \odot g(C_t) & otherwise. \\ + * \f} + * Where: + * * \f$x_t\f$ is the input, + * * \f$i_t\f$ is the input gate, + * * \f$f_t\f$ is the forget gate, + * * \f$C_t\f$ is the cell state, + * * \f$o_t\f$ is the output, + * * \f$h_t\f$ is the output state, + * * \f$\sigma\f$ is the logistic sigmoid function, + * * \f$g\f$ is the cell input and cell output activation function, usually + * \f$tanh\f$, + * * \f$W_{xi}\f$ is the input-to-input weight matrix, + * * \f$W_{hi}\f$ is the recurrent-to-input weight matrix, + * * \f$W_{ci}\f$ is the cell-to-input weight matrix, + * * \f$b_i\f$ is the input gate bias, + * * \f$W_{xf}\f$ is the input-to-forget weight matrix, + * * \f$W_{hf}\f$ is the recurrent-to-forget weight matrix, + * * \f$W_{cf}\f$ is the cell-to-forget weight matrix, + * * \f$b_f\f$ is the forget gate bias, + * * \f$W_{xc}\f$ is the input-to-cell weight matrix, + * * \f$W_{hc}\f$ is the recurrent-to-cell weight matrix, + * * \f$b_c\f$ is the cell bias, + * * \f$W_{xo}\f$ is the input-to-output weight matrix, + * * \f$W_{ho}\f$ is the recurrent-to-output weight matrix, + * * \f$W_{co}\f$ is the cell-to-output weight matrix, + * * \f$b_o\f$ is the output gate bias, + * * \f$W_{proj}\f$ is the projection weight matrix, + * * \f$b_{proj}\f$ is the projection bias, + * * \f$t_{cell}\f$ is the threshold for clipping the cell state, and + * * \f$t_{proj}\f$ is the threshold for clipping the projected output. 
+ * * \f$\odot\f$ is the + * <a href="https://en.wikipedia.org/wiki/Hadamard_product_(matrices)"> + * Hadamard product</a> that takes two matrices and produces another + * matrix, each element of which is the product of the corresponding + * elements of the input matrices. + * + * The operation has the following independently optional inputs: + * * The input-to-input weights (\f$W_{xi}\f$), recurrent-to-input weights + * (\f$W_{hi}\f$), cell-to-input (\f$W_{ci}\f$) weights, and input gate + * bias (\f$b_i\f$) either all have values, or none of them have values + * (i.e., all set to null). If they have no values, coupling of input and + * forget gates (CIFG) is used, in which case the input gate (\f$i_t\f$) + * is calculated using the following equation instead. + * \f{eqnarray*}{ + * i_t = 1 - f_t + * \f} + * * The cell-to-forget weights (\f$W_{cf}\f$) and cell-to-output weights + * (\f$W_{co}\f$) either both have values or neither of them have values. + * If they have values, the peephole optimization is used. Additionally, + * if CIFG is not used, cell-to-input weights (\f$W_{ci}\f$) is also + * required to have values for peephole optimization. + * * The projection weights (\f$W_{proj}\f$) is required only for the + * recurrent projection layer, and should otherwise have no value. + * * The projection bias (\f$b_{proj}\f$) may (but not required to) have a + * value if the recurrent projection layer exists, and should otherwise + * have no value. + * + * References: + * + * The default non-peephole non-CIFG implementation is based on: + * http://www.bioinf.jku.at/publications/older/2604.pdf * S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural * Computation, 9(8):1735-1780, 1997. * - * The peephole implementation is based on: + * The peephole implementation and projection layer is based on: * https://research.google.com/pubs/archive/43905.pdf * Hasim Sak, Andrew Senior, and Francoise Beaufays. 
"Long short-term memory - * recurrent neural network architectures for large scale acoustic modeling." - * INTERSPEECH, 2014. + * recurrent neural network architectures for large scale acoustic + * modeling." INTERSPEECH, 2014. + * (However, the concept of peephole optimization was introduced in work + * prior to this paper.) * * The coupling of input and forget gate (CIFG) is based on: * http://arxiv.org/pdf/1503.04069.pdf * Greff et al. "LSTM: A Search Space Odyssey" * - * The class has the following independently optional inputs: - * * If input gate (if CIFG): “input_to_forget_weights”, - * “recurrent_to_input_weights”, “cell_to_input_weights”, “input_gate_bias”. - * * If no peephole connections: “cell_to_input_weights”, - * “cell_to_forget_weights”, “cell_to_output_weights”. - * * If no projection layer: “projection_weights” and “projection_bias”. - * * If no projection bias: “projection_bias”. - * - * Supported tensor types (type T): + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * Inputs: - * * 0: Input. - * A 2-D tensor of type T, of shape [batch_size, input_size], where - * “batch_size” corresponds to the batching dimension, and “input_size” - * is the size of the input. - * * 1: input_to_input_weights. - * A 2-D tensor of type T, of shape [num_units, input_size], where - * “num_units” corresponds to the number of cell units. - * * 2: input_to_forget_weights. - * A 2-D tensor of type T, of shape [num_units, input_size]. - * * 3: input_to_cell_weights. - * A 2-D tensor of type T, of shape [num_units, input_size]. - * * 4: input_to_output_weights. - * A 2-D tensor of type T, of shape [num_units, input_size]. - * * 5: recurrent_to_input_weights. - * A 2-D tensor of type T, of shape [num_units, output_size], where - * “output_size” corresponds to either the number of cell units (i.e., - * “num_units”), or the second dimension of the “projection_weights”, if - * defined. - * * 6: recurrent_to_forget_weights. 
- * A 2-D tensor of type T, of shape [num_units, output_size]. - * * 7: recurrent_to_cell_weights. - * A 2-D tensor of type T, of shape [num_units, output_size]. - * * 8: recurrent_to_output_weights. - * A 2-D tensor of type T, of shape [num_units, output_size]. - * * 9: cell_to_input_weights. - * A 1-D tensor of type T, of shape [num_units]. - * * 10:cell_to_forget_weights. - * A 1-D tensor of type T, of shape [num_units]. - * * 11:cell_to_output_weights. - * A 1-D tensor of type T, of shape [num_units]. - * * 12:input_gate_bias. - * A 1-D tensor of type T, of shape [num_units]. - * * 13:forget_gate_bias. - * A 1-D tensor of type T, of shape [num_units]. - * * 14:cell_bias. - * A 1-D tensor of type T, of shape [num_units]. - * * 15:output_gate_bias. - * A 1-D tensor of type T, of shape [num_units]. - * * 16:projection_weights. - * A 2-D tensor of type T, of shape [output_size, num_units]. - * * 17:projection_bias. - * A 1-D tensor of type T, of shape [output_size]. - * * 18: output_state (in). - * A 2-D tensor of type T, of shape [batch_size, output_size]. - * * 19: cell_state (in). - * A 2-D tensor of type T, of shape [batch_size, num_units]. - * * 20:fused_activation_function. - * An optional {@link FuseCode} value indicating the activation - * function. - * If “NONE” is specified then it results in a linear activation. - * * 21:cell_clip. - * A clipping threshold for the cell state, such that values are bound - * within [-cell_clip, cell_clip]. If set to 0.0 then clipping is - * disabled. - * * 22:proj_clip. - * A clipping threshold for the output from the projection layer, such - * that values are bound within [-proj_clip, proj_clip]. If set to 0.0 + * * 0: The input (\f$x_t\f$). + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, input_size], where “batch_size” corresponds to the + * batching dimension, and “input_size” is the size of the input. + * * 1: The input-to-input weights (\f$W_{xi}\f$). Optional. 
+ * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units, input_size], where “num_units” corresponds to the + * number of cell units. + * * 2: The input-to-forget weights (\f$W_{xf}\f$). + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units, input_size]. + * * 3: The input-to-cell weights (\f$W_{xc}\f$). + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units, input_size]. + * * 4: The input-to-output weights (\f$W_{xo}\f$). + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units, input_size]. + * * 5: The recurrent-to-input weights (\f$W_{hi}\f$). Optional. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units, output_size], where “output_size” corresponds to either + * the number of cell units (i.e., “num_units”), or the second + * dimension of the “projection_weights”, if defined. + * * 6: The recurrent-to-forget weights (\f$W_{hf}\f$). + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units, output_size]. + * * 7: The recurrent-to-cell weights (\f$W_{hc}\f$). + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units, output_size]. + * * 8: The recurrent-to-output weights (\f$W_{ho}\f$). + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units, output_size]. + * * 9: The cell-to-input weights (\f$W_{ci}\f$). Optional. + * A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units]. + * * 10:The cell-to-forget weights (\f$W_{cf}\f$). Optional. + * A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units]. + * * 11:The cell-to-output weights (\f$W_{co}\f$). Optional. + * A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units]. + * * 12:The input gate bias (\f$b_i\f$). Optional. + * A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units]. 
+ * * 13:The forget gate bias (\f$b_f\f$). + * A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units]. + * * 14:The cell bias (\f$b_c\f$). + * A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units]. + * * 15:The output gate bias (\f$b_o\f$). + * A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units]. + * * 16:The projection weights (\f$W_{proj}\f$). Optional. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [output_size, num_units]. + * * 17:The projection bias (\f$b_{proj}\f$). Optional. + * A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [output_size]. + * * 18:The output state (in) (\f$h_{t-1}\f$). + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, output_size]. + * * 19:The cell state (in) (\f$C_{t-1}\f$). + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, num_units]. + * * 20:The activation function (\f$g\f$). + * A value indicating the activation function: + * <ul> + * <li>0: None; + * <li>1: Relu; + * <li>3: Relu6; + * <li>4: Tanh; + * <li>6: Sigmoid. + * </ul> + * * 21:The clipping threshold (\f$t_{cell}\f$) for the cell state, such + * that values are bound within [-cell_clip, cell_clip]. If set to 0.0 * then clipping is disabled. + * * 22:The clipping threshold (\f$t_{proj}\f$) for the output from the + * projection layer, such that values are bound within + * [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled. * * Outputs: - * * 0: scratch_buffer. - * A 3-D tensor of type T, of shape [batch_size, num_cell, 4]. - * * 1: output_state (out). - * A 2-D tensor of type T, of shape [batch_size, output_size]. - * * 2: cell_state (out). - * A 2-D tensor of type T, of shape [batch_size, num_units]. - * * 3: output. - * A 2-D tensor of type T, of shape [batch_size, output_size]. This is - * effectively the same as the current “output_state” value. 
+ * * 0: The scratch buffer. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, num_units * 4] with CIFG, or + * [batch_size, num_units * 3] without CIFG. + * * 1: The output state (out) (\f$h_t\f$). + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, output_size]. + * * 2: The cell state (out) (\f$C_t\f$). + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, num_units]. + * * 3: The output (\f$o_t\f$). + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, output_size]. This is effectively the same as the + * current “output state (out)” value. */ ANEURALNETWORKS_LSTM = 16, - /** Performs an 2-D max pooling operation. + /** + * Performs a 2-D max pooling operation. * - * The output dimensions are functions of the filter dimensions, stride, and padding. + * The output dimensions are functions of the filter dimensions, stride, and + * padding. * * The values in the output tensor are computed as: * * output[batch, row, col, channel] = * max_{i, j} (input[batch, row + i, col + j, channel]) * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * @@ -805,51 +997,68 @@ typedef enum { * Both explicit padding and implicit padding are supported. * * Inputs (explicit padding): - * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. - * * 1: An INT32 value, specifying the padding on the left, in the ‘width’ dimension. - * * 2: An INT32 value, specifying the padding on the right,in the ‘width’ dimension. - * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension. - * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension. - * * 5: An INT32 value, specifying the stride when walking through input - * in the ‘width’ dimension. 
- * * 6: An INT32 value, specifying the stride when walking through input - * in the ‘height’ dimension. - * * 7: An INT32 value, specifying the filter width. - * * 8: An INT32 value, specifying the filter height. - * * 9: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying + * the input. + * * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the left, in the ‘width’ dimension. + * * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the right, in the ‘width’ dimension. + * * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the top, in the ‘height’ dimension. + * * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the padding on + * the bottom, in the ‘height’ dimension. + * * 5: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘width’ dimension. + * * 6: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘height’ dimension. + * * 7: An {@link ANEURALNETWORKS_INT32} scalar, specifying the filter + * width. + * * 8: An {@link ANEURALNETWORKS_INT32} scalar, specifying the filter + * height. + * * 9: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. * * Inputs (implicit padding): - * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. - * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying + * the input. + * * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the implicit + * padding scheme, has to be one of the * {@link PaddingCode} values. 
- * * 2: An INT32 value, specifying the stride when walking through input - * in the ‘width’ dimension. - * * 3: An INT32 value, specifying the stride when walking through input - * in the ‘height’ dimension. - * * 4: An INT32 value, specifying the filter width. - * * 5: An INT32 value, specifying the filter height. - * * 6: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘width’ dimension. + * * 3: An {@link ANEURALNETWORKS_INT32} scalar, specifying the stride when + * walking through input in the ‘height’ dimension. + * * 4: An {@link ANEURALNETWORKS_INT32} scalar, specifying the filter + * width. + * * 5: An {@link ANEURALNETWORKS_INT32} scalar, specifying the filter + * height. + * * 6: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. * * Outputs: - * * 0: The output 4-D tensor, of shape [batches, out_height, out_width, depth]. + * * 0: The output 4-D tensor, of shape + * [batches, out_height, out_width, depth]. */ ANEURALNETWORKS_MAX_POOL_2D = 17, - /** Multiplies two tensors, element-wise. + /** + * Multiplies two tensors, element-wise. * - * Takes two input tensors of identical type and compatible dimensions. The output - * is the product of both input tensors, optionally modified by an activation function. + * Takes two input tensors of identical {@link OperandCode} and compatible + * dimensions. The output is the product of both input tensors, optionally + * modified by an activation function. * * Two dimensions are compatible when: * 1. they are equal, or * 2. one of them is 1 * - * The size of the resulting output is the maximum size along each dimension of the - * input operands. It starts with the trailing dimensions, and works its way forward. 
+ * The size of the resulting output is the maximum size along each dimension + * of the input operands. It starts with the trailing dimensions, and works + * its way forward. * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * @@ -857,24 +1066,28 @@ typedef enum { * * Inputs: * * 0: A tensor. - * * 1: A tensor of the same type, and compatible dimensions as input0. - * * 2: An INT32 value, and has to be one of the {@link FuseCode} values. - * Specifies the activation to invoke on the result of each addition. + * * 1: A tensor of the same {@link OperandCode}, and compatible dimensions + * as input0. + * * 2: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. * * Outputs: - * * 0: The product, a tensor of the same type as input0. - * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, the following - * condition must be satisfied: output_scale > input1_scale * input2_scale. + * * 0: The product, a tensor of the same {@link OperandCode} as input0. + * For output tensor of {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, + * the following condition must be satisfied: + * output_scale > input1_scale * input2_scale. */ ANEURALNETWORKS_MUL = 18, - /** Computes rectified linear activation on the input tensor element-wise. + /** + * Computes rectified linear activation on the input tensor element-wise. * * The output is calculated using this formula: * * output = max(0, input) * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * @@ -888,13 +1101,14 @@ typedef enum { */ ANEURALNETWORKS_RELU = 19, - /** Computes rectified linear 1 activation on the input tensor element-wise. 
+ /** + * Computes rectified linear 1 activation on the input tensor element-wise. * * The output is calculated using this formula: * * output = min(1.f, max(-1.f, input)) * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * @@ -908,13 +1122,14 @@ typedef enum { */ ANEURALNETWORKS_RELU1 = 20, - /** Computes rectified linear 6 activation on the input tensor element-wise. + /** + * Computes rectified linear 6 activation on the input tensor element-wise. * * The output is calculated using this formula: * * output = min(6, max(0, input)) * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * @@ -928,12 +1143,13 @@ typedef enum { */ ANEURALNETWORKS_RELU6 = 21, - /** Reshapes a tensor. + /** + * Reshapes a tensor. * - * Given tensor, this operation returns a tensor that has the same values as tensor, - * but with a newly specified shape. + * Given tensor, this operation returns a tensor that has the same values as + * tensor, but with a newly specified shape. * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * @@ -941,32 +1157,38 @@ typedef enum { * * Inputs: * * 0: A tensor, specifying the tensor to be reshaped. - * * 1: A 1-D tensor of type {@link ANEURALNETWORKS_TENSOR_INT32}, defining the shape - * of the output tensor. The number of elements implied by shape must be the same - * as the number of elements in the input tensor. + * * 1: A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, defining the + * shape of the output tensor. The number of elements implied by shape + * must be the same as the number of elements in the input tensor. * * Outputs: * * 0: The output tensor, of shape specified by the input shape. 
*/ ANEURALNETWORKS_RESHAPE = 22, - /** Resizes images to given size using the bilinear interpretation. + /** + * Resizes images to given size using the bilinear interpretation. * - * Resized images will be distorted if their output aspect ratio is not the same as - * input aspect ratio. + * Resized images must be distorted if their output aspect ratio is not the + * same as input aspect ratio. The corner pixels of output may not be the + * same as corner pixels of input. * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * Supported tensor rank: 4, with "NHWC" data layout. * * Inputs: - * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input. - * * 1: An INT32 value, specifying the output height of the output tensor. - * * 2: An INT32 value, specifying the output width of the output tensor. + * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying + * the input. + * * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the output + * height of the output tensor. + * * 2: An {@link ANEURALNETWORKS_INT32} scalar, specifying the output + * width of the output tensor. * * Outputs: - * * 0: The output 4-D tensor, of shape [batches, new_height, new_width, depth]. + * * 0: The output 4-D tensor, of shape + * [batches, new_height, new_width, depth]. */ ANEURALNETWORKS_RESIZE_BILINEAR = 23, @@ -974,7 +1196,8 @@ typedef enum { * A basic recurrent neural network layer. * * This layer implements the operation: - * outputs = state = activation(inputs * input_weights + state * recurrent_weights + bias) + * outputs = state = activation(inputs * input_weights + + * state * recurrent_weights + bias) * * Where: * * “input_weights” is a weight matrix that multiplies the inputs; @@ -985,41 +1208,49 @@ typedef enum { * * “activation” is the function passed as the “fused_activation_function” * argument (if not “NONE”). 
* - * Supported tensor types (Type T): + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * Inputs: * * 0: input. - * A 2-D tensor of type T, of shape [batch_size, input_size], where - * “batch_size” corresponds to the batching dimension, and “input_size” is - * the size of the input. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32} of shape + * [batch_size, input_size], where “batch_size” corresponds to the + * batching dimension, and “input_size” is the size of the input. * * 1: weights. - * A 2-D tensor of type T, of shape [num_units, input_size], where - * “num_units” corresponds to the number of units. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units, input_size], where “num_units” corresponds to the + * number of units. * * 2: recurrent_weights. - * A 2-D tensor of type T, of shape [num_units, num_units], with columns - * corresponding to the weights from each unit. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units, num_units], with columns corresponding to the weights + * from each unit. * * 3: bias. - * A 1-D tensor of type T, of shape [num_units]. + * A 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units]. * * 4: hidden state (in). - * A 2-D tensor of type T, of shape [batch_size, num_units]. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, num_units]. * * 5: fused_activation_function. - * An optional {@link FuseCode} value indicating the activation - * function. If “NONE” is specified then it results in a linear - * activation. + * An optional {@link FuseCode} value indicating the + * activation function. If “NONE” is specified then it results in a + * linear activation. * * Outputs: * * 0: hidden state (out). - * A 2-D tensor of type T, of shape [batch_size, num_units]. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, num_units]. 
* * * 1: output. - * A 2-D tensor of type T, of shape [batch_size, num_units]. This is - * effectively the same as the current state value. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, num_units]. This is effectively the same as the + * current state value. */ ANEURALNETWORKS_RNN = 24, - /** Computes the softmax activation on the input tensor element-wise, per batch, by - * normalizing the input vector so the maximum coefficient is zero. + /** + * Computes the softmax activation on the input tensor element-wise, per + * batch, by normalizing the input vector so the maximum coefficient is + * zero. * * The output is calculated using this formula: * @@ -1027,7 +1258,7 @@ typedef enum { * exp((input[batch, i] - max(input[batch, :])) * beta) / * sum_{k}{exp((input[batch, k] - max(input[batch, :])) * beta)} * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * @@ -1035,41 +1266,46 @@ typedef enum { * * Inputs: * * 0: A 2-D or 4-D tensor, specifying the tensor to be reshaped. - * * 1: A FLOAT32 value, specifying the positive scaling factor for the exponent, beta. + * * 1: An {@link ANEURALNETWORKS_FLOAT32} scalar, specifying the positive + * scaling factor for the exponent, beta. * * Outputs: * * 0: The output tensor of same shape as input0. - * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} type, + * For {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM}, * the scale must be 1.f / 256 and the zeroPoint must be 0. */ ANEURALNETWORKS_SOFTMAX = 25, - /** Rearranges blocks of spatial data, into depth. + /** + * Rearranges blocks of spatial data, into depth. * - * More specifically, this op outputs a copy of the input tensor where values from - * the height and width dimensions are moved to the depth dimension. - * The value block_size indicates the input block size and how the data is moved. 
+ * More specifically, this op outputs a copy of the input tensor where + * values from the height and width dimensions are moved to the depth + * dimension. The value block_size indicates the input block size and how + * the data is moved. * - * Chunks of data of size block_size * block_size from depth are rearranged into - * non-overlapping blocks of size block_size x block_size. + * Chunks of data of size block_size * block_size from depth are rearranged + * into non-overlapping blocks of size block_size x block_size. * * The depth of the output tensor is input_depth * block_size * block_size. * The input tensor's height and width must be divisible by block_size. * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} * * Supported tensor rank: 4, with "NHWC" data layout. * * Inputs: - * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], specifying the input. - * * 1: An INT32 value, specifying the block_size. block_size must be >=1 and - * block_size must be a divisor of both the input height and width. + * * 0: A 4-D tensor, of shape [batches, height, width, depth_in], + * specifying the input. + * * 1: An {@link ANEURALNETWORKS_INT32} scalar, specifying the block_size. + * block_size must be >=1 and block_size must be a divisor of both the + * input height and width. * * Outputs: - * * 0: The output 4-D tensor, of shape [batch, height/block_size, width/block_size, - * depth*block_size*block_size]. + * * 0: The output 4-D tensor, of shape [batches, height/block_size, + * width/block_size, depth_in*block_size*block_size]. */ ANEURALNETWORKS_SPACE_TO_DEPTH = 26, @@ -1086,22 +1322,22 @@ typedef enum { * INTERSPEECH, 2015. * * It processes the incoming input using a 2-stage filtering mechanism: - * * stage 1 performs filtering on the "features" dimension, whose outputs get - * pushed into a memory of fixed-size memory_size. 
+ * * stage 1 performs filtering on the "features" dimension, whose outputs + * get pushed into a memory of fixed-size memory_size. * * stage 2 performs filtering on the "time" dimension of the memory_size * memoized outputs of stage 1. * * Specifically, for rank 1, this layer implements the operation: * - * memory = push(conv1d(inputs, weights_feature, feature_dim, - * "ANEURALNETWORKS_PADDING_VALID")); - * outputs = activation(memory * weights_time + bias); + * memory = push(conv1d(inputs, weights_feature, feature_dim, + * "ANEURALNETWORKS_PADDING_VALID")); + * outputs = activation(memory * weights_time + bias); * * Where: * * “weights_feature” is a weights matrix that processes the inputs (by - * convolving the input with every “feature filter”), and whose outputs get - * pushed, stacked in order, into the fixed-size “memory” (the oldest entry - * gets dropped); + * convolving the input with every “feature filter”), and whose outputs + * get pushed, stacked in order, into the fixed-size “memory” (the oldest + * entry gets dropped); * * “weights_time” is a weights matrix that processes the “memory” (by a * batched matrix multiplication on the num_units); * * “bias” is an optional bias vector (added to each output vector in the @@ -1112,45 +1348,53 @@ typedef enum { * Each rank adds a dimension to the weights matrices by means of stacking * the filters. * - * Supported tensor types (type T): + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * Inputs: * * 0: input. - * A 2-D tensor of type T, of shape [batch_size, input_size], where - * “batch_size” corresponds to the batching dimension, and “input_size” is - * the size of the input. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, input_size], where “batch_size” corresponds to the + * batching dimension, and “input_size” is the size of the input. * * 1: weights_feature. 
- * A 2-D tensor of type T, of shape [num_units, input_size], where - * “num_units” corresponds to the number of units. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units, input_size], where “num_units” corresponds to the + * number of units. * * 2: weights_time. - * A 2-D tensor of type T, of shape [num_units, memory_size], where - * “memory_size” corresponds to the fixed-size of the memory. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [num_units, memory_size], where “memory_size” corresponds to the + * fixed-size of the memory. * * 3: bias. - * An optional 1-D tensor of type T, of shape [num_units]. + * An optional 1-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, + * of shape [num_units]. * * 4: state (in). - * A 2-D tensor of type T, of shape [batch_size, (memory_size - 1) * num_units * rank]. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, (memory_size - 1) * num_units * rank]. * * 5: rank. * The rank of the SVD approximation. * * 6: fused_activation_function. - * An optional {@link FuseCode} value indicating the activation function. - * If “NONE” is specified then it results in a linear activation. + * An optional {@link FuseCode} value indicating the + * activation function. If “NONE” is specified then it results in a + * linear activation. * * Outputs: * * 0: state (out). - * A 2-D tensor of type T, of shape [batch_size, (memory_size - 1) * num_units * rank]. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, (memory_size - 1) * num_units * rank]. * * 1: output. - * A 2-D tensor of type T, of shape [batch_size, num_units]. + * A 2-D tensor of {@link ANEURALNETWORKS_TENSOR_FLOAT32}, of shape + * [batch_size, num_units]. */ ANEURALNETWORKS_SVDF = 27, - /** Computes hyperbolic tangent of input tensor element-wise. + /** + * Computes hyperbolic tangent of input tensor element-wise. 
* * The output is calculated using this formula: * * output = tanh(input) * - * Supported tensor types: + * Supported tensor {@link OperandCode}: * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} * * Supported tensor rank: up to 4. @@ -1162,6 +1406,304 @@ typedef enum { * * 0: The output tensor of same shape as input0. */ ANEURALNETWORKS_TANH = 28, + + // TODO: make the description easier to understand. + /** + * BatchToSpace for N-dimensional tensors. + * + * This operation reshapes the batch dimension (dimension 0) into M + 1 + * dimensions of shape block_shape + [batch], interleaves these blocks back + * into the grid defined by the spatial dimensions [1, ..., M], to obtain a + * result with the same rank as the input. + * + * This is the reverse of SpaceToBatch. + * + * Supported tensor {@link OperandCode}: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: 4 + * + * Inputs: + * * 0: An n-D tensor, specifying the tensor to be reshaped + * * 1: A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the block + * sizes for each spatial dimension of the input tensor. All values + * must be >= 1. + * + * Outputs: + * * 0: A tensor of the same {@link OperandCode} as input0. + */ + ANEURALNETWORKS_BATCH_TO_SPACE_ND = 29, + + /** + * Element-wise division of two tensors. + * + * Takes two input tensors of identical {@link OperandCode} and compatible + * dimensions. The output is the result of dividing the first input tensor + * by the second, optionally modified by an activation function. + * + * Two dimensions are compatible when: + * 1. they are equal, or + * 2. one of them is 1 + * + * The size of the output is the maximum size along each dimension of the + * input operands. It starts with the trailing dimensions, and works its way + * forward. 
+ * + * Example: + * input1.dimension = {4, 1, 2} + * input2.dimension = {5, 4, 3, 1} + * output.dimension = {5, 4, 3, 2} + * + * Supported tensor {@link OperandCode}: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Supported tensor rank: up to 4 + * + * Inputs: + * * 0: An n-D tensor, specifying the first input. + * * 1: A tensor of the same {@link OperandCode}, and compatible dimensions + * as input0. + * * 2: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. + * + * Outputs: + * * 0: A tensor of the same {@link OperandCode} as input0. + */ + ANEURALNETWORKS_DIV = 30, + + /** + * Computes the mean of elements across dimensions of a tensor. + * + * Reduces the input tensor along the given dimensions to reduce. Unless + * keep_dims is true, the rank of the tensor is reduced by 1 for each entry + * in axis. If keep_dims is true, the reduced dimensions are retained with + * length 1. + * + * If dimensions to reduce have no entries, all dimensions are reduced, and + * a tensor with a single element is returned. + * + * Supported tensor {@link OperandCode}: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4 + * + * Inputs: + * * 0: A tensor, specifying the input. + * * 1: A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}. The dimensions + * to reduce. If None (the default), reduces all dimensions. Must be in + * the range [-rank(input_tensor), rank(input_tensor)). + * * 2: An {@link ANEURALNETWORKS_INT32} scalar, keep_dims. If positive, + * retains reduced dimensions with length 1. + * + * Outputs: + * * 0: A tensor of the same {@link OperandCode} as input0. + */ + ANEURALNETWORKS_MEAN = 31, + + /** + * Pads a tensor. + * + * This operation pads a tensor according to the specified paddings. 
+ * + * Supported tensor {@link OperandCode}: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4 + * + * Inputs: + * * 0: An n-D tensor, specifying the tensor to be padded. + * * 1: A 2-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the paddings + * for each spatial dimension of the input tensor. The shape of the + * tensor must be {rank(input0), 2}. + * padding[i, 0] specifies the number of elements to be padded in the + * front of dimension i. + * padding[i, 1] specifies the number of elements to be padded after the + * end of dimension i. + * + * Outputs: + * * 0: A tensor of the same {@link OperandCode} as input0. The + * output tensor has the same rank as input0, and each + * dimension of the output tensor has the same size as the + * corresponding dimension of the input tensor plus the size + * of the padding: + * output0.dimension[i] = + * padding[i, 0] + input0.dimension[i] + padding[i, 1] + */ + ANEURALNETWORKS_PAD = 32, + + // TODO: make the description easier to understand. + /** + * SpaceToBatch for N-Dimensional tensors. + * + * This operation divides "spatial" dimensions [1, ..., M] of the input into + * a grid of blocks of shape block_shape, and interleaves these blocks with + * the "batch" dimension (0) such that in the output, the spatial dimensions + * [1, ..., M] correspond to the position within the grid, and the batch + * dimension combines both the position within a spatial block and the + * original batch position. Prior to division into blocks, the spatial + * dimensions of the input are optionally zero padded according to paddings. + * + * Supported tensor {@link OperandCode}: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: 4 + * + * Inputs: + * * 0: An n-D tensor, specifying the input. 
+ * * 1: A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the block + * sizes for each spatial dimension of the input tensor. All values + * must be >= 1. + * * 2: A 2-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the paddings + * for each spatial dimension of the input tensor. All values must be + * >= 0. The shape of the tensor must be {rank(input0), 2}. + * padding[i, 0] specifies the number of element to be padded in the + * front of dimension i. + * padding[i, 1] specifies the number of element to be padded after the + * end of dimension i. + * + * Outputs: + * * 0: A tensor of the same {@link OperandCode} as input0. + */ + ANEURALNETWORKS_SPACE_TO_BATCH_ND = 33, + + /** + * Removes dimensions of size 1 from the shape of a tensor. + * + * Given a tensor input, this operation returns a tensor of the same + * {@link OperandCode} with all dimensions of size 1 removed. If you don't + * want to remove all size 1 dimensions, you can remove specific size 1 + * dimensions by specifying the axes (input1). + * + * Supported tensor {@link OperandCode}: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4 + * + * Inputs: + * * 0: An n-D tensor, the tensor to be squeezed. + * * 1: An optional 1-D tensor of {@link ANEURALNETWORKS_TENSOR_INT32}. The + * dimensions to squeeze. If specified only squeezes the dimensions + * listed. Otherwise, squeezes all dimensions. The dimension index + * starts at 0. An error must be reported if squeezing a dimension that + * is not 1. + * + * Outputs: + * * 0: A tensor of the same {@link OperandCode} as input0. Contains the + * same data as input, but has one or more dimensions of size 1 + * removed. + */ + ANEURALNETWORKS_SQUEEZE = 34, + + /** + * Extracts a strided slice of a tensor. + * + * Roughly speaking, this op extracts a slice of size (end - begin) / stride + * from the given input tensor. 
Starting at the location specified by begin + * the slice continues by adding stride to the index until all dimensions + * are not less than end. Note that a stride can be negative, which causes a + * reverse slice. + * + * Supported tensor {@link OperandCode}: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4 + * + * Inputs: + * * 0: An n-D tensor, specifying the tensor to be sliced. + * * 1: A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the starts of + * the dimensions of the input tensor to be sliced. The length must be + * of rank(input0). + * * 2: A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the ends of + * the dimensions of the input tensor to be sliced. The length must be + * of rank(input0). + * * 3: A 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, the strides of + * the dimensions of the input tensor to be sliced. The length must be + * of rank(input0). + * * 4: An {@link ANEURALNETWORKS_INT32} scalar, begin_mask. If the ith bit + * of begin_mask is set, begin[i] is ignored and the fullest possible + * range in that dimension is used instead. + * * 5: An {@link ANEURALNETWORKS_INT32} scalar, end_mask. If the ith bit of + * end_mask is set, end[i] is ignored and the fullest possible range in + * that dimension is used instead. + * * 6: An {@link ANEURALNETWORKS_INT32} scalar, shrink_axis_mask. An int32 + * mask. If the ith bit of shrink_axis_mask is set, it implies that the + * ith specification shrinks the dimensionality by 1. A slice of size 1 + * starting from begin[i] in the dimension must be preserved. + * + * Outputs: + * * 0: A tensor of the same {@link OperandCode} as input0. + */ + ANEURALNETWORKS_STRIDED_SLICE = 35, + + /** + * Element-wise subtraction of two tensors. + * + * Takes two input tensors of identical {@link OperandCode} and compatible + * dimensions. 
The output is the result of subtracting the second input + * tensor from the first one, optionally modified by an activation function. + * + * Two dimensions are compatible when: + * 1. they are equal, or + * 2. one of them is 1 + * + * The size of the output is the maximum size along each dimension of the + * input operands. It starts with the trailing dimensions, and works its way + * forward. + * + * Example: + * input1.dimension = {4, 1, 2} + * input2.dimension = {5, 4, 3, 1} + * output.dimension = {5, 4, 3, 2} + * + * Supported tensor {@link OperandCode}: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * + * Supported tensor rank: up to 4 + * + * Inputs: + * * 0: An n-D tensor, specifying the first input. + * * 1: A tensor of the same {@link OperandCode}, and compatible dimensions + * as input0. + * * 2: An {@link ANEURALNETWORKS_INT32} scalar, and has to be one of the + * {@link FuseCode} values. Specifies the activation to + * invoke on the result. + * + * Outputs: + * * 0: A tensor of the same {@link OperandCode} as input0. + */ + ANEURALNETWORKS_SUB = 36, + + /** + * Transposes the input tensor, permuting the dimensions according to the + * perm tensor. + * + * The returned tensor's dimension i corresponds to the input dimension + * perm[i]. If perm is not given, it is set to (n-1...0), where n is the + * rank of the input tensor. Hence by default, this operation performs a + * regular matrix transpose on 2-D input Tensors. + * + * Supported tensor {@link OperandCode}: + * * {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * * {@link ANEURALNETWORKS_TENSOR_QUANT8_ASYMM} + * + * Supported tensor rank: up to 4 + * + * Inputs: + * * 0: An n-D tensor, specifying the tensor to be transposed. + * * 1: An optional 1-D Tensor of {@link ANEURALNETWORKS_TENSOR_INT32}, + * the permutation of the dimensions of the input tensor. + * + * Outputs: + * * 0: A tensor of the same {@link OperandCode} as input0. 
+ */ + ANEURALNETWORKS_TRANSPOSE = 37, } OperationCode; /** @@ -1242,8 +1784,8 @@ typedef enum { ANEURALNETWORKS_UNEXPECTED_NULL = 3, ANEURALNETWORKS_BAD_DATA = 4, ANEURALNETWORKS_OP_FAILED = 5, - ANEURALNETWORKS_UNMAPPABLE = 5, ANEURALNETWORKS_BAD_STATE = 6, + ANEURALNETWORKS_UNMAPPABLE = 7, } ResultCode; /** @@ -1266,8 +1808,7 @@ enum { * {@link ANeuralNetworksModel_setOperandValueFromMemory}. An application * should typically create one shared memory object that contains every tensor * needed to define a model. {@link ANeuralNetworksMemory_createFromFd} can be - * used to create shared memory from a file handle. {@link ANeuralNetworksMemory_createShared} - * can be used to directly created shared memory. + * used to create shared memory from a file handle. * * Memory objects can also be used to specify the input and output arguments of * an execution. See {@link ANeuralNetworksExecution_setInputFromMemory} @@ -1279,12 +1820,18 @@ typedef struct ANeuralNetworksMemory ANeuralNetworksMemory; * ANeuralNetworksModel is an opaque type that contains a description of the * mathematical operations that constitute the model. * - * <p>The model will be built by calling<ul> - * <li>{@link ANeuralNetworksModel_create},</li> - * <li>{@link ANeuralNetworksModel_addOperation},</li> - * <li>{@link ANeuralNetworksModel_addOperand},</li> + * <p>Build the model by calling<ul> + * <li>{@link ANeuralNetworksModel_create}</li> + * <li>{@link ANeuralNetworksModel_addOperation}</li> + * <li>{@link ANeuralNetworksModel_addOperand}</li> * </ul> * + * This forms a graph in which each operation and operand is a node, a + * directed edge from an operand to an operation indicates that the + * operand is an input to the operation, and a directed edge from an + * operation to an operand indicates that the operand is an output + * from the operation. This graph must be acyclic. + * * A model is completed by calling {@link ANeuralNetworksModel_finish}. 
* A model is destroyed by calling {@link ANeuralNetworksModel_free}. * @@ -1340,10 +1887,10 @@ typedef struct ANeuralNetworksCompilation ANeuralNetworksCompilation; * <p>To use:<ul> * <li>Create a new execution instance by calling the * {@link ANeuralNetworksExecution_create} function.</li> - * <li>Associate data to the model inputs with + * <li>Associate input buffers or memory regions to the model inputs with * {@link ANeuralNetworksExecution_setInput} or * {@link ANeuralNetworksExecution_setInputFromMemory}.</li> - * <li>Associate output buffers to the model outputs with + * <li>Associate output buffers or memory regions to the model outputs with * {@link ANeuralNetworksExecution_setOutput} or * {@link ANeuralNetworksExecution_setOutputFromMemory}.</li> * <li>Apply the model with {@link ANeuralNetworksExecution_startCompute}.</li> @@ -1352,6 +1899,11 @@ typedef struct ANeuralNetworksCompilation ANeuralNetworksCompilation; * <li>Destroy the execution with * {@link ANeuralNetworksExecution_free}.</li></ul></p> * + * <p>An output buffer or memory region must not overlap with any + * other output buffer or memory region, with an input buffer or + * memory region, or with an operand value in a memory object + * ({@link ANeuralNetworksModel_setOperandValueFromMemory}).</p> + * * <p>An execution cannot be modified once {@link ANeuralNetworksExecution_startCompute} * has been called on it.</p> * @@ -1364,18 +1916,55 @@ typedef struct ANeuralNetworksCompilation ANeuralNetworksCompilation; * thread to use {@link ANeuralNetworksEvent_wait} at the same time.</p> * * <p>It is also the application's responsibility to ensure that there are no other - * uses of the request after calling {@link ANeuralNetworksExecution_free}.</p> + * uses of the execution after calling {@link ANeuralNetworksExecution_free}.</p> */ typedef struct ANeuralNetworksExecution ANeuralNetworksExecution; /** * ANeuralNetworksOperandType describes the type of an operand. 
* This structure is used to describe both scalars and tensors. + * + * A tensor operand type must have a specified rank (number of + * dimensions) but may have any of its dimensions unspecified. + * + * A tensor operand type with all dimensions specified is "fully + * specified". Whenever possible (i.e., whenever the dimensions are + * known at model construction time), a tensor operand type should be + * (but is not required to be) fully specified, in order to enable the + * best possible performance. + * + * If a tensor operand's type is not fully specified, the dimensions + * of the operand are deduced from the operand types and values of the + * operation for which that operand is an output. + * + * <p>In the following situations, a tensor operand type must be fully + * specified:<ul> + * <li>The operand has a constant value, set by + * {@link ANeuralNetworksModel_setOperandValue} (with a + * non-nullptr buffer) or + * {@link ANeuralNetworksModel_setOperandValueFromMemory}.</li> + * <li>The operand is a model input or model output (see + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}). A + * fully specified tensor operand type must either be provided + * to {@link ANeuralNetworksModel_addOperand}; or it must be + * provided to the corresponding + * {@link ANeuralNetworksExecution_setInput}, + * {@link ANeuralNetworksExecution_setInputFromMemory}, + * {@link ANeuralNetworksExecution_setOutput}, or + * {@link ANeuralNetworksModel_setOperandValueFromMemory}. + * EXCEPTION: If the input or output is optional and omitted + * (by passing nullptr for buffer to + * {@link ANeuralNetworksExecution_setInput} or + * {@link ANeuralNetworksExecution_setOutput}) then it need + * not have a fully specified tensor operand type.</li></ul> + * + * A tensor operand type with some number of unspecified dimensions is + * represented by setting each unspecified dimension to 0. */ typedef struct ANeuralNetworksOperandType { /** The data type, e.g ANEURALNETWORKS_INT8. 
*/ int32_t type; - /** The number of dimensions. It should be 0 for scalars. */ + /** The number of dimensions (rank). It should be 0 for scalars. */ uint32_t dimensionCount; /** The dimensions of the tensor. It should be nullptr for scalars. */ const uint32_t* dimensions; @@ -1490,17 +2079,35 @@ int ANeuralNetworksModel_finish(ANeuralNetworksModel* model); * * The order in which the operands are added is important. The first one added * to a model will have the index value 0, the second 1, etc. These indexes are - * used as operand identifiers in {@link ANeuralNetworksModel_addOperation}, + * used as operand identifiers in + * {@link ANeuralNetworksModel_addOperation}, + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}, + * {@link ANeuralNetworksModel_setOperandValue}, + * {@link ANeuralNetworksModel_setOperandValueFromMemory}, * {@link ANeuralNetworksExecution_setInput}, * {@link ANeuralNetworksExecution_setInputFromMemory}, * {@link ANeuralNetworksExecution_setOutput}, * {@link ANeuralNetworksExecution_setOutputFromMemory} and * {@link ANeuralNetworksExecution_setOperandValue}. * - * To build a model that can accomodate inputs of various sizes, as you may want - * to do for a CNN, set the size of the dimensions that will vary at run time to 0. - * If you do so, provide the full dimensions when calling - * {@link ANeuralNetworksExecution_setInput} or {@link ANeuralNetworksExecution_setInputFromMemory}. 
+ * <p>Every operand must be referenced in exactly one of the following + * ways:<ul> + * <li>It is identified as a model input with + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}.</li> + * <li>It is identified as a constant with + * {@link ANeuralNetworksModel_setOperandValue} or + * {@link ANeuralNetworksModel_setOperandValueFromMemory}.</li> + * <li>It is identified as an output of exactly one operation with + * {@link ANeuralNetworksModel_addOperation}.</li></p> + * <p>An operand that is identified as a model input or as a constant + * must not also be identified as a model output with + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}.</p> + * + * To build a model that can accommodate inputs of various sizes, as + * you may want to do for a CNN, leave unspecified the dimensions that + * will vary at run time. If you do so, fully specify dimensions + * when calling {@link ANeuralNetworksExecution_setInput} or + * {@link ANeuralNetworksExecution_setInputFromMemory}. * * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been * called will return an error. @@ -1509,7 +2116,9 @@ int ANeuralNetworksModel_finish(ANeuralNetworksModel* model); * * @param model The model to be modified. * @param type The {@link ANeuralNetworksOperandType} that describes the shape - * of the operand. + * of the operand. Neither the {@link ANeuralNetworksOperandType} + * nor the dimensions it points to need to outlive the call to + * {@link ANeuralNetworksModel_addOperand}. * * @return ANEURALNETWORKS_NO_ERROR if successful. */ @@ -1585,7 +2194,7 @@ int ANeuralNetworksModel_setOperandValueFromMemory(ANeuralNetworksModel* model, * Add an operation to a model. * * @param model The model to be modified. - * @param type The type of the operation. + * @param type The {@link ANeuralNetworksOperationType} of the operation. * @param inputCount The number of entries in the inputs array. * @param inputs An array of indexes identifying each operand. 
* @param outputCount The number of entries in the outputs array. @@ -1607,7 +2216,8 @@ int ANeuralNetworksModel_addOperation(ANeuralNetworksModel* model, const uint32_t* outputs); /** - * Specfifies which operands will be the model's inputs and outputs. + * Specifies which operands will be the model's inputs and + * outputs. Every model must have at least one input and one output. * * An operand cannot be used for both input and output. Doing so will * return an error. @@ -1632,6 +2242,28 @@ int ANeuralNetworksModel_identifyInputsAndOutputs(ANeuralNetworksModel* model, u const uint32_t* outputs); /** + * Specifies whether {@link ANEURALNETWORKS_TENSOR_FLOAT32} is allowed to be + * calculated with range and/or precision as low as that of the IEEE 754 16-bit + * floating-point format. By default, {@link ANEURALNETWORKS_TENSOR_FLOAT32} + * must be calculated using at least the range and precision of the IEEE 754 + * 32-bit floating-point format. + * + * @param model The model to be modified. + * @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be + * calculated with range and/or precision as low as that of the + * IEEE 754 16-bit floating point format. 'false' indicates + * {@link ANEURALNETWORKS_TENSOR_FLOAT32} must be calculated using + * at least the range and precision of the IEEE 754 32-bit floating + * point format. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been + * called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + */ +int ANeuralNetworksModel_relaxComputationFloat32toFloat16(ANeuralNetworksModel* model, bool allow); + +/** * Create a {@link ANeuralNetworksCompilation} to compile the given model. * * <p>This only creates the object. Compilation is only performed once @@ -1756,12 +2388,18 @@ void ANeuralNetworksExecution_free(ANeuralNetworksExecution* execution); * @param index The index of the input argument we are setting. 
It is * an index into the lists passed to * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not - * the index associated with {@link ANeuralNetworksModel_addOperand}. - * @param type The type of the operand. This should be used to specify the - * dimensions that were set to 0 when the operand was added to the - * model. All other properties of the type must be the same as - * specified in the model. If the type is the same as specified - * when the model was built, NULL can be passed. + * the index associated with + * {@link ANeuralNetworksModel_addOperand}. + * @param type The {@link ANeuralNetworksOperandType} of the + * operand. Unless the input is omitted, this should be + * used to specify the dimensions that were left + * unspecified when the operand was added to the + * model. All other properties of the type must be the + * same as specified in the model. If the type is the same + * as specified when the model was built, NULL can be + * passed. Neither the {@link ANeuralNetworksOperandType} + * nor the dimensions it points to need to outlive the call + * to {@link ANeuralNetworksExecution_setInput}. * @param buffer The buffer containing the data. * @param length The length in bytes of the buffer. * @@ -1779,7 +2417,7 @@ int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution* execution, int32 * <p>The provided memory must outlive the execution.</p> * * If the input is optional, you can indicate that it is omitted by - * using @{Link ANeuralNetworks_setInput} instead, passing nullptr for buffer + * using {@link ANeuralNetworks_setInput} instead, passing nullptr for buffer * and 0 for length. * * See {@link ANeuralNetworksExecution} for information on multithreaded usage. @@ -1789,13 +2427,17 @@ int ANeuralNetworksExecution_setInput(ANeuralNetworksExecution* execution, int32 * an index into the lists passed to * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. 
It is not * the index associated with {@link ANeuralNetworksModel_addOperand}. - * @param type The type of the operand. This can be used to specify the - * dimensions that were set to 0 when the operand was added to the - * model. All other values must be the same as specified in the - * model. If the type is the same as specified when the model - * was built, NULL can be passed. + * @param type The {@link ANeuralNetworksOperandType} of the + * operand. This should be used to specify the dimensions + * that were left unspecified when the operand was added + * to the model. All other properties of the type must be + * the same as specified in the model. If the type is the + * same as specified when the model was built, NULL can be + * passed. Neither the {@link ANeuralNetworksOperandType} + * nor the dimensions it points to need to outlive the call + * to {@link ANeuralNetworksExecution_setInputFromMemory}. * @param memory The memory containing the data. - * @param offset This specifies the location of the data whithin the memory. + * @param offset This specifies the location of the data within the memory. * The offset is in bytes from the start of memory. * @param length The size in bytes of the data value. * @@ -1823,11 +2465,16 @@ int ANeuralNetworksExecution_setInputFromMemory(ANeuralNetworksExecution* execut * an index into the lists passed to * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not * the index associated with {@link ANeuralNetworksModel_addOperand}. - * @param type The type of the operand. This can be used to specify the - * dimensions that were set to 0 when the operand was added to the - * model. All other values must be the same as specified in the - * model. If the type is the same as specified when the model - * was built, NULL can be passed. + * @param type The {@link ANeuralNetworksOperandType} of the + * operand. 
Unless the output is omitted, this should be + * used to specify the dimensions that were left + * unspecified when the operand was added to the + * model. All other properties of the type must be the + * same as specified in the model. If the type is the same + * as specified when the model was built, NULL can be + * passed. Neither the {@link ANeuralNetworksOperandType} + * nor the dimensions it points to need to outlive the call + * to {@link ANeuralNetworksExecution_setOutput}. * @param buffer The buffer where the data is to be written. * @param length The length in bytes of the buffer. * @@ -1843,7 +2490,7 @@ int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution* execution, int3 * {@link ANeuralNetworksExecution}. * * If the output is optional, you can indicate that it is omitted by - * using @{Link ANeuralNetworks_setOutput} instead, passing nullptr for buffer + * using {@link ANeuralNetworks_setOutput} instead, passing nullptr for buffer * and 0 for length. * * <p>The provided memory must outlive the execution.</p> @@ -1855,13 +2502,17 @@ int ANeuralNetworksExecution_setOutput(ANeuralNetworksExecution* execution, int3 * an index into the lists passed to * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not * the index associated with {@link ANeuralNetworksModel_addOperand}. - * @param type The type of the operand. This can be used to specify the - * dimensions that were set to 0 when the operand was added to the - * model. All other values must be the same as specified in the - * model. If the type is the same as specified when the model - * was built, NULL can be passed. + * @param type The {@link ANeuralNetworksOperandType} of the operand. This should be + * used to specify the dimensions that were left + * unspecified when the operand was added to the + * model. All other properties of the type must be the + * same as specified in the model. If the type is the same + * as specified when the model was built, NULL can be + * passed. 
Neither the {@link ANeuralNetworksOperandType} + * nor the dimensions it points to need to outlive the call + * to {@link ANeuralNetworksExecution_setOutputFromMemory}. * @param memory The memory where the data is to be stored. - * @param offset This specifies the location of the data whithin the memory. + * @param offset This specifies the location of the data within the memory. * The offset is in bytes from the start of memory. * @param length The length in bytes of the data value. * @@ -1922,8 +2573,6 @@ void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event); __END_DECLS -#endif // __ANDROID_API__ >= 27 - #endif // ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_H /** @} */ diff --git a/include/NeuralNetworksEx.h b/include/NeuralNetworksEx.h new file mode 100644 index 000000000..284f3af84 --- /dev/null +++ b/include/NeuralNetworksEx.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef NN_RUNTIME_NEURAL_NETWORKS_EX_H +#define NN_RUNTIME_NEURAL_NETWORKS_EX_H + +#include <sys/cdefs.h> + +__BEGIN_DECLS + +typedef enum { + /** extends operation. 
*/ + ANEURALNETWORKS_CAST_EX = 50000, + ANEURALNETWORKS_GATHER_EX = 50001, + ANEURALNETWORKS_TOPK_V2_EX = 50002, + ANEURALNETWORKS_TENSORFLOW_MAX_EX = 50003, + ANEURALNETWORKS_SPLIT_EX = 50004, + ANEURALNETWORKS_RSQRT_EX = 50005, + ANEURALNETWORKS_SQUARED_DIFFERENCE_EX = 50006, +} OperationCodeEx; // extends OperationCode + +typedef OperationCodeEx ANeuralNetworksOperationTypeEx; + +/** + * Add an extended operation to a model. + * + * @param model The model to be modified. + * @param type The type of extended operation. + * @param inputCount The number of entries in the inputs array. + * @param inputs An array of indexes identifying each operand. + * @param outputCount The number of entries in the outputs array. + * @param outputs An array of indexes identifying each operand. + * + * The operands specified by inputs and outputs must have been + * previously added by calls to {@link ANeuralNetworksModel_addOperand}. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has been + * called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +int ANeuralNetworksModel_addOperationEx(ANeuralNetworksModel* model, + ANeuralNetworksOperationTypeEx type, uint32_t inputCount, + const uint32_t* inputs, uint32_t outputCount, + const uint32_t* outputs); + +__END_DECLS + +#endif // NN_RUNTIME_NEURAL_NETWORKS_EX_H diff --git a/include/NeuralNetworksExShim.h b/include/NeuralNetworksExShim.h new file mode 100644 index 000000000..0fee60a8d --- /dev/null +++ b/include/NeuralNetworksExShim.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef NN_API_EX_SHIM_H +#define NN_API_EX_SHIM_H + +#include "NeuralNetworksEx.h" +#include "NeuralNetworksLoadHelpers.h" + +typedef int (*ANeuralNetworksModel_addOperationEx_fn)( + ANeuralNetworksModel *model, ANeuralNetworksOperationTypeEx type, + uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs); + +/** + * Add an extended operation to a model. + * + * @param model The model to be modified. + * @param type The type of extended operation. + * @param inputCount The number of entries in the inputs array. + * @param inputs An array of indexes identifying each operand. + * @param outputCount The number of entries in the outputs array. + * @param outputs An array of indexes identifying each operand. + * + * The operands specified by inputs and outputs must have been + * previously added by calls to {@link ANeuralNetworksModel_addOperand}. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has + * been + * called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. 
+ */ + +inline int ANeuralNetworksModel_addOperationEx( + ANeuralNetworksModel *model, ANeuralNetworksOperationTypeEx type, + uint32_t inputCount, const uint32_t *inputs, uint32_t outputCount, + const uint32_t *outputs) { + LOAD_FUNCTION(ANeuralNetworksModel_addOperationEx); + EXECUTE_FUNCTION_RETURN(model, type, inputCount, inputs, outputCount, + outputs); +} + +#endif // NN_API_EX_SHIM_H diff --git a/include/NeuralNetworksLoadHelpers.h b/include/NeuralNetworksLoadHelpers.h new file mode 100644 index 000000000..93dd967bf --- /dev/null +++ b/include/NeuralNetworksLoadHelpers.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NEURAL_NETWORKS_LOAD_HELPER_H__ +#define __NEURAL_NETWORKS_LOAD_HELPER_H__ + +#include <dlfcn.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#define NNAPI_LOG(format, ...) printf(format "\n", __VA_ARGS__); +#define LOAD_FUNCTION(name) \ + static name##_fn fn = reinterpret_cast<name##_fn>(loadNNAPIFunction(#name)); +#define EXECUTE_FUNCTION(...) \ + if (fn != nullptr) { \ + fn(__VA_ARGS__); \ + } +#define EXECUTE_FUNCTION_RETURN(...) return fn != nullptr ? fn(__VA_ARGS__) : 0; + +inline void* loadNNAPILibrary(const char* name) { + // TODO: change RTLD_LOCAL? 
Assumes there can be multiple instances of nn + // api RT + void* handle = dlopen(name, RTLD_LAZY | RTLD_LOCAL); + if (handle == nullptr) { + NNAPI_LOG("nnapi error: unable to open library %s", name); + } + return handle; +} + +inline void* getNNAPILibraryHandle() { + static void* handle = loadNNAPILibrary("libneuralnetworks.so"); + return handle; +} + +inline void* loadNNAPIFunction(const char* name) { + void* fn = nullptr; + if (getNNAPILibraryHandle() != nullptr) { + fn = dlsym(getNNAPILibraryHandle(), name); + } + if (fn == nullptr) + { + NNAPI_LOG("nnapi error: unable to open function %s", name); + abort(); + } + else + { +#ifdef _GNU_SOURCE + Dl_info info; + dladdr(fn, &info); + NNAPI_LOG("nnapi function '%s' is loaded from '%s' ", name, info.dli_fname); +#endif // _GNU_SOURCE + } + return fn; +} + +inline bool NNAPIExists() { + static bool nnapi_is_available = getNNAPILibraryHandle(); + return nnapi_is_available; +} + +#endif // __NEURAL_NETWORKS_LOAD_HELPER_H__ diff --git a/include/NeuralNetworksShim.h b/include/NeuralNetworksShim.h new file mode 100644 index 000000000..a7bd745fb --- /dev/null +++ b/include/NeuralNetworksShim.h @@ -0,0 +1,675 @@ +/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// NOTE This header is derived from the following file (in TensorFlow) +// 'externals/tensorflow/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h' +#ifndef __NEURAL_NETWORKS_SHIM__ +#define __NEURAL_NETWORKS_SHIM__ + +#include "NeuralNetworks.h" +#include "NeuralNetworksLoadHelpers.h" + +// nn api function types + +typedef int (*ANeuralNetworksMemory_createFromFd_fn)( + size_t size, int protect, int fd, size_t offset, + ANeuralNetworksMemory** memory); + +typedef void (*ANeuralNetworksMemory_free_fn)(ANeuralNetworksMemory* memory); + +typedef int (*ANeuralNetworksModel_create_fn)(ANeuralNetworksModel** model); + +typedef int (*ANeuralNetworksModel_finish_fn)(ANeuralNetworksModel* model); + +typedef void (*ANeuralNetworksModel_free_fn)(ANeuralNetworksModel* model); + +typedef int (*ANeuralNetworksCompilation_create_fn)( + ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation); + +typedef void (*ANeuralNetworksCompilation_free_fn)( + ANeuralNetworksCompilation* compilation); + +typedef int (*ANeuralNetworksCompilation_setPreference_fn)( + ANeuralNetworksCompilation* compilation, int32_t preference); + +typedef int (*ANeuralNetworksCompilation_finish_fn)( + ANeuralNetworksCompilation* compilation); + +typedef int (*ANeuralNetworksModel_addOperand_fn)( + ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type); + +typedef int (*ANeuralNetworksModel_setOperandValue_fn)( + ANeuralNetworksModel* model, int32_t index, const void* buffer, + size_t length); + +typedef int (*ANeuralNetworksModel_setOperandValueFromMemory_fn)( + ANeuralNetworksModel* model, int32_t index, + const ANeuralNetworksMemory* memory, size_t offset, size_t length); + +typedef int (*ANeuralNetworksModel_addOperation_fn)( + ANeuralNetworksModel* model, ANeuralNetworksOperationType type, + uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, + const uint32_t* outputs); + 
+typedef int (*ANeuralNetworksModel_identifyInputsAndOutputs_fn)( + ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs, + uint32_t outputCount, const uint32_t* outputs); + +typedef int (*ANeuralNetworksExecution_create_fn)( + ANeuralNetworksCompilation* compilation, + ANeuralNetworksExecution** execution); + +typedef void (*ANeuralNetworksExecution_free_fn)( + ANeuralNetworksExecution* execution); + +typedef int (*ANeuralNetworksExecution_setInput_fn)( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, const void* buffer, size_t length); + +typedef int (*ANeuralNetworksExecution_setInputFromMemory_fn)( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, + size_t offset, size_t length); + +typedef int (*ANeuralNetworksExecution_setOutput_fn)( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, void* buffer, size_t length); + +typedef int (*ANeuralNetworksExecution_setOutputFromMemory_fn)( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, + size_t offset, size_t length); + +typedef int (*ANeuralNetworksExecution_startCompute_fn)( + ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event); + +typedef int (*ANeuralNetworksEvent_wait_fn)(ANeuralNetworksEvent* event); + +typedef void (*ANeuralNetworksEvent_free_fn)(ANeuralNetworksEvent* event); + +/** + * Creates a shared memory object from a file descriptor. + * + * The shared memory is backed by a file descriptor via mmap. + * See {@link ANeuralNetworksMemory} for a description on how to use + * this shared memory. + * + * @param size The requested size in bytes. + * Must not be larger than the file size. + * @param prot The desired memory protection for the mapping. 
+ * It is either PROT_NONE or the bitwise OR of one or + * more of the following flags: PROT_READ, PROT_WRITE. + * @param fd The requested file descriptor. + * The file descriptor has to be mmap-able. The file + * descriptor will be duplicated. + * @param offset The offset to the beginning of the file of the area to map. + * The offset has to be aligned to a page size. + * @param memory The memory object to be created. + * Set to NULL if unsuccessful. + * + * @return ANEURALNETWORKS_NO_ERROR if the request completed normally. + */ +inline int ANeuralNetworksMemory_createFromFd(size_t size, int protect, int fd, + size_t offset, + ANeuralNetworksMemory** memory) { + LOAD_FUNCTION(ANeuralNetworksMemory_createFromFd); + EXECUTE_FUNCTION_RETURN(size, protect, fd, offset, memory); +} + +/** + * Delete a memory object. + * + * Destroys the object used by the run time to keep track of the memory. + * This will free the underlying actual memory if no other code has open + * handles to this memory. + * + * @param memory The memory object to be freed. + */ +inline void ANeuralNetworksMemory_free(ANeuralNetworksMemory* memory) { + LOAD_FUNCTION(ANeuralNetworksMemory_free); + EXECUTE_FUNCTION(memory); +} + +/** + * Create an empty {@link ANeuralNetworksModel}. + * + * <p>This only creates the object. Computation is performed once + * {@link ANeuralNetworksExecution_startCompute} is invoked. + * + * The model should be constructed with calls to + * {@link ANeuralNetworksModel_addOperation} and + * {@link ANeuralNetworksModel_addOperand} + * + * <p>{@link ANeuralNetworksModel_finish} should be called once the model + * has been fully constructed.</p> + * + * <p>{@link ANeuralNetworksModel_free} should be called once the model + * is no longer needed.</p> + * + * @param model The {@link ANeuralNetworksModel} to be created. + * Set to NULL if unsuccessful. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. 
+ */ +inline int ANeuralNetworksModel_create(ANeuralNetworksModel** model) { + LOAD_FUNCTION(ANeuralNetworksModel_create); + EXECUTE_FUNCTION_RETURN(model); +} + +/** + * Destroy a model. + * + * The model need not have been finished by a call to + * {@link ANeuralNetworksModel_finish}. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @param model The model to be destroyed. Passing NULL is acceptable and + * results in no operation. + */ +inline void ANeuralNetworksModel_free(ANeuralNetworksModel* model) { + LOAD_FUNCTION(ANeuralNetworksModel_free); + EXECUTE_FUNCTION(model); +} + +/** + * Indicate that we have finished modifying a model. Required before + * calling {@link ANeuralNetworksCompilation_compile}. + * + * An application is responsible to make sure that no other thread uses + * the model at the same time. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @param model The model to be finished. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksModel_finish(ANeuralNetworksModel* model) { + LOAD_FUNCTION(ANeuralNetworksModel_finish); + EXECUTE_FUNCTION_RETURN(model); +} + +/** + * Add an operand to a model. + * + * The order in which the operands are added is important. The first one added + * to a model will have the index value 0, the second 1, etc. These indexes are + * used as operand identifiers in {@link ANeuralNetworksModel_addOperation}, + * {@link ANeuralNetworksExecution_setInput}, + * {@link ANeuralNetworksExecution_setInputFromMemory}, + * {@link ANeuralNetworksExecution_setOutput}, + * {@link ANeuralNetworksExecution_setOutputFromMemory} and + * {@link ANeuralNetworksExecution_setOperandValue}. + * + * To build a model that can accommodate inputs of various sizes, as you may + * want to do for a CNN, set the size of the dimensions that will vary at run + * time to 0. 
If you do so, provide the full dimensions when calling + * {@link ANeuralNetworksExecution_setInput} or {@link + * ANeuralNetworksExecution_setInputFromMemory}. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has + * been called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @param model The model to be modified. + * @param type The {@link ANeuralNetworksOperandType} that describes the shape + * of the operand. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksModel_addOperand( + ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type) { + LOAD_FUNCTION(ANeuralNetworksModel_addOperand); + EXECUTE_FUNCTION_RETURN(model, type); +} + +/** + * Sets an operand to a constant value. + * + * For scalar values, the content of buffer is copied into the model. + * + * For tensor values, a pointer to the buffer is stored within the model. + * The application is responsible for not changing the content of this region + * until all executions using this model have completed. As the data may + * be copied during processing, modifying the data after this call yields + * undefined results. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has + * been called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @param model The model to be modified. + * @param index The index of the model operand we're setting. + * @param buffer A pointer to the data to use. + * @param length The size in bytes of the data value. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. 
+ */ +inline int ANeuralNetworksModel_setOperandValue(ANeuralNetworksModel* model, + int32_t index, + const void* buffer, + size_t length) { + LOAD_FUNCTION(ANeuralNetworksModel_setOperandValue); + EXECUTE_FUNCTION_RETURN(model, index, buffer, length); +} + +/** + * Sets an operand to a value stored in a memory object. + * + * The content of the memory is not copied. A reference to that memory is stored + * inside the model. The application is responsible for not changing the content + * of the memory region until all executions using this model have completed. + * As the data may be copied during processing, modifying the data after this + * call yields undefined results. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has + * been called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @param model The model to be modified. + * @param index The index of the model operand we're setting. + * @param buffer A pointer to the data to use. + * @param memory The memory containing the data. + * @param offset This specifies the location of the data within the memory. + * The offset is in bytes from the start of memory. + * @param length The size in bytes of the data value. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksModel_setOperandValueFromMemory( + ANeuralNetworksModel* model, int32_t index, + const ANeuralNetworksMemory* memory, size_t offset, size_t length) { + LOAD_FUNCTION(ANeuralNetworksModel_setOperandValueFromMemory); + EXECUTE_FUNCTION_RETURN(model, index, memory, offset, length); +} + +/** + * Add an operation to a model. + * + * @param model The model to be modified. + * @param type The type of the operation. + * @param inputCount The number of entries in the inputs array. + * @param inputs An array of indexes identifying each operand. + * @param outputCount The number of entries in the outputs array. 
+ * @param outputs An array of indexes identifying each operand. + * + * The operands specified by inputs and outputs must have been + * previously added by calls to {@link ANeuralNetworksModel_addOperand}. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has + * been called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksModel_addOperation(ANeuralNetworksModel* model, + ANeuralNetworksOperationType type, + uint32_t inputCount, + const uint32_t* inputs, + uint32_t outputCount, + const uint32_t* outputs) { + LOAD_FUNCTION(ANeuralNetworksModel_addOperation); + EXECUTE_FUNCTION_RETURN(model, type, inputCount, inputs, outputCount, + outputs); +} + +/** + * Specifies which operands will be the model's inputs and outputs. + * + * An operand cannot be used for both input and output. Doing so will + * return an error. + * + * @param model The model to be modified. + * @param inputCount The number of entries in the inputs array. + * @param inputs An array of indexes identifying the input operands. + * @param outputCount The number of entries in the outputs array. + * @param outputs An array of indexes identifying the output operands. + * + * The operands specified by inputs and outputs must have been + * previously added by calls to {@link ANeuralNetworksModel_addOperand}. + * + * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has + * been called will return an error. + * + * See {@link ANeuralNetworksModel} for information on multithreaded usage. 
+ * + */ +inline int ANeuralNetworksModel_identifyInputsAndOutputs( + ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs, + uint32_t outputCount, const uint32_t* outputs) { + LOAD_FUNCTION(ANeuralNetworksModel_identifyInputsAndOutputs); + EXECUTE_FUNCTION_RETURN(model, inputCount, inputs, outputCount, outputs); +} + +/** + * Create a {@link ANeuralNetworksCompilation} to compile the given model. + * This only creates the object. Compilation is only performed once + * {@link ANeuralNetworksCompilation_start} is invoked. + * + * <p>The provided model must outlive the compilation.</p> + * + * The model must already have been finished by a call to + * {@link ANeuralNetworksModel_finish}. + * + * See {@link ANeuralNetworksCompilation} for information on multithreaded + * usage. + * + * @param model The {@link ANeuralNetworksModel} to be compiled. + * @param compilation The newly created object or NULL if unsuccessful. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA + * if the model is invalid. + */ +inline int ANeuralNetworksCompilation_create( + ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation) { + LOAD_FUNCTION(ANeuralNetworksCompilation_create); + EXECUTE_FUNCTION_RETURN(model, compilation); +} + +/** + * Destroy a compilation. + * + * <p>If called on a compilation for which + * {@link ANeuralNetworksCompilation_start} has been called, the + * function will return immediately but will mark the compilation to be deleted + * once the compilation completes. The {@link ANeuralNetworksCompilation_wait} + * will return ERROR_DELETED. + * + * See {@link ANeuralNetworksCompilation} for information on multithreaded + * usage. + * + * @param compilation The compilation to be destroyed. Passing NULL is + * acceptable and results in no operation. 
+ */ +inline void ANeuralNetworksCompilation_free( + ANeuralNetworksCompilation* compilation) { + LOAD_FUNCTION(ANeuralNetworksCompilation_free); + EXECUTE_FUNCTION(compilation); +} + +/** + * Sets the execution preference. + * + * <p>Provides guidance to the runtime when trade-offs are possible.</p> + * + * See {@link ANeuralNetworksCompilation} for information on multithreaded + * usage. + * + * @param compilation The compilation to be modified. + * @param preference Either {@link PREFER_LOW_POWER}, + * {@link PREFER_SINGLE_FAST_ANSWER}, or + * {@link PREFER_SUSTAINED_SPEED}. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksCompilation_setPreference( + ANeuralNetworksCompilation* compilation, int32_t preference) { + LOAD_FUNCTION(ANeuralNetworksCompilation_setPreference); + EXECUTE_FUNCTION_RETURN(compilation, preference); +} + +/** + * Waits until the compilation completes. + * + * More than one thread can wait on a compilation. When the compilation + * completes, all threads will be released. + * + * See {@link ANeuralNetworksCompilation} for information on multithreaded + * usage. + * + * @return ANEURALNETWORKS_NO_ERROR if the compilation completed normally. + */ +inline int ANeuralNetworksCompilation_finish( + ANeuralNetworksCompilation* compilation) { + LOAD_FUNCTION(ANeuralNetworksCompilation_finish); + EXECUTE_FUNCTION_RETURN(compilation); +} +/** + * Create a {@link ANeuralNetworksExecution} to apply the given compilation. + * This only creates the object. Computation is only performed once + * {@link ANeuralNetworksExecution_startCompute} is invoked. + * + * <p>The provided compilation must outlive the execution.</p> + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param compilation The {@link ANeuralNetworksCompilation} to be evaluated. + * @param execution The newly created object or NULL if unsuccessful. 
+ * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA + * if the compilation is invalid. + */ +inline int ANeuralNetworksExecution_create( + ANeuralNetworksCompilation* compilation, + ANeuralNetworksExecution** execution) { + LOAD_FUNCTION(ANeuralNetworksExecution_create); + EXECUTE_FUNCTION_RETURN(compilation, execution); +} + +/** + * Destroy an execution. + * + * <p>If called on an execution for which + * {@link ANeuralNetworksExecution_startCompute} has been called, the + * function will return immediately but will mark the execution to be deleted + * once the computation completes. The {@link ANeuralNetworksExecution_wait} + * will return ANEURALNETWORKS_ERROR_DELETED. + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param execution The execution to be destroyed. Passing NULL is acceptable + * and results in no operation. + */ +inline void ANeuralNetworksExecution_free(ANeuralNetworksExecution* execution) { + LOAD_FUNCTION(ANeuralNetworksExecution_free); + EXECUTE_FUNCTION(execution); +} + +/** + * Associate a user buffer with an input of the model of the + * {@link ANeuralNetworksExecution}. + * + * <p>The provided buffer must outlive the execution.</p> + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param execution The execution to be modified. + * @param index The index of the input argument we are setting. It is + * an index into the lists passed to + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not + * the index associated with {@link + * ANeuralNetworksModel_addOperand}. + * @param type The type of the operand. This should be used to specify the + * dimensions that were set to 0 when the operand was added to the + * model. All other properties of the type must be the same as + * specified in the model. If the type is the same as specified + * when the model was built, NULL can be passed. 
+ * @param buffer The buffer containing the data. + * @param length The length in bytes of the buffer. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if + * the name is not recognized or the buffer is too small for the input. + */ +inline int ANeuralNetworksExecution_setInput( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, const void* buffer, size_t length) { + LOAD_FUNCTION(ANeuralNetworksExecution_setInput); + EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length); +} + +/** + * Associate part of a memory object with an input of the model of the + * {@link ANeuralNetworksExecution}. + * + * <p>The provided memory must outlive the execution.</p> + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param execution The execution to be modified. + * @param index The index of the input argument we are setting. It is + * an index into the lists passed to + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not + * the index associated with {@link + * ANeuralNetworksModel_addOperand}. + * @param type The type of the operand. This can be used to specify the + * dimensions that were set to 0 when the operand was added to the + * model. All other values must be the same as specified in the + * model. If the type is the same as specified when the model + * was built, NULL can be passed. + * @param memory The memory containing the data. + * @param offset This specifies the location of the data within the memory. + * The offset is in bytes from the start of memory. + * @param length The size in bytes of the data value. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if + * the name is not recognized or the buffer is too small for the input. 
+ */ +inline int ANeuralNetworksExecution_setInputFromMemory( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, + size_t offset, size_t length) { + LOAD_FUNCTION(ANeuralNetworksExecution_setInputFromMemory); + EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length); +} + +/** + * Associate a user buffer with an output of the model of the + * {@link ANeuralNetworksExecution}. + * + * <p>The provided buffer must outlive the execution.</p> + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param execution The execution to be modified. + * @param index The index of the output argument we are setting. It is + * an index into the lists passed to + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not + * the index associated with {@link + * ANeuralNetworksModel_addOperand}. + * @param type The type of the operand. This can be used to specify the + * dimensions that were set to 0 when the operand was added to the + * model. All other values must be the same as specified in the + * model. If the type is the same as specified when the model + * was built, NULL can be passed. + * @param buffer The buffer where the data is to be written. + * @param length The length in bytes of the buffer. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if + * the name is not recognized or the buffer is too small for the output. + */ +inline int ANeuralNetworksExecution_setOutput( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, void* buffer, size_t length) { + LOAD_FUNCTION(ANeuralNetworksExecution_setOutput); + EXECUTE_FUNCTION_RETURN(execution, index, type, buffer, length); +} + +/** + * Associate part of a memory object with an output of the model of the + * {@link ANeuralNetworksExecution}. 
+ * + * <p>The provided memory must outlive the execution.</p> + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param execution The execution to be modified. + * @param index The index of the output argument we are setting. It is + * an index into the lists passed to + * {@link ANeuralNetworksModel_identifyInputsAndOutputs}. It is not + * the index associated with {@link + * ANeuralNetworksModel_addOperand}. + * @param type The type of the operand. This can be used to specify the + * dimensions that were set to 0 when the operand was added to the + * model. All other values must be the same as specified in the + * model. If the type is the same as specified when the model + * was built, NULL can be passed. + * @param memory The memory where the data is to be stored. + * @param offset This specifies the location of the data within the memory. + * The offset is in bytes from the start of memory. + * @param length The length in bytes of the data value. + * + * @return ANEURALNETWORKS_NO_ERROR if successful, ANEURALNETWORKS_BAD_DATA if + * the name is not recognized or the buffer is too small for the output. + */ +inline int ANeuralNetworksExecution_setOutputFromMemory( + ANeuralNetworksExecution* execution, int32_t index, + const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, + size_t offset, size_t length) { + LOAD_FUNCTION(ANeuralNetworksExecution_setOutputFromMemory); + EXECUTE_FUNCTION_RETURN(execution, index, type, memory, offset, length); +} + +/** + * Schedule evaluation of the execution. + * + * <p>Schedules evaluation of the execution. Once the model has been + * applied and the outputs are ready to be consumed, the execution will be + * signaled. Use {@link ANeuralNetworksExecution_wait} to wait for that signal. + * </p> + * + * Multiple executions can be scheduled and evaluated concurrently, and + * compilations can be performed concurrently with executions. 
The runtime makes + * no guarantee on the ordering of the completion of compilations and + * executions. If it's important to the application, the application should + * enforce the ordering by using {@link ANeuralNetworksCompilation_wait} and + * {@link ANeuralNetworksExecution_wait}. + * + * ANeuralNetworksExecution_wait must be called to recuperate the resources used + * by the execution. + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @param execution The execution to be scheduled and executed. + * + * @return ANEURALNETWORKS_NO_ERROR if successful. + */ +inline int ANeuralNetworksExecution_startCompute( + ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event) { + LOAD_FUNCTION(ANeuralNetworksExecution_startCompute); + EXECUTE_FUNCTION_RETURN(execution, event); +} + +/** + * Waits until the execution completes. + * + * More than one thread can wait on an event. When the execution completes, + * all threads will be released. + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + * + * @return ANEURALNETWORKS_NO_ERROR if the execution completed normally. + */ +inline int ANeuralNetworksEvent_wait(ANeuralNetworksEvent* event) { + LOAD_FUNCTION(ANeuralNetworksEvent_wait); + EXECUTE_FUNCTION_RETURN(event); +} + +/** + * Destroys the event. + * + * See {@link ANeuralNetworksExecution} for information on multithreaded usage. + */ +inline void ANeuralNetworksEvent_free(ANeuralNetworksEvent* event) { + LOAD_FUNCTION(ANeuralNetworksEvent_free); + EXECUTE_FUNCTION(event); +} + +#endif // __NEURAL_NETWORKS_SHIM__ diff --git a/include/kernel/acl/Add.h b/include/kernel/acl/Add.h new file mode 100644 index 000000000..73a7d752a --- /dev/null +++ b/include/kernel/acl/Add.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_KERNEL_ACL_ADD_H__ +#define __NNFW_KERNEL_ACL_ADD_H__ + +#include <OperationsUtils.h> + +namespace nnfw { +namespace kernel { +namespace acl { + +bool addFloat32(const float* inputData1, const nnfw::rt::Shape& inputShape1, + const float* inputData2, const nnfw::rt::Shape& inputShape2, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape); + +namespace neon { + +bool addFloat32(const float* inputData1, const nnfw::rt::Shape& inputShape1, + const float* inputData2, const nnfw::rt::Shape& inputShape2, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape); + +} // namespace neon + +} // namespace acl +} // namespace kernel +} // namespace nnfw + +#endif // __NNFW_KERNEL_ACL_ADD_H__ diff --git a/include/kernel/acl/Mul.h b/include/kernel/acl/Mul.h new file mode 100644 index 000000000..7e0c15c6c --- /dev/null +++ b/include/kernel/acl/Mul.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_KERNEL_ACL_MUL_H__ +#define __NNFW_KERNEL_ACL_MUL_H__ + +#include <OperationsUtils.h> + +namespace nnfw { +namespace kernel { +namespace acl { + +bool mulFloat32(const float* inputData1, const nnfw::rt::Shape& inputShape1, + const float* inputData2, const nnfw::rt::Shape& inputShape2, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape); + +namespace neon { + +bool mulFloat32(const float* inputData1, const nnfw::rt::Shape& inputShape1, + const float* inputData2, const nnfw::rt::Shape& inputShape2, + int32_t activation, + float* outputData, const nnfw::rt::Shape& outputShape); + +} // namespace neon +} // namespace acl +} // namespace kernel +} // namespace nnfw + +#endif // __NNFW_KERNEL_ACL_MUL_H__ diff --git a/include/kernel/acl/ReLU.h b/include/kernel/acl/ReLU.h new file mode 100644 index 000000000..20278bf98 --- /dev/null +++ b/include/kernel/acl/ReLU.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_KERNEL_ACL_RELU_H__ +#define __NNFW_KERNEL_ACL_RELU_H__ + +#include <OperationsUtils.h> + +namespace nnfw { +namespace kernel { +namespace acl { + +bool reluFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + float* outputData, const nnfw::rt::Shape& outputShape); + +namespace neon { + + bool reluFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + float* outputData, const nnfw::rt::Shape& outputShape); + +} // namespace neon + +} // namespace acl +} // namespace kernel +} // namespace nnfw + +#endif // __NNFW_KERNEL_ACL_RELU_H__ diff --git a/include/kernel/acl/ReLU6.h b/include/kernel/acl/ReLU6.h new file mode 100644 index 000000000..d68c792e4 --- /dev/null +++ b/include/kernel/acl/ReLU6.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_KERNEL_ACL_RELU6_H__ +#define __NNFW_KERNEL_ACL_RELU6_H__ + +#include <OperationsUtils.h> + +namespace nnfw { +namespace kernel { +namespace acl { + +bool relu6Float32(const float* inputData, const nnfw::rt::Shape& inputShape, + float* outputData, const nnfw::rt::Shape& outputShape); + +bool relu6LUFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + float* outputData, const nnfw::rt::Shape& outputShape); +namespace neon { + + bool relu6Float32(const float* inputData, const nnfw::rt::Shape& inputShape, + float* outputData, const nnfw::rt::Shape& outputShape); + + bool relu6LUFloat32(const float* inputData, const nnfw::rt::Shape& inputShape, + float* outputData, const nnfw::rt::Shape& outputShape); + +} // namespace neon + +} // namespace acl +} // namespace kernel +} // namespace nnfw + +#endif // __NNFW_KERNEL_ACL_RELU6_H__ diff --git a/include/nnfw/std/memory.h b/include/nnfw/std/memory.h new file mode 100644 index 000000000..dd0236958 --- /dev/null +++ b/include/nnfw/std/memory.h @@ -0,0 +1,17 @@ +#ifndef __NNFW_STD_MEMORY_H__ +#define __NNFW_STD_MEMORY_H__ + +#include <memory> + +namespace nnfw +{ + +template <typename T, typename... Args> std::unique_ptr<T> make_unique(Args &&... args) +{ + // NOTE std::make_unique is missing in C++11 standard + return std::unique_ptr<T>(new T(std::forward<Args>(args)...)); +} + +} // namespace nnfw + +#endif // __NNFW_STD_MEMORY_H__ diff --git a/include/support/nnapi/Utils.h b/include/support/nnapi/Utils.h new file mode 100644 index 000000000..d34d2f64b --- /dev/null +++ b/include/support/nnapi/Utils.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_SUPPORT_NNAPI_UTILS_H__ +#define __NNFW_SUPPORT_NNAPI_UTILS_H__ + +#include "NeuralNetworks.h" + +namespace nnfw +{ +namespace support +{ +namespace nnapi +{ + +const char *to_string(const PaddingCode &code); + +} // namespace nnapi +} // namespace support +} // namespace nnfw + +#endif // __NNFW_SUPPORT_NNAPI_UTILS_H__ diff --git a/include/support/nnapi/feature/Reader.h b/include/support/nnapi/feature/Reader.h index bf632f29d..b6ab0a1e2 100644 --- a/include/support/nnapi/feature/Reader.h +++ b/include/support/nnapi/feature/Reader.h @@ -31,26 +31,24 @@ namespace nnapi namespace feature { -template<typename T> class Reader; - -template<> class Reader<float> : public nnfw::util::feature::Reader<float> +template<typename T> class Reader : public nnfw::util::feature::Reader<T> { public: - Reader(const nnfw::util::feature::Shape &shape, const float *base) + Reader(const nnfw::util::feature::Shape &shape, const T *base) : _shape{shape}, _base{base} { // DO NOTHING } public: - float at(uint32_t ch, uint32_t row, uint32_t col) const override + T at(uint32_t ch, uint32_t row, uint32_t col) const override { return *(_base + indexOf(_shape, ch, row, col)); } private: nnfw::util::feature::Shape _shape; - const float *_base; + const T *_base; }; } // namespace feature diff --git a/include/support/tflite/Assert.h b/include/support/tflite/Assert.h new file mode 100644 index 000000000..f5c6bf219 --- /dev/null +++ b/include/support/tflite/Assert.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_SUPPORT_TFLITE_ASSERT_H__ +#define __NNFW_SUPPORT_TFLITE_ASSERT_H__ + +#include "tensorflow/contrib/lite/context.h" + +#include <sstream> + +#define STR_DETAIL(value) #value +#define STR(value) STR_DETAIL(value) + +#define TFLITE_ENSURE(exp) { \ + const TfLiteStatus status = (exp); \ + \ + if (status != kTfLiteOk) \ + { \ + std::ostringstream ss; \ + ss << #exp << " failed (" << __FILE__ << ":" << __LINE__ << ")"; \ + throw std::runtime_error{ss.str()}; \ + } \ +} + +#endif // __NNFW_SUPPORT_TFLITE_ASSERT_H__ diff --git a/include/support/tflite/Diff.h b/include/support/tflite/Diff.h index b17c9313c..f4f3f6fe8 100644 --- a/include/support/tflite/Diff.h +++ b/include/support/tflite/Diff.h @@ -20,88 +20,92 @@ #include "tensorflow/contrib/lite/interpreter.h" #include "util/tensor/Index.h" +#include "util/tensor/Diff.h" +#include "util/tensor/Shape.h" +#include "util/tensor/Comparator.h" #include "support/tflite/TensorView.h" #include <functional> #include <vector> -// NOTE The code below is subject to change. 
-// TODO Introduce namespaces -struct TfLiteTensorDiff -{ - nnfw::util::tensor::Index index; - float expected; - float obtained; - - TfLiteTensorDiff(const nnfw::util::tensor::Index &i) : index(i) - { - // DO NOTHING - } -}; - -class TfLiteTensorComparator +class TfLiteInterpMatchApp { public: - TfLiteTensorComparator(const std::function<bool (float lhs, float rhs)> &fn) : _compare_fn{fn} + TfLiteInterpMatchApp(const nnfw::util::tensor::Comparator &comparator) + : _verbose{false}, _comparator(comparator) { // DO NOTHING } public: - struct Observer - { - virtual void notify(const nnfw::util::tensor::Index &index, float expected, float obtained) = 0; - }; + int &verbose(void) { return _verbose; } + +private: + int _verbose; public: - // NOTE Observer should live longer than comparator - std::vector<TfLiteTensorDiff> compare(const nnfw::support::tflite::TensorView<float> &expected, - const nnfw::support::tflite::TensorView<float> &obtained, - Observer *observer = nullptr) const; + bool run(::tflite::Interpreter &pure, ::tflite::Interpreter &nnapi) const; + template <typename T> + bool compareSingleTensorView(const nnfw::support::tflite::TensorView<T> &expected, + const nnfw::support::tflite::TensorView<T> &obtained, + int id) const; private: - std::function<bool (float lhs, float rhs)> _compare_fn; + const nnfw::util::tensor::Comparator &_comparator; }; -class TfLiteInterpMatchApp +#include "support/tflite/interp/Builder.h" +#include "support/tflite/Quantization.h" + +#include <random> + +class RandomGenerator { public: - TfLiteInterpMatchApp(const TfLiteTensorComparator &comparator) - : _verbose{false}, _comparator(comparator) + RandomGenerator(int seed, float mean, float stddev, + const TfLiteQuantizationParams quantization = make_default_quantization()) + : _rand{seed}, _dist{mean, stddev}, _quantization{quantization} { // DO NOTHING } public: - int &verbose(void) { return _verbose; } - -private: - int _verbose; + template <typename T> + T generate(const 
::nnfw::util::tensor::Shape &, const ::nnfw::util::tensor::Index &) + { + return generate<T>(); + } -public: - bool run(::tflite::Interpreter &pure, ::tflite::Interpreter &nnapi) const; + template <typename T> T generate(void) + { + return _dist(_rand); + } private: - const TfLiteTensorComparator &_comparator; + std::minstd_rand _rand; + std::normal_distribution<float> _dist; + const TfLiteQuantizationParams _quantization; }; -#include "support/tflite/interp/Builder.h" - -#include <random> +template <> +uint8_t RandomGenerator::generate<uint8_t>(void); // For NNAPI testing struct RandomTestParam { int verbose; int tolerance; + int tensor_logging = 0; + std::string log_path = ""; // meaningful only when tensor_logging is 1 }; class RandomTestRunner { public: - RandomTestRunner(int seed, const RandomTestParam ¶m) - : _rand{seed}, _param{param} + RandomTestRunner(int seed, const RandomTestParam ¶m, + const TfLiteQuantizationParams quantization = make_default_quantization()) + : _randgen{seed, 0.0f, 2.0f, quantization}, _param{param} { // DO NOTHING } @@ -111,9 +115,15 @@ public: // Return 0 if test succeeds int run(const nnfw::support::tflite::interp::Builder &builder); +public: + RandomGenerator &generator() { return _randgen; }; + private: - std::minstd_rand _rand; + RandomGenerator _randgen; const RandomTestParam _param; + +public: + static RandomTestRunner make(int seed); }; #endif // __NNFW_SUPPORT_TFLITE_COMPARE_H__ diff --git a/include/support/tflite/InterpreterSession.h b/include/support/tflite/InterpreterSession.h new file mode 100644 index 000000000..662de1d17 --- /dev/null +++ b/include/support/tflite/InterpreterSession.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_SUPPORT_TFLITE_INTERPRETER_SESSION_H__ +#define __NNFW_SUPPORT_TFLITE_INTERPRETER_SESSION_H__ + +#include "Session.h" + +namespace nnfw +{ +namespace support +{ +namespace tflite +{ + +class InterpreterSession final : public Session +{ +public: + InterpreterSession(::tflite::Interpreter *interp) : _interp{interp} + { + // DO NOTHING + } + +public: + ::tflite::Interpreter *interp(void) override { return _interp; } + +public: + bool prepare(void) override + { + _interp->UseNNAPI(false); + + if (kTfLiteOk != _interp->AllocateTensors()) + { + return false; + } + + return true; + } + + bool run(void) override + { + // Return true if Invoke returns kTfLiteOk + return kTfLiteOk == _interp->Invoke(); + } + + bool teardown(void) override + { + // Do NOTHING currently + return true; + } + +private: + ::tflite::Interpreter * const _interp; +}; + +} // namespace tflite +} // namespace support +} // namespace nnfw + +#endif // __NNFW_SUPPORT_TFLITE_INTERPRETER_SESSION_H__ diff --git a/include/support/tflite/NNAPISession.h b/include/support/tflite/NNAPISession.h new file mode 100644 index 000000000..4a8a2162b --- /dev/null +++ b/include/support/tflite/NNAPISession.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_SUPPORT_TFLITE_NNAPI_SESSION_H__ +#define __NNFW_SUPPORT_TFLITE_NNAPI_SESSION_H__ + +#include "Session.h" +#include "support/tflite/nnapi_delegate.h" + +namespace nnfw +{ +namespace support +{ +namespace tflite +{ + +class NNAPISession final : public Session +{ +public: + NNAPISession(::tflite::Interpreter *interp) : _interp{interp} + { + // Construct Graph from Interpreter + _delegate.BuildGraph(_interp); + } + +public: + ::tflite::Interpreter *interp(void) override { return _interp; } + +public: + bool prepare(void) override + { + // Explicitly turn off T/F lite internal NNAPI delegation in order to use locally defined + // NNAPI delegation. + _interp->UseNNAPI(false); + + if (kTfLiteOk != _interp->AllocateTensors()) + { + return false; + } + + return true; + } + + bool run(void) override + { + return kTfLiteOk == _delegate.Invoke(_interp); + } + + bool teardown(void) override + { + // DO NOTHING + return true; + } + +private: + ::tflite::Interpreter * const _interp; + nnfw::NNAPIDelegate _delegate; +}; + +} // namespace tflite +} // namespace support +} // namespace nnfw + +#endif // __NNFW_SUPPORT_TFLITE_NNAPI_SESSION_H__ diff --git a/include/support/tflite/Quantization.h b/include/support/tflite/Quantization.h new file mode 100644 index 000000000..a9027278a --- /dev/null +++ b/include/support/tflite/Quantization.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_SUPPORT_TFLITE_QUANTIZATION_H__ +#define __NNFW_SUPPORT_TFLITE_QUANTIZATION_H__ + +union BitwiseIntToFloat { + int i; + float f; +}; + +static const float FLOAT_NEAREST_TO_1 = BitwiseIntToFloat{0x3f7fffff}.f; + +#include "tensorflow/contrib/lite/context.h" + +TfLiteQuantizationParams make_default_quantization(void); + +#endif // __NNFW_SUPPORT_TFLITE_QUANTIZATION_H__ diff --git a/include/support/tflite/Session.h b/include/support/tflite/Session.h new file mode 100644 index 000000000..2ee2abe23 --- /dev/null +++ b/include/support/tflite/Session.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_SUPPORT_TFLITE_SESSION_H__ +#define __NNFW_SUPPORT_TFLITE_SESSION_H__ + +#include <tensorflow/contrib/lite/interpreter.h> + +namespace nnfw +{ +namespace support +{ +namespace tflite +{ + +struct Session +{ + virtual ~Session() = default; + + virtual ::tflite::Interpreter *interp(void) = 0; + + virtual bool prepare(void) = 0; + virtual bool run(void) = 0; + virtual bool teardown(void) = 0; +}; + +} // namespace tflite +} // namespace support +} // namespace nnfw + +#endif // __NNFW_SUPPORT_TFLITE_SESSION_H__ diff --git a/include/support/tflite/TensorLogger.h b/include/support/tflite/TensorLogger.h new file mode 100644 index 000000000..97a0a49d7 --- /dev/null +++ b/include/support/tflite/TensorLogger.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_SUPPORT_TFLITE_TENSOR_LOGGER_H__ +#define __NNFW_SUPPORT_TFLITE_TENSOR_LOGGER_H__ + +#include "util/tensor/IndexIterator.h" +#include "support/tflite/TensorView.h" + +#include <tensorflow/contrib/lite/interpreter.h> +#include <tensorflow/contrib/lite/context.h> +#include <fstream> +#include <iomanip> + +namespace nnfw +{ +namespace support +{ +namespace tflite +{ + +/* +This is a utility to write input and output value / shape into a file in python form. 
+any python app can load this value by running the python code below: + + exec(open(filename).read()) + +generated python code looks like the following: + +# ------------- test name ------------- +tensor_shape_gen = [] +tensor_value_gen = [] + +tensor_shape_gen.append("{2, 1, 2}") +tensor_value_gen.append([1, 2, 3, 4]) + +tensor_shape_gen.append("{2}") +tensor_value_gen.append([1, 2]) + +tensor_shape_gen.append("{2, 1, 2}") +tensor_value_gen.append([1, 4, 3, 8]) +# ----------------------------------------- +*/ + +class TensorLogger +{ + private: + std::ofstream _outfile; + + public: + + static TensorLogger &instance() + { + static TensorLogger instance; + return instance; + } + + void save(const std::string &path, ::tflite::Interpreter &interp) + { + open(path); + + int log_index = 0; + for (const auto id : interp.inputs()) + { + _outfile << "# input tensors" << std::endl; + printTensor(interp, id, log_index++); + } + for (const auto id : interp.outputs()) + { + _outfile << "# output tensors" << std::endl; + printTensor(interp, id, log_index++); + } + close(); + } + + private: + + void open(const std::string &path) + { + if (! 
_outfile.is_open()) + _outfile.open(path, std::ios_base::out); + + _outfile << "# ------ file: " << path << " ------" << std::endl + << "tensor_shape_gen = []" << std::endl + << "tensor_value_gen = []" << std::endl << std::endl; + } + + void printTensor(::tflite::Interpreter &interp, const int id, const int log_index) + { + const TfLiteTensor* tensor = interp.tensor(id); + + _outfile << "# tensor name: " << tensor->name << std::endl; + _outfile << "# tflite::interpreter.tensor("<< id <<") -> " + "tensor_value_gen["<< log_index << "]" << std::endl; + + if (tensor->type == kTfLiteInt32) + { + printTensorShape(tensor); + printTensorValue<int32_t>(tensor, tensor->data.i32); + } + else if (interp.tensor(id)->type == kTfLiteUInt8) + { + printTensorShape(tensor); + printTensorValue<uint8_t>(tensor, tensor->data.uint8); + } + else if (tensor->type == kTfLiteFloat32) + { + printTensorShape(tensor); + printTensorValue<float>(tensor, tensor->data.f); + } + } + + void printTensorShape(const TfLiteTensor* tensor) + { + _outfile << "tensor_shape_gen.append('{"; + + size_t r = 0; + for (; r < tensor->dims->size - 1; r++) + { + _outfile << tensor->dims->data[r] + << ", "; + } + _outfile << tensor->dims->data[r]; + + _outfile << "}')" << std::endl; + } + + template <typename T> + void printTensorValue(const TfLiteTensor* tensor, T* tensor_data_ptr) + { + _outfile << "tensor_value_gen.append(["; + + _outfile << std::fixed << std::setprecision(10); + + const T *end = reinterpret_cast<const T *>(tensor->data.raw_const + tensor->bytes); + for (T *ptr = tensor_data_ptr; ptr < end; ptr++) + _outfile << *ptr << ", "; + + _outfile << "])" << std::endl << std::endl; + } + + void close() + { + _outfile << "# --------- tensor shape and value defined above ---------" << std::endl; + _outfile.close(); + } +}; + +} // namespace tflite +} // namespace support +} // namespace nnfw + +#endif // __NNFW_SUPPORT_TFLITE_TENSOR_LOGGER_H__ diff --git a/include/support/tflite/TensorShapeUtils.h 
b/include/support/tflite/TensorShapeUtils.h new file mode 100644 index 000000000..711128b48 --- /dev/null +++ b/include/support/tflite/TensorShapeUtils.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_SUPPORT_TFLITE_TENSOR_SHAPE_UTILS_H__ +#define __NNFW_SUPPORT_TFLITE_TENSOR_SHAPE_UTILS_H__ + +#include "util/tensor/Shape.h" + +#include <vector> + +namespace nnfw +{ +namespace support +{ +namespace tflite +{ + +// Converts tensor::Shape into a vector +static inline std::vector<int32_t> as_dims(const nnfw::util::tensor::Shape &shape) +{ + std::vector<int32_t> dims; + + for (uint32_t axis = 0; axis < shape.rank(); ++axis) + { + dims.emplace_back(shape.dim(axis)); + } + + return dims; +} + +nnfw::util::tensor::Shape broadcast(const nnfw::util::tensor::Shape &lhs_shape, + const nnfw::util::tensor::Shape &rhs_shape); + +} // namespace tflite +} // namespace support +} // namespace nnfw + +#endif // __NNFW_SUPPORT_TFLITE_TENSOR_SHAPE_UTILS_H__ diff --git a/include/support/tflite/TensorView.h b/include/support/tflite/TensorView.h index 35c90a372..0475a4b45 100644 --- a/include/support/tflite/TensorView.h +++ b/include/support/tflite/TensorView.h @@ -31,30 +31,56 @@ namespace support namespace tflite { -template<typename T> class TensorView; - -template<> class TensorView<float> final : public nnfw::util::tensor::Reader<float> +template<typename T> 
// Read/write view over an externally-owned TF Lite tensor buffer.
template <typename T> class TensorView final : public nnfw::util::tensor::Reader<T>
{
public:
  // Wraps 'base' (not owned) as a tensor of the given shape.
  TensorView(const nnfw::util::tensor::Shape &shape, T *base) : _shape{shape}, _base{base}
  {
    // Derive per-axis strides from the shape
    _stride.init(_shape);
  }

public:
  const nnfw::util::tensor::Shape &shape(void) const { return _shape; }

public:
  // Read-only element access
  T at(const nnfw::util::tensor::Index &index) const override
  {
    return _base[_stride.offset(index)];
  }

public:
  // Mutable element access
  T &at(const nnfw::util::tensor::Index &index) { return _base[_stride.offset(index)]; }

private:
  nnfw::util::tensor::Shape _shape;

public:
  T *_base; // public for legacy callers; prefer at() for element access

public:
  nnfw::util::tensor::NonIncreasingStride _stride;

public:
  // TODO Introduce Operand ID class
  static TensorView<T> make(::tflite::Interpreter &interp, int tensor_index)
  {
    auto tensor_ptr = interp.tensor(tensor_index);

    // Build the shape from the TF Lite dimension array
    nnfw::util::tensor::Shape shape(tensor_ptr->dims->size);

    for (uint32_t axis = 0; axis < shape.rank(); ++axis)
    {
      shape.dim(axis) = tensor_ptr->dims->data[axis];
    }

    return TensorView<T>(shape, interp.typed_tensor<T>(tensor_index));
  }
};
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_SUPPORT_TFLITE_KERNELS_CUSTOM_OP_H__ +#define __NNFW_SUPPORT_TFLITE_KERNELS_CUSTOM_OP_H__ + +#include "tensorflow/contrib/lite/context.h" +#include "support/tflite/kernels/TensorFlowMax.h" +#include "support/tflite/kernels/RSQRT.h" +#include "support/tflite/kernels/SquaredDifference.h" + +namespace tflite +{ +namespace ops +{ +namespace custom +{ +namespace nnfw +{ + +#define REGISTER_FUNCTION(Name) \ + TfLiteRegistration *Register_##Name(void) \ + { \ + static TfLiteRegistration r = { Name::Init##Name , Name::Free##Name , Name::Prepare##Name , \ + Name::Eval##Name , 0, #Name}; \ + return &r; \ + } + +REGISTER_FUNCTION(TensorFlowMax) +REGISTER_FUNCTION(RSQRT) +REGISTER_FUNCTION(SquaredDifference) +#undef REGISTER_FUNCTION + +} // namespace nnfw +} // namespace custom +} // namespace ops +} // namespace tflite + +#endif // __NNFW_SUPPORT_TFLITE_KERNELS_CUSTOM_OP_H__ diff --git a/include/support/tflite/kernels/RSQRT.h b/include/support/tflite/kernels/RSQRT.h new file mode 100644 index 000000000..d52442861 --- /dev/null +++ b/include/support/tflite/kernels/RSQRT.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_SUPPORT_TFLITE_KERNELS_RSQRT_H__ +#define __NNFW_SUPPORT_TFLITE_KERNELS_RSQRT_H__ + +#include "tensorflow/contrib/lite/context.h" + +namespace tflite +{ +namespace ops +{ +namespace custom +{ +namespace nnfw +{ +namespace RSQRT +{ + + void *InitRSQRT(TfLiteContext *context, const char *buffer, size_t length); + void FreeRSQRT(TfLiteContext *context, void *buffer); + TfLiteStatus PrepareRSQRT(TfLiteContext *context, TfLiteNode *node); + TfLiteStatus EvalRSQRT(TfLiteContext *context, TfLiteNode *node); + +} // namespace RSQRT +} // namespace nnfw +} // namespace custom +} // namespace ops +} // namespace tflite + +#endif diff --git a/include/support/tflite/kernels/SquaredDifference.h b/include/support/tflite/kernels/SquaredDifference.h new file mode 100644 index 000000000..d0b7d3e7b --- /dev/null +++ b/include/support/tflite/kernels/SquaredDifference.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_SUPPORT_TFLITE_KERNELS_SQUARED_DIFFERENCE_H__ +#define __NNFW_SUPPORT_TFLITE_KERNELS_SQUARED_DIFFERENCE_H__ + +#include "tensorflow/contrib/lite/context.h" + +namespace tflite +{ +namespace ops +{ +namespace custom +{ +namespace nnfw +{ +namespace SquaredDifference +{ + + void *InitSquaredDifference(TfLiteContext *context, const char *buffer, size_t length); + void FreeSquaredDifference(TfLiteContext *context, void *buffer); + TfLiteStatus PrepareSquaredDifference(TfLiteContext *context, TfLiteNode *node); + TfLiteStatus EvalSquaredDifference(TfLiteContext *context, TfLiteNode *node); + +} // namespace SquaredDifference +} // namespace nnfw +} // namespace custom +} // namespace ops +} // namespace tflite + +#endif diff --git a/include/support/tflite/kernels/TensorFlowMax.h b/include/support/tflite/kernels/TensorFlowMax.h new file mode 100644 index 000000000..99c9fdc68 --- /dev/null +++ b/include/support/tflite/kernels/TensorFlowMax.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_SUPPORT_TFLITE_KERNELS_TENSORFLOW_MAX_H__ +#define __NNFW_SUPPORT_TFLITE_KERNELS_TENSORFLOW_MAX_H__ + +#include "tensorflow/contrib/lite/context.h" + +namespace tflite +{ +namespace ops +{ +namespace custom +{ +namespace nnfw +{ +namespace TensorFlowMax +{ + + void *InitTensorFlowMax(TfLiteContext *context, const char *buffer, size_t length); + void FreeTensorFlowMax(TfLiteContext *context, void *buffer); + TfLiteStatus PrepareTensorFlowMax(TfLiteContext *context, TfLiteNode *node); + TfLiteStatus EvalTensorFlowMax(TfLiteContext *context, TfLiteNode *node); + +} // namespace TensorFlowMax +} // namespace nnfw +} // namespace custom +} // namespace ops +} // namespace tflite + +#endif diff --git a/include/support/tflite/kernels/register.h b/include/support/tflite/kernels/register.h new file mode 100644 index 000000000..43a4c1a23 --- /dev/null +++ b/include/support/tflite/kernels/register.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// NOTE This header is derived from the following file (in TensorFlow) +// 'externals/tensorflow/tensorflow/contrib/lite/kernels/register.h' +#ifndef __NNFW_SUPPORT_TFLITE_KERNELS_REGISTER_H__ +#define __NNFW_SUPPORT_TFLITE_KERNELS_REGISTER_H__ + +#include <unordered_map> +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/model.h" + +// TODO Use namespace nnfw +namespace tflite { +namespace ops { +namespace builtin { + +class BuiltinOpResolver : public MutableOpResolver { + public: + BuiltinOpResolver(); +}; + +} // namespace builtin +} // namespace ops +} // namespace tflite + +#endif // __NNFW_SUPPORT_TFLITE_KERNELS_REGISTER_H__ diff --git a/include/support/tflite/nnapi_delegate.h b/include/support/tflite/nnapi_delegate.h new file mode 100644 index 000000000..a5da8ac39 --- /dev/null +++ b/include/support/tflite/nnapi_delegate.h @@ -0,0 +1,84 @@ +/* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// NOTE To minimize diff with upstream tensorflow, disable clang-format +// clang-format off + +// NOTE This header is derived from the following file (in TensorFlow) +// 'externals/tensorflow/tensorflow/contrib/lite/nnapi_delegate.h' +#ifndef __NNFW_SUPPORT_TFLITE_NNAPI_DELEGATE_H__ +#define __NNFW_SUPPORT_TFLITE_NNAPI_DELEGATE_H__ + +#include "tensorflow/contrib/lite/allocation.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "NeuralNetworksShim.h" + +class ANeuralNetworksModel; +class ANeuralNetworksCompilation; + +namespace nnfw { + +class NNAPIAllocation : public tflite::MMAPAllocation { + public: + NNAPIAllocation(const char* filename, ::tflite::ErrorReporter* error_reporter); + ~NNAPIAllocation(); + + size_t offset(const void* ptr) const { + auto signed_offset = reinterpret_cast<const uint8_t*>(ptr) - + reinterpret_cast<const uint8_t*>(mmapped_buffer_); + + return static_cast<size_t>(signed_offset); + } + + ANeuralNetworksMemory* memory() const { return handle_; } + bool valid() const override { return handle_ != nullptr; } + + private: + mutable ANeuralNetworksMemory* handle_ = nullptr; +}; + +class NNAPIDelegate { + public: + ~NNAPIDelegate(); + + // Convert a tflite graph to NNAPI + TfLiteStatus BuildGraph(::tflite::Interpreter* interpreter); + + // Run + TfLiteStatus Invoke(::tflite::Interpreter* interpreter); + + private: + // The NN API model handle + ANeuralNetworksModel* nn_model_ = nullptr; + // The NN API compilation handle + ANeuralNetworksCompilation* nn_compiled_model_ = nullptr; + + // List of state tensors for LSTM, RNN, SVDF. + // NN API does not allow ops to maintain states across multiple + // invocations. We need to manually create state input tensors from + // corresponding state output tensors of TFLite operations, and map them + // correctly. 
+ std::vector<int> model_states_inputs_; // holds NNAPI operand ids + std::vector<int> model_states_outputs_; // holds TFLite tensor ids +}; + +} // namespace nnfw + +#endif // TENSORFLOW_CONTRIB_LITE_NNAPI_DELEGATE_H_ + +// clang-format on diff --git a/include/util/EnvVar.h b/include/util/EnvVar.h new file mode 100644 index 000000000..5512fedbd --- /dev/null +++ b/include/util/EnvVar.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
#ifndef __NNFW_UTIL_ENV_VAR__
#define __NNFW_UTIL_ENV_VAR__

#include <algorithm>
#include <array>
#include <cstdlib>
#include <string>

namespace nnfw
{
namespace util
{

// Reads an environment variable once at construction time and offers typed
// accessors with caller-provided defaults.
class EnvVar
{
public:
  EnvVar(const std::string &key)
  {
    const char *value = std::getenv(key.c_str());
    // An unset variable is treated the same as an empty value
    _value = (value == nullptr) ? "" : value;
  }

  // Raw string value, or `def` when the variable is unset/empty.
  std::string asString(const std::string &def) const
  {
    if (_value.empty())
      return def;
    return _value;
  }

  // Boolean value: false iff the value is one of the known "false"
  // spellings; any other non-empty value counts as true.
  bool asBool(bool def) const
  {
    if (_value.empty())
      return def;
    static const std::array<std::string, 5> false_list{"0", "OFF", "FALSE", "N", "NO"};
    // BUG FIX: the original returned the iterator from std::find directly;
    // it converts to bool as "pointer is non-null", i.e. always true, so
    // "0"/"OFF"/... could never disable anything.
    return std::find(false_list.begin(), false_list.end(), _value) == false_list.end();
  }

  // Integer value via std::stoi.
  // NOTE(review): std::stoi throws on non-numeric input -- confirm callers
  // never pass garbage values.
  int asInt(int def) const
  {
    if (_value.empty())
      return def;
    return std::stoi(_value);
  }

private:
  std::string _value; // empty when the variable is unset
};

} // namespace util
} // namespace nnfw

#endif // __NNFW_UTIL_ENV_VAR__
#ifndef __NNFW_UTIL_BENCHMARK_H__
#define __NNFW_UTIL_BENCHMARK_H__

#include <chrono>

namespace nnfw
{
namespace util
{
// Benchmark support
namespace benchmark
{

// Holds a reference to a caller-owned duration into which elapsed time
// is accumulated.
template <typename T> class Accumulator
{
public:
  Accumulator(T &ref) : _ref(ref)
  {
    // DO NOTHING
  }

public:
  T &operator()(void) { return _ref; }

private:
  T &_ref;
};

// Times the callable with steady_clock and adds the elapsed duration to
// the accumulator. Usage: measure(out) << [] { work(); };
template <typename T, typename Callable>
Accumulator<T> &operator<<(Accumulator<T> &&acc, Callable cb)
{
  const auto begin = std::chrono::steady_clock::now();
  cb();
  const auto end = std::chrono::steady_clock::now();

  acc() += std::chrono::duration_cast<T>(end - begin);

  return acc;
}

template <typename T> Accumulator<T> measure(T &out) { return Accumulator<T>(out); }

} // namespace benchmark
} // namespace util
} // namespace nnfw

#endif // __NNFW_UTIL_BENCHMARK_H__
#ifndef __UTIL_ENVIRONMENT_H__
#define __UTIL_ENVIRONMENT_H__

#include <string>

namespace nnfw
{
namespace util
{

// Plain getters: read an environment variable, falling back to the default.
int get_env_int(const char *name, int defaultValue = 0);
bool get_env_bool(const char *name, bool defaultValue = false);

} // namespace util
} // namespace nnfw

namespace nnfw
{
namespace util
{
namespace env
{

// Generic accessor interface: access() fills 'out' and reports success.
template <typename T> struct Accessor
{
  virtual ~Accessor() = default;

  virtual bool access(T &out) const = 0;
};

// Reads an int-valued variable identified by 'tag'.
class IntAccessor : public Accessor<int>
{
public:
  IntAccessor(const std::string &tag);

public:
  bool access(int &out) const override;

private:
  std::string _tag;
};

// Reads a float-valued variable identified by 'tag'.
class FloatAccessor : public Accessor<float>
{
public:
  FloatAccessor(const std::string &tag);

public:
  bool access(float &out) const override;

private:
  std::string _tag;
};

} // namespace env
} // namespace util
} // namespace nnfw

#endif // __UTIL_ENVIRONMENT_H__
#ifndef __NNFW_UTIL_FEATURE_INDEX_H__
#define __NNFW_UTIL_FEATURE_INDEX_H__

#include <cstdint>

namespace nnfw
{
namespace util
{
namespace feature
{

// Coordinate of a single element inside a feature map (batch, channel,
// row, column).
class Index
{
public:
  // BUG FIX: the defaulted constructor left every member uninitialized,
  // so reading a default-constructed Index was undefined behavior.
  // In-class initializers below zero-initialize instead.
  Index() = default;

public:
  // 3-D index: batch defaults to 1 (mirrors feature::Shape's 3-arg ctor).
  Index(int32_t ch, int32_t row, int32_t col) : _batch{1}, _ch{ch}, _row{row}, _col{col}
  {
    // DO NOTHING
  }
  Index(int32_t batch, int32_t ch, int32_t row, int32_t col)
      : _batch{batch}, _ch{ch}, _row{row}, _col{col}
  {
    // DO NOTHING
  }

public:
  int32_t batch(void) const { return _batch; }
  int32_t ch(void) const { return _ch; }
  int32_t row(void) const { return _row; }
  int32_t col(void) const { return _col; }

public:
  int32_t &batch(void) { return _batch; }
  int32_t &ch(void) { return _ch; }
  int32_t &row(void) { return _row; }
  int32_t &col(void) { return _col; }

private:
  int32_t _batch{0};
  int32_t _ch{0};
  int32_t _row{0};
  int32_t _col{0};
};

} // namespace feature
} // namespace util
} // namespace nnfw

#endif // __NNFW_UTIL_FEATURE_INDEX_H__
+ */ + +#ifndef __NNFW_UTIL_FEATURE_INDEX_ITERATOR_H__ +#define __NNFW_UTIL_FEATURE_INDEX_ITERATOR_H__ + +#include "util/feature/Shape.h" + +namespace nnfw +{ +namespace util +{ +namespace feature +{ + +class IndexIterator +{ +public: + IndexIterator(const Shape &shape) : _shape{shape} + { + // DO NOTHING + } + +public: + template <typename Callable> IndexIterator &iter(Callable cb) + { + for (int32_t batch = 0; batch < _shape.N; ++batch) + { + for (int32_t ch = 0; ch < _shape.C; ++ch) + { + for (int32_t row = 0; row < _shape.H; ++row) + { + for (int32_t col = 0; col < _shape.W; ++col) + { + cb(batch, ch, row, col); + } + } + } + } + + return (*this); + } + +private: + const Shape _shape; +}; + +static inline IndexIterator iterate(const Shape &shape) { return IndexIterator{shape}; } + +template <typename Callable> IndexIterator &operator<<(IndexIterator &&it, Callable cb) +{ + return it.iter(cb); +} + +} // namespace feature +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_FEATURE_INDEX_ITERATOR_H__ diff --git a/include/util/feature/Object.h b/include/util/feature/Object.h new file mode 100644 index 000000000..ce66c2437 --- /dev/null +++ b/include/util/feature/Object.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_UTIL_FEATURE_OBJECT_H__ +#define __NNFW_UTIL_FEATURE_OBJECT_H__ + +#include "util/feature/Shape.h" +#include "util/feature/Index.h" +#include "util/feature/Reader.h" + +#include <vector> + +namespace nnfw +{ +namespace util +{ +namespace feature +{ + +template <typename T> class Object final : public Reader<T> +{ +public: + using Generator = std::function<T(const Shape &shape, const Index &index)>; + +public: + Object(const Shape &shape, const Generator &fn) : _shape{shape} + { + _value.resize(_shape.C * _shape.H * _shape.W); + + for (int32_t ch = 0; ch < _shape.C; ++ch) + { + for (int32_t row = 0; row < _shape.H; ++row) + { + for (int32_t col = 0; col < _shape.W; ++col) + { + _value.at(offsetOf(ch, row, col)) = fn(_shape, Index{ch, row, col}); + } + } + } + } + +public: + const Shape &shape(void) const { return _shape; } + +public: + T at(uint32_t ch, uint32_t row, uint32_t col) const override + { + return _value.at(offsetOf(ch, row, col)); + } + +private: + uint32_t offsetOf(uint32_t ch, uint32_t row, uint32_t col) const + { + return ch * _shape.H * _shape.W + row * _shape.W + col; + } + +private: + Shape _shape; + std::vector<T> _value; +}; + +} // namespace feature +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_FEATURE_OBJECT_H__ diff --git a/include/util/feature/Reader.h b/include/util/feature/Reader.h new file mode 100644 index 000000000..f870f7d11 --- /dev/null +++ b/include/util/feature/Reader.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
#ifndef __NNFW_UTIL_FEATURE_READER_H__
#define __NNFW_UTIL_FEATURE_READER_H__

#include <cstdint>

namespace nnfw
{
namespace util
{
namespace feature
{

// Read-only element access over a feature map.
template <typename T> struct Reader
{
  virtual ~Reader() = default;

  // Element at (ch, row, col) -- no batch dimension.
  virtual T at(uint32_t ch, uint32_t row, uint32_t col) const = 0;
  // Element at (batch, ch, row, col).
  virtual T at(uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) const = 0;
};

} // namespace feature
} // namespace util
} // namespace nnfw

#endif // __NNFW_UTIL_FEATURE_READER_H__
#ifndef __NNFW_UTIL_FEATURE_SHAPE_H__
#define __NNFW_UTIL_FEATURE_SHAPE_H__

#include <cstdint>

namespace nnfw
{
namespace util
{
namespace feature
{

// Dimensions of a feature map in NCHW order.
struct Shape
{
  int32_t N; // Batch
  int32_t C; // Depth
  int32_t H; // Height
  int32_t W; // Width

  Shape() = default;
  // 3-D shape: batch defaults to 1.
  Shape(int32_t depth, int32_t height, int32_t width) : N{1}, C{depth}, H{height}, W{width}
  {
    // DO NOTHING
  }
  Shape(int32_t batch, int32_t depth, int32_t height, int32_t width)
      : N{batch}, C{depth}, H{height}, W{width}
  {
    // DO NOTHING
  }
};

} // namespace feature
} // namespace util
} // namespace nnfw

// BUG FIX: the closing comment said __NNFW_UTIL_FEATURE_H__, which does not
// match the guard opened above.
#endif // __NNFW_UTIL_FEATURE_SHAPE_H__
+ */ + +#ifndef __NNFW_UTIL_FEATURE_TEXT_FORMATTER_H__ +#define __NNFW_UTIL_FEATURE_TEXT_FORMATTER_H__ + +#include "util/feature/Shape.h" +#include "util/feature/Reader.h" + +#include <ostream> +#include <iomanip> +#include <limits> + +namespace nnfw +{ +namespace util +{ +namespace feature +{ + +template <typename T> class TextFormatter +{ +public: + TextFormatter(const Shape &shape, const Reader<T> &data) : _shape(shape), _data(data) + { + // DO NOTHING + } + +public: + const Shape &shape(void) const { return _shape; } + const Reader<T> &data(void) const { return _data; } + +private: + const Shape &_shape; + const Reader<T> &_data; +}; + +template <typename T> std::ostream &operator<<(std::ostream &os, const TextFormatter<T> &fmt) +{ + const auto &shape = fmt.shape(); + + for (uint32_t ch = 0; ch < shape.C; ++ch) + { + os << " Channel " << ch << ":" << std::endl; + for (uint32_t row = 0; row < shape.H; ++row) + { + os << " "; + for (uint32_t col = 0; col < shape.W; ++col) + { + const auto value = fmt.data().at(ch, row, col); + os << std::right; + os << std::fixed; + os << std::setw(std::numeric_limits<T>::digits10 + 2); + os << std::setprecision(5); + os << value; + os << " "; + } + os << std::endl; + } + } + + return os; +} + +} // namespace feature +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_FEATURE_TEXT_FORMATTER_H__ diff --git a/include/util/fp32.h b/include/util/fp32.h new file mode 100644 index 000000000..604435470 --- /dev/null +++ b/include/util/fp32.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_UTIL_FP32_H__
+#define __NNFW_UTIL_FP32_H__
+
+#include <cmath>
+#include <cfloat>
+#include <algorithm>
+#include <cstdint>
+
+namespace nnfw
+{
+namespace util
+{
+namespace fp32
+{
+
+inline float relative_diff(float lhs, float rhs)
+{
+  const auto diff = std::fabs(lhs - rhs);
+  const auto base = std::max(std::fabs(lhs), std::fabs(rhs));
+
+  return diff / base;
+}
+
+inline bool epsilon_equal(float expected, float obtained, uint32_t tolerance = 1)
+{
+  if (std::isnan(expected) && std::isnan(obtained))
+  {
+    return true;
+  }
+
+  // Let's use relative epsilon comparison
+  const auto diff = std::fabs(expected - obtained);
+  const auto max = std::max(std::fabs(expected), std::fabs(obtained));
+
+  return diff <= (max * FLT_EPSILON * tolerance);
+}
+
+inline bool absolute_epsilon_equal(float expected, float obtained, float tolerance = 0.001)
+{
+  if (std::isnan(expected) && std::isnan(obtained))
+  {
+    return true;
+  }
+
+  // Let's use absolute epsilon comparison
+  const auto diff = std::fabs(expected - obtained);
+
+  return diff <= tolerance;
+}
+
+} // namespace fp32
+} // namespace util
+} // namespace nnfw
+
+#endif // __NNFW_UTIL_FP32_H__
diff --git a/include/util/kernel/IndexIterator.h b/include/util/kernel/IndexIterator.h
new file mode 100644
index 000000000..d01656447
--- /dev/null
+++ b/include/util/kernel/IndexIterator.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd.
All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NNFW_UTIL_KERNEL_INDEX_ITERATOR_H__
+#define __NNFW_UTIL_KERNEL_INDEX_ITERATOR_H__
+
+#include "util/kernel/Shape.h"
+
+namespace nnfw
+{
+namespace util
+{
+namespace kernel
+{
+
+class IndexIterator
+{
+public:
+  IndexIterator(const Shape &shape) : _shape{shape}
+  {
+    // DO NOTHING
+  }
+
+public:
+  template <typename Callable> IndexIterator &iter(Callable cb)
+  {
+    for (int32_t nth = 0; nth < _shape.N; ++nth)
+    {
+      for (int32_t ch = 0; ch < _shape.C; ++ch)
+      {
+        for (int32_t row = 0; row < _shape.H; ++row)
+        {
+          for (int32_t col = 0; col < _shape.W; ++col)
+          {
+            cb(nth, ch, row, col);
+          }
+        }
+      }
+    }
+
+    return (*this);
+  }
+
+private:
+  const Shape _shape;
+};
+
+inline IndexIterator iterate(const Shape &shape) { return IndexIterator{shape}; }
+
+template <typename Callable> IndexIterator &operator<<(IndexIterator &&it, Callable cb)
+{
+  return it.iter(cb);
+}
+
+} // namespace kernel
+} // namespace util
+} // namespace nnfw
+
+#endif // __NNFW_UTIL_KERNEL_INDEX_ITERATOR_H__
diff --git a/include/util/kernel/RandomObject.h b/include/util/kernel/RandomObject.h
new file mode 100644
index 000000000..d6fca9a66
--- /dev/null
+++ b/include/util/kernel/RandomObject.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_UTIL_KERNEL_RANDOM_OBJECT_H__ +#define __NNFW_UTIL_KERNEL_RANDOM_OBJECT_H__ + +#include "util/kernel/Shape.h" +#include "util/kernel/Reader.h" + +#include <vector> + +namespace nnfw +{ +namespace util +{ +namespace kernel +{ + +template <typename T> class RandomObject final : public Reader<T> +{ +public: + RandomObject(const Shape &shape) : _shape{shape} + { + const uint32_t size = _shape.N * _shape.C * _shape.H * _shape.W; + + // TODO Use random number + for (uint32_t off = 0; off < size; ++off) + { + _value.emplace_back(static_cast<float>(off)); + } + } + +public: + const Shape &shape(void) const { return _shape; } + +public: + T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const override + { + uint32_t index = 0; + + index += nth * _shape.C * _shape.H * _shape.W; + index += ch * _shape.H * _shape.W; + index += row * _shape.W; + index += col; + + return _value.at(index); + } + +private: + const Shape _shape; + std::vector<T> _value; +}; + +} // namespace kernel +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_KERNEL_RANDOM_OBJECT_H__ diff --git a/include/util/kernel/Reader.h b/include/util/kernel/Reader.h new file mode 100644 index 000000000..9d8f33ad6 --- /dev/null +++ b/include/util/kernel/Reader.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. 
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_UTIL_KERNEL_READER_H__ +#define __NNFW_UTIL_KERNEL_READER_H__ + +#include <cstdint> + +namespace nnfw +{ +namespace util +{ +namespace kernel +{ + +template <typename T> struct Reader +{ + virtual ~Reader() = default; + + virtual T at(uint32_t nth, uint32_t ch, uint32_t row, uint32_t col) const = 0; +}; + +} // namespace kernel +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_KERNEL_READER_H__ diff --git a/include/util/kernel/Shape.h b/include/util/kernel/Shape.h new file mode 100644 index 000000000..bd2332989 --- /dev/null +++ b/include/util/kernel/Shape.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_UTIL_KERNEL_SHAPE_H__ +#define __NNFW_UTIL_KERNEL_SHAPE_H__ + +#include <cstdint> + +namespace nnfw +{ +namespace util +{ +namespace kernel +{ + +struct Shape +{ + int32_t N; + int32_t C; + int32_t H; + int32_t W; + + Shape() = default; + Shape(int32_t count, int32_t depth, int32_t height, int32_t width) + : N{count}, C{depth}, H{height}, W{width} + { + // DO NOTHING + } +}; + +} // namespace kernel +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_KERNEL_SHAPE_H__ diff --git a/include/util/matrix/IndexIterator.h b/include/util/matrix/IndexIterator.h new file mode 100644 index 000000000..b6fccff45 --- /dev/null +++ b/include/util/matrix/IndexIterator.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_UTIL_MATRIX_INDEX_ITERATOR_H__ +#define __NNFW_UTIL_MATRIX_INDEX_ITERATOR_H__ + +#include "util/matrix/Shape.h" + +namespace nnfw +{ +namespace util +{ +namespace matrix +{ + +class IndexIterator +{ +public: + IndexIterator(const Shape &shape) : _shape{shape} + { + // DO NOTHING + } + +public: + template <typename Callable> IndexIterator &iter(Callable cb) + { + for (uint32_t row = 0; row < _shape.H; ++row) + { + for (uint32_t col = 0; col < _shape.W; ++col) + { + cb(row, col); + } + } + + return (*this); + } + +private: + const Shape _shape; +}; + +inline IndexIterator iterate(const Shape &shape) { return IndexIterator{shape}; } + +template <typename Callable> IndexIterator &operator<<(IndexIterator &&it, Callable cb) +{ + return it.iter(cb); +} + +} // namespace matrix +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_MATRIX_INDEX_ITERATOR_H__ diff --git a/include/util/matrix/Reader.h b/include/util/matrix/Reader.h new file mode 100644 index 000000000..526eaf5cd --- /dev/null +++ b/include/util/matrix/Reader.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef __NNFW_UTIL_MATRIX_READER_H__
+#define __NNFW_UTIL_MATRIX_READER_H__
+
+#include <cstdint>
+
+namespace nnfw
+{
+namespace util
+{
+namespace matrix
+{
+
+template <typename T> struct Reader
+{
+  virtual ~Reader() = default;
+
+  virtual T at(uint32_t row, uint32_t col) const = 0;
+};
+
+} // namespace matrix
+} // namespace util
+} // namespace nnfw
+
+#endif // __NNFW_UTIL_MATRIX_READER_H__
diff --git a/include/util/matrix/Shape.h b/include/util/matrix/Shape.h
new file mode 100644
index 000000000..e2c20b4ca
--- /dev/null
+++ b/include/util/matrix/Shape.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// for 2D tensor
+#ifndef __NNFW_UTIL_MATRIX_SHAPE_H__
+#define __NNFW_UTIL_MATRIX_SHAPE_H__
+
+#include <cstdint>
+
+namespace nnfw
+{
+namespace util
+{
+namespace matrix
+{
+
+struct Shape
+{
+  int32_t H; // Height
+  int32_t W; // Width
+
+  Shape() = default;
+  Shape(int32_t height, int32_t width) : H{height}, W{width}
+  {
+    // DO NOTHING
+  }
+};
+
+} // namespace matrix
+} // namespace util
+} // namespace nnfw
+
+#endif // __NNFW_UTIL_MATRIX_SHAPE_H__
diff --git a/include/util/profiling/profile_buffer.h b/include/util/profiling/profile_buffer.h
new file mode 100644
index 000000000..8785a40d3
--- /dev/null
+++ b/include/util/profiling/profile_buffer.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd.
All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_BUFFER_H_ +#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_BUFFER_H_ + +#include <cstddef> +#include <cstdint> + +#include "util/profiling/time.h" + +namespace tflite { +namespace profiling { + +// A profiling event. +struct ProfileEvent { + // Describes the type of event. + // The event_metadata field may contain additional data for interpreting + // the event. + enum class EventType { + // Default event type, the metadata field has no special significance. + DEFAULT = 0, + // The event is an operator invocation and the event_metadata field is the + // index of operator node. + OPERATOR_INVOKE_EVENT = 1 + }; + + // Label of the event. 
This usually describes the event. + const char* tag; + // Timestamp in microseconds when the event began. + uint64_t begin_timestamp_us; + // Timestamp in microseconds when the event ended. + uint64_t end_timestamp_us; + // The field containing the type of event. This must be one of the event types + // in EventType. + EventType event_type; + // Extra data describing the details of the event. + uint32_t event_metadata; +}; +} // namespace profiling +} // namespace tflite + +#ifdef TFLITE_PROFILING_ENABLED + +#include <sys/time.h> +#include <vector> + +namespace tflite { +namespace profiling { +constexpr uint32_t kInvalidEventHandle = static_cast<uint32_t>(~0) - 1; + +// A ring buffer of profile events. +// This class is not thread safe. +class ProfileBuffer { + public: + ProfileBuffer(uint32_t max_num_entries, bool enabled) + : enabled_(enabled), current_index_(0), event_buffer_(max_num_entries) {} + + // Adds an event to the buffer with begin timestamp set to the current + // timestamp. Returns a handle to event that can be used to call EndEvent. If + // buffer is disabled this has no affect. + // The tag of the event should remain valid till the buffer is valid. + uint32_t BeginEvent(const char* tag, ProfileEvent::EventType event_type, + uint32_t event_metadata) { + if (!enabled_) { + return kInvalidEventHandle; + } + uint64_t timestamp = time::NowMicros(); + int index = current_index_ % event_buffer_.size(); + event_buffer_[index].tag = tag; + event_buffer_[index].event_type = event_type; + event_buffer_[index].event_metadata = event_metadata; + event_buffer_[index].begin_timestamp_us = timestamp; + event_buffer_[index].end_timestamp_us = 0; + current_index_++; + return index; + } + + // Sets the enabled state of buffer to |enabled| + void SetEnabled(bool enabled) { enabled_ = enabled; } + + // Sets the end timestamp for event for the handle to current time. + // If the buffer is disabled or previous event has been overwritten this + // operation has not effect. 
+ void EndEvent(uint32_t event_handle) { + if (!enabled_ || event_handle == kInvalidEventHandle || + event_handle > current_index_) { + return; + } + const uint32_t max_size = event_buffer_.size(); + if (current_index_ > (max_size + event_handle)) { + // Ignore, buffer has already overflowed. + return; + } + + int event_index = event_handle % max_size; + event_buffer_[event_index].end_timestamp_us = time::NowMicros(); + } + + // Returns the size of the buffer. + size_t Size() const { + return (current_index_ >= event_buffer_.size()) ? event_buffer_.size() + : current_index_; + } + + // Resets the buffer. + void Reset() { + enabled_ = false; + current_index_ = 0; + } + + // Returns the profile event at the given index. If the index is invalid a + // nullptr is returned. The return event may get overwritten if more events + // are added to buffer. + const struct ProfileEvent* const At(int index) const { + size_t size = Size(); + if (index >= size) { + return nullptr; + } + const uint32_t max_size = event_buffer_.size(); + uint32_t start = + (current_index_ > max_size) ? current_index_ % max_size : max_size; + index = (index + start) % max_size; + return &event_buffer_[index]; + } + + private: + bool enabled_; + uint32_t current_index_; + std::vector<ProfileEvent> event_buffer_; +}; +} // namespace profiling +} // namespace tflite +#endif // TFLITE_PROFILING_ENABLED +#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILE_BUFFER_H_ diff --git a/include/util/profiling/profiler.h b/include/util/profiling/profiler.h new file mode 100644 index 000000000..0d68b8f4b --- /dev/null +++ b/include/util/profiling/profiler.h @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILER_H_ +#define TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILER_H_ + +#include <vector> + +#include "util/profiling/profile_buffer.h" + +#ifdef TFLITE_PROFILING_ENABLED + +namespace tflite { +namespace profiling { +class ScopedProfile; +class ScopedOperatorProfile; + +// Controls whether profiling is enabled or disabled and collects profiles. +// TFLite is used on platforms that don't have posix threads, so the profiler is +// kept as simple as possible. It is designed to be used only on a single +// thread. +// +// Profiles are collected using Scoped*Profile objects that begin and end a +// profile event. +// An example usage is shown in the example below: +// +// Say Worker class has a DoWork method and we are interested in profiling +// the overall execution time for DoWork and time spent in Task1 and Task2 +// functions. 
+// +// class Worker { +// public: +// void DoWork() { +// ScopedProfile(&controller, "DoWork"); +// Task1(); +// Task2(); +// ..... +// } +// +// void Task1() { +// ScopedProfile(&controller, "Task1"); +// .... +// } +// +// void Task2() { +// ScopedProfile(&controller, "Task2"); +// } +// +// Profiler profiler; +// } +// +// We instrument the functions that need to be profiled. +// +// Profile can be collected by enable profiling and then getting profile +// events. +// +// void ProfileWorker() { +// Worker worker; +// worker.profiler.EnableProfiling(); +// worker.DoWork(); +// worker.profiler.DisableProfiling(); +// // Profiling is complete, extract profiles. +// auto profile_events = worker.profiler.GetProfiles(); +// } +// +// +class Profiler { + public: + Profiler() : buffer_(1024, false) {} + + void StartProfiling() { buffer_.SetEnabled(true); } + void StopProfiling() { buffer_.SetEnabled(false); } + void Reset() { buffer_.Reset(); } + std::vector<const ProfileEvent*> GetProfileEvents() { + std::vector<const ProfileEvent*> profile_events; + profile_events.reserve(buffer_.Size()); + for (size_t i = 0; i < buffer_.Size(); i++) { + profile_events.push_back(buffer_.At(i)); + } + return profile_events; + } + + private: + friend class ScopedProfile; + friend class ScopedOperatorProfile; + ProfileBuffer* GetProfileBuffer() { return &buffer_; } + ProfileBuffer buffer_; +}; + +class ScopedProfile { + public: + // Adds a profile event to profile that begins with the construction + // of object and ends when the object goes out of scope. + // The lifetime of tag should be at least the lifetime of profiler. 
+ + ScopedProfile(Profiler* profiler, const char* tag) + : buffer_(nullptr), event_handle_(0) { + if (profiler) { + buffer_ = profiler->GetProfileBuffer(); + event_handle_ = + buffer_->BeginEvent(tag, ProfileEvent::EventType::DEFAULT, 0); + } + } + ~ScopedProfile() { + if (buffer_) { + buffer_->EndEvent(event_handle_); + } + } + + private: + ProfileBuffer* buffer_; + int32_t event_handle_; +}; + +class ScopedOperatorProfile { + public: + // Adds a profile event to profile that begins with the construction + // of object and ends when the object goes out of scope. + // The lifetime of tag should be at least the lifetime of profiler. + ScopedOperatorProfile(Profiler* profiler, const char* tag, int node_index) + : buffer_(nullptr), event_handle_(0) { + if (profiler) { + buffer_ = profiler->GetProfileBuffer(); + event_handle_ = buffer_->BeginEvent( + tag, ProfileEvent::EventType::OPERATOR_INVOKE_EVENT, node_index); + } + } + + ~ScopedOperatorProfile() { + if (buffer_) { + buffer_->EndEvent(event_handle_); + } + } + + private: + ProfileBuffer* buffer_; + int32_t event_handle_; +}; + +} // namespace profiling +} // namespace tflite + +#define VARNAME_UNIQ(name, ctr) name##ctr + +#define SCOPED_OPERATOR_PROFILE(profiler, node_index) \ + tflite::profiling::ScopedOperatorProfile VARNAME_UNIQ( \ + _profile_, __COUNTER__)((profiler), "OpInvoke", (node_index)) +#else + +namespace tflite { +namespace profiling { +// A noop version of profiler when profiling is disabled. 
+class Profiler { + public: + Profiler() {} + void StartProfiling() {} + void StopProfiling() {} + void Reset() {} + std::vector<const ProfileEvent*> GetProfileEvents() { return {}; } +}; +} // namespace profiling +} // namespace tflite + +#define SCOPED_OPERATOR_PROFILE(profiler, node_index) + +#endif // TFLITE_PROFILING_ENABLED + +#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_PROFILER_H_ diff --git a/include/util/profiling/profiling.h b/include/util/profiling/profiling.h new file mode 100644 index 000000000..3365dc8d6 --- /dev/null +++ b/include/util/profiling/profiling.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_UTIL_PROFILING_H__ +#define __NNFW_UTIL_PROFILING_H__ + +#include <iostream> + +namespace tflite { +namespace profiling { +class Profiler; // forward declaration +} +} + +namespace profiling +{ + +class Sync +{ +public: + Sync() : _enabled{false} + { + auto env = std::getenv("PURE_ARM_COMPUTE_SYNC_ENABLE"); + + if (env && std::atoi(env) != 0) + { + _enabled = true; + } + } + +public: + bool enabled(void) const { return _enabled; } + +private: + bool _enabled; +}; + +} // namespace profiling + +namespace profiling +{ + +class Context +{ +public: + Context() : _sync(), _profiler(nullptr) {} + +public: + const Sync &sync(void) const { return _sync; } + tflite::profiling::Profiler* getProfiler() { return _profiler; } + void setProfiler(tflite::profiling::Profiler* p) { _profiler = p; } + +private: + Sync _sync; + tflite::profiling::Profiler* _profiler; + +public: + static Context &get(void) + { + static Context ctx{}; + return ctx; + } +}; + +} // namespace profiling +#endif // __NNFW_UTIL_PROFILING_H__ diff --git a/include/util/profiling/time.h b/include/util/profiling/time.h new file mode 100644 index 000000000..da3289359 --- /dev/null +++ b/include/util/profiling/time.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_PROFILING_TIME_H_ +#define TENSORFLOW_CONTRIB_LITE_PROFILING_TIME_H_ + +#include <cstdint> + +namespace tflite { +namespace profiling { +namespace time { +uint64_t NowMicros(); +} // namespace time +} // namespace profiling +} // namespace tflite +#endif // TENSORFLOW_CONTRIB_LITE_PROFILING_TIME_H_ diff --git a/include/util/tensor/Comparator.h b/include/util/tensor/Comparator.h new file mode 100644 index 000000000..f0ab4ab20 --- /dev/null +++ b/include/util/tensor/Comparator.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_UTIL_TENSOR_COMPARATOR_H__ +#define __NNFW_UTIL_TENSOR_COMPARATOR_H__ + +#include "util/tensor/Index.h" +#include "util/tensor/Shape.h" +#include "util/tensor/Reader.h" +#include "util/tensor/Diff.h" + +#include <functional> + +#include <vector> + +namespace nnfw +{ +namespace util +{ +namespace tensor +{ + +class Comparator +{ +public: + Comparator(const std::function<bool (float lhs, float rhs)> &fn) : _compare_fn{fn} + { + // DO NOTHING + } + +public: + struct Observer + { + virtual void notify(const Index &index, float expected, float obtained) = 0; + }; + +public: + // NOTE Observer should live longer than comparator + std::vector<Diff<float>> compare(const Shape &shape, + const Reader<float> &expected, + const Reader<float> &obtained, + Observer *observer = nullptr) const; + +private: + std::function<bool (float lhs, float rhs)> _compare_fn; +}; + +} // namespace tensor +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_TENSOR_COMPARATOR_H__ diff --git a/include/util/tensor/Diff.h b/include/util/tensor/Diff.h new file mode 100644 index 000000000..25a9bafcf --- /dev/null +++ b/include/util/tensor/Diff.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_UTIL_TENSOR_DIFF_H__ +#define __NNFW_UTIL_TENSOR_DIFF_H__ + +#include "util/tensor/Index.h" + +namespace nnfw +{ +namespace util +{ +namespace tensor +{ + +template<typename T> struct Diff +{ + Index index; + + T expected; + T obtained; + + Diff(const Index &i) : index(i) + { + // DO NOTHING + } + + Diff(const Index &i, const T &e, const T &o) : index(i), expected{e}, obtained{o} + { + // DO NOTHING + } +}; + +} // namespace tensor +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_TENSOR_DIFF_H__ diff --git a/include/util/tensor/Index.h b/include/util/tensor/Index.h new file mode 100644 index 000000000..bc41d3c8e --- /dev/null +++ b/include/util/tensor/Index.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_UTIL_TENSOR_INDEX_H__ +#define __NNFW_UTIL_TENSOR_INDEX_H__ + +#include <cstdint> +#include <cstddef> + +#include <vector> +#include <initializer_list> + +namespace nnfw +{ +namespace util +{ +namespace tensor +{ + +struct Index +{ +public: + Index(size_t rank) { _offsets.resize(rank); } + +public: + Index(std::initializer_list<int32_t> offsets) : _offsets{offsets} + { + // DO NOTHING + } + +public: + size_t rank(void) const { return _offsets.size(); } + +public: + int32_t at(size_t n) const { return _offsets.at(n); } + int32_t &at(size_t n) { return _offsets.at(n); } + +private: + std::vector<int32_t> _offsets; +}; + +// This is used to convert NNAPI tensor index to ARM tensor index or vice versa +inline static Index copy_reverse(const Index &origin) +{ + size_t rank = origin.rank(); + Index target(rank); + for (int i = 0; i < rank; i++) + target.at(i) = origin.at(rank-1 -i); + return target; +} + +} // namespace tensor +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_TENSOR_INDEX_H__ diff --git a/include/util/tensor/IndexEnumerator.h b/include/util/tensor/IndexEnumerator.h new file mode 100644 index 000000000..30325cbfa --- /dev/null +++ b/include/util/tensor/IndexEnumerator.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_UTIL_TENSOR_INDEX_ENUMERATOR_H__ +#define __NNFW_UTIL_TENSOR_INDEX_ENUMERATOR_H__ + +#include "util/tensor/Shape.h" +#include "util/tensor/Index.h" + +namespace nnfw +{ +namespace util +{ +namespace tensor +{ + +class IndexEnumerator +{ +public: + explicit IndexEnumerator(const Shape &shape) : _shape(shape), _index(shape.rank()), _cursor(0) + { + const size_t rank = _shape.rank(); + + for (size_t axis = 0; axis < rank; ++axis) + { + _index.at(axis) = 0; + } + + for (_cursor = 0; _cursor < rank; ++_cursor) + { + if (_index.at(_cursor) < _shape.dim(_cursor)) + { + break; + } + } + } + +public: + IndexEnumerator(IndexEnumerator &&) = delete; + IndexEnumerator(const IndexEnumerator &) = delete; + +public: + bool valid(void) const { return _cursor < _shape.rank(); } + +public: + const Index &curr(void) const { return _index; } + +public: + void advance(void) + { + const size_t rank = _shape.rank(); + + // Find axis to be updated + while((_cursor < rank) && !(_index.at(_cursor) + 1 < _shape.dim(_cursor))) + { + ++_cursor; + } + + if(_cursor == rank) + { + return; + } + + // Update index + _index.at(_cursor) += 1; + + for (size_t axis = 0; axis < _cursor; ++axis) + { + _index.at(axis) = 0; + } + + // Update cursor + _cursor = 0; + } + +public: + const Shape _shape; + +private: + size_t _cursor; + Index _index; +}; + +} // namespace tensor +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_TENSOR_INDEX_ENUMERATOR_H__ diff --git a/include/util/tensor/IndexFormatter.h b/include/util/tensor/IndexFormatter.h new file mode 100644 index 000000000..8014a42b6 --- /dev/null +++ b/include/util/tensor/IndexFormatter.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
namespace nnfw
{
namespace util
{
namespace tensor
{

// Lightweight wrapper that lets a tensor Index be written to a std::ostream:
//   os << IndexFormatter{index};
// The actual formatting lives in the out-of-line operator<< declared below.
class IndexFormatter
{
public:
  // NOTE Only a reference is stored: the wrapped Index must outlive the
  //      formatter (fine for the usual inline "os << IndexFormatter{i}" use).
  IndexFormatter(const nnfw::util::tensor::Index &index) : _index(index)
  {
    // DO NOTHING
  }

public:
  // The wrapped index.
  const nnfw::util::tensor::Index &index(void) const { return _index; }

private:
  const nnfw::util::tensor::Index &_index;
};

// Prints the wrapped index (defined out-of-line in the implementation file).
std::ostream &operator<<(std::ostream &os, const IndexFormatter &fmt);

} // namespace tensor
} // namespace util
} // namespace nnfw
namespace nnfw
{
namespace util
{
namespace tensor
{

// Visits every index of a tensor shape and hands each one to a callback.
//
// Typical usage:
//   iterate(shape) << [](const Index &index) { ... };
class IndexIterator
{
public:
  // NOTE The iterator only borrows 'shape'; the Shape must outlive it.
  IndexIterator(const Shape &shape) : _shape(shape)
  {
    // DO NOTHING
  }

public:
  // Allow move, but disallow copy
  IndexIterator(IndexIterator &&) = default;
  IndexIterator(const IndexIterator &) = delete;

public:
  // Invokes 'fn' once for every index of the shape, in the order produced
  // by IndexEnumerator (axis 0 varies fastest), then returns *this so
  // calls can be chained.
  template <typename Callable> IndexIterator &iter(Callable fn)
  {
    for (IndexEnumerator e{_shape}; e.valid(); e.advance())
    {
      fn(e.curr());
    }

    return (*this);
  }

private:
  const Shape &_shape;
};

// Convenience factory; the returned iterator borrows 'shape'.
inline IndexIterator iterate(const Shape &shape) { return IndexIterator{shape}; }

// Stream-style spelling used throughout this library:
//   iterate(shape) << callback;
// NOTE(review): the returned reference is bound to the temporary produced by
// iterate(); it is only safe to use within the same full expression.
template <typename Callable> IndexIterator &operator<<(IndexIterator &&it, Callable cb)
{
  return it.iter(cb);
}

} // namespace tensor
} // namespace util
} // namespace nnfw
+ */ + +#ifndef __NNFW_UTIL_TENSOR_NON_INCREASING_STRIDE_H__ +#define __NNFW_UTIL_TENSOR_NON_INCREASING_STRIDE_H__ + +#include "util/tensor/Shape.h" +#include "util/tensor/Index.h" + +#include <vector> + +namespace nnfw +{ +namespace util +{ +namespace tensor +{ + +// As its name suggests, stride[N-1] >= stride[N] holds for all N < rank in NonIncreasingStride. +class NonIncreasingStride +{ +public: + void init(const Shape &shape) + { + _stride.resize(shape.rank()); + _stride.at(shape.rank() - 1) = 1; + + for (uint32_t axis = shape.rank() - 1; axis > 0; --axis) + { + _stride.at(axis - 1) = _stride.at(axis) * shape.dim(axis); + } + } + +public: + uint32_t at(uint32_t axis) const { return _stride.at(axis); } + +public: + uint32_t offset(const Index &index) const; + +private: + std::vector<uint32_t> _stride; +}; + +} // namespace tensor +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_TENSOR_NON_INCREASING_STRIDE_H__ diff --git a/include/util/tensor/Object.h b/include/util/tensor/Object.h new file mode 100644 index 000000000..7afd089ea --- /dev/null +++ b/include/util/tensor/Object.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __NNFW_UTIL_TENSOR_OBJECT_H__ +#define __NNFW_UTIL_TENSOR_OBJECT_H__ + +#include "util/tensor/Shape.h" +#include "util/tensor/Index.h" +#include "util/tensor/IndexIterator.h" +#include "util/tensor/NonIncreasingStride.h" +#include "util/tensor/Reader.h" + +#include <vector> + +namespace nnfw +{ +namespace util +{ +namespace tensor +{ + +template <typename T> class Object final : public Reader<T> +{ +public: + using Generator = std::function<T(const Shape &shape, const Index &index)>; + +public: + Object(const Shape &shape, const Generator &fn) : _shape{shape} + { + // Set 'stride' + _stride.init(shape); + + // Pre-allocate buffer + _values.resize(_shape.dim(0) * _stride.at(0)); + + // Set 'value' + iterate(_shape) << + [this, &fn](const Index &index) { _values.at(_stride.offset(index)) = fn(_shape, index); }; + } + +public: + const Shape &shape(void) const { return _shape; } + +public: + T at(const Index &index) const override { return _values.at(_stride.offset(index)); } + +private: + Shape _shape; + NonIncreasingStride _stride; + +private: + std::vector<T> _values; +}; + +} // namespace tensor +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_FEATURE_OBJECT_H__ diff --git a/include/util/tensor/Reader.h b/include/util/tensor/Reader.h new file mode 100644 index 000000000..654214880 --- /dev/null +++ b/include/util/tensor/Reader.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
namespace nnfw
{
namespace util
{
namespace tensor
{

// Read-only accessor interface over a tensor of T; implementations expose
// one element per multi-dimensional Index.
template <typename T> struct Reader
{
  // Virtual destructor: implementations are deleted through this interface.
  virtual ~Reader() = default;

  // Returns the element stored at 'index'.
  virtual T at(const Index &index) const = 0;
};

} // namespace tensor
} // namespace util
} // namespace nnfw
namespace nnfw
{
namespace util
{
namespace tensor
{

/**
 * @brief Dimensions of a tensor: one signed 32-bit extent per axis.
 *
 * Backed by a deque so axes can be cheaply prepended as well as appended.
 */
class Shape
{
public:
  /// Create a rank-'rank' shape with all dimensions zero-initialized.
  Shape(size_t rank) { _dimensions.resize(rank); }

public:
  /// Create a shape from explicit extents, e.g. Shape{1, 3, 224, 224}.
  Shape(const std::initializer_list<int32_t> &dimensions) : _dimensions{dimensions}
  {
    // DO NOTHING
  }

  // NOTE No special members are declared (Rule of Zero). The previously
  //      user-declared `Shape(const Shape &) = default;` suppressed the
  //      implicit move constructor/assignment, silently turning every move
  //      into a deque copy; removing it restores both copy AND move.

public:
  /// Add a dimension in front of axis 0.
  void prepend(int32_t d) { _dimensions.emplace_front(d); }
  /// Add a dimension after the last axis.
  void append(int32_t d) { _dimensions.emplace_back(d); }

public:
  /// Number of axes.
  size_t rank(void) const { return _dimensions.size(); }

public:
  /// Extent of the n-th axis (bounds-checked).
  int32_t dim(size_t n) const { return _dimensions.at(n); }
  int32_t &dim(size_t n) { return _dimensions.at(n); }

public:
  /// Total number of elements: product of all dimensions (1 for rank 0).
  /// NOTE(review): negative extents are not rejected and would wrap in the
  /// unsigned product — confirm callers only store non-negative dims.
  size_t element_nums() const
  {
    size_t nums = 1;
    for (auto d : _dimensions)
    {
      nums *= d;
    }
    return nums;
  }

private:
  std::deque<int32_t> _dimensions;

public:
  /// Parse a shape from its textual representation (format defined by the
  /// out-of-line implementation).
  static Shape from(const std::string &s);
};

/// Element-wise equality of two shapes (defined out-of-line).
bool operator==(const Shape &, const Shape &);

/// Prints a shape, e.g. for diagnostics (defined out-of-line).
std::ostream &operator<<(std::ostream &os, const Shape &shape);

} // namespace tensor
} // namespace util
} // namespace nnfw
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NNFW_UTIL_TENSOR_ZIPPER_H__ +#define __NNFW_UTIL_TENSOR_ZIPPER_H__ + +#include "util/tensor/Index.h" +#include "util/tensor/IndexIterator.h" +#include "util/tensor/Reader.h" + +namespace nnfw +{ +namespace util +{ +namespace tensor +{ + +template <typename T> class Zipper +{ +public: + Zipper(const Shape &shape, const Reader<T> &lhs, const Reader<T> &rhs) + : _shape{shape}, _lhs{lhs}, _rhs{rhs} + { + // DO NOTHING + } + +public: + template <typename Callable> void zip(Callable cb) const + { + iterate(_shape) << + [this, &cb](const Index &index) { cb(index, _lhs.at(index), _rhs.at(index)); }; + } + +private: + const Shape &_shape; + const Reader<T> &_lhs; + const Reader<T> &_rhs; +}; + +template <typename T, typename Callable> +const Zipper<T> &operator<<(const Zipper<T> &zipper, Callable cb) +{ + zipper.zip(cb); + return zipper; +} + +template <typename T> Zipper<T> zip(const Shape &shape, const Reader<T> &lhs, const Reader<T> &rhs) +{ + return Zipper<T>{shape, lhs, rhs}; +} + +} // namespace tensor +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_TENSOR_ZIPPER_H__ diff --git a/include/util/vector.h b/include/util/vector.h new file mode 100644 index 000000000..02f78257c --- /dev/null +++ b/include/util/vector.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
// Element-wise equality for two std::vector<T>s: true iff the sizes match
// and every pair of corresponding elements compares equal (via T's !=).
template <typename T> bool operator==(const std::vector<T> &lhs, const std::vector<T> &rhs)
{
  // Different lengths can never be equal.
  if (lhs.size() != rhs.size())
  {
    return false;
  }

  // Sizes match, so plain indexing is in-bounds for both vectors.
  for (size_t pos = 0; pos < lhs.size(); ++pos)
  {
    if (lhs[pos] != rhs[pos])
    {
      return false;
    }
  }

  return true;
}
+ */ + +#ifndef __NNFW_UTIL_VECTOR_OBJECT_H__ +#define __NNFW_UTIL_VECTOR_OBJECT_H__ + +#include "util/vector/Reader.h" + +#include <vector> +#include <functional> + +namespace nnfw +{ +namespace util +{ +namespace vector +{ + +template <typename T> class Object final : public Reader<T> +{ +public: + using Generator = std::function<T(int32_t size, int32_t offset)>; + +public: + Object(int32_t size, const Generator &gen) : _size{size} + { + _value.resize(_size); + + for (int32_t offset = 0; offset < size; ++offset) + { + _value.at(offset) = gen(size, offset); + } + } + +public: + int32_t size(void) const { return _size; } + +public: + T at(uint32_t nth) const override { return _value.at(nth); } + +private: + const int32_t _size; + std::vector<T> _value; +}; + +} // namespace vector +} // namespace util +} // namespace nnfw + +#endif // __NNFW_UTIL_VECTOR_OBJECT_H__ diff --git a/include/util/vector/Reader.h b/include/util/vector/Reader.h new file mode 100644 index 000000000..a3c5cb359 --- /dev/null +++ b/include/util/vector/Reader.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
namespace nnfw
{
namespace util
{
namespace vector
{

// Read-only accessor interface over a 1-D sequence of T.
template <typename T> struct Reader
{
  // Virtual destructor: implementations are deleted through this interface.
  virtual ~Reader() = default;

  // Returns the element at position 'nth'.
  virtual T at(uint32_t nth) const = 0;
};

} // namespace vector
} // namespace util
} // namespace nnfw