Diffstat (limited to 'compiler/ann-ref/src/Executor.cpp')
-rw-r--r-- compiler/ann-ref/src/Executor.cpp | 814
1 file changed, 814 insertions(+), 0 deletions(-)
diff --git a/compiler/ann-ref/src/Executor.cpp b/compiler/ann-ref/src/Executor.cpp
new file mode 100644
index 000000000..888fc9c81
--- /dev/null
+++ b/compiler/ann-ref/src/Executor.cpp
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Executor.h"
+
+#include "NeuralNetworks.h"
+#include "Shape.h"
+
+#include "ops/Add.h"
+#include "ops/Add.float.h"
+#include "ops/Conv2D.h"
+#include "ops/Conv2D.float.h"
+#include "ops/DepthwiseConv2D.h"
+#include "ops/DepthwiseConv2D.float.h"
+#include "ops/AvgPool2D.h"
+#include "ops/AvgPool2D.float.h"
+#include "ops/MaxPool2D.h"
+#include "ops/MaxPool2D.float.h"
+#include "ops/Mul.h"
+#include "ops/Mul.float.h"
+#include "ops/ReLU.h"
+#include "ops/ReLU.float.h"
+#include "ops/ReLU6.h"
+#include "ops/ReLU6.float.h"
+#include "ops/Concatenation.h"
+#include "ops/Concatenation.float.h"
+#include "ops/Reshape.h"
+#include "ops/Softmax.h"
+#include "ops/Softmax.float.h"
+#include "ops/FullyConnected.h"
+#include "ops/FullyConnected.float.h"
+#include "ops/Pad.h"
+#include "ops/Sub.h"
+#include "ops/Sub.float.h"
+#include "ops/Div.h"
+#include "ops/Div.float.h"
+
+#include "Logging.h"
+#include "Assert.h"
+
+enum PaddingScheme
+{
+ kPaddingUnknown = 0,
+ kPaddingSame = 1,
+ kPaddingValid = 2,
+};
+
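+// For the implicit SAME padding scheme, the output covers every input element:
+// out_size = ceil(in_size / stride), and the total padding needed is
+// (out_size - 1) * stride + filter_size - in_size, split between head and
+// tail with any odd element going to the tail. Illustrative example:
+// in_size = 5, stride = 2, filter_size = 3 gives out_size = 3, total padding
+// 2, so padding_head = 1 and padding_tail = 1. VALID padding adds nothing.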
+inline void calculateExplicitPadding(int32_t in_size, int32_t stride, int32_t filter_size,
+ int32_t padding_implicit, int32_t *padding_head,
+ int32_t *padding_tail)
+{
+ *padding_head = 0;
+ *padding_tail = 0;
+
+ if (padding_implicit == kPaddingSame)
+ {
+ int32_t out_size = (in_size + stride - 1) / stride;
+ int32_t tmp = (out_size - 1) * stride + filter_size;
+ if (tmp > in_size)
+ {
+ *padding_head = (tmp - in_size) / 2;
+ *padding_tail = (tmp - in_size) - *padding_head;
+ }
+ }
+}
+
+template <typename T> static inline T getScalarData(const RunTimeOperandInfo &info)
+{
+ // TODO: Check buffer is at least as long as size of data.
+ T *data = reinterpret_cast<T *>(info.buffer);
+ return data[0];
+}
+
+// Updates the RunTimeOperandInfo with the newly calculated shape and
+// allocates a buffer if needed. Only temporaries are allocated here; model
+// output buffers are provided by the caller via the request.
+static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo *info, const Shape &shape)
+{
+ // For user-provided model output operands, the parameters must match the Shape
+ // calculated from the preparation step.
+ if (info->lifetime == OperandLifeTime::MODEL_OUTPUT)
+ {
+ if (info->type != shape.type || info->dimensions != shape.dimensions)
+ {
+ LOG(ERROR) << "Invalid type or dimensions for model output";
+ return false;
+ }
+ if (info->type == OperandType::TENSOR_QUANT8_ASYMM &&
+ (info->scale != shape.scale || info->zeroPoint != shape.offset))
+ {
+ LOG(ERROR) << "Invalid scale or zeroPoint for model output";
+ return false;
+ }
+ }
+ info->type = shape.type;
+ info->dimensions = shape.dimensions;
+ info->scale = shape.scale;
+ info->zeroPoint = shape.offset;
+ if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr)
+ {
+ uint32_t length = sizeOfData(info->type, info->dimensions);
+    // Plain operator new would throw on failure, making the check below dead
+    // code; the nothrow form lets allocation failure surface as 'false'.
+    info->buffer = new (std::nothrow) uint8_t[length];
+    if (info->buffer == nullptr)
+    {
+      return false;
+    }
+ }
+ return true;
+}
+
+// Ignore the .pools entry in model and request. This will have been taken care of
+// by the caller.
+int Executor::run(const Model &model, const Request &request,
+ const std::vector<RunTimePoolInfo> &modelPoolInfos,
+ const std::vector<RunTimePoolInfo> &requestPoolInfos)
+{
+ VLOG(CPUEXE) << "Executor::run()";
+
+ mModel = &model;
+ mRequest = &request; // TODO check if mRequest is needed
+  if (!initializeRunTimeInfo(modelPoolInfos, requestPoolInfos))
+  {
+    return ANEURALNETWORKS_BAD_DATA;
+  }
+  // The model lists its operations in execution order.
+ for (const auto &operation : model.operations)
+ {
+ int n = executeOperation(operation);
+ if (n != ANEURALNETWORKS_NO_ERROR)
+ {
+ return n;
+ }
+ }
+ mModel = nullptr;
+ mRequest = nullptr;
+ VLOG(CPUEXE) << "Completed run normally";
+ return ANEURALNETWORKS_NO_ERROR;
+}
+
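+// Builds mOperands from the model's operand descriptors: constants point into
+// the model's value pools, temporaries start unallocated with a use count,
+// and model inputs/outputs are bound to request buffers afterwards.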
+bool Executor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo> &modelPoolInfos,
+ const std::vector<RunTimePoolInfo> &requestPoolInfos)
+{
+ VLOG(CPUEXE) << "Executor::initializeRunTimeInfo";
+ const size_t count = mModel->operands.size();
+ mOperands.resize(count);
+
+ // Start by setting the runtime info to what's in the model.
+ for (size_t i = 0; i < count; i++)
+ {
+ const Operand &from = mModel->operands[i];
+ RunTimeOperandInfo &to = mOperands[i];
+ to.type = from.type;
+ to.dimensions = from.dimensions;
+ to.scale = from.scale;
+ to.zeroPoint = from.zeroPoint;
+ to.length = from.location.length;
+ to.lifetime = from.lifetime;
+ switch (from.lifetime)
+ {
+ case OperandLifeTime::TEMPORARY_VARIABLE:
+ to.buffer = nullptr;
+ to.numberOfUsesLeft = from.numberOfConsumers;
+ break;
+ case OperandLifeTime::CONSTANT_COPY:
+ to.buffer = const_cast<uint8_t *>(&mModel->operandValues[from.location.offset]);
+ to.numberOfUsesLeft = 0;
+ break;
+ case OperandLifeTime::CONSTANT_REFERENCE:
+ {
+ auto poolIndex = from.location.poolIndex;
+ ASSERT(poolIndex < modelPoolInfos.size());
+ auto &r = modelPoolInfos[poolIndex];
+ to.buffer = r.buffer + from.location.offset;
+ to.numberOfUsesLeft = 0;
+ break;
+ }
+ case OperandLifeTime::MODEL_INPUT:
+ case OperandLifeTime::MODEL_OUTPUT:
+ case OperandLifeTime::NO_VALUE:
+ to.buffer = nullptr;
+ to.numberOfUsesLeft = 0;
+ break;
+ default:
+ ASSERT(false);
+ break;
+ }
+ }
+
+ // Adjust the runtime info for the arguments passed to the model,
+ // modifying the buffer location, and possibly the dimensions.
+ auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t> &indexes,
+ const std::vector<RequestArgument> &arguments) {
+ ASSERT(indexes.size() == arguments.size());
+ for (size_t i = 0; i < indexes.size(); i++)
+ {
+ const uint32_t operandIndex = indexes[i];
+ const RequestArgument &from = arguments[i];
+ RunTimeOperandInfo &to = mOperands[operandIndex];
+ if (from.dimensions.size() > 0)
+ {
+ // It's the responsibility of the caller to validate that
+ // from.dimensions only modifies the dimensions that were
+ // unspecified in the model. That's the case in SampleDriver.cpp
+ // with the call to validateRequest().
+ // TODO make sure that's the case for the default CPU path.
+ to.dimensions = from.dimensions;
+ }
+ if (from.hasNoValue)
+ {
+ to.lifetime = OperandLifeTime::NO_VALUE;
+ ASSERT(to.buffer == nullptr);
+ }
+ else
+ {
+ auto poolIndex = from.location.poolIndex;
+ ASSERT(poolIndex < requestPoolInfos.size());
+ auto &r = requestPoolInfos[poolIndex];
+ to.buffer = r.buffer + from.location.offset;
+ }
+ }
+ };
+ updateForArguments(mModel->inputIndexes, mRequest->inputs);
+ updateForArguments(mModel->outputIndexes, mRequest->outputs);
+
+ return true;
+}
+
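+// Decrements the use count of each temporary input operand and releases its
+// buffer once no remaining operation consumes it.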
+void Executor::freeNoLongerUsedOperands(const std::vector<uint32_t> &inputs)
+{
+ for (uint32_t i : inputs)
+ {
+ auto &info = mOperands[i];
+    // A zero use count here means the operand is a constant or a model
+    // input/output, which this executor never frees.
+ if (info.numberOfUsesLeft == 0)
+ {
+ continue;
+ }
+ info.numberOfUsesLeft--;
+ if (info.numberOfUsesLeft == 0)
+ {
+ ASSERT(info.buffer != nullptr);
+ delete[] info.buffer;
+ info.buffer = nullptr;
+ }
+ }
+}
+
+int Executor::executeOperation(const Operation &operation)
+{
+ const std::vector<uint32_t> &ins = operation.inputs;
+ const std::vector<uint32_t> &outs = operation.outputs;
+ bool success = false;
+
+ // Function to verify that the number of input and output parameters
+ // matches what is expected. Also checks that all the parameters have
+ // values. This function is to be used only for operations that do not
+ // accept optional arguments.
+ // TODO Have a version that works for optional arguments.
+ auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
+ size_t requiredOuts) -> bool {
+ auto verify = [&operation, this](size_t requiredCount, const std::vector<uint32_t> &indexes,
+ const char *type) -> bool {
+ size_t actualCount = indexes.size();
+ if (actualCount != requiredCount)
+ {
+        LOG(ERROR) << getOperationName(operation.type) << ": Invalid number of " << type
+                   << " operands. Got " << actualCount << ", expected " << requiredCount;
+ return false;
+ }
+ for (size_t i = 0; i < actualCount; i++)
+ {
+ if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE)
+ {
+ LOG(ERROR) << getOperationName(operation.type) << " " << type << " operand " << i
+ << " is required but missing.";
+ return false;
+ }
+ }
+ return true;
+ };
+ return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out");
+ };
+
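+  // Every case below follows the same three-step pattern: run the op's
+  // *Prepare() function to infer the output Shape, call
+  // setInfoAndAllocateIfNeeded() to size the output buffer, then invoke the
+  // float32 kernel. Only TENSOR_FLOAT32 operands are handled by this
+  // reference executor (see the ASSERTs).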
+ switch (operation.type)
+ {
+ case OperationType::ADD:
+ {
+ if (!allParametersPresent(3, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &in1 = mOperands[ins[0]];
+ const RunTimeOperandInfo &in2 = mOperands[ins[1]];
+ int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+ RunTimeOperandInfo &out = mOperands[outs[0]];
+ Shape outShape = out.shape();
+
+ ASSERT(in1.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = addPrepare(in1.shape(), in2.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&out, outShape) &&
+ addFloat32(reinterpret_cast<const float *>(in1.buffer), in1.shape(),
+ reinterpret_cast<const float *>(in2.buffer), in2.shape(), activation,
+ reinterpret_cast<float *>(out.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::DEPTHWISE_CONV_2D:
+ {
+ const size_t inCount = ins.size();
+ if ((inCount != 11 && inCount != 8) || !allParametersPresent(inCount, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+ const RunTimeOperandInfo &filter = mOperands[ins[1]];
+ const RunTimeOperandInfo &bias = mOperands[ins[2]];
+
+ int32_t padding_left, padding_right;
+ int32_t padding_top, padding_bottom;
+ int32_t stride_width, stride_height;
+ int32_t depth_multiplier;
+ int32_t activation;
+
+ if (inCount == 11)
+ {
+ padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
+ padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
+ padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
+ padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
+ depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
+ activation = getScalarData<int32_t>(mOperands[ins[10]]);
+ }
+ else
+ {
+ int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
+ depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
+ activation = getScalarData<int32_t>(mOperands[ins[7]]);
+
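+      // Tensors are laid out as NHWC, so dimension 1 is height and
+      // dimension 2 is width for both the input and the filter.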
+ Shape inputShape = input.shape();
+ Shape filterShape = filter.shape();
+ int32_t input_width = getSizeOfDimension(inputShape, 2);
+ int32_t input_height = getSizeOfDimension(inputShape, 1);
+ int32_t filter_width = getSizeOfDimension(filterShape, 2);
+ int32_t filter_height = getSizeOfDimension(filterShape, 1);
+ calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
+ &padding_left, &padding_right);
+ calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
+ &padding_top, &padding_bottom);
+ }
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success =
+ depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(), padding_left,
+ padding_right, padding_top, padding_bottom, stride_width,
+ stride_height, &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ depthwiseConvFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+ reinterpret_cast<const float *>(filter.buffer), filter.shape(),
+ reinterpret_cast<const float *>(bias.buffer), bias.shape(), padding_left,
+ padding_right, padding_top, padding_bottom, stride_width, stride_height,
+ depth_multiplier, activation, reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::CONV_2D:
+ {
+ const size_t inCount = ins.size();
+ if ((inCount != 10 && inCount != 7) || !allParametersPresent(inCount, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+ const RunTimeOperandInfo &filter = mOperands[ins[1]];
+ const RunTimeOperandInfo &bias = mOperands[ins[2]];
+
+ int32_t padding_left, padding_right;
+ int32_t padding_top, padding_bottom;
+ int32_t stride_width, stride_height;
+ int32_t activation;
+
+ if (inCount == 10)
+ {
+ padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
+ padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
+ padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
+ padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
+ activation = getScalarData<int32_t>(mOperands[ins[9]]);
+ }
+ else
+ {
+ int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
+ activation = getScalarData<int32_t>(mOperands[ins[6]]);
+
+ Shape inputShape = input.shape();
+ Shape filterShape = filter.shape();
+ int32_t input_width = getSizeOfDimension(inputShape, 2);
+ int32_t input_height = getSizeOfDimension(inputShape, 1);
+ int32_t filter_width = getSizeOfDimension(filterShape, 2);
+ int32_t filter_height = getSizeOfDimension(filterShape, 1);
+ calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
+ &padding_left, &padding_right);
+ calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
+ &padding_top, &padding_bottom);
+ }
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success =
+ convPrepare(input.shape(), filter.shape(), bias.shape(), padding_left, padding_right,
+ padding_top, padding_bottom, stride_width, stride_height, &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ convFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+ reinterpret_cast<const float *>(filter.buffer), filter.shape(),
+ reinterpret_cast<const float *>(bias.buffer), bias.shape(), padding_left,
+ padding_right, padding_top, padding_bottom, stride_width, stride_height,
+ activation, reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::AVERAGE_POOL_2D:
+ {
+ const size_t inCount = ins.size();
+ if ((inCount != 10 && inCount != 7) || !allParametersPresent(inCount, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+
+ int32_t padding_left, padding_right;
+ int32_t padding_top, padding_bottom;
+ int32_t stride_width, stride_height;
+ int32_t filter_width, filter_height;
+ int32_t activation;
+
+ if (inCount == 10)
+ {
+ padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
+ padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
+ padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
+ padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
+ filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
+ filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
+ activation = getScalarData<int32_t>(mOperands[ins[9]]);
+ }
+ else
+ {
+ int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
+ filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
+ filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
+ activation = getScalarData<int32_t>(mOperands[ins[6]]);
+
+ Shape inputShape = input.shape();
+ int32_t input_width = getSizeOfDimension(inputShape, 2);
+ int32_t input_height = getSizeOfDimension(inputShape, 1);
+ calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
+ &padding_left, &padding_right);
+ calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
+ &padding_top, &padding_bottom);
+ }
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = averagePoolPrepare(input.shape(), padding_left, padding_right, padding_top,
+ padding_bottom, stride_width, stride_height, filter_width,
+ filter_height, &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ averagePoolFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(), padding_left,
+ padding_right, padding_top, padding_bottom, stride_width, stride_height,
+ filter_width, filter_height, activation,
+ reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::MAX_POOL_2D:
+ {
+ const size_t inCount = ins.size();
+ if ((inCount != 10 && inCount != 7) || !allParametersPresent(inCount, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+
+ int32_t padding_left, padding_right;
+ int32_t padding_top, padding_bottom;
+ int32_t stride_width, stride_height;
+ int32_t filter_width, filter_height;
+ int32_t activation;
+
+ if (inCount == 10)
+ {
+ padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
+ padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
+ padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
+ padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
+ filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
+ filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
+ activation = getScalarData<int32_t>(mOperands[ins[9]]);
+ }
+ else
+ {
+ int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
+ stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
+ stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
+ filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
+ filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
+ activation = getScalarData<int32_t>(mOperands[ins[6]]);
+
+ Shape inputShape = input.shape();
+ int32_t input_width = getSizeOfDimension(inputShape, 2);
+ int32_t input_height = getSizeOfDimension(inputShape, 1);
+ calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
+ &padding_left, &padding_right);
+ calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
+ &padding_top, &padding_bottom);
+ }
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = maxPoolPrepare(input.shape(), padding_left, padding_right, padding_top,
+ padding_bottom, stride_width, stride_height, filter_width,
+ filter_height, &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ maxPoolFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(), padding_left,
+ padding_right, padding_top, padding_bottom, stride_width, stride_height,
+ filter_width, filter_height, activation,
+ reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::MUL:
+ {
+ if (!allParametersPresent(3, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &in1 = mOperands[ins[0]];
+ const RunTimeOperandInfo &in2 = mOperands[ins[1]];
+ int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+ RunTimeOperandInfo &out = mOperands[outs[0]];
+ Shape outShape = out.shape();
+
+ ASSERT(in1.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = mulPrepare(in1.shape(), in2.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&out, outShape) &&
+ mulFloat32(reinterpret_cast<const float *>(in1.buffer), in1.shape(),
+ reinterpret_cast<const float *>(in2.buffer), in2.shape(), activation,
+ reinterpret_cast<float *>(out.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::RELU:
+ {
+ if (!allParametersPresent(1, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = reluPrepare(input.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ reluFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+ reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::RELU6:
+ {
+ if (!allParametersPresent(1, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = relu6Prepare(input.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ relu6Float32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+ reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::SOFTMAX:
+ {
+ if (!allParametersPresent(2, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ RunTimeOperandInfo &input = mOperands[ins[0]];
+ float beta = getScalarData<float>(mOperands[ins[1]]);
+ if (beta <= 0.0f)
+ {
+ LOG(ERROR) << "beta must be positive for softmax";
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = softmaxPrepare(input.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ softmaxFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(), beta,
+                                    reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::FULLY_CONNECTED:
+ {
+ if (!allParametersPresent(4, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ RunTimeOperandInfo &input = mOperands[ins[0]];
+ RunTimeOperandInfo &weights = mOperands[ins[1]];
+ RunTimeOperandInfo &bias = mOperands[ins[2]];
+
+ int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]);
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ fullyConnectedFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+ reinterpret_cast<const float *>(weights.buffer), weights.shape(),
+ reinterpret_cast<const float *>(bias.buffer), bias.shape(), activation,
+ reinterpret_cast<float *>(output.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::CONCATENATION:
+ {
+ if (outs.size() != 1 || ins.size() < 2)
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
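+      // The last input is the axis scalar; everything before it is a tensor
+      // to concatenate.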
+ int numInputTensors = ins.size() - 1;
+ int32_t axis = getScalarData<int32_t>(mOperands[ins[numInputTensors]]);
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ const RunTimeOperandInfo &firstInput = mOperands[ins[0]];
+ ASSERT(firstInput.type == OperandType::TENSOR_FLOAT32);
+ {
+ std::vector<Shape> inputShapes(numInputTensors);
+ std::vector<const float *> inputDataPtrs(numInputTensors);
+
+ for (int i = 0; i < numInputTensors; i++)
+ {
+ RunTimeOperandInfo &input = mOperands[ins[i]];
+ inputShapes[i] = input.shape();
+ inputDataPtrs[i] = reinterpret_cast<const float *>(input.buffer);
+ }
+ success = concatenationPrepare(inputShapes, axis, &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ concatenationFloat32(inputDataPtrs, inputShapes, axis, reinterpret_cast<float *>(output.buffer),
+ outShape);
+ }
+ }
+ break;
+ case OperationType::RESHAPE:
+ {
+ if (!allParametersPresent(2, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &input = mOperands[ins[0]];
+ const RunTimeOperandInfo &targetShape = mOperands[ins[1]];
+
+ RunTimeOperandInfo &output = mOperands[outs[0]];
+ Shape outShape = output.shape();
+
+ success = reshapePrepare(input.shape(), reinterpret_cast<const int32_t *>(targetShape.buffer),
+ getNumberOfElements(targetShape.shape()), &outShape) &&
+ setInfoAndAllocateIfNeeded(&output, outShape) &&
+ reshapeGeneric(reinterpret_cast<const void *>(input.buffer), input.shape(),
+ reinterpret_cast<void *>(output.buffer), outShape);
+ }
+ break;
+ case OperationType::PAD:
+ {
+ if (!allParametersPresent(2, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+      const RunTimeOperandInfo &input = mOperands[ins[0]];
+      const RunTimeOperandInfo &paddings = mOperands[ins[1]];
+
+      RunTimeOperandInfo &output = mOperands[outs[0]];
+      Shape outShape = output.shape();
+
+      success = padPrepare(input.shape(), reinterpret_cast<const int32_t *>(paddings.buffer),
+                           paddings.shape(), &outShape) &&
+                setInfoAndAllocateIfNeeded(&output, outShape) &&
+                padGeneric(input.buffer, input.shape(),
+                           reinterpret_cast<const int32_t *>(paddings.buffer), output.buffer,
+                           outShape);
+ }
+ break;
+ case OperationType::SUB:
+ {
+ if (!allParametersPresent(3, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &in1 = mOperands[ins[0]];
+ const RunTimeOperandInfo &in2 = mOperands[ins[1]];
+ int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+ RunTimeOperandInfo &out = mOperands[outs[0]];
+ Shape outShape = out.shape();
+
+ ASSERT(in1.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = subPrepare(in1.shape(), in2.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&out, outShape) &&
+ subFloat32(reinterpret_cast<const float *>(in1.buffer), in1.shape(),
+ reinterpret_cast<const float *>(in2.buffer), in2.shape(), activation,
+ reinterpret_cast<float *>(out.buffer), outShape);
+ }
+ }
+ break;
+ case OperationType::DIV:
+ {
+ if (!allParametersPresent(3, 1))
+ {
+ return ANEURALNETWORKS_BAD_DATA;
+ }
+ const RunTimeOperandInfo &in1 = mOperands[ins[0]];
+ const RunTimeOperandInfo &in2 = mOperands[ins[1]];
+ int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+ RunTimeOperandInfo &out = mOperands[outs[0]];
+ Shape outShape = out.shape();
+
+ ASSERT(in1.type == OperandType::TENSOR_FLOAT32);
+ {
+ success = divPrepare(in1.shape(), in2.shape(), &outShape) &&
+ setInfoAndAllocateIfNeeded(&out, outShape) &&
+ divFloat32(reinterpret_cast<const float *>(in1.buffer), in1.shape(),
+ reinterpret_cast<const float *>(in2.buffer), in2.shape(), activation,
+ reinterpret_cast<float *>(out.buffer), outShape);
+ }
+ }
+ break;
+ default:
+ NYI(getOperationName(operation.type));
+ break;
+ }
+ if (!success)
+ {
+ LOG(ERROR) << getOperationName(operation.type) << " failed.";
+ return ANEURALNETWORKS_OP_FAILED;
+ }
+
+ freeNoLongerUsedOperands(ins);
+ return ANEURALNETWORKS_NO_ERROR;
+}