Diffstat (limited to 'compiler/ann-ref/src/Executor.cpp')
-rw-r--r-- | compiler/ann-ref/src/Executor.cpp | 814 |
1 file changed, 814 insertions, 0 deletions
diff --git a/compiler/ann-ref/src/Executor.cpp b/compiler/ann-ref/src/Executor.cpp
new file mode 100644
index 000000000..888fc9c81
--- /dev/null
+++ b/compiler/ann-ref/src/Executor.cpp
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Executor.h"
+
+#include "NeuralNetworks.h"
+#include "Shape.h"
+
+#include "ops/Add.h"
+#include "ops/Add.float.h"
+#include "ops/Conv2D.h"
+#include "ops/Conv2D.float.h"
+#include "ops/DepthwiseConv2D.h"
+#include "ops/DepthwiseConv2D.float.h"
+#include "ops/AvgPool2D.h"
+#include "ops/AvgPool2D.float.h"
+#include "ops/MaxPool2D.h"
+#include "ops/MaxPool2D.float.h"
+#include "ops/Mul.h"
+#include "ops/Mul.float.h"
+#include "ops/ReLU.h"
+#include "ops/ReLU.float.h"
+#include "ops/ReLU6.h"
+#include "ops/ReLU6.float.h"
+#include "ops/Concatenation.h"
+#include "ops/Concatenation.float.h"
+#include "ops/Reshape.h"
+#include "ops/Softmax.h"
+#include "ops/Softmax.float.h"
+#include "ops/FullyConnected.h"
+#include "ops/FullyConnected.float.h"
+#include "ops/Pad.h"
+#include "ops/Sub.h"
+#include "ops/Sub.float.h"
+#include "ops/Div.h"
+#include "ops/Div.float.h"
+
+#include "Logging.h"
+#include "Assert.h"
+
+enum PaddingScheme
+{
+  kPaddingUnknown = 0,
+  kPaddingSame = 1,
+  kPaddingValid = 2,
+};
+
+inline void calculateExplicitPadding(int32_t in_size, int32_t stride, int32_t filter_size,
+                                     int32_t padding_implicit, int32_t *padding_head,
+                                     int32_t *padding_tail)
+{
+  *padding_head = 0;
+  *padding_tail = 0;
+
+  if (padding_implicit == kPaddingSame)
+  {
+    int32_t out_size = (in_size + stride - 1) / stride;
+    int32_t tmp = (out_size - 1) * stride + filter_size;
+    if (tmp > in_size)
+    {
+      *padding_head = (tmp - in_size) / 2;
+      *padding_tail = (tmp - in_size) - *padding_head;
+    }
+  }
+}
+
+template <typename T> static inline T getScalarData(const RunTimeOperandInfo &info)
+{
+  // TODO: Check buffer is at least as long as size of data.
+  T *data = reinterpret_cast<T *>(info.buffer);
+  return data[0];
+}
+
+// Updates the RunTimeOperandInfo with the newly calculated shape.
+// Allocate the buffer if we need to.
+static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo *info, const Shape &shape)
+{
+  // For user-provided model output operands, the parameters must match the Shape
+  // calculated from the preparation step.
+  if (info->lifetime == OperandLifeTime::MODEL_OUTPUT)
+  {
+    if (info->type != shape.type || info->dimensions != shape.dimensions)
+    {
+      LOG(ERROR) << "Invalid type or dimensions for model output";
+      return false;
+    }
+    if (info->type == OperandType::TENSOR_QUANT8_ASYMM &&
+        (info->scale != shape.scale || info->zeroPoint != shape.offset))
+    {
+      LOG(ERROR) << "Invalid scale or zeroPoint for model output";
+      return false;
+    }
+  }
+  info->type = shape.type;
+  info->dimensions = shape.dimensions;
+  info->scale = shape.scale;
+  info->zeroPoint = shape.offset;
+  if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr)
+  {
+    uint32_t length = sizeOfData(info->type, info->dimensions);
+    info->buffer = new uint8_t[length];
+    if (info->buffer == nullptr)
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Ignore the .pools entry in model and request. This will have been taken care of
+// by the caller.
+int Executor::run(const Model &model, const Request &request,
+                  const std::vector<RunTimePoolInfo> &modelPoolInfos,
+                  const std::vector<RunTimePoolInfo> &requestPoolInfos)
+{
+  VLOG(CPUEXE) << "Executor::run()";
+
+  mModel = &model;
+  mRequest = &request; // TODO check if mRequest is needed
+  initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
+  // The model has serialized the operation in execution order.
+  for (const auto &operation : model.operations)
+  {
+    int n = executeOperation(operation);
+    if (n != ANEURALNETWORKS_NO_ERROR)
+    {
+      return n;
+    }
+  }
+  mModel = nullptr;
+  mRequest = nullptr;
+  VLOG(CPUEXE) << "Completed run normally";
+  return ANEURALNETWORKS_NO_ERROR;
+}
+
+bool Executor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo> &modelPoolInfos,
+                                     const std::vector<RunTimePoolInfo> &requestPoolInfos)
+{
+  VLOG(CPUEXE) << "Executor::initializeRunTimeInfo";
+  const size_t count = mModel->operands.size();
+  mOperands.resize(count);
+
+  // Start by setting the runtime info to what's in the model.
+  for (size_t i = 0; i < count; i++)
+  {
+    const Operand &from = mModel->operands[i];
+    RunTimeOperandInfo &to = mOperands[i];
+    to.type = from.type;
+    to.dimensions = from.dimensions;
+    to.scale = from.scale;
+    to.zeroPoint = from.zeroPoint;
+    to.length = from.location.length;
+    to.lifetime = from.lifetime;
+    switch (from.lifetime)
+    {
+      case OperandLifeTime::TEMPORARY_VARIABLE:
+        to.buffer = nullptr;
+        to.numberOfUsesLeft = from.numberOfConsumers;
+        break;
+      case OperandLifeTime::CONSTANT_COPY:
+        to.buffer = const_cast<uint8_t *>(&mModel->operandValues[from.location.offset]);
+        to.numberOfUsesLeft = 0;
+        break;
+      case OperandLifeTime::CONSTANT_REFERENCE:
+      {
+        auto poolIndex = from.location.poolIndex;
+        ASSERT(poolIndex < modelPoolInfos.size());
+        auto &r = modelPoolInfos[poolIndex];
+        to.buffer = r.buffer + from.location.offset;
+        to.numberOfUsesLeft = 0;
+        break;
+      }
+      case OperandLifeTime::MODEL_INPUT:
+      case OperandLifeTime::MODEL_OUTPUT:
+      case OperandLifeTime::NO_VALUE:
+        to.buffer = nullptr;
+        to.numberOfUsesLeft = 0;
+        break;
+      default:
+        ASSERT(false);
+        break;
+    }
+  }
+
+  // Adjust the runtime info for the arguments passed to the model,
+  // modifying the buffer location, and possibly the dimensions.
+  auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t> &indexes,
+                                                      const std::vector<RequestArgument> &arguments) {
+    ASSERT(indexes.size() == arguments.size());
+    for (size_t i = 0; i < indexes.size(); i++)
+    {
+      const uint32_t operandIndex = indexes[i];
+      const RequestArgument &from = arguments[i];
+      RunTimeOperandInfo &to = mOperands[operandIndex];
+      if (from.dimensions.size() > 0)
+      {
+        // It's the responsibility of the caller to validate that
+        // from.dimensions only modifies the dimensions that were
+        // unspecified in the model. That's the case in SampleDriver.cpp
+        // with the call to validateRequest().
+        // TODO make sure that's the case for the default CPU path.
+        to.dimensions = from.dimensions;
+      }
+      if (from.hasNoValue)
+      {
+        to.lifetime = OperandLifeTime::NO_VALUE;
+        ASSERT(to.buffer == nullptr);
+      }
+      else
+      {
+        auto poolIndex = from.location.poolIndex;
+        ASSERT(poolIndex < requestPoolInfos.size());
+        auto &r = requestPoolInfos[poolIndex];
+        to.buffer = r.buffer + from.location.offset;
+      }
+    }
+  };
+  updateForArguments(mModel->inputIndexes, mRequest->inputs);
+  updateForArguments(mModel->outputIndexes, mRequest->outputs);
+
+  return true;
+}
+
+void Executor::freeNoLongerUsedOperands(const std::vector<uint32_t> &inputs)
+{
+  for (uint32_t i : inputs)
+  {
+    auto &info = mOperands[i];
+    // Check if it's a static or model input/output.
+    if (info.numberOfUsesLeft == 0)
+    {
+      continue;
+    }
+    info.numberOfUsesLeft--;
+    if (info.numberOfUsesLeft == 0)
+    {
+      ASSERT(info.buffer != nullptr);
+      delete[] info.buffer;
+      info.buffer = nullptr;
+    }
+  }
+}
+
+int Executor::executeOperation(const Operation &operation)
+{
+  const std::vector<uint32_t> &ins = operation.inputs;
+  const std::vector<uint32_t> &outs = operation.outputs;
+  bool success = false;
+
+  // Function to verify that the number of input and output parameters
+  // matches what is expected. Also checks that all the parameters have
+  // values. This function is to be used only for operations that do not
+  // accept optional arguments.
+  // TODO Have a version that works for optional arguments.
+  auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
+                                                              size_t requiredOuts) -> bool {
+    auto verify = [&operation, this](size_t requiredCount, const std::vector<uint32_t> &indexes,
+                                     const char *type) -> bool {
+      size_t actualCount = indexes.size();
+      if (actualCount != requiredCount)
+      {
+        LOG(ERROR) << getOperationName(operation.type) << ": Invalid number of " << type
+                   << " operands. Got " << actualCount << " of " << requiredCount;
+        return false;
+      }
+      for (size_t i = 0; i < actualCount; i++)
+      {
+        if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE)
+        {
+          LOG(ERROR) << getOperationName(operation.type) << " " << type << " operand " << i
+                     << " is required but missing.";
+          return false;
+        }
+      }
+      return true;
+    };
+    return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out");
+  };
+
+  switch (operation.type)
+  {
+    case OperationType::ADD:
+    {
+      if (!allParametersPresent(3, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      const RunTimeOperandInfo &in1 = mOperands[ins[0]];
+      const RunTimeOperandInfo &in2 = mOperands[ins[1]];
+      int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+      RunTimeOperandInfo &out = mOperands[outs[0]];
+      Shape outShape = out.shape();
+
+      ASSERT(in1.type == OperandType::TENSOR_FLOAT32);
+      {
+        success = addPrepare(in1.shape(), in2.shape(), &outShape) &&
+                  setInfoAndAllocateIfNeeded(&out, outShape) &&
+                  addFloat32(reinterpret_cast<const float *>(in1.buffer), in1.shape(),
+                             reinterpret_cast<const float *>(in2.buffer), in2.shape(), activation,
+                             reinterpret_cast<float *>(out.buffer), outShape);
+      }
+    }
+    break;
+    case OperationType::DEPTHWISE_CONV_2D:
+    {
+      const size_t inCount = ins.size();
+      if ((inCount != 11 && inCount != 8) || !allParametersPresent(inCount, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      const RunTimeOperandInfo &input = mOperands[ins[0]];
+      const RunTimeOperandInfo &filter = mOperands[ins[1]];
+      const RunTimeOperandInfo &bias = mOperands[ins[2]];
+
+      int32_t padding_left, padding_right;
+      int32_t padding_top, padding_bottom;
+      int32_t stride_width, stride_height;
+      int32_t depth_multiplier;
+      int32_t activation;
+
+      if (inCount == 11)
+      {
+        padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
+        padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
+        padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
+        padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
+        stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
+        stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
+        depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
+        activation = getScalarData<int32_t>(mOperands[ins[10]]);
+      }
+      else
+      {
+        int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
+        stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
+        stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
+        depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
+        activation = getScalarData<int32_t>(mOperands[ins[7]]);
+
+        Shape inputShape = input.shape();
+        Shape filterShape = filter.shape();
+        int32_t input_width = getSizeOfDimension(inputShape, 2);
+        int32_t input_height = getSizeOfDimension(inputShape, 1);
+        int32_t filter_width = getSizeOfDimension(filterShape, 2);
+        int32_t filter_height = getSizeOfDimension(filterShape, 1);
+        calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
+                                 &padding_left, &padding_right);
+        calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
+                                 &padding_top, &padding_bottom);
+      }
+
+      RunTimeOperandInfo &output = mOperands[outs[0]];
+      Shape outShape = output.shape();
+
+      ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+      {
+        success =
+            depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(), padding_left,
+                                 padding_right, padding_top, padding_bottom, stride_width,
+                                 stride_height, &outShape) &&
+            setInfoAndAllocateIfNeeded(&output, outShape) &&
+            depthwiseConvFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+                                 reinterpret_cast<const float *>(filter.buffer), filter.shape(),
+                                 reinterpret_cast<const float *>(bias.buffer), bias.shape(),
+                                 padding_left, padding_right, padding_top, padding_bottom,
+                                 stride_width, stride_height, depth_multiplier, activation,
+                                 reinterpret_cast<float *>(output.buffer), outShape);
+      }
+    }
+    break;
+    case OperationType::CONV_2D:
+    {
+      const size_t inCount = ins.size();
+      if ((inCount != 10 && inCount != 7) || !allParametersPresent(inCount, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      const RunTimeOperandInfo &input = mOperands[ins[0]];
+      const RunTimeOperandInfo &filter = mOperands[ins[1]];
+      const RunTimeOperandInfo &bias = mOperands[ins[2]];
+
+      int32_t padding_left, padding_right;
+      int32_t padding_top, padding_bottom;
+      int32_t stride_width, stride_height;
+      int32_t activation;
+
+      if (inCount == 10)
+      {
+        padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
+        padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
+        padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
+        padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
+        stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
+        stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
+        activation = getScalarData<int32_t>(mOperands[ins[9]]);
+      }
+      else
+      {
+        int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
+        stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
+        stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
+        activation = getScalarData<int32_t>(mOperands[ins[6]]);
+
+        Shape inputShape = input.shape();
+        Shape filterShape = filter.shape();
+        int32_t input_width = getSizeOfDimension(inputShape, 2);
+        int32_t input_height = getSizeOfDimension(inputShape, 1);
+        int32_t filter_width = getSizeOfDimension(filterShape, 2);
+        int32_t filter_height = getSizeOfDimension(filterShape, 1);
+        calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
+                                 &padding_left, &padding_right);
+        calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
+                                 &padding_top, &padding_bottom);
+      }
+
+      RunTimeOperandInfo &output = mOperands[outs[0]];
+      Shape outShape = output.shape();
+
+      ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+      {
+        success =
+            convPrepare(input.shape(), filter.shape(), bias.shape(), padding_left, padding_right,
+                        padding_top, padding_bottom, stride_width, stride_height, &outShape) &&
+            setInfoAndAllocateIfNeeded(&output, outShape) &&
+            convFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+                        reinterpret_cast<const float *>(filter.buffer), filter.shape(),
+                        reinterpret_cast<const float *>(bias.buffer), bias.shape(), padding_left,
+                        padding_right, padding_top, padding_bottom, stride_width, stride_height,
+                        activation, reinterpret_cast<float *>(output.buffer), outShape);
+      }
+    }
+    break;
+    case OperationType::AVERAGE_POOL_2D:
+    {
+      const size_t inCount = ins.size();
+      if ((inCount != 10 && inCount != 7) || !allParametersPresent(inCount, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      const RunTimeOperandInfo &input = mOperands[ins[0]];
+
+      int32_t padding_left, padding_right;
+      int32_t padding_top, padding_bottom;
+      int32_t stride_width, stride_height;
+      int32_t filter_width, filter_height;
+      int32_t activation;
+
+      if (inCount == 10)
+      {
+        padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
+        padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
+        padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
+        padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
+        stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
+        stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
+        filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
+        filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
+        activation = getScalarData<int32_t>(mOperands[ins[9]]);
+      }
+      else
+      {
+        int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
+        stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
+        stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
+        filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
+        filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
+        activation = getScalarData<int32_t>(mOperands[ins[6]]);
+
+        Shape inputShape = input.shape();
+        int32_t input_width = getSizeOfDimension(inputShape, 2);
+        int32_t input_height = getSizeOfDimension(inputShape, 1);
+        calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
+                                 &padding_left, &padding_right);
+        calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
+                                 &padding_top, &padding_bottom);
+      }
+
+      RunTimeOperandInfo &output = mOperands[outs[0]];
+      Shape outShape = output.shape();
+
+      ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+      {
+        success = averagePoolPrepare(input.shape(), padding_left, padding_right, padding_top,
+                                     padding_bottom, stride_width, stride_height, filter_width,
+                                     filter_height, &outShape) &&
+                  setInfoAndAllocateIfNeeded(&output, outShape) &&
+                  averagePoolFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+                                     padding_left, padding_right, padding_top, padding_bottom,
+                                     stride_width, stride_height, filter_width, filter_height,
+                                     activation, reinterpret_cast<float *>(output.buffer), outShape);
+      }
+    }
+    break;
+    case OperationType::MAX_POOL_2D:
+    {
+      const size_t inCount = ins.size();
+      if ((inCount != 10 && inCount != 7) || !allParametersPresent(inCount, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      const RunTimeOperandInfo &input = mOperands[ins[0]];
+
+      int32_t padding_left, padding_right;
+      int32_t padding_top, padding_bottom;
+      int32_t stride_width, stride_height;
+      int32_t filter_width, filter_height;
+      int32_t activation;
+
+      if (inCount == 10)
+      {
+        padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
+        padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
+        padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
+        padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
+        stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
+        stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
+        filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
+        filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
+        activation = getScalarData<int32_t>(mOperands[ins[9]]);
+      }
+      else
+      {
+        int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
+        stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
+        stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
+        filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
+        filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
+        activation = getScalarData<int32_t>(mOperands[ins[6]]);
+
+        Shape inputShape = input.shape();
+        int32_t input_width = getSizeOfDimension(inputShape, 2);
+        int32_t input_height = getSizeOfDimension(inputShape, 1);
+        calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
+                                 &padding_left, &padding_right);
+        calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
+                                 &padding_top, &padding_bottom);
+      }
+
+      RunTimeOperandInfo &output = mOperands[outs[0]];
+      Shape outShape = output.shape();
+
+      ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+      {
+        success = maxPoolPrepare(input.shape(), padding_left, padding_right, padding_top,
+                                 padding_bottom, stride_width, stride_height, filter_width,
+                                 filter_height, &outShape) &&
+                  setInfoAndAllocateIfNeeded(&output, outShape) &&
+                  maxPoolFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+                                 padding_left, padding_right, padding_top, padding_bottom,
+                                 stride_width, stride_height, filter_width, filter_height,
+                                 activation, reinterpret_cast<float *>(output.buffer), outShape);
+      }
+    }
+    break;
+    case OperationType::MUL:
+    {
+      if (!allParametersPresent(3, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      const RunTimeOperandInfo &in1 = mOperands[ins[0]];
+      const RunTimeOperandInfo &in2 = mOperands[ins[1]];
+      int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+      RunTimeOperandInfo &out = mOperands[outs[0]];
+      Shape outShape = out.shape();
+
+      ASSERT(in1.type == OperandType::TENSOR_FLOAT32);
+      {
+        success = mulPrepare(in1.shape(), in2.shape(), &outShape) &&
+                  setInfoAndAllocateIfNeeded(&out, outShape) &&
+                  mulFloat32(reinterpret_cast<const float *>(in1.buffer), in1.shape(),
+                             reinterpret_cast<const float *>(in2.buffer), in2.shape(), activation,
+                             reinterpret_cast<float *>(out.buffer), outShape);
+      }
+    }
+    break;
+    case OperationType::RELU:
+    {
+      if (!allParametersPresent(1, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      const RunTimeOperandInfo &input = mOperands[ins[0]];
+      RunTimeOperandInfo &output = mOperands[outs[0]];
+      Shape outShape = output.shape();
+
+      ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+      {
+        success = reluPrepare(input.shape(), &outShape) &&
+                  setInfoAndAllocateIfNeeded(&output, outShape) &&
+                  reluFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+                              reinterpret_cast<float *>(output.buffer), outShape);
+      }
+    }
+    break;
+    case OperationType::RELU6:
+    {
+      if (!allParametersPresent(1, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      const RunTimeOperandInfo &input = mOperands[ins[0]];
+      RunTimeOperandInfo &output = mOperands[outs[0]];
+      Shape outShape = output.shape();
+
+      ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+      {
+        success = relu6Prepare(input.shape(), &outShape) &&
+                  setInfoAndAllocateIfNeeded(&output, outShape) &&
+                  relu6Float32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+                               reinterpret_cast<float *>(output.buffer), outShape);
+      }
+    }
+    break;
+    case OperationType::SOFTMAX:
+    {
+      if (!allParametersPresent(2, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      RunTimeOperandInfo &input = mOperands[ins[0]];
+      float beta = getScalarData<float>(mOperands[ins[1]]);
+      if (beta <= 0.0f)
+      {
+        LOG(ERROR) << "beta must be positive for softmax";
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+
+      RunTimeOperandInfo &output = mOperands[outs[0]];
+      Shape outShape = output.shape();
+
+      ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+      {
+        success = softmaxPrepare(input.shape(), &outShape) &&
+                  setInfoAndAllocateIfNeeded(&output, outShape) &&
+                  softmaxFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(), beta,
+                                 reinterpret_cast<float *>(output.buffer), output.shape());
+      }
+    }
+    break;
+    case OperationType::FULLY_CONNECTED:
+    {
+      if (!allParametersPresent(4, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      RunTimeOperandInfo &input = mOperands[ins[0]];
+      RunTimeOperandInfo &weights = mOperands[ins[1]];
+      RunTimeOperandInfo &bias = mOperands[ins[2]];
+
+      int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]);
+
+      RunTimeOperandInfo &output = mOperands[outs[0]];
+      Shape outShape = output.shape();
+
+      ASSERT(input.type == OperandType::TENSOR_FLOAT32);
+      {
+        success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(), &outShape) &&
+                  setInfoAndAllocateIfNeeded(&output, outShape) &&
+                  fullyConnectedFloat32(reinterpret_cast<const float *>(input.buffer), input.shape(),
+                                        reinterpret_cast<const float *>(weights.buffer),
+                                        weights.shape(),
+                                        reinterpret_cast<const float *>(bias.buffer), bias.shape(),
+                                        activation, reinterpret_cast<float *>(output.buffer),
+                                        outShape);
+      }
+    }
+    break;
+    case OperationType::CONCATENATION:
+    {
+      if (outs.size() != 1 || ins.size() < 2)
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      int numInputTensors = ins.size() - 1;
+      int32_t axis = getScalarData<int32_t>(mOperands[ins[numInputTensors]]);
+
+      RunTimeOperandInfo &output = mOperands[outs[0]];
+      Shape outShape = output.shape();
+
+      const RunTimeOperandInfo &firstInput = mOperands[ins[0]];
+      ASSERT(firstInput.type == OperandType::TENSOR_FLOAT32);
+      {
+        std::vector<Shape> inputShapes(numInputTensors);
+        std::vector<const float *> inputDataPtrs(numInputTensors);
+
+        for (int i = 0; i < numInputTensors; i++)
+        {
+          RunTimeOperandInfo &input = mOperands[ins[i]];
+          inputShapes[i] = input.shape();
+          inputDataPtrs[i] = reinterpret_cast<const float *>(input.buffer);
+        }
+        success = concatenationPrepare(inputShapes, axis, &outShape) &&
+                  setInfoAndAllocateIfNeeded(&output, outShape) &&
+                  concatenationFloat32(inputDataPtrs, inputShapes, axis,
+                                       reinterpret_cast<float *>(output.buffer), outShape);
+      }
+    }
+    break;
+    case OperationType::RESHAPE:
+    {
+      if (!allParametersPresent(2, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      const RunTimeOperandInfo &input = mOperands[ins[0]];
+      const RunTimeOperandInfo &targetShape = mOperands[ins[1]];
+
+      RunTimeOperandInfo &output = mOperands[outs[0]];
+      Shape outShape = output.shape();
+
+      success = reshapePrepare(input.shape(), reinterpret_cast<const int32_t *>(targetShape.buffer),
+                               getNumberOfElements(targetShape.shape()), &outShape) &&
+                setInfoAndAllocateIfNeeded(&output, outShape) &&
+                reshapeGeneric(reinterpret_cast<const void *>(input.buffer), input.shape(),
+                               reinterpret_cast<void *>(output.buffer), outShape);
+    }
+    break;
+    case OperationType::PAD:
+    {
+      if (!allParametersPresent(2, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      const RunTimeOperandInfo& input = mOperands[ins[0]];
+      const RunTimeOperandInfo& paddings = mOperands[ins[1]];
+
+      RunTimeOperandInfo& output = mOperands[outs[0]];
+      Shape outShape = output.shape();
+
+      success = padPrepare(input.shape(),
+                           reinterpret_cast<const int32_t*>(paddings.buffer),
+                           paddings.shape(),
+                           &outShape) &&
+                setInfoAndAllocateIfNeeded(&output, outShape) &&
+                padGeneric(input.buffer,
+                           input.shape(),
+                           reinterpret_cast<const int32_t*>(paddings.buffer),
+                           output.buffer,
+                           outShape);
+    }
+    break;
+    case OperationType::SUB:
+    {
+      if (!allParametersPresent(3, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      const RunTimeOperandInfo &in1 = mOperands[ins[0]];
+      const RunTimeOperandInfo &in2 = mOperands[ins[1]];
+      int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+      RunTimeOperandInfo &out = mOperands[outs[0]];
+      Shape outShape = out.shape();
+
+      ASSERT(in1.type == OperandType::TENSOR_FLOAT32);
+      {
+        success = subPrepare(in1.shape(), in2.shape(), &outShape) &&
+                  setInfoAndAllocateIfNeeded(&out, outShape) &&
+                  subFloat32(reinterpret_cast<const float *>(in1.buffer), in1.shape(),
+                             reinterpret_cast<const float *>(in2.buffer), in2.shape(), activation,
+                             reinterpret_cast<float *>(out.buffer), outShape);
+      }
+    }
+    break;
+    case OperationType::DIV:
+    {
+      if (!allParametersPresent(3, 1))
+      {
+        return ANEURALNETWORKS_BAD_DATA;
+      }
+      const RunTimeOperandInfo &in1 = mOperands[ins[0]];
+      const RunTimeOperandInfo &in2 = mOperands[ins[1]];
+      int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
+
+      RunTimeOperandInfo &out = mOperands[outs[0]];
+      Shape outShape = out.shape();
+
+      ASSERT(in1.type == OperandType::TENSOR_FLOAT32);
+      {
+        success = divPrepare(in1.shape(), in2.shape(), &outShape) &&
+                  setInfoAndAllocateIfNeeded(&out, outShape) &&
+                  divFloat32(reinterpret_cast<const float *>(in1.buffer), in1.shape(),
+                             reinterpret_cast<const float *>(in2.buffer), in2.shape(), activation,
+                             reinterpret_cast<float *>(out.buffer), outShape);
+      }
+    }
+    break;
+    default:
+      NYI(getOperationName(operation.type));
+      break;
+  }
+  if (!success)
+  {
+    LOG(ERROR) << getOperationName(operation.type) << " failed.";
+    return ANEURALNETWORKS_OP_FAILED;
+  }
+
+  freeNoLongerUsedOperands(ins);
+  return ANEURALNETWORKS_NO_ERROR;
+}
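
For reference, the kPaddingSame branch of calculateExplicitPadding in this file computes an output size of ceil(in_size / stride) and then splits whatever extra input the filter window needs between head and tail padding, with the odd cell going to the tail. The following is a minimal standalone sketch of that arithmetic (not part of the commit; the samePadding name and the example sizes are illustrative):

#include <cstdint>
#include <cstdio>

// Mirrors the kPaddingSame branch of calculateExplicitPadding.
static void samePadding(int32_t in_size, int32_t stride, int32_t filter_size,
                        int32_t *head, int32_t *tail)
{
  *head = 0;
  *tail = 0;
  int32_t out_size = (in_size + stride - 1) / stride;      // ceil(in_size / stride)
  int32_t needed = (out_size - 1) * stride + filter_size;  // input the filter must cover
  if (needed > in_size)
  {
    *head = (needed - in_size) / 2;     // smaller half at the head
    *tail = (needed - in_size) - *head; // odd cell goes to the tail
  }
}

int main()
{
  int32_t head, tail;
  samePadding(224, 2, 3, &head, &tail);    // 224-wide input, 3-wide filter, stride 2
  printf("head=%d tail=%d\n", head, tail); // prints head=0 tail=1
  return 0;
}

With kPaddingValid the helper leaves both sides at zero, which is exactly what the early-initialized *padding_head = *padding_tail = 0 provides.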
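
The temporary-buffer lifetime scheme is also worth seeing in isolation: initializeRunTimeInfo seeds numberOfUsesLeft with each TEMPORARY_VARIABLE operand's consumer count, and freeNoLongerUsedOperands decrements it after every executed operation, deleting the buffer once the last consumer has run. A self-contained sketch of the same idea (the TempOperand type and releaseAfterUse helper are hypothetical, not the commit's API):

#include <cstdint>
#include <cstdio>
#include <vector>

struct TempOperand
{
  uint8_t *buffer = nullptr;
  uint32_t numberOfUsesLeft = 0; // consumer count; 0 means "never freed here"
};

static void releaseAfterUse(std::vector<TempOperand> &operands,
                            const std::vector<uint32_t> &inputs)
{
  for (uint32_t i : inputs)
  {
    TempOperand &info = operands[i];
    if (info.numberOfUsesLeft == 0)
      continue; // constant or model input/output: skip
    if (--info.numberOfUsesLeft == 0)
    {
      delete[] info.buffer; // last consumer has run
      info.buffer = nullptr;
    }
  }
}

int main()
{
  std::vector<TempOperand> operands(1);
  operands[0].buffer = new uint8_t[16];
  operands[0].numberOfUsesLeft = 2; // consumed by two operations
  releaseAfterUse(operands, {0});   // after op 1: buffer still alive
  releaseAfterUse(operands, {0});   // after op 2: buffer freed
  printf("buffer is %s\n", operands[0].buffer ? "alive" : "freed");
  return 0;
}

Because the model serializes operations in execution order, this eager release keeps at most the live temporaries in memory at any point of the run.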