 .gitignore                                            |   1
 CMakeLists.txt                                        |   6
 cmake/option/option_armv7l-linux.cmake                |   1
 cmake/packages/AbslSourceConfig.cmake                 |  19
 cmake/packages/EigenSourceConfig.cmake                |   2
 cmake/packages/FarmhashSourceConfig.cmake             |   2
 cmake/packages/FlatBuffersConfig.cmake                |  10
 cmake/packages/FlatBuffersSourceConfig.cmake          |   4
 cmake/packages/GEMMLowpSourceConfig.cmake             |   2
 cmake/packages/NEON2SSESourceConfig.cmake             |   2
 cmake/packages/TensorFlowSourceConfig.cmake           |   2
 externals/CMakeLists.txt                              |  12
 include/NeuralNetworksShim.h                          |  33
 include/support/tflite/nnapi_delegate.h               |  13
 libs/support/tflite/src/nnapi_delegate.cpp            | 456
 runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc        |  38
 runtimes/neurun/src/kernel/cpu/ConcatLayer.cc         |  63
 runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc    | 146
 runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc |  77
 runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc        |  37
 runtimes/neurun/src/kernel/cpu/OperationUtils.h       |  47
 runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc        |  72
 22 files changed, 742 insertions(+), 303 deletions(-)
diff --git a/.gitignore b/.gitignore
index 0eaad0dc7..e7a60c0b1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -85,3 +85,4 @@ GRTAGS
/externals/neon_2_sse
/externals/tensorflow
/externals/acl
+/externals/absl
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6238ce073..85a81dd94 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -38,6 +38,7 @@ link_directories(${CMAKE_INSTALL_PREFIX}/lib)
# Download configuration
option(DOWNLOAD_TENSORFLOW "Download Tensorflow source" ON)
+option(DOWNLOAD_ABSL "Download Absl source" ON)
option(DOWNLOAD_EIGEN "Download Eigen source" ON)
option(DOWNLOAD_FARMHASH "Download farmhash source" ON)
option(DOWNLOAD_GEMMLOWP "Download GEMM low precision library source" ON)
@@ -49,6 +50,11 @@ option(DOWNLOAD_ARMCOMPUTE "Download ARM Compute source" ON)
option(BUILD_GTEST "Download and build Google Test" ON)
nnfw_find_package(GTest QUIET)
+# NOTE Workaround to avoid build failures caused by a tensorflow (or acl) package version mismatch on OBS builds
+if(OBS_BUILD)
+ add_definitions(-DOBS_BUILD)
+endif(OBS_BUILD)
+
# TODO For now Android build is being enabled incrementally so not all subdirectories are added yet.
# However we are going to have the same subdirectories with other OS eventually.
if("${TARGET_OS}" STREQUAL "android")
diff --git a/cmake/option/option_armv7l-linux.cmake b/cmake/option/option_armv7l-linux.cmake
index 42988bc9d..b295b4a82 100644
--- a/cmake/option/option_armv7l-linux.cmake
+++ b/cmake/option/option_armv7l-linux.cmake
@@ -18,6 +18,7 @@ set(FLAGS_COMMON ${FLAGS_COMMON}
"-mfpu=neon-vfpv4"
"-funsafe-math-optimizations"
"-ftree-vectorize"
+ "-fPIC"
)
# remove warning from arm cl
diff --git a/cmake/packages/AbslSourceConfig.cmake b/cmake/packages/AbslSourceConfig.cmake
new file mode 100644
index 000000000..9075b7397
--- /dev/null
+++ b/cmake/packages/AbslSourceConfig.cmake
@@ -0,0 +1,19 @@
+function(_AbslSource_import)
+ if(NOT DOWNLOAD_ABSL)
+ set(AbslSource_FOUND FALSE PARENT_SCOPE)
+ return()
+ endif(NOT DOWNLOAD_ABSL)
+
+ nnfw_include(ExternalSourceTools)
+ nnfw_include(OptionTools)
+
+ # NOTE The following URL comes from TensorFlow 1.12
+ envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
+ set(ABSL_URL ${EXTERNAL_DOWNLOAD_SERVER}/abseil/abseil-cpp/archive/48cd2c3f351ff188bc85684b84a91b6e6d17d896.tar.gz)
+ ExternalSource_Download("absl" ${ABSL_URL})
+
+ set(AbslSource_DIR ${absl_SOURCE_DIR} PARENT_SCOPE)
+ set(AbslSource_FOUND TRUE PARENT_SCOPE)
+endfunction(_AbslSource_import)
+
+_AbslSource_import()
diff --git a/cmake/packages/EigenSourceConfig.cmake b/cmake/packages/EigenSourceConfig.cmake
index aac30dff5..dd94e069e 100644
--- a/cmake/packages/EigenSourceConfig.cmake
+++ b/cmake/packages/EigenSourceConfig.cmake
@@ -7,7 +7,7 @@ function(_EigenSource_import)
nnfw_include(ExternalSourceTools)
nnfw_include(OptionTools)
- # NOTE The following URL comes from TensorFlow 1.9
+ # NOTE TensorFlow 1.12 downloads Eigen from the following URL
envoption(EXTERNAL_DOWNLOAD_SERVER "https://bitbucket.org")
set(EIGEN_URL ${EXTERNAL_DOWNLOAD_SERVER}/eigen/eigen/get/fd6845384b86.tar.gz)
ExternalSource_Download("eigen" ${EIGEN_URL})
diff --git a/cmake/packages/FarmhashSourceConfig.cmake b/cmake/packages/FarmhashSourceConfig.cmake
index 29bc7f213..802367968 100644
--- a/cmake/packages/FarmhashSourceConfig.cmake
+++ b/cmake/packages/FarmhashSourceConfig.cmake
@@ -7,7 +7,7 @@ function(_FarmhashSource_import)
nnfw_include(ExternalSourceTools)
nnfw_include(OptionTools)
- # NOTE TensorFlow 1.9 downloads farmhash from the following URL
+ # NOTE TensorFlow 1.12 downloads farmhash from the following URL
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
set(FARMHASH_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz)
ExternalSource_Download("farmhash" ${FARMHASH_URL})
diff --git a/cmake/packages/FlatBuffersConfig.cmake b/cmake/packages/FlatBuffersConfig.cmake
index fab08fe39..064673158 100644
--- a/cmake/packages/FlatBuffersConfig.cmake
+++ b/cmake/packages/FlatBuffersConfig.cmake
@@ -15,18 +15,22 @@ function(_FlatBuffers_import)
# From FlatBuffers's CMakeLists.txt
list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_cpp.cpp")
+ list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_dart.cpp")
+ list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_fbs.cpp")
list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_general.cpp")
list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_go.cpp")
+ list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_grpc.cpp")
list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_js.cpp")
+ list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_json_schema.cpp")
+ list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_lobster.cpp")
+ list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_lua.cpp")
list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_php.cpp")
list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_python.cpp")
- list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_fbs.cpp")
- list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_grpc.cpp")
- list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/idl_gen_json_schema.cpp")
list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/flatc.cpp")
list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/src/flatc_main.cpp")
list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/grpc/src/compiler/cpp_generator.cc")
list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/grpc/src/compiler/go_generator.cc")
+ list(APPEND FlatBuffers_Compiler_SRCS "${FlatBuffersSource_DIR}/grpc/src/compiler/java_generator.cc")
if(NOT TARGET flatbuffers)
add_library(flatbuffers ${FlatBuffers_Library_SRCS})
diff --git a/cmake/packages/FlatBuffersSourceConfig.cmake b/cmake/packages/FlatBuffersSourceConfig.cmake
index f062c2608..5f142bff7 100644
--- a/cmake/packages/FlatBuffersSourceConfig.cmake
+++ b/cmake/packages/FlatBuffersSourceConfig.cmake
@@ -7,9 +7,9 @@ function(_FlatBuffersSource_import)
nnfw_include(ExternalSourceTools)
nnfw_include(OptionTools)
- # NOTE TensorFlow 1.9 downloads FlatBuffers from the following URL
+ # NOTE TensorFlow 1.12 downloads FlatBuffers from the following URL
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(FLATBUFFERS_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/flatbuffers/archive/971a68110e4fc1bace10fcb6deeb189e7e1a34ce.tar.gz)
+ set(FLATBUFFERS_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/flatbuffers/archive/1f5eae5d6a135ff6811724f6c57f911d1f46bb15.tar.gz)
ExternalSource_Download("flatbuffers" ${FLATBUFFERS_URL})
set(FlatBuffersSource_DIR ${flatbuffers_SOURCE_DIR} PARENT_SCOPE)
diff --git a/cmake/packages/GEMMLowpSourceConfig.cmake b/cmake/packages/GEMMLowpSourceConfig.cmake
index f5e73355e..613ff29b5 100644
--- a/cmake/packages/GEMMLowpSourceConfig.cmake
+++ b/cmake/packages/GEMMLowpSourceConfig.cmake
@@ -7,7 +7,7 @@ function(_GEMMLowpSource_import)
nnfw_include(ExternalSourceTools)
nnfw_include(OptionTools)
- # NOTE TensorFlow 1.9 uses the following URL
+ # NOTE TensorFlow 1.12 uses the following URL
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
set(GEMMLOWP_URL ${EXTERNAL_DOWNLOAD_SERVER}/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.tar.gz)
ExternalSource_Download("gemmlowp" ${GEMMLOWP_URL})
diff --git a/cmake/packages/NEON2SSESourceConfig.cmake b/cmake/packages/NEON2SSESourceConfig.cmake
index 6258b536d..b656f5700 100644
--- a/cmake/packages/NEON2SSESourceConfig.cmake
+++ b/cmake/packages/NEON2SSESourceConfig.cmake
@@ -7,7 +7,7 @@ function(_NEON2SSESource_import)
nnfw_include(ExternalSourceTools)
nnfw_include(OptionTools)
- # NOTE TensorFlow 1.9 downloads NEON2SSE from the following URL
+ # NOTE TensorFlow 1.12 downloads NEON2SSE from the following URL
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
set(NEON2SSE_URL ${EXTERNAL_DOWNLOAD_SERVER}/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz)
ExternalSource_Download("neon_2_sse" ${NEON2SSE_URL})
diff --git a/cmake/packages/TensorFlowSourceConfig.cmake b/cmake/packages/TensorFlowSourceConfig.cmake
index ee88f9335..5828334c7 100644
--- a/cmake/packages/TensorFlowSourceConfig.cmake
+++ b/cmake/packages/TensorFlowSourceConfig.cmake
@@ -8,7 +8,7 @@ function(_TensorFlowSource_import)
nnfw_include(OptionTools)
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(TENSORFLOW_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v1.9.0.tar.gz)
+ set(TENSORFLOW_URL ${EXTERNAL_DOWNLOAD_SERVER}/tensorflow/tensorflow/archive/v1.12.0.tar.gz)
ExternalSource_Download("tensorflow" ${TENSORFLOW_URL})
set(TensorFlowSource_DIR ${tensorflow_SOURCE_DIR} PARENT_SCOPE)
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 5bfc4ad4e..c6f5b5b8b 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -2,6 +2,7 @@ set(TENSORFLOW_BASE ${CMAKE_CURRENT_SOURCE_DIR}/tensorflow)
set(TENSORFLOW_LITE_BASE ${TENSORFLOW_BASE}/tensorflow/contrib/lite)
# Required source & package
+nnfw_find_package(AbslSource REQUIRED)
nnfw_find_package(Eigen REQUIRED)
nnfw_find_package(FarmhashSource REQUIRED)
nnfw_find_package(FlatBuffersSource REQUIRED)
@@ -19,10 +20,20 @@ file(GLOB_RECURSE TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/*.cc")
file(GLOB_RECURSE TFLITE_KERNEL_TESTS "${TENSORFLOW_LITE_BASE}/kernels/*test*.cc")
list(REMOVE_ITEM TFLITE_KERNEL_SRCS ${TFLITE_KERNEL_TESTS})
+file(GLOB TFLITE_LIB_SRCS "${TENSORFLOW_LITE_BASE}/c/*.c" "${TENSORFLOW_LITE_BASE}/c/*.cc")
+file(GLOB TFLITE_LIB_TESTS "${TENSORFLOW_LITE_BASE}/c/*test*.cc")
+list(REMOVE_ITEM TFLITE_LIB_SRCS ${TFLITE_LIB_TESTS})
+
+file(GLOB TFLITE_API_SRCS "${TENSORFLOW_LITE_BASE}/core/api/*.c" "${TENSORFLOW_LITE_BASE}/core/api/*.cc")
+file(GLOB TFLITE_API_TESTS "${TENSORFLOW_LITE_BASE}/core/api/*test*.cc")
+list(REMOVE_ITEM TFLITE_API_SRCS ${TFLITE_API_TESTS})
+
# We will use our own BuiltinOpResolver
list(REMOVE_ITEM TFLITE_KERNEL_SRCS "${TENSORFLOW_LITE_BASE}/kernels/register.cc")
list(APPEND TFLITE_SRCS ${TFLITE_CORE_SRCS})
list(APPEND TFLITE_SRCS ${TFLITE_KERNEL_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_LIB_SRCS})
+list(APPEND TFLITE_SRCS ${TFLITE_API_SRCS})
list(APPEND TFLITE_SRCS "${TFLITE_DEPEND_DIR}/farmhash/src/farmhash.cc")
@@ -36,6 +47,7 @@ if(BUILD_TFLITE_BENCHMARK_MODEL)
endif()
list(APPEND TFLITE_INCLUDES "${CMAKE_CURRENT_SOURCE_DIR}/tensorflow")
+list(APPEND TFLITE_INCLUDES "${TFLITE_DEPEND_DIR}/absl")
list(APPEND TFLITE_INCLUDES "${TFLITE_DEPEND_DIR}/gemmlowp")
list(APPEND TFLITE_INCLUDES "${TFLITE_DEPEND_DIR}/farmhash/src")
list(APPEND TFLITE_INCLUDES "${TFLITE_DEPEND_DIR}/flatbuffers/include")
diff --git a/include/NeuralNetworksShim.h b/include/NeuralNetworksShim.h
index a7bd745fb..b310a44cd 100644
--- a/include/NeuralNetworksShim.h
+++ b/include/NeuralNetworksShim.h
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-// NOTE This header is derived from the following file (in TensorFlow)
+// NOTE This header is derived from the following file (in TensorFlow v1.12)
// 'externals/tensorflow/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h'
#ifndef __NEURAL_NETWORKS_SHIM__
#define __NEURAL_NETWORKS_SHIM__
@@ -68,6 +68,9 @@ typedef int (*ANeuralNetworksModel_identifyInputsAndOutputs_fn)(
ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs,
uint32_t outputCount, const uint32_t* outputs);
+typedef int (*ANeuralNetworksModel_relaxComputationFloat32toFloat16_fn)(
+ ANeuralNetworksModel* model, bool allow);
+
typedef int (*ANeuralNetworksExecution_create_fn)(
ANeuralNetworksCompilation* compilation,
ANeuralNetworksExecution** execution);
@@ -360,6 +363,34 @@ inline int ANeuralNetworksModel_identifyInputsAndOutputs(
}
/**
+ * Specifies whether {@link ANEURALNETWORKS_TENSOR_FLOAT32} is allowed to be
+ * calculated with range and/or precision as low as that of the IEEE 754 16-bit
+ * floating-point format. By default, {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * must be calculated using at least the range and precision of the IEEE 754
+ * 32-bit floating-point format.
+ *
+ * @param model The model to be modified.
+ * @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be
+ * calculated with range and/or precision as low as that of the
+ * IEEE 754 16-bit floating point format. 'false' indicates
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32} must be calculated using
+ * at least the range and precision of the IEEE 754 32-bit floating
+ * point format.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+ * been called will return an error.
+ *
+ * Available since API level 28.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ */
+inline int ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+ ANeuralNetworksModel* model, bool allow) {
+ LOAD_FUNCTION(ANeuralNetworksModel_relaxComputationFloat32toFloat16);
+ EXECUTE_FUNCTION_RETURN(model, allow);
+}
+
+/**
* Create a {@link ANeuralNetworksCompilation} to compile the given model.
* This only creates the object. Compilation is only performed once
* {@link ANeuralNetworksCompilation_start} is invoked.
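The shim above only resolves the new symbol at runtime; a minimal caller sketch (an illustration, not part of this patch, assuming the model-building wrappers from this header and API level >= 28) could look like:

  // Sketch only: assumes an already-populated model handle.
  ANeuralNetworksModel* model = nullptr;
  ANeuralNetworksModel_create(&model);
  // ... add operands and operations ...
  // Opt in to FP16-relaxed computation before finishing the model.
  if (ANeuralNetworksModel_relaxComputationFloat32toFloat16(model, /*allow=*/true) !=
      ANEURALNETWORKS_NO_ERROR) {
    // Older drivers may not expose the symbol; treat the model as not relaxed.
  }
  ANeuralNetworksModel_finish(model);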
diff --git a/include/support/tflite/nnapi_delegate.h b/include/support/tflite/nnapi_delegate.h
index a5da8ac39..b396d77f2 100644
--- a/include/support/tflite/nnapi_delegate.h
+++ b/include/support/tflite/nnapi_delegate.h
@@ -17,18 +17,24 @@ limitations under the License.
// NOTE To minimize diff with upstream tensorflow, disable clang-format
// clang-format off
-// NOTE This header is derived from the following file (in TensorFlow)
+// NOTE This header is derived from the following file (in TensorFlow v1.12)
// 'externals/tensorflow/tensorflow/contrib/lite/nnapi_delegate.h'
#ifndef __NNFW_SUPPORT_TFLITE_NNAPI_DELEGATE_H__
#define __NNFW_SUPPORT_TFLITE_NNAPI_DELEGATE_H__
#include "tensorflow/contrib/lite/allocation.h"
+#ifdef OBS_BUILD
#include "tensorflow/contrib/lite/context.h"
#include "tensorflow/contrib/lite/error_reporter.h"
+#else
+#include "tensorflow/contrib/lite/c/c_api_internal.h"
+#include "tensorflow/contrib/lite/core/api/error_reporter.h"
+#endif
#include "tensorflow/contrib/lite/interpreter.h"
#include "NeuralNetworksShim.h"
class ANeuralNetworksModel;
+class ANeuralNetworksMemory;
class ANeuralNetworksCompilation;
namespace nnfw {
@@ -62,11 +68,16 @@ class NNAPIDelegate {
// Run
TfLiteStatus Invoke(::tflite::Interpreter* interpreter);
+ // Whether the current platform supports NNAPI delegation.
+ static bool IsSupported();
+
private:
// The NN API model handle
ANeuralNetworksModel* nn_model_ = nullptr;
// The NN API compilation handle
ANeuralNetworksCompilation* nn_compiled_model_ = nullptr;
+ // Model status
+ TfLiteStatus model_status_ = kTfLiteOk;
// List of state tensors for LSTM, RNN, SVDF.
// NN API does not allow ops to maintain states across multiple
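The new IsSupported() query and the cached model_status_ let callers fall back instead of aborting; a hypothetical caller (the helper name and fallback policy are illustrative, not defined by this header) might use them like this:

  // Illustrative only: prefer NNAPI when available, otherwise (or when the
  // delegate rejected the graph) run the plain TFLite interpreter.
  TfLiteStatus RunWithOptionalNNAPI(::tflite::Interpreter* interpreter,
                                    nnfw::NNAPIDelegate* delegate) {
    if (nnfw::NNAPIDelegate::IsSupported()) {
      if (delegate->Invoke(interpreter) == kTfLiteOk) {
        return kTfLiteOk;
      }
      // Invoke() now returns the cached build error instead of calling exit().
    }
    return interpreter->Invoke();
  }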
diff --git a/libs/support/tflite/src/nnapi_delegate.cpp b/libs/support/tflite/src/nnapi_delegate.cpp
index 987cd77c5..57322f531 100644
--- a/libs/support/tflite/src/nnapi_delegate.cpp
+++ b/libs/support/tflite/src/nnapi_delegate.cpp
@@ -17,47 +17,85 @@ limitations under the License.
// NOTE To minimize diff with upstream tensorflow, disable clang-format
// clang-format off
-// NOTE This code is derived from the following file (in TensorFlow)
+// NOTE This code is derived from the following file (in TensorFlow v1.11)
// 'externals/tensorflow/tensorflow/contrib/lite/nnapi_delegate.cc'
#include "support/tflite/nnapi_delegate.h"
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
+#ifdef OBS_BUILD
#include "tensorflow/contrib/lite/builtin_op_data.h"
#include "tensorflow/contrib/lite/error_reporter.h"
+#else
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
+#include "tensorflow/contrib/lite/core/api/error_reporter.h"
+#endif
#include "tensorflow/contrib/lite/model.h"
#include "NeuralNetworksShim.h"
#include "NeuralNetworksExShim.h"
#ifdef __ANDROID__
+#include <android/log.h>
#include <sys/system_properties.h>
#endif
-namespace nnfw
-{
+namespace nnfw {
-// TODO(aselle): FATAL leaves resources hanging.
-void FATAL(const char* format, ...) {
- va_list args;
- va_start(args, format);
- vfprintf(stderr, format, args);
- va_end(args);
+void logError(const char* format, ...) {
+ // stderr is convenient for native tests, but is not captured for apps
+ va_list args_for_stderr;
+ va_start(args_for_stderr, format);
+ vfprintf(stderr, format, args_for_stderr);
+ va_end(args_for_stderr);
+ fprintf(stderr, "\n");
fflush(stderr);
- exit(1);
+#ifdef __ANDROID__
+ // produce logcat output for general consumption
+ va_list args_for_log;
+ va_start(args_for_log, format);
+ __android_log_vprint(ANDROID_LOG_ERROR, "tflite", format, args_for_log);
+ va_end(args_for_log);
+#endif
}
+#define FATAL(...) \
+ logError(__VA_ARGS__); \
+ exit(1);
+
// TODO(aselle): Change the error model to use status codes.
-#define CHECK_TFLITE_SUCCESS(x) \
- if (x != kTfLiteOk) { \
- FATAL("Aborting since tflite returned failure."); \
+#define CHECK_TFLITE_SUCCESS(x) \
+ if (x != kTfLiteOk) { \
+ FATAL("Aborting since tflite returned failure nnapi_delegate.cc:%d.", \
+ __LINE__); \
}
-#define CHECK_NN(x) \
- if (x != ANEURALNETWORKS_NO_ERROR) { \
- FATAL("Aborting since tflite returned failure."); \
+#define CHECK_NN(x) \
+ if (x != ANEURALNETWORKS_NO_ERROR) { \
+ FATAL("Aborting since NNAPI returned failure nnapi_delegate.cc:%d", \
+ __LINE__); \
}
+#define RETURN_ERROR_IF_TFLITE_FAILED(x) \
+ if (x != kTfLiteOk) { \
+ logError( \
+ "Returning error since TFLite returned failure nnapi_delegate.cc:%d.", \
+ __LINE__); \
+ return kTfLiteError; \
+ }
+
+#define RETURN_ERROR_IF_NN_FAILED(x) \
+ if (x != ANEURALNETWORKS_NO_ERROR) { \
+ logError( \
+ "Returning error since NNAPI returned failure nnapi_delegate.cc:%d.", \
+ __LINE__); \
+ return kTfLiteError; \
+ }
+
+// Tracking of NNAPI operand ids
+static const int64_t kOperandIdNotSet = -1;
+static const int64_t kOperandNotNeeded = -2;
+
namespace {
int32_t GetAndroidSdkVersion() {
@@ -80,7 +118,10 @@ int32_t GetAndroidSdkVersion() {
return 0;
}
-static const int32_t kAndroidSdkVersion = GetAndroidSdkVersion();
+int32_t GetAndroidSdkVersionCached() {
+ static int32_t androidSdkVersion = GetAndroidSdkVersion();
+ return androidSdkVersion;
+}
} // namespace
@@ -112,21 +153,16 @@ NNAPIDelegate::~NNAPIDelegate() {
}
// Adds the tensors of the interpreter to the NN API model.
-// Returns the number of operands added.
-uint32_t addTensorOperands(tflite::Interpreter* interpreter,
+TfLiteStatus addTensorOperands(tflite::Interpreter* interpreter,
ANeuralNetworksModel* nn_model,
- const std::vector<uint32_t>& skip_list) {
+ uint32_t* no_of_operands_added,
+ std::vector<int64_t>* nnapi_ids) {
uint32_t next_id = 0;
for (size_t i = 0; i < interpreter->tensors_size(); i++) {
- // skip temporaries tensors.
- bool shouldSkip = false;
- for (auto skip_idx : skip_list) {
- if (i == skip_idx) {
- shouldSkip = true;
- break;
- }
- }
- if (shouldSkip) continue;
+ // Skip temporaries and RNN back-edges.
+ if ((*nnapi_ids)[i] == kOperandNotNeeded) continue;
+
+ (*nnapi_ids)[i] = int64_t(next_id);
int32_t nn_type = 0;
// NNAPI requires 32-bit float scale to be zero, tflite doesn't care
@@ -160,7 +196,28 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter,
zeroPoint = tensor->params.zero_point;
break;
default:
- FATAL("Unsupported type.");
+ logError("Unsupported tensor type %d", tensor->type);
+ return kTfLiteError;
+ }
+ if (tensor->dims->size == 0) {
+ // WORKAROUND Some models have dimension zero
+ switch (tensor->type) {
+ case kTfLiteFloat32:
+ nn_type = ANEURALNETWORKS_FLOAT32;
+ break;
+ case kTfLiteInt32:
+ nn_type = ANEURALNETWORKS_INT32;
+ break;
+ default:
+ logError("NNAPI doesn't support tensors with rank 0 (index %d name %s)",
+ i, tensor->name);
+ return kTfLiteError;
+ }
+ }
+ if (tensor->dims->size > 4) {
+ logError("NNAPI doesn't support tensors with rank > 4 (index %d name %s)",
+ i, tensor->name);
+ return kTfLiteError;
}
// TODO(aselle): Note, many of these are intermediate results. Do I need
// to ever specify these sizes. I am currently below doing setValue
@@ -170,36 +227,53 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter,
ANeuralNetworksOperandType operand_type{
nn_type, static_cast<uint32_t>(tensor->dims->size),
reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint};
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));
+ RETURN_ERROR_IF_NN_FAILED(
+ ANeuralNetworksModel_addOperand(nn_model, &operand_type));
// TODO(aselle): Based on Michael's suggestion, limiting this to read
// only memory
if (tensor->allocation_type == kTfLiteMmapRo) {
if (const NNAPIAllocation* alloc = dynamic_cast<const NNAPIAllocation*>(
- static_cast<const ::tflite::Allocation*>(tensor->allocation))) {
- CHECK_NN(ANeuralNetworksModel_setOperandValueFromMemory(
- nn_model, next_id, alloc->memory(), alloc->offset(tensor->data.raw),
- tensor->bytes));
+ static_cast<const tflite::Allocation*>(tensor->allocation))) {
+ RETURN_ERROR_IF_NN_FAILED(
+ ANeuralNetworksModel_setOperandValueFromMemory(
+ nn_model, next_id, alloc->memory(),
+ alloc->offset(tensor->data.raw), tensor->bytes));
} else {
- CHECK_NN(ANeuralNetworksModel_setOperandValue(
+ RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_setOperandValue(
nn_model, next_id, tensor->data.raw, tensor->bytes));
}
} else if (tensor->bytes == 0) {
// These size 0 tensors are optional tensors reserved.
- CHECK_NN(
+ RETURN_ERROR_IF_NN_FAILED(
ANeuralNetworksModel_setOperandValue(nn_model, next_id, nullptr, 0));
}
++next_id;
}
- return next_id;
+ *no_of_operands_added = next_id;
+ return kTfLiteOk;
+}
+
+void MapAndAddTensorIds(const int* from_ids_buf, size_t from_ids_count,
+ std::vector<uint32_t>* into,
+ const std::vector<int64_t>& map) {
+ for (size_t i = 0; i < from_ids_count; i++) {
+ int from_id = from_ids_buf[i];
+ if (from_id == kOptionalTensor) {
+ into->push_back(from_id);
+ } else {
+ into->push_back(map[from_id]);
+ }
+ }
}
// Adds the operations and their parameters to the NN API model.
// 'next-id' is the operand ID of the next operand of the model.
-void AddOpsAndParams(tflite::Interpreter* interpreter,
- ANeuralNetworksModel* nn_model, uint32_t next_id,
- std::vector<int>* model_state_inputs,
- std::vector<int>* model_state_outputs) {
+TfLiteStatus AddOpsAndParams(
+ tflite::Interpreter* interpreter, ANeuralNetworksModel* nn_model,
+ uint32_t next_id, std::vector<int>* model_state_inputs,
+ std::vector<int>* model_state_outputs,
+ const std::vector<int64_t>& tensor_id_to_nnapi_id) {
for (size_t i = 0; i < interpreter->nodes_size(); i++) {
const auto* node_and_registration = interpreter->node_and_registration(i);
const TfLiteNode& node = node_and_registration->first;
@@ -208,10 +282,11 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
static_cast<tflite::BuiltinOperator>(registration.builtin_code);
// Add the parameters.
- std::vector<uint32_t> augmented_inputs(
- node.inputs->data, node.inputs->data + node.inputs->size);
- std::vector<uint32_t> augmented_outputs(
- node.outputs->data, node.outputs->data + node.outputs->size);
+ std::vector<uint32_t> augmented_inputs, augmented_outputs;
+ MapAndAddTensorIds(node.inputs->data, node.inputs->size, &augmented_inputs,
+ tensor_id_to_nnapi_id);
+ MapAndAddTensorIds(node.outputs->data, node.outputs->size,
+ &augmented_outputs, tensor_id_to_nnapi_id);
auto add_scalar_int32 = [&nn_model, &augmented_inputs,
&next_id](int value) {
@@ -260,47 +335,68 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
model_state_outputs->push_back(tensor_id);
next_id++;
};
+ auto check_and_add_activation = [&add_scalar_int32](int activation) {
+ if (activation > kTfLiteActRelu6) {
+ logError("NNAPI only supports RELU, RELU1 and RELU6 activations");
+ return kTfLiteError;
+ }
+ add_scalar_int32(activation);
+ return kTfLiteOk;
+ };
- auto add_add_params = [&add_scalar_int32]() { add_scalar_int32(0); };
+ auto add_add_params = [&add_scalar_int32](void* data) {
+ auto* builtin = reinterpret_cast<TfLiteAddParams*>(data);
+ if (builtin->activation > kTfLiteActRelu6) {
+ logError("NNAPI only supports RELU, RELU1 and RELU6 activations");
+ return kTfLiteError;
+ }
+ add_scalar_int32(builtin->activation);
+ return kTfLiteOk;
+ };
- auto add_pooling_params = [&add_scalar_int32](void* data) {
+ auto add_pooling_params = [&add_scalar_int32,
+ &check_and_add_activation](void* data) {
auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
add_scalar_int32(builtin->padding);
add_scalar_int32(builtin->stride_width);
add_scalar_int32(builtin->stride_height);
add_scalar_int32(builtin->filter_width);
add_scalar_int32(builtin->filter_height);
- add_scalar_int32(builtin->activation);
+ return check_and_add_activation(builtin->activation);
};
- auto add_convolution_params = [&add_scalar_int32](void* data) {
+ auto add_convolution_params = [&add_scalar_int32,
+ &check_and_add_activation](void* data) {
auto builtin = reinterpret_cast<TfLiteConvParams*>(data);
add_scalar_int32(builtin->padding);
add_scalar_int32(builtin->stride_width);
add_scalar_int32(builtin->stride_height);
- add_scalar_int32(builtin->activation);
+ return check_and_add_activation(builtin->activation);
};
- auto add_depthwise_conv_params = [&add_scalar_int32](void* data) {
+ auto add_depthwise_conv_params = [&add_scalar_int32,
+ &check_and_add_activation](void* data) {
auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(data);
add_scalar_int32(builtin->padding);
add_scalar_int32(builtin->stride_width);
add_scalar_int32(builtin->stride_height);
add_scalar_int32(builtin->depth_multiplier);
- add_scalar_int32(builtin->activation);
+ return check_and_add_activation(builtin->activation);
};
- auto add_fully_connected_params = [&add_scalar_int32](void* data) {
+ auto add_fully_connected_params = [&check_and_add_activation](void* data) {
auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(data);
- add_scalar_int32(builtin->activation);
+ return check_and_add_activation(builtin->activation);
};
auto add_concatenation_params = [&add_scalar_int32](void* data) {
auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(data);
add_scalar_int32(builtin->axis);
if (builtin->activation != kTfLiteActNone) {
- FATAL("Concatenation does not support fused activation in NNAPI");
+ logError("Concatenation does not support fused activation in NNAPI");
+ return kTfLiteError;
}
+ return kTfLiteOk;
};
auto add_softmax_params = [&add_scalar_float32](void* data) {
@@ -324,6 +420,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
// LSTM in NNAPI requires scratch tensor as an output operand.
auto add_lstm_scratch_tensor_float32 = [interpreter, &node, &nn_model,
&next_id, &augmented_outputs]() {
+ if (node.temporaries->size == 0) return;
int scratch_buffer_index = node.temporaries->data[0];
const TfLiteTensor* tensor = interpreter->tensor(scratch_buffer_index);
ANeuralNetworksOperandType operand_type{
@@ -336,7 +433,11 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
};
auto add_mean_params = [&add_scalar_int32](void* data) {
+#ifdef OBS_BUILD
auto builtin = reinterpret_cast<TfLiteMeanParams*>(data);
+#else
+ auto builtin = reinterpret_cast<TfLiteReducerParams*>(data);
+#endif
add_scalar_int32(builtin->keep_dims);
};
@@ -351,7 +452,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
add_scalar_int32(builtin->activation);
};
- auto add_squeeze_params = [&add_vector_int32](void* data) {
+ auto add_squeeze_params = [&](void* data) {
const auto* builtin = reinterpret_cast<TfLiteSqueezeParams*>(data);
// Note that we add the squeeze dimensions even if the dimensions were
// unspecified (empty), as NNAPI requires the operand.
@@ -382,26 +483,34 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
switch (builtin) {
case tflite::BuiltinOperator_ADD:
nn_op_type = ANEURALNETWORKS_ADD;
- add_add_params();
+ RETURN_ERROR_IF_TFLITE_FAILED(add_add_params(node.builtin_data));
break;
case tflite::BuiltinOperator_MUL:
nn_op_type = ANEURALNETWORKS_MUL;
- add_add_params();
+ RETURN_ERROR_IF_TFLITE_FAILED(add_add_params(node.builtin_data));
break;
case tflite::BuiltinOperator_AVERAGE_POOL_2D:
- add_pooling_params(node.builtin_data);
+ RETURN_ERROR_IF_TFLITE_FAILED(add_pooling_params(node.builtin_data));
nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D;
break;
case tflite::BuiltinOperator_MAX_POOL_2D:
- add_pooling_params(node.builtin_data);
+ RETURN_ERROR_IF_TFLITE_FAILED(add_pooling_params(node.builtin_data));
nn_op_type = ANEURALNETWORKS_MAX_POOL_2D;
break;
case tflite::BuiltinOperator_L2_POOL_2D:
- add_pooling_params(node.builtin_data);
+ RETURN_ERROR_IF_TFLITE_FAILED(add_pooling_params(node.builtin_data));
nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
break;
- case tflite::BuiltinOperator_CONV_2D:
- add_convolution_params(node.builtin_data);
+ case tflite::BuiltinOperator_CONV_2D: {
+ auto builtin = reinterpret_cast<TfLiteConvParams*>(node.builtin_data);
+ if (builtin->dilation_width_factor != 1 ||
+ builtin->dilation_height_factor != 1 || node.inputs->size != 3) {
+ logError("NNAPI does not support dilated Conv2D.");
+ return kTfLiteError;
+ }
+ }
+ RETURN_ERROR_IF_TFLITE_FAILED(
+ add_convolution_params(node.builtin_data));
nn_op_type = ANEURALNETWORKS_CONV_2D;
break;
case tflite::BuiltinOperator_RELU:
@@ -423,11 +532,13 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
nn_op_type = ANEURALNETWORKS_LOGISTIC;
break;
case tflite::BuiltinOperator_DEPTHWISE_CONV_2D:
- add_depthwise_conv_params(node.builtin_data);
+ RETURN_ERROR_IF_TFLITE_FAILED(
+ add_depthwise_conv_params(node.builtin_data));
nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
break;
case tflite::BuiltinOperator_CONCATENATION:
- add_concatenation_params(node.builtin_data);
+ RETURN_ERROR_IF_TFLITE_FAILED(
+ add_concatenation_params(node.builtin_data));
nn_op_type = ANEURALNETWORKS_CONCATENATION;
break;
case tflite::BuiltinOperator_SOFTMAX:
@@ -435,10 +546,15 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
nn_op_type = ANEURALNETWORKS_SOFTMAX;
break;
case tflite::BuiltinOperator_FULLY_CONNECTED:
- add_fully_connected_params(node.builtin_data);
+ RETURN_ERROR_IF_TFLITE_FAILED(
+ add_fully_connected_params(node.builtin_data));
nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
break;
case tflite::BuiltinOperator_RESHAPE:
+ if (node.inputs->size != 2) {
+ logError("NNAPI only supports 2-input RESHAPE");
+ return kTfLiteError;
+ }
nn_op_type = ANEURALNETWORKS_RESHAPE;
// add_reshape_params(node.builtin_data);
break;
@@ -451,6 +567,10 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
break;
case tflite::BuiltinOperator_LSTM: {
+ if (node.inputs->size + /* no of params */ 3 != 21) {
+ logError("NNAPI only supports 21-input LSTMs");
+ return kTfLiteError;
+ }
duplicate_state_tensor_float32(
node.outputs->data[/*kOutputStateTensor*/ 0]);
duplicate_state_tensor_float32(
@@ -489,10 +609,6 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
add_mean_params(node.builtin_data);
nn_op_type = ANEURALNETWORKS_MEAN;
break;
- case tflite::BuiltinOperator_L2_NORMALIZATION:
- nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION;
- check_l2normalization_params(node.builtin_data);
- break;
case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION:
nn_op_type = ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION;
add_lrn_params(node.builtin_data);
@@ -500,20 +616,54 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
case tflite::BuiltinOperator_DIV:
nnapi_version = 11; // require NNAPI 1.1
nn_op_type = ANEURALNETWORKS_DIV;
- add_add_params();
- break;
- case tflite::BuiltinOperator_HASHTABLE_LOOKUP:
- nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP;
+ RETURN_ERROR_IF_TFLITE_FAILED(check_and_add_activation(
+ reinterpret_cast<TfLiteDivParams*>(node.builtin_data)->activation));
break;
case tflite::BuiltinOperator_SUB:
nnapi_version = 11; // require NNAPI 1.1
nn_op_type = ANEURALNETWORKS_SUB;
- add_add_params();
+ RETURN_ERROR_IF_TFLITE_FAILED(check_and_add_activation(
+ reinterpret_cast<TfLiteSubParams*>(node.builtin_data)->activation));
break;
case tflite::BuiltinOperator_SQUEEZE:
- nnapi_version = 11; // require NNAPI 1.1
- nn_op_type = ANEURALNETWORKS_SQUEEZE;
+ nnapi_version = 11; // requires NNAPI 1.1
add_squeeze_params(node.builtin_data);
+ nn_op_type = ANEURALNETWORKS_SQUEEZE;
+ break;
+ case tflite::BuiltinOperator_TRANSPOSE:
+ // The permutation input tensor value dictates the output dimensions.
+ // TODO(b/110888333): Support dynamically-sized tensors in delegates.
+ if ((node.inputs->size > 1) &&
+ (interpreter->tensor(node.inputs->data[1])->allocation_type !=
+ kTfLiteMmapRo)) {
+ logError("NNAPI does not yet support dynamic tensors.");
+ return kTfLiteError;
+ }
+ nnapi_version = 11; // require NNAPI 1.1
+ nn_op_type = ANEURALNETWORKS_TRANSPOSE;
+ break;
+ case tflite::BuiltinOperator_L2_NORMALIZATION:
+ nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION;
+ if (reinterpret_cast<TfLiteL2NormParams*>(node.builtin_data)
+ ->activation != kTfLiteActNone) {
+ logError(
+ "NNAPI does not support L2Normalization with fused activations");
+ return kTfLiteError;
+ }
+ if ((node.inputs->size > 0) &&
+ (interpreter->tensor(node.inputs->data[0])->dims->size != 4)) {
+ logError("NNAPI only supports input rank 4 for L2Normalization");
+ return kTfLiteError;
+ }
+ break;
+ case tflite::BuiltinOperator_HASHTABLE_LOOKUP:
+ if (interpreter->tensor(node.outputs->data[0])->type !=
+ kTfLiteFloat32) {
+ logError("NNAPI only support HASHTABLE_LOOKUP with float32 output",
+ builtin);
+ return kTfLiteError;
+ }
+ nn_op_type = ANEURALNETWORKS_HASHTABLE_LOOKUP;
break;
case tflite::BuiltinOperator_STRIDED_SLICE:
add_strided_slice_params(node.builtin_data);
@@ -557,10 +707,6 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
reinterpret_cast<uint32_t*>(node.outputs->data)));
continue;
- case tflite::BuiltinOperator_TRANSPOSE:
- nn_op_type = ANEURALNETWORKS_TRANSPOSE;
- // param is almost same as reshape
- break;
case tflite::BuiltinOperator_NEG:
CHECK_NN(ANeuralNetworksModel_addOperationEx(
nn_model, ANEURALNETWORKS_NEG_EX,
@@ -582,28 +728,71 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE:
case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM:
case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
+ //case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION:
case tflite::BuiltinOperator_PADV2:
+ //case tflite::BuiltinOperator_RESIZE_BILINEAR:
case tflite::BuiltinOperator_CALL:
case tflite::BuiltinOperator_SKIP_GRAM:
+ //case tflite::BuiltinOperator_RELU_N1_TO_1:
+ //case tflite::BuiltinOperator_GATHER:
+ //case tflite::BuiltinOperator_SPACE_TO_BATCH_ND:
+ //case tflite::BuiltinOperator_BATCH_TO_SPACE_ND:
+ //case tflite::BuiltinOperator_TOPK_V2:
+ //case tflite::BuiltinOperator_SPLIT:
+ //case tflite::BuiltinOperator_STRIDED_SLICE:
+ //case tflite::BuiltinOperator_EXP:
case tflite::BuiltinOperator_LOG_SOFTMAX:
+ //case tflite::BuiltinOperator_DEQUANTIZE:
case tflite::BuiltinOperator_DELEGATE:
+ //case tflite::BuiltinOperator_CAST:
case tflite::BuiltinOperator_PRELU:
case tflite::BuiltinOperator_MAXIMUM:
case tflite::BuiltinOperator_MINIMUM:
case tflite::BuiltinOperator_ARG_MAX:
+#ifndef OBS_BUILD
+ case tflite::BuiltinOperator_ARG_MIN:
+#endif
case tflite::BuiltinOperator_GREATER:
case tflite::BuiltinOperator_GREATER_EQUAL:
case tflite::BuiltinOperator_LESS:
case tflite::BuiltinOperator_LESS_EQUAL:
+ //case tflite::BuiltinOperator_NEG:
case tflite::BuiltinOperator_SELECT:
case tflite::BuiltinOperator_SLICE:
case tflite::BuiltinOperator_SIN:
+ //case tflite::BuiltinOperator_LOG:
case tflite::BuiltinOperator_TRANSPOSE_CONV:
+#ifndef OBS_BUILD
+ case tflite::BuiltinOperator_TILE:
+ case tflite::BuiltinOperator_EXPAND_DIMS:
case tflite::BuiltinOperator_SPARSE_TO_DENSE:
- FATAL("Op code %d is currently not delegated to NNAPI", builtin);
- nn_op_type = -1; // set to invalid
+ case tflite::BuiltinOperator_EQUAL:
+ case tflite::BuiltinOperator_NOT_EQUAL:
+ case tflite::BuiltinOperator_SUM:
+ case tflite::BuiltinOperator_REDUCE_MAX:
+ case tflite::BuiltinOperator_REDUCE_MIN:
+ case tflite::BuiltinOperator_REDUCE_PROD:
+ case tflite::BuiltinOperator_SQRT:
+ case tflite::BuiltinOperator_RSQRT:
+ case tflite::BuiltinOperator_SHAPE:
+ case tflite::BuiltinOperator_POW:
+ case tflite::BuiltinOperator_FAKE_QUANT:
+ case tflite::BuiltinOperator_PACK:
+ case tflite::BuiltinOperator_LOGICAL_OR:
+ case tflite::BuiltinOperator_ONE_HOT:
+ case tflite::BuiltinOperator_LOGICAL_AND:
+ case tflite::BuiltinOperator_LOGICAL_NOT:
+ case tflite::BuiltinOperator_UNPACK:
+ case tflite::BuiltinOperator_FLOOR_DIV:
+ case tflite::BuiltinOperator_REDUCE_ANY:
+ case tflite::BuiltinOperator_SQUARE:
+ case tflite::BuiltinOperator_ZEROS_LIKE:
+ case tflite::BuiltinOperator_FILL:
+#endif
+ logError("Op code %d is currently not delegated to NNAPI", builtin);
+ return kTfLiteError;
break;
- case tflite::BuiltinOperator_CUSTOM:
+ case tflite::BuiltinOperator_CUSTOM: {
std::string custom_name(registration.custom_name);
if (custom_name.compare("TensorFlowMax") == 0) {
CHECK_NN(ANeuralNetworksModel_addOperationEx(
@@ -650,64 +839,99 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
reinterpret_cast<uint32_t*>(node.outputs->data)));
continue;
}
-
- FATAL("Custom operations are not supported when using NNAPI.");
- nn_op_type = -1; // set to invalid
+ logError("Custom operations are not supported when using NNAPI.");
+ return kTfLiteError;
+ break;
+ }
+#ifdef OBS_BUILD
+ default:
+ logError("Op code %d is currently not delegated to NNAPI", builtin);
+ return kTfLiteError;
break;
+#endif
}
- //if (nnapi_version == 11 && kAndroidSdkVersion < 28) {
+ //if (nnapi_version == 11 && GetAndroidSdkVersionCached() < 28) {
// FATAL("Op %d needs NNAPI1.1", builtin);
//}
// Add the operation.
- CHECK_NN(ANeuralNetworksModel_addOperation(
+ RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_addOperation(
nn_model, nn_op_type, static_cast<uint32_t>(augmented_inputs.size()),
augmented_inputs.data(),
static_cast<uint32_t>(augmented_outputs.size()),
reinterpret_cast<uint32_t*>(augmented_outputs.data())));
}
+ return kTfLiteOk;
}
TfLiteStatus NNAPIDelegate::BuildGraph(::tflite::Interpreter* interpreter) {
- // TODO(aselle): This is not correct. need to handle resize invalidation.
- if (nn_model_ && nn_compiled_model_) return kTfLiteOk;
+ if (nn_model_ && nn_compiled_model_) return model_status_;
+ // TODO(aselle): This is not correct. need to handle resize invalidation.
if (!nn_model_) {
CHECK_NN(ANeuralNetworksModel_create(&nn_model_));
- // Find all the temporary tensors and put them in a skip_list.
- std::vector<uint32_t> skip_list;
+ // Find which tensors should be added to NNAPI. TFLite has temporaries
+ // and RNN back-edges which are not valid for NNAPI. We look through all
+ // inputs and outputs and mark the mapping in tensor_id_to_nnapi_id with
+ // kOperandIdNotSet. addTensorOperands will replace those with the
+ // corresponding NNAPI operand ids and skip kOperandNotNeeded entries.
+ std::vector<int64_t> tensor_id_to_nnapi_id(interpreter->tensors_size(),
+ kOperandNotNeeded);
+ auto set_ids_to_not_set = [&tensor_id_to_nnapi_id](const int* buf,
+ size_t count) {
+ for (int j = 0; j < count; j++) {
+ auto tensor_id = buf[j];
+ if (tensor_id != kOptionalTensor) {
+ tensor_id_to_nnapi_id[tensor_id] = kOperandIdNotSet;
+ }
+ }
+ };
for (size_t i = 0; i < interpreter->nodes_size(); i++) {
const auto* node_and_registration = interpreter->node_and_registration(i);
const TfLiteNode& node = node_and_registration->first;
- if (node.temporaries != nullptr) {
- for (int j = 0; j < node.temporaries->size; j++) {
- skip_list.push_back(static_cast<uint32_t>(node.temporaries->data[j]));
- }
- }
+ set_ids_to_not_set(node.inputs->data, node.inputs->size);
+ set_ids_to_not_set(node.outputs->data, node.outputs->size);
}
-
- uint32_t next_id = addTensorOperands(interpreter, nn_model_, skip_list);
- AddOpsAndParams(interpreter, nn_model_, next_id, &model_states_inputs_,
- &model_states_outputs_);
-
- std::vector<int> augmented_inputs = interpreter->inputs();
- std::vector<int> augmented_outputs = interpreter->outputs();
-
- // All state tensors input/output need to be treated as model input/output.
+ set_ids_to_not_set(interpreter->inputs().data(),
+ interpreter->inputs().size());
+ set_ids_to_not_set(interpreter->outputs().data(),
+ interpreter->outputs().size());
+
+ uint32_t next_id = 0;
+ RETURN_ERROR_IF_TFLITE_FAILED(addTensorOperands(
+ interpreter, nn_model_, &next_id, &tensor_id_to_nnapi_id));
+ RETURN_ERROR_IF_TFLITE_FAILED(
+ AddOpsAndParams(interpreter, nn_model_, next_id, &model_states_inputs_,
+ &model_states_outputs_, tensor_id_to_nnapi_id));
+
+ std::vector<uint32_t> augmented_inputs;
+ MapAndAddTensorIds(interpreter->inputs().data(),
+ interpreter->inputs().size(), &augmented_inputs,
+ tensor_id_to_nnapi_id);
augmented_inputs.insert(augmented_inputs.end(),
model_states_inputs_.begin(),
model_states_inputs_.end());
- augmented_outputs.insert(augmented_outputs.end(),
- model_states_outputs_.begin(),
- model_states_outputs_.end());
+ std::vector<uint32_t> augmented_outputs;
+ MapAndAddTensorIds(interpreter->outputs().data(),
+ interpreter->outputs().size(), &augmented_outputs,
+ tensor_id_to_nnapi_id);
+ MapAndAddTensorIds(model_states_outputs_.data(),
+ model_states_outputs_.size(), &augmented_outputs,
+ tensor_id_to_nnapi_id);
CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs(
nn_model_, static_cast<uint32_t>(augmented_inputs.size()),
reinterpret_cast<const uint32_t*>(augmented_inputs.data()),
static_cast<uint32_t>(augmented_outputs.size()),
reinterpret_cast<const uint32_t*>(augmented_outputs.data())));
+
+ // TODO Support ANeuralNetworksModel_relaxComputationFloat32toFloat16
+ //if (GetAndroidSdkVersionCached() >= 28) {
+ // CHECK_NN(ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+ // nn_model_, interpreter->GetAllowFp16PrecisionForFp32()));
+ //}
CHECK_NN(ANeuralNetworksModel_finish(nn_model_));
}
if (!nn_compiled_model_) {
@@ -719,7 +943,13 @@ TfLiteStatus NNAPIDelegate::BuildGraph(::tflite::Interpreter* interpreter) {
TfLiteStatus NNAPIDelegate::Invoke(::tflite::Interpreter* interpreter) {
if (!nn_model_) {
- TF_LITE_ENSURE_STATUS(BuildGraph(interpreter));
+ model_status_ = BuildGraph(interpreter);
+ if (model_status_ != kTfLiteOk) {
+ logError("Failed to build graph for NNAPI");
+ }
+ }
+ if (model_status_ != kTfLiteOk) {
+ return model_status_;
}
ANeuralNetworksExecution* execution = nullptr;
@@ -783,6 +1013,8 @@ TfLiteStatus NNAPIDelegate::Invoke(::tflite::Interpreter* interpreter) {
return kTfLiteOk;
}
+bool NNAPIDelegate::IsSupported() { return nnfw::NNAPIExists(); }
+
} // namespace nnfw
// clang-format on
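To make the new operand-id bookkeeping concrete, here is a small standalone sketch (simplified types, not the patch code itself) of how TFLite tensor indices are mapped to dense NNAPI operand ids while temporaries and RNN back-edges are skipped:

  // Simplified sketch: every tensor starts as "not needed"; tensors referenced
  // by any node or by the graph inputs/outputs are marked kOperandIdNotSet, and
  // the equivalent of addTensorOperands then assigns them dense NNAPI ids.
  #include <cstdint>
  #include <vector>

  constexpr int64_t kOperandIdNotSet = -1;
  constexpr int64_t kOperandNotNeeded = -2;

  std::vector<int64_t> BuildIdMap(size_t tensor_count,
                                  const std::vector<int>& referenced_tensors) {
    std::vector<int64_t> map(tensor_count, kOperandNotNeeded);
    for (int t : referenced_tensors) map[t] = kOperandIdNotSet;
    int64_t next_id = 0;
    for (auto& entry : map) {
      if (entry == kOperandNotNeeded) continue;  // temporaries, RNN back-edges
      entry = next_id++;                         // dense NNAPI operand id
    }
    return map;
  }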
diff --git a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
index 2a6a84e10..f434a6dec 100644
--- a/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/AvgPoolLayer.cc
@@ -27,14 +27,14 @@ namespace kernel
namespace cpu
{
-#define AVGPOOLING_PARAMETERS \
- uint32_t height = getSizeOfDimension(_inputShape, 1); \
- uint32_t width = getSizeOfDimension(_inputShape, 2); \
- uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
- uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
- \
- uint32_t paddingHeight = (uint32_t)_paddingTop; \
- uint32_t paddingWidth = (uint32_t)_paddingLeft;
+#define AVGPOOLING_PARAMETERS \
+ tflite::PoolParams op_params; \
+ op_params.stride_height = _strideHeight; \
+ op_params.stride_width = _strideWidth; \
+ op_params.filter_height = _kernelHeight; \
+ op_params.filter_width = _kernelWidth; \
+ op_params.padding_values.height = (int8_t)_paddingTop; \
+ op_params.padding_values.width = (int8_t)_paddingLeft;
AvgPoolLayer::AvgPoolLayer()
: _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0),
@@ -47,31 +47,31 @@ AvgPoolLayer::AvgPoolLayer()
bool AvgPoolLayer::averagePoolFloat32()
{
-
AVGPOOLING_PARAMETERS
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
- ::tflite::optimized_ops::AveragePool(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth,
- _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight,
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::AveragePool(op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData),
+ convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData));
return true;
}
bool AvgPoolLayer::averagePoolQuant8()
{
-
AVGPOOLING_PARAMETERS
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
&output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
- ::tflite::optimized_ops::AveragePool(_inputData, convertShapeToDims(_inputShape), _strideWidth,
- _strideHeight, paddingWidth, paddingHeight, _kernelWidth,
- _kernelHeight, output_activation_min, output_activation_max,
- _outputData, convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::AveragePool(op_params, convertShapeToTFLiteShape(_inputShape),
+ _inputData, convertShapeToTFLiteShape(_outputShape),
+ _outputData);
return true;
}
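The rewritten kernels above rely on convertShapeToTFLiteShape (added in OperationUtils.h, whose hunk is not shown in this excerpt). As an assumption about its behavior only, such a helper would copy the neurun Shape dimensions into the tflite::RuntimeShape type expected by the TF Lite 1.12 optimized ops:

  // Assumed helper (hypothetical definition; the real one lives in
  // OperationUtils.h): copy neurun Shape dimensions into a RuntimeShape.
  inline ::tflite::RuntimeShape convertShapeToTFLiteShape(const Shape &shape)
  {
    std::vector<int32_t> dims;
    dims.reserve(shape.dimensions.size());
    for (uint32_t d : shape.dimensions)
    {
      dims.push_back(static_cast<int32_t>(d));
    }
    return ::tflite::RuntimeShape(static_cast<int>(dims.size()), dims.data());
  }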
diff --git a/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
index 5fe5e3993..be093b437 100644
--- a/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/ConcatLayer.cc
@@ -24,6 +24,7 @@ namespace neurun
{
namespace kernel
{
+
namespace cpu
{
@@ -36,13 +37,21 @@ ConcatLayer::ConcatLayer()
bool ConcatLayer::concatenationFloat32()
{
- int num_inputs = _inputShapes.size();
- std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
- std::vector<::tflite::Dims<4>> inputDims(num_inputs);
- for (int i = 0; i < num_inputs; i++)
+ uint32_t num_inputs = _inputShapes.size();
+
+ tflite::ConcatenationParams op_params;
+ op_params.axis = _axis;
+ op_params.inputs_count = num_inputs;
+
+ std::vector<::tflite::RuntimeShape *> inputDimsPtr;
+ std::vector<::tflite::RuntimeShape> inputDims;
+ inputDimsPtr.reserve(num_inputs);
+ inputDims.reserve(num_inputs);
+
+ for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims[i] = convertShapeToDims(_inputShapes[i]);
- inputDimsPtr[i] = &inputDims[i];
+ inputDims.push_back(convertShapeToTFLiteShape(_inputShapes[i]));
+ inputDimsPtr.push_back(&inputDims[i]);
}
std::vector<const float *> inputFloatPtrs;
@@ -52,24 +61,44 @@ bool ConcatLayer::concatenationFloat32()
inputFloatPtrs.emplace_back(reinterpret_cast<const float *>(ptr));
}
- ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, float>(
- getNumberOfDimensions(_outputShape) - _axis - 1, inputFloatPtrs.data(), inputDimsPtr.data(),
- num_inputs, reinterpret_cast<float *>(_outputData), convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::Concatenation<float>(
+ op_params, inputDimsPtr.data(), inputFloatPtrs.data(),
+ convertShapeToTFLiteShape(_outputShape), reinterpret_cast<float *>(_outputData));
return true;
}
bool ConcatLayer::concatenationQuant8()
{
int num_inputs = _inputShapes.size();
- std::vector<::tflite::Dims<4> *> inputDimsPtr(num_inputs);
- std::vector<::tflite::Dims<4>> inputDims(num_inputs);
- for (int i = 0; i < num_inputs; i++)
+
+ std::vector<int32_t> input_zeropoints(num_inputs);
+ std::vector<float> input_scales(num_inputs);
+ for (uint32_t i = 0; i < num_inputs; i++)
{
- inputDims[i] = convertShapeToDims(_inputShapes[i]);
- inputDimsPtr[i] = &inputDims[i];
+ input_zeropoints[i] = _inputShapes[i].offset;
+ input_scales[i] = _inputShapes[i].scale;
}
- ::tflite::optimized_ops::Concatenation<::tflite::FusedActivationFunctionType::kNone, uint8_t>(
- getNumberOfDimensions(_outputShape) - _axis - 1, _inputDataPtrs.data(), inputDimsPtr.data(),
- num_inputs, _outputData, convertShapeToDims(_outputShape));
+
+ tflite::ConcatenationParams op_params;
+ op_params.axis = _axis;
+ op_params.inputs_count = num_inputs;
+ op_params.input_zeropoint = input_zeropoints.data();
+ op_params.input_scale = input_scales.data();
+ op_params.output_zeropoint = _outputShape.offset;
+ op_params.output_scale = _outputShape.scale;
+
+ std::vector<::tflite::RuntimeShape *> inputDimsPtr;
+ std::vector<::tflite::RuntimeShape> inputDims;
+ inputDimsPtr.reserve(num_inputs);
+ inputDims.reserve(num_inputs);
+ for (uint32_t i = 0; i < num_inputs; i++)
+ {
+ inputDims.push_back(convertShapeToTFLiteShape(_inputShapes[i]));
+ inputDimsPtr.push_back(&inputDims[i]);
+ }
+
+ ::tflite::optimized_ops::Concatenation<uint8_t>(
+ op_params, inputDimsPtr.data(), _inputDataPtrs.data(),
+ convertShapeToTFLiteShape(_outputShape), _outputData);
return true;
}
diff --git a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
index 81e88e0f0..c694fa75f 100644
--- a/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/ConvolutionLayer.cc
@@ -33,55 +33,51 @@ static constexpr int kStaticBufferSize = 1605632;
static char static_scratch_buffer[kStaticBufferSize];
static std::mutex executionMutex;
-#define ANDROID_NN_CONV_PARAMETERS(Type) \
- uint32_t height = getSizeOfDimension(_inputShape, 1); \
- uint32_t width = getSizeOfDimension(_inputShape, 2); \
- uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
- uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
- uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
- uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
- uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
- \
- uint32_t paddingHeight = (uint32_t)_paddingTop; \
- uint32_t paddingWidth = (uint32_t)_paddingLeft; \
- \
- ::tflite::Dims<4> im2colDim; \
- im2colDim.sizes[3] = (int)getSizeOfDimension(_outputShape, 0); \
- im2colDim.sizes[2] = (int)getSizeOfDimension(_outputShape, 1); \
- im2colDim.sizes[1] = (int)getSizeOfDimension(_outputShape, 2); \
- im2colDim.sizes[0] = (int)inDepth * kernelHeight * kernelWidth; \
- \
- im2colDim.strides[0] = 1; \
- for (int i = 1; i < 4; i++) \
- { \
- im2colDim.strides[i] = im2colDim.strides[i - 1] * im2colDim.sizes[i - 1]; \
- } \
- Type *im2colData = nullptr; \
- uint64_t im2colByteSize = sizeof(Type); \
- std::unique_ptr<Type[]> im2colGuard; \
- for (int i = 0; i < 4; i++) \
- { \
- im2colByteSize *= im2colDim.sizes[i]; \
- } \
- /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
- if (im2colByteSize >= 0x7fffffff) \
- { \
- std::cout << "Conv size is too large, not enough memory" << std::endl; \
- return false; \
- } \
- if (im2colByteSize <= kStaticBufferSize) \
- { \
- im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
- } \
- else \
- { \
- im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
- if (im2colData == nullptr) \
- { \
- std::cout << "Conv size is too large, not enough memory" << std::endl; \
- return false; \
- } \
- im2colGuard.reset(im2colData); \
+#define ANDROID_NN_CONV_PARAMETERS(Type) \
+ uint32_t height = getSizeOfDimension(_inputShape, 1); \
+ uint32_t width = getSizeOfDimension(_inputShape, 2); \
+ uint32_t kernelHeight = getSizeOfDimension(_kernelShape, 1); \
+ uint32_t kernelWidth = getSizeOfDimension(_kernelShape, 2); \
+ uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
+ uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
+ uint32_t inDepth = getSizeOfDimension(_inputShape, 3); \
+ \
+ uint32_t paddingHeight = (uint32_t)_paddingTop; \
+ uint32_t paddingWidth = (uint32_t)_paddingLeft; \
+ \
+ Shape im2colShape; \
+ im2colShape.dimensions.resize(4); \
+ im2colShape.dimensions[0] = getSizeOfDimension(_outputShape, 0); \
+ im2colShape.dimensions[1] = getSizeOfDimension(_outputShape, 1); \
+ im2colShape.dimensions[2] = getSizeOfDimension(_outputShape, 2); \
+ im2colShape.dimensions[3] = inDepth * kernelHeight * kernelWidth; \
+ \
+ Type *im2colData = nullptr; \
+ uint64_t im2colByteSize = sizeof(Type); \
+ std::unique_ptr<Type[]> im2colGuard; \
+ for (int i = 0; i < 4; i++) \
+ { \
+ im2colByteSize *= im2colShape.dimensions[i]; \
+ } \
+ /* http://b/77982879, tflite::optimized_ops::Conv uses int for offsets */ \
+ if (im2colByteSize >= 0x7fffffff) \
+ { \
+ std::cout << "Conv size is too large, not enough memory" << std::endl; \
+ return false; \
+ } \
+ if (im2colByteSize <= kStaticBufferSize) \
+ { \
+ im2colData = reinterpret_cast<Type *>(static_scratch_buffer); \
+ } \
+ else \
+ { \
+ im2colData = new (std::nothrow) Type[im2colByteSize / sizeof(Type)]; \
+ if (im2colData == nullptr) \
+ { \
+ std::cout << "Conv size is too large, not enough memory" << std::endl; \
+ return false; \
+ } \
+ im2colGuard.reset(im2colData); \
}
ConvolutionLayer::ConvolutionLayer()
@@ -112,19 +108,32 @@ bool ConvolutionLayer::convFloat32()
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
+
+ ::tflite::ConvParams op_params;
+ op_params.padding_type = ::tflite::PaddingType::kSame;
+ op_params.padding_values.width = paddingWidth;
+ op_params.padding_values.height = paddingHeight;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = dilationWidthFactor;
+ op_params.dilation_height_factor = dilationHeightFactor;
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
+
::tflite::optimized_ops::Conv(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
- reinterpret_cast<const float *>(_kernelData), convertShapeToDims(_kernelShape),
- reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape), _strideWidth,
- _strideHeight, dilationWidthFactor, dilationHeightFactor, paddingWidth, paddingHeight,
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape), im2colDataToPass, im2colDim);
+ op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData), convertShapeToTFLiteShape(_kernelShape),
+ reinterpret_cast<const float *>(_kernelData), convertShapeToTFLiteShape(_biasShape),
+ reinterpret_cast<const float *>(_biasData), convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData), convertShapeToTFLiteShape(im2colShape),
+ im2colDataToPass);
return true;
}
bool ConvolutionLayer::convQuant8()
{
ANDROID_NN_CONV_PARAMETERS(uint8_t)
+
int32_t inputOffset = -_inputShape.offset;
int32_t kernelOffset = -_kernelShape.offset;
int32_t outputOffset = _outputShape.offset;
@@ -141,6 +150,24 @@ bool ConvolutionLayer::convQuant8()
}
CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
&output_activation_max);
+ int32_t dilationWidthFactor = 1, dilationHeightFactor = 1;
+
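+  // The quantized variant additionally carries zero-point offsets and the output requantization multiplier/shift.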
+ ::tflite::ConvParams op_params;
+ op_params.padding_type = ::tflite::PaddingType::kSame;
+ op_params.padding_values.width = paddingWidth;
+ op_params.padding_values.height = paddingHeight;
+ op_params.stride_width = _strideWidth;
+ op_params.stride_height = _strideHeight;
+ op_params.dilation_width_factor = dilationWidthFactor;
+ op_params.dilation_height_factor = dilationHeightFactor;
+ op_params.input_offset = inputOffset;
+ op_params.weights_offset = kernelOffset;
+ op_params.output_offset = outputOffset;
+ op_params.output_multiplier = output_multiplier;
+ op_params.output_shift = output_shift;
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
+
static gemmlowp::GemmContext gemm_context;
// Prevent concurrent executions that may access the scratch buffer and
// gemm_context.
@@ -148,11 +175,10 @@ bool ConvolutionLayer::convQuant8()
  // Allow gemmlowp to decide automatically how many threads to use.
gemm_context.set_max_num_threads(0);
::tflite::optimized_ops::Conv(
- _inputData, convertShapeToDims(_inputShape), inputOffset, _kernelData,
- convertShapeToDims(_kernelShape), kernelOffset, reinterpret_cast<const int32_t *>(_biasData),
- convertShapeToDims(_biasShape), _strideWidth, _strideHeight, paddingWidth, paddingHeight,
- outputOffset, output_multiplier, output_shift, output_activation_min, output_activation_max,
- _outputData, convertShapeToDims(_outputShape), im2colData, im2colDim, &gemm_context);
+ op_params, convertShapeToTFLiteShape(_inputShape), _inputData,
+ convertShapeToTFLiteShape(_kernelShape), _kernelData, convertShapeToTFLiteShape(_biasShape),
+ reinterpret_cast<const int32_t *>(_biasData), convertShapeToTFLiteShape(_outputShape),
+ _outputData, convertShapeToTFLiteShape(im2colShape), im2colData, &gemm_context);
return true;
}
diff --git a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
index 41b9afc0c..abe82db5e 100644
--- a/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/FullyConnectedLayer.cc
@@ -44,64 +44,39 @@ FullyConnectedLayer::FullyConnectedLayer()
static std::mutex executionMutex;
bool FullyConnectedLayer::fullyConnectedFloat32()
{
- float output_activation_min, output_activation_max;
- CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
- // b/80425683, optimized implementation produces incorrect results when the
- // number of input elements is the squre of batch_size.
- uint32_t batch_size = getSizeOfDimension(_outputShape, 0);
- uint32_t input_n_elements = getNumberOfElements(_inputShape);
- if (batch_size * batch_size == input_n_elements)
+ int total_input_size = 1;
+ for (int i = 0; i < _inputShape.dimensions.size(); i++)
{
- ::tflite::reference_ops::FullyConnected(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
- reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
- reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
- }
- else
- {
- ::tflite::optimized_ops::FullyConnected(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape),
- reinterpret_cast<const float *>(_weightsData), convertShapeToDims(_weightsShape),
- reinterpret_cast<const float *>(_biasData), convertShapeToDims(_biasShape),
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
+ total_input_size *= _inputShape.dimensions[i];
}
+
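+  // Weights are laid out as [num_units, input_size]; the batch size follows from the flattened input element count.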
+ int input_size = _weightsShape.dimensions[1];
+ const int batch_size = total_input_size / input_size;
+ const int num_units = _weightsShape.dimensions[0];
+
+ TfLiteFusedActivation act = convertFusedActivation(_activation);
+
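+  // Initialize every batch row of the output with the bias vector before accumulating.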
+ ::tflite::tensor_utils::VectorBatchVectorAssign(reinterpret_cast<const float *>(_biasData),
+ num_units, batch_size,
+ reinterpret_cast<float *>(_outputData));
+
+ // Compute output += weight * input
+ ::tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
+ reinterpret_cast<const float *>(_weightsData), num_units, input_size,
+ reinterpret_cast<const float *>(_inputData), batch_size,
+ reinterpret_cast<float *>(_outputData), /*result_stride=*/1);
+
+ // Apply activation function
+ ::tflite::tensor_utils::ApplyActivationToVector(reinterpret_cast<float *>(_outputData),
+ batch_size * num_units, act,
+ reinterpret_cast<float *>(_outputData));
+
return true;
}
bool FullyConnectedLayer::fullyConnectedQuant8()
{
- int32_t inputOffset = -_inputShape.offset;
- int32_t weightsOffset = -_weightsShape.offset;
- int32_t outputOffset = _outputShape.offset;
- float real_multiplier = 0.0;
- int32_t output_multiplier = 0;
- int32_t output_shift = 0;
- int32_t output_activation_min = 0;
- int32_t output_activation_max = 0;
- // Caution : 'Convolution' can make misleading. It seems it is just math term.
- if (!GetQuantizedConvolutionMultipler(_inputShape, _weightsShape, _biasShape, _outputShape,
- &real_multiplier) ||
- !QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier, &output_shift))
- {
- return false;
- }
- CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
- &output_activation_max);
- static gemmlowp::GemmContext gemm_context;
- // Prevent concurrent executions that access gemm_context.
- std::unique_lock<std::mutex> lock(executionMutex);
- // Alow gemmlowp automatically decide how many threads to use.
- gemm_context.set_max_num_threads(0);
- ::tflite::optimized_ops::FullyConnected(
- _inputData, convertShapeToDims(_inputShape), inputOffset, _weightsData,
- convertShapeToDims(_weightsShape), weightsOffset,
- reinterpret_cast<const int32_t *>(_biasData), convertShapeToDims(_biasShape), outputOffset,
- output_multiplier, output_shift, output_activation_min, output_activation_max, _outputData,
- convertShapeToDims(_outputShape), &gemm_context);
- return true;
+ throw std::runtime_error{"FullyConnectedLayer : Not tested for TENSOR_QUANT8_ASYMM"};
}
void FullyConnectedLayer::configure(uint8_t *inputData, const Shape inputShape,
diff --git a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
index 3d96bb401..c4a288b07 100644
--- a/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/MaxPoolLayer.cc
@@ -26,14 +26,14 @@ namespace kernel
namespace cpu
{
-#define MAXPOOLING_PARAMETERS \
- uint32_t height = getSizeOfDimension(_inputShape, 1); \
- uint32_t width = getSizeOfDimension(_inputShape, 2); \
- uint32_t outHeight = getSizeOfDimension(_outputShape, 1); \
- uint32_t outWidth = getSizeOfDimension(_outputShape, 2); \
- \
- uint32_t paddingHeight = (uint32_t)_paddingTop; \
- uint32_t paddingWidth = (uint32_t)_paddingLeft;
+#define MAXPOOLING_PARAMETERS \
+ tflite::PoolParams op_params; \
+ op_params.stride_height = _strideHeight; \
+ op_params.stride_width = _strideWidth; \
+ op_params.filter_height = _kernelHeight; \
+ op_params.filter_width = _kernelWidth; \
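+ /* NOTE: the (int8_t) casts below assume the padding values fit in a signed byte */ \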
+ op_params.padding_values.height = (int8_t)_paddingTop; \
+ op_params.padding_values.width = (int8_t)_paddingLeft;
MaxPoolLayer::MaxPoolLayer()
: _inputData(nullptr), _outputData(nullptr), _inputShape(), _outputShape(), _paddingLeft(0),
@@ -46,31 +46,30 @@ MaxPoolLayer::MaxPoolLayer()
bool MaxPoolLayer::maxPoolFloat32()
{
-
MAXPOOLING_PARAMETERS
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(_activation, &output_activation_min, &output_activation_max);
+ op_params.float_activation_min = output_activation_min;
+ op_params.float_activation_max = output_activation_max;
- ::tflite::optimized_ops::MaxPool(
- reinterpret_cast<const float *>(_inputData), convertShapeToDims(_inputShape), _strideWidth,
- _strideHeight, paddingWidth, paddingHeight, _kernelWidth, _kernelHeight,
- output_activation_min, output_activation_max, reinterpret_cast<float *>(_outputData),
- convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::MaxPool(op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData),
+ convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData));
return true;
}
bool MaxPoolLayer::maxPoolQuant8()
{
-
MAXPOOLING_PARAMETERS
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
CalculateActivationRangeUint8(_activation, _outputShape, &output_activation_min,
&output_activation_max);
+ op_params.quantized_activation_min = output_activation_min;
+ op_params.quantized_activation_max = output_activation_max;
- ::tflite::optimized_ops::MaxPool(_inputData, convertShapeToDims(_inputShape), _strideWidth,
- _strideHeight, paddingWidth, paddingHeight, _kernelWidth,
- _kernelHeight, output_activation_min, output_activation_max,
- _outputData, convertShapeToDims(_outputShape));
+ ::tflite::optimized_ops::MaxPool(op_params, convertShapeToTFLiteShape(_inputShape), _inputData,
+ convertShapeToTFLiteShape(_outputShape), _outputData);
return true;
}
diff --git a/runtimes/neurun/src/kernel/cpu/OperationUtils.h b/runtimes/neurun/src/kernel/cpu/OperationUtils.h
index 5914d04e3..066b1e573 100644
--- a/runtimes/neurun/src/kernel/cpu/OperationUtils.h
+++ b/runtimes/neurun/src/kernel/cpu/OperationUtils.h
@@ -23,7 +23,9 @@
#include <limits>
#include <vector>
+#include "tensorflow/contrib/lite/c/builtin_op_data.h"
#include "tensorflow/contrib/lite/kernels/internal/types.h"
+#include "tensorflow/contrib/lite/kernels/internal/tensor.h"
#include "graph/operand/Object.h"
#include "graph/operand/DataType.h"
@@ -75,6 +77,51 @@ inline ::tflite::Dims<4> convertShapeToDims(const Shape &shape)
return dims;
}
+inline ::tflite::RuntimeShape convertShapeToTFLiteShape(const Shape &shape)
+{
+ std::vector<int32_t> raw_shape;
+ raw_shape.resize(4);
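+  // Missing trailing dimensions are padded with 1, e.g. a rank-2 shape {N, C} becomes {N, C, 1, 1}.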
+
+ for (uint32_t i = 0; i < 4; ++i)
+ {
+ if (i >= shape.dimensions.size())
+ {
+ raw_shape[i] = 1;
+ }
+ else
+ {
+ raw_shape[i] = shape.dimensions[i];
+ }
+ }
+
+ return ::tflite::GetTensorShape(raw_shape);
+}
+
+inline TfLiteFusedActivation convertFusedActivation(FuseCode act)
+{
+ if (act == ANEURALNETWORKS_FUSED_NONE)
+ {
+ return kTfLiteActNone;
+ }
+
+ if (act == ANEURALNETWORKS_FUSED_RELU)
+ {
+ return kTfLiteActRelu;
+ }
+
+ if (act == ANEURALNETWORKS_FUSED_RELU1)
+ {
+ return kTfLiteActRelu1;
+ }
+
+ if (act == ANEURALNETWORKS_FUSED_RELU6)
+ {
+ return kTfLiteActRelu6;
+ }
+
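+  // Unrecognized fuse codes fall back to no fused activation.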
+ return kTfLiteActNone;
+}
+
__wur bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t *quantized_multiplier,
int32_t *right_shift);
diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
index 4f5a69f2e..c998c65f6 100644
--- a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
@@ -33,45 +33,86 @@ SoftMaxLayer::SoftMaxLayer()
// DO NOTHING
}
+// Performs softmax along the input of size (input_size * batch_size).
+void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
+ float *out)
+{
+ TF_LITE_ASSERT(input_size > 0);
+
+ // For each batch
+ for (int b = 0; b < batch_size; b++)
+ {
+ // Find the max coeff.
+ float max_coeff = in[0];
+ for (int i = 1; i < input_size; i++)
+ {
+ if (in[i] > max_coeff)
+ max_coeff = in[i];
+ }
+
+ // Compute the normalized sum of exps.
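+    // Subtracting max_coeff keeps exp() from overflowing for large inputs.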
+ float exp_sum = 0.0;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] = std::exp((in[i] - max_coeff) * beta);
+ exp_sum += out[i];
+ }
+
+ // Divide by the sum of exps.
+ float reciprocal_sum_exp = 1.f / exp_sum;
+ for (int i = 0; i < input_size; i++)
+ {
+ out[i] *= reciprocal_sum_exp;
+ }
+
+ // Advance in and out pointers for the next batch.
+ in += input_size;
+ out += input_size;
+ }
+}
+
bool SoftMaxLayer::softmaxFloat32()
{
- ::tflite::Dims<4> dim;
+ Shape shapeIn4D;
+
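+  // 2D inputs use the local reference Softmax above; 4D inputs go through the TFLite optimized kernel.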
if (getNumberOfDimensions(_inputShape) == 2)
{
uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
- Shape shapeIn4D;
- shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
- dim = convertShapeToDims(shapeIn4D);
+ Softmax(reinterpret_cast<const float *>(_inputData), input_size, batch_size, _beta,
+ reinterpret_cast<float *>(_outputData));
}
else if (getNumberOfDimensions(_inputShape) == 4)
{
- dim = convertShapeToDims(_inputShape);
+ ::tflite::SoftmaxParams op_params;
+ op_params.beta = _beta;
+ ::tflite::optimized_ops::Softmax(op_params, convertShapeToTFLiteShape(_inputShape),
+ reinterpret_cast<const float *>(_inputData),
+ convertShapeToTFLiteShape(_outputShape),
+ reinterpret_cast<float *>(_outputData));
}
else
{
std::cout << "only 2D and 4D tensors supported" << std::endl;
return false;
}
- ::tflite::optimized_ops::Softmax(reinterpret_cast<const float *>(_inputData), dim, _beta,
- reinterpret_cast<float *>(_outputData), dim);
+
return true;
}
bool SoftMaxLayer::softmaxQuant8()
{
- ::tflite::Dims<4> dim;
+ Shape shapeIn4D = _inputShape;
+
if (getNumberOfDimensions(_inputShape) == 2)
{
uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
- Shape shapeIn4D;
shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
- dim = convertShapeToDims(shapeIn4D);
}
else if (getNumberOfDimensions(_inputShape) == 4)
{
- dim = convertShapeToDims(_inputShape);
+ shapeIn4D = _inputShape;
}
else
{
@@ -94,8 +135,13 @@ bool SoftMaxLayer::softmaxQuant8()
return false;
}
float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
- ::tflite::optimized_ops::Softmax(_inputData, dim, input_multiplier, input_left_shift, diff_min,
- _outputData, dim);
+
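+  // Quantized softmax parameters: fixed-point input multiplier/shift and the smallest representable input difference.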
+ ::tflite::SoftmaxParams op_params;
+ op_params.input_multiplier = input_multiplier;
+ op_params.input_left_shift = input_left_shift;
+ op_params.diff_min = diff_min;
+ ::tflite::optimized_ops::Softmax(op_params, convertShapeToTFLiteShape(shapeIn4D), _inputData,
+ convertShapeToTFLiteShape(shapeIn4D), _outputData);
return true;
}