diff options
Diffstat (limited to 'compute/cker/include/cker/TensorUtils.h')
-rw-r--r-- | compute/cker/include/cker/TensorUtils.h | 112 |
1 file changed, 95 insertions, 17 deletions
diff --git a/compute/cker/include/cker/TensorUtils.h b/compute/cker/include/cker/TensorUtils.h index e07c91239..bac79b887 100644 --- a/compute/cker/include/cker/TensorUtils.h +++ b/compute/cker/include/cker/TensorUtils.h @@ -31,55 +31,133 @@ namespace nnfw namespace cker { -void VectorBatchVectorAssign(const float *vector, int v_size, int n_batch, float *batch_vector) +inline void CwiseClipping(float *vector, const int v_size, const float clipping_value) +{ + NEON_OR_PORTABLE(CwiseClipping, vector, v_size, clipping_value); +} + +inline void VectorBatchVectorAdd(const float *vector, int v_size, int n_batch, float *batch_vector) +{ + PortableVectorBatchVectorAdd(vector, v_size, n_batch, batch_vector); +} + +inline void VectorBatchVectorAssign(const float *vector, int v_size, int n_batch, + float *batch_vector) { PortableVectorBatchVectorAssign(vector, v_size, n_batch, batch_vector); } -bool IsZeroVector(const float *vector, int v_size) +// Cwise product of two vectors. +template <typename T> +inline void VectorVectorCwiseProduct(const T *__restrict__ vector1, const T *__restrict__ vector2, + int v_size, T *__restrict__ result) +{ + for (int v = 0; v < v_size; v++) + { + *result++ = *vector1++ * *vector2++; + } +} + +// Cwise product and accumulate of two vectors. Since it's a MAC operation, the +// assumption here is that result array is initialized to valid values. +template <typename T> +inline void VectorVectorCwiseProductAccumulate(const T *__restrict__ vector1, + const T *__restrict__ vector2, int v_size, + T *__restrict__ result) +{ + for (int v = 0; v < v_size; v++) + { + *result++ += *vector1++ * *vector2++; + } +} + +// Cwise product of a vector and a batch-vector. +template <typename T> +inline void VectorBatchVectorCwiseProduct(const T *vector, int v_size, const T *batch_vector, + int n_batch, T *result) +{ + for (int b = 0; b < n_batch; b++) + { + VectorVectorCwiseProduct(vector, batch_vector, v_size, result); + // Update the pointers. 
+ result += v_size; + batch_vector += v_size; + } +} + +// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC +// operation, the assumption here is that result array is initialized to valid +// values. +template <typename T> +inline void VectorBatchVectorCwiseProductAccumulate(const T *vector, int v_size, + const T *batch_vector, int n_batch, T *result) +{ + for (int b = 0; b < n_batch; b++) + { + VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result); + // Update the pointers. + result += v_size; + batch_vector += v_size; + } +} + +inline bool IsZeroVector(const float *vector, int v_size) { return NEON_OR_PORTABLE(IsZeroVector, vector, v_size); } -void ApplyActivationToVector(const float *vector, int v_size, - FusedActivationFunctionType activation, float *result) +inline void ApplyActivationToVector(const float *vector, int v_size, + FusedActivationFunctionType activation, float *result) { PortableApplyActivationToVector(vector, v_size, activation, result); } -void SymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values, - float *min, float *max, float *scaling_factor) +inline void Sub1Vector(const float *vector, int v_size, float *result) +{ + NEON_OR_PORTABLE(Sub1Vector, vector, v_size, result); +} + +inline void SymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values, + float *min, float *max, float *scaling_factor) { return NEON_OR_PORTABLE(SymmetricQuantizeFloats, values, size, quantized_values, min, max, scaling_factor); } -void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows, const int m_cols, - const int8_t *vector, const float *scaling_factors, - int n_batch, float *result, int result_stride) +inline void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows, + const int m_cols, const int8_t *vector, + const float *scaling_factors, int n_batch, + float *result, int result_stride) { 
NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vector, scaling_factors, n_batch, result, result_stride); } -void MatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols, - const float *vector, int n_batch, float *result, - int result_stride) +inline void MatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols, + const float *vector, int n_batch, float *result, + int result_stride) { NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vector, n_batch, result, result_stride); } -void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows, const int m_cols, - const int8_t *vectors, const float *scaling_factors, - int n_batch, int32_t *scratch, float *result, - int result_stride, ruy::Context *ruy_context) +inline void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows, + const int m_cols, const int8_t *vectors, + const float *scaling_factors, int n_batch, + int32_t *scratch, float *result, int result_stride, + ruy::Context *ruy_context) { NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vectors, scaling_factors, n_batch, scratch, result, result_stride, ruy_context); } -void ZeroVector(float *vector, int v_size) { PortableZeroVector(vector, v_size); } +inline void MeanStddevNormalization(const float *input_vector, float *output_vector, int v_size, + int n_batch) +{ + PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch); +} + +inline void ZeroVector(float *vector, int v_size) { PortableZeroVector(vector, v_size); } } // namespace cker } // namespace nnfw |