Diffstat (limited to 'compute/cker/include/cker/TensorUtils.h')
-rw-r--r--   compute/cker/include/cker/TensorUtils.h   112
1 file changed, 95 insertions, 17 deletions
diff --git a/compute/cker/include/cker/TensorUtils.h b/compute/cker/include/cker/TensorUtils.h
index e07c91239..bac79b887 100644
--- a/compute/cker/include/cker/TensorUtils.h
+++ b/compute/cker/include/cker/TensorUtils.h
@@ -31,55 +31,133 @@ namespace nnfw
namespace cker
{
-void VectorBatchVectorAssign(const float *vector, int v_size, int n_batch, float *batch_vector)
+inline void CwiseClipping(float *vector, const int v_size, const float clipping_value)
+{
+ NEON_OR_PORTABLE(CwiseClipping, vector, v_size, clipping_value);
+}
+
+inline void VectorBatchVectorAdd(const float *vector, int v_size, int n_batch, float *batch_vector)
+{
+ PortableVectorBatchVectorAdd(vector, v_size, n_batch, batch_vector);
+}
+
+inline void VectorBatchVectorAssign(const float *vector, int v_size, int n_batch,
+ float *batch_vector)
{
PortableVectorBatchVectorAssign(vector, v_size, n_batch, batch_vector);
}
-bool IsZeroVector(const float *vector, int v_size)
+// Cwise product of two vectors.
+template <typename T>
+inline void VectorVectorCwiseProduct(const T *__restrict__ vector1, const T *__restrict__ vector2,
+ int v_size, T *__restrict__ result)
+{
+ for (int v = 0; v < v_size; v++)
+ {
+ *result++ = *vector1++ * *vector2++;
+ }
+}
+
+// Cwise product and accumulate of two vectors. Since it's a MAC operation, the
+// assumption here is that result array is initialized to valid values.
+template <typename T>
+inline void VectorVectorCwiseProductAccumulate(const T *__restrict__ vector1,
+ const T *__restrict__ vector2, int v_size,
+ T *__restrict__ result)
+{
+ for (int v = 0; v < v_size; v++)
+ {
+ *result++ += *vector1++ * *vector2++;
+ }
+}
+
+// Cwise product of a vector and a batch-vector.
+template <typename T>
+inline void VectorBatchVectorCwiseProduct(const T *vector, int v_size, const T *batch_vector,
+ int n_batch, T *result)
+{
+ for (int b = 0; b < n_batch; b++)
+ {
+ VectorVectorCwiseProduct(vector, batch_vector, v_size, result);
+ // Update the pointers.
+ result += v_size;
+ batch_vector += v_size;
+ }
+}
+
+// Cwise product and accumulate of a vector and a batch-vector. Since it's a MAC
+// operation, the assumption here is that result array is initialized to valid
+// values.
+template <typename T>
+inline void VectorBatchVectorCwiseProductAccumulate(const T *vector, int v_size,
+ const T *batch_vector, int n_batch, T *result)
+{
+ for (int b = 0; b < n_batch; b++)
+ {
+ VectorVectorCwiseProductAccumulate(vector, batch_vector, v_size, result);
+ // Update the pointers.
+ result += v_size;
+ batch_vector += v_size;
+ }
+}
+
+inline bool IsZeroVector(const float *vector, int v_size)
{
return NEON_OR_PORTABLE(IsZeroVector, vector, v_size);
}
-void ApplyActivationToVector(const float *vector, int v_size,
- FusedActivationFunctionType activation, float *result)
+inline void ApplyActivationToVector(const float *vector, int v_size,
+ FusedActivationFunctionType activation, float *result)
{
PortableApplyActivationToVector(vector, v_size, activation, result);
}
-void SymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values,
- float *min, float *max, float *scaling_factor)
+inline void Sub1Vector(const float *vector, int v_size, float *result)
+{
+ NEON_OR_PORTABLE(Sub1Vector, vector, v_size, result);
+}
+
+inline void SymmetricQuantizeFloats(const float *values, const int size, int8_t *quantized_values,
+ float *min, float *max, float *scaling_factor)
{
return NEON_OR_PORTABLE(SymmetricQuantizeFloats, values, size, quantized_values, min, max,
scaling_factor);
}
-void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows, const int m_cols,
- const int8_t *vector, const float *scaling_factors,
- int n_batch, float *result, int result_stride)
+inline void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows,
+ const int m_cols, const int8_t *vector,
+ const float *scaling_factors, int n_batch,
+ float *result, int result_stride)
{
NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vector,
scaling_factors, n_batch, result, result_stride);
}
-void MatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
- const float *vector, int n_batch, float *result,
- int result_stride)
+inline void MatrixBatchVectorMultiplyAccumulate(const float *matrix, int m_rows, int m_cols,
+ const float *vector, int n_batch, float *result,
+ int result_stride)
{
NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vector, n_batch,
result, result_stride);
}
-void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows, const int m_cols,
- const int8_t *vectors, const float *scaling_factors,
- int n_batch, int32_t *scratch, float *result,
- int result_stride, ruy::Context *ruy_context)
+inline void MatrixBatchVectorMultiplyAccumulate(const int8_t *matrix, const int m_rows,
+ const int m_cols, const int8_t *vectors,
+ const float *scaling_factors, int n_batch,
+ int32_t *scratch, float *result, int result_stride,
+ ruy::Context *ruy_context)
{
NEON_OR_PORTABLE(MatrixBatchVectorMultiplyAccumulate, matrix, m_rows, m_cols, vectors,
scaling_factors, n_batch, scratch, result, result_stride, ruy_context);
}
-void ZeroVector(float *vector, int v_size) { PortableZeroVector(vector, v_size); }
+inline void MeanStddevNormalization(const float *input_vector, float *output_vector, int v_size,
+ int n_batch)
+{
+ PortableMeanStddevNormalization(input_vector, output_vector, v_size, n_batch);
+}
+
+inline void ZeroVector(float *vector, int v_size) { PortableZeroVector(vector, v_size); }
} // namespace cker
} // namespace nnfw
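
For reference (not part of the patch): a minimal standalone sketch of the batched MAC semantics that the newly added VectorBatchVectorCwiseProductAccumulate template implements, with the shared vector broadcast across the n_batch slices of batch_vector. The loop below mirrors the portable code in the hunk above; the test values are illustrative only.

#include <cstdio>
#include <vector>

int main()
{
  const int v_size = 3, n_batch = 2;
  // Shared vector, applied to every batch.
  std::vector<float> vector = {1.f, 2.f, 3.f};
  // Two batches of v_size elements each, laid out contiguously.
  std::vector<float> batch_vector = {10.f, 10.f, 10.f, 20.f, 20.f, 20.f};
  // MAC operation: result must already hold valid values (here, all 1.0).
  std::vector<float> result(v_size * n_batch, 1.f);

  // result[b][v] += vector[v] * batch_vector[b][v]
  for (int b = 0; b < n_batch; ++b)
    for (int v = 0; v < v_size; ++v)
      result[b * v_size + v] += vector[v] * batch_vector[b * v_size + v];

  for (float x : result)
    std::printf("%g ", x); // prints: 11 21 31 21 41 61
  std::printf("\n");
  return 0;
}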