Diffstat (limited to 'runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc')
-rw-r--r-- | runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc | 72 |
1 file changed, 59 insertions, 13 deletions
diff --git a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
index 4f5a69f2e..c998c65f6 100644
--- a/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/SoftMaxLayer.cc
@@ -33,45 +33,86 @@ SoftMaxLayer::SoftMaxLayer()
   // DO NOTHING
 }
 
+// Performs softmax along the input of size (input_size * batch_size).
+void Softmax(const float *in, const int input_size, const int batch_size, const float beta,
+             float *out)
+{
+  TF_LITE_ASSERT(input_size > 0);
+
+  // For each batch
+  for (int b = 0; b < batch_size; b++)
+  {
+    // Find the max coeff.
+    float max_coeff = in[0];
+    for (int i = 1; i < input_size; i++)
+    {
+      if (in[i] > max_coeff)
+        max_coeff = in[i];
+    }
+
+    // Compute the normalized sum of exps.
+    float exp_sum = 0.0;
+    for (int i = 0; i < input_size; i++)
+    {
+      out[i] = std::exp((in[i] - max_coeff) * beta);
+      exp_sum += out[i];
+    }
+
+    // Divide by the sum of exps.
+    float reciprocal_sum_exp = 1.f / exp_sum;
+    for (int i = 0; i < input_size; i++)
+    {
+      out[i] *= reciprocal_sum_exp;
+    }
+
+    // Advance in and out pointers for the next batch.
+    in += input_size;
+    out += input_size;
+  }
+}
+
 bool SoftMaxLayer::softmaxFloat32()
 {
-  ::tflite::Dims<4> dim;
+  Shape shapeIn4D;
+
   if (getNumberOfDimensions(_inputShape) == 2)
   {
     uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
     uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
-    Shape shapeIn4D;
-    shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
-    dim = convertShapeToDims(shapeIn4D);
+    Softmax(reinterpret_cast<const float *>(_inputData), input_size, batch_size, _beta,
+            reinterpret_cast<float *>(_outputData));
   }
   else if (getNumberOfDimensions(_inputShape) == 4)
   {
-    dim = convertShapeToDims(_inputShape);
+    ::tflite::SoftmaxParams op_params;
+    op_params.beta = _beta;
+    ::tflite::optimized_ops::Softmax(op_params, convertShapeToTFLiteShape(_inputShape),
+                                     reinterpret_cast<const float *>(_inputData),
+                                     convertShapeToTFLiteShape(_outputShape),
+                                     reinterpret_cast<float *>(_outputData));
   }
   else
   {
     std::cout << "only 2D and 4D tensors supported" << std::endl;
     return false;
   }
 
-  ::tflite::optimized_ops::Softmax(reinterpret_cast<const float *>(_inputData), dim, _beta,
-                                   reinterpret_cast<float *>(_outputData), dim);
+
   return true;
 }
 
 bool SoftMaxLayer::softmaxQuant8()
 {
-  ::tflite::Dims<4> dim;
+  Shape shapeIn4D = _inputShape;
+
   if (getNumberOfDimensions(_inputShape) == 2)
   {
     uint32_t batch_size = getSizeOfDimension(_inputShape, 0);
     uint32_t input_size = getNumberOfElements(_inputShape) / batch_size;
-    Shape shapeIn4D;
     shapeIn4D.dimensions = {batch_size, 1, 1, input_size};
-    dim = convertShapeToDims(shapeIn4D);
   }
   else if (getNumberOfDimensions(_inputShape) == 4)
   {
-    dim = convertShapeToDims(_inputShape);
+    shapeIn4D = _inputShape;
   }
   else
   {
@@ -94,8 +135,13 @@ bool SoftMaxLayer::softmaxQuant8()
     return false;
   }
   float diff_min = -1.0f * CalculateInputRadius(kScaledDiffIntegerBits, input_left_shift);
-  ::tflite::optimized_ops::Softmax(_inputData, dim, input_multiplier, input_left_shift, diff_min,
-                                   _outputData, dim);
+
+  ::tflite::SoftmaxParams op_params;
+  op_params.input_multiplier = input_multiplier;
+  op_params.input_left_shift = input_left_shift;
+  op_params.diff_min = diff_min;
+  ::tflite::optimized_ops::Softmax(op_params, convertShapeToTFLiteShape(shapeIn4D), _inputData,
+                                   convertShapeToTFLiteShape(shapeIn4D), _outputData);
   return true;
 }
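
For reference, below is a minimal standalone sketch (not part of the commit) that mirrors the batched float Softmax helper added above; the SoftmaxRef name and the main() driver are illustrative assumptions, not project code. It performs the same per-batch steps (subtract the row max for numerical stability, exponentiate scaled by beta, then normalize), so each output row should sum to 1.

#include <cmath>
#include <cstdio>

// Reference implementation of the batched softmax over a flat buffer of
// size (input_size * batch_size), matching the helper added in the diff.
static void SoftmaxRef(const float *in, int input_size, int batch_size, float beta, float *out)
{
  for (int b = 0; b < batch_size; b++)
  {
    // Find the max coefficient of this batch row (for numerical stability).
    float max_coeff = in[0];
    for (int i = 1; i < input_size; i++)
    {
      if (in[i] > max_coeff)
        max_coeff = in[i];
    }

    // Exponentiate (scaled by beta) and accumulate the sum of exps.
    float exp_sum = 0.0f;
    for (int i = 0; i < input_size; i++)
    {
      out[i] = std::exp((in[i] - max_coeff) * beta);
      exp_sum += out[i];
    }

    // Normalize so the row sums to 1.
    const float reciprocal_sum_exp = 1.f / exp_sum;
    for (int i = 0; i < input_size; i++)
      out[i] *= reciprocal_sum_exp;

    // Advance to the next batch row.
    in += input_size;
    out += input_size;
  }
}

int main()
{
  // Two batches of three elements each (batch_size = 2, input_size = 3).
  const float in[6] = {1.f, 2.f, 3.f, -1.f, 0.f, 1.f};
  float out[6];
  SoftmaxRef(in, 3, 2, 1.0f, out);
  for (int b = 0; b < 2; b++)
    std::printf("batch %d: %.4f %.4f %.4f\n", b, out[b * 3 + 0], out[b * 3 + 1], out[b * 3 + 2]);
  return 0;
}

With beta = 1.0 both rows print the same distribution (about 0.0900, 0.2447, 0.6652), since softmax is invariant to a constant shift of its inputs.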