summaryrefslogtreecommitdiff
path: root/compute/ncnn/src/layer/instance_norm.cc
diff options
context:
space:
mode:
Diffstat (limited to 'compute/ncnn/src/layer/instance_norm.cc')
-rw-r--r--compute/ncnn/src/layer/instance_norm.cc371
1 files changed, 0 insertions, 371 deletions
diff --git a/compute/ncnn/src/layer/instance_norm.cc b/compute/ncnn/src/layer/instance_norm.cc
deleted file mode 100644
index 08c3f2c23..000000000
--- a/compute/ncnn/src/layer/instance_norm.cc
+++ /dev/null
@@ -1,371 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-// Tencent is pleased to support the open source community by making ncnn available.
-//
-// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
-//
-// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
-// in compliance with the License. You may obtain a copy of the License at
-//
-// https://opensource.org/licenses/BSD-3-Clause
-//
-// Unless required by applicable law or agreed to in writing, software distributed
-// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
-// CONDITIONS OF ANY KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations under the License.
-
-#include "ncnn/layer/instance_norm.h"
-#ifdef _OPENMP
-#include <omp.h>
-#endif
-
-#include <math.h>
-#include "ncnn/mat.h"
-#ifdef __ARM_NEON
-#include <arm_neon.h>
-#endif // __ARM_NEON
-
-namespace nnfw
-{
-namespace ncnn
-{
-
// Per-channel instance normalization for a row-major (CHW) Mat:
//   out = (in - mean) * gamma / sqrt(var + eps) + beta
// computed independently over each of `channels` planes of w*h elements.
// gamma_mat / beta_mat supply one scale / shift scalar per channel.
// in_mat and out_mat are presumably the same shape — TODO confirm at callers.
void ncnn_instance_norm_rowmajor(Mat &in_mat, Mat &out_mat, Mat &gamma_mat, Mat &beta_mat,
                                 int channels, float eps)
{
  // x = (x - mean) * gamma / sqrt(var + eps) + beta

  int w = in_mat.w;
  int h = in_mat.h;
  int size = w * h; // elements per channel plane
#ifdef __ARM_NEON
  int nn = size >> 2;   // number of full 4-float vectors
  int left4 = size & 3; // scalar remainder (0..3 elements)
#endif

// One channel per iteration; channels are independent, so this is safe.
#pragma omp parallel for
  for (int q = 0; q < channels; q++)
  {
#ifdef __ARM_NEON
    float *in_ptr = in_mat.channel(q);
    float *out_ptr = out_mat.channel(q);
    // First pass: vector-accumulate the sum and the sum of squares.
    float32x4_t _sum = vdupq_n_f32(0.f);
    float32x4_t _sq_sum = vdupq_n_f32(0.f);
    for (int n = nn; n > 0; n--)
    {
      float32x4_t _p = vld1q_f32(in_ptr);
      _sum = vaddq_f32(_sum, _p);
      _p = vmulq_f32(_p, _p);
      _sq_sum = vaddq_f32(_sq_sum, _p);
      in_ptr += 4;
    }
    // Horizontal reduction of the four lanes into scalar accumulators.
    float sum = vgetq_lane_f32(_sum, 0) + vgetq_lane_f32(_sum, 1);
    sum += vgetq_lane_f32(_sum, 2);
    sum += vgetq_lane_f32(_sum, 3);
    float sqsum = vgetq_lane_f32(_sq_sum, 0) + vgetq_lane_f32(_sq_sum, 1);
    sqsum += vgetq_lane_f32(_sq_sum, 2);
    sqsum += vgetq_lane_f32(_sq_sum, 3);

    // Fold in the tail elements that did not fill a full vector.
    for (int left = left4; left > 0; left--)
    {
      sum += *in_ptr;
      sqsum += (*in_ptr) * (*in_ptr);
      in_ptr++;
    }

    float mean = sum / size;
    float var = sqsum / size - mean * mean; // E[x^2] - E[x]^2
    float gamma = gamma_mat[q];
    float beta = beta_mat[q];
    // Fold normalization + affine into one fused multiply-add: y = a*x + b.
    float a = gamma / (sqrt(var + eps));
    float b = -mean * a + beta;

    // Second pass: apply y = a*x + b, vectorized, then the scalar tail.
    in_ptr = in_mat.channel(q);
    float32x4_t _a = vdupq_n_f32(a);
    float32x4_t _b = vdupq_n_f32(b);
    for (int n = nn; n > 0; n--)
    {
      float32x4_t _p = vld1q_f32(in_ptr);
      _p = vmulq_f32(_p, _a);
      _p = vaddq_f32(_p, _b);
      vst1q_f32(out_ptr, _p);
      in_ptr += 4;
      out_ptr += 4;
    }
    for (int left = left4; left > 0; left--)
    {
      *out_ptr = (*in_ptr) * a + b;
      in_ptr++;
      out_ptr++;
    }
#else
    float *in_ptr = in_mat.channel(q);
    float *out_ptr = out_mat.channel(q);
    // mean and var
    float sum = 0.f;
    float sqsum = 0.f;
    for (int i = 0; i < size; i++)
    {
      sum += in_ptr[i];
      sqsum += in_ptr[i] * in_ptr[i];
    }
    float mean = sum / size;
    float var = sqsum / size - mean * mean; // E[x^2] - E[x]^2

    float gamma = gamma_mat[q];
    float beta = beta_mat[q];

    // Fold normalization + affine into one fused multiply-add: y = a*x + b.
    float a = gamma / (sqrt(var + eps));
    float b = -mean * a + beta;
    for (int i = 0; i < size; i++)
    {
      out_ptr[i] = in_ptr[i] * a + b;
    }
#endif
  }
}
-
// Instance normalization for column-major data: the CHW-shaped Mat is
// reinterpreted as HWC, i.e. channels are interleaved along the fastest
// dimension (in_mat.w). Per-channel statistics are accumulated over strided
// samples, then out = in * a[c] + b[c] is applied in one pass.
// The `channels` parameter is intentionally unused; the channel count is
// taken from in_mat.w instead.
// NOTE(review): sum/sqsum/mean/var/a/b are C99-style VLAs sized by a runtime
// channel count — a compiler extension in C++ and a stack-overflow risk for
// large c. The OpenMP reduction over whole arrays also needs OpenMP >= 4.5
// semantics; confirm compiler support before reusing this pattern.
void ncnn_instance_norm_colmajor(Mat &in_mat, Mat &out_mat, Mat &gamma_mat, Mat &beta_mat,
                                 int /*channels*/, float eps)
{
  // Treat CHW layout as HWC layout
  int h = in_mat.c;
  int w = in_mat.h;
  int c = in_mat.w; // interleaved channel count

  int size = w * h;     // samples per channel
  int total = size * c; // total number of floats in the tensor

  // Per-channel accumulators and the fused scale/shift (y = a*x + b).
  float sum[c] = {};
  float sqsum[c] = {};

  float mean[c] = {};
  float var[c] = {};
  float a[c] = {};
  float b[c] = {};

  // Data is contiguous, so one base pointer covers the whole tensor.
  float *in_ptr = in_mat.channel(0);
  float *out_ptr = out_mat.channel(0);

// First pass: accumulate per-channel sum and sum of squares.
#pragma omp parallel for reduction(+ : sum, sqsum) schedule(guided)
  for (int i = 0; i < total; i += c)
  {
    for (int j = 0; j < c; j++)
    {
      sum[j] += in_ptr[i + j];
      sqsum[j] += in_ptr[i + j] * in_ptr[i + j];
    }
  }

  // Derive mean/var, then fold normalization into y = a*x + b per channel.
  for (int i = 0; i < c; i++)
  {
    mean[i] = sum[i] / size;
    var[i] = sqsum[i] / size - mean[i] * mean[i]; // E[x^2] - E[x]^2
    a[i] = gamma_mat[i] / (sqrt(var[i] + eps));
    b[i] = -mean[i] * a[i] + beta_mat[i];
  }

// Second pass: apply the per-channel affine transform.
#pragma omp parallel for schedule(guided)
  for (int i = 0; i < total; i += c)
  {
    for (int j = 0; j < c; j++)
    {
      out_ptr[i + j] = in_ptr[i + j] * a[j] + b[j];
    }
  }
}
-
-void ncnn_instance_norm_with_relu_rowmajor(Mat &in_mat, Mat &out_mat, Mat &gamma_mat, Mat &beta_mat,
- int channels, float eps, float /*slope*/)
-{
- int w = in_mat.w;
- int h = in_mat.h;
- int size = w * h;
-#ifdef __ARM_NEON
- int nn = size >> 2;
- int left4 = size & 3;
-#endif
-#pragma omp parallel for
- for (int q = 0; q < channels; q++)
- {
-#ifdef __ARM_NEON
- float *in_ptr = in_mat.channel(q);
- float *out_ptr = out_mat.channel(q);
- float32x4_t _sum = vdupq_n_f32(0.f);
- float32x4_t _sq_sum = vdupq_n_f32(0.f);
- for (int n = nn; n > 0; n--)
- {
- float32x4_t _p = vld1q_f32(in_ptr);
- _sum = vaddq_f32(_sum, _p);
- _p = vmulq_f32(_p, _p);
- _sq_sum = vaddq_f32(_sq_sum, _p);
- in_ptr += 4;
- }
- // float sum =
- // vgetq_lane_f32(_sum,0)+vgetq_lane_f32(_sum,1)+vgetq_lane_f32(_sum,2)+vgetq_lane_f32(_sum,3);
- // float sqsum = vgetq_lane_f32(_sq_sum,0)+vgetq_lane_f32(_sq_sum,1)+
- // vgetq_lane_f32(_sq_sum,2)+vgetq_lane_f32(_sq_sum,3);
- float sum = vgetq_lane_f32(_sum, 0) + vgetq_lane_f32(_sum, 1);
- sum += vgetq_lane_f32(_sum, 2);
- sum += vgetq_lane_f32(_sum, 3);
- float sqsum = vgetq_lane_f32(_sq_sum, 0) + vgetq_lane_f32(_sq_sum, 1);
- sqsum += vgetq_lane_f32(_sq_sum, 2);
- sqsum += vgetq_lane_f32(_sq_sum, 3);
- for (int left = left4; left > 0; left--)
- {
- sum += *in_ptr;
- sqsum += (*in_ptr) * (*in_ptr);
- in_ptr++;
- }
-
- float mean = sum / size;
- float var = sqsum / size - mean * mean;
- float gamma = gamma_mat[q];
- float beta = beta_mat[q];
- float a = gamma / (sqrt(var + eps));
- float b = -mean * a + beta;
- // TODO:slop is not used here , only for RELU which slop is always = 0;
- in_ptr = in_mat.channel(q);
- float32x4_t _a = vdupq_n_f32(a);
- float32x4_t _b = vdupq_n_f32(b);
- float32x4_t _zero = vdupq_n_f32(0.f);
- for (int n = nn; n > 0; n--)
- {
- float32x4_t _p = vld1q_f32(in_ptr);
- _p = vmulq_f32(_p, _a);
- _p = vaddq_f32(_p, _b);
- _p = vmaxq_f32(_p, _zero);
- vst1q_f32(out_ptr, _p);
- in_ptr += 4;
- out_ptr += 4;
- }
- for (int left = left4; left > 0; left--)
- {
- int temp = (*in_ptr) * a + b;
- *out_ptr = temp > 0 ? temp : 0;
- in_ptr++;
- out_ptr++;
- }
-#else
- float *in_ptr = in_mat.channel(q);
- float *out_ptr = out_mat.channel(q);
-
- // mean and var
- float sum = 0.f;
- float sqsum = 0.f;
- for (int i = 0; i < size; i++)
- {
- sum += in_ptr[i];
- sqsum += in_ptr[i] * in_ptr[i];
- }
- float mean = sum / size;
- float var = sqsum / size - mean * mean;
-
- float gamma = gamma_mat[q];
- float beta = beta_mat[q];
-
- float a = gamma / (sqrt(var + eps));
- float b = -mean * a + beta;
-
- if (slope == 0.f)
- {
- for (int i = 0; i < size; i++)
- {
- float temp = in_ptr[i] * a + b;
- out_ptr[i] = temp > 0 ? temp : 0;
- }
- }
- else
- {
- for (int i = 0; i < size; i++)
- {
- float temp = in_ptr[i] * a + b;
- out_ptr[i] = temp > 0 ? temp : temp * slope;
- }
- }
-#endif
- }
-}
-
// Instance normalization fused with (leaky) ReLU for column-major data:
// the CHW-shaped Mat is reinterpreted as HWC (channels interleaved along
// in_mat.w). After the affine normalization y = in * a[c] + b[c], negative
// values are clamped to 0 when slope == 0, otherwise scaled by slope.
// The `channels` parameter is intentionally unused; the channel count is
// taken from in_mat.w instead.
// NOTE(review): sum/sqsum/mean/var/a/b are C99-style VLAs sized by a runtime
// channel count — a compiler extension in C++ and a stack-overflow risk for
// large c. The OpenMP reduction over whole arrays also needs OpenMP >= 4.5
// semantics; confirm compiler support before reusing this pattern.
void ncnn_instance_norm_with_relu_colmajor(Mat &in_mat, Mat &out_mat, Mat &gamma_mat, Mat &beta_mat,
                                           int /*channels*/, float eps, float slope)
{
  // Treat CHW layout as HWC layout
  int h = in_mat.c;
  int w = in_mat.h;
  int c = in_mat.w; // interleaved channel count

  int size = w * h;     // samples per channel
  int total = size * c; // total number of floats in the tensor

  // Per-channel accumulators and the fused scale/shift (y = a*x + b).
  float sum[c] = {};
  float sqsum[c] = {};

  float mean[c] = {};
  float var[c] = {};
  float a[c] = {};
  float b[c] = {};

  // Data is contiguous, so one base pointer covers the whole tensor.
  float *in_ptr = in_mat.channel(0);
  float *out_ptr = out_mat.channel(0);

// First pass: accumulate per-channel sum and sum of squares.
#pragma omp parallel for reduction(+ : sum, sqsum) schedule(guided)
  for (int i = 0; i < total; i += c)
  {
    for (int j = 0; j < c; j++)
    {
      sum[j] += in_ptr[i + j];
      sqsum[j] += in_ptr[i + j] * in_ptr[i + j];
    }
  }

  // Derive mean/var, then fold normalization into y = a*x + b per channel.
  for (int i = 0; i < c; i++)
  {
    mean[i] = sum[i] / size;
    var[i] = sqsum[i] / size - mean[i] * mean[i]; // E[x^2] - E[x]^2
    a[i] = gamma_mat[i] / (sqrt(var[i] + eps));
    b[i] = -mean[i] * a[i] + beta_mat[i];
  }

  // Second pass: affine transform + activation. The slope test is hoisted
  // out of the hot loop: plain ReLU vs. leaky ReLU.
  if (slope == 0.f)
  {
#pragma omp parallel for schedule(guided)
    for (int i = 0; i < total; i += c)
    {
      for (int j = 0; j < c; j++)
      {
        float temp = in_ptr[i + j] * a[j] + b[j];
        out_ptr[i + j] = temp > 0 ? temp : 0;
      }
    }
  }
  else
  {
#pragma omp parallel for schedule(guided)
    for (int i = 0; i < total; i += c)
    {
      for (int j = 0; j < c; j++)
      {
        float temp = in_ptr[i + j] * a[j] + b[j];
        out_ptr[i + j] = temp > 0 ? temp : temp * slope;
      }
    }
  }
}
-
-} // namespace ncnn
-
-} // namespace nnfw