summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Huamin Li <huaminli@fb.com> 2019-04-23 13:05:55 -0700
committer Facebook Github Bot <facebook-github-bot@users.noreply.github.com> 2019-04-23 13:31:15 -0700
commit 55e53d3d7e18f9bd67f729fcb95688276fc9e37a (patch)
tree 7aefe2f880462f29d582ff4fd9ee083e1858ea06
parent 5f82d59c0ae7d017bdb19a28b23fa1c19a0d6a9e (diff)
download pytorch-55e53d3d7e18f9bd67f729fcb95688276fc9e37a.tar.gz
pytorch-55e53d3d7e18f9bd67f729fcb95688276fc9e37a.tar.bz2
pytorch-55e53d3d7e18f9bd67f729fcb95688276fc9e37a.zip
correct comments in group_norm_op (#19621)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/19621 The comments in group_norm_op are not accurate (specifically, the math part); this diff fixes them. Reviewed By: BIT-silence Differential Revision: D15048695 fbshipit-source-id: 27d41d3ae21054257967815254134849944d56ca
-rw-r--r-- caffe2/operators/group_norm_op.cc 4
-rw-r--r-- caffe2/operators/group_norm_op.cu 4
2 files changed, 4 insertions, 4 deletions
diff --git a/caffe2/operators/group_norm_op.cc b/caffe2/operators/group_norm_op.cc
index 80c0152828..872462450f 100644
--- a/caffe2/operators/group_norm_op.cc
+++ b/caffe2/operators/group_norm_op.cc
@@ -15,12 +15,12 @@ namespace caffe2 {
// Math:
// Y = gamma * (X - mu) * rsig + beta
// let s = gamma * rsig
-// let b = beta - mu * rsig
+// let b = beta - gamma * mu * rsig
// Y = s * X + b
// let n = K * HxW
// dL/dX = dL/dY * dY/dX = dL/dY * (d(s * X)/dX + db/dX)
// d(s * X)/dX = s + X * ds/dX = s + gamma * X * drsig/dX
-// db/dX = -u * drsig/dX - rsig * dmu/dX
+// db/dX = -gamma * mu * drsig/dX - gamma * rsig * dmu/dX
// drsig/dX = -rsig^3 * (X - mu) / n
// dmu/dX = 1 / n
diff --git a/caffe2/operators/group_norm_op.cu b/caffe2/operators/group_norm_op.cu
index 3e1dcf5c15..4e498adba9 100644
--- a/caffe2/operators/group_norm_op.cu
+++ b/caffe2/operators/group_norm_op.cu
@@ -139,12 +139,12 @@ __global__ void ComputeInternalGradientsNCHWCUDAKernel(
// Math:
// Y = gamma * (X - mu) * rsig + beta
// let s = gamma * rsig
-// let b = beta - mu * rsig
+// let b = beta - gamma * mu * rsig
// Y = s * X + b
// let n = K * HxW
// dL/dX = dL/dY * dY/dX = dL/dY * (d(s * X)/dX + db/dX)
// d(s * X)/dX = s + X * ds/dX = s + gamma * X * drsig/dX
-// db/dX = -u * drsig/dX - rsig * dmu/dX
+// db/dX = -gamma * mu * drsig/dX - gamma * rsig * dmu/dX
// drsig/dX = -rsig^3 * (X - mu) / n
// dmu/dX = 1 / n
template <typename T>