diff options
author | Huamin Li <huaminli@fb.com> | 2019-04-23 13:05:55 -0700 |
---|---|---|
committer | Facebook Github Bot <facebook-github-bot@users.noreply.github.com> | 2019-04-23 13:31:15 -0700 |
commit | 55e53d3d7e18f9bd67f729fcb95688276fc9e37a (patch) | |
tree | 7aefe2f880462f29d582ff4fd9ee083e1858ea06 | |
parent | 5f82d59c0ae7d017bdb19a28b23fa1c19a0d6a9e (diff) | |
download | pytorch-55e53d3d7e18f9bd67f729fcb95688276fc9e37a.tar.gz pytorch-55e53d3d7e18f9bd67f729fcb95688276fc9e37a.tar.bz2 pytorch-55e53d3d7e18f9bd67f729fcb95688276fc9e37a.zip |
correct comments in group_norm_op (#19621)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/19621
The comments in group_norm_op are not accurate (specifically, the math part); this diff fixes them.
Reviewed By: BIT-silence
Differential Revision: D15048695
fbshipit-source-id: 27d41d3ae21054257967815254134849944d56ca
-rw-r--r-- | caffe2/operators/group_norm_op.cc | 4 | ||||
-rw-r--r-- | caffe2/operators/group_norm_op.cu | 4 |
2 files changed, 4 insertions, 4 deletions
diff --git a/caffe2/operators/group_norm_op.cc b/caffe2/operators/group_norm_op.cc index 80c0152828..872462450f 100644 --- a/caffe2/operators/group_norm_op.cc +++ b/caffe2/operators/group_norm_op.cc @@ -15,12 +15,12 @@ namespace caffe2 { // Math: // Y = gamma * (X - mu) * rsig + beta // let s = gamma * rsig -// let b = beta - mu * rsig +// let b = beta - gamma * mu * rsig // Y = s * X + b // let n = K * HxW // dL/dX = dL/dY * dY/dX = dL/dY * (d(s * X)/dX + db/dX) // d(s * X)/dX = s + X * ds/dX = s + gamma * X * drsig/dX -// db/dX = -u * drsig/dX - rsig * dmu/dX +// db/dX = -gamma * u * drsig/dX - gamma * rsig * dmu/dX // drsig/dX = -rsig^3 * (X - mu) / n // dmu/dX = 1 / n diff --git a/caffe2/operators/group_norm_op.cu b/caffe2/operators/group_norm_op.cu index 3e1dcf5c15..4e498adba9 100644 --- a/caffe2/operators/group_norm_op.cu +++ b/caffe2/operators/group_norm_op.cu @@ -139,12 +139,12 @@ __global__ void ComputeInternalGradientsNCHWCUDAKernel( // Math: // Y = gamma * (X - mu) * rsig + beta // let s = gamma * rsig -// let b = beta - mu * rsig +// let b = beta - gamma * mu * rsig // Y = s * X + b // let n = K * HxW // dL/dX = dL/dY * dY/dX = dL/dY * (d(s * X)/dX + db/dX) // d(s * X)/dX = s + X * ds/dX = s + gamma * X * drsig/dX -// db/dX = -u * drsig/dX - rsig * dmu/dX +// db/dX = -gamma * u * drsig/dX - gamma * rsig * dmu/dX // drsig/dX = -rsig^3 * (X - mu) / n // dmu/dX = 1 / n template <typename T> |