diff options
author | Ashwin Sekhar T K <ashwin.sekhar@cavium.com> | 2017-07-02 02:00:48 +0530 |
---|---|---|
committer | Ashwin Sekhar T K <ashwin.sekhar@cavium.com> | 2017-07-02 02:00:48 +0530 |
commit | eda9e8632ab7d94d609006612a4b760214dfa847 (patch) | |
tree | 1e58b54c1e35eee3b050e66dceaab69f65b930ac | |
parent | 8f83d3f961f57fb002d8c5359c32a8db50dcab5d (diff) | |
download | openblas-eda9e8632ab7d94d609006612a4b760214dfa847.tar.gz openblas-eda9e8632ab7d94d609006612a4b760214dfa847.tar.bz2 openblas-eda9e8632ab7d94d609006612a4b760214dfa847.zip |
generic: Bug fixes in generic 4x2 and 4x4 gemm kernels
-rw-r--r-- | kernel/generic/gemmkernel_4x2.c | 30 | ||||
-rw-r--r-- | kernel/generic/gemmkernel_4x4.c | 104 |
2 files changed, 67 insertions, 67 deletions
diff --git a/kernel/generic/gemmkernel_4x2.c b/kernel/generic/gemmkernel_4x2.c
index 1d15de1d7..8c784e2f1 100644
--- a/kernel/generic/gemmkernel_4x2.c
+++ b/kernel/generic/gemmkernel_4x2.c
@@ -154,11 +154,11 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res1_0 *= alpha;
 res1_1 *= alpha;
- C0[0] = res0_0;
- C0[1] = res0_1;
+ C0[0] += res0_0;
+ C0[1] += res0_1;
- C1[0] = res1_0;
- C1[1] = res1_1;
+ C1[0] += res1_0;
+ C1[1] += res1_1;
 C0 = C0+2;
 C1 = C1+2;
@@ -190,12 +190,12 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res1_0 *= alpha;
- C0[0] = res0_0;
+ C0[0] += res0_0;
- C1[0] = res1_0;
+ C1[0] += res1_0;
- C0 = C0+1;
- C1 = C1+1;
+ C0 += C0+1;
+ C1 += C1+1;
 }
@@ -245,10 +245,10 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res0_2 *= alpha;
 res0_3 *= alpha;
- C0[0] = res0_0;
- C0[1] = res0_1;
- C0[2] = res0_2;
- C0[3] = res0_3;
+ C0[0] += res0_0;
+ C0[1] += res0_1;
+ C0[2] += res0_2;
+ C0[3] += res0_3;
 C0 = C0+4;
@@ -278,8 +278,8 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res0_0 *= alpha;
 res0_1 *= alpha;
- C0[0] = res0_0;
- C0[1] = res0_1;
+ C0[0] += res0_0;
+ C0[1] += res0_1;
 C0 = C0+2;
@@ -306,7 +306,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 C0[0] = res0_0;
- C0 = C0+1;
+ C0 += C0+1;
 }
 k = (bk<<0);
diff --git a/kernel/generic/gemmkernel_4x4.c b/kernel/generic/gemmkernel_4x4.c
index bd67b3fc8..99bd9c1ef 100644
--- a/kernel/generic/gemmkernel_4x4.c
+++ b/kernel/generic/gemmkernel_4x4.c
@@ -152,25 +152,25 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res3_2 *= alpha;
 res3_3 *= alpha;
- C0[0] = res0_0;
- C0[1] = res0_1;
- C0[2] = res0_2;
- C0[3] = res0_3;
-
- C1[0] = res1_0;
- C1[1] = res1_1;
- C1[2] = res1_2;
- C1[3] = res1_3;
-
- C2[0] = res2_0;
- C2[1] = res2_1;
- C2[2] = res2_2;
- C2[3] = res2_3;
-
- C3[0] = res3_0;
- C3[1] = res3_1;
- C3[2] = res3_2;
- C3[3] = res3_3;
+ C0[0] += res0_0;
+ C0[1] += res0_1;
+ C0[2] += res0_2;
+ C0[3] += res0_3;
+
+ C1[0] += res1_0;
+ C1[1] += res1_1;
+ C1[2] += res1_2;
+ C1[3] += res1_3;
+
+ C2[0] += res2_0;
+ C2[1] += res2_1;
+ C2[2] += res2_2;
+ C2[3] += res2_3;
+
+ C3[0] += res3_0;
+ C3[1] += res3_1;
+ C3[2] += res3_2;
+ C3[3] += res3_3;
 C0 = C0+4;
 C1 = C1+4;
@@ -230,17 +230,17 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res3_0 *= alpha;
 res3_1 *= alpha;
- C0[0] = res0_0;
- C0[1] = res0_1;
+ C0[0] += res0_0;
+ C0[1] += res0_1;
- C1[0] = res1_0;
- C1[1] = res1_1;
+ C1[0] += res1_0;
+ C1[1] += res1_1;
- C2[0] = res2_0;
- C2[1] = res2_1;
+ C2[0] += res2_0;
+ C2[1] += res2_1;
- C3[0] = res3_0;
- C3[1] = res3_1;
+ C3[0] += res3_0;
+ C3[1] += res3_1;
 C0 = C0+2;
 C1 = C1+2;
@@ -283,13 +283,13 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res3_0 *= alpha;
- C0[0] = res0_0;
+ C0[0] += res0_0;
- C1[0] = res1_0;
+ C1[0] += res1_0;
- C2[0] = res2_0;
+ C2[0] += res2_0;
- C3[0] = res3_0;
+ C3[0] += res3_0;
 C0 = C0+1;
 C1 = C1+1;
@@ -360,15 +360,15 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res1_2 *= alpha;
 res1_3 *= alpha;
- C0[0] = res0_0;
- C0[1] = res0_1;
- C0[2] = res0_2;
- C0[3] = res0_3;
+ C0[0] += res0_0;
+ C0[1] += res0_1;
+ C0[2] += res0_2;
+ C0[3] += res0_3;
- C1[0] = res1_0;
- C1[1] = res1_1;
- C1[2] = res1_2;
- C1[3] = res1_3;
+ C1[0] += res1_0;
+ C1[1] += res1_1;
+ C1[2] += res1_2;
+ C1[3] += res1_3;
 C0 = C0+4;
 C1 = C1+4;
@@ -408,11 +408,11 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res1_0 *= alpha;
 res1_1 *= alpha;
- C0[0] = res0_0;
- C0[1] = res0_1;
+ C0[0] += res0_0;
+ C0[1] += res0_1;
- C1[0] = res1_0;
- C1[1] = res1_1;
+ C1[0] += res1_0;
+ C1[1] += res1_1;
 C0 = C0+2;
 C1 = C1+2;
@@ -444,9 +444,9 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res1_0 *= alpha;
- C0[0] = res0_0;
+ C0[0] += res0_0;
- C1[0] = res1_0;
+ C1[0] += res1_0;
 C0 = C0+1;
 C1 = C1+1;
@@ -499,10 +499,10 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res0_2 *= alpha;
 res0_3 *= alpha;
- C0[0] = res0_0;
- C0[1] = res0_1;
- C0[2] = res0_2;
- C0[3] = res0_3;
+ C0[0] += res0_0;
+ C0[1] += res0_1;
+ C0[2] += res0_2;
+ C0[3] += res0_3;
 C0 = C0+4;
@@ -532,8 +532,8 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res0_0 *= alpha;
 res0_1 *= alpha;
- C0[0] = res0_0;
- C0[1] = res0_1;
+ C0[0] += res0_0;
+ C0[1] += res0_1;
 C0 = C0+2;
@@ -558,7 +558,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 res0_0 *= alpha;
- C0[0] = res0_0;
+ C0[0] += res0_0;
 C0 = C0+1;