diff options
author | wjc404 <52632443+wjc404@users.noreply.github.com> | 2019-07-19 23:47:58 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-07-19 23:47:58 +0800 |
commit | 9c89757562f43af48645a6563161909321077646 (patch) | |
tree | f54840048a889efec06aff09361a2fd623aeaaea | |
parent | 9b04baeaeeaaaeba8c12e3fc2418ceaeca53ebb0 (diff) | |
download | openblas-9c89757562f43af48645a6563161909321077646.tar.gz openblas-9c89757562f43af48645a6563161909321077646.tar.bz2 openblas-9c89757562f43af48645a6563161909321077646.zip |
Add files via upload
-rw-r--r-- | kernel/x86_64/dgemm_kernel_4x8_haswell.S | 29 |
1 file changed, 28 insertions, 1 deletion
diff --git a/kernel/x86_64/dgemm_kernel_4x8_haswell.S b/kernel/x86_64/dgemm_kernel_4x8_haswell.S index 42692f33b..e26bddea3 100644 --- a/kernel/x86_64/dgemm_kernel_4x8_haswell.S +++ b/kernel/x86_64/dgemm_kernel_4x8_haswell.S @@ -1865,6 +1865,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SAVE4x12
+ salq $3, K
+ prefetcht2 32(B)
+ prefetcht2 32(B, K, 8)
+ prefetcht2 96(B)
+ prefetcht2 96(B, K, 8)
+ addq $128, B
+ sarq $3, K
+
decq I # i --
jne .L12_11
ALIGN_4
@@ -1872,6 +1880,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /**************************************************************************
* Rest of M
***************************************************************************/
+ movq M, I
+ sarq $2, I
+ salq $7, I
+ subq I, B
+
.L12_20:
// Test rest of M
@@ -2102,7 +2115,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. jmp .L13_16
-
+ PREFETCHT0_C
.L13_13:
test $1, %rax
@@ -2147,6 +2160,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. SAVE4x12
+ salq $3, K
+ prefetcht2 (B)
+ prefetcht2 (B, K, 8)
+ prefetcht2 64(B)
+ prefetcht2 64(B, K, 8)
+ addq $128, B
+ sarq $3, K
+
decq I # i --
jne .L13_11
ALIGN_4
@@ -2154,6 +2175,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /**************************************************************************
* Rest of M
***************************************************************************/
+
+ movq M, I
+ sarq $2, I
+ salq $7, I
+ subq I, B
+
.L13_20:
// Test rest of M
|