diff options
author | wjc404 <52632443+wjc404@users.noreply.github.com> | 2019-07-17 23:47:30 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-07-17 23:47:30 +0800 |
commit | 8a074b39656636ebec5812532b486cf751231a3b (patch) | |
tree | 35a67940becf79b27d4839e21e9e12fa09f86e50 | |
parent | 211ab03b1402a3c39311b7ca769aaad736ca554c (diff) | |
download | openblas-8a074b39656636ebec5812532b486cf751231a3b.tar.gz openblas-8a074b39656636ebec5812532b486cf751231a3b.tar.bz2 openblas-8a074b39656636ebec5812532b486cf751231a3b.zip |
Update dgemm_kernel_4x8_haswell.S
-rw-r--r-- | kernel/x86_64/dgemm_kernel_4x8_haswell.S | 42 |
1 files changed, 37 insertions, 5 deletions
diff --git a/kernel/x86_64/dgemm_kernel_4x8_haswell.S b/kernel/x86_64/dgemm_kernel_4x8_haswell.S index 3f7f9a98e..5242e3efe 100644 --- a/kernel/x86_64/dgemm_kernel_4x8_haswell.S +++ b/kernel/x86_64/dgemm_kernel_4x8_haswell.S @@ -267,24 +267,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro SAVE4x12
- prefetcht0 128(%rsp) /*BUFFER 1*/
+ prefetcht0 BUFFER1
vbroadcastsd ALPHA, %ymm0
vmulpd %ymm0 , %ymm4 , %ymm4
vmulpd %ymm0 , %ymm5 , %ymm5
vmulpd %ymm0 , %ymm6 , %ymm6
vmulpd %ymm0 , %ymm7 , %ymm7
- prefetcht0 192(%rsp)
+ prefetcht0 64 + BUFFER1
vmulpd %ymm0 , %ymm8 , %ymm8
vmulpd %ymm0 , %ymm9 , %ymm9
vmulpd %ymm0 , %ymm10, %ymm10
vmulpd %ymm0 , %ymm11, %ymm11
- prefetcht0 256(%rsp)
+ prefetcht0 128 + BUFFER1
vmulpd %ymm0 , %ymm12, %ymm12
vmulpd %ymm0 , %ymm13, %ymm13
vmulpd %ymm0 , %ymm14, %ymm14
vmulpd %ymm0 , %ymm15, %ymm15
- prefetcht0 320(%rsp)
+ prefetcht0 192 + BUFFER1
vpermilpd $ 0x05 , %ymm5, %ymm5
vpermilpd $ 0x05 , %ymm7, %ymm7
@@ -1606,6 +1606,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .endm
+.macro PREFETCHT0_C /* Issue prefetcht0 on CO1 + k*LDC and on 24 bytes past it, for k = 0..11. CO1 is stepped temporarily and restored before .endm; addq/subq overwrite the arithmetic flags. NOTE(review): presumably LDC is the byte stride between adjacent columns of C and each touched span is one 4-double column of the tile - confirm against the kernel setup. */
+ prefetcht0 (CO1) /* k = 0 */
+ prefetcht0 24(CO1) /* same column, 24 bytes further; presumably catches a 32-byte span that straddles a cache-line boundary - confirm */
+ prefetcht0 (CO1,LDC,4) /* k = 4 */
+ prefetcht0 24(CO1,LDC,4)
+ prefetcht0 (CO1,LDC,8) /* k = 8 */
+ prefetcht0 24(CO1,LDC,8)
+ addq LDC,CO1 /* base = original CO1 + 1*LDC (index scale only allows 1/2/4/8, so the base itself is moved) */
+ prefetcht0 (CO1) /* k = 1 */
+ prefetcht0 24(CO1)
+ prefetcht0 (CO1,LDC,4) /* k = 5 */
+ prefetcht0 24(CO1,LDC,4)
+ prefetcht0 (CO1,LDC,8) /* k = 9 */
+ prefetcht0 24(CO1,LDC,8)
+ leaq (CO1,LDC,2),CO1 /* base = original CO1 + 3*LDC */
+ prefetcht0 (CO1) /* k = 3 */
+ prefetcht0 24(CO1)
+ prefetcht0 (CO1,LDC,4) /* k = 7 */
+ prefetcht0 24(CO1,LDC,4)
+ prefetcht0 (CO1,LDC,8) /* k = 11 */
+ prefetcht0 24(CO1,LDC,8)
+ subq LDC,CO1 /* base = original CO1 + 2*LDC */
+ prefetcht0 (CO1) /* k = 2 */
+ prefetcht0 24(CO1)
+ prefetcht0 (CO1,LDC,4) /* k = 6 */
+ prefetcht0 24(CO1,LDC,4)
+ prefetcht0 (CO1,LDC,8) /* k = 10 */
+ prefetcht0 24(CO1,LDC,8)
+ subq LDC,CO1 /* base = original CO1 + 1*LDC */
+ subq LDC,CO1 /* base = original CO1: net displacement is +1 +2 -1 -1 -1 = 0 */
+.endm
/*******************************************************************************************/
#if !defined(TRMMKERNEL)
@@ -1773,7 +1804,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. dec %rax
jne .L12_12
-
+
+ PREFETCHT0_C
.L12_12a:
KERNEL4x12_M1
|