diff options
author | wjc404 <52632443+wjc404@users.noreply.github.com> | 2019-07-20 22:04:41 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-07-20 22:04:41 +0800 |
commit | 94db259e5b432a7f1769c1d61071b9dd727778db (patch) | |
tree | 770a153b918a40a2aa65285ac790c4f9219462d4 | |
parent | f49f8047acbea636eb2a3542f306803a1285793b (diff) | |
download | openblas-94db259e5b432a7f1769c1d61071b9dd727778db.tar.gz openblas-94db259e5b432a7f1769c1d61071b9dd727778db.tar.bz2 openblas-94db259e5b432a7f1769c1d61071b9dd727778db.zip |
Add files via upload
-rw-r--r-- | kernel/x86_64/dgemm_kernel_4x8_haswell.S | 45 |
1 files changed, 19 insertions, 26 deletions
diff --git a/kernel/x86_64/dgemm_kernel_4x8_haswell.S b/kernel/x86_64/dgemm_kernel_4x8_haswell.S
index 6d1460bb2..6a8619e32 100644
--- a/kernel/x86_64/dgemm_kernel_4x8_haswell.S
+++ b/kernel/x86_64/dgemm_kernel_4x8_haswell.S
@@ -1622,35 +1622,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro PREFETCHT0_C
+ prefetcht0 ALPHA
prefetcht0 (CO1)
prefetcht0 24(CO1)
prefetcht0 (CO1,LDC,4)
prefetcht0 24(CO1,LDC,4)
prefetcht0 (CO1,LDC,8)
prefetcht0 24(CO1,LDC,8)
- addq LDC,CO1
- prefetcht0 (CO1)
- prefetcht0 24(CO1)
- prefetcht0 (CO1,LDC,4)
- prefetcht0 24(CO1,LDC,4)
- prefetcht0 (CO1,LDC,8)
- prefetcht0 24(CO1,LDC,8)
- leaq (CO1,LDC,2),CO1
- prefetcht0 (CO1)
- prefetcht0 24(CO1)
- prefetcht0 (CO1,LDC,4)
- prefetcht0 24(CO1,LDC,4)
- prefetcht0 (CO1,LDC,8)
- prefetcht0 24(CO1,LDC,8)
- subq LDC,CO1
- prefetcht0 (CO1)
- prefetcht0 24(CO1)
- prefetcht0 (CO1,LDC,4)
- prefetcht0 24(CO1,LDC,4)
- prefetcht0 (CO1,LDC,8)
- prefetcht0 24(CO1,LDC,8)
- subq LDC,CO1
- subq LDC,CO1
.endm
/*******************************************************************************************/
@@ -1820,12 +1798,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
dec %rax
jne .L12_12
- PREFETCHT0_C
.L12_12a:
-
+ PREFETCHT0_C
+ addq LDC,CO1
KERNEL4x12_M1
+ PREFETCHT0_C
+ leaq (CO1,LDC,2),CO1
KERNEL4x12_M2
+ PREFETCHT0_C
+ subq LDC,CO1
KERNEL4x12_M1
+ PREFETCHT0_C
+ subq LDC,CO1
+ subq LDC,CO1
KERNEL4x12_M2
KERNEL4x12_M1
@@ -2133,9 +2118,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.L13_12a:
+ PREFETCHT0_C
+ addq LDC,CO1
KERNEL4x12_M1
+ PREFETCHT0_C
+ leaq (CO1,LDC,2),CO1
KERNEL4x12_M2
+ PREFETCHT0_C
+ subq LDC,CO1
KERNEL4x12_M1
+ PREFETCHT0_C
+ subq LDC,CO1
+ subq LDC,CO1
KERNEL4x12_M2
KERNEL4x12_M1
@@ -2145,7 +2139,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
jmp .L13_16
- PREFETCHT0_C
.L13_13:
test $1, %rax
|