summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: wjc404 <52632443+wjc404@users.noreply.github.com> 2019-07-20 22:04:41 +0800
committer: GitHub <noreply@github.com> 2019-07-20 22:04:41 +0800
commit: 94db259e5b432a7f1769c1d61071b9dd727778db (patch)
tree: 770a153b918a40a2aa65285ac790c4f9219462d4
parent: f49f8047acbea636eb2a3542f306803a1285793b (diff)
download: openblas-94db259e5b432a7f1769c1d61071b9dd727778db.tar.gz
openblas-94db259e5b432a7f1769c1d61071b9dd727778db.tar.bz2
openblas-94db259e5b432a7f1769c1d61071b9dd727778db.zip
Add files via upload
-rw-r--r-- kernel/x86_64/dgemm_kernel_4x8_haswell.S 45
1 files changed, 19 insertions, 26 deletions
diff --git a/kernel/x86_64/dgemm_kernel_4x8_haswell.S b/kernel/x86_64/dgemm_kernel_4x8_haswell.S
index 6d1460bb2..6a8619e32 100644
--- a/kernel/x86_64/dgemm_kernel_4x8_haswell.S
+++ b/kernel/x86_64/dgemm_kernel_4x8_haswell.S
@@ -1622,35 +1622,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro PREFETCHT0_C
+ prefetcht0 ALPHA
prefetcht0 (CO1)
prefetcht0 24(CO1)
prefetcht0 (CO1,LDC,4)
prefetcht0 24(CO1,LDC,4)
prefetcht0 (CO1,LDC,8)
prefetcht0 24(CO1,LDC,8)
- addq LDC,CO1
- prefetcht0 (CO1)
- prefetcht0 24(CO1)
- prefetcht0 (CO1,LDC,4)
- prefetcht0 24(CO1,LDC,4)
- prefetcht0 (CO1,LDC,8)
- prefetcht0 24(CO1,LDC,8)
- leaq (CO1,LDC,2),CO1
- prefetcht0 (CO1)
- prefetcht0 24(CO1)
- prefetcht0 (CO1,LDC,4)
- prefetcht0 24(CO1,LDC,4)
- prefetcht0 (CO1,LDC,8)
- prefetcht0 24(CO1,LDC,8)
- subq LDC,CO1
- prefetcht0 (CO1)
- prefetcht0 24(CO1)
- prefetcht0 (CO1,LDC,4)
- prefetcht0 24(CO1,LDC,4)
- prefetcht0 (CO1,LDC,8)
- prefetcht0 24(CO1,LDC,8)
- subq LDC,CO1
- subq LDC,CO1
.endm
/*******************************************************************************************/
@@ -1820,12 +1798,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
dec %rax
jne .L12_12
- PREFETCHT0_C
.L12_12a:
-
+ PREFETCHT0_C
+ addq LDC,CO1
KERNEL4x12_M1
+ PREFETCHT0_C
+ leaq (CO1,LDC,2),CO1
KERNEL4x12_M2
+ PREFETCHT0_C
+ subq LDC,CO1
KERNEL4x12_M1
+ PREFETCHT0_C
+ subq LDC,CO1
+ subq LDC,CO1
KERNEL4x12_M2
KERNEL4x12_M1
@@ -2133,9 +2118,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.L13_12a:
+ PREFETCHT0_C
+ addq LDC,CO1
KERNEL4x12_M1
+ PREFETCHT0_C
+ leaq (CO1,LDC,2),CO1
KERNEL4x12_M2
+ PREFETCHT0_C
+ subq LDC,CO1
KERNEL4x12_M1
+ PREFETCHT0_C
+ subq LDC,CO1
+ subq LDC,CO1
KERNEL4x12_M2
KERNEL4x12_M1
@@ -2145,7 +2139,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
jmp .L13_16
- PREFETCHT0_C
.L13_13:
test $1, %rax