summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author wjc404 <52632443+wjc404@users.noreply.github.com> 2019-07-19 23:47:58 +0800
committer GitHub <noreply@github.com> 2019-07-19 23:47:58 +0800
commit 9c89757562f43af48645a6563161909321077646 (patch)
tree f54840048a889efec06aff09361a2fd623aeaaea
parent 9b04baeaeeaaaeba8c12e3fc2418ceaeca53ebb0 (diff)
download openblas-9c89757562f43af48645a6563161909321077646.tar.gz
openblas-9c89757562f43af48645a6563161909321077646.tar.bz2
openblas-9c89757562f43af48645a6563161909321077646.zip
Add files via upload
-rw-r--r-- kernel/x86_64/dgemm_kernel_4x8_haswell.S 29
1 files changed, 28 insertions, 1 deletions
diff --git a/kernel/x86_64/dgemm_kernel_4x8_haswell.S b/kernel/x86_64/dgemm_kernel_4x8_haswell.S
index 42692f33b..e26bddea3 100644
--- a/kernel/x86_64/dgemm_kernel_4x8_haswell.S
+++ b/kernel/x86_64/dgemm_kernel_4x8_haswell.S
@@ -1865,6 +1865,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SAVE4x12
+ salq $3, K
+ prefetcht2 32(B)
+ prefetcht2 32(B, K, 8)
+ prefetcht2 96(B)
+ prefetcht2 96(B, K, 8)
+ addq $128, B
+ sarq $3, K
+
decq I # i --
jne .L12_11
ALIGN_4
@@ -1872,6 +1880,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/**************************************************************************
* Rest of M
***************************************************************************/
+ movq M, I
+ sarq $2, I
+ salq $7, I
+ subq I, B
+
.L12_20:
// Test rest of M
@@ -2102,7 +2115,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
jmp .L13_16
-
+ PREFETCHT0_C
.L13_13:
test $1, %rax
@@ -2147,6 +2160,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SAVE4x12
+ salq $3, K
+ prefetcht2 (B)
+ prefetcht2 (B, K, 8)
+ prefetcht2 64(B)
+ prefetcht2 64(B, K, 8)
+ addq $128, B
+ sarq $3, K
+
decq I # i --
jne .L13_11
ALIGN_4
@@ -2154,6 +2175,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/**************************************************************************
* Rest of M
***************************************************************************/
+
+ movq M, I
+ sarq $2, I
+ salq $7, I
+ subq I, B
+
.L13_20:
// Test rest of M