summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ashwin Sekhar T K <ashwin.sekhar@cavium.com> 2017-07-02 02:24:38 +0530
committer: Ashwin Sekhar T K <ashwin.sekhar@cavium.com> 2017-07-02 02:24:38 +0530
commit: 09bc6ebe5b26aecd405a25dad2fa2934642fc827 (patch)
tree: c3ff810a11d9977b2ffd7f51cf21e7d9bf15a4d9
parent: 872a11a2bfd90225d5ace725b0ec4f59bd9291f3 (diff)
download: openblas-09bc6ebe5b26aecd405a25dad2fa2934642fc827.tar.gz
          openblas-09bc6ebe5b26aecd405a25dad2fa2934642fc827.tar.bz2
          openblas-09bc6ebe5b26aecd405a25dad2fa2934642fc827.zip
arm: add softfp support in dgemm/dtrmm vfp kernels
-rw-r--r-- kernel/arm/KERNEL.ARMV6 | 8
-rw-r--r-- kernel/arm/KERNEL.ARMV7 | 10
-rw-r--r-- kernel/arm/dgemm_kernel_4x2_vfp.S | 13
-rw-r--r-- kernel/arm/dgemm_kernel_4x4_vfpv3.S | 12
-rw-r--r-- kernel/arm/dtrmm_kernel_4x2_vfp.S | 13
-rw-r--r-- kernel/arm/dtrmm_kernel_4x4_vfpv3.S | 13
6 files changed, 55 insertions, 14 deletions
diff --git a/kernel/arm/KERNEL.ARMV6 b/kernel/arm/KERNEL.ARMV6
index 18d9869de..622085b45 100644
--- a/kernel/arm/KERNEL.ARMV6
+++ b/kernel/arm/KERNEL.ARMV6
@@ -72,7 +72,6 @@ DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S
-SGEMMKERNEL = ../generic/gemmkernel_4x2.c
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
SGEMMINCOPY = sgemm_ncopy_4_vfp.S
@@ -85,7 +84,7 @@ SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
-DGEMMKERNEL = ../generic/gemmkernel_4x2.c
+DGEMMKERNEL = dgemm_kernel_4x2_vfp.S
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
DGEMMINCOPY = dgemm_ncopy_4_vfp.S
DGEMMITCOPY = dgemm_tcopy_4_vfp.S
@@ -97,9 +96,8 @@ DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
-STRMMKERNEL = ../generic/trmmkernel_4x2.c
STRMMKERNEL = strmm_kernel_4x2_vfp.S
-DTRMMKERNEL = ../generic/trmmkernel_4x2.c
+DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
@@ -113,11 +111,9 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o
ifeq ($(ARM_ABI),hard)
-DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
-DGEMMKERNEL = dgemm_kernel_4x2_vfp.S
CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S
diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7
index e2044133d..63c468e66 100644
--- a/kernel/arm/KERNEL.ARMV7
+++ b/kernel/arm/KERNEL.ARMV7
@@ -8,8 +8,8 @@ ZNRM2KERNEL = nrm2_vfpv3.S
SGEMVNKERNEL = gemv_n_vfpv3.S
DGEMVNKERNEL = gemv_n_vfpv3.S
-STRMMKERNEL = ../generic/trmmkernel_4x4.c
-DTRMMKERNEL = ../generic/trmmkernel_4x4.c
+STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
+DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
@@ -17,7 +17,7 @@ SGEMMOTCOPY = sgemm_tcopy_4_vfp.S
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
-DGEMMKERNEL = ../generic/gemmkernel_4x4.c
+DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S
DGEMMONCOPY = dgemm_ncopy_4_vfp.S
DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
DGEMMONCOPYOBJ = dgemm_oncopy.o
@@ -25,13 +25,9 @@ DGEMMOTCOPYOBJ = dgemm_otcopy.o
ifeq ($(ARM_ABI),hard)
-STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
-DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
-DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S
-
CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
diff --git a/kernel/arm/dgemm_kernel_4x2_vfp.S b/kernel/arm/dgemm_kernel_4x2_vfp.S
index 183269d1b..001a6050c 100644
--- a/kernel/arm/dgemm_kernel_4x2_vfp.S
+++ b/kernel/arm/dgemm_kernel_4x2_vfp.S
@@ -62,10 +62,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA [fp, #-280]
-
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHA_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #12 ]
+#define B [fp, #16 ]
+#define C [fp, #20 ]
+#define OLD_LDC [fp, #24 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
+#endif
#define I r0
#define J r1
@@ -429,6 +436,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/dgemm_kernel_4x4_vfpv3.S b/kernel/arm/dgemm_kernel_4x4_vfpv3.S
index b14052e06..1744b54d8 100644
--- a/kernel/arm/dgemm_kernel_4x4_vfpv3.S
+++ b/kernel/arm/dgemm_kernel_4x4_vfpv3.S
@@ -79,9 +79,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHA_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #12 ]
+#define B [fp, #16 ]
+#define C [fp, #20 ]
+#define OLD_LDC [fp, #24 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
+#endif
#define I r0
#define J r1
@@ -878,6 +886,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/dtrmm_kernel_4x2_vfp.S b/kernel/arm/dtrmm_kernel_4x2_vfp.S
index c578d2b1e..3d6fbf8e9 100644
--- a/kernel/arm/dtrmm_kernel_4x2_vfp.S
+++ b/kernel/arm/dtrmm_kernel_4x2_vfp.S
@@ -65,10 +65,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA [fp, #-276 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHA_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #12 ]
+#define B [fp, #16 ]
+#define OLD_C [fp, #20 ]
+#define OLD_LDC [fp, #24 ]
+#define OFFSET [fp, #28 ]
+#else
#define B [fp, #4 ]
#define OLD_C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
+#endif
#define I r0
#define J r1
@@ -404,6 +413,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/dtrmm_kernel_4x4_vfpv3.S b/kernel/arm/dtrmm_kernel_4x4_vfpv3.S
index c7e455f16..c0c6a1677 100644
--- a/kernel/arm/dtrmm_kernel_4x4_vfpv3.S
+++ b/kernel/arm/dtrmm_kernel_4x4_vfpv3.S
@@ -66,10 +66,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA [fp, #-276 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHA_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #12 ]
+#define B [fp, #16 ]
+#define OLD_C [fp, #20 ]
+#define OLD_LDC [fp, #24 ]
+#define OFFSET [fp, #28 ]
+#else
#define B [fp, #4 ]
#define OLD_C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
+#endif
#define I r0
#define J r1
@@ -846,6 +855,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K