summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ashwin Sekhar T K <ashwin.sekhar@cavium.com> 2017-07-02 02:42:32 +0530
committer: Ashwin Sekhar T K <ashwin.sekhar@cavium.com> 2017-07-02 02:42:32 +0530
commit: 305cd2e8b41f4daccdfa1e6631bce7f7133faf92 (patch)
tree: 1f015b468e681be046e3777711af43ba8f143850
parent: 09bc6ebe5b26aecd405a25dad2fa2934642fc827 (diff)
download: openblas-305cd2e8b41f4daccdfa1e6631bce7f7133faf92.tar.gz
          openblas-305cd2e8b41f4daccdfa1e6631bce7f7133faf92.tar.bz2
          openblas-305cd2e8b41f4daccdfa1e6631bce7f7133faf92.zip
arm: add softfp support in cgemm/ctrmm vfp kernels
-rw-r--r--  kernel/arm/KERNEL.ARMV6                |  4
-rw-r--r--  kernel/arm/KERNEL.ARMV7                |  5
-rw-r--r--  kernel/arm/cgemm_kernel_2x2_vfp.S      | 14
-rw-r--r--  kernel/arm/cgemm_kernel_2x2_vfpv3.S    | 14
-rw-r--r--  kernel/arm/ctrmm_kernel_2x2_vfp.S      | 15
-rw-r--r--  kernel/arm/ctrmm_kernel_2x2_vfpv3.S    | 15
6 files changed, 63 insertions, 4 deletions
diff --git a/kernel/arm/KERNEL.ARMV6 b/kernel/arm/KERNEL.ARMV6
index 622085b45..e8fc3df73 100644
--- a/kernel/arm/KERNEL.ARMV6
+++ b/kernel/arm/KERNEL.ARMV6
@@ -98,7 +98,9 @@ DGEMMOTCOPYOBJ = dgemm_otcopy.o
STRMMKERNEL = strmm_kernel_4x2_vfp.S
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
+CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
+CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
CGEMMONCOPYOBJ = cgemm_oncopy.o
@@ -111,10 +113,8 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o
ifeq ($(ARM_ABI),hard)
-CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
-CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S
endif
diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7
index 63c468e66..4bfe18d1d 100644
--- a/kernel/arm/KERNEL.ARMV7
+++ b/kernel/arm/KERNEL.ARMV7
@@ -10,6 +10,7 @@ DGEMVNKERNEL = gemv_n_vfpv3.S
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
+CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
@@ -23,12 +24,12 @@ DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
+CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
+
ifeq ($(ARM_ABI),hard)
-CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
-CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
endif
diff --git a/kernel/arm/cgemm_kernel_2x2_vfp.S b/kernel/arm/cgemm_kernel_2x2_vfp.S
index f0517cb47..512eea387 100644
--- a/kernel/arm/cgemm_kernel_2x2_vfp.S
+++ b/kernel/arm/cgemm_kernel_2x2_vfp.S
@@ -64,9 +64,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP r3
+#define OLD_ALPHAI_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #8 ]
+#define B [fp, #12 ]
+#define C [fp, #16 ]
+#define OLD_LDC [fp, #20 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
+#endif
#define I r0
#define J r1
@@ -816,6 +825,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+ vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/cgemm_kernel_2x2_vfpv3.S b/kernel/arm/cgemm_kernel_2x2_vfpv3.S
index cf132a184..42eb53a55 100644
--- a/kernel/arm/cgemm_kernel_2x2_vfpv3.S
+++ b/kernel/arm/cgemm_kernel_2x2_vfpv3.S
@@ -80,9 +80,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP r3
+#define OLD_ALPHAI_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #8 ]
+#define B [fp, #12 ]
+#define C [fp, #16 ]
+#define OLD_LDC [fp, #20 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
+#endif
#define I r0
#define J r1
@@ -873,6 +882,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+ vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/ctrmm_kernel_2x2_vfp.S b/kernel/arm/ctrmm_kernel_2x2_vfp.S
index 8cb7ede9d..95578b10a 100644
--- a/kernel/arm/ctrmm_kernel_2x2_vfp.S
+++ b/kernel/arm/ctrmm_kernel_2x2_vfp.S
@@ -67,10 +67,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP r3
+#define OLD_ALPHAI_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #8 ]
+#define B [fp, #12 ]
+#define C [fp, #16 ]
+#define OLD_LDC [fp, #20 ]
+#define OFFSET [fp, #24 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
+#endif
#define I r0
#define J r1
@@ -826,6 +836,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+ vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/ctrmm_kernel_2x2_vfpv3.S b/kernel/arm/ctrmm_kernel_2x2_vfpv3.S
index 97bd88c69..18beb4e47 100644
--- a/kernel/arm/ctrmm_kernel_2x2_vfpv3.S
+++ b/kernel/arm/ctrmm_kernel_2x2_vfpv3.S
@@ -66,10 +66,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP r3
+#define OLD_ALPHAI_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #8 ]
+#define B [fp, #12 ]
+#define C [fp, #16 ]
+#define OLD_LDC [fp, #20 ]
+#define OFFSET [fp, #24 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
+#endif
#define I r0
#define J r1
@@ -846,6 +856,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+ vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K