diff options
author | Ashwin Sekhar T K <ashwin.sekhar@cavium.com> | 2017-07-02 02:42:32 +0530 |
---|---|---|
committer | Ashwin Sekhar T K <ashwin.sekhar@cavium.com> | 2017-07-02 02:42:32 +0530 |
commit | 305cd2e8b41f4daccdfa1e6631bce7f7133faf92 (patch) | |
tree | 1f015b468e681be046e3777711af43ba8f143850 | |
parent | 09bc6ebe5b26aecd405a25dad2fa2934642fc827 (diff) | |
download | openblas-305cd2e8b41f4daccdfa1e6631bce7f7133faf92.tar.gz openblas-305cd2e8b41f4daccdfa1e6631bce7f7133faf92.tar.bz2 openblas-305cd2e8b41f4daccdfa1e6631bce7f7133faf92.zip |
arm: add softfp support in cgemm/ctrmm vfp kernels
-rw-r--r-- | kernel/arm/KERNEL.ARMV6 | 4 |
-rw-r--r-- | kernel/arm/KERNEL.ARMV7 | 5 |
-rw-r--r-- | kernel/arm/cgemm_kernel_2x2_vfp.S | 14 |
-rw-r--r-- | kernel/arm/cgemm_kernel_2x2_vfpv3.S | 14 |
-rw-r--r-- | kernel/arm/ctrmm_kernel_2x2_vfp.S | 15 |
-rw-r--r-- | kernel/arm/ctrmm_kernel_2x2_vfpv3.S | 15 |
6 files changed, 63 insertions, 4 deletions
diff --git a/kernel/arm/KERNEL.ARMV6 b/kernel/arm/KERNEL.ARMV6 index 622085b45..e8fc3df73 100644 --- a/kernel/arm/KERNEL.ARMV6 +++ b/kernel/arm/KERNEL.ARMV6 @@ -98,7 +98,9 @@ DGEMMOTCOPYOBJ = dgemm_otcopy.o STRMMKERNEL = strmm_kernel_4x2_vfp.S DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S +CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S +CGEMMKERNEL = cgemm_kernel_2x2_vfp.S CGEMMONCOPY = cgemm_ncopy_2_vfp.S CGEMMOTCOPY = cgemm_tcopy_2_vfp.S CGEMMONCOPYOBJ = cgemm_oncopy.o @@ -111,10 +113,8 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o ifeq ($(ARM_ABI),hard) -CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S -CGEMMKERNEL = cgemm_kernel_2x2_vfp.S ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S endif diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7 index 63c468e66..4bfe18d1d 100644 --- a/kernel/arm/KERNEL.ARMV7 +++ b/kernel/arm/KERNEL.ARMV7 @@ -10,6 +10,7 @@ DGEMVNKERNEL = gemv_n_vfpv3.S STRMMKERNEL = strmm_kernel_4x4_vfpv3.S DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S +CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S SGEMMONCOPY = sgemm_ncopy_4_vfp.S @@ -23,12 +24,12 @@ DGEMMOTCOPY = dgemm_tcopy_4_vfp.S DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMOTCOPYOBJ = dgemm_otcopy.o +CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S + ifeq ($(ARM_ABI),hard) -CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S -CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S endif diff --git a/kernel/arm/cgemm_kernel_2x2_vfp.S b/kernel/arm/cgemm_kernel_2x2_vfp.S index f0517cb47..512eea387 100644 --- a/kernel/arm/cgemm_kernel_2x2_vfp.S +++ b/kernel/arm/cgemm_kernel_2x2_vfp.S @@ -64,9 +64,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHAR_SOFTFP r3 +#define OLD_ALPHAI_SOFTFP [fp, #4] +#define OLD_A_SOFTFP [fp, #8 ] +#define B [fp, #12 ] +#define C [fp, #16 ] +#define OLD_LDC [fp, #20 ] +#else #define B [fp, #4 ] #define C [fp, #8 ] #define OLD_LDC [fp, #12 ] +#endif #define I r0 #define J r1 @@ -816,6 +825,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add fp, sp, #24 sub sp, sp, #STACKSIZE // reserve stack +#if !defined(__ARM_PCS_VFP) + vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP + vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP + ldr OLD_A, OLD_A_SOFTFP +#endif str OLD_M, M str OLD_N, N str OLD_K, K diff --git a/kernel/arm/cgemm_kernel_2x2_vfpv3.S b/kernel/arm/cgemm_kernel_2x2_vfpv3.S index cf132a184..42eb53a55 100644 --- a/kernel/arm/cgemm_kernel_2x2_vfpv3.S +++ b/kernel/arm/cgemm_kernel_2x2_vfpv3.S @@ -80,9 +80,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHAR_SOFTFP r3 +#define OLD_ALPHAI_SOFTFP [fp, #4] +#define OLD_A_SOFTFP [fp, #8 ] +#define B [fp, #12 ] +#define C [fp, #16 ] +#define OLD_LDC [fp, #20 ] +#else #define B [fp, #4 ] #define C [fp, #8 ] #define OLD_LDC [fp, #12 ] +#endif #define I r0 #define J r1 @@ -873,6 +882,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add fp, sp, #24 sub sp, sp, #STACKSIZE // reserve stack +#if !defined(__ARM_PCS_VFP) + vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP + vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP + ldr OLD_A, OLD_A_SOFTFP +#endif str OLD_M, M str OLD_N, N str OLD_K, K diff --git a/kernel/arm/ctrmm_kernel_2x2_vfp.S b/kernel/arm/ctrmm_kernel_2x2_vfp.S index 8cb7ede9d..95578b10a 100644 --- a/kernel/arm/ctrmm_kernel_2x2_vfp.S +++ b/kernel/arm/ctrmm_kernel_2x2_vfp.S @@ -67,10 +67,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHAR_SOFTFP r3 +#define OLD_ALPHAI_SOFTFP [fp, #4] +#define OLD_A_SOFTFP [fp, #8 ] +#define B [fp, #12 ] +#define C [fp, #16 ] +#define OLD_LDC [fp, #20 ] +#define OFFSET [fp, #24 ] +#else #define B [fp, #4 ] #define C [fp, #8 ] #define OLD_LDC [fp, #12 ] #define OFFSET [fp, #16 ] +#endif #define I r0 #define J r1 @@ -826,6 +836,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add fp, sp, #24 sub sp, sp, #STACKSIZE // reserve stack +#if !defined(__ARM_PCS_VFP) + vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP + vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP + ldr OLD_A, OLD_A_SOFTFP +#endif str OLD_M, M str OLD_N, N str OLD_K, K diff --git a/kernel/arm/ctrmm_kernel_2x2_vfpv3.S b/kernel/arm/ctrmm_kernel_2x2_vfpv3.S index 97bd88c69..18beb4e47 100644 --- a/kernel/arm/ctrmm_kernel_2x2_vfpv3.S +++ b/kernel/arm/ctrmm_kernel_2x2_vfpv3.S @@ -66,10 +66,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHAR_SOFTFP r3 +#define OLD_ALPHAI_SOFTFP [fp, #4] +#define OLD_A_SOFTFP [fp, #8 ] +#define B [fp, #12 ] +#define C [fp, #16 ] +#define OLD_LDC [fp, #20 ] +#define OFFSET [fp, #24 ] +#else #define B [fp, #4 ] #define C [fp, #8 ] #define OLD_LDC [fp, #12 ] #define OFFSET [fp, #16 ] +#endif #define I r0 #define J r1 @@ -846,6 +856,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add fp, sp, #24 sub sp, sp, #STACKSIZE // reserve stack +#if !defined(__ARM_PCS_VFP) + vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP + vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP + ldr OLD_A, OLD_A_SOFTFP +#endif str OLD_M, M str OLD_N, N str OLD_K, K |