diff options
author | Ashwin Sekhar T K <ashwin.sekhar@cavium.com> | 2017-07-02 02:24:38 +0530 |
---|---|---|
committer | Ashwin Sekhar T K <ashwin.sekhar@cavium.com> | 2017-07-02 02:24:38 +0530 |
commit | 09bc6ebe5b26aecd405a25dad2fa2934642fc827 (patch) | |
tree | c3ff810a11d9977b2ffd7f51cf21e7d9bf15a4d9 | |
parent | 872a11a2bfd90225d5ace725b0ec4f59bd9291f3 (diff) | |
download | openblas-09bc6ebe5b26aecd405a25dad2fa2934642fc827.tar.gz openblas-09bc6ebe5b26aecd405a25dad2fa2934642fc827.tar.bz2 openblas-09bc6ebe5b26aecd405a25dad2fa2934642fc827.zip |
arm: add softfp support in dgemm/dtrmm vfp kernels
-rw-r--r-- | kernel/arm/KERNEL.ARMV6 | 8 |
-rw-r--r-- | kernel/arm/KERNEL.ARMV7 | 10 |
-rw-r--r-- | kernel/arm/dgemm_kernel_4x2_vfp.S | 13 |
-rw-r--r-- | kernel/arm/dgemm_kernel_4x4_vfpv3.S | 12 |
-rw-r--r-- | kernel/arm/dtrmm_kernel_4x2_vfp.S | 13 |
-rw-r--r-- | kernel/arm/dtrmm_kernel_4x4_vfpv3.S | 13 |
6 files changed, 55 insertions, 14 deletions
diff --git a/kernel/arm/KERNEL.ARMV6 b/kernel/arm/KERNEL.ARMV6 index 18d9869de..622085b45 100644 --- a/kernel/arm/KERNEL.ARMV6 +++ b/kernel/arm/KERNEL.ARMV6 @@ -72,7 +72,6 @@ DGEMVTKERNEL = gemv_t_vfp.S CGEMVTKERNEL = cgemv_t_vfp.S ZGEMVTKERNEL = zgemv_t_vfp.S -SGEMMKERNEL = ../generic/gemmkernel_4x2.c SGEMMKERNEL = sgemm_kernel_4x2_vfp.S ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) SGEMMINCOPY = sgemm_ncopy_4_vfp.S @@ -85,7 +84,7 @@ SGEMMOTCOPY = ../generic/gemm_tcopy_2.c SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMOTCOPYOBJ = sgemm_otcopy.o -DGEMMKERNEL = ../generic/gemmkernel_4x2.c +DGEMMKERNEL = dgemm_kernel_4x2_vfp.S ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) DGEMMINCOPY = dgemm_ncopy_4_vfp.S DGEMMITCOPY = dgemm_tcopy_4_vfp.S @@ -97,9 +96,8 @@ DGEMMOTCOPY = ../generic/gemm_tcopy_2.c DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMOTCOPYOBJ = dgemm_otcopy.o -STRMMKERNEL = ../generic/trmmkernel_4x2.c STRMMKERNEL = strmm_kernel_4x2_vfp.S -DTRMMKERNEL = ../generic/trmmkernel_4x2.c +DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S CGEMMONCOPY = cgemm_ncopy_2_vfp.S CGEMMOTCOPY = cgemm_tcopy_2_vfp.S @@ -113,11 +111,9 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o ifeq ($(ARM_ABI),hard) -DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S -DGEMMKERNEL = dgemm_kernel_4x2_vfp.S CGEMMKERNEL = cgemm_kernel_2x2_vfp.S ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7 index e2044133d..63c468e66 100644 --- a/kernel/arm/KERNEL.ARMV7 +++ b/kernel/arm/KERNEL.ARMV7 @@ -8,8 +8,8 @@ ZNRM2KERNEL = nrm2_vfpv3.S SGEMVNKERNEL = gemv_n_vfpv3.S DGEMVNKERNEL = gemv_n_vfpv3.S -STRMMKERNEL = ../generic/trmmkernel_4x4.c -DTRMMKERNEL = ../generic/trmmkernel_4x4.c +STRMMKERNEL = strmm_kernel_4x4_vfpv3.S +DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S SGEMMONCOPY = sgemm_ncopy_4_vfp.S @@ -17,7 +17,7 @@ SGEMMOTCOPY = sgemm_tcopy_4_vfp.S SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMOTCOPYOBJ = 
sgemm_otcopy.o -DGEMMKERNEL = ../generic/gemmkernel_4x4.c +DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S DGEMMONCOPY = dgemm_ncopy_4_vfp.S DGEMMOTCOPY = dgemm_tcopy_4_vfp.S DGEMMONCOPYOBJ = dgemm_oncopy.o @@ -25,13 +25,9 @@ DGEMMOTCOPYOBJ = dgemm_otcopy.o ifeq ($(ARM_ABI),hard) -STRMMKERNEL = strmm_kernel_4x4_vfpv3.S -DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S -DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S - CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S diff --git a/kernel/arm/dgemm_kernel_4x2_vfp.S b/kernel/arm/dgemm_kernel_4x2_vfp.S index 183269d1b..001a6050c 100644 --- a/kernel/arm/dgemm_kernel_4x2_vfp.S +++ b/kernel/arm/dgemm_kernel_4x2_vfp.S @@ -62,10 +62,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ALPHA [fp, #-280] - +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHA_SOFTFP [fp, #4] +#define OLD_A_SOFTFP [fp, #12 ] +#define B [fp, #16 ] +#define C [fp, #20 ] +#define OLD_LDC [fp, #24 ] +#else #define B [fp, #4 ] #define C [fp, #8 ] #define OLD_LDC [fp, #12 ] +#endif #define I r0 #define J r1 @@ -429,6 +436,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add fp, sp, #24 sub sp, sp, #STACKSIZE // reserve stack +#if !defined(__ARM_PCS_VFP) + vldr OLD_ALPHA, OLD_ALPHA_SOFTFP + ldr OLD_A, OLD_A_SOFTFP +#endif str OLD_M, M str OLD_N, N str OLD_K, K diff --git a/kernel/arm/dgemm_kernel_4x4_vfpv3.S b/kernel/arm/dgemm_kernel_4x4_vfpv3.S index b14052e06..1744b54d8 100644 --- a/kernel/arm/dgemm_kernel_4x4_vfpv3.S +++ b/kernel/arm/dgemm_kernel_4x4_vfpv3.S @@ -79,9 +79,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ALPHA [fp, #-280] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHA_SOFTFP [fp, #4] +#define OLD_A_SOFTFP [fp, #12 ] +#define B [fp, #16 ] +#define C [fp, #20 ] +#define OLD_LDC [fp, #24 ] +#else #define B [fp, #4 ] #define C [fp, #8 ] #define OLD_LDC [fp, #12 ] +#endif #define I r0 #define J r1 @@ -878,6 +886,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add fp, sp, #24 sub sp, sp, #STACKSIZE // reserve stack +#if !defined(__ARM_PCS_VFP) + vldr OLD_ALPHA, OLD_ALPHA_SOFTFP + ldr OLD_A, OLD_A_SOFTFP +#endif str OLD_M, M str OLD_N, N str OLD_K, K diff --git a/kernel/arm/dtrmm_kernel_4x2_vfp.S b/kernel/arm/dtrmm_kernel_4x2_vfp.S index c578d2b1e..3d6fbf8e9 100644 --- a/kernel/arm/dtrmm_kernel_4x2_vfp.S +++ b/kernel/arm/dtrmm_kernel_4x2_vfp.S @@ -65,10 +65,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ALPHA [fp, #-276 ] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHA_SOFTFP [fp, #4] +#define OLD_A_SOFTFP [fp, #12 ] +#define B [fp, #16 ] +#define OLD_C [fp, #20 ] +#define OLD_LDC [fp, #24 ] +#define OFFSET [fp, #28 ] +#else #define B [fp, #4 ] #define OLD_C [fp, #8 ] #define OLD_LDC [fp, #12 ] #define OFFSET [fp, #16 ] +#endif #define I r0 #define J r1 @@ -404,6 +413,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add fp, sp, #24 sub sp, sp, #STACKSIZE // reserve stack +#if !defined(__ARM_PCS_VFP) + vldr OLD_ALPHA, OLD_ALPHA_SOFTFP + ldr OLD_A, OLD_A_SOFTFP +#endif str OLD_M, M str OLD_N, N str OLD_K, K diff --git a/kernel/arm/dtrmm_kernel_4x4_vfpv3.S b/kernel/arm/dtrmm_kernel_4x4_vfpv3.S index c7e455f16..c0c6a1677 100644 --- a/kernel/arm/dtrmm_kernel_4x4_vfpv3.S +++ b/kernel/arm/dtrmm_kernel_4x4_vfpv3.S @@ -66,10 +66,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ALPHA [fp, #-276 ] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHA_SOFTFP [fp, #4] +#define OLD_A_SOFTFP [fp, #12 ] +#define B [fp, #16 ] +#define OLD_C [fp, #20 ] +#define OLD_LDC [fp, #24 ] +#define OFFSET [fp, #28 ] +#else #define B [fp, #4 ] #define OLD_C [fp, #8 ] #define OLD_LDC [fp, #12 ] #define OFFSET [fp, #16 ] +#endif #define I r0 #define J r1 @@ -846,6 +855,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add fp, sp, #24 sub sp, sp, #STACKSIZE // reserve stack +#if !defined(__ARM_PCS_VFP) + vldr OLD_ALPHA, OLD_ALPHA_SOFTFP + ldr OLD_A, OLD_A_SOFTFP +#endif str OLD_M, M str OLD_N, N str OLD_K, K |