From 872a11a2bfd90225d5ace725b0ec4f59bd9291f3 Mon Sep 17 00:00:00 2001 From: Ashwin Sekhar T K Date: Sun, 2 Jul 2017 02:05:48 +0530 Subject: arm: add softfp support in sgemm/strmm vfp kernels --- kernel/arm/KERNEL.ARMV6 | 4 ++-- kernel/arm/KERNEL.ARMV7 | 3 +-- kernel/arm/sgemm_kernel_4x2_vfp.S | 12 ++++++++++++ kernel/arm/sgemm_kernel_4x4_vfpv3.S | 29 +++++++++++------------------ kernel/arm/strmm_kernel_4x2_vfp.S | 13 +++++++++++++ kernel/arm/strmm_kernel_4x4_vfpv3.S | 13 +++++++++++++ 6 files changed, 52 insertions(+), 22 deletions(-) diff --git a/kernel/arm/KERNEL.ARMV6 b/kernel/arm/KERNEL.ARMV6 index 022a93183..18d9869de 100644 --- a/kernel/arm/KERNEL.ARMV6 +++ b/kernel/arm/KERNEL.ARMV6 @@ -73,6 +73,7 @@ CGEMVTKERNEL = cgemv_t_vfp.S ZGEMVTKERNEL = zgemv_t_vfp.S SGEMMKERNEL = ../generic/gemmkernel_4x2.c +SGEMMKERNEL = sgemm_kernel_4x2_vfp.S ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) SGEMMINCOPY = sgemm_ncopy_4_vfp.S SGEMMITCOPY = sgemm_tcopy_4_vfp.S @@ -97,6 +98,7 @@ DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMOTCOPYOBJ = dgemm_otcopy.o STRMMKERNEL = ../generic/trmmkernel_4x2.c +STRMMKERNEL = strmm_kernel_4x2_vfp.S DTRMMKERNEL = ../generic/trmmkernel_4x2.c CGEMMONCOPY = cgemm_ncopy_2_vfp.S @@ -111,12 +113,10 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o ifeq ($(ARM_ABI),hard) -STRMMKERNEL = strmm_kernel_4x2_vfp.S DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S -SGEMMKERNEL = sgemm_kernel_4x2_vfp.S DGEMMKERNEL = dgemm_kernel_4x2_vfp.S CGEMMKERNEL = cgemm_kernel_2x2_vfp.S ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7 index 0872cb8cd..e2044133d 100644 --- a/kernel/arm/KERNEL.ARMV7 +++ b/kernel/arm/KERNEL.ARMV7 @@ -11,7 +11,7 @@ DGEMVNKERNEL = gemv_n_vfpv3.S STRMMKERNEL = ../generic/trmmkernel_4x4.c DTRMMKERNEL = ../generic/trmmkernel_4x4.c -SGEMMKERNEL = ../generic/gemmkernel_4x4.c +SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S SGEMMONCOPY = sgemm_ncopy_4_vfp.S SGEMMOTCOPY = sgemm_tcopy_4_vfp.S SGEMMONCOPYOBJ = sgemm_oncopy.o @@ -30,7 +30,6 @@ DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S -SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S diff --git a/kernel/arm/sgemm_kernel_4x2_vfp.S b/kernel/arm/sgemm_kernel_4x2_vfp.S index e8b44b742..1f21e5a1f 100644 --- a/kernel/arm/sgemm_kernel_4x2_vfp.S +++ b/kernel/arm/sgemm_kernel_4x2_vfp.S @@ -62,9 +62,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ALPHA [fp, #-280] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHA_SOFTFP r3 +#define OLD_A_SOFTFP [fp, #4 ] +#define B [fp, #8 ] +#define C [fp, #12 ] +#define OLD_LDC [fp, #16 ] +#else #define B [fp, #4 ] #define C [fp, #8 ] #define OLD_LDC [fp, #12 ] +#endif #define I r0 #define J r1 @@ -416,6 +424,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add fp, sp, #24 sub sp, sp, #STACKSIZE // reserve stack +#if !defined(__ARM_PCS_VFP) + vmov OLD_ALPHA, OLD_ALPHA_SOFTFP + ldr OLD_A, OLD_A_SOFTFP +#endif str OLD_M, M str OLD_N, N str OLD_K, K diff --git a/kernel/arm/sgemm_kernel_4x4_vfpv3.S b/kernel/arm/sgemm_kernel_4x4_vfpv3.S index 86198ac90..6491d3571 100644 --- a/kernel/arm/sgemm_kernel_4x4_vfpv3.S +++ b/kernel/arm/sgemm_kernel_4x4_vfpv3.S @@ -58,14 +58,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define OLD_M r0 #define OLD_N r1 #define OLD_K r2 - -#ifdef ARM_SOFTFP_ABI -#define OLD_ALPHA r3 -//#define OLD_A -#else //hard #define OLD_A r3 #define OLD_ALPHA s0 -#endif /****************************************************** * [fp, #-128] - [fp, #-64] is reserved @@ -77,10 +71,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define M [fp, #-256 ] #define N [fp, #-260 ] #define K [fp, #-264 ] - -#ifndef ARM_SOFTFP_ABI #define A [fp, #-268 ] -#endif #define FP_ZERO [fp, #-240] #define FP_ZERO_0 [fp, #-240] @@ -88,17 +79,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ALPHA [fp, #-280] -#ifdef ARM_SOFTFP_ABI -#define A [fp, #4 ] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHA_SOFTFP r3 +#define OLD_A_SOFTFP [fp, #4 ] #define B [fp, #8 ] #define C [fp, #12 ] #define OLD_LDC [fp, #16 ] -#else //hard +#else #define B [fp, #4 ] #define C [fp, #8 ] #define OLD_LDC [fp, #12 ] #endif - + #define I r0 #define J r1 #define L r2 @@ -867,16 +859,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add fp, sp, #24 sub sp, sp, #STACKSIZE // reserve stack +#if !defined(__ARM_PCS_VFP) + vmov OLD_ALPHA, OLD_ALPHA_SOFTFP + ldr OLD_A, OLD_A_SOFTFP +#endif + str OLD_M, M str OLD_N, N str OLD_K, K - -#ifdef ARM_SOFTFP_ABI - str OLD_ALPHA, ALPHA -#else //hard str OLD_A, A vstr OLD_ALPHA, ALPHA -#endif + sub r3, fp, #128 vstm r3, { s8 - s31} // store floating point registers diff --git a/kernel/arm/strmm_kernel_4x2_vfp.S b/kernel/arm/strmm_kernel_4x2_vfp.S index 8f97644ec..635b1dd13 100644 --- a/kernel/arm/strmm_kernel_4x2_vfp.S +++ b/kernel/arm/strmm_kernel_4x2_vfp.S @@ -65,10 +65,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ALPHA [fp, #-276 ] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHA_SOFTFP r3 +#define OLD_A_SOFTFP [fp, #4 ] +#define B [fp, #8 ] +#define OLD_C [fp, #12 ] +#define OLD_LDC [fp, #16 ] +#define OFFSET [fp, #20 ] +#else #define B [fp, #4 ] #define OLD_C [fp, #8 ] #define OLD_LDC [fp, #12 ] #define OFFSET [fp, #16 ] +#endif #define I r0 #define J r1 @@ -395,6 +404,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add fp, sp, #24 sub sp, sp, #STACKSIZE // reserve stack +#if !defined(__ARM_PCS_VFP) + vmov OLD_ALPHA, OLD_ALPHA_SOFTFP + ldr OLD_A, OLD_A_SOFTFP +#endif str OLD_M, M str OLD_N, N str OLD_K, K diff --git a/kernel/arm/strmm_kernel_4x4_vfpv3.S b/kernel/arm/strmm_kernel_4x4_vfpv3.S index 0dd03ac85..e24d24eba 100644 --- a/kernel/arm/strmm_kernel_4x4_vfpv3.S +++ b/kernel/arm/strmm_kernel_4x4_vfpv3.S @@ -64,10 +64,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ALPHA [fp, #-280] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHA_SOFTFP r3 +#define OLD_A_SOFTFP [fp, #4 ] +#define B [fp, #8 ] +#define C [fp, #12 ] +#define OLD_LDC [fp, #16 ] +#define OFFSET [fp, #20 ] +#else #define B [fp, #4 ] #define C [fp, #8 ] #define OLD_LDC [fp, #12 ] #define OFFSET [fp, #16 ] +#endif #define I r0 #define J r1 @@ -782,6 +791,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add fp, sp, #24 sub sp, sp, #STACKSIZE // reserve stack +#if !defined(__ARM_PCS_VFP) + vmov OLD_ALPHA, OLD_ALPHA_SOFTFP + ldr OLD_A, OLD_A_SOFTFP +#endif str OLD_M, M str OLD_N, N str OLD_K, K -- cgit v1.2.3