From 8f83d3f961f57fb002d8c5359c32a8db50dcab5d Mon Sep 17 00:00:00 2001 From: Ashwin Sekhar T K Date: Sun, 2 Jul 2017 00:38:44 +0530 Subject: arm: add softfp support in vfp gemv kernels --- kernel/arm/KERNEL.ARMV6 | 20 ++++++++-------- kernel/arm/KERNEL.ARMV7 | 6 ++--- kernel/arm/cgemv_n_vfp.S | 28 +++++++++++++++++++---- kernel/arm/cgemv_t_vfp.S | 28 +++++++++++++++++++---- kernel/arm/gemv_n_vfp.S | 44 +++++++++++++++++++++++++++++++---- kernel/arm/gemv_n_vfpv3.S | 58 ++++++++++++++++++++++++++++------------------- kernel/arm/gemv_t_vfp.S | 54 +++++++++++++++++++++++++++---------------- kernel/arm/gemv_t_vfpv3.S | 44 +++++++++++++++++++++++++++++++---- kernel/arm/zgemv_n_vfp.S | 28 +++++++++++++++++++---- kernel/arm/zgemv_t_vfp.S | 28 +++++++++++++++++++---- 10 files changed, 252 insertions(+), 86 deletions(-) diff --git a/kernel/arm/KERNEL.ARMV6 b/kernel/arm/KERNEL.ARMV6 index 86d3dabaa..022a93183 100644 --- a/kernel/arm/KERNEL.ARMV6 +++ b/kernel/arm/KERNEL.ARMV6 @@ -62,6 +62,16 @@ DSWAPKERNEL = swap_vfp.S CSWAPKERNEL = swap_vfp.S ZSWAPKERNEL = swap_vfp.S +SGEMVNKERNEL = gemv_n_vfp.S +DGEMVNKERNEL = gemv_n_vfp.S +CGEMVNKERNEL = cgemv_n_vfp.S +ZGEMVNKERNEL = zgemv_n_vfp.S + +SGEMVTKERNEL = gemv_t_vfp.S +DGEMVTKERNEL = gemv_t_vfp.S +CGEMVTKERNEL = cgemv_t_vfp.S +ZGEMVTKERNEL = zgemv_t_vfp.S + SGEMMKERNEL = ../generic/gemmkernel_4x2.c ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) SGEMMINCOPY = sgemm_ncopy_4_vfp.S @@ -101,16 +111,6 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o ifeq ($(ARM_ABI),hard) -SGEMVNKERNEL = gemv_n_vfp.S -DGEMVNKERNEL = gemv_n_vfp.S -CGEMVNKERNEL = cgemv_n_vfp.S -ZGEMVNKERNEL = zgemv_n_vfp.S - -SGEMVTKERNEL = gemv_t_vfp.S -DGEMVTKERNEL = gemv_t_vfp.S -CGEMVTKERNEL = cgemv_t_vfp.S -ZGEMVTKERNEL = zgemv_t_vfp.S - STRMMKERNEL = strmm_kernel_4x2_vfp.S DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7 index f4823b70a..0872cb8cd 100644 --- a/kernel/arm/KERNEL.ARMV7 +++ b/kernel/arm/KERNEL.ARMV7 @@ -5,6 +5,9 @@ DNRM2KERNEL = nrm2_vfpv3.S CNRM2KERNEL = nrm2_vfpv3.S ZNRM2KERNEL = nrm2_vfpv3.S +SGEMVNKERNEL = gemv_n_vfpv3.S +DGEMVNKERNEL = gemv_n_vfpv3.S + STRMMKERNEL = ../generic/trmmkernel_4x4.c DTRMMKERNEL = ../generic/trmmkernel_4x4.c @@ -22,9 +25,6 @@ DGEMMOTCOPYOBJ = dgemm_otcopy.o ifeq ($(ARM_ABI),hard) -SGEMVNKERNEL = gemv_n_vfpv3.S -DGEMVNKERNEL = gemv_n_vfpv3.S - STRMMKERNEL = strmm_kernel_4x4_vfpv3.S DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S diff --git a/kernel/arm/cgemv_n_vfp.S b/kernel/arm/cgemv_n_vfp.S index 5d2748644..4a1cd2d45 100644 --- a/kernel/arm/cgemv_n_vfp.S +++ b/kernel/arm/cgemv_n_vfp.S @@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define STACKSIZE 256 -#define OLD_LDA [fp, #0 ] -#define X [fp, #4 ] -#define OLD_INC_X [fp, #8 ] -#define Y [fp, #12 ] -#define OLD_INC_Y [fp, #16 ] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHAR r3 +#define OLD_ALPHAI [fp, #0 ] +#define OLD_A_SOFTFP [fp, #4 ] +#define OLD_LDA [fp, #8 ] +#define X [fp, #12 ] +#define OLD_INC_X [fp, #16 ] +#define Y [fp, #20 ] +#define OLD_INC_Y [fp, #24 ] +#else +#define OLD_LDA [fp, #0 ] +#define X [fp, #4 ] +#define OLD_INC_X [fp, #8 ] +#define Y [fp, #12 ] +#define OLD_INC_Y [fp, #16 ] +#endif + #define OLD_A r3 #define OLD_M r0 @@ -462,6 +474,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmp N, #0 ble cgemvn_kernel_L999 +#if !defined(__ARM_PCS_VFP) + vmov s0, OLD_ALPHAR + vldr s1, OLD_ALPHAI + ldr OLD_A, OLD_A_SOFTFP +#endif + str OLD_A, A str OLD_M, M vstr s0 , ALPHA_R diff --git a/kernel/arm/cgemv_t_vfp.S b/kernel/arm/cgemv_t_vfp.S index 76c8a8f18..e1c750c85 100644 --- a/kernel/arm/cgemv_t_vfp.S +++ b/kernel/arm/cgemv_t_vfp.S @@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define STACKSIZE 256 -#define OLD_LDA [fp, #0 ] -#define X [fp, #4 ] -#define OLD_INC_X [fp, #8 ] -#define Y [fp, #12 ] -#define OLD_INC_Y [fp, #16 ] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHAR r3 +#define OLD_ALPHAI [fp, #0 ] +#define OLD_A_SOFTFP [fp, #4 ] +#define OLD_LDA [fp, #8 ] +#define X [fp, #12 ] +#define OLD_INC_X [fp, #16 ] +#define Y [fp, #20 ] +#define OLD_INC_Y [fp, #24 ] +#else +#define OLD_LDA [fp, #0 ] +#define X [fp, #4 ] +#define OLD_INC_X [fp, #8 ] +#define Y [fp, #12 ] +#define OLD_INC_Y [fp, #16 ] +#endif + #define OLD_A r3 #define OLD_N r1 @@ -359,6 +371,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmp OLD_N, #0 ble cgemvt_kernel_L999 +#if !defined(__ARM_PCS_VFP) + vmov s0, OLD_ALPHAR + vldr s1, OLD_ALPHAI + ldr OLD_A, OLD_A_SOFTFP +#endif + str OLD_A, A str OLD_N, N diff --git a/kernel/arm/gemv_n_vfp.S b/kernel/arm/gemv_n_vfp.S index 385370b7f..7c154d741 100644 --- a/kernel/arm/gemv_n_vfp.S +++ b/kernel/arm/gemv_n_vfp.S @@ -38,11 +38,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define STACKSIZE 256 -#define OLD_LDA [fp, #0 ] -#define X [fp, #4 ] -#define OLD_INC_X [fp, #8 ] -#define Y [fp, #12 ] -#define OLD_INC_Y [fp, #16 ] +#if !defined(__ARM_PCS_VFP) + +#if !defined(DOUBLE) +#define OLD_ALPHA r3 +#define OLD_A_SOFTFP [fp, #0 ] +#define OLD_LDA [fp, #4 ] +#define X [fp, #8 ] +#define OLD_INC_X [fp, #12 ] +#define Y [fp, #16 ] +#define OLD_INC_Y [fp, #20 ] +#else +#define OLD_ALPHA [fp, #0 ] +#define OLD_A_SOFTFP [fp, #8 ] +#define OLD_LDA [fp, #12] +#define X [fp, #16] +#define OLD_INC_X [fp, #20] +#define Y [fp, #24] +#define OLD_INC_Y [fp, #28] +#endif + +#else + +#define OLD_LDA [fp, #0 ] +#define X [fp, #4 ] +#define OLD_INC_X [fp, #8 ] +#define Y [fp, #12 ] +#define OLD_INC_Y [fp, #16 ] + +#endif + #define OLD_A r3 #define OLD_M r0 @@ -508,6 +533,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmp N, #0 ble gemvn_kernel_L999 +#if !defined(__ARM_PCS_VFP) +#if !defined(DOUBLE) + vmov s0, OLD_ALPHA +#else + vldr d0, OLD_ALPHA +#endif + ldr OLD_A, OLD_A_SOFTFP +#endif + str OLD_A, A str OLD_M, M diff --git a/kernel/arm/gemv_n_vfpv3.S b/kernel/arm/gemv_n_vfpv3.S index 93bf23e49..54f958b7b 100644 --- a/kernel/arm/gemv_n_vfpv3.S +++ b/kernel/arm/gemv_n_vfpv3.S @@ -38,25 +38,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define STACKSIZE 256 -#ifndef ARM_SOFTFP_ABI -//hard -#define OLD_LDA [fp, #0 ] -#define X [fp, #4 ] -#define OLD_INC_X [fp, #8 ] -#define Y [fp, #12 ] -#define OLD_INC_Y [fp, #16 ] -#define OLD_A r3 -#else -#define OLD_A_SOFTFP [fp, #0 ] -#define OLD_LDA [fp, #4 ] -#define X [fp, #8 ] -#define OLD_INC_X [fp, #12 ] -#define Y [fp, #16 ] -#define OLD_INC_Y [fp, #20 ] +#if !defined(__ARM_PCS_VFP) + +#if !defined(DOUBLE) #define OLD_ALPHA r3 -#define OLD_A r3 +#define OLD_A_SOFTFP [fp, #0 ] +#define OLD_LDA [fp, #4 ] +#define X [fp, #8 ] +#define OLD_INC_X [fp, #12 ] +#define Y [fp, #16 ] +#define OLD_INC_Y [fp, #20 ] +#else +#define OLD_ALPHA [fp, #0 ] +#define OLD_A_SOFTFP [fp, #8 ] +#define OLD_LDA [fp, #12] +#define X [fp, #16] +#define OLD_INC_X [fp, #20] +#define Y [fp, #24] +#define OLD_INC_Y [fp, #28] +#endif + +#else + +#define OLD_LDA [fp, #0 ] +#define X [fp, #4 ] +#define OLD_INC_X [fp, #8 ] +#define Y [fp, #12 ] +#define OLD_INC_Y [fp, #16 ] + #endif +#define OLD_A r3 #define OLD_M r0 #define AO1 r0 @@ -565,18 +577,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmp N, #0 ble gemvn_kernel_L999 -#ifndef DOUBLE -#ifdef ARM_SOFTFP_ABI - - vmov s0, OLD_ALPHA - ldr OLD_A, OLD_A_SOFTFP +#if !defined(__ARM_PCS_VFP) +#if !defined(DOUBLE) + vmov s0, OLD_ALPHA +#else + vldr d0, OLD_ALPHA #endif + ldr OLD_A, OLD_A_SOFTFP #endif str OLD_A, A str OLD_M, M - - + ldr INC_X , OLD_INC_X ldr INC_Y , OLD_INC_Y diff --git a/kernel/arm/gemv_t_vfp.S b/kernel/arm/gemv_t_vfp.S index 816be54ff..9559d1829 100644 --- a/kernel/arm/gemv_t_vfp.S +++ b/kernel/arm/gemv_t_vfp.S @@ -38,25 +38,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define STACKSIZE 256 -#ifndef ARM_SOFTFP_ABI -//hard abi -#define OLD_LDA [fp, #0 ] -#define X [fp, #4 ] -#define OLD_INC_X [fp, #8 ] -#define Y [fp, #12 ] -#define OLD_INC_Y [fp, #16 ] -#define OLD_A r3 -#else -#define OLD_A_SOFTFP [fp, #0 ] -#define OLD_LDA [fp, #4 ] -#define X [fp, #8 ] -#define OLD_INC_X [fp, #12 ] -#define Y [fp, #16 ] -#define OLD_INC_Y [fp, #20 ] +#if !defined(__ARM_PCS_VFP) + +#if !defined(DOUBLE) #define OLD_ALPHA r3 -#define OLD_A r3 +#define OLD_A_SOFTFP [fp, #0 ] +#define OLD_LDA [fp, #4 ] +#define X [fp, #8 ] +#define OLD_INC_X [fp, #12 ] +#define Y [fp, #16 ] +#define OLD_INC_Y [fp, #20 ] +#else +#define OLD_ALPHA [fp, #0 ] +#define OLD_A_SOFTFP [fp, #8 ] +#define OLD_LDA [fp, #12] +#define X [fp, #16] +#define OLD_INC_X [fp, #20] +#define Y [fp, #24] +#define OLD_INC_Y [fp, #28] #endif +#else + +#define OLD_LDA [fp, #0 ] +#define X [fp, #4 ] +#define OLD_INC_X [fp, #8 ] +#define Y [fp, #12 ] +#define OLD_INC_Y [fp, #16 ] + +#endif + +#define OLD_A r3 #define OLD_N r1 #define M r0 @@ -518,11 +530,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmp OLD_N, #0 ble gemvt_kernel_L999 -#ifndef DOUBLE -#ifdef ARM_SOFTFP_ABI - vmov s0, OLD_ALPHA - ldr OLD_A, OLD_A_SOFTFP +#if !defined(__ARM_PCS_VFP) +#if !defined(DOUBLE) + vmov s0, OLD_ALPHA +#else + vldr d0, OLD_ALPHA #endif + ldr OLD_A, OLD_A_SOFTFP #endif str OLD_A, A diff --git a/kernel/arm/gemv_t_vfpv3.S b/kernel/arm/gemv_t_vfpv3.S index 7ae5799bc..b1d3dadf1 100644 --- a/kernel/arm/gemv_t_vfpv3.S +++ b/kernel/arm/gemv_t_vfpv3.S @@ -38,11 +38,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define STACKSIZE 256 -#define OLD_LDA [fp, #0 ] -#define X [fp, #4 ] -#define OLD_INC_X [fp, #8 ] -#define Y [fp, #12 ] -#define OLD_INC_Y [fp, #16 ] +#if !defined(__ARM_PCS_VFP) + +#if !defined(DOUBLE) +#define OLD_ALPHA r3 +#define OLD_A_SOFTFP [fp, #0 ] +#define OLD_LDA [fp, #4 ] +#define X [fp, #8 ] +#define OLD_INC_X [fp, #12 ] +#define Y [fp, #16 ] +#define OLD_INC_Y [fp, #20 ] +#else +#define OLD_ALPHA [fp, #0 ] +#define OLD_A_SOFTFP [fp, #8 ] +#define OLD_LDA [fp, #12] +#define X [fp, #16] +#define OLD_INC_X [fp, #20] +#define Y [fp, #24] +#define OLD_INC_Y [fp, #28] +#endif + +#else + +#define OLD_LDA [fp, #0 ] +#define X [fp, #4 ] +#define OLD_INC_X [fp, #8 ] +#define Y [fp, #12 ] +#define OLD_INC_Y [fp, #16 ] + +#endif + #define OLD_A r3 #define OLD_N r1 @@ -476,6 +501,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmp OLD_N, #0 ble gemvt_kernel_L999 +#if !defined(__ARM_PCS_VFP) +#if !defined(DOUBLE) + vmov s0, OLD_ALPHA +#else + vldr d0, OLD_ALPHA +#endif + ldr OLD_A, OLD_A_SOFTFP +#endif + str OLD_A, A str OLD_N, N diff --git a/kernel/arm/zgemv_n_vfp.S b/kernel/arm/zgemv_n_vfp.S index da9a91043..7d5567849 100644 --- a/kernel/arm/zgemv_n_vfp.S +++ b/kernel/arm/zgemv_n_vfp.S @@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define STACKSIZE 256 -#define OLD_LDA [fp, #0 ] -#define X [fp, #4 ] -#define OLD_INC_X [fp, #8 ] -#define Y [fp, #12 ] -#define OLD_INC_Y [fp, #16 ] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHAR [fp, #0 ] +#define OLD_ALPHAI [fp, #8 ] +#define OLD_A_SOFTFP [fp, #16] +#define OLD_LDA [fp, #20] +#define X [fp, #24] +#define OLD_INC_X [fp, #28] +#define Y [fp, #32] +#define OLD_INC_Y [fp, #36] +#else +#define OLD_LDA [fp, #0 ] +#define X [fp, #4 ] +#define OLD_INC_X [fp, #8 ] +#define Y [fp, #12 ] +#define OLD_INC_Y [fp, #16 ] +#endif + #define OLD_A r3 #define OLD_M r0 @@ -465,6 +477,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmp N, #0 ble zgemvn_kernel_L999 +#if !defined(__ARM_PCS_VFP) + vldr d0, OLD_ALPHAR + vldr d1, OLD_ALPHAI + ldr OLD_A, OLD_A_SOFTFP +#endif + str OLD_A, A str OLD_M, M vstr d0 , ALPHA_R diff --git a/kernel/arm/zgemv_t_vfp.S b/kernel/arm/zgemv_t_vfp.S index 211fa0701..407026166 100644 --- a/kernel/arm/zgemv_t_vfp.S +++ b/kernel/arm/zgemv_t_vfp.S @@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define STACKSIZE 256 -#define OLD_LDA [fp, #0 ] -#define X [fp, #4 ] -#define OLD_INC_X [fp, #8 ] -#define Y [fp, #12 ] -#define OLD_INC_Y [fp, #16 ] +#if !defined(__ARM_PCS_VFP) +#define OLD_ALPHAR [fp, #0 ] +#define OLD_ALPHAI [fp, #8 ] +#define OLD_A_SOFTFP [fp, #16] +#define OLD_LDA [fp, #20] +#define X [fp, #24] +#define OLD_INC_X [fp, #28] +#define Y [fp, #32] +#define OLD_INC_Y [fp, #36] +#else +#define OLD_LDA [fp, #0 ] +#define X [fp, #4 ] +#define OLD_INC_X [fp, #8 ] +#define Y [fp, #12 ] +#define OLD_INC_Y [fp, #16 ] +#endif + #define OLD_A r3 #define OLD_N r1 @@ -360,6 +372,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmp OLD_N, #0 ble zgemvt_kernel_L999 +#if !defined(__ARM_PCS_VFP) + vldr d0, OLD_ALPHAR + vldr d1, OLD_ALPHAI + ldr OLD_A, OLD_A_SOFTFP +#endif + str OLD_A, A str OLD_N, N -- cgit v1.2.3