diff options
author | Ashwin Sekhar T K <ashwin.sekhar@cavium.com> | 2017-06-30 23:46:02 +0530 |
---|---|---|
committer | Ashwin Sekhar T K <ashwin.sekhar@cavium.com> | 2017-06-30 23:46:02 +0530 |
commit | 54915ce343dba7e00dba21c73b9fd35bcded0de3 (patch) | |
tree | 2fa90ec4c487696e9c2cc8d4bcbaf613050a96fe | |
parent | 0150fabdb6250748bc45d18ccbb782331526c5cd (diff) | |
download | openblas-54915ce343dba7e00dba21c73b9fd35bcded0de3.tar.gz openblas-54915ce343dba7e00dba21c73b9fd35bcded0de3.tar.bz2 openblas-54915ce343dba7e00dba21c73b9fd35bcded0de3.zip |
arm: add softfp support in kernel/arm/*dot_vfp.S
-rw-r--r-- | kernel/arm/KERNEL.ARMV6 | 10 | ||||
-rw-r--r-- | kernel/arm/cdot_vfp.S | 32 | ||||
-rw-r--r-- | kernel/arm/ddot_vfp.S | 3 | ||||
-rw-r--r-- | kernel/arm/sdot_vfp.S | 13 | ||||
-rw-r--r-- | kernel/arm/zdot_vfp.S | 32 |
5 files changed, 66 insertions, 24 deletions
diff --git a/kernel/arm/KERNEL.ARMV6 b/kernel/arm/KERNEL.ARMV6 index e9fe6bedd..022547c9b 100644 --- a/kernel/arm/KERNEL.ARMV6 +++ b/kernel/arm/KERNEL.ARMV6 @@ -47,6 +47,11 @@ DROTKERNEL = rot_vfp.S CROTKERNEL = rot_vfp.S ZROTKERNEL = rot_vfp.S +SDOTKERNEL = sdot_vfp.S +DDOTKERNEL = ddot_vfp.S +CDOTKERNEL = cdot_vfp.S +ZDOTKERNEL = zdot_vfp.S + SGEMMKERNEL = ../generic/gemmkernel_4x2.c ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) SGEMMINCOPY = sgemm_ncopy_4_vfp.S @@ -86,11 +91,6 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o ifeq ($(ARM_ABI),hard) -SDOTKERNEL = sdot_vfp.S -DDOTKERNEL = ddot_vfp.S -CDOTKERNEL = cdot_vfp.S -ZDOTKERNEL = zdot_vfp.S - SNRM2KERNEL = nrm2_vfp.S DNRM2KERNEL = nrm2_vfp.S CNRM2KERNEL = nrm2_vfp.S diff --git a/kernel/arm/cdot_vfp.S b/kernel/arm/cdot_vfp.S index 0497b6d83..e5a6e4d35 100644 --- a/kernel/arm/cdot_vfp.S +++ b/kernel/arm/cdot_vfp.S @@ -41,8 +41,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N r0 #define X r1 #define INC_X r2 -#define OLD_Y r3 - /****************************************************** * [fp, #-128] - [fp, #-64] is reserved @@ -50,7 +48,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * registers *******************************************************/ -#define OLD_INC_Y [fp, #4 ] +#if !defined(__ARM_PCS_VFP) +#define OLD_RETURN_ADDR r0 +#define OLD_N r1 +#define OLD_X r2 +#define OLD_INC_X r3 +#define OLD_Y [fp, #0 ] +#define OLD_INC_Y [fp, #4 ] +#define RETURN_ADDR r8 +#else +#define OLD_Y r3 +#define OLD_INC_Y [fp, #0 ] +#endif #define I r5 #define Y r6 @@ -179,7 +188,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .align 5 push {r4 - r9, fp} - add fp, sp, #24 + add fp, sp, #28 sub sp, sp, #STACKSIZE // reserve stack sub r4, fp, #128 @@ -191,8 +200,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vmov s2, s0 vmov s3, s0 +#if !defined(__ARM_PCS_VFP) + mov RETURN_ADDR, OLD_RETURN_ADDR + mov N, OLD_N + mov X, OLD_X + mov INC_X, OLD_INC_X + ldr Y, OLD_Y + ldr INC_Y, OLD_INC_Y +#else mov Y, OLD_Y ldr INC_Y, OLD_INC_Y +#endif cmp N, #0 ble cdot_kernel_L999 @@ -265,7 +283,6 @@ cdot_kernel_S10: cdot_kernel_L999: - sub r3, fp, #128 vldm r3, { s8 - s15} // restore floating point registers @@ -276,8 +293,11 @@ cdot_kernel_L999: vadd.f32 s0 , s0, s2 vsub.f32 s1 , s1, s3 #endif +#if !defined(__ARM_PCS_VFP) + vstm RETURN_ADDR, {s0 - s1} +#endif - sub sp, fp, #24 + sub sp, fp, #28 pop {r4 - r9, fp} bx lr diff --git a/kernel/arm/ddot_vfp.S b/kernel/arm/ddot_vfp.S index f28acbae3..fb294d8b4 100644 --- a/kernel/arm/ddot_vfp.S +++ b/kernel/arm/ddot_vfp.S @@ -246,6 +246,9 @@ ddot_kernel_L999: vldm r3, { d8 - d15} // restore floating point registers vadd.f64 d0 , d0, d1 // set return value +#if !defined(__ARM_PCS_VFP) + vmov r0, r1, d0 +#endif sub sp, fp, #24 pop {r4 - r9, fp} bx lr diff --git a/kernel/arm/sdot_vfp.S b/kernel/arm/sdot_vfp.S index f3abdc197..5f4f424bf 100644 --- a/kernel/arm/sdot_vfp.S +++ b/kernel/arm/sdot_vfp.S @@ -329,20 +329,19 @@ sdot_kernel_L999: vldm r3, { s8 - s15} // restore floating point registers #if defined(DSDOT) - vadd.f64 d0 , d0, d1 // set return value - -#ifdef ARM_SOFTFP_ABI - vmov r0, r1, d0 +#else + vadd.f32 s0 , s0, s1 // set return value #endif +#if !defined(__ARM_PCS_VFP) +#if defined(DSDOT) + vmov r0, r1, d0 #else - - vadd.f32 s0 , s0, s1 // set return value -#ifdef ARM_SOFTFP_ABI vmov r0, s0 #endif #endif + sub sp, fp, #24 pop {r4 - r9, fp} bx lr diff --git a/kernel/arm/zdot_vfp.S b/kernel/arm/zdot_vfp.S index 936ce9f60..43f2c0c0b 100644 --- a/kernel/arm/zdot_vfp.S +++ b/kernel/arm/zdot_vfp.S @@ -41,8 +41,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N r0 #define X r1 #define INC_X r2 -#define OLD_Y r3 - /****************************************************** * [fp, #-128] - [fp, #-64] is reserved @@ -50,7 +48,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * registers *******************************************************/ -#define OLD_INC_Y [fp, #4 ] +#if !defined(__ARM_PCS_VFP) +#define OLD_RETURN_ADDR r0 +#define OLD_N r1 +#define OLD_X r2 +#define OLD_INC_X r3 +#define OLD_Y [fp, #0 ] +#define OLD_INC_Y [fp, #4 ] +#define RETURN_ADDR r8 +#else +#define OLD_Y r3 +#define OLD_INC_Y [fp, #0 ] +#endif #define I r5 #define Y r6 @@ -181,7 +190,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .align 5 push {r4 - r9, fp} - add fp, sp, #24 + add fp, sp, #28 sub sp, sp, #STACKSIZE // reserve stack sub r4, fp, #128 @@ -194,9 +203,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vcvt.f64.f32 d2, s0 vcvt.f64.f32 d3, s0 +#if !defined(__ARM_PCS_VFP) + mov RETURN_ADDR, OLD_RETURN_ADDR + mov N, OLD_N + mov X, OLD_X + mov INC_X, OLD_INC_X + ldr Y, OLD_Y + ldr INC_Y, OLD_INC_Y +#else mov Y, OLD_Y ldr INC_Y, OLD_INC_Y - +#endif cmp N, #0 ble zdot_kernel_L999 @@ -280,8 +297,11 @@ zdot_kernel_L999: vadd.f64 d0 , d0, d2 vsub.f64 d1 , d1, d3 #endif +#if !defined(__ARM_PCS_VFP) + vstm RETURN_ADDR, {d0 - d1} +#endif - sub sp, fp, #24 + sub sp, fp, #28 pop {r4 - r9, fp} bx lr |