summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Zhang Xianyi <traits.zhang@gmail.com> 2017-07-10 20:03:57 +0800
committer GitHub <noreply@github.com> 2017-07-10 20:03:57 +0800
commit a590e6135caf3fe79cbd5bbf7129be19ea47cd8b (patch)
tree f63b6abe50ee35b7813497580d6e8a88c4be2744
parent 3db2adf87225bd1720a62c05a9b9296dbe8dace0 (diff)
parent 4239dd65cec8bb0a9ac44db62750d4760bb64780 (diff)
download openblas-a590e6135caf3fe79cbd5bbf7129be19ea47cd8b.tar.gz
download openblas-a590e6135caf3fe79cbd5bbf7129be19ea47cd8b.tar.bz2
download openblas-a590e6135caf3fe79cbd5bbf7129be19ea47cd8b.zip
Merge pull request #1221 from ashwinyes/develop_arm_softfp
arm: add support for softfp in arm vfp assembly files
-rw-r--r-- Makefile.arm 23
-rw-r--r-- Makefile.system 12
-rw-r--r-- common_arm.h 5
-rw-r--r-- kernel/arm/KERNEL.ARMV6 63
-rw-r--r-- kernel/arm/KERNEL.ARMV7 114
-rw-r--r-- kernel/arm/asum_vfp.S 8
-rw-r--r-- kernel/arm/axpy_vfp.S 71
-rw-r--r-- kernel/arm/cdot_vfp.S 32
-rw-r--r-- kernel/arm/cgemm_kernel_2x2_vfp.S 14
-rw-r--r-- kernel/arm/cgemm_kernel_2x2_vfpv3.S 14
-rw-r--r-- kernel/arm/cgemv_n_vfp.S 28
-rw-r--r-- kernel/arm/cgemv_t_vfp.S 28
-rw-r--r-- kernel/arm/ctrmm_kernel_2x2_vfp.S 15
-rw-r--r-- kernel/arm/ctrmm_kernel_2x2_vfpv3.S 15
-rw-r--r-- kernel/arm/ddot_vfp.S 3
-rw-r--r-- kernel/arm/dgemm_kernel_4x2_vfp.S 13
-rw-r--r-- kernel/arm/dgemm_kernel_4x4_vfpv3.S 12
-rw-r--r-- kernel/arm/dtrmm_kernel_4x2_vfp.S 13
-rw-r--r-- kernel/arm/dtrmm_kernel_4x4_vfpv3.S 13
-rw-r--r-- kernel/arm/gemv_n_vfp.S 44
-rw-r--r-- kernel/arm/gemv_n_vfpv3.S 58
-rw-r--r-- kernel/arm/gemv_t_vfp.S 54
-rw-r--r-- kernel/arm/gemv_t_vfpv3.S 44
-rw-r--r-- kernel/arm/nrm2_vfp.S 7
-rw-r--r-- kernel/arm/nrm2_vfpv3.S 9
-rw-r--r-- kernel/arm/rot_vfp.S 19
-rw-r--r-- kernel/arm/sdot_vfp.S 13
-rw-r--r-- kernel/arm/sgemm_kernel_4x2_vfp.S 12
-rw-r--r-- kernel/arm/sgemm_kernel_4x4_vfpv3.S 29
-rw-r--r-- kernel/arm/strmm_kernel_4x2_vfp.S 13
-rw-r--r-- kernel/arm/strmm_kernel_4x4_vfpv3.S 13
-rw-r--r-- kernel/arm/swap_vfp.S 37
-rw-r--r-- kernel/arm/zdot_vfp.S 32
-rw-r--r-- kernel/arm/zgemm_kernel_2x2_vfp.S 14
-rw-r--r-- kernel/arm/zgemm_kernel_2x2_vfpv3.S 14
-rw-r--r-- kernel/arm/zgemv_n_vfp.S 28
-rw-r--r-- kernel/arm/zgemv_t_vfp.S 28
-rw-r--r-- kernel/arm/ztrmm_kernel_2x2_vfp.S 15
-rw-r--r-- kernel/arm/ztrmm_kernel_2x2_vfpv3.S 15
39 files changed, 683 insertions, 311 deletions
diff --git a/Makefile.arm b/Makefile.arm
index c189b0c47..eedd39b73 100644
--- a/Makefile.arm
+++ b/Makefile.arm
@@ -1,5 +1,4 @@
-#ifeq logical or
-ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
+ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
ifeq ($(OSNAME), Android)
CCOMMON_OPT += -mfpu=neon -march=armv7-a
FCOMMON_OPT += -mfpu=neon -march=armv7-a
@@ -9,28 +8,12 @@ FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
endif
endif
-ifeq ($(CORE), ARMV7)
-ifeq ($(OSNAME), Android)
-ifeq ($(ARM_SOFTFP_ABI), 1)
-CCOMMON_OPT += -mfpu=neon -march=armv7-a
-FCOMMON_OPT += -mfpu=neon -march=armv7-a
-else
-CCOMMON_OPT += -mfpu=neon -march=armv7-a -Wl,--no-warn-mismatch
-FCOMMON_OPT += -mfpu=neon -march=armv7-a -Wl,--no-warn-mismatch
-endif
-else
-CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
-FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
-endif
-endif
-
ifeq ($(CORE), ARMV6)
CCOMMON_OPT += -mfpu=vfp -march=armv6
FCOMMON_OPT += -mfpu=vfp -march=armv6
endif
-
ifeq ($(CORE), ARMV5)
-CCOMMON_OPT += -marm -march=armv5
-FCOMMON_OPT += -marm -march=armv5
+CCOMMON_OPT += -march=armv5
+FCOMMON_OPT += -march=armv5
endif
diff --git a/Makefile.system b/Makefile.system
index bb55dd693..c4cf619d0 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -242,6 +242,10 @@ EXTRALIB += -lm
NO_EXPRECISION = 1
endif
+ifeq ($(OSNAME), Android)
+EXTRALIB += -lm
+endif
+
ifeq ($(OSNAME), AIX)
EXTRALIB += -lm
endif
@@ -486,12 +490,10 @@ BINARY_DEFINED = 1
CCOMMON_OPT += -marm
FCOMMON_OPT += -marm
+# If softfp abi is mentioned on the command line, force it.
ifeq ($(ARM_SOFTFP_ABI), 1)
-CCOMMON_OPT += -mfloat-abi=softfp -DARM_SOFTFP_ABI
-FCOMMON_OPT += -mfloat-abi=softfp -DARM_SOFTFP_ABI
-else
-CCOMMON_OPT += -mfloat-abi=hard
-FCOMMON_OPT += -mfloat-abi=hard
+CCOMMON_OPT += -mfloat-abi=softfp
+FCOMMON_OPT += -mfloat-abi=softfp
endif
ifeq ($(OSNAME), Android)
diff --git a/common_arm.h b/common_arm.h
index a17acb448..27fa76b76 100644
--- a/common_arm.h
+++ b/common_arm.h
@@ -111,11 +111,6 @@ REALNAME:
#define PROFCODE
-#ifdef __ARM_PCS
-//-mfloat-abi=softfp
-#define SOFT_FLOAT_ABI
-#endif
-
#endif
diff --git a/kernel/arm/KERNEL.ARMV6 b/kernel/arm/KERNEL.ARMV6
index 16bde105b..960dae67b 100644
--- a/kernel/arm/KERNEL.ARMV6
+++ b/kernel/arm/KERNEL.ARMV6
@@ -1,7 +1,5 @@
+include $(KERNELDIR)/KERNEL.ARMV5
-
-
-###############################################################################
SAMAXKERNEL = iamax_vfp.S
DAMAXKERNEL = iamax_vfp.S
CAMAXKERNEL = iamax_vfp.S
@@ -44,10 +42,10 @@ DAXPYKERNEL = axpy_vfp.S
CAXPYKERNEL = axpy_vfp.S
ZAXPYKERNEL = axpy_vfp.S
-SCOPYKERNEL = copy.c
-DCOPYKERNEL = copy.c
-CCOPYKERNEL = zcopy.c
-ZCOPYKERNEL = zcopy.c
+SROTKERNEL = rot_vfp.S
+DROTKERNEL = rot_vfp.S
+CROTKERNEL = rot_vfp.S
+ZROTKERNEL = rot_vfp.S
SDOTKERNEL = sdot_vfp.S
DDOTKERNEL = ddot_vfp.S
@@ -59,16 +57,6 @@ DNRM2KERNEL = nrm2_vfp.S
CNRM2KERNEL = nrm2_vfp.S
ZNRM2KERNEL = nrm2_vfp.S
-SROTKERNEL = rot_vfp.S
-DROTKERNEL = rot_vfp.S
-CROTKERNEL = rot_vfp.S
-ZROTKERNEL = rot_vfp.S
-
-SSCALKERNEL = scal.c
-DSCALKERNEL = scal.c
-CSCALKERNEL = zscal.c
-ZSCALKERNEL = zscal.c
-
SSWAPKERNEL = swap_vfp.S
DSWAPKERNEL = swap_vfp.S
CSWAPKERNEL = swap_vfp.S
@@ -84,26 +72,25 @@ DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S
-STRMMKERNEL = strmm_kernel_4x2_vfp.S
-DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
-CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
-ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
-
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
+ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
SGEMMINCOPY = sgemm_ncopy_4_vfp.S
SGEMMITCOPY = sgemm_tcopy_4_vfp.S
SGEMMINCOPYOBJ = sgemm_incopy.o
SGEMMITCOPYOBJ = sgemm_itcopy.o
+endif
SGEMMONCOPY = sgemm_ncopy_2_vfp.S
-SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
-SGEMMONCOPYOBJ = sgemm_oncopy.o
-SGEMMOTCOPYOBJ = sgemm_otcopy.o
+SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
+SGEMMONCOPYOBJ = sgemm_oncopy.o
+SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = dgemm_kernel_4x2_vfp.S
+ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
DGEMMINCOPY = dgemm_ncopy_4_vfp.S
DGEMMITCOPY = dgemm_tcopy_4_vfp.S
DGEMMINCOPYOBJ = dgemm_incopy.o
DGEMMITCOPYOBJ = dgemm_itcopy.o
+endif
DGEMMONCOPY = dgemm_ncopy_2_vfp.S
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
@@ -121,26 +108,8 @@ ZGEMMOTCOPY = zgemm_tcopy_2_vfp.S
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
-STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
-STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
-
-DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
-DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
-
-CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
-CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
-
-ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
-ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
-
-
-
+STRMMKERNEL = strmm_kernel_4x2_vfp.S
+DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
+CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
+ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7
index d5cd94fbd..5e0b4cfb8 100644
--- a/kernel/arm/KERNEL.ARMV7
+++ b/kernel/arm/KERNEL.ARMV7
@@ -1,91 +1,12 @@
-
-#################################################################################
-SAMAXKERNEL = iamax_vfp.S
-DAMAXKERNEL = iamax_vfp.S
-CAMAXKERNEL = iamax_vfp.S
-ZAMAXKERNEL = iamax_vfp.S
-
-SAMINKERNEL = iamax_vfp.S
-DAMINKERNEL = iamax_vfp.S
-CAMINKERNEL = iamax_vfp.S
-ZAMINKERNEL = iamax_vfp.S
-
-SMAXKERNEL = iamax_vfp.S
-DMAXKERNEL = iamax_vfp.S
-
-SMINKERNEL = iamax_vfp.S
-DMINKERNEL = iamax_vfp.S
-
-ISAMAXKERNEL = iamax_vfp.S
-IDAMAXKERNEL = iamax_vfp.S
-ICAMAXKERNEL = iamax_vfp.S
-IZAMAXKERNEL = iamax_vfp.S
-
-ISAMINKERNEL = iamax_vfp.S
-IDAMINKERNEL = iamax_vfp.S
-ICAMINKERNEL = iamax_vfp.S
-IZAMINKERNEL = iamax_vfp.S
-
-ISMAXKERNEL = iamax_vfp.S
-IDMAXKERNEL = iamax_vfp.S
-
-ISMINKERNEL = iamax_vfp.S
-IDMINKERNEL = iamax_vfp.S
-
-SSWAPKERNEL = swap_vfp.S
-DSWAPKERNEL = swap_vfp.S
-CSWAPKERNEL = swap_vfp.S
-ZSWAPKERNEL = swap_vfp.S
-
-SASUMKERNEL = asum_vfp.S
-DASUMKERNEL = asum_vfp.S
-CASUMKERNEL = asum_vfp.S
-ZASUMKERNEL = asum_vfp.S
-
-SAXPYKERNEL = axpy_vfp.S
-DAXPYKERNEL = axpy_vfp.S
-CAXPYKERNEL = axpy_vfp.S
-ZAXPYKERNEL = axpy_vfp.S
-
-SCOPYKERNEL = copy.c
-DCOPYKERNEL = copy.c
-CCOPYKERNEL = zcopy.c
-ZCOPYKERNEL = zcopy.c
-
-SDOTKERNEL = sdot_vfp.S
-DDOTKERNEL = ddot_vfp.S
-CDOTKERNEL = cdot_vfp.S
-ZDOTKERNEL = zdot_vfp.S
+include $(KERNELDIR)/KERNEL.ARMV6
SNRM2KERNEL = nrm2_vfpv3.S
DNRM2KERNEL = nrm2_vfpv3.S
CNRM2KERNEL = nrm2_vfpv3.S
ZNRM2KERNEL = nrm2_vfpv3.S
-SROTKERNEL = rot_vfp.S
-DROTKERNEL = rot_vfp.S
-CROTKERNEL = rot_vfp.S
-ZROTKERNEL = rot_vfp.S
-
-SSCALKERNEL = scal.c
-DSCALKERNEL = scal.c
-CSCALKERNEL = zscal.c
-ZSCALKERNEL = zscal.c
-
SGEMVNKERNEL = gemv_n_vfpv3.S
DGEMVNKERNEL = gemv_n_vfpv3.S
-CGEMVNKERNEL = cgemv_n_vfp.S
-ZGEMVNKERNEL = zgemv_n_vfp.S
-
-SGEMVTKERNEL = gemv_t_vfp.S
-DGEMVTKERNEL = gemv_t_vfp.S
-CGEMVTKERNEL = cgemv_t_vfp.S
-ZGEMVTKERNEL = zgemv_t_vfp.S
-
-STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
-DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
-CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
-ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
@@ -100,35 +21,10 @@ DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
-CGEMMONCOPY = cgemm_ncopy_2_vfp.S
-CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
-CGEMMONCOPYOBJ = cgemm_oncopy.o
-CGEMMOTCOPYOBJ = cgemm_otcopy.o
-
ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
-ZGEMMONCOPY = zgemm_ncopy_2_vfp.S
-ZGEMMOTCOPY = zgemm_tcopy_2_vfp.S
-ZGEMMONCOPYOBJ = zgemm_oncopy.o
-ZGEMMOTCOPYOBJ = zgemm_otcopy.o
-
-STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
-STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
-
-DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
-DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
-
-CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
-CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
-
-ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
-ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
+DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
+CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
+ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
diff --git a/kernel/arm/asum_vfp.S b/kernel/arm/asum_vfp.S
index fe6242a5b..5b08e5028 100644
--- a/kernel/arm/asum_vfp.S
+++ b/kernel/arm/asum_vfp.S
@@ -475,6 +475,14 @@ asum_kernel_L999:
vadd.f32 s0 , s0, s1 // set return value
#endif
+#if !defined(__ARM_PCS_VFP)
+#if !defined(DOUBLE)
+ vmov r0, s0
+#else
+ vmov r0, r1, d0
+#endif
+#endif
+
bx lr
EPILOGUE
diff --git a/kernel/arm/axpy_vfp.S b/kernel/arm/axpy_vfp.S
index 8e5334f62..a407b04bd 100644
--- a/kernel/arm/axpy_vfp.S
+++ b/kernel/arm/axpy_vfp.S
@@ -38,18 +38,52 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define STACKSIZE 256
-#ifndef ARM_SOFTFP_ABI
-//hard
-#define OLD_INC_X [fp, #0 ]
-#define OLD_Y [fp, #4 ]
-#define OLD_INC_Y [fp, #8 ]
-#else
+#if !defined(__ARM_PCS_VFP)
+
+#if !defined(COMPLEX)
+
+#if !defined(DOUBLE)
+#define OLD_ALPHA r3
#define OLD_X [fp, #0 ]
#define OLD_INC_X [fp, #4 ]
#define OLD_Y [fp, #8 ]
#define OLD_INC_Y [fp, #12 ]
+#else
+#define OLD_ALPHA [fp, #0]
+#define OLD_X [fp, #8 ]
+#define OLD_INC_X [fp, #12 ]
+#define OLD_Y [fp, #16 ]
+#define OLD_INC_Y [fp, #20 ]
#endif
-
+
+#else //COMPLEX
+
+#if !defined(DOUBLE)
+#define OLD_ALPHAR r3
+#define OLD_ALPHAI [fp, #0 ]
+#define OLD_X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define OLD_Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+#else
+#define OLD_ALPHAR [fp, #0]
+#define OLD_ALPHAI [fp, #8]
+#define OLD_X [fp, #16 ]
+#define OLD_INC_X [fp, #20 ]
+#define OLD_Y [fp, #24 ]
+#define OLD_INC_Y [fp, #28 ]
+#endif
+
+#endif //!defined(COMPLEX)
+
+#else //__ARM_PCS_VFP
+
+#define OLD_INC_X [fp, #0 ]
+#define OLD_Y [fp, #4 ]
+#define OLD_INC_Y [fp, #8 ]
+
+#endif //!defined(__ARM_PCS_VFP)
+
#define N r0
#define Y r1
#define INC_X r2
@@ -370,13 +404,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #8
sub sp, sp, #STACKSIZE // reserve stack
-#ifdef ARM_SOFTFP_ABI
-#ifndef DOUBLE
- vmov s0, r3 //move alpha to s0
+#if !defined(__ARM_PCS_VFP)
+#if !defined(COMPLEX)
+#if !defined(DOUBLE)
+ vmov s0, OLD_ALPHA
ldr X, OLD_X
+#else
+ vldr d0, OLD_ALPHA
+ ldr X, OLD_X
+#endif
+#else //COMPLEX
+#if !defined(DOUBLE)
+ vmov s0, OLD_ALPHAR
+ vldr s1, OLD_ALPHAI
+ ldr X, OLD_X
+#else
+ vldr d0, OLD_ALPHAR
+ vldr d1, OLD_ALPHAI
+ ldr X, OLD_X
+#endif
#endif
#endif
-
+
ldr INC_X , OLD_INC_X
ldr Y, OLD_Y
ldr INC_Y , OLD_INC_Y
diff --git a/kernel/arm/cdot_vfp.S b/kernel/arm/cdot_vfp.S
index 0497b6d83..e5a6e4d35 100644
--- a/kernel/arm/cdot_vfp.S
+++ b/kernel/arm/cdot_vfp.S
@@ -41,8 +41,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define N r0
#define X r1
#define INC_X r2
-#define OLD_Y r3
-
/******************************************************
* [fp, #-128] - [fp, #-64] is reserved
@@ -50,7 +48,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* registers
*******************************************************/
-#define OLD_INC_Y [fp, #4 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_RETURN_ADDR r0
+#define OLD_N r1
+#define OLD_X r2
+#define OLD_INC_X r3
+#define OLD_Y [fp, #0 ]
+#define OLD_INC_Y [fp, #4 ]
+#define RETURN_ADDR r8
+#else
+#define OLD_Y r3
+#define OLD_INC_Y [fp, #0 ]
+#endif
#define I r5
#define Y r6
@@ -179,7 +188,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.align 5
push {r4 - r9, fp}
- add fp, sp, #24
+ add fp, sp, #28
sub sp, sp, #STACKSIZE // reserve stack
sub r4, fp, #128
@@ -191,8 +200,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmov s2, s0
vmov s3, s0
+#if !defined(__ARM_PCS_VFP)
+ mov RETURN_ADDR, OLD_RETURN_ADDR
+ mov N, OLD_N
+ mov X, OLD_X
+ mov INC_X, OLD_INC_X
+ ldr Y, OLD_Y
+ ldr INC_Y, OLD_INC_Y
+#else
mov Y, OLD_Y
ldr INC_Y, OLD_INC_Y
+#endif
cmp N, #0
ble cdot_kernel_L999
@@ -265,7 +283,6 @@ cdot_kernel_S10:
cdot_kernel_L999:
-
sub r3, fp, #128
vldm r3, { s8 - s15} // restore floating point registers
@@ -276,8 +293,11 @@ cdot_kernel_L999:
vadd.f32 s0 , s0, s2
vsub.f32 s1 , s1, s3
#endif
+#if !defined(__ARM_PCS_VFP)
+ vstm RETURN_ADDR, {s0 - s1}
+#endif
- sub sp, fp, #24
+ sub sp, fp, #28
pop {r4 - r9, fp}
bx lr
diff --git a/kernel/arm/cgemm_kernel_2x2_vfp.S b/kernel/arm/cgemm_kernel_2x2_vfp.S
index f0517cb47..512eea387 100644
--- a/kernel/arm/cgemm_kernel_2x2_vfp.S
+++ b/kernel/arm/cgemm_kernel_2x2_vfp.S
@@ -64,9 +64,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP r3
+#define OLD_ALPHAI_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #8 ]
+#define B [fp, #12 ]
+#define C [fp, #16 ]
+#define OLD_LDC [fp, #20 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
+#endif
#define I r0
#define J r1
@@ -816,6 +825,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+ vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/cgemm_kernel_2x2_vfpv3.S b/kernel/arm/cgemm_kernel_2x2_vfpv3.S
index cf132a184..42eb53a55 100644
--- a/kernel/arm/cgemm_kernel_2x2_vfpv3.S
+++ b/kernel/arm/cgemm_kernel_2x2_vfpv3.S
@@ -80,9 +80,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP r3
+#define OLD_ALPHAI_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #8 ]
+#define B [fp, #12 ]
+#define C [fp, #16 ]
+#define OLD_LDC [fp, #20 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
+#endif
#define I r0
#define J r1
@@ -873,6 +882,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+ vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/cgemv_n_vfp.S b/kernel/arm/cgemv_n_vfp.S
index 5d2748644..4a1cd2d45 100644
--- a/kernel/arm/cgemv_n_vfp.S
+++ b/kernel/arm/cgemv_n_vfp.S
@@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR r3
+#define OLD_ALPHAI [fp, #0 ]
+#define OLD_A_SOFTFP [fp, #4 ]
+#define OLD_LDA [fp, #8 ]
+#define X [fp, #12 ]
+#define OLD_INC_X [fp, #16 ]
+#define Y [fp, #20 ]
+#define OLD_INC_Y [fp, #24 ]
+#else
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+#endif
+
#define OLD_A r3
#define OLD_M r0
@@ -462,6 +474,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmp N, #0
ble cgemvn_kernel_L999
+#if !defined(__ARM_PCS_VFP)
+ vmov s0, OLD_ALPHAR
+ vldr s1, OLD_ALPHAI
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
+
str OLD_A, A
str OLD_M, M
vstr s0 , ALPHA_R
diff --git a/kernel/arm/cgemv_t_vfp.S b/kernel/arm/cgemv_t_vfp.S
index 76c8a8f18..e1c750c85 100644
--- a/kernel/arm/cgemv_t_vfp.S
+++ b/kernel/arm/cgemv_t_vfp.S
@@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR r3
+#define OLD_ALPHAI [fp, #0 ]
+#define OLD_A_SOFTFP [fp, #4 ]
+#define OLD_LDA [fp, #8 ]
+#define X [fp, #12 ]
+#define OLD_INC_X [fp, #16 ]
+#define Y [fp, #20 ]
+#define OLD_INC_Y [fp, #24 ]
+#else
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+#endif
+
#define OLD_A r3
#define OLD_N r1
@@ -359,6 +371,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmp OLD_N, #0
ble cgemvt_kernel_L999
+#if !defined(__ARM_PCS_VFP)
+ vmov s0, OLD_ALPHAR
+ vldr s1, OLD_ALPHAI
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
+
str OLD_A, A
str OLD_N, N
diff --git a/kernel/arm/ctrmm_kernel_2x2_vfp.S b/kernel/arm/ctrmm_kernel_2x2_vfp.S
index 8cb7ede9d..95578b10a 100644
--- a/kernel/arm/ctrmm_kernel_2x2_vfp.S
+++ b/kernel/arm/ctrmm_kernel_2x2_vfp.S
@@ -67,10 +67,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP r3
+#define OLD_ALPHAI_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #8 ]
+#define B [fp, #12 ]
+#define C [fp, #16 ]
+#define OLD_LDC [fp, #20 ]
+#define OFFSET [fp, #24 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
+#endif
#define I r0
#define J r1
@@ -826,6 +836,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+ vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/ctrmm_kernel_2x2_vfpv3.S b/kernel/arm/ctrmm_kernel_2x2_vfpv3.S
index 97bd88c69..18beb4e47 100644
--- a/kernel/arm/ctrmm_kernel_2x2_vfpv3.S
+++ b/kernel/arm/ctrmm_kernel_2x2_vfpv3.S
@@ -66,10 +66,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP r3
+#define OLD_ALPHAI_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #8 ]
+#define B [fp, #12 ]
+#define C [fp, #16 ]
+#define OLD_LDC [fp, #20 ]
+#define OFFSET [fp, #24 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
+#endif
#define I r0
#define J r1
@@ -846,6 +856,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+ vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/ddot_vfp.S b/kernel/arm/ddot_vfp.S
index f28acbae3..fb294d8b4 100644
--- a/kernel/arm/ddot_vfp.S
+++ b/kernel/arm/ddot_vfp.S
@@ -246,6 +246,9 @@ ddot_kernel_L999:
vldm r3, { d8 - d15} // restore floating point registers
vadd.f64 d0 , d0, d1 // set return value
+#if !defined(__ARM_PCS_VFP)
+ vmov r0, r1, d0
+#endif
sub sp, fp, #24
pop {r4 - r9, fp}
bx lr
diff --git a/kernel/arm/dgemm_kernel_4x2_vfp.S b/kernel/arm/dgemm_kernel_4x2_vfp.S
index 183269d1b..001a6050c 100644
--- a/kernel/arm/dgemm_kernel_4x2_vfp.S
+++ b/kernel/arm/dgemm_kernel_4x2_vfp.S
@@ -62,10 +62,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA [fp, #-280]
-
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHA_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #12 ]
+#define B [fp, #16 ]
+#define C [fp, #20 ]
+#define OLD_LDC [fp, #24 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
+#endif
#define I r0
#define J r1
@@ -429,6 +436,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/dgemm_kernel_4x4_vfpv3.S b/kernel/arm/dgemm_kernel_4x4_vfpv3.S
index b14052e06..1744b54d8 100644
--- a/kernel/arm/dgemm_kernel_4x4_vfpv3.S
+++ b/kernel/arm/dgemm_kernel_4x4_vfpv3.S
@@ -79,9 +79,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHA_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #12 ]
+#define B [fp, #16 ]
+#define C [fp, #20 ]
+#define OLD_LDC [fp, #24 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
+#endif
#define I r0
#define J r1
@@ -878,6 +886,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/dtrmm_kernel_4x2_vfp.S b/kernel/arm/dtrmm_kernel_4x2_vfp.S
index c578d2b1e..3d6fbf8e9 100644
--- a/kernel/arm/dtrmm_kernel_4x2_vfp.S
+++ b/kernel/arm/dtrmm_kernel_4x2_vfp.S
@@ -65,10 +65,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA [fp, #-276 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHA_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #12 ]
+#define B [fp, #16 ]
+#define OLD_C [fp, #20 ]
+#define OLD_LDC [fp, #24 ]
+#define OFFSET [fp, #28 ]
+#else
#define B [fp, #4 ]
#define OLD_C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
+#endif
#define I r0
#define J r1
@@ -404,6 +413,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/dtrmm_kernel_4x4_vfpv3.S b/kernel/arm/dtrmm_kernel_4x4_vfpv3.S
index c7e455f16..c0c6a1677 100644
--- a/kernel/arm/dtrmm_kernel_4x4_vfpv3.S
+++ b/kernel/arm/dtrmm_kernel_4x4_vfpv3.S
@@ -66,10 +66,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA [fp, #-276 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHA_SOFTFP [fp, #4]
+#define OLD_A_SOFTFP [fp, #12 ]
+#define B [fp, #16 ]
+#define OLD_C [fp, #20 ]
+#define OLD_LDC [fp, #24 ]
+#define OFFSET [fp, #28 ]
+#else
#define B [fp, #4 ]
#define OLD_C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
+#endif
#define I r0
#define J r1
@@ -846,6 +855,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/gemv_n_vfp.S b/kernel/arm/gemv_n_vfp.S
index 385370b7f..7c154d741 100644
--- a/kernel/arm/gemv_n_vfp.S
+++ b/kernel/arm/gemv_n_vfp.S
@@ -38,11 +38,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP)
+
+#if !defined(DOUBLE)
+#define OLD_ALPHA r3
+#define OLD_A_SOFTFP [fp, #0 ]
+#define OLD_LDA [fp, #4 ]
+#define X [fp, #8 ]
+#define OLD_INC_X [fp, #12 ]
+#define Y [fp, #16 ]
+#define OLD_INC_Y [fp, #20 ]
+#else
+#define OLD_ALPHA [fp, #0 ]
+#define OLD_A_SOFTFP [fp, #8 ]
+#define OLD_LDA [fp, #12]
+#define X [fp, #16]
+#define OLD_INC_X [fp, #20]
+#define Y [fp, #24]
+#define OLD_INC_Y [fp, #28]
+#endif
+
+#else
+
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+
+#endif
+
#define OLD_A r3
#define OLD_M r0
@@ -508,6 +533,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmp N, #0
ble gemvn_kernel_L999
+#if !defined(__ARM_PCS_VFP)
+#if !defined(DOUBLE)
+ vmov s0, OLD_ALPHA
+#else
+ vldr d0, OLD_ALPHA
+#endif
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
+
str OLD_A, A
str OLD_M, M
diff --git a/kernel/arm/gemv_n_vfpv3.S b/kernel/arm/gemv_n_vfpv3.S
index 93bf23e49..54f958b7b 100644
--- a/kernel/arm/gemv_n_vfpv3.S
+++ b/kernel/arm/gemv_n_vfpv3.S
@@ -38,25 +38,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define STACKSIZE 256
-#ifndef ARM_SOFTFP_ABI
-//hard
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
-#define OLD_A r3
-#else
-#define OLD_A_SOFTFP [fp, #0 ]
-#define OLD_LDA [fp, #4 ]
-#define X [fp, #8 ]
-#define OLD_INC_X [fp, #12 ]
-#define Y [fp, #16 ]
-#define OLD_INC_Y [fp, #20 ]
+#if !defined(__ARM_PCS_VFP)
+
+#if !defined(DOUBLE)
#define OLD_ALPHA r3
-#define OLD_A r3
+#define OLD_A_SOFTFP [fp, #0 ]
+#define OLD_LDA [fp, #4 ]
+#define X [fp, #8 ]
+#define OLD_INC_X [fp, #12 ]
+#define Y [fp, #16 ]
+#define OLD_INC_Y [fp, #20 ]
+#else
+#define OLD_ALPHA [fp, #0 ]
+#define OLD_A_SOFTFP [fp, #8 ]
+#define OLD_LDA [fp, #12]
+#define X [fp, #16]
+#define OLD_INC_X [fp, #20]
+#define Y [fp, #24]
+#define OLD_INC_Y [fp, #28]
+#endif
+
+#else
+
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+
#endif
+#define OLD_A r3
#define OLD_M r0
#define AO1 r0
@@ -565,18 +577,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmp N, #0
ble gemvn_kernel_L999
-#ifndef DOUBLE
-#ifdef ARM_SOFTFP_ABI
-
- vmov s0, OLD_ALPHA
- ldr OLD_A, OLD_A_SOFTFP
+#if !defined(__ARM_PCS_VFP)
+#if !defined(DOUBLE)
+ vmov s0, OLD_ALPHA
+#else
+ vldr d0, OLD_ALPHA
#endif
+ ldr OLD_A, OLD_A_SOFTFP
#endif
str OLD_A, A
str OLD_M, M
-
-
+
ldr INC_X , OLD_INC_X
ldr INC_Y , OLD_INC_Y
diff --git a/kernel/arm/gemv_t_vfp.S b/kernel/arm/gemv_t_vfp.S
index 816be54ff..9559d1829 100644
--- a/kernel/arm/gemv_t_vfp.S
+++ b/kernel/arm/gemv_t_vfp.S
@@ -38,25 +38,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define STACKSIZE 256
-#ifndef ARM_SOFTFP_ABI
-//hard abi
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
-#define OLD_A r3
-#else
-#define OLD_A_SOFTFP [fp, #0 ]
-#define OLD_LDA [fp, #4 ]
-#define X [fp, #8 ]
-#define OLD_INC_X [fp, #12 ]
-#define Y [fp, #16 ]
-#define OLD_INC_Y [fp, #20 ]
+#if !defined(__ARM_PCS_VFP)
+
+#if !defined(DOUBLE)
#define OLD_ALPHA r3
-#define OLD_A r3
+#define OLD_A_SOFTFP [fp, #0 ]
+#define OLD_LDA [fp, #4 ]
+#define X [fp, #8 ]
+#define OLD_INC_X [fp, #12 ]
+#define Y [fp, #16 ]
+#define OLD_INC_Y [fp, #20 ]
+#else
+#define OLD_ALPHA [fp, #0 ]
+#define OLD_A_SOFTFP [fp, #8 ]
+#define OLD_LDA [fp, #12]
+#define X [fp, #16]
+#define OLD_INC_X [fp, #20]
+#define Y [fp, #24]
+#define OLD_INC_Y [fp, #28]
#endif
+#else
+
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+
+#endif
+
+#define OLD_A r3
#define OLD_N r1
#define M r0
@@ -518,11 +530,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmp OLD_N, #0
ble gemvt_kernel_L999
-#ifndef DOUBLE
-#ifdef ARM_SOFTFP_ABI
- vmov s0, OLD_ALPHA
- ldr OLD_A, OLD_A_SOFTFP
+#if !defined(__ARM_PCS_VFP)
+#if !defined(DOUBLE)
+ vmov s0, OLD_ALPHA
+#else
+ vldr d0, OLD_ALPHA
#endif
+ ldr OLD_A, OLD_A_SOFTFP
#endif
str OLD_A, A
diff --git a/kernel/arm/gemv_t_vfpv3.S b/kernel/arm/gemv_t_vfpv3.S
index 7ae5799bc..b1d3dadf1 100644
--- a/kernel/arm/gemv_t_vfpv3.S
+++ b/kernel/arm/gemv_t_vfpv3.S
@@ -38,11 +38,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP)
+
+#if !defined(DOUBLE)
+#define OLD_ALPHA r3
+#define OLD_A_SOFTFP [fp, #0 ]
+#define OLD_LDA [fp, #4 ]
+#define X [fp, #8 ]
+#define OLD_INC_X [fp, #12 ]
+#define Y [fp, #16 ]
+#define OLD_INC_Y [fp, #20 ]
+#else
+#define OLD_ALPHA [fp, #0 ]
+#define OLD_A_SOFTFP [fp, #8 ]
+#define OLD_LDA [fp, #12]
+#define X [fp, #16]
+#define OLD_INC_X [fp, #20]
+#define Y [fp, #24]
+#define OLD_INC_Y [fp, #28]
+#endif
+
+#else
+
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+
+#endif
+
#define OLD_A r3
#define OLD_N r1
@@ -476,6 +501,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmp OLD_N, #0
ble gemvt_kernel_L999
+#if !defined(__ARM_PCS_VFP)
+#if !defined(DOUBLE)
+ vmov s0, OLD_ALPHA
+#else
+ vldr d0, OLD_ALPHA
+#endif
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
+
str OLD_A, A
str OLD_N, N
diff --git a/kernel/arm/nrm2_vfp.S b/kernel/arm/nrm2_vfp.S
index b3bd28152..16ac5a632 100644
--- a/kernel/arm/nrm2_vfp.S
+++ b/kernel/arm/nrm2_vfp.S
@@ -574,6 +574,13 @@ nrm2_kernel_L999:
vsqrt.f32 s1, s1
vmul.f32 s0, s0, s1
#endif
+#if !defined(__ARM_PCS_VFP)
+#if !defined(DOUBLE)
+ vmov r0, s0
+#else
+ vmov r0, r1, d0
+#endif
+#endif
bx lr
diff --git a/kernel/arm/nrm2_vfpv3.S b/kernel/arm/nrm2_vfpv3.S
index 7af966895..84977901d 100644
--- a/kernel/arm/nrm2_vfpv3.S
+++ b/kernel/arm/nrm2_vfpv3.S
@@ -503,8 +503,13 @@ nrm2_kernel_L999:
#else
vsqrt.f32 s1, s1
vmul.f32 s0, s0, s1
-#ifdef ARM_SOFTFP_ABI
- vmov r0, s0
+#endif
+
+#if !defined(__ARM_PCS_VFP)
+#if defined(DOUBLE)
+ vmov r0, r1, d0
+#else
+ vmov r0, s0
#endif
#endif
diff --git a/kernel/arm/rot_vfp.S b/kernel/arm/rot_vfp.S
index d053423b6..6e679ecf9 100644
--- a/kernel/arm/rot_vfp.S
+++ b/kernel/arm/rot_vfp.S
@@ -40,6 +40,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define OLD_INC_Y [fp, #0 ]
+#if !defined(__ARM_PCS_VFP)
+#if !defined(DOUBLE)
+#define OLD_C [fp, #4]
+#define OLD_S [fp, #8]
+#else
+#define OLD_C [fp, #8]
+#define OLD_S [fp, #16]
+#endif
+#endif
#define N r0
#define X r1
@@ -462,7 +471,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #8
ldr INC_Y , OLD_INC_Y
-
+#if !defined(__ARM_PCS_VFP)
+#if !defined(DOUBLE)
+ vldr s0, OLD_C
+ vldr s1, OLD_S
+#else
+ vldr d0, OLD_C
+ vldr d1, OLD_S
+#endif
+#endif
cmp N, #0
ble rot_kernel_L999
diff --git a/kernel/arm/sdot_vfp.S b/kernel/arm/sdot_vfp.S
index f3abdc197..5f4f424bf 100644
--- a/kernel/arm/sdot_vfp.S
+++ b/kernel/arm/sdot_vfp.S
@@ -329,20 +329,19 @@ sdot_kernel_L999:
vldm r3, { s8 - s15} // restore floating point registers
#if defined(DSDOT)
-
vadd.f64 d0 , d0, d1 // set return value
-
-#ifdef ARM_SOFTFP_ABI
- vmov r0, r1, d0
+#else
+ vadd.f32 s0 , s0, s1 // set return value
#endif
+#if !defined(__ARM_PCS_VFP)
+#if defined(DSDOT)
+ vmov r0, r1, d0
#else
-
- vadd.f32 s0 , s0, s1 // set return value
-#ifdef ARM_SOFTFP_ABI
vmov r0, s0
#endif
#endif
+
sub sp, fp, #24
pop {r4 - r9, fp}
bx lr
diff --git a/kernel/arm/sgemm_kernel_4x2_vfp.S b/kernel/arm/sgemm_kernel_4x2_vfp.S
index e8b44b742..1f21e5a1f 100644
--- a/kernel/arm/sgemm_kernel_4x2_vfp.S
+++ b/kernel/arm/sgemm_kernel_4x2_vfp.S
@@ -62,9 +62,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHA_SOFTFP r3
+#define OLD_A_SOFTFP [fp, #4 ]
+#define B [fp, #8 ]
+#define C [fp, #12 ]
+#define OLD_LDC [fp, #16 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
+#endif
#define I r0
#define J r1
@@ -416,6 +424,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vmov OLD_ALPHA, OLD_ALPHA_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/sgemm_kernel_4x4_vfpv3.S b/kernel/arm/sgemm_kernel_4x4_vfpv3.S
index 86198ac90..6491d3571 100644
--- a/kernel/arm/sgemm_kernel_4x4_vfpv3.S
+++ b/kernel/arm/sgemm_kernel_4x4_vfpv3.S
@@ -58,14 +58,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define OLD_M r0
#define OLD_N r1
#define OLD_K r2
-
-#ifdef ARM_SOFTFP_ABI
-#define OLD_ALPHA r3
-//#define OLD_A
-#else //hard
#define OLD_A r3
#define OLD_ALPHA s0
-#endif
/******************************************************
* [fp, #-128] - [fp, #-64] is reserved
@@ -77,10 +71,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define M [fp, #-256 ]
#define N [fp, #-260 ]
#define K [fp, #-264 ]
-
-#ifndef ARM_SOFTFP_ABI
#define A [fp, #-268 ]
-#endif
#define FP_ZERO [fp, #-240]
#define FP_ZERO_0 [fp, #-240]
@@ -88,17 +79,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA [fp, #-280]
-#ifdef ARM_SOFTFP_ABI
-#define A [fp, #4 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHA_SOFTFP r3
+#define OLD_A_SOFTFP [fp, #4 ]
#define B [fp, #8 ]
#define C [fp, #12 ]
#define OLD_LDC [fp, #16 ]
-#else //hard
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#endif
-
+
#define I r0
#define J r1
#define L r2
@@ -867,16 +859,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vmov OLD_ALPHA, OLD_ALPHA_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
+
str OLD_M, M
str OLD_N, N
str OLD_K, K
-
-#ifdef ARM_SOFTFP_ABI
- str OLD_ALPHA, ALPHA
-#else //hard
str OLD_A, A
vstr OLD_ALPHA, ALPHA
-#endif
+
sub r3, fp, #128
vstm r3, { s8 - s31} // store floating point registers
diff --git a/kernel/arm/strmm_kernel_4x2_vfp.S b/kernel/arm/strmm_kernel_4x2_vfp.S
index 8f97644ec..635b1dd13 100644
--- a/kernel/arm/strmm_kernel_4x2_vfp.S
+++ b/kernel/arm/strmm_kernel_4x2_vfp.S
@@ -65,10 +65,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA [fp, #-276 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHA_SOFTFP r3
+#define OLD_A_SOFTFP [fp, #4 ]
+#define B [fp, #8 ]
+#define OLD_C [fp, #12 ]
+#define OLD_LDC [fp, #16 ]
+#define OFFSET [fp, #20 ]
+#else
#define B [fp, #4 ]
#define OLD_C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
+#endif
#define I r0
#define J r1
@@ -395,6 +404,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vmov OLD_ALPHA, OLD_ALPHA_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/strmm_kernel_4x4_vfpv3.S b/kernel/arm/strmm_kernel_4x4_vfpv3.S
index 0dd03ac85..e24d24eba 100644
--- a/kernel/arm/strmm_kernel_4x4_vfpv3.S
+++ b/kernel/arm/strmm_kernel_4x4_vfpv3.S
@@ -64,10 +64,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHA_SOFTFP r3
+#define OLD_A_SOFTFP [fp, #4 ]
+#define B [fp, #8 ]
+#define C [fp, #12 ]
+#define OLD_LDC [fp, #16 ]
+#define OFFSET [fp, #20 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
+#endif
#define I r0
#define J r1
@@ -782,6 +791,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vmov OLD_ALPHA, OLD_ALPHA_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/swap_vfp.S b/kernel/arm/swap_vfp.S
index 352875188..76661da79 100644
--- a/kernel/arm/swap_vfp.S
+++ b/kernel/arm/swap_vfp.S
@@ -38,9 +38,43 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define STACKSIZE 256
+#if !defined(__ARM_PCS_VFP)
+
+#if !defined(COMPLEX)
+
+#if !defined(DOUBLE)
+#define OLD_X [fp, #0 ]
+#define OLD_INC_X [fp, #4 ]
+#define OLD_Y [fp, #8 ]
+#define OLD_INC_Y [fp, #12 ]
+#else //DOUBLE
+#define OLD_X [fp, #8 ]
+#define OLD_INC_X [fp, #12]
+#define OLD_Y [fp, #16]
+#define OLD_INC_Y [fp, #20]
+#endif // !defined(DOUBLE)
+
+#else //COMPLEX
+
+#if !defined(DOUBLE)
+#define OLD_X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define OLD_Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+#else //DOUBLE
+#define OLD_X [fp, #16]
+#define OLD_INC_X [fp, #20]
+#define OLD_Y [fp, #24]
+#define OLD_INC_Y [fp, #28]
+#endif // !defined(DOUBLE)
+
+#endif // !defined(COMPLEX)
+
+#else // defined(__ARM_PCS_VFP)
#define OLD_INC_X [fp, #0 ]
#define OLD_Y [fp, #4 ]
#define OLD_INC_Y [fp, #8 ]
+#endif // !defined(__ARM_PCS_VFP)
#define N r0
@@ -229,6 +263,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
push {r4 , fp}
add fp, sp, #8
+#if !defined(__ARM_PCS_VFP)
+ ldr X, OLD_X
+#endif
ldr INC_X , OLD_INC_X
ldr Y, OLD_Y
ldr INC_Y , OLD_INC_Y
diff --git a/kernel/arm/zdot_vfp.S b/kernel/arm/zdot_vfp.S
index 936ce9f60..43f2c0c0b 100644
--- a/kernel/arm/zdot_vfp.S
+++ b/kernel/arm/zdot_vfp.S
@@ -41,8 +41,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define N r0
#define X r1
#define INC_X r2
-#define OLD_Y r3
-
/******************************************************
* [fp, #-128] - [fp, #-64] is reserved
@@ -50,7 +48,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* registers
*******************************************************/
-#define OLD_INC_Y [fp, #4 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_RETURN_ADDR r0
+#define OLD_N r1
+#define OLD_X r2
+#define OLD_INC_X r3
+#define OLD_Y [fp, #0 ]
+#define OLD_INC_Y [fp, #4 ]
+#define RETURN_ADDR r8
+#else
+#define OLD_Y r3
+#define OLD_INC_Y [fp, #0 ]
+#endif
#define I r5
#define Y r6
@@ -181,7 +190,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.align 5
push {r4 - r9, fp}
- add fp, sp, #24
+ add fp, sp, #28
sub sp, sp, #STACKSIZE // reserve stack
sub r4, fp, #128
@@ -194,9 +203,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vcvt.f64.f32 d2, s0
vcvt.f64.f32 d3, s0
+#if !defined(__ARM_PCS_VFP)
+ mov RETURN_ADDR, OLD_RETURN_ADDR
+ mov N, OLD_N
+ mov X, OLD_X
+ mov INC_X, OLD_INC_X
+ ldr Y, OLD_Y
+ ldr INC_Y, OLD_INC_Y
+#else
mov Y, OLD_Y
ldr INC_Y, OLD_INC_Y
-
+#endif
cmp N, #0
ble zdot_kernel_L999
@@ -280,8 +297,11 @@ zdot_kernel_L999:
vadd.f64 d0 , d0, d2
vsub.f64 d1 , d1, d3
#endif
+#if !defined(__ARM_PCS_VFP)
+ vstm RETURN_ADDR, {d0 - d1}
+#endif
- sub sp, fp, #24
+ sub sp, fp, #28
pop {r4 - r9, fp}
bx lr
diff --git a/kernel/arm/zgemm_kernel_2x2_vfp.S b/kernel/arm/zgemm_kernel_2x2_vfp.S
index 46507c4d2..618f09781 100644
--- a/kernel/arm/zgemm_kernel_2x2_vfp.S
+++ b/kernel/arm/zgemm_kernel_2x2_vfp.S
@@ -64,9 +64,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP [fp, #4]
+#define OLD_ALPHAI_SOFTFP [fp, #12]
+#define OLD_A_SOFTFP [fp, #20 ]
+#define B [fp, #24 ]
+#define C [fp, #28 ]
+#define OLD_LDC [fp, #32 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
+#endif
#define I r0
#define J r1
@@ -863,6 +872,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vldr OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+ vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/zgemm_kernel_2x2_vfpv3.S b/kernel/arm/zgemm_kernel_2x2_vfpv3.S
index 5a99f792f..0fe0c1993 100644
--- a/kernel/arm/zgemm_kernel_2x2_vfpv3.S
+++ b/kernel/arm/zgemm_kernel_2x2_vfpv3.S
@@ -80,9 +80,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP [fp, #4]
+#define OLD_ALPHAI_SOFTFP [fp, #12]
+#define OLD_A_SOFTFP [fp, #20 ]
+#define B [fp, #24 ]
+#define C [fp, #28 ]
+#define OLD_LDC [fp, #32 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
+#endif
#define I r0
#define J r1
@@ -909,6 +918,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vldr OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+ vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/zgemv_n_vfp.S b/kernel/arm/zgemv_n_vfp.S
index da9a91043..7d5567849 100644
--- a/kernel/arm/zgemv_n_vfp.S
+++ b/kernel/arm/zgemv_n_vfp.S
@@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR [fp, #0 ]
+#define OLD_ALPHAI [fp, #8 ]
+#define OLD_A_SOFTFP [fp, #16]
+#define OLD_LDA [fp, #20]
+#define X [fp, #24]
+#define OLD_INC_X [fp, #28]
+#define Y [fp, #32]
+#define OLD_INC_Y [fp, #36]
+#else
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+#endif
+
#define OLD_A r3
#define OLD_M r0
@@ -465,6 +477,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmp N, #0
ble zgemvn_kernel_L999
+#if !defined(__ARM_PCS_VFP)
+ vldr d0, OLD_ALPHAR
+ vldr d1, OLD_ALPHAI
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
+
str OLD_A, A
str OLD_M, M
vstr d0 , ALPHA_R
diff --git a/kernel/arm/zgemv_t_vfp.S b/kernel/arm/zgemv_t_vfp.S
index 211fa0701..407026166 100644
--- a/kernel/arm/zgemv_t_vfp.S
+++ b/kernel/arm/zgemv_t_vfp.S
@@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define STACKSIZE 256
-#define OLD_LDA [fp, #0 ]
-#define X [fp, #4 ]
-#define OLD_INC_X [fp, #8 ]
-#define Y [fp, #12 ]
-#define OLD_INC_Y [fp, #16 ]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR [fp, #0 ]
+#define OLD_ALPHAI [fp, #8 ]
+#define OLD_A_SOFTFP [fp, #16]
+#define OLD_LDA [fp, #20]
+#define X [fp, #24]
+#define OLD_INC_X [fp, #28]
+#define Y [fp, #32]
+#define OLD_INC_Y [fp, #36]
+#else
+#define OLD_LDA [fp, #0 ]
+#define X [fp, #4 ]
+#define OLD_INC_X [fp, #8 ]
+#define Y [fp, #12 ]
+#define OLD_INC_Y [fp, #16 ]
+#endif
+
#define OLD_A r3
#define OLD_N r1
@@ -360,6 +372,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmp OLD_N, #0
ble zgemvt_kernel_L999
+#if !defined(__ARM_PCS_VFP)
+ vldr d0, OLD_ALPHAR
+ vldr d1, OLD_ALPHAI
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
+
str OLD_A, A
str OLD_N, N
diff --git a/kernel/arm/ztrmm_kernel_2x2_vfp.S b/kernel/arm/ztrmm_kernel_2x2_vfp.S
index dc80b17b8..78d09a9c7 100644
--- a/kernel/arm/ztrmm_kernel_2x2_vfp.S
+++ b/kernel/arm/ztrmm_kernel_2x2_vfp.S
@@ -66,10 +66,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP [fp, #4]
+#define OLD_ALPHAI_SOFTFP [fp, #12]
+#define OLD_A_SOFTFP [fp, #20 ]
+#define B [fp, #24 ]
+#define C [fp, #28 ]
+#define OLD_LDC [fp, #32 ]
+#define OFFSET [fp, #36 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
+#endif
#define I r0
#define J r1
@@ -882,6 +892,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vldr OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+ vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K
diff --git a/kernel/arm/ztrmm_kernel_2x2_vfpv3.S b/kernel/arm/ztrmm_kernel_2x2_vfpv3.S
index 5a808ccbc..bf72ce605 100644
--- a/kernel/arm/ztrmm_kernel_2x2_vfpv3.S
+++ b/kernel/arm/ztrmm_kernel_2x2_vfpv3.S
@@ -66,10 +66,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
+#if !defined(__ARM_PCS_VFP)
+#define OLD_ALPHAR_SOFTFP [fp, #4]
+#define OLD_ALPHAI_SOFTFP [fp, #12]
+#define OLD_A_SOFTFP [fp, #20 ]
+#define B [fp, #24 ]
+#define C [fp, #28 ]
+#define OLD_LDC [fp, #32 ]
+#define OFFSET [fp, #36 ]
+#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
+#endif
#define I r0
#define J r1
@@ -883,6 +893,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
+#if !defined(__ARM_PCS_VFP)
+ vldr OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
+ vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
+ ldr OLD_A, OLD_A_SOFTFP
+#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K