diff options
Diffstat (limited to 'kernel/mips64/sgemm_kernel_loongson3a_4x4.S')
-rw-r--r-- | kernel/mips64/sgemm_kernel_loongson3a_4x4.S | 414 |
1 files changed, 207 insertions, 207 deletions
diff --git a/kernel/mips64/sgemm_kernel_loongson3a_4x4.S b/kernel/mips64/sgemm_kernel_loongson3a_4x4.S index 4a8c9b0e4..10c5f47de 100644 --- a/kernel/mips64/sgemm_kernel_loongson3a_4x4.S +++ b/kernel/mips64/sgemm_kernel_loongson3a_4x4.S @@ -110,7 +110,7 @@ #define F27 27 #define F26 26 #define F25 25 -#define F24 24 +#define F24 24 #define F23 23 #define F22 22 #define F21 21 @@ -118,7 +118,7 @@ #define F19 19 #define F18 18 #define F17 17 -#define F16 16 +#define F16 16 #define F15 15 #define F14 14 #define F13 13 @@ -130,14 +130,14 @@ #define F7 7 #define F6 6 #define F5 5 -#define F4 4 -#define F3 3 -#define F2 2 -#define F1 1 +#define F4 4 +#define F3 3 +#define F2 2 +#define F1 1 #define F0 0 PROLOGUE - + daddiu $sp, $sp, -160 sd $16, 0($sp) sd $17, 8($sp) @@ -160,7 +160,7 @@ ST $f23,144($sp) - .align 5 + .align 5 .L0_N4: # Loop N ST ALPHA,152($sp) # Backup ALPHA move MCO,M # Backup M @@ -170,26 +170,26 @@ move AO,A # Backup A_addr dsra N,NCO,2 # N=NCO/2 - + dsll LDC,LDC,BASE_SHIFT # LDC*8Byte dsll SPANB,KCO,2+BASE_SHIFT # SPANB=KC*4nr*8Byte=KC*2^5 - + #if defined(TRMMKERNEL) - LDARG OFFSET,160($sp) # OFFSET is relate to the data part + LDARG OFFSET,160($sp) # OFFSET is relate to the data part #endif #if defined(TRMMKERNEL) && !defined(LEFT) - neg KK,OFFSET + neg KK,OFFSET #endif - + move BO,B # Backup B_addr beq N,$0,.L0_N2 # N=0,NCO<4 dsll SPANA,KCO,1+BASE_SHIFT # SPANA = KCO*2mr*8Byte .L0_N4_Lb: # mr=4,nr=4 - move CO1,C + move CO1,C dsra M,MCO,2 # M=MCO/2 - + move A,AO # Reset A daddu CO2,C,LDC @@ -200,7 +200,7 @@ daddu CO4,CO3,LDC #if defined(TRMMKERNEL) && defined(LEFT) - move KK,OFFSET + move KK,OFFSET #endif beqz M,.L14_M2 daddu C,CO4,LDC # move C to next panel Cj @@ -227,18 +227,18 @@ MOV t41,t11 MOV t12,t11 LD b0,0(B) - + MOV t22,t11 MOV t32,t11 LD b1,1*SIZE(B) MOV t42,t11 LD a2,2*SIZE(A) - + MOV t13,t11 MOV t23,t11 LD b2,2*SIZE(B) - + MOV t33,t11 MOV t43,t11 LD a3,3*SIZE(A) @@ -250,7 +250,7 @@ #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) dsubu TEMP,KCO,KK # temp is the length of the data part #elif defined(LEFT) - daddiu TEMP, KK, 4 # S=L,U=L + daddiu TEMP, KK, 4 # S=L,U=L #else daddiu TEMP, KK, 4 # S=R,U=U,for this two situation KK is the length of the data part #endif @@ -259,7 +259,7 @@ beqz K,.L15 MOV t44,t11 -#else +#else move B,BO # Reset B MTC $0,t11 # GEMM part NR=4,MR=4 LD a0,0(A) @@ -271,7 +271,7 @@ MOV t41,t11 MOV t12,t11 LD b0,0(B) - + MOV t22,t11 MOV t32,t11 LD b1,1*SIZE(B) @@ -279,11 +279,11 @@ MOV t42,t11 dsra K,KCO,2 # K=KCO/2 LD a2,2*SIZE(A) - + MOV t13,t11 MOV t23,t11 LD b2,2*SIZE(B) - + MOV t33,t11 MOV t43,t11 LD a3,3*SIZE(A) @@ -296,7 +296,7 @@ beqz K,.L15 MOV t44,t11 # clear 16 results registers #endif - + .align 5 .L11: # kr=4 MADD t11,t11,a0,b0 @@ -306,29 +306,29 @@ MADD t12,t12,a0,b1 MADD t22,t22,a1,b1 LD a5,5*SIZE(A) - + MADD t31,t31,a2,b0 MADD t41,t41,a3,b0 LD b4,4*SIZE(B) - + MADD t32,t32,a2,b1 MADD t42,t42,a3,b1 LD b5,5*SIZE(B) FETCH $0,(PREB) - + MADD t13,t13,a0,b2 MADD t23,t23,a1,b2 LD a6,6*SIZE(A) - + MADD t14,t14,a0,b3 MADD t24,t24,a1,b3 LD b6,6*SIZE(B) FETCH $0,(PREA) - + MADD t33,t33,a2,b2 MADD t43,t43,a3,b2 LD a7,7*SIZE(A) - + MADD t34,t34,a2,b3 MADD t44,t44,a3,b3 LD b7,7*SIZE(B) @@ -447,14 +447,14 @@ .L15: # kr=2 #ifndef TRMMKERNEL - andi K,KCO,2 + andi K,KCO,2 #else andi K,TEMP, 2 #endif beqz K,.L18 nop -.L16: +.L16: MADD t11,t11,a0,b0 MADD t21,t21,a1,b0 LD a4,4*SIZE(A) @@ -528,16 +528,16 @@ daddu PREB,PREB,8*SIZE LD b3,3*SIZE(B) - + .L18: # kr=1 #ifndef TRMMKERNEL andi K,KCO,1 #else andi K,TEMP,1 #endif - beqz K,.L19 + beqz K,.L19 LD ALPHA,152($sp) # Get ALPHA - + FETCH $0,0(PREB) MADD t11,t11,a0,b0 MADD t21,t21,a1,b0 @@ -569,8 +569,8 @@ MADD t44,t44,a3,b3 .L19: # Write Back to C -#ifndef TRMMKERNEL - LD c11,0(CO1) # GEMM write part +#ifndef TRMMKERNEL + LD c11,0(CO1) # GEMM write part LD c21,1*SIZE(CO1) # get 16 C LD c31,2*SIZE(CO1) LD c41,3*SIZE(CO1) @@ -640,11 +640,11 @@ daddu CO3,CO3,4*SIZE ST t44,3*SIZE(CO4) daddu PREB,BO,SPANB - - bnez M,.L10 + + bnez M,.L10 daddu CO4,CO4,4*SIZE -#else +#else MUL t11, ALPHA, t11 # TRMM write back part MUL t21, ALPHA, t21 MUL t31, ALPHA, t31 @@ -685,7 +685,7 @@ daddiu CO1,CO1, 4 * SIZE daddiu CO2,CO2, 4 * SIZE daddiu CO3,CO3, 4 * SIZE - daddiu CO4,CO4, 4 * SIZE + daddiu CO4,CO4, 4 * SIZE FETCH $0,4*SIZE(CO1) FETCH $0,4*SIZE(CO2) @@ -698,7 +698,7 @@ FETCH $0,0(CO4) #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) - dsubu TEMP,KCO,KK + dsubu TEMP,KCO,KK #ifdef LEFT daddiu TEMP,TEMP, -4 #else @@ -710,10 +710,10 @@ daddu B,B,TEMP # mov B to the end of panel Bj #endif -#ifdef LEFT +#ifdef LEFT daddiu KK, KK,4 #endif - bnez M,.L10 + bnez M,.L10 nop #endif @@ -721,7 +721,7 @@ .align 3 .L14_M2: andi M, MCO, 2 # nr=4,mr=2 - beqz M,.L14_M1 + beqz M,.L14_M1 nop .L20: @@ -729,7 +729,7 @@ #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) move B,BO # Reset B #else - dsll K,KK,1 + BASE_SHIFT # mr=2 + dsll K,KK,1 + BASE_SHIFT # mr=2 dsll TEMP,KK,2 + BASE_SHIFT # nr=4 daddu A,A,K daddu B,BO,TEMP @@ -738,7 +738,7 @@ LD a0,0*SIZE(A) MTC $0,t11 LD a1,1*SIZE(A) - + MOV t21,t11 LD b0,0*SIZE(B) MOV t12,t11 @@ -764,18 +764,18 @@ MOV t24,t11 # clear 2*4=8 results registers #else - move B,BO # Reset B + move B,BO # Reset B LD a0,0*SIZE(A) MTC $0,t11 LD a1,1*SIZE(A) - + MOV t21,t11 LD b0,0*SIZE(B) MOV t12,t11 LD b1,1*SIZE(B) MOV t22,t11 - dsra K,KCO,2 + dsra K,KCO,2 LD b2,2*SIZE(B) MOV t13,t11 @@ -806,7 +806,7 @@ MADD t14,t14,a0,b3 MADD t24,t24,a1,b3 - + MADD t11,t11,a4,b4 LD a2,4*SIZE(A) MADD t21,t21,a5,b4 @@ -866,7 +866,7 @@ MADD t24,t24,a7,b7 -.L25: +.L25: #ifndef TRMMKERNEL andi K,KCO,2 # kr=2 #else @@ -875,7 +875,7 @@ beqz K,.L28 nop -.L26: +.L26: MADD t11,t11,a0,b0 LD a4,2*SIZE(A) MADD t21,t21,a1,b0 @@ -890,7 +890,7 @@ LD b6,6*SIZE(B) MADD t23,t23,a1,b2 LD b7,7*SIZE(B) - + MADD t14,t14,a0,b3 MADD t24,t24,a1,b3 daddu A,A,4*SIZE # 2mr*2kr @@ -915,16 +915,16 @@ MADD t14,t14,a4,b7 MADD t24,t24,a5,b7 - -.L28: # kr=1 + +.L28: # kr=1 #ifndef TRMMKERNEL andi K,KCO,1 #else andi K,TEMP,1 #endif - beqz K,.L29 + beqz K,.L29 LD ALPHA,152($sp) # Get ALPHA - + MADD t11,t11,a0,b0 MADD t21,t21,a1,b0 daddu A,A,2*SIZE # 2mr*kr @@ -942,11 +942,11 @@ .L29: # Write Back to C #ifndef TRMMKERNEL LD c11,0(CO1) # GEMM write back part - LD c21,1*SIZE(CO1) + LD c21,1*SIZE(CO1) LD c12,0(CO2) LD c22,1*SIZE(CO2) - + LD c13,0(CO3) MADD t11,c11,t11,ALPHA LD c23,1*SIZE(CO3) @@ -985,25 +985,25 @@ #else MUL t11, ALPHA, t11 # TRMM write back part MUL t21, ALPHA, t21 - + ST t11, 0 * SIZE(CO1) MUL t12, ALPHA, t12 ST t21, 1 * SIZE(CO1) MUL t22, ALPHA, t22 - + ST t12, 0 * SIZE(CO2) MUL t13, ALPHA, t13 ST t22, 1 * SIZE(CO2) MUL t23, ALPHA, t23 - + ST t13, 0 * SIZE(CO3) MUL t14, ALPHA, t14 ST t23, 1 * SIZE(CO3) MUL t24, ALPHA, t24 - + ST t14, 0 * SIZE(CO4) ST t24, 1 * SIZE(CO4) - + daddiu CO1,CO1, 2 * SIZE daddiu CO2,CO2, 2 * SIZE daddiu CO3,CO3, 2 * SIZE @@ -1036,7 +1036,7 @@ .align 3 .L14_M1: - andi M,MCO,1 # mr=1 + andi M,MCO,1 # mr=1 beqz M,.L0_N4_Loop # M = 0, finishing one panel Bj nop @@ -1056,13 +1056,13 @@ MTC $0,t11 LD b0,0*SIZE(B) - + MOV t12,t11 LD b1,1*SIZE(B) MOV t13,t11 LD b2,2*SIZE(B) - + MOV t14,t11 LD b3,3*SIZE(B) @@ -1077,35 +1077,35 @@ nop beqz K,.L35 nop - -#else + +#else move B,BO # Reset B, GEMM part dsra K,KCO,2 # K=KCO/2 LD a0, 0 * SIZE(A) # a0 MTC $0,t11 LD b0,0*SIZE(B) - + MOV t12,t11 LD b1,1*SIZE(B) MOV t13,t11 LD b2,2*SIZE(B) - + MOV t14,t11 beqz K,.L35 LD b3,3*SIZE(B) #endif -.L31: # nr=4,mr=1,kr=4 +.L31: # nr=4,mr=1,kr=4 LD a1, 1*SIZE(A) # load a1 MADD t11,t11,a0,b0 - + LD b4,4*SIZE(B) LD b5,5*SIZE(B) MADD t12,t12,a0,b1 - + LD b6,6*SIZE(B) LD b7,7*SIZE(B) MADD t13,t13,a0,b2 @@ -1113,11 +1113,11 @@ LD a2, 2*SIZE(A) # a2 MADD t11,t11,a1,b4 - + LD b0,8*SIZE(B) LD b1,9*SIZE(B) MADD t12,t12,a1,b5 - + LD b2,10*SIZE(B) LD b3,11*SIZE(B) MADD t13,t13,a1,b6 @@ -1126,12 +1126,12 @@ LD a3, 3*SIZE(A) # a3 MADD t11,t11,a2,b0 daddiu K,K,-1 - + LD b4,12*SIZE(B) LD b5,13*SIZE(B) MADD t12,t12,a2,b1 daddu A,A,4*SIZE # 1mr*4kr - + LD b6,14*SIZE(B) LD b7,15*SIZE(B) MADD t13,t13,a2,b2 @@ -1140,7 +1140,7 @@ LD a0, 0*SIZE(A) # a0 daddu B,B,16*SIZE # 4nr*4kr MADD t11,t11,a3,b4 - + LD b0,0*SIZE(B) MADD t12,t12,a3,b5 LD b1,1*SIZE(B) @@ -1154,14 +1154,14 @@ .L35: # kr=2 #ifndef TRMMKERNEL - andi K,KCO,2 + andi K,KCO,2 #else andi K,TEMP,2 #endif beqz K,.L38 nop -.L36: +.L36: LD a1,1*SIZE(A) # load a1 MADD t11,t11,a0,b0 @@ -1169,10 +1169,10 @@ LD b5,5*SIZE(B) MADD t12,t12,a0,b1 daddu A,A,2*SIZE # mr*2kr - + LD b6,6*SIZE(B) MADD t13,t13,a0,b2 - + LD b7,7*SIZE(B) MADD t14,t14,a0,b3 daddu B,B,8*SIZE # 4nr*2kr @@ -1181,41 +1181,41 @@ .L37: LD a0,0(A) MADD t11,t11,a1,b4 - + LD b0,0*SIZE(B) LD b1,1*SIZE(B) MADD t12,t12,a1,b5 - + LD b2,2*SIZE(B) LD b3,3*SIZE(B) MADD t13,t13,a1,b6 MADD t14,t14,a1,b7 - - + + .L38: # kr=1 #ifndef TRMMKERNEL andi K,KCO,1 #else andi K,TEMP,1 #endif - beqz K,.L39 + beqz K,.L39 LD ALPHA,152($sp) # Get ALPHA - + MADD t11,t11,a0,b0 MADD t12,t12,a0,b1 - daddu A,A,1*SIZE + daddu A,A,1*SIZE daddu B,B,4*SIZE - + MADD t13,t13,a0,b2 MADD t14,t14,a0,b3 .L39: # Write Back #ifndef TRMMKERNEL - LD c11,0(CO1) + LD c11,0(CO1) LD c12,0(CO2) LD c13,0(CO3) LD c14,0(CO4) - + MADD t11,c11,t11,ALPHA MADD t12,c12,t12,ALPHA MADD t13,c13,t13,ALPHA @@ -1261,22 +1261,22 @@ .L0_N4_Loop: # mc finished daddiu N,N,-1 # N-- #if defined(TRMMKERNEL) && !defined(LEFT) - daddiu KK, KK,4 + daddiu KK, KK,4 #endif - bnez N,.L0_N4_Lb + bnez N,.L0_N4_Lb move BO,B # Set BO point to next panel Bj - .align 5 + .align 5 .L0_N2: andi N,NCO,2 # nr = 2 - beqz N,.L0_N1 + beqz N,.L0_N1 nop .L0_N2_Lb: - move CO1,C + move CO1,C daddu CO2,C,LDC - dsra M,MCO,2 + dsra M,MCO,2 move A,AO # Reset A daddu PREA,AO,SPANA @@ -1288,13 +1288,13 @@ beqz M,.L12_M2 nop -.L40: +.L40: #if defined(TRMMKERNEL) #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) move B,BO # Reset B #else dsll K,KK, 2 + BASE_SHIFT - dsll TEMP, KK,1 + BASE_SHIFT + dsll TEMP, KK,1 + BASE_SHIFT daddu A,A,K daddu B,BO,TEMP @@ -1311,10 +1311,10 @@ MOV t41,t11 LD a2,2*SIZE(A) LD a3,3*SIZE(A) - + MOV t12,t11 MOV t22,t11 - + #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) dsubu TEMP,KCO,KK #elif defined(LEFT) @@ -1322,7 +1322,7 @@ #else daddiu TEMP, KK, 2 #endif - dsra K,TEMP,2 + dsra K,TEMP,2 MOV t32,t11 beqz K,.L45 MOV t42,t11 @@ -1342,10 +1342,10 @@ LD a2,2*SIZE(A) dsra K,KCO,2 # K=KCO/2 LD a3,3*SIZE(A) - + MOV t12,t11 MOV t22,t11 - + MOV t32,t11 beqz K,.L45 MOV t42,t11 @@ -1411,9 +1411,9 @@ FETCH $0,8*SIZE(PREA) MADD t32,t32,a2,b3 MADD t42,t42,a3,b3 - + daddu A,A,16*SIZE # 4mr*4kr - daddu B,B,8*SIZE # 2nr*4kr + daddu B,B,8*SIZE # 2nr*4kr .L44: MADD t11,t11,a4,b6 @@ -1443,14 +1443,14 @@ .L45: # kr=2 #ifndef TRMMKERNEL - andi K,KCO,2 + andi K,KCO,2 #else andi K,TEMP,2 #endif beqz K,.L48 nop -.L46: +.L46: MADD t11,t11,a0,b0 LD a4,4*SIZE(A) MADD t21,t21,a1,b0 @@ -1469,7 +1469,7 @@ FETCH $0,0(PREA) MADD t32,t32,a2,b1 daddu B,B,4*SIZE # B+=2(nr)*2(kr)*8Byte=32 - + MADD t42,t42,a3,b1 daddu A,A,8*SIZE # A+=4(mr)*2(kr)*8Byte=8*SIZE @@ -1495,16 +1495,16 @@ daddu PREA,PREA,8*SIZE - + .L48: # kr=1 #ifndef TRMMKERNEL andi K,KCO,1 #else andi K,TEMP,1 #endif - beqz K,.L49 + beqz K,.L49 LD ALPHA,152($sp) # Get ALPHA - + FETCH $0,0(PREA) MADD t11,t11,a0,b0 MADD t21,t21,a1,b0 @@ -1524,7 +1524,7 @@ .L49: # Write Back #ifndef TRMMKERNEL LD c11,0(CO1) # gemm write back part Fetch 16 C - LD c21,1*SIZE(CO1) + LD c21,1*SIZE(CO1) LD c31,2*SIZE(CO1) LD c41,3*SIZE(CO1) @@ -1545,7 +1545,7 @@ MADD t32,c32,t32,ALPHA ST t41,3*SIZE(CO1) MADD t42,c42,t42,ALPHA - daddiu M,M,-1 + daddiu M,M,-1 ST t12,0(CO2) ST t22,1*SIZE(CO2) @@ -1557,8 +1557,8 @@ FETCH $0,8*SIZE(CO1) FETCH $0,8*SIZE(CO2) - daddu CO1,CO1,4*SIZE - bnez M,.L40 + daddu CO1,CO1,4*SIZE + bnez M,.L40 daddu CO2,CO2,4*SIZE #else @@ -1566,7 +1566,7 @@ MUL t21, ALPHA, t21 MUL t31, ALPHA, t31 MUL t41, ALPHA, t41 - + MUL t12, ALPHA, t12 ST t11, 0 * SIZE(CO1) MUL t22, ALPHA, t22 @@ -1575,13 +1575,13 @@ ST t31, 2 * SIZE(CO1) MUL t42, ALPHA, t42 ST t41, 3 * SIZE(CO1) - + ST t12, 0 * SIZE(CO2) daddiu M,M,-1 ST t22, 1 * SIZE(CO2) ST t32, 2 * SIZE(CO2) ST t42, 3 * SIZE(CO2) - + daddiu CO1,CO1, 4*SIZE daddiu CO2,CO2, 4*SIZE @@ -1615,7 +1615,7 @@ .align 3 .L12_M2: andi M,MCO,2 # mr = 2 - beqz M,.L12_M1 + beqz M,.L12_M1 nop .L50: @@ -1636,7 +1636,7 @@ LD b0,0*SIZE(B) MOV t21,t11 LD b1,1*SIZE(B) - + #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA)) dsubu TEMP, KCO, KK #elif defined(LEFT) @@ -1644,7 +1644,7 @@ #else daddiu TEMP, KK, 2 #endif - dsra K,TEMP,2 + dsra K,TEMP,2 MOV t12,t11 beqz K,.L55 MOV t22,t11 @@ -1659,7 +1659,7 @@ LD b0,0*SIZE(B) MOV t21,t11 LD b1,1*SIZE(B) - + MOV t12,t11 beqz K,.L55 MOV t22,t11 @@ -1715,14 +1715,14 @@ .L55: # kr=2 #ifndef TRMMKERNEL - andi K,KCO,2 + andi K,KCO,2 #else andi K,TEMP,2 #endif beqz K,.L58 nop -.L56: +.L56: MADD t11,t11,a0,b0 LD a4,2*SIZE(A) MADD t21,t21,a1,b0 @@ -1752,9 +1752,9 @@ #else andi K,TEMP, 1 #endif - beqz K,.L59 + beqz K,.L59 LD ALPHA,152($sp) # Get ALPHA - + MADD t11,t11,a0,b0 MADD t21,t21,a1,b0 daddu A,A,2*SIZE # A+=2(mr)*1(kr)*8Byte=16 @@ -1767,10 +1767,10 @@ .L59: # Write Back #ifndef TRMMKERNEL LD c11,0(CO1) # write gemm part back Fetch 16 C - LD c21,1*SIZE(CO1) + LD c21,1*SIZE(CO1) LD c12,0(CO2) LD c22,1*SIZE(CO2) - + MADD t11,c11,t11,ALPHA MADD t21,c21,t21,ALPHA MADD t12,c12,t12,ALPHA @@ -1781,7 +1781,7 @@ ST t12,0(CO2) ST t22,1*SIZE(CO2) - daddu CO1,CO1,2*SIZE + daddu CO1,CO1,2*SIZE daddu CO2,CO2,2*SIZE FETCH $0,0(CO1) @@ -1827,7 +1827,7 @@ .align 3 .L12_M1: andi M,MCO,1 # mr = 1 - beqz M,.L0_N2_Loop + beqz M,.L0_N2_Loop nop .L60: @@ -1842,7 +1842,7 @@ daddu B, BO, TEMP #endif LD a0,0*SIZE(A) - + MTC $0,t11 MOV t21,t11 LD b0,0*SIZE(B) @@ -1857,16 +1857,16 @@ #else daddiu TEMP, KK, 2 #endif - dsra K,TEMP,2 + dsra K,TEMP,2 MOV t22,t11 beqz K,.L65 nop #else - dsra K,KCO,2 + dsra K,KCO,2 move B,BO # Reset B LD a0,0*SIZE(A) - + MTC $0,t11 MOV t21,t11 LD b0,0*SIZE(B) @@ -1878,18 +1878,18 @@ #endif -.L61: # nr=2,mr=1,kr=4 +.L61: # nr=2,mr=1,kr=4 LD a4, 1*SIZE(A) # a2 LD b4, 2*SIZE(B) MADD t11,t11,a0,b0 - + LD b5,3*SIZE(B) MADD t12,t12,a0,b1 LD a2, 2*SIZE(A) # a3 LD b2,4*SIZE(B) MADD t11,t11,a4,b4 - + LD b3,5*SIZE(B) MADD t12,t12,a4,b5 @@ -1897,17 +1897,17 @@ daddiu K,K,-1 LD b6,6*SIZE(B) MADD t11,t11,a2,b2 - + LD b7,7*SIZE(B) MADD t12,t12,a2,b3 daddu A,A,4*SIZE # A+=1(mr)*4(kr)*8Byte=32 LD a0, 0*SIZE(A) daddu B,B,8*SIZE # B+=2(nr)*4(kr)*8Byte=8*SIZE - - LD b0,0*SIZE(B) + + LD b0,0*SIZE(B) MADD t11,t11,a6,b6 - + LD b1,1*SIZE(B) bnez K,.L61 MADD t12,t12,a6,b7 @@ -1916,19 +1916,19 @@ .L65: # kr=2 #ifndef TRMMKERNEL - andi K,KCO,2 + andi K,KCO,2 #else andi K,TEMP,2 #endif beqz K,.L68 nop -.L66: +.L66: LD a4, 1*SIZE(A) # a1 MADD t11,t11,a0,b0 LD b4,2*SIZE(B) daddu A,A,2*SIZE # A+=1(mr)*2(kr)*8Byte=16 - + LD b5,3*SIZE(B) MADD t12,t12,a0,b1 daddu B,B,4*SIZE @@ -1937,7 +1937,7 @@ LD a0,0(A) # a0 LD b0,0*SIZE(B) MADD t11,t11,a4,b4 - + LD b1,1*SIZE(B) MADD t12,t12,a4,b5 @@ -1948,9 +1948,9 @@ #else andi K,TEMP,1 #endif - beqz K,.L69 + beqz K,.L69 LD ALPHA,152($sp) # Get ALPHA - + MADD t11,t11,a0,b0 MADD t12,t12,a0,b1 daddu A,A,1*SIZE # A+=1(mr)*1(kr)*8Byte=16 @@ -1961,14 +1961,14 @@ #ifndef TRMMKERNEL LD c11,0(CO1) # Fetch 16 C LD c12,0(CO2) - + MADD t11,c11,t11,ALPHA MADD t12,c12,t12,ALPHA ST t11,0(CO1) ST t12,0(CO2) - daddu CO1,CO1,1*SIZE + daddu CO1,CO1,1*SIZE daddu CO2,CO2,1*SIZE #else @@ -1978,7 +1978,7 @@ ST t11, 0 * SIZE(CO1) ST t12, 0 * SIZE(CO2) - daddu CO1,CO1,1*SIZE + daddu CO1,CO1,1*SIZE daddu CO2,CO2,1*SIZE #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) @@ -2008,15 +2008,15 @@ move BO, B - .align 5 + .align 5 .L0_N1: andi N,NCO,1 # nr = 1 - beqz N,.L999 + beqz N,.L999 nop - move CO1,C - dsra M,MCO,2 - + move CO1,C + dsra M,MCO,2 + move A,AO # Reset A daddu PREA,AO,SPANA #if defined(TRMMKERNEL) && defined(LEFT) @@ -2026,7 +2026,7 @@ beqz M,.L11_M2 daddu C,CO1,LDC -.L70: +.L70: #if defined(TRMMKERNEL) #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) move B, BO # Reset B @@ -2038,12 +2038,12 @@ daddu B, BO, TEMP #endif LD b0, 0*SIZE(B) - + MTC $0,t11 LD a0,0*SIZE(A) MOV t21,t11 LD a1,1*SIZE(A) - + MOV t31,t11 LD a2,2*SIZE(A) MOV t41,t11 @@ -2057,19 +2057,19 @@ #else daddiu TEMP, KK, 1 #endif - dsra K,TEMP,2 + dsra K,TEMP,2 beqz K,.L75 nop #else move B, BO # Reset B - dsra K,KCO,2 + dsra K,KCO,2 LD b0, 0*SIZE(B) - + MTC $0,t11 LD a0,0*SIZE(A) MOV t21,t11 LD a1,1*SIZE(A) - + MOV t31,t11 LD a2,2*SIZE(A) MOV t41,t11 @@ -2081,7 +2081,7 @@ .L71: # nr=1,mr=kr=4 LD b4, 1*SIZE(B) # b1 MADD t11,t11,a0,b0 - + LD a4, 4*SIZE(A) MADD t21,t21,a1,b0 @@ -2097,7 +2097,7 @@ .L72: LD b2, 2*SIZE(B) # b2 MADD t11,t11,a4,b4 - + LD a0,8*SIZE(A) MADD t21,t21,a5,b4 @@ -2106,17 +2106,17 @@ LD a2,10*SIZE(A) MADD t31,t31,a6,b4 - + LD a3,11*SIZE(A) MADD t41,t41,a7,b4 .L73: LD b6, 3*SIZE(B) MADD t11,t11,a0,b2 - + LD a4,12*SIZE(A) daddu B,B,4*SIZE # B+=1(nr)*4(kr)*8Byte=32 - + LD a5,13*SIZE(A) MADD t21,t21,a1,b2 @@ -2131,7 +2131,7 @@ .L74: LD b0, 0*SIZE(B) MADD t11,t11,a4,b6 - + LD a0,0*SIZE(A) daddu PREA,PREA,16*SIZE @@ -2150,20 +2150,20 @@ .L75: # kr=2 #ifndef TRMMKERNEL - andi K,KCO,2 + andi K,KCO,2 #else andi K,TEMP,2 #endif beqz K,.L78 nop -.L76: +.L76: LD b4, 1*SIZE(B) MADD t11,t11,a0,b0 - + LD a4,4*SIZE(A) daddu B,B,2*SIZE # B+=1(nr)*2(kr)*8Byte=32 - + LD a5,5*SIZE(A) MADD t21,t21,a1,b0 FETCH $0,0(PREA) @@ -2193,16 +2193,16 @@ daddu PREA,PREA,8*SIZE - + .L78: # kr=1 #ifndef TRMMKERNEL andi K,KCO,1 #else andi K,TEMP,1 #endif - beqz K,.L79 + beqz K,.L79 LD ALPHA,152($sp) # Get ALPHA - + FETCH $0,0(PREA) MADD t11,t11,a0,b0 MADD t21,t21,a1,b0 @@ -2217,7 +2217,7 @@ .L79: # Write Back #ifndef TRMMKERNEL LD c11,0(CO1) # Fetch 16 C - LD c21,1*SIZE(CO1) + LD c21,1*SIZE(CO1) LD c31,2*SIZE(CO1) LD c41,3*SIZE(CO1) @@ -2252,7 +2252,7 @@ FETCH $0,4*SIZE(CO1) FETCH $0,8*SIZE(CO1) - daddu CO1,CO1,4*SIZE + daddu CO1,CO1,4*SIZE #if ( defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) dsubu TEMP, KCO, KK #ifdef LEFT @@ -2271,7 +2271,7 @@ #ifdef LEFT daddiu KK, KK, 4 #endif - bnez M,.L70 + bnez M,.L70 nop #endif @@ -2279,10 +2279,10 @@ .align 3 .L11_M2: andi M,MCO,2 # mr = 2 - beqz M,.L11_M1 + beqz M,.L11_M1 nop -.L80: +.L80: #if defined(TRMMKERNEL) #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) move B, BO @@ -2312,13 +2312,13 @@ nop #else move B, BO - dsra K,KCO,2 + dsra K,KCO,2 LD b0, 0*SIZE(B) MTC $0,t11 MOV t21,t11 LD a0,0*SIZE(A) - + beqz K,.L85 LD a1,1*SIZE(A) @@ -2336,7 +2336,7 @@ MADD t11,t11,a4,b4 LD a3,5*SIZE(A) MADD t21,t21,a5,b4 - + LD b6, 3*SIZE(B) LD a6,6*SIZE(A) MADD t11,t11,a2,b2 @@ -2358,23 +2358,23 @@ .L85: # kr=2 #ifndef TRMMKERNEL - andi K,KCO,2 + andi K,KCO,2 #else andi K,TEMP,2 #endif beqz K,.L88 nop -.L86: +.L86: LD b4, 1*SIZE(B) LD a4,2*SIZE(A) MADD t11,t11,a0,b0 LD a5,3*SIZE(A) MADD t21,t21,a1,b0 - + daddu A,A,4*SIZE # A+=2(mr)*2(kr)*8Byte=32 daddu B,B,2*SIZE # B+=1(nr)*2(kr)*8Byte=16 - + LD b0,0(B) LD a0,0*SIZE(A) MADD t11,t11,a4,b4 @@ -2382,16 +2382,16 @@ MADD t21,t21,a5,b4 - + .L88: # kr=1 #ifndef TRMMKERNEL andi K,KCO,1 #else andi K,TEMP,1 #endif - beqz K,.L89 + beqz K,.L89 LD ALPHA,152($sp) # Get ALPHA - + MADD t11,t11,a0,b0 MADD t21,t21,a1,b0 daddu A,A,2*SIZE # A+=2(mr)*1(kr)*8Byte=16 @@ -2401,7 +2401,7 @@ .L89: # Write Back #ifndef TRMMKERNEL LD c11,0(CO1) # Fetch 16 C - LD c21,1*SIZE(CO1) + LD c21,1*SIZE(CO1) MADD t11,c11,t11,ALPHA MADD t21,c21,t21,ALPHA @@ -2410,7 +2410,7 @@ ST t21,1*SIZE(CO1) FETCH $0,2*SIZE(CO1) - + daddu CO1,CO1,2*SIZE # COx += 2*8Byte #else @@ -2445,10 +2445,10 @@ .align 3 .L11_M1: andi M,MCO,1 # mr = 1 - beqz M,.L999 + beqz M,.L999 nop -.L90: +.L90: #if defined(TRMMKERNEL) #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) move B, BO @@ -2478,7 +2478,7 @@ move B, BO LD a0, 0*SIZE(A) LD b0, 0*SIZE(B) - dsra K,KCO,2 + dsra K,KCO,2 beqz K,.L95 MTC $0,t11 #endif @@ -2487,7 +2487,7 @@ LD a4, 1*SIZE(A) LD b4, 1*SIZE(B) MADD t11,t11,a0,b0 - + LD a2, 2*SIZE(A) LD b2, 2*SIZE(B) MADD t11,t11,a4,b4 @@ -2495,28 +2495,28 @@ LD a6, 3*SIZE(A) LD b6, 3*SIZE(B) MADD t11,t11,a2,b2 - + daddu A,A,4*SIZE # A+=1(mr)*4(kr)*8Byte=32 daddu B,B,4*SIZE # B+=1(nr)*4(kr)*8Byte=32 LD a0, 0*SIZE(A) LD b0, 0*SIZE(B) MADD t11,t11,a6,b6 - + daddiu K,K,-1 bnez K,.L91 nop .L95: # kr=2 #ifndef TRMMKERNEL - andi K,KCO,2 + andi K,KCO,2 #else andi K,TEMP,2 #endif beqz K,.L98 nop -.L96: +.L96: LD a4, 1*SIZE(A) LD b4, 1*SIZE(B) MADD t11,t11,a0,b0 @@ -2526,14 +2526,14 @@ LD b0,0(B) LD a0,0(A) MADD t11,t11,a4,b4 - + .L98: # kr=1 #ifndef TRMMKERNEL andi K,KCO,1 #else andi K,TEMP,1 #endif - beqz K,.L99 + beqz K,.L99 LD ALPHA,152($sp) # Get ALPHA MADD t11,t11,a0,b0 |