summaryrefslogtreecommitdiff
path: root/roms/qemu-palcode/strlen.S
diff options
context:
space:
mode:
Diffstat (limited to 'roms/qemu-palcode/strlen.S')
-rw-r--r--roms/qemu-palcode/strlen.S59
1 files changed, 59 insertions, 0 deletions
diff --git a/roms/qemu-palcode/strlen.S b/roms/qemu-palcode/strlen.S
new file mode 100644
index 000000000..5a77d7293
--- /dev/null
+++ b/roms/qemu-palcode/strlen.S
@@ -0,0 +1,59 @@
+/*
+ * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu)
+ *
+ * Finds length of a 0-terminated string. Optimized for the
+ * Alpha architecture:
+ *
+ * - memory accessed as aligned quadwords only
+ * - uses bcmpge to compare 8 bytes in parallel
+ * - does binary search to find 0 byte in last
+ * quadword (HAKMEM needed 12 instructions to
+ * do this instead of the 9 instructions that
+ * binary search needs).
+ */
+
+ .set noreorder
+ .set noat
+
+ .align 3
+
+ .globl strlen
+ .ent strlen
+strlen:
+ .frame $sp, 0, $26, 0
+ .prologue 0
+
+ ldq_u $1, 0($16) # load first quadword ($16 may be misaligned)
+ lda $2, -1($31)
+ insqh $2, $16, $2
+ andnot $16, 7, $0
+ or $2, $1, $1
+ cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0
+ bne $2, found
+
+loop: ldq $1, 8($0)
+ addq $0, 8, $0 # addr += 8
+ nop # helps dual issue last two insns
+ cmpbge $31, $1, $2
+ beq $2, loop
+
+found: blbs $2, done # make aligned case fast
+ negq $2, $3
+ and $2, $3, $2
+
+ and $2, 0x0f, $1
+ addq $0, 4, $3
+ cmoveq $1, $3, $0
+
+ and $2, 0x33, $1
+ addq $0, 2, $3
+ cmoveq $1, $3, $0
+
+ and $2, 0x55, $1
+ addq $0, 1, $3
+ cmoveq $1, $3, $0
+
+done: subq $0, $16, $0
+ ret $31, ($26)
+
+ .end strlen