summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2007-08-31 07:23:31 +0000
committerH. Peter Anvin <hpa@zytor.com>2007-08-31 07:23:31 +0000
commit91a86cdb31f8f68f6cebd42ac6dae202adc3f8fe (patch)
treecc8137c24bb00dca67c1ec0978ec52e5340e1086
parent535af831f15a2a89e726d0c9a38103005b3bce27 (diff)
downloadnasm-91a86cdb31f8f68f6cebd42ac6dae202adc3f8fe.tar.gz
nasm-91a86cdb31f8f68f6cebd42ac6dae202adc3f8fe.tar.bz2
nasm-91a86cdb31f8f68f6cebd42ac6dae202adc3f8fe.zip
tokhash: Speed up the rejection of unhashed values
Speed up the rejection of unhashed values (typically identifiers) by filling unused hash slots with a large value (but not so large that it is likely to overflow.) This means those values will be rejected already by the range check, not needing strcmp().
-rw-r--r--perllib/phash.ph12
-rwxr-xr-xtokhash.pl11
2 files changed, 18 insertions, 5 deletions
diff --git a/perllib/phash.ph b/perllib/phash.ph
index 017155d..e17874a 100644
--- a/perllib/phash.ph
+++ b/perllib/phash.ph
@@ -156,10 +156,16 @@ sub gen_hash_n($$$) {
# for the edge (which is our hash index.) Since the graph is
# acyclic, this is always doable.
for ($i = 0; $i < $gsize; $i++) {
- if (!$gr->has_vertex_attribute($i, "val")) {
- walk_graph($gr,$i,0); # First vertex in a cluster
+ if ($gr->degree($i)) {
+ # This vertex has neighbors (is used)
+ if (!$gr->has_vertex_attribute($i, "val")) {
+ walk_graph($gr,$i,0); # First vertex in a cluster
+ }
+ push(@g, $gr->get_vertex_attribute($i, "val"));
+ } else {
+ # Unused vertex
+ push(@g, undef);
}
- push(@g, $gr->get_vertex_attribute($i, "val"));
}
# for ($i = 0; $i < $n; $i++) {
diff --git a/tokhash.pl b/tokhash.pl
index 4730a8b..9afe61d 100755
--- a/tokhash.pl
+++ b/tokhash.pl
@@ -148,17 +148,24 @@ print "\n";
print "int nasm_token_hash(const char *token, struct tokenval *tv)\n";
print "{\n";
+# Put a large value in unused slots. This makes it extremely unlikely
+# that any combination that involves unused slot will pass the range test.
+# This speeds up rejection of unrecognized tokens, i.e. identifiers.
+$unused = 16383;
+
print "\tstatic const int16_t hash1[$n] =\n";
print "\t{\n";
for ($i = 0; $i < $n; $i++) {
- print "\t\t", ${$g}[${$f1}[$i]], ",\n";
+ my $h = ${$g}[${$f1}[$i]];
+ printf "\t\t%d,\n", defined($h) ? $h : $unused;
}
print "\t};\n\n";
print "\tstatic const int16_t hash2[$n] =\n";
print "\t{\n";
for ($i = 0; $i < $n; $i++) {
- print "\t\t", ${$g}[${$f2}[$i]], ",\n";
+ my $h = ${$g}[${$f2}[$i]];
+ printf "\t\t%d,\n", defined($h) ? $h : $unused;
}
print "\t};\n\n";