diff options
Diffstat (limited to 'test/join.tcl')
-rw-r--r-- | test/join.tcl | 454 |
1 files changed, 454 insertions, 0 deletions
diff --git a/test/join.tcl b/test/join.tcl new file mode 100644 index 0000000..928bbf7 --- /dev/null +++ b/test/join.tcl @@ -0,0 +1,454 @@ +# See the file LICENSE for redistribution information. +# +# Copyright (c) 1996-2009 Oracle. All rights reserved. +# +# $Id$ +# +# TEST jointest +# TEST Test duplicate assisted joins. Executes 1, 2, 3 and 4-way joins +# TEST with differing index orders and selectivity. +# TEST +# TEST We'll test 2-way, 3-way, and 4-way joins and figure that if those +# TEST work, everything else does as well. We'll create test databases +# TEST called join1.db, join2.db, join3.db, and join4.db. The number on +# TEST the database describes the duplication -- duplicates are of the +# TEST form 0, N, 2N, 3N, ... where N is the number of the database. +# TEST Primary.db is the primary database, and null.db is the database +# TEST that has no matching duplicates. +# TEST +# TEST We should test this on all btrees, all hash, and a combination thereof +proc jointest { {psize 8192} {with_dup_dups 0} {flags 0} } { + global testdir + global rand_init + source ./include.tcl + + env_cleanup $testdir + berkdb srand $rand_init + + # Use one environment for all database opens so we don't + # need oodles of regions. + set env [berkdb_env -create -home $testdir] + error_check_good env_open [is_valid_env $env] TRUE + + # With the new offpage duplicate code, we don't support + # duplicate duplicates in sorted dup sets. Thus, if with_dup_dups + # is greater than one, run only with "-dup". + if { $with_dup_dups > 1 } { + set doptarray {"-dup"} + } else { + set doptarray {"-dup -dupsort" "-dup" RANDOMMIX RANDOMMIX } + } + + # NB: these flags are internal only, ok + foreach m "DB_BTREE DB_HASH DB_BOTH" { + # run with two different random mixes. + foreach dopt $doptarray { + set opt [list "-env" $env $dopt] + + puts "Join test: ($m $dopt) psize $psize,\ + $with_dup_dups dup\ + dups, flags $flags." + + build_all $m $psize $opt oa $with_dup_dups + + # null.db is db_built fifth but is referenced by + # zero; set up the option array appropriately. + set oa(0) $oa(5) + + # Build the primary + puts "\tBuilding the primary database $m" + set oflags "-create -truncate -mode 0644 -env $env\ + [conv $m [berkdb random_int 1 2]]" + set db [eval {berkdb_open} $oflags primary.db] + error_check_good dbopen [is_valid_db $db] TRUE + for { set i 0 } { $i < 1000 } { incr i } { + set key [format "%04d" $i] + set ret [$db put $key stub] + error_check_good "primary put" $ret 0 + } + error_check_good "primary close" [$db close] 0 + set did [open $dict] + gets $did str + do_join primary.db "1 0" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "2 0" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "3 0" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "4 0" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "1" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "2" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "3" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "4" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "1 2" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "1 2 3" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "1 2 3 4" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "2 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "3 2 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "4 3 2 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "1 3" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "3 1" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "1 4" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "4 1" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "2 3" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "3 2" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "2 4" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "4 2" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "3 4" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "4 3" $str oa $flags $with_dup_dups + gets $did str + do_join primary.db "2 3 4" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "3 4 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "4 2 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "0 2 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "3 2 0" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "4 3 2 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "4 3 0 1" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "3 3 3" $str oa $flags\ + $with_dup_dups + gets $did str + do_join primary.db "2 2 3 3" $str oa $flags\ + $with_dup_dups + gets $did str2 + gets $did str + do_join primary.db "1 2" $str oa $flags\ + $with_dup_dups "3" $str2 + + # You really don't want to run this section + # with $with_dup_dups > 2. + if { $with_dup_dups <= 2 } { + gets $did str2 + gets $did str + do_join primary.db "1 2 3" $str\ + oa $flags $with_dup_dups "3 3 1" $str2 + gets $did str2 + gets $did str + do_join primary.db "4 0 2" $str\ + oa $flags $with_dup_dups "4 3 3" $str2 + gets $did str2 + gets $did str + do_join primary.db "3 2 1" $str\ + oa $flags $with_dup_dups "0 2" $str2 + gets $did str2 + gets $did str + do_join primary.db "2 2 3 3" $str\ + oa $flags $with_dup_dups "1 4 4" $str2 + gets $did str2 + gets $did str + do_join primary.db "2 2 3 3" $str\ + oa $flags $with_dup_dups "0 0 4 4" $str2 + gets $did str2 + gets $did str + do_join primary.db "2 2 3 3" $str2\ + oa $flags $with_dup_dups "2 4 4" $str + gets $did str2 + gets $did str + do_join primary.db "2 2 3 3" $str2\ + oa $flags $with_dup_dups "0 0 4 4" $str + } + close $did + } + } + + error_check_good env_close [$env close] 0 +} + +proc build_all { method psize opt oaname with_dup_dups {nentries 100} } { + global testdir + db_build join1.db $nentries 50 1 [conv $method 1]\ + $psize $opt $oaname $with_dup_dups + db_build join2.db $nentries 25 2 [conv $method 2]\ + $psize $opt $oaname $with_dup_dups + db_build join3.db $nentries 16 3 [conv $method 3]\ + $psize $opt $oaname $with_dup_dups + db_build join4.db $nentries 12 4 [conv $method 4]\ + $psize $opt $oaname $with_dup_dups + db_build null.db $nentries 0 5 [conv $method 5]\ + $psize $opt $oaname $with_dup_dups +} + +proc conv { m i } { + switch -- $m { + DB_HASH { return "-hash"} + "-hash" { return "-hash"} + DB_BTREE { return "-btree"} + "-btree" { return "-btree"} + DB_BOTH { + if { [expr $i % 2] == 0 } { + return "-hash"; + } else { + return "-btree"; + } + } + } +} + +proc random_opts { } { + set j [berkdb random_int 0 1] + if { $j == 0 } { + return " -dup" + } else { + return " -dup -dupsort" + } +} + +proc db_build { name nkeys ndups dup_interval method psize lopt oaname \ + with_dup_dups } { + source ./include.tcl + + # Get array of arg names (from two levels up the call stack) + upvar 2 $oaname oa + + # Search for "RANDOMMIX" in $opt, and if present, replace + # with " -dup" or " -dup -dupsort" at random. + set i [lsearch $lopt RANDOMMIX] + if { $i != -1 } { + set lopt [lreplace $lopt $i $i [random_opts]] + } + + # Save off db_open arguments for this database. + set opt [eval concat $lopt] + set oa($dup_interval) $opt + + # Create the database and open the dictionary + set oflags "-create -truncate -mode 0644 $method\ + -pagesize $psize" + set db [eval {berkdb_open} $oflags $opt $name] + error_check_good dbopen [is_valid_db $db] TRUE + set did [open $dict] + set count 0 + puts -nonewline "\tBuilding $name: $nkeys keys " + puts -nonewline "with $ndups duplicates at interval of $dup_interval" + if { $with_dup_dups > 0 } { + puts "" + puts "\t\tand $with_dup_dups duplicate duplicates." + } else { + puts "." + } + for { set count 0 } { [gets $did str] != -1 && $count < $nkeys } { + incr count} { + set str $str$name + # We need to make sure that the dups are inserted in a + # random, or near random, order. Do this by generating + # them and putting each in a list, then sorting the list + # at random. + set duplist {} + for { set i 0 } { $i < $ndups } { incr i } { + set data [format "%04d" [expr $i * $dup_interval]] + lappend duplist $data + } + # randomize the list + for { set i 0 } { $i < $ndups } {incr i } { + # set j [berkdb random_int $i [expr $ndups - 1]] + set j [expr ($i % 2) + $i] + if { $j >= $ndups } { set j $i } + set dupi [lindex $duplist $i] + set dupj [lindex $duplist $j] + set duplist [lreplace $duplist $i $i $dupj] + set duplist [lreplace $duplist $j $j $dupi] + } + foreach data $duplist { + if { $with_dup_dups != 0 } { + for { set j 0 }\ + { $j < $with_dup_dups }\ + {incr j} { + set ret [$db put $str $data] + error_check_good put$j $ret 0 + } + } else { + set ret [$db put $str $data] + error_check_good put $ret 0 + } + } + + if { $ndups == 0 } { + set ret [$db put $str NODUP] + error_check_good put $ret 0 + } + } + close $did + error_check_good close:$name [$db close] 0 +} + +proc do_join { primary dbs key oanm flags with_dup_dups {dbs2 ""} {key2 ""} } { + global testdir + source ./include.tcl + + upvar $oanm oa + + puts -nonewline "\tJoining: $dbs on $key" + if { $dbs2 == "" } { + puts "" + } else { + puts " with $dbs2 on $key2" + } + + # Open all the databases + set p [berkdb_open -unknown $testdir/$primary] + error_check_good "primary open" [is_valid_db $p] TRUE + + set dblist "" + set curslist "" + + set ndx [llength $dbs] + + foreach i [concat $dbs $dbs2] { + set opt $oa($i) + set db [eval {berkdb_open -unknown} $opt [n_to_name $i]] + error_check_good "[n_to_name $i] open" [is_valid_db $db] TRUE + set curs [$db cursor] + error_check_good "$db cursor" \ + [is_substr $curs "$db.c"] 1 + lappend dblist $db + lappend curslist $curs + + if { $ndx > 0 } { + set realkey [concat $key[n_to_name $i]] + } else { + set realkey [concat $key2[n_to_name $i]] + } + + set pair [$curs get -set $realkey] + error_check_good cursor_set:$realkey:$pair \ + [llength [lindex $pair 0]] 2 + + incr ndx -1 + } + + set join_curs [eval {$p join} $curslist] + error_check_good join_cursor \ + [is_substr $join_curs "$p.c"] 1 + + # Calculate how many dups we expect. + # We go through the list of indices. If we find a 0, then we + # expect 0 dups. For everything else, we look at pairs of numbers, + # if the are relatively prime, multiply them and figure out how + # many times that goes into 50. If they aren't relatively prime, + # take the number of times the larger goes into 50. + set expected 50 + set last 1 + foreach n [concat $dbs $dbs2] { + if { $n == 0 } { + set expected 0 + break + } + if { $last == $n } { + continue + } + + if { [expr $last % $n] == 0 || [expr $n % $last] == 0 } { + if { $n > $last } { + set last $n + set expected [expr 50 / $last] + } + } else { + set last [expr $n * $last / [gcd $n $last]] + set expected [expr 50 / $last] + } + } + + # If $with_dup_dups is greater than zero, each datum has + # been inserted $with_dup_dups times. So we expect the number + # of dups to go up by a factor of ($with_dup_dups)^(number of databases) + + if { $with_dup_dups > 0 } { + foreach n [concat $dbs $dbs2] { + set expected [expr $expected * $with_dup_dups] + } + } + + set ndups 0 + if { $flags == " -join_item"} { + set l 1 + } else { + set flags "" + set l 2 + } + for { set pair [eval {$join_curs get} $flags] } { \ + [llength [lindex $pair 0]] == $l } { + set pair [eval {$join_curs get} $flags] } { + set k [lindex [lindex $pair 0] 0] + foreach i $dbs { + error_check_bad valid_dup:$i:$dbs $i 0 + set kval [string trimleft $k 0] + if { [string length $kval] == 0 } { + set kval 0 + } + error_check_good valid_dup:$i:$dbs [expr $kval % $i] 0 + } + incr ndups + } + error_check_good number_of_dups:$dbs $ndups $expected + + error_check_good close_primary [$p close] 0 + foreach i $curslist { + error_check_good close_cursor:$i [$i close] 0 + } + foreach i $dblist { + error_check_good close_index:$i [$i close] 0 + } +} + +proc n_to_name { n } { +global testdir + if { $n == 0 } { + return null.db; + } else { + return join$n.db; + } +} + +proc gcd { a b } { + set g 1 + + for { set i 2 } { $i <= $a } { incr i } { + if { [expr $a % $i] == 0 && [expr $b % $i] == 0 } { + set g $i + } + } + return $g +} |