summaryrefslogtreecommitdiff
path: root/test/rep039.tcl
diff options
context:
space:
mode:
Diffstat (limited to 'test/rep039.tcl')
-rw-r--r-- test/rep039.tcl 472
1 files changed, 472 insertions, 0 deletions
diff --git a/test/rep039.tcl b/test/rep039.tcl
new file mode 100644
index 00000000..552e2d3d
--- /dev/null
+++ b/test/rep039.tcl
@@ -0,0 +1,472 @@
+# See the file LICENSE for redistribution information.
+#
+# Copyright (c) 2004-2009 Oracle. All rights reserved.
+#
+# $Id$
+#
+# TEST rep039
+# TEST Test of interrupted internal initialization. The
+# TEST interruption is due to a changed master, or the client crashing,
+# TEST or both.
+# TEST
+# TEST One master, two clients.
+# TEST Generate several log files. Remove old master log files.
+# TEST Restart client, optionally having "cleaned" client env dir. Either
+# TEST way, this has the effect of forcing an internal init.
+# TEST Interrupt the internal init.
+# TEST Vary the number of times we process messages to make sure
+# TEST the interruption occurs at varying stages of the first internal
+# TEST initialization.
+# TEST
+# TEST Run for btree and queue only because of the number of permutations.
+# TEST
+proc rep039 { method { niter 200 } { tnum "039" } args } {
+    # Driver for rep039.  Walks every combination of recovery option,
+    # crash type, clean option, archive option, client log option and
+    # message-round count, and runs rep039_sub once per combination.
+    #
+    #   method  access method under test; only btree and queue are run
+    #   niter   iteration count handed through to rep039_sub
+    #   tnum    test number used in the printed labels
+    #   args    extra arguments; run through convert_args and then
+    #           passed on to rep039_sub.  A -pagesize argument makes
+    #           the whole test skip.
+    #
+    # When checking_valid_methods is set nothing is run; the btree and
+    # queue entries of valid_methods are returned instead.
+    # (checking_valid_methods, valid_methods, test_recopts,
+    # is_windows_test and is_hp_test are not set in this proc; they are
+    # presumably supplied by include.tcl -- confirm.)
+
+    source ./include.tcl
+    global databases_in_memory repfiles_in_memory
+
+    # Method-discovery mode: report the methods this test supports.
+    if { $checking_valid_methods } {
+        set supported {}
+        foreach candidate $valid_methods {
+            if { [is_btree $candidate] != 1 && \
+                [is_queue $candidate] != 1 } {
+                continue
+            }
+            lappend supported $candidate
+        }
+        return $supported
+    }
+
+    # Anything other than btree or queue is skipped outright.
+    if { [is_btree $method] == 0 && [is_queue $method] == 0 } {
+        puts "Rep$tnum: skipping for non-btree, non-queue method."
+        return
+    }
+
+    # The permutation count is already very large, so mixed-mode
+    # logging is not added on top of it.
+    global mixed_mode_logging
+    if { $mixed_mode_logging > 0 } {
+        puts "Rep$tnum: Skipping for mixed mode logging."
+        return
+    }
+
+    # rep039_sub chooses its own page size, so a caller-supplied one
+    # cannot be honored.
+    if { [lsearch -exact $args "-pagesize"] != -1 } {
+        puts "Rep$tnum: skipping for specific pagesizes"
+        return
+    }
+
+    set args [convert_args $method $args]
+
+    # Describe the database placement for the announcement line.
+    if { $databases_in_memory } {
+        if { [is_queueext $method] } {
+            puts -nonewline "Skipping rep$tnum for method "
+            puts "$method with named in-memory databases."
+            return
+        }
+        set msg "using named in-memory databases"
+    } else {
+        set msg "using on-disk databases"
+    }
+
+    # Describe the replication-file placement likewise.
+    if { $repfiles_in_memory } {
+        set msg2 "and in-memory replication files"
+    } else {
+        set msg2 "and on-disk replication files"
+    }
+
+    # The option lists that are the same for every recovery setting.
+    set clean_modes { noclean clean }
+    set arch_modes { archive noarchive }
+    set msg_rounds 4
+
+    # Script evaluated once per permutation to print its banner; it
+    # reads the loop variables of the nested loops below.
+    set announce {puts "Rep$tnum ($method $r $clean $a $crash $l $args):\
+        Test of internal init. $i message iters. \
+        Test $cnt of $maxtest tests $with recovery $msg $msg2."}
+
+    foreach r $test_recopts {
+        # Client crashes are only simulated with recovery, and never
+        # on Windows or HP-UX.  Only the client under test needs to
+        # be exercised with in-memory logs, and only without recovery.
+        set crash_modes { master_change }
+        if { [string equal $r "-recover"] } {
+            if { ! $is_windows_test && ! $is_hp_test } {
+                set crash_modes { master_change client_crash both }
+            }
+            set log_modes { on-disk }
+            set with "with"
+        } else {
+            set log_modes { on-disk in-memory }
+            set with "without"
+        }
+
+        # Number of permutations for this recovery setting; shown in
+        # the banner as the total.
+        set maxtest [expr {[llength $crash_modes] * \
+            [llength $clean_modes] * [llength $arch_modes] * \
+            [llength $log_modes] * $msg_rounds}]
+
+        set cnt 1
+        foreach crash $crash_modes {
+            foreach clean $clean_modes {
+                foreach a $arch_modes {
+                    foreach l $log_modes {
+                        set i 1
+                        while { $i <= $msg_rounds } {
+                            eval $announce
+                            rep039_sub $method $niter $tnum $r \
+                                $clean $a $crash $l $i $args
+                            incr cnt
+                            incr i
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+# rep039_sub --
+#	Run one permutation of the interrupted internal-init test with three
+#	replication sites: A (initial master), B (client under test) and
+#	C (other client).
+#
+#	method    access method, passed to every rep_test call
+#	niter     count passed to rep_test for the initial load and for the
+#	          log-generating loop; also passed as the argument after
+#	          the entry count in the first post-reopen rep_test call.
+#	          It is overwritten with 10 later in the proc.
+#	tnum      test number used in the printed step labels
+#	recargs   extra arguments appended to each env open command; the
+#	          proc compares it against "-recover"
+#	clean     "clean" wipes the test client's directory before it is
+#	          reopened; only "noclean" runs may hold a db handle open
+#	archive   "archive" runs db_archive -d on the other client and
+#	          checks that log.0000000001 is gone afterwards
+#	crash     one of master_change, client_crash or both; any other
+#	          value raises an error
+#	cl_logopt log option for the client under test, handed to
+#	          adjust_txnargs and adjust_logargs ("on-disk" gets an
+#	          explicit log buffer instead)
+#	pmsgs     total number of single message-processing rounds run
+#	          before the interruption
+#	largs     extra arguments appended to each rep_test call; a
+#	          -pagesize setting is appended here
+#
+#	NOTE(review): the printed step letters are not in sequence (.d
+#	labels both the master archive step and the other-client archive
+#	step, and .g is printed both before and after .f).
+#
+proc rep039_sub \
+ { method niter tnum recargs clean archive crash cl_logopt pmsgs largs } {
+ global testdir
+ global util_path
+ global databases_in_memory
+ global repfiles_in_memory
+ global rep_verbose
+ global verbose_type
+
+ # Optional verbose setting, added to all three env open commands.
+ set verbargs ""
+ if { $rep_verbose == 1 } {
+ set verbargs " -verbose {$verbose_type on} "
+ }
+
+ # Optional in-memory replication files flag, also added to all three.
+ set repmemargs ""
+ if { $repfiles_in_memory } {
+ set repmemargs "-rep_inmem_files "
+ }
+
+ # Decode the crash type into two independent flags.
+ set master_change false
+ set client_crash false
+ if { $crash == "master_change" } {
+ set master_change true
+ } elseif { $crash == "client_crash" } {
+ set client_crash true
+ } elseif { $crash == "both" } {
+ set master_change true
+ set client_crash true
+ } else {
+ error "FAIL:[timestamp] '$crash' is an unrecognized crash type"
+ }
+
+ env_cleanup $testdir
+
+ replsetup $testdir/MSGQUEUEDIR
+
+ # This test has three replication sites: a master, a client whose
+ # behavior is under test, and another client. We'll call them
+ # "A", "B" and "C". At one point during the test, we may (depending on
+ # the setting of $master_change) switch roles between the master and the
+ # other client.
+ #
+ # The initial site/role assignments are as follows:
+ #
+ # A = master
+ # B = client under test
+ # C = other client
+ #
+ # In the case where we do switch roles, the roles become:
+ #
+ # A = other client
+ # B = client under test (no change here)
+ # C = master
+ #
+ # Although the real names are A, B, and C, we'll use mnemonic names
+ # whenever possible. In particular, this means that we'll have to
+ # re-jigger the mnemonic names after the role switch.
+
+ file mkdir [set dirs(A) $testdir/SITE_A]
+ file mkdir [set dirs(B) $testdir/SITE_B]
+ file mkdir [set dirs(C) $testdir/SITE_C]
+
+ # Log size is small so we quickly create more than one.
+ # The documentation says that the log file must be at least
+ # four times the size of the in-memory log buffer.
+ set pagesize 4096
+ append largs " -pagesize $pagesize "
+ set log_buf [expr $pagesize * 2]
+ set log_max [expr $log_buf * 4]
+
+ # Set up the three sites: A, B, and C will correspond to EID's
+ # 1, 2, and 3 in the obvious way. As we start out, site A is always the
+ # master.
+ #
+ repladd 1
+ set env_A_cmd "berkdb_env_noerr -create -txn nosync \
+ $verbargs $repmemargs \
+ -log_buffer $log_buf -log_max $log_max -errpfx SITE_A \
+ -home $dirs(A) -rep_transport \[list 1 replsend\]"
+ set envs(A) [eval $env_A_cmd $recargs -rep_master]
+
+ # Open a client
+ # Only this site honors cl_logopt; A and C always use -txn nosync
+ # with an explicit log buffer.
+ repladd 2
+ set txn_arg [adjust_txnargs $cl_logopt]
+ set log_arg [adjust_logargs $cl_logopt]
+ if { $cl_logopt == "on-disk" } {
+ # Override in this case, because we want to specify log_buffer.
+ set log_arg "-log_buffer $log_buf"
+ }
+ # The command is kept as a string because site B is opened again
+ # later: once after the close below, and once more when a client
+ # crash is simulated.
+ set env_B_cmd "berkdb_env_noerr -create $txn_arg \
+ $verbargs $repmemargs \
+ $log_arg -log_max $log_max -errpfx SITE_B \
+ -home $dirs(B) -rep_transport \[list 2 replsend\]"
+ set envs(B) [eval $env_B_cmd $recargs -rep_client]
+
+ # Open 2nd client
+ repladd 3
+ set env_C_cmd "berkdb_env_noerr -create -txn nosync \
+ $verbargs $repmemargs \
+ -log_buffer $log_buf -log_max $log_max -errpfx SITE_C \
+ -home $dirs(C) -rep_transport \[list 3 replsend\]"
+ set envs(C) [eval $env_C_cmd $recargs -rep_client]
+
+ # Turn off throttling for this test.
+ foreach site [array names envs] {
+ $envs($site) rep_limit 0 0
+ }
+
+ # Bring the clients online by processing the startup messages.
+ # Each envlist element pairs an env handle with its site's EID.
+ set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
+ process_msgs $envlist
+
+ # Set up the (indirect) mnemonic role names for the first part of the
+ # test.
+ set master A
+ set test_client B
+ set other C
+
+ # Clobber replication's 30-second anti-archive timer, which will have
+ # been started by client sync-up internal init, so that we can do a
+ # log_archive in a moment.
+ #
+ $envs($master) test force noarchive_timeout
+
+ # Run rep_test in the master (and update client).
+ puts "\tRep$tnum.a: Running rep_test in replicated env."
+ eval rep_test $method $envs($master) NULL $niter 0 0 0 $largs
+ process_msgs $envlist
+
+ puts "\tRep$tnum.b: Close client."
+ error_check_good client_close [$envs($test_client) close] 0
+
+ # Remember the last (highest-sorting) log file name reported for the
+ # closed client; the loop below runs until the master's log list no
+ # longer contains it.
+ set res [eval exec $util_path/db_archive -l -h $dirs($test_client)]
+ set last_client_log [lindex [lsort $res] end]
+
+ set stop 0
+ while { $stop == 0 } {
+ # Run rep_test in the master (don't update client).
+ puts "\tRep$tnum.c: Running rep_test in replicated env."
+ eval rep_test $method $envs($master) NULL $niter 0 0 0 $largs
+ #
+ # Clear messages for first client. We want that site
+ # to get far behind.
+ #
+ replclear 2
+ puts "\tRep$tnum.d: Run db_archive on master."
+ set res [eval exec $util_path/db_archive -d -h $dirs($master)]
+ set res [eval exec $util_path/db_archive -l -h $dirs($master)]
+ # Done once the client's last log file is absent from the
+ # master's remaining log list.
+ if { [lsearch -exact $res $last_client_log] == -1 } {
+ set stop 1
+ }
+ }
+
+ # The test client is still closed, so only the master and the other
+ # client exchange messages here.
+ set envlist "{$envs($master) 1} {$envs($other) 3}"
+ process_msgs $envlist
+
+ if { $archive == "archive" } {
+ puts "\tRep$tnum.d: Run db_archive on other client."
+ # log.0000000001 must be listed before the removal pass and
+ # must not be listed after it.
+ set res [eval exec $util_path/db_archive -l -h $dirs($other)]
+ error_check_bad \
+ log.1.present [lsearch -exact $res log.0000000001] -1
+ set res [eval exec $util_path/db_archive -d -h $dirs($other)]
+ set res [eval exec $util_path/db_archive -l -h $dirs($other)]
+ error_check_good \
+ log.1.gone [lsearch -exact $res log.0000000001] -1
+ } else {
+ puts "\tRep$tnum.d: Skipping db_archive on other client."
+ }
+
+ puts "\tRep$tnum.e: Reopen test client ($clean)."
+ if { $clean == "clean" } {
+ env_cleanup $dirs($test_client)
+ }
+
+ # (The test client is always site B, EID 2.)
+ #
+ set envs(B) [eval $env_B_cmd $recargs -rep_client]
+ error_check_good client_env [is_valid_env $envs(B)] TRUE
+ $envs(B) rep_limit 0 0
+
+ # Hold an open database handle while doing internal init, to make sure
+ # no bad lock interactions are happening. But only do so some of the
+ # time, and of course only if it's reasonable to expect the database to
+ # exist at this point. (It won't, if we're using in-memory databases
+ # and we've just started the client with recovery, since recovery blows
+ # away the mpool.) Set up database as in-memory or on-disk first.
+ #
+ if { $databases_in_memory } {
+ set dbname { "" "test.db" }
+ set have_db [expr {$recargs != "-recover"}]
+ } else {
+ set dbname "test.db"
+ set have_db true
+ }
+
+ # The handle is held only for noclean runs where the database can
+ # exist, and then only when a random 0/1 draw comes up 1.
+ if {$clean == "noclean" && $have_db && [berkdb random_int 0 1] == 1} {
+ puts "\tRep$tnum.g: Hold open db handle from client app."
+ set cdb [eval\
+ {berkdb_open_noerr -env} $envs($test_client) $dbname]
+ error_check_good dbopen [is_valid_db $cdb] TRUE
+ set ccur [$cdb cursor]
+ error_check_good curs [is_valid_cursor $ccur $cdb] TRUE
+ # Fetch the first record through a cursor. The values stored
+ # in ret, kd and key are not read again in this proc.
+ set ret [$ccur get -first]
+ set kd [lindex $ret 0]
+ set key [lindex $kd 0]
+ error_check_good cclose [$ccur close] 0
+ } else {
+ puts "\tRep$tnum.g: (No client app handle will be held.)"
+ set cdb "NONE"
+ }
+
+ # All three sites take part again. This first proc_msgs_once call is
+ # round one of the pmsgs rounds; the loop further down runs the rest.
+ set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
+ proc_msgs_once $envlist
+
+ #
+ # We want to simulate a master continually getting new
+ # records while an update is going on.
+ #
+ set entries 10
+ eval rep_test $method $envs($master) NULL $entries $niter 0 0 $largs
+ #
+ # We call proc_msgs_once N times to get us into page recovery:
+ # 1. Send master messages and client finds master.
+ # 2. Master replies and client does verify.
+ # 3. Master gives verify_fail and client does update_req.
+ # 4. Master send update info and client does page_req.
+ #
+ # We vary the number of times we call proc_msgs_once (via pmsgs)
+ # so that we test switching master at each point in the
+ # internal initialization processing.
+ #
+ set nproced 0
+ puts "\tRep$tnum.f: Get partially through initialization ($pmsgs iters)"
+ # One round was already run above, so run pmsgs - 1 more.
+ for { set i 1 } { $i < $pmsgs } { incr i } {
+ incr nproced [proc_msgs_once $envlist]
+ }
+
+ if { [string is true $master_change] } {
+ # Presumably discards the queued messages for sites 1 and 3
+ # (A and C), as replclear 2 did for the test client earlier.
+ replclear 1
+ replclear 3
+ puts "\tRep$tnum.g: Downgrade/upgrade master."
+
+ # Downgrade the existing master to a client, switch around the
+ # roles, and then upgrade the newly appointed master.
+ error_check_good downgrade [$envs($master) rep_start -client] 0
+
+ set master C
+ set other A
+
+ error_check_good upgrade [$envs($master) rep_start -master] 0
+ }
+
+ # Simulate a client crash: simply abandon the handle without closing it.
+ # Note that this doesn't work on Windows, because there you can't remove
+ # a file if anyone (including yourself) has it open. This also does not
+ # work on HP-UX, because there you are not allowed to open a second
+ # handle on an env.
+ #
+ # Note that crashing only makes sense with "-recover".
+ #
+ if { [string is true $client_crash] } {
+ error_check_good assert [string compare $recargs "-recover"] 0
+
+ # Keep the old handle so a best-effort close can be attempted
+ # at the very end of the proc.
+ set abandoned_env $envs($test_client)
+ set abandoned true
+
+ set envs($test_client) [eval $env_B_cmd $recargs -rep_client]
+ $envs($test_client) rep_limit 0 0
+
+ # Again, remember: whatever the current roles, a site and its EID
+ # stay linked always.
+ #
+ set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
+ } else {
+ set abandoned false
+ }
+
+ process_msgs $envlist
+ #
+ # Now simulate continual updates to the new master. Each
+ # time through we just process messages once before
+ # generating more updates.
+ #
+ # niter is reused from here on as the number of update rounds,
+ # replacing the value the caller passed in.
+ set niter 10
+ for { set i 0 } { $i < $niter } { incr i } {
+ set nproced 0
+ set start [expr $i * $entries]
+ eval rep_test $method $envs($master) NULL $entries $start \
+ $start 0 $largs
+ incr nproced [proc_msgs_once $envlist]
+ # The value added from proc_msgs_once must be nonzero in
+ # every round.
+ error_check_bad nproced $nproced 0
+ }
+ # i equals niter here, so start is the offset just past the last
+ # batch written above; it is used for the final rep_test call below.
+ set start [expr $i * $entries]
+ process_msgs $envlist
+
+ puts "\tRep$tnum.h: Verify logs and databases"
+ # Whether or not we've switched roles, it's always site A that may have
+ # had its logs archived away. When the $init_test flag is turned on,
+ # rep_verify allows the site in the second position to have
+ # (more-)archived logs, so we have to abuse the calling signature a bit
+ # here to get this to work. (I.e., even when A is still master and C is
+ # still the other client, we have to pass things in this order so that
+ # the $init_test different-sized-logs trick can work.)
+ #
+ set init_test 1
+ rep_verify $dirs(C) $envs(C) $dirs(A) $envs(A) $init_test
+
+ # Process messages again in case we are running with debug_rop.
+ process_msgs $envlist
+ rep_verify $dirs($master) $envs($master) \
+ $dirs($test_client) $envs($test_client) $init_test
+
+ # Add records to the master and update client.
+ puts "\tRep$tnum.i: Add more records and check again."
+ set entries 10
+ eval rep_test $method $envs($master) NULL $entries $start \
+ $start 0 $largs
+ # NOTE(review): this is the only process_msgs call here that passes
+ # the extra arguments 0 NONE err; their meaning is defined by
+ # process_msgs, which is not visible in this file.
+ process_msgs $envlist 0 NONE err
+
+ # Check again that everyone is identical.
+ rep_verify $dirs(C) $envs(C) $dirs(A) $envs(A) $init_test
+ process_msgs $envlist
+ rep_verify $dirs($master) $envs($master) \
+ $dirs($test_client) $envs($test_client) $init_test
+
+ # Release the client application's database handle, if one was held.
+ if {$cdb != "NONE"} {
+ if {$abandoned} {
+ # The $cdb was opened in an env which was then
+ # abandoned, recovered, marked panic'ed. We don't
+ # really care; we're just trying to clean up resources.
+ #
+ catch {$cdb close}
+ } else {
+ error_check_good clientdb_close [$cdb close] 0
+ }
+ }
+ error_check_good masterenv_close [$envs($master) close] 0
+ error_check_good clientenv_close [$envs($test_client) close] 0
+ error_check_good clientenv2_close [$envs($other) close] 0
+ # Best-effort close of the handle left behind by the simulated
+ # crash; any error from it is ignored.
+ if { $abandoned } {
+ catch {$abandoned_env close}
+ }
+ replclose $testdir/MSGQUEUEDIR
+}