db/test/rep067.tcl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361

# See the file LICENSE for redistribution information.
#
# Copyright (c) 2002,2007 Oracle.  All rights reserved.
#
# $Id: rep067.tcl,v 1.8 2007/06/15 14:39:51 carol Exp $
#
# TEST  rep067
# TEST	Replication election test with large timeouts.
# TEST
# TEST	Test replication elections among clients with widely varying
# TEST	timeouts.  This test is used to simulate a customer that
# TEST	wants to force full participation in an election, but only
# TEST	if all sites are present (i.e. if all sites are restarted
# TEST	together).  If any site has already been part of the group,
# TEST	then we want to be able to elect a master based on majority.
# TEST	Using varied timeouts, we can force full participation if
# TEST	all sites are present with "long_timeout" amount of time and
# TEST	then revert to majority.
# TEST
# TEST	A long_timeout would be several minutes whereas a normal
# TEST	short timeout would be a few seconds.
#
proc rep067 { method args } {

	source ./include.tcl
	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}

	# Skip for all methods except btree.
	if { $checking_valid_methods } {
		set test_methods { btree }
		return $test_methods
	}
	if { [is_btree $method] == 0 } {
		puts "Rep067: Skipping for method $method."
		return
	}

	set tnum "067"
	set niter 10
	set nclients 3
	set logsets [create_logsets [expr $nclients + 1]]

	# We don't want to run this with -recover - it takes too
	# long and doesn't cover any new ground.
	set recargs ""
	foreach l $logsets {
		puts "Rep$tnum ($recargs): Replication election\
		    mixed long timeouts with $nclients clients."
		puts -nonewline "Rep$tnum: Started at: "
		puts [clock format [clock seconds] -format "%H:%M %D"]
		puts "Rep$tnum: Master logs are [lindex $l 0]"
		for { set i 0 } { $i < $nclients } { incr i } {
			puts "Rep$tnum: Client $i logs are\
			    [lindex $l [expr $i + 1]]"
		}
		rep067_sub $method $tnum \
		    $niter $nclients $l $recargs $args
	}
}

proc rep067_sub { method tnum niter nclients logset recargs largs } {
	source ./include.tcl
	global rand_init
	error_check_good set_random_seed [berkdb srand $rand_init] 0
	global rep_verbose

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {rep on} "
	}

	env_cleanup $testdir

	set qdir $testdir/MSGQUEUEDIR
	replsetup $qdir

	set masterdir $testdir/MASTERDIR
	file mkdir $masterdir
	set m_logtype [lindex $logset 0]
	set m_logargs [adjust_logargs $m_logtype]
	set m_txnargs [adjust_txnargs $m_logtype]

	for { set i 0 } { $i < $nclients } { incr i } {
		set clientdir($i) $testdir/CLIENTDIR.$i
		file mkdir $clientdir($i)
		set c_logtype($i) [lindex $logset [expr $i + 1]]
		set c_logargs($i) [adjust_logargs $c_logtype($i)]
		set c_txnargs($i) [adjust_txnargs $c_logtype($i)]
	}

	# Open a master.
	repladd 1
	set env_cmd(M) "berkdb_env_noerr -create -log_max 1000000 \
	    -event rep_event \
	    -home $masterdir $m_logargs $verbargs -errpfx MASTER \
	    $m_txnargs -rep_master -rep_transport \[list 1 replsend\]"
	set masterenv [eval $env_cmd(M) $recargs]

	set envlist {}
	lappend envlist "$masterenv 1"

	# Open the clients.
	for { set i 0 } { $i < $nclients } { incr i } {
		set envid [expr $i + 2]
		repladd $envid
		set env_cmd($i) "berkdb_env_noerr -create \
		    -event rep_event -home $clientdir($i) \
		    $c_logargs($i) $c_txnargs($i) -rep_client $verbargs \
		    -errpfx CLIENT.$i -rep_transport \[list $envid replsend\]"
		set clientenv($i) [eval $env_cmd($i) $recargs]
		lappend envlist "$clientenv($i) $envid"
	}

	# Process startup messages
	process_msgs $envlist
	# Run a modified test001 in the master.
	puts "\tRep$tnum.a: Running test001 in replicated env."
	eval rep_test $method $masterenv NULL $niter 0 0 0 0 $largs

	# Process all the messages and close the master.
	process_msgs $envlist
	error_check_good masterenv_close [$masterenv close] 0
	set envlist [lreplace $envlist 0 0]

	#
	# Make sure all clients are starting with no pending messages.
	#
	for { set i 0 } { $i < $nclients } { incr i } {
		replclear [expr $i + 2]
	}

	#
	# Run the test for all different timoeut combinations.
	#
	set c0to { long medium short }
	set c1to { medium short long }
	set c2to { short medium long }
	set numtests [expr [llength $c0to] * [llength $c1to] * \
	    [llength $c2to]]
	set m "Rep$tnum"
	set count 0
	set last_win -1
	set win -1
	set quorum { majority all }
	foreach q $quorum {
		puts "\t$m.b: Starting $numtests election with\
		    timeout tests: $q must participate"
		foreach c0 $c0to {
			foreach c1 $c1to {
				foreach c2 $c2to {
					set elist [list $c0 $c1 $c2]
					rep067_elect env_cmd envlist $qdir \
					    $m $count win last_win $elist \
					    $q $logset
					incr count
				}
			}
		}
	}

	foreach pair $envlist {
		set cenv [lindex $pair 0]
		error_check_good cenv_close [$cenv close] 0
	}

	replclose $testdir/MSGQUEUEDIR
	puts -nonewline \
	    "Rep$tnum: Completed at: "
	puts [clock format [clock seconds] -format "%H:%M %D"]
}

proc rep067_elect { ecmd celist qdir msg count \
    winner lsn_lose elist quorum logset} {
	global elect_timeout elect_serial
	global timeout_ok
	upvar $ecmd env_cmd
	upvar $celist envlist
	upvar $winner win
	upvar $lsn_lose last_win

	# Set the proper value for the first time through the 
	# loop.  On subsequent passes, timeout_ok will already 
	# be set. 
	if { [info exists timeout_ok] == 0 } {
		set timeout_ok 0
	}

	set nclients [llength $elist]
	set nsites [expr $nclients + 1]

	#
	# Set long timeout to 3 minutes (180 sec).
	# Set medium timeout to half the long timeout.
	# Set short timeout to 10 seconds.
	set long_timeout 180000000
	set med_timeout [expr $long_timeout / 2]
	set short_timeout 10000000
	set cl_list {}
	foreach pair $envlist {
		set id [lindex $pair 1]
		set i [expr $id - 2]
		set clientenv($i) [lindex $pair 0]
		set to [lindex $elist $i]
		if { $to == "long" } {
			set elect_timeout($i) $long_timeout
		} elseif { $to == "medium" } {
			set elect_timeout($i) $med_timeout
		} elseif { $to == "short" } {
			set elect_timeout($i) $short_timeout
		}
		set elect_pipe($i) INVALID
		set err_cmd($i) "none"
		replclear $id
		lappend cl_list $i
	}

	# Select winner.  We want to test biggest LSN wins, and secondarily
	# highest priority wins.  If we already have a master, make sure
	# we don't start a client in that master.
	set elector 0
	if { $win == -1 } {
		if { $last_win != -1 } {
			set cl_list [lreplace $cl_list $last_win $last_win]
			set elector $last_win
		}
		set windex [berkdb random_int 0 [expr [llength $cl_list] - 1]]
		set win [lindex $cl_list $windex]
	} else {
		# Easy case, if we have a master, the winner must be the
		# same one as last time, just use $win.
		# If client0 is the current existing master, start the
		# election in client 1.
		if {$win == 0} {
			set elector 1
		}
	}
	# Winner has priority 100.  If we are testing LSN winning, the
	# make sure the lowest LSN client has the highest priority.
	# Everyone else has priority 10.
	for { set i 0 } { $i < $nclients } { incr i } {
		set crash($i) 0
		if { $i == $win } {
			set pri($i) 100
		} elseif { $i == $last_win } {
			set pri($i) 200
		} else {
			set pri($i) 10
		}
	}

	puts "\t$msg.b.$count: Start election (win=client$win) $elist"
	set msg $msg.c.$count
	#
	# If we want all sites, then set nsites and nvotes the same.
	# otherwise, we need to increase nsites to account
	# for the master that is "down".
	#
	if { $quorum == "all" } {
		set nsites $nclients
	} else {
		set nsites [expr $nclients + 1]
	}
	set nvotes $nclients
	run_election env_cmd envlist err_cmd pri crash \
	    $qdir $msg $elector $nsites $nvotes $nclients $win \
	    0 "test.db" 0 $timeout_ok
	#
	# Sometimes test elections with an existing master.
	# Other times test elections without master by closing the
	# master we just elected and creating a new client.
	# We want to weight it to close the new master.  So, use
	# a list to cause closing about 70% of the time.
	#
	set close_list { 0 0 0 1 1 1 1 1 1 1}
	set close_len [expr [llength $close_list] - 1]
	set close_index [berkdb random_int 0 $close_len]

	# Unless we close the master, the next election will time out.
	set timeout_ok 1
	
	if { [lindex $close_list $close_index] == 1 } {
		# Declare that we expect the next election to succeed.
		set timeout_ok 0
		puts -nonewline "\t\t$msg: Closing "
		error_check_good newmaster_flush [$clientenv($win) log_flush] 0
		error_check_good newmaster_close [$clientenv($win) close] 0
		#
		# If the next test should win via LSN then remove the
		# env before starting the new client so that we
		# can guarantee this client doesn't win the next one.
		set lsn_win { 0 0 0 0 1 1 1 1 1 1 }
		set lsn_len [expr [llength $lsn_win] - 1]
		set lsn_index [berkdb random_int 0 $lsn_len]
		set rec_arg ""
		set win_inmem [expr [string compare [lindex $logset \
		    [expr $win + 1]] in-memory] == 0]
		if { [lindex $lsn_win $lsn_index] == 1 } {
			set last_win $win
			set dirindex [lsearch -exact $env_cmd($win) "-home"]
			incr dirindex
			set lsn_dir [lindex $env_cmd($win) $dirindex]
			env_cleanup $lsn_dir
			puts -nonewline "and cleaning "
		} else {
			#
			# If we're not cleaning the env, decide if we should
			# run recovery upon reopening the env.  This causes
			# two things:
			# 1. Removal of region files which forces the env
			# to read its __db.rep.egen file.
			# 2. Adding a couple log records, so this client must
			# be the next winner as well since it'll have the
			# biggest LSN.
			#
			set rec_win { 0 0 0 0 0 0 1 1 1 1 }
			set rec_len [expr [llength $rec_win] - 1]
			set rec_index [berkdb random_int 0 $rec_len]
			if { [lindex $rec_win $rec_index] == 1 } {
				puts -nonewline "and recovering "
				set rec_arg "-recover"
				#
				# If we're in memory and about to run
				# recovery, we force ourselves not to win
				# the next election because recovery will
				# blow away the entire log in memory.
				# However, we don't skip this entirely
				# because we still want to force reading
				# of __db.rep.egen.
				#
				if { $win_inmem } {
					set last_win $win
				} else {
					set last_win -1
				}
			} else {
				set last_win -1
			}
		}
		puts "new master, new client $win"
		set clientenv($win) [eval $env_cmd($win) $rec_arg]
		error_check_good cl($win) [is_valid_env $clientenv($win)] TRUE
		#
		# Since we started a new client, we need to replace it
		# in the message processing list so that we get the
		# new Tcl handle name in there.
		set newelector "$clientenv($win) [expr $win + 2]"
		set envlist [lreplace $envlist $win $win $newelector]
		if { $rec_arg == "" || $win_inmem } {
			set win -1
		}
		#
		# Since we started a new client we want to give them
		# all a chance to process everything outstanding before
		# the election on the next iteration.
		#
		process_msgs $envlist
	}
}