db/test/rep039.tcl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400

# See the file LICENSE for redistribution information.
#
# Copyright (c) 2004,2007 Oracle.  All rights reserved.
#
# $Id: rep039.tcl,v 1.24 2007/05/24 20:06:39 alanb Exp $
#
# TEST	rep039
# TEST	Test of interrupted internal initialization changes.  The
# TEST	interruption is due to a changed master, or the client crashing,
# TEST	or both.
# TEST
# TEST	One master, two clients.
# TEST	Generate several log files. Remove old master log files.
# TEST	Restart client, optionally having "cleaned" client env dir.  Either
# TEST	way, this has the effect of forcing an internal init.
# TEST	Interrupt the internal init.
# TEST	Vary the number of times we process messages to make sure
# TEST	the interruption occurs at varying stages of the first internal
# TEST	initialization.
# TEST
# TEST	Run for btree and queue only because of the number of permutations.
# TEST
proc rep039 { method { niter 200 } { tnum "039" } args } {

	source ./include.tcl

	# Run for btree and queue methods only.
	if { $checking_valid_methods } {
		set test_methods {}
		foreach method $valid_methods {
			if { [is_btree $method] == 1 || \
			    [is_queue $method] == 1 } {
				lappend test_methods $method
			}
		}
		return $test_methods
	}
	if { [is_btree $method] == 0 && [is_queue $method] == 0 } {
		puts "Rep$tnum: skipping for non-btree, non-queue method."
		return
	}

	# Skip for mixed-mode logging -- this test has a very large
	# set of iterations already.
	global mixed_mode_logging
	if { $mixed_mode_logging > 0 } {
		puts "Rep$tnum: Skipping for mixed mode logging."
		return
	}

	# This test needs to set its own pagesize.
	set pgindex [lsearch -exact $args "-pagesize"]
	if { $pgindex != -1 } {
		puts "Rep$tnum: skipping for specific pagesizes"
		return
	}

	set args [convert_args $method $args]

	# Run the body of the test with and without recovery,
	# and with and without cleaning.
	set cleanopts { noclean clean }
	set archopts { archive noarchive }
	set nummsgs 5
	set announce {puts "Rep$tnum ($method $r $clean $a $crash $args):\
            Test of internal init. $i message iters."}
	foreach r $test_recopts {
		if { $r == "-recover" && ! $is_windows_test } {
			set crashopts { master_change client_crash both }
		} else {
			set crashopts { master_change }
		}
		foreach crash $crashopts {
			foreach clean $cleanopts {
				foreach a $archopts {
					for { set i 1 } \
					    { $i < $nummsgs } { incr i } {
						eval $announce
						rep039_sub $method $niter \
						    $tnum $r $clean $a $crash \
						    $i $args
					}
				}
			}
		}
	}
}

proc rep039_sub { method niter tnum recargs clean archive crash pmsgs largs } {
	global testdir
	global util_path
	global rep_verbose

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {rep on} "
	}

	set master_change false
	set client_crash false
	if { $crash == "master_change" } {
		set master_change true
	} elseif { $crash == "client_crash" } {
		set client_crash true
	} elseif { $crash == "both" } {
		set master_change true
		set client_crash true
	} else {
		error "FAIL:[timestamp] '$crash' is an unrecognized crash type"
	}

	env_cleanup $testdir

	replsetup $testdir/MSGQUEUEDIR

	# This test has three replication sites: a master, a client whose
	# behavior is under test, and another client.  We'll call them
	# "A", "B" and "C".  At one point during the test, we may (depending on
	# the setting of $master_change) switch roles between the master and the
	# other client.
	#
	# The initial site/role assignments are as follows:
	#
	#     A = master
	#     B = client under test
	#     C = other client
	#
	# In the case where we do switch roles, the roles become:
	#
	#     A = other client
	#     B = client under test (no change here)
	#     C = master
	#
	# Although the real names are A, B, and C, we'll use mnemonic names
	# whenever possible.  In particular, this means that we'll have to
	# re-jigger the mnemonic names after the role switch.

	file mkdir [set dirs(A) $testdir/SITE_A]
	file mkdir [set dirs(B) $testdir/SITE_B]
	file mkdir [set dirs(C) $testdir/SITE_C]

	# Log size is small so we quickly create more than one.
	# The documentation says that the log file must be at least
	# four times the size of the in-memory log buffer.
	set pagesize 4096
	append largs " -pagesize $pagesize "
	set log_buf [expr $pagesize * 2]
	set log_max [expr $log_buf * 4]

	# Set up the three sites: A, B, and C will correspond to EID's
	# 1, 2, and 3 in the obvious way.  As we start out, site A is always the
	# master.
	#
	repladd 1
	set env_A_cmd "berkdb_env_noerr -create -txn nosync $verbargs \
	    -log_buffer $log_buf -log_max $log_max -errpfx SITE_A \
	    -home $dirs(A) -rep_transport \[list 1 replsend\]"
	set envs(A) [eval $env_A_cmd $recargs -rep_master]

	# Open a client
	repladd 2
	set env_B_cmd "berkdb_env_noerr -create -txn nosync $verbargs \
	    -log_buffer $log_buf -log_max $log_max -errpfx SITE_B \
	    -home $dirs(B) -rep_transport \[list 2 replsend\]"
	set envs(B) [eval $env_B_cmd $recargs -rep_client]

	# Open 2nd client
	repladd 3
	set env_C_cmd "berkdb_env_noerr -create -txn nosync $verbargs \
	    -log_buffer $log_buf -log_max $log_max -errpfx SITE_C \
	    -home $dirs(C) -rep_transport \[list 3 replsend\]"
	set envs(C) [eval $env_C_cmd $recargs -rep_client]

	# Turn off throttling for this test.
	foreach site [array names envs] {
		$envs($site) rep_limit 0 0
	}

	# Bring the clients online by processing the startup messages.
	set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
	process_msgs $envlist

	# Set up the (indirect) mnemonic role names for the first part of the
	# test.
	set master A
	set test_client B
	set other C

	# Clobber replication's 30-second anti-archive timer, which will have
	# been started by client sync-up internal init, so that we can do a
	# log_archive in a moment.
	#
	$envs($master) test force noarchive_timeout

	# Run rep_test in the master (and update client).
	puts "\tRep$tnum.a: Running rep_test in replicated env."
	eval rep_test $method $envs($master) NULL $niter 0 0 0 0 $largs
	process_msgs $envlist

	puts "\tRep$tnum.b: Close client."
	error_check_good client_close [$envs($test_client) close] 0

	set res [eval exec $util_path/db_archive -l -h $dirs($test_client)]
	set last_client_log [lindex [lsort $res] end]

	set stop 0
	while { $stop == 0 } {
		# Run rep_test in the master (don't update client).
		puts "\tRep$tnum.c: Running rep_test in replicated env."
		eval rep_test $method $envs($master) NULL $niter 0 0 0 0 $largs
		#
		# Clear messages for first client.  We want that site
		# to get far behind.
		#
		replclear 2
		puts "\tRep$tnum.d: Run db_archive on master."
		set res [eval exec $util_path/db_archive -d -h $dirs($master)]
		set res [eval exec $util_path/db_archive -l -h $dirs($master)]
		if { [lsearch -exact $res $last_client_log] == -1 } {
			set stop 1
		}
	}

	set envlist "{$envs($master) 1} {$envs($other) 3}"
	process_msgs $envlist

	if { $archive == "archive" } {
		puts "\tRep$tnum.d: Run db_archive on other client."
		set res [eval exec $util_path/db_archive -l -h $dirs($other)]
		error_check_bad \
		    log.1.present [lsearch -exact $res log.0000000001] -1
		set res [eval exec $util_path/db_archive -d -h $dirs($other)]
		set res [eval exec $util_path/db_archive -l -h $dirs($other)]
		error_check_good \
		    log.1.gone [lsearch -exact $res log.0000000001] -1
	} else {
		puts "\tRep$tnum.d: Skipping db_archive on other client."
	}

	puts "\tRep$tnum.e: Reopen test client ($clean)."
	if { $clean == "clean" } {
		env_cleanup $dirs($test_client)
	}

	# (The test client is always site B, EID 2.)
	#
	set envs(B) [eval $env_B_cmd $recargs -rep_client]
	error_check_good client_env [is_valid_env $envs(B)] TRUE
	$envs(B) rep_limit 0 0

	# Hold an open database handle while doing internal init, to make sure
	# no back lock interactions are happening.  But only do so some of the
	# time.
	#
	if {$clean == "noclean" && [berkdb random_int 0 1] == 1} {
		puts "\tRep$tnum.g: Hold open db handle from client app."
		set cdb [eval {berkdb_open_noerr -env} $envs($test_client) "test.db"]
		error_check_good dbopen [is_valid_db $cdb] TRUE
		set ccur [$cdb cursor]
		error_check_good curs [is_valid_cursor $ccur $cdb] TRUE
		set ret [$ccur get -first]
		set kd [lindex $ret 0]
		set key [lindex $kd 0]
		error_check_good cclose [$ccur close] 0
	} else {
		puts "\tRep$tnum.g: (No client app handle will be held.)"
		set cdb "NONE"
	}

	set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
	proc_msgs_once $envlist

	#
	# We want to simulate a master continually getting new
	# records while an update is going on.
	#
	set entries 10
	eval rep_test $method $envs($master) NULL $entries $niter 0 0 0 $largs
	#
	# We call proc_msgs_once N times to get us into page recovery:
	# 1.  Send master messages and client finds master.
	# 2.  Master replies and client does verify.
	# 3.  Master gives verify_fail and client does update_req.
	# 4.  Master send update info and client does page_req.
	#
	# We vary the number of times we call proc_msgs_once (via pmsgs)
	# so that we test switching master at each point in the
	# internal initialization processing.
	#
	set nproced 0
	puts "\tRep$tnum.f: Get partially through initialization ($pmsgs iters)"
	for { set i 1 } { $i < $pmsgs } { incr i } {
		incr nproced [proc_msgs_once $envlist]
	}

	if { [string is true $master_change] } {
		replclear 1
		replclear 3
		puts "\tRep$tnum.g: Downgrade/upgrade master."

		# Downgrade the existing master to a client, switch around the
		# roles, and then upgrade the newly appointed master.
		error_check_good downgrade [$envs($master) rep_start -client] 0

		set master C
		set other A

		error_check_good upgrade [$envs($master) rep_start -master] 0
	}

	# Simulate a client crash: simply abandon the handle without closing it.
	# Note that this doesn't work on Windows, because there you can't remove
	# a file if anyone (including yourself) has it open.
	#
	# Note that crashing only makes sense with "-recover".
	#
	if { [string is true $client_crash] } {
		error_check_good assert [string compare $recargs "-recover"] 0

		set abandoned_env $envs($test_client)
		set abandoned true

		set envs($test_client) [eval $env_B_cmd $recargs -rep_client]
		$envs($test_client) rep_limit 0 0

		# Again, remember: whatever the current roles, a site and its EID
		# stay linked always.
		#
		set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
	} else {
		set abandoned false
	}

	process_msgs $envlist
	#
	# Now simulate continual updates to the new master.  Each
	# time through we just process messages once before
	# generating more updates.
	#
	set niter 10
	for { set i 0 } { $i < $niter } { incr i } {
		set nproced 0
		set start [expr $i * $entries]
		eval rep_test $method $envs($master) NULL $entries $start \
		    $start 0 0 $largs
		incr nproced [proc_msgs_once $envlist]
		error_check_bad nproced $nproced 0
	}
	set start [expr $i * $entries]
	process_msgs $envlist

	puts "\tRep$tnum.h: Verify logs and databases"
	# Whether or not we've switched roles, it's always site A that may have
	# had its logs archived away.  When the $init_test flag is turned on,
	# rep_verify allows the site in the second position to have
	# (more-)archived logs, so we have to abuse the calling signature a bit
	# here to get this to work.  (I.e., even when A is still master and C is
	# still the other client, we have to pass things in this order so that
	# the $init_test different-sized-logs trick can work.)
	#
	set init_test 1
	rep_verify $dirs(C) $envs(C) $dirs(A) $envs(A) $init_test

	# Process messages again in case we are running with debug_rop.
	process_msgs $envlist
	rep_verify $dirs($master) $envs($master) \
	    $dirs($test_client) $envs($test_client) $init_test

	# Add records to the master and update client.
	puts "\tRep$tnum.i: Add more records and check again."
	set entries 10
	eval rep_test $method $envs($master) NULL $entries $start \
	    $start 0 0 $largs
	process_msgs $envlist 0 NONE err

	# Check again that everyone is identical.
	rep_verify $dirs(C) $envs(C) $dirs(A) $envs(A) $init_test
	process_msgs $envlist
	rep_verify $dirs($master) $envs($master) \
	    $dirs($test_client) $envs($test_client) $init_test

	if {$cdb != "NONE"} {
		if {$abandoned} {
			# The $cdb was opened in an env which was then
			# abandoned, recovered, marked panic'ed.  We don't
			# really care; we're just trying to clean up resources.
			# 
			catch {$cdb close}
		} else {
			error_check_good clientdb_close [$cdb close] 0
		}
	}
	error_check_good masterenv_close [$envs($master) close] 0
	error_check_good clientenv_close [$envs($test_client) close] 0
	error_check_good clientenv2_close [$envs($other) close] 0
	if { $abandoned } {
		catch {$abandoned_env close}
	}
	replclose $testdir/MSGQUEUEDIR
}