test/rep088.tcl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248

# See the file LICENSE for redistribution information.
#
# Copyright (c) 2009 Oracle.  All rights reserved.
#
# TEST	rep088
# TEST  Replication roll-back preserves checkpoint.
# TEST
# TEST  Create a situation where a client has to roll back its
# TEST  log, discarding some existing transactions, in order to sync
# TEST  with a new master.
# TEST
# TEST  1. When the client still has its entire log file history, all
# TEST     the way back to log file #1, it's OK if the roll-back discards
# TEST     any/all checkpoints.
# TEST  2. When old log files have been archived, if the roll-back would
# TEST     remove all existing checkpoints it must be forbidden.  The log
# TEST     must always have a checkpoint (or all files back through #1).
# TEST     The client must do internal init or return JOIN_FAILURE.
# TEST  3. (the normal case) Old log files archived, and a checkpoint
# TEST     still exists in the portion of the log which will remain after
# TEST     the roll-back: no internal-init/JOIN_FAILURE necessary.
#
# TODO: maybe just reject anything that doesn't comply with my simplified
# rep_test clone, like fixed-length record methods, etc.

# Driver for test rep088.  Runs the roll-back scenario in rep088_sub once
# for each combination of log archiving, extra checkpoint and expected
# outcome described in the TEST header.
#
#	method	access method requested by the caller; only btree is run
#	niter	number of records written per batch of transactions
#	tnum	test number used in the progress messages
#	args	extra arguments, normalized through convert_args
#
# When $checking_valid_methods is set, nothing is run and the list of
# supported methods is returned instead.
proc rep088 { method { niter 20 } { tnum 088 } args } {
	source ./include.tcl

	if { $is_windows9x_test == 1 } {
		puts "Skipping replication test on Win 9x platform."
		return
	}

	# Run for btree only.
	if { $checking_valid_methods } {
		return { btree }
	}
	if { [is_btree $method] == 0 } {
		puts "\tRep$tnum: Skipping for method $method."
		return
	}

	set args [convert_args $method $args]

	puts "Rep$tnum ($method): Replication roll-back preserves checkpoint."

	# Each row below is one scenario:
	#	progress message, archive flag, extra-checkpoint flag,
	#	expected result.
	#
	# Expected result "sync" means the client should be allowed to
	# synchronize normally (to the found sync point), without any need
	# for internal init.
	#
	# Row 1 is case #1 of the TEST header.  Rows 2 and 3 are case #2:
	# first expecting JOIN_FAILURE, then allowing the internal init to
	# happen so that the subsequent restart with recovery is verified
	# as well (the obvious failure case prior to bug fix #16732).
	# Row 4 is case #3.
	foreach { title archive ckpt result } {
		"Rollback without checkpoint, with log file 1"
		    false false sync
		"Forbid rollback over only chkp: join failure"
		    true false join_failure
		"Forbid rollback over only chkp: internal init"
		    true false internal_init
		"Rollback with sufficient extra checkpoints"
		    true true sync
	} {
		puts "Rep$tnum: $title"
		rep088_sub $method $niter $tnum $archive $ckpt $result $args
	}
}

# Run a single roll-back scenario.
#
# Creates master A and clients B and C, shuts B down, lets A and C advance
# (writing checkpoints along the way), then closes A, restarts B as the new
# master and checks how C reacts when it tries to sync with B.
#
#	method	access method, handed to rep088_reptest
#	niter	number of records written per call to rep088_reptest
#	tnum	test number used in the progress messages
#	archive	if true, run "db_archive -d" on client C's home directory
#	ckpt	if true, take an extra checkpoint on the master before
#		client B is shut down
#	result	expected outcome: internal_init, join_failure or sync
#	largs	extra database-open arguments; -pagesize is appended here
proc rep088_sub { method niter tnum archive ckpt result largs } {
	source ./include.tcl
	global testdir
	global util_path
	global rep_verbose
	global verbose_type

	set verbargs ""
	if { $rep_verbose == 1 } {
		set verbargs " -verbose {$verbose_type on} "
	}

	env_cleanup $testdir
	replsetup $testdir/MSGQUEUEDIR

	file mkdir [set dirA $testdir/A]
	file mkdir [set dirB $testdir/B]
	file mkdir [set dirC $testdir/C]

	# Derive the log buffer size and maximum log file size from the page
	# size; the test relies on a small log file size (see step b).
	set pagesize 4096
	append largs " -pagesize $pagesize "
	set log_buf [expr {$pagesize * 2}]
	set log_max [expr {$log_buf * 4}]

	# The environment commands are kept as strings so that a site can be
	# reopened later in a different role (B as master, C with -recover).
	puts "\tRep$tnum.a: Create master and two clients"
	repladd 1
	set env_A_cmd "berkdb_env -create -txn $verbargs \
	    -log_buffer $log_buf -log_max $log_max \
	    -errpfx SITE_A -errfile /dev/stderr \
	    -home $dirA -rep_transport \[list 1 replsend\]"
	set envs(A) [eval $env_A_cmd -rep_master]

	repladd 2
	set env_B_cmd "berkdb_env -create -txn $verbargs \
	    -log_buffer $log_buf -log_max $log_max \
	    -errpfx SITE_B -errfile /dev/stderr \
	    -home $dirB -rep_transport \[list 2 replsend\]"
	set envs(B) [eval $env_B_cmd -rep_client]

	repladd 3
	set env_C_cmd "berkdb_env -create -txn $verbargs \
	    -log_buffer $log_buf -log_max $log_max \
	    -errpfx SITE_C -errfile /dev/stderr \
	    -home $dirC -rep_transport \[list 3 replsend\]"
	set envs(C) [eval $env_C_cmd -rep_client]

	set envlist "{$envs(A) 1} {$envs(B) 2} {$envs(C) 3}"
	process_msgs $envlist
	$envs(A) test force noarchive_timeout

	# Using small log file size, push into the second log file.
	#
	puts "\tRep$tnum.b: Write enough txns to exceed 1 log file"
	while { [lsn_file [next_expected_lsn $envs(C)]] == 1 } {
		eval rep088_reptest $method $envs(A) $niter $largs
		process_msgs $envlist
	}

	# To make sure everything still works in the normal case, put in a
	# checkpoint here before writing the transactions that will have to be
	# rolled back.  Later, when the client sees that it must roll back over
	# (and discard) the later checkpoint, the fact that this checkpoint is
	# here will allow it to proceed.
	#
	if { $ckpt } {
		puts "\tRep$tnum.c: put in an 'extra' checkpoint."
		$envs(A) txn_checkpoint
		process_msgs $envlist
	}

	# Turn off client B, the site that will become master later.  From
	# here on only A and C exchange messages.
	#
	puts "\tRep$tnum.d: Turn off client B and write more txns"
	$envs(B) close
	set envlist "{$envs(A) 1} {$envs(C) 3}"

	# Fill a bit more log, and then write a checkpoint.  Site B is closed,
	# so discard whatever is queued for it before processing messages.
	#
	eval rep088_reptest $method $envs(A) $niter $largs
	$envs(A) txn_checkpoint
	replclear 2
	process_msgs $envlist

	# At the client under test, archive away the first log file.
	#
	if { $archive } {
		puts "\tRep$tnum.e: Archive log at client C"
		exec $util_path/db_archive -d -h $dirC
	}

	# Another cycle of filling and checkpoint.
	#
	eval rep088_reptest $method $envs(A) $niter $largs
	$envs(A) txn_checkpoint
	replclear 2
	process_msgs $envlist

	# Now turn off the master, and turn on site B as master.  The client
	# under test has to sync with the new master.  Unless this scenario
	# expects an internal init, turn off auto-init on the client first.
	#
	if { $result != "internal_init" } {
		$envs(C) rep_config {noautoinit on}
	}
	puts "\tRep$tnum.f: Switch master to site B, try to sync client C"
	$envs(A) close
	set envs(B) [eval $env_B_cmd -rep_master]
	set envlist "{$envs(B) 2} {$envs(C) 3}"
	replclear 1

	# catch returns 0 when process_msgs completes without raising an
	# error; any error message is left in ret.
	set failed [catch { process_msgs $envlist } ret]

	switch $result {
		internal_init {
			error_check_good inited $failed 0

			# Now stop the client, and try restarting it with
			# recovery.
			#
			$envs(C) close
			set envs(C) [eval $env_C_cmd -rep_client -recover]
		}
		join_failure {
			# Message processing must have raised an error that
			# mentions DB_REP_JOIN_FAILURE.
			error_check_bad no_autoinit $failed 0
			error_check_good join_fail \
			    [is_substr $ret DB_REP_JOIN_FAILURE] 1
		}
		sync {
			# Message processing must have completed, and the
			# client's "Outdated conditions" statistic must
			# still be 0.
			error_check_good sync_ok $failed 0
			error_check_good not_outdated \
			    [stat_field $envs(C) rep_stat \
				 "Outdated conditions"] 0
		}
		default {
			error "FAIL: unknown test result option $result"
		}
	}

	$envs(C) close
	$envs(B) close
	replclose $testdir/MSGQUEUEDIR
}

# Stripped-down variant of proc rep_test.  The key difference is that it
# never checkpoints on its own, because this test needs explicit control
# over when checkpoints happen.  Access methods that use record numbers
# are not supported.
#
#	method	access method, converted via convert_method
#	env	environment handle in which test.db is opened
#	niter	upper bound on the number of records written
#	args	extra open arguments, converted via convert_args
#
# Reads up to niter lines from the file named by $dict and stores each
# line as a key whose data is the result of calling reverse on it.
proc rep088_reptest { method env niter args } {
	source ./include.tcl

	set db [eval berkdb_open_noerr -env $env -auto_commit -create \
	    [convert_method $method] [convert_args $method $args] test.db]

	set dictfile [open $dict]
	set written 0
	# Stop at niter records or at end of file, whichever comes first.
	while { $written < $niter && [gets $dictfile word] >= 0 } {
		$db put $word [reverse $word]
		incr written
	}
	close $dictfile
	$db close
}