examples/mnist/lenet_consolidated_solver.prototxt


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263

# lenet_consolidated_solver.prototxt consolidates the lenet_solver, lenet_train,
# and lenet_test prototxts into a single file.  It also adds an additional test
# net which runs on the training set, e.g., for the purpose of comparing
# train/test accuracy (accuracy is computed only on the test set in the included
# LeNet example).  This is mainly included as an example of using these features
# (specify NetParameters directly in the solver, specify multiple test nets)
# if desired.
#
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "examples/mnist/lenet"
# Set a random_seed for repeatable results.
# (For results that vary due to random initialization, comment out the below
# line, or set to a negative integer -- e.g. "random_seed: -1")
random_seed: 1701
# solver mode: CPU or GPU
solver_mode: GPU

# We test on both the test and train set using "stages".  The TEST DATA layers
# each have a stage, either 'test-on-train-set' or 'test-on-test-set'.
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
test_state: { stage: "test-on-test-set" }
# The train set has 60K images, so we run 600 test iters (600 * 100 = 60K).
test_iter: 600
test_state: { stage: "test-on-train-set" }

# The net protocol buffer definition
net_param {
  name: "LeNet"
  layers {
    name: "mnist"
    type: DATA
    top: "data"
    top: "label"
    data_param {
      source: "examples/mnist/mnist_train_lmdb"
      backend: LMDB
      batch_size: 64
    }
    transform_param {
      scale: 0.00390625
    }
    include: { phase: TRAIN }
  }
  layers {
    name: "mnist"
    type: DATA
    top: "data"
    top: "label"
    data_param {
      source: "examples/mnist/mnist_test_lmdb"
      backend: LMDB
      batch_size: 100
    }
    transform_param {
      scale: 0.00390625
    }
    include: {
      phase: TEST
      stage: "test-on-test-set"
    }
  }
  layers {
    name: "mnist"
    type: DATA
    top: "data"
    top: "label"
    data_param {
      source: "examples/mnist/mnist_train_lmdb"
      backend: LMDB
      batch_size: 100
    }
    transform_param {
      scale: 0.00390625
    }
    include: {
      phase: TEST
      stage: "test-on-train-set"
    }
  }
  layers {
    name: "conv1"
    type: CONVOLUTION
    bottom: "data"
    top: "conv1"
    blobs_lr: 1
    blobs_lr: 2
    convolution_param {
      num_output: 20
      kernel_size: 5
      stride: 1
      weight_filler {
        type: "xavier"
      }
      bias_filler {
        type: "constant"
      }
    }
  }
  layers {
    name: "pool1"
    type: POOLING
    bottom: "conv1"
    top: "pool1"
    pooling_param {
      pool: MAX
      kernel_size: 2
      stride: 2
    }
  }
  layers {
    name: "conv2"
    type: CONVOLUTION
    bottom: "pool1"
    top: "conv2"
    blobs_lr: 1
    blobs_lr: 2
    convolution_param {
      num_output: 50
      kernel_size: 5
      stride: 1
      weight_filler {
        type: "xavier"
      }
      bias_filler {
        type: "constant"
      }
    }
  }
  layers {
    name: "pool2"
    type: POOLING
    bottom: "conv2"
    top: "pool2"
    pooling_param {
      pool: MAX
      kernel_size: 2
      stride: 2
    }
  }
  layers {
    name: "ip1"
    type: INNER_PRODUCT
    bottom: "pool2"
    top: "ip1"
    blobs_lr: 1
    blobs_lr: 2
    inner_product_param {
      num_output: 500
      weight_filler {
        type: "xavier"
      }
      bias_filler {
        type: "constant"
      }
    }
  }
  layers {
    name: "relu1"
    type: RELU
    bottom: "ip1"
    top: "ip1"
  }
  layers {
    name: "ip2"
    type: INNER_PRODUCT
    bottom: "ip1"
    top: "ip2"
    blobs_lr: 1
    blobs_lr: 2
    inner_product_param {
      num_output: 10
      weight_filler {
        type: "xavier"
      }
      bias_filler {
        type: "constant"
      }
    }
  }
  layers {
    name: "accuracy"
    type: ACCURACY
    bottom: "ip2"
    bottom: "label"
    top: "accuracy"
  }
  layers {
    name: "loss"
    type: SOFTMAX_LOSS
    bottom: "ip2"
    bottom: "label"
    top: "loss"
  }
}

# Expected results for first and last 500 iterations:
# (with portions of log omitted for brevity)
#
# Iteration 0, Testing net (#0)
# Test score #0: 0.067
# Test score #1: 2.30256
# Iteration 0, Testing net (#1)
# Test score #0: 0.0670334
# Test score #1: 2.30258
# Iteration 100, lr = 0.00992565
# Iteration 100, loss = 0.280585
# Iteration 200, lr = 0.00985258
# Iteration 200, loss = 0.345601
# Iteration 300, lr = 0.00978075
# Iteration 300, loss = 0.172217
# Iteration 400, lr = 0.00971013
# Iteration 400, loss = 0.261836
# Iteration 500, lr = 0.00964069
# Iteration 500, loss = 0.157803
# Iteration 500, Testing net (#0)
# Test score #0: 0.968
# Test score #1: 0.0993772
# Iteration 500, Testing net (#1)
# Test score #0: 0.965883
# Test score #1: 0.109374
#
# [...]
#
# Iteration 9500, Testing net (#0)
# Test score #0: 0.9899
# Test score #1: 0.0308299
# Iteration 9500, Testing net (#1)
# Test score #0: 0.996816
# Test score #1: 0.0118238
# Iteration 9600, lr = 0.00603682
# Iteration 9600, loss = 0.0126215
# Iteration 9700, lr = 0.00601382
# Iteration 9700, loss = 0.00579304
# Iteration 9800, lr = 0.00599102
# Iteration 9800, loss = 0.00500633
# Iteration 9900, lr = 0.00596843
# Iteration 9900, loss = 0.00796607
# Iteration 10000, lr = 0.00594604
# Iteration 10000, loss = 0.00271736
# Iteration 10000, Testing net (#0)
# Test score #0: 0.9914
# Test score #1: 0.0276671
# Iteration 10000, Testing net (#1)
# Test score #0: 0.997782
# Test score #1: 0.00908085