diff options
author | Jeff Donahue <jeff.donahue@gmail.com> | 2015-10-22 18:33:07 -0700 |
---|---|---|
committer | Jeff Donahue <jeff.donahue@gmail.com> | 2015-10-22 18:33:07 -0700 |
commit | 39f69fbe520d90d30fc342e5d69153ec9534d2bc (patch) | |
tree | 84246ebebc7236446b358b582ab3ce9e7b7daf29 /examples | |
parent | 50a23b7ba2c8fe9ee2f6e115bdc5948ff82c194d (diff) | |
parent | a52ee656a589313901560c87b65a570ee41c9fee (diff) | |
download | caffeonacl-39f69fbe520d90d30fc342e5d69153ec9534d2bc.tar.gz caffeonacl-39f69fbe520d90d30fc342e5d69153ec9534d2bc.tar.bz2 caffeonacl-39f69fbe520d90d30fc342e5d69153ec9534d2bc.zip |
Merge pull request #3229 from cdoersch/batchnorm2
Yet another batch normalization PR
Diffstat (limited to 'examples')
-rw-r--r-- | examples/cifar10/cifar10_full_sigmoid_solver.prototxt | 28 | ||||
-rw-r--r-- | examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt | 28 | ||||
-rw-r--r-- | examples/cifar10/cifar10_full_sigmoid_train_test.prototxt | 212 | ||||
-rw-r--r-- | examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt | 240 | ||||
-rwxr-xr-x | examples/cifar10/train_full_sigmoid.sh | 7 | ||||
-rwxr-xr-x | examples/cifar10/train_full_sigmoid_bn.sh | 7 |
6 files changed, 522 insertions, 0 deletions
diff --git a/examples/cifar10/cifar10_full_sigmoid_solver.prototxt b/examples/cifar10/cifar10_full_sigmoid_solver.prototxt new file mode 100644 index 00000000..7dd3ecb9 --- /dev/null +++ b/examples/cifar10/cifar10_full_sigmoid_solver.prototxt @@ -0,0 +1,28 @@ +# reduce learning rate after 120 epochs (60000 iters) by factor of 10 +# then another factor of 10 after 10 more epochs (5000 iters) + +# The train/test net protocol buffer definition +net: "examples/cifar10/cifar10_full_sigmoid_train_test.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# In the case of CIFAR10, we have test batch size 1000 and 10 test iterations, +# covering the full 10,000 testing images. +test_iter: 10 +# Carry out testing every 1000 training iterations. +test_interval: 1000 +# The base learning rate, momentum and the weight decay of the network. +base_lr: 0.001 +momentum: 0.9 +#weight_decay: 0.004 +# The learning rate policy +lr_policy: "step" +gamma: 1 +stepsize: 5000 +# Display every 100 iterations +display: 100 +# The maximum number of iterations +max_iter: 60000 +# snapshot intermediate results +snapshot: 10000 +snapshot_prefix: "examples/cifar10_full_sigmoid" +# solver mode: CPU or GPU +solver_mode: GPU diff --git a/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt b/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt new file mode 100644 index 00000000..a57b280f --- /dev/null +++ b/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt @@ -0,0 +1,28 @@ +# reduce learning rate after 120 epochs (60000 iters) by factor of 10 +# then another factor of 10 after 10 more epochs (5000 iters) + +# The train/test net protocol buffer definition +net: "examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt" +# test_iter specifies how many forward passes the test should carry out. +# In the case of CIFAR10, we have test batch size 1000 and 10 test iterations, +# covering the full 10,000 testing images. 
+test_iter: 10 +# Carry out testing every 1000 training iterations. +test_interval: 1000 +# The base learning rate, momentum and the weight decay of the network. +base_lr: 0.001 +momentum: 0.9 +#weight_decay: 0.004 +# The learning rate policy +lr_policy: "step" +gamma: 1 +stepsize: 5000 +# Display every 100 iterations +display: 100 +# The maximum number of iterations +max_iter: 60000 +# snapshot intermediate results +snapshot: 10000 +snapshot_prefix: "examples/cifar10_full_sigmoid_bn" +# solver mode: CPU or GPU +solver_mode: GPU diff --git a/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt b/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt new file mode 100644 index 00000000..fba69b81 --- /dev/null +++ b/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt @@ -0,0 +1,212 @@ +name: "CIFAR10_full" +layer { + name: "cifar" + type: "Data" + top: "data" + top: "label" + include { + phase: TRAIN + } + transform_param { + mean_file: "examples/cifar10/mean.binaryproto" + } + data_param { + source: "examples/cifar10/cifar10_train_lmdb" + batch_size: 111 + backend: LMDB + } +} +layer { + name: "cifar" + type: "Data" + top: "data" + top: "label" + include { + phase: TEST + } + transform_param { + mean_file: "examples/cifar10/mean.binaryproto" + } + data_param { + source: "examples/cifar10/cifar10_test_lmdb" + batch_size: 1000 + backend: LMDB + } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.0001 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} + + + +layer { + name: "Sigmoid1" + type: "Sigmoid" + bottom: "pool1" + top: "Sigmoid1" +} + +layer { + name: "conv2" + type: "Convolution" + bottom: "Sigmoid1" + 
top: "conv2" + param { + lr_mult: 1 + } + param { + lr_mult: 2 + } + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} + + +layer { + name: "Sigmoid2" + type: "Sigmoid" + bottom: "conv2" + top: "Sigmoid2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "Sigmoid2" + top: "pool2" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: "pool2" + top: "conv3" + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + stride: 1 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } + param { + lr_mult: 1 + } + param { + lr_mult: 1 + } + +} + +layer { + name: "Sigmoid3" + type: "Sigmoid" + bottom: "conv3" + top: "Sigmoid3" +} + +layer { + name: "pool3" + type: "Pooling" + bottom: "Sigmoid3" + top: "pool3" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } +} + +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool3" + top: "ip1" + param { + lr_mult: 1 + decay_mult: 0 + } + param { + lr_mult: 2 + decay_mult: 0 + } + inner_product_param { + num_output: 10 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "accuracy" + type: "Accuracy" + bottom: "ip1" + bottom: "label" + top: "accuracy" + include { + phase: TEST + } +} +layer { + name: "loss" + type: "SoftmaxWithLoss" + bottom: "ip1" + bottom: "label" + top: "loss" +} diff --git a/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt b/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt new file mode 100644 index 00000000..1a810751 --- /dev/null +++ b/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt @@ -0,0 +1,240 @@ +name: "CIFAR10_full" +layer { + name: "cifar" + type: "Data" + top: "data" + top: "label" + include { + phase: TRAIN + } + transform_param { + mean_file: 
"examples/cifar10/mean.binaryproto" + } + data_param { + source: "examples/cifar10/cifar10_train_lmdb" + batch_size: 100 + backend: LMDB + } +} +layer { + name: "cifar" + type: "Data" + top: "data" + top: "label" + include { + phase: TEST + } + transform_param { + mean_file: "examples/cifar10/mean.binaryproto" + } + data_param { + source: "examples/cifar10/cifar10_test_lmdb" + batch_size: 1000 + backend: LMDB + } +} +layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + param { + lr_mult: 1 + } + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + bias_term: false + weight_filler { + type: "gaussian" + std: 0.0001 + } + } +} +layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 + stride: 2 + } +} + +layer { + name: "bn1" + type: "BatchNorm" + bottom: "pool1" + top: "bn1" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } +} + +layer { + name: "Sigmoid1" + type: "Sigmoid" + bottom: "bn1" + top: "Sigmoid1" +} + +layer { + name: "conv2" + type: "Convolution" + bottom: "Sigmoid1" + top: "conv2" + param { + lr_mult: 1 + } + convolution_param { + num_output: 32 + pad: 2 + kernel_size: 5 + stride: 1 + bias_term: false + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} + +layer { + name: "bn2" + type: "BatchNorm" + bottom: "conv2" + top: "bn2" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } +} + +layer { + name: "Sigmoid2" + type: "Sigmoid" + bottom: "bn2" + top: "Sigmoid2" +} +layer { + name: "pool2" + type: "Pooling" + bottom: "Sigmoid2" + top: "pool2" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } +} +layer { + name: "conv3" + type: "Convolution" + bottom: "pool2" + top: "conv3" + param { + lr_mult: 1 + } + convolution_param { + num_output: 64 + pad: 2 + kernel_size: 5 + stride: 1 + bias_term: false + weight_filler { + type: "gaussian" + std: 0.01 + } + } +} + 
+layer { + name: "bn3" + type: "BatchNorm" + bottom: "conv3" + top: "bn3" + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } + param { + lr_mult: 0 + } +} + +layer { + name: "Sigmoid3" + type: "Sigmoid" + bottom: "bn3" + top: "Sigmoid3" +} +layer { + name: "pool3" + type: "Pooling" + bottom: "Sigmoid3" + top: "pool3" + pooling_param { + pool: AVE + kernel_size: 3 + stride: 2 + } +} + +layer { + name: "ip1" + type: "InnerProduct" + bottom: "pool3" + top: "ip1" + param { + lr_mult: 1 + decay_mult: 1 + } + param { + lr_mult: 1 + decay_mult: 0 + } + inner_product_param { + num_output: 10 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + } + } +} +layer { + name: "accuracy" + type: "Accuracy" + bottom: "ip1" + bottom: "label" + top: "accuracy" + include { + phase: TEST + } +} +layer { + name: "loss" + type: "SoftmaxWithLoss" + bottom: "ip1" + bottom: "label" + top: "loss" +} diff --git a/examples/cifar10/train_full_sigmoid.sh b/examples/cifar10/train_full_sigmoid.sh new file mode 100755 index 00000000..9cff06d3 --- /dev/null +++ b/examples/cifar10/train_full_sigmoid.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env sh + +TOOLS=./build/tools + +$TOOLS/caffe train \ + --solver=examples/cifar10/cifar10_full_sigmoid_solver.prototxt + diff --git a/examples/cifar10/train_full_sigmoid_bn.sh b/examples/cifar10/train_full_sigmoid_bn.sh new file mode 100755 index 00000000..011387c9 --- /dev/null +++ b/examples/cifar10/train_full_sigmoid_bn.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env sh + +TOOLS=./build/tools + +$TOOLS/caffe train \ + --solver=examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt + |