summaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorDmytro Mishkin <ducha.aiki@gmail.com>2015-02-25 17:00:22 +0200
committerCarl Doersch <cdoersch@cs.cmu.edu>2015-10-20 21:04:08 -0700
commit2f05b03371e5936a478c7ad2946d0cd7c013920c (patch)
treeef569944b7bebf611e6e68df17146bd91e0bcd96 /examples
parent8c8e832e71985ba89dcb7c8a60697322c54b5f5b (diff)
downloadcaffeonacl-2f05b03371e5936a478c7ad2946d0cd7c013920c.tar.gz
caffeonacl-2f05b03371e5936a478c7ad2946d0cd7c013920c.tar.bz2
caffeonacl-2f05b03371e5936a478c7ad2946d0cd7c013920c.zip
Added batch normalization layer with test and examples
Diffstat (limited to 'examples')
-rw-r--r--examples/cifar10/cifar10_full_sigmoid_solver.prototxt28
-rw-r--r--examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt28
-rw-r--r--examples/cifar10/cifar10_full_sigmoid_train_test.prototxt212
-rw-r--r--examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt284
-rwxr-xr-xexamples/cifar10/train_full_sigmoid.sh7
-rwxr-xr-xexamples/cifar10/train_full_sigmoid_bn.sh7
6 files changed, 566 insertions, 0 deletions
diff --git a/examples/cifar10/cifar10_full_sigmoid_solver.prototxt b/examples/cifar10/cifar10_full_sigmoid_solver.prototxt
new file mode 100644
index 00000000..7dd3ecb9
--- /dev/null
+++ b/examples/cifar10/cifar10_full_sigmoid_solver.prototxt
@@ -0,0 +1,28 @@
+# reduce learning rate after 120 epochs (60000 iters) by factor of 10
+# then another factor of 10 after 10 more epochs (5000 iters)
+
+# The train/test net protocol buffer definition
+net: "examples/cifar10/cifar10_full_sigmoid_train_test.prototxt"
+# test_iter specifies how many forward passes the test should carry out.
+# In the case of CIFAR10, we have test batch size 1000 and 10 test iterations,
+# covering the full 10,000 testing images.
+test_iter: 10
+# Carry out testing every 1000 training iterations.
+test_interval: 1000
+# The base learning rate, momentum and the weight decay of the network.
+base_lr: 0.001
+momentum: 0.9
+#weight_decay: 0.004
+# The learning rate policy
+lr_policy: "step"
+gamma: 1
+stepsize: 5000
+# Display every 100 iterations
+display: 100
+# The maximum number of iterations
+max_iter: 60000
+# snapshot intermediate results
+snapshot: 10000
+snapshot_prefix: "examples/cifar10_full_sigmoid"
+# solver mode: CPU or GPU
+solver_mode: GPU
diff --git a/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt b/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt
new file mode 100644
index 00000000..a57b280f
--- /dev/null
+++ b/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt
@@ -0,0 +1,28 @@
+# reduce learning rate after 120 epochs (60000 iters) by factor of 10
+# then another factor of 10 after 10 more epochs (5000 iters)
+
+# The train/test net protocol buffer definition
+net: "examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt"
+# test_iter specifies how many forward passes the test should carry out.
+# In the case of CIFAR10, we have test batch size 1000 and 10 test iterations,
+# covering the full 10,000 testing images.
+test_iter: 10
+# Carry out testing every 1000 training iterations.
+test_interval: 1000
+# The base learning rate, momentum and the weight decay of the network.
+base_lr: 0.001
+momentum: 0.9
+#weight_decay: 0.004
+# The learning rate policy
+lr_policy: "step"
+gamma: 1
+stepsize: 5000
+# Display every 100 iterations
+display: 100
+# The maximum number of iterations
+max_iter: 60000
+# snapshot intermediate results
+snapshot: 10000
+snapshot_prefix: "examples/cifar10_full_sigmoid_bn"
+# solver mode: CPU or GPU
+solver_mode: GPU
diff --git a/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt b/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt
new file mode 100644
index 00000000..6f5bf26b
--- /dev/null
+++ b/examples/cifar10/cifar10_full_sigmoid_train_test.prototxt
@@ -0,0 +1,212 @@
+name: "CIFAR10_full"
+layer {
+ name: "cifar"
+ type: "Data"
+ top: "data"
+ top: "label"
+ include {
+ phase: TRAIN
+ }
+ transform_param {
+ mean_file: "examples/cifar10/mean.binaryproto"
+ }
+ data_param {
+ source: "examples/cifar10/cifar10_train_lmdb"
+ batch_size: 111
+ backend: LMDB
+ }
+}
+layer {
+ name: "cifar"
+ type: "Data"
+ top: "data"
+ top: "label"
+ include {
+ phase: TEST
+ }
+ transform_param {
+ mean_file: "examples/cifar10/mean.binaryproto"
+ }
+ data_param {
+ source: "examples/cifar10/cifar10_test_lmdb"
+ batch_size: 1000
+ backend: LMDB
+ }
+}
+layer {
+ name: "conv1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ convolution_param {
+ num_output: 32
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.0001
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+
+
+
+layer {
+ name: "Sigmoid1"
+ type: "Sigmoid"
+ bottom: "pool1"
+ top: "Sigmoid1"
+}
+
+layer {
+ name: "conv2"
+ type: "Convolution"
+ bottom: "Sigmoid1"
+ top: "conv2"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ convolution_param {
+ num_output: 32
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+
+
+layer {
+ name: "Sigmoid2"
+ type: "Sigmoid"
+ bottom: "conv2"
+ top: "Sigmoid2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "Sigmoid2"
+ top: "pool2"
+ pooling_param {
+ pool: AVE
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "conv3"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3"
+ convolution_param {
+ num_output: 64
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 1
+ }
+
+}
+
+layer {
+ name: "Sigmoid3"
+ type: "Sigmoid"
+ bottom: "conv3"
+ top: "Sigmoid3"
+}
+
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "Sigmoid3"
+ top: "pool3"
+ pooling_param {
+ pool: AVE
+ kernel_size: 3
+ stride: 2
+ }
+}
+
+layer {
+ name: "ip1"
+ type: "InnerProduct"
+ bottom: "pool3"
+ top: "ip1"
+ param {
+ lr_mult: 1
+ decay_mult: 250
+ }
+ param {
+ lr_mult: 0.2
+ decay_mult: 0
+ }
+ inner_product_param {
+ num_output: 10
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "accuracy"
+ type: "Accuracy"
+ bottom: "ip1"
+ bottom: "label"
+ top: "accuracy"
+ include {
+ phase: TEST
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "ip1"
+ bottom: "label"
+ top: "loss"
+}
diff --git a/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt b/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt
new file mode 100644
index 00000000..85c2dffe
--- /dev/null
+++ b/examples/cifar10/cifar10_full_sigmoid_train_test_bn.prototxt
@@ -0,0 +1,284 @@
+name: "CIFAR10_full"
+layer {
+ name: "cifar"
+ type: "Data"
+ top: "data"
+ top: "label"
+ include {
+ phase: TRAIN
+ }
+ transform_param {
+ mean_file: "examples/cifar10/mean.binaryproto"
+ }
+ data_param {
+ source: "examples/cifar10/cifar10_train_lmdb"
+ batch_size: 111
+ backend: LMDB
+ }
+}
+layer {
+ name: "cifar"
+ type: "Data"
+ top: "data"
+ top: "label"
+ include {
+ phase: TEST
+ }
+ transform_param {
+ mean_file: "examples/cifar10/mean.binaryproto"
+ }
+ data_param {
+ source: "examples/cifar10/cifar10_test_lmdb"
+ batch_size: 1000
+ backend: LMDB
+ }
+}
+layer {
+ name: "conv1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ convolution_param {
+ num_output: 32
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.0001
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+
+layer {
+ name: "bn1"
+ type: "BatchNorm"
+ bottom: "pool1"
+ top: "bn1"
+ bn_param {
+ scale_filler {
+ type: "constant"
+ value: 1
+ }
+ shift_filler {
+ type: "constant"
+ value: 0.001
+ }
+ }
+ param {
+ lr_mult: 1.00001
+ decay_mult: 0
+ }
+ param {
+ lr_mult: 1.00001
+ decay_mult: 0
+ }
+}
+
+layer {
+ name: "Sigmoid1"
+ type: "Sigmoid"
+ bottom: "bn1"
+ top: "Sigmoid1"
+}
+
+layer {
+ name: "conv2"
+ type: "Convolution"
+ bottom: "Sigmoid1"
+ top: "conv2"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ convolution_param {
+ num_output: 32
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+
+
+
+layer {
+ name: "bn2"
+ type: "BatchNorm"
+ bottom: "conv2"
+ top: "bn2"
+ bn_param {
+ scale_filler {
+ type: "constant"
+ value: 1
+ }
+ shift_filler {
+ type: "constant"
+ value: 0.001
+ }
+ }
+ param {
+ lr_mult: 1.00001
+ decay_mult: 0
+ }
+ param {
+ lr_mult: 1.00001
+ decay_mult: 0
+ }
+}
+layer {
+ name: "Sigmoid2"
+ type: "Sigmoid"
+ bottom: "bn2"
+ top: "Sigmoid2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "Sigmoid2"
+ top: "pool2"
+ pooling_param {
+ pool: AVE
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "conv3"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3"
+ convolution_param {
+ num_output: 64
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 1
+ }
+
+}
+
+
+layer {
+ name: "bn3"
+ type: "BatchNorm"
+ bottom: "conv3"
+ top: "bn3"
+ bn_param {
+ scale_filler {
+ type: "constant"
+ value: 1
+ }
+ shift_filler {
+ type: "constant"
+ value: 0.001
+ }
+ }
+ param {
+ lr_mult: 1.00001
+ decay_mult: 0
+ }
+ param {
+ lr_mult: 1.00001
+ decay_mult: 0
+ }
+}
+layer {
+ name: "Sigmoid3"
+ type: "Sigmoid"
+ bottom: "bn3"
+ top: "Sigmoid3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "Sigmoid3"
+ top: "pool3"
+ pooling_param {
+ pool: AVE
+ kernel_size: 3
+ stride: 2
+ }
+}
+
+layer {
+ name: "ip1"
+ type: "InnerProduct"
+ bottom: "pool3"
+ top: "ip1"
+ param {
+ lr_mult: 1
+ decay_mult: 250
+ }
+ param {
+ lr_mult: 0.2
+ decay_mult: 0
+ }
+ inner_product_param {
+ num_output: 10
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "accuracy"
+ type: "Accuracy"
+ bottom: "ip1"
+ bottom: "label"
+ top: "accuracy"
+ include {
+ phase: TEST
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "ip1"
+ bottom: "label"
+ top: "loss"
+}
diff --git a/examples/cifar10/train_full_sigmoid.sh b/examples/cifar10/train_full_sigmoid.sh
new file mode 100755
index 00000000..9cff06d3
--- /dev/null
+++ b/examples/cifar10/train_full_sigmoid.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env sh
+
+TOOLS=./build/tools
+
+$TOOLS/caffe train \
+ --solver=examples/cifar10/cifar10_full_sigmoid_solver.prototxt
+
diff --git a/examples/cifar10/train_full_sigmoid_bn.sh b/examples/cifar10/train_full_sigmoid_bn.sh
new file mode 100755
index 00000000..011387c9
--- /dev/null
+++ b/examples/cifar10/train_full_sigmoid_bn.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env sh
+
+TOOLS=./build/tools
+
+$TOOLS/caffe train \
+ --solver=examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt
+