diff options
author | Sergey Karayev <sergeykarayev@gmail.com> | 2014-09-04 02:50:53 +0100 |
---|---|---|
committer | Sergey Karayev <sergeykarayev@gmail.com> | 2014-09-04 03:59:14 +0100 |
commit | c6827bf3fcd8bf6cafb2aeb7a1ce4d3a958b7f19 (patch) | |
tree | 8dedeae1ad6e6af3be84a4bab3bb6d1682cad251 /examples | |
parent | bc601e9060cc71b95f0250b9efacaffcbb5d8c0e (diff) | |
download | caffeonacl-c6827bf3fcd8bf6cafb2aeb7a1ce4d3a958b7f19.tar.gz caffeonacl-c6827bf3fcd8bf6cafb2aeb7a1ce4d3a958b7f19.tar.bz2 caffeonacl-c6827bf3fcd8bf6cafb2aeb7a1ce4d3a958b7f19.zip |
flickr style fine-tuning model (separated from example read me)
Diffstat (limited to 'examples')
3 files changed, 11 insertions, 371 deletions
diff --git a/examples/finetune_flickr_style/flickr_style_solver.prototxt b/examples/finetune_flickr_style/flickr_style_solver.prototxt deleted file mode 100644 index 756e162b..00000000 --- a/examples/finetune_flickr_style/flickr_style_solver.prototxt +++ /dev/null @@ -1,17 +0,0 @@ -net: "examples/finetune_flickr_style/flickr_style_train_val.prototxt" -test_iter: 100 -test_interval: 1000 -# lr for fine-tuning should be lower than when starting from scratch -base_lr: 0.001 -lr_policy: "step" -gamma: 0.1 -# stepsize should also be lower, as we're closer to being done -stepsize: 20000 -display: 20 -max_iter: 100000 -momentum: 0.9 -weight_decay: 0.0005 -snapshot: 10000 -snapshot_prefix: "examples/finetune_flickr_style/flickr_style" -# uncomment the following to default to CPU mode solving -# solver_mode: CPU diff --git a/examples/finetune_flickr_style/flickr_style_train_val.prototxt b/examples/finetune_flickr_style/flickr_style_train_val.prototxt deleted file mode 100644 index 46a198a8..00000000 --- a/examples/finetune_flickr_style/flickr_style_train_val.prototxt +++ /dev/null @@ -1,349 +0,0 @@ -name: "FlickrStyleCaffeNet" -layers { - name: "data" - type: IMAGE_DATA - top: "data" - top: "label" - image_data_param { - source: "data/flickr_style/train.txt" - batch_size: 50 - new_height: 256 - new_width: 256 - } - transform_param { - crop_size: 227 - mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" - mirror: true - } - include: { phase: TRAIN } -} -layers { - name: "data" - type: IMAGE_DATA - top: "data" - top: "label" - image_data_param { - source: "data/flickr_style/train.txt" - batch_size: 50 - new_height: 256 - new_width: 256 - } - transform_param { - crop_size: 227 - mean_file: "data/ilsvrc12/imagenet_mean.binaryproto" - mirror: false - } - include: { phase: TEST } -} -layers { - name: "conv1" - type: CONVOLUTION - bottom: "data" - top: "conv1" - blobs_lr: 1 - blobs_lr: 2 - weight_decay: 1 - weight_decay: 0 - convolution_param { - num_output: 96 - kernel_size: 11 - stride: 4 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layers { - name: "relu1" - type: RELU - bottom: "conv1" - top: "conv1" -} -layers { - name: "pool1" - type: POOLING - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "norm1" - type: LRN - bottom: "pool1" - top: "norm1" - lrn_param { - local_size: 5 - alpha: 0.0001 - beta: 0.75 - } -} -layers { - name: "conv2" - type: CONVOLUTION - bottom: "norm1" - top: "conv2" - blobs_lr: 1 - blobs_lr: 2 - weight_decay: 1 - weight_decay: 0 - convolution_param { - num_output: 256 - pad: 2 - kernel_size: 5 - group: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - value: 1 - } - } -} -layers { - name: "relu2" - type: RELU - bottom: "conv2" - top: "conv2" -} -layers { - name: "pool2" - type: POOLING - bottom: "conv2" - top: "pool2" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "norm2" - type: LRN - bottom: "pool2" - top: "norm2" - lrn_param { - local_size: 5 - alpha: 0.0001 - beta: 0.75 - } -} -layers { - name: "conv3" - type: CONVOLUTION - bottom: "norm2" - top: "conv3" - blobs_lr: 1 - blobs_lr: 2 - weight_decay: 1 - weight_decay: 0 - convolution_param { - num_output: 384 - pad: 1 - kernel_size: 3 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layers { - name: "relu3" - type: RELU - bottom: "conv3" - top: "conv3" -} -layers { - name: "conv4" - type: CONVOLUTION - bottom: "conv3" - top: "conv4" - blobs_lr: 1 - blobs_lr: 2 - weight_decay: 1 - weight_decay: 0 - convolution_param { - num_output: 384 - pad: 1 - kernel_size: 3 - group: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - value: 1 - } - } -} -layers { - name: "relu4" - type: RELU - bottom: "conv4" - top: "conv4" -} -layers { - name: "conv5" - type: CONVOLUTION - bottom: "conv4" - top: "conv5" - blobs_lr: 1 - blobs_lr: 2 - weight_decay: 1 - weight_decay: 0 - convolution_param { - num_output: 256 - pad: 1 - kernel_size: 3 - group: 2 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - value: 1 - } - } -} -layers { - name: "relu5" - type: RELU - bottom: "conv5" - top: "conv5" -} -layers { - name: "pool5" - type: POOLING - bottom: "conv5" - top: "pool5" - pooling_param { - pool: MAX - kernel_size: 3 - stride: 2 - } -} -layers { - name: "fc6" - type: INNER_PRODUCT - bottom: "pool5" - top: "fc6" - blobs_lr: 1 - blobs_lr: 2 - weight_decay: 1 - weight_decay: 0 - inner_product_param { - num_output: 4096 - weight_filler { - type: "gaussian" - std: 0.005 - } - bias_filler { - type: "constant" - value: 1 - } - } -} -layers { - name: "relu6" - type: RELU - bottom: "fc6" - top: "fc6" -} -layers { - name: "drop6" - type: DROPOUT - bottom: "fc6" - top: "fc6" - dropout_param { - dropout_ratio: 0.5 - } -} -layers { - name: "fc7" - type: INNER_PRODUCT - bottom: "fc6" - top: "fc7" - # Note that blobs_lr can be set to 0 to disable any fine-tuning of this, and any other, layer - blobs_lr: 1 - blobs_lr: 2 - weight_decay: 1 - weight_decay: 0 - inner_product_param { - num_output: 4096 - weight_filler { - type: "gaussian" - std: 0.005 - } - bias_filler { - type: "constant" - value: 1 - } - } -} -layers { - name: "relu7" - type: RELU - bottom: "fc7" - top: "fc7" -} -layers { - name: "drop7" - type: DROPOUT - bottom: "fc7" - top: "fc7" - dropout_param { - dropout_ratio: 0.5 - } -} -layers { - name: "fc8_flickr" - type: INNER_PRODUCT - bottom: "fc7" - top: "fc8_flickr" - # blobs_lr is set to higher than for other layers, because this layer is starting from random while the others are already trained - blobs_lr: 10 - blobs_lr: 20 - weight_decay: 1 - weight_decay: 0 - inner_product_param { - num_output: 20 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - value: 0 - } - } -} -layers { - name: "loss" - type: SOFTMAX_LOSS - bottom: "fc8_flickr" - bottom: "label" -} -layers { - name: "accuracy" - type: ACCURACY - bottom: "fc8_flickr" - bottom: "label" - top: "accuracy" - include: { phase: TEST } -} diff --git a/examples/finetune_flickr_style/readme.md b/examples/finetune_flickr_style/readme.md index 68b2778c..2c9ee0a8 100644 --- a/examples/finetune_flickr_style/readme.md +++ b/examples/finetune_flickr_style/readme.md @@ -34,7 +34,7 @@ All steps are to be done from the caffe root directory. The dataset is distributed as a list of URLs with corresponding labels. Using a script, we will download a small subset of the data and split it into train and val sets. - caffe % ./examples/finetune_flickr_style/assemble_data.py -h + caffe % ./models/finetune_flickr_style/assemble_data.py -h usage: assemble_data.py [-h] [-s SEED] [-i IMAGES] [-w WORKERS] Download a subset of Flickr Style to a directory @@ -48,7 +48,7 @@ Using a script, we will download a small subset of the data and split it into tr num workers used to download images. -x uses (all - x) cores. - caffe % python examples/finetune_flickr_style/assemble_data.py --workers=-1 --images=2000 --seed 831486 + caffe % python models/finetune_flickr_style/assemble_data.py --workers=-1 --images=2000 --seed 831486 Downloading 2000 images with 7 workers... Writing train/val for 1939 successfully downloaded images. @@ -56,11 +56,11 @@ This script downloads images and writes train/val file lists into `data/flickr_s With this random seed there are 1,557 train images and 382 test images. The prototxts in this example assume this, and also assume the presence of the ImageNet mean file (run `get_ilsvrc_aux.sh` from `data/ilsvrc12` to obtain this if you haven't yet). -We'll also need the ImageNet-trained model, which you can obtain by running `get_caffe_reference_imagenet_model.sh` from `examples/imagenet`. +We'll also need the ImageNet-trained model, which you can obtain by running `get_caffe_reference_imagenet_model.sh` from `models/imagenet`. Now we can train! (You can fine-tune in CPU mode by leaving out the `-gpu` flag.) - caffe % ./build/tools/caffe train -solver examples/finetune_flickr_style/flickr_style_solver.prototxt -weights models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel -gpu 0 + caffe % ./build/tools/caffe train -solver models/finetune_flickr_style/flickr_style_solver.prototxt -weights models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel -gpu 0 [...] @@ -149,10 +149,16 @@ This model is only beginning to learn. Fine-tuning can be feasible when training from scratch would not be for lack of time or data. Even in CPU mode each pass through the training set takes ~100 s. GPU fine-tuning is of course faster still and can learn a useful model in minutes or hours instead of days or weeks. Furthermore, note that the model has only trained on < 2,000 instances. Transfer learning a new task like style recognition from the ImageNet pretraining can require much less data than training from scratch. + Now try fine-tuning to your own tasks and data! +## Trained model + +We provide a model trained on all 80K images, with final accuracy of 98%. +Simply do `./scripts/download_model_binary.py models/finetune_flickr_style` to obtain it. + ## License The Flickr Style dataset as distributed here contains only URLs to images. Some of the images may have copyright. -Training a category-recognition model for research/non-commercial use may constitute fair use of this data. +Training a category-recognition model for research/non-commercial use may constitute fair use of this data, but the result should not be used for commercial purposes. |