diff options
-rw-r--r-- | examples/finetune_flickr_style/readme.md | 65 | ||||
-rw-r--r-- | models/finetune_flickr_style/train_val.prototxt | 2 |
2 files changed, 34 insertions, 33 deletions
diff --git a/examples/finetune_flickr_style/readme.md b/examples/finetune_flickr_style/readme.md index dad45aeb..4220102e 100644 --- a/examples/finetune_flickr_style/readme.md +++ b/examples/finetune_flickr_style/readme.md @@ -60,7 +60,7 @@ We'll also need the ImageNet-trained model, which you can obtain by running `./s Now we can train! (You can fine-tune in CPU mode by leaving out the `-gpu` flag.) - caffe % ./build/tools/caffe train -solver models/finetune_flickr_style/flickr_style_solver.prototxt -weights models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel -gpu 0 + caffe % ./build/tools/caffe train -solver models/finetune_flickr_style/solver.prototxt -weights models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel -gpu 0 [...] @@ -72,77 +72,78 @@ Now we can train! (You can fine-tune in CPU mode by leaving out the `-gpu` flag. I0828 22:17:48.338963 11510 solver.cpp:165] Solving FlickrStyleCaffeNet I0828 22:17:48.339010 11510 solver.cpp:251] Iteration 0, Testing net (#0) - I0828 22:18:14.313817 11510 solver.cpp:302] Test net output #0: accuracy = 0.0416 - I0828 22:18:14.476822 11510 solver.cpp:195] Iteration 0, loss = 3.75717 + I0828 22:18:14.313817 11510 solver.cpp:302] Test net output #0: accuracy = 0.0308 + I0828 22:18:14.476822 11510 solver.cpp:195] Iteration 0, loss = 3.78589 I0828 22:18:14.476878 11510 solver.cpp:397] Iteration 0, lr = 0.001 - I0828 22:18:19.700408 11510 solver.cpp:195] Iteration 20, loss = 3.1689 + I0828 22:18:19.700408 11510 solver.cpp:195] Iteration 20, loss = 3.25728 I0828 22:18:19.700461 11510 solver.cpp:397] Iteration 20, lr = 0.001 - I0828 22:18:24.924685 11510 solver.cpp:195] Iteration 40, loss = 2.3549 + I0828 22:18:24.924685 11510 solver.cpp:195] Iteration 40, loss = 2.18531 I0828 22:18:24.924741 11510 solver.cpp:397] Iteration 40, lr = 0.001 - I0828 22:18:30.114858 11510 solver.cpp:195] Iteration 60, loss = 2.74191 + I0828 22:18:30.114858 11510 solver.cpp:195] Iteration 60, loss = 2.4915 I0828 
22:18:30.114910 11510 solver.cpp:397] Iteration 60, lr = 0.001 - I0828 22:18:35.328071 11510 solver.cpp:195] Iteration 80, loss = 1.9147 + I0828 22:18:35.328071 11510 solver.cpp:195] Iteration 80, loss = 2.04539 I0828 22:18:35.328127 11510 solver.cpp:397] Iteration 80, lr = 0.001 - I0828 22:18:40.588317 11510 solver.cpp:195] Iteration 100, loss = 1.81419 + I0828 22:18:40.588317 11510 solver.cpp:195] Iteration 100, loss = 2.1924 I0828 22:18:40.588373 11510 solver.cpp:397] Iteration 100, lr = 0.001 - I0828 22:18:46.171576 11510 solver.cpp:195] Iteration 120, loss = 2.02105 + I0828 22:18:46.171576 11510 solver.cpp:195] Iteration 120, loss = 2.25107 I0828 22:18:46.171669 11510 solver.cpp:397] Iteration 120, lr = 0.001 - I0828 22:18:51.757809 11510 solver.cpp:195] Iteration 140, loss = 1.49083 + I0828 22:18:51.757809 11510 solver.cpp:195] Iteration 140, loss = 1.355 I0828 22:18:51.757863 11510 solver.cpp:397] Iteration 140, lr = 0.001 - I0828 22:18:57.345080 11510 solver.cpp:195] Iteration 160, loss = 1.35319 + I0828 22:18:57.345080 11510 solver.cpp:195] Iteration 160, loss = 1.40815 I0828 22:18:57.345135 11510 solver.cpp:397] Iteration 160, lr = 0.001 - I0828 22:19:02.928794 11510 solver.cpp:195] Iteration 180, loss = 1.11658 + I0828 22:19:02.928794 11510 solver.cpp:195] Iteration 180, loss = 1.6558 I0828 22:19:02.928850 11510 solver.cpp:397] Iteration 180, lr = 0.001 - I0828 22:19:08.514497 11510 solver.cpp:195] Iteration 200, loss = 1.08851 + I0828 22:19:08.514497 11510 solver.cpp:195] Iteration 200, loss = 0.88126 I0828 22:19:08.514552 11510 solver.cpp:397] Iteration 200, lr = 0.001 [...] 
- I0828 22:22:40.789010 11510 solver.cpp:195] Iteration 960, loss = 0.0844627 + I0828 22:22:40.789010 11510 solver.cpp:195] Iteration 960, loss = 0.112586 I0828 22:22:40.789175 11510 solver.cpp:397] Iteration 960, lr = 0.001 - I0828 22:22:46.376626 11510 solver.cpp:195] Iteration 980, loss = 0.0110937 + I0828 22:22:46.376626 11510 solver.cpp:195] Iteration 980, loss = 0.0959077 I0828 22:22:46.376682 11510 solver.cpp:397] Iteration 980, lr = 0.001 I0828 22:22:51.687258 11510 solver.cpp:251] Iteration 1000, Testing net (#0) - I0828 22:23:17.438894 11510 solver.cpp:302] Test net output #0: accuracy = 1 + I0828 22:23:17.438894 11510 solver.cpp:302] Test net output #0: accuracy = 0.2356 -Note how rapidly the loss went down. Although the 100% accuracy is optimistic, it is evidence the model is learning quickly and well. +Note how rapidly the loss went down. Although the 23.6% accuracy is only modest, it was achieved in only 1000 iterations, and is evidence that the model is starting to learn quickly and well. +Once the model is fully fine-tuned on the whole training set over 100,000 iterations, the final validation accuracy is 91.64%. This takes ~7 hours in Caffe on a K40 GPU. 
For comparison, here is how the loss goes down when we do not start with a pre-trained model: I0828 22:24:18.624004 12919 solver.cpp:165] Solving FlickrStyleCaffeNet I0828 22:24:18.624099 12919 solver.cpp:251] Iteration 0, Testing net (#0) - I0828 22:24:44.520992 12919 solver.cpp:302] Test net output #0: accuracy = 0.045 - I0828 22:24:44.676905 12919 solver.cpp:195] Iteration 0, loss = 3.33111 + I0828 22:24:44.520992 12919 solver.cpp:302] Test net output #0: accuracy = 0.0366 + I0828 22:24:44.676905 12919 solver.cpp:195] Iteration 0, loss = 3.47942 I0828 22:24:44.677120 12919 solver.cpp:397] Iteration 0, lr = 0.001 - I0828 22:24:50.152454 12919 solver.cpp:195] Iteration 20, loss = 2.98133 + I0828 22:24:50.152454 12919 solver.cpp:195] Iteration 20, loss = 2.99694 I0828 22:24:50.152509 12919 solver.cpp:397] Iteration 20, lr = 0.001 - I0828 22:24:55.736256 12919 solver.cpp:195] Iteration 40, loss = 3.02124 + I0828 22:24:55.736256 12919 solver.cpp:195] Iteration 40, loss = 3.0498 I0828 22:24:55.736311 12919 solver.cpp:397] Iteration 40, lr = 0.001 - I0828 22:25:01.316514 12919 solver.cpp:195] Iteration 60, loss = 2.99509 + I0828 22:25:01.316514 12919 solver.cpp:195] Iteration 60, loss = 2.99549 I0828 22:25:01.316567 12919 solver.cpp:397] Iteration 60, lr = 0.001 - I0828 22:25:06.899554 12919 solver.cpp:195] Iteration 80, loss = 2.9928 + I0828 22:25:06.899554 12919 solver.cpp:195] Iteration 80, loss = 3.00573 I0828 22:25:06.899610 12919 solver.cpp:397] Iteration 80, lr = 0.001 - I0828 22:25:12.484624 12919 solver.cpp:195] Iteration 100, loss = 2.99072 + I0828 22:25:12.484624 12919 solver.cpp:195] Iteration 100, loss = 2.99094 I0828 22:25:12.484678 12919 solver.cpp:397] Iteration 100, lr = 0.001 - I0828 22:25:18.069056 12919 solver.cpp:195] Iteration 120, loss = 3.01816 + I0828 22:25:18.069056 12919 solver.cpp:195] Iteration 120, loss = 3.01616 I0828 22:25:18.069149 12919 solver.cpp:397] Iteration 120, lr = 0.001 - I0828 22:25:23.650928 12919 solver.cpp:195] Iteration 
140, loss = 2.9694 + I0828 22:25:23.650928 12919 solver.cpp:195] Iteration 140, loss = 2.98786 I0828 22:25:23.650984 12919 solver.cpp:397] Iteration 140, lr = 0.001 - I0828 22:25:29.235535 12919 solver.cpp:195] Iteration 160, loss = 3.00383 + I0828 22:25:29.235535 12919 solver.cpp:195] Iteration 160, loss = 3.00724 I0828 22:25:29.235589 12919 solver.cpp:397] Iteration 160, lr = 0.001 - I0828 22:25:34.816898 12919 solver.cpp:195] Iteration 180, loss = 2.99802 + I0828 22:25:34.816898 12919 solver.cpp:195] Iteration 180, loss = 3.00099 I0828 22:25:34.816953 12919 solver.cpp:397] Iteration 180, lr = 0.001 - I0828 22:25:40.396656 12919 solver.cpp:195] Iteration 200, loss = 2.99769 + I0828 22:25:40.396656 12919 solver.cpp:195] Iteration 200, loss = 2.99848 I0828 22:25:40.396711 12919 solver.cpp:397] Iteration 200, lr = 0.001 [...] - I0828 22:29:12.539094 12919 solver.cpp:195] Iteration 960, loss = 2.99314 + I0828 22:29:12.539094 12919 solver.cpp:195] Iteration 960, loss = 2.99203 I0828 22:29:12.539258 12919 solver.cpp:397] Iteration 960, lr = 0.001 - I0828 22:29:18.123092 12919 solver.cpp:195] Iteration 980, loss = 2.99503 + I0828 22:29:18.123092 12919 solver.cpp:195] Iteration 980, loss = 2.99345 I0828 22:29:18.123147 12919 solver.cpp:397] Iteration 980, lr = 0.001 I0828 22:29:23.432059 12919 solver.cpp:251] Iteration 1000, Testing net (#0) - I0828 22:29:49.409044 12919 solver.cpp:302] Test net output #0: accuracy = 0.0624 + I0828 22:29:49.409044 12919 solver.cpp:302] Test net output #0: accuracy = 0.0572 This model is only beginning to learn. 
diff --git a/models/finetune_flickr_style/train_val.prototxt b/models/finetune_flickr_style/train_val.prototxt index 46a198a8..7155c492 100644 --- a/models/finetune_flickr_style/train_val.prototxt +++ b/models/finetune_flickr_style/train_val.prototxt @@ -23,7 +23,7 @@ layers { top: "data" top: "label" image_data_param { - source: "data/flickr_style/train.txt" + source: "data/flickr_style/test.txt" batch_size: 50 new_height: 256 new_width: 256 |