author    Evan Shelhamer <shelhamer@imaginarynumber.net>    2015-03-07 00:34:24 -0800
committer Evan Shelhamer <shelhamer@imaginarynumber.net>    2015-03-08 00:24:22 -0800
commit    08b595d539c0e624ceb73869ee7c95376d4972e5 (patch)
tree      0ad5fcad9ce74730610469a836bd38d114a6e252 /examples
parent    5c84795572cf08f020dab31af43e7d8478628f44 (diff)
[example] revise filter visualization
- download CaffeNet if it isn't there
- switch to caffe.Net
- reshape net for single input
- explain param, bias indexing
- update output for N-D blobs
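As a sketch, the workflow this commit moves the notebook to reads as one piece like the following (paths as in the notebook; that `caffe_root` points at a standard Caffe checkout with pycaffe importable is an assumption of the sketch, not part of the patch):

    import numpy as np
    import caffe

    caffe_root = '../'  # assumed checkout location, as in the notebook

    caffe.set_mode_cpu()
    net = caffe.Net(caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt',
                    caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel',
                    caffe.TEST)

    # preprocessing configured against the shape of the 'data' input blob
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2, 0, 1))     # H x W x C -> C x H x W
    transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1))
    transformer.set_raw_scale('data', 255)           # [0, 1] float image -> [0, 255]
    transformer.set_channel_swap('data', (2, 1, 0))  # RGB -> BGR

    # reshape the net for a single input, then run the forward pass
    net.blobs['data'].reshape(1, 3, 227, 227)
    net.blobs['data'].data[...] = transformer.preprocess(
        'data', caffe.io.load_image(caffe_root + 'examples/images/cat.jpg'))
    out = net.forward()
    print("Predicted class is #{}.".format(out['prob'].argmax()))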
Diffstat (limited to 'examples')
-rw-r--r--  examples/filter_visualization.ipynb  |  109
1 file changed, 64 insertions(+), 45 deletions(-)
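The "param, bias indexing" and N-D blob items in the message come down to this sketch (written against pycaffe as of this commit; the bias shape is an assumption based on the N-D blob change, the weight shape matches the diff below):

    # each entry of net.params is a vector of blobs:
    # index 0 holds the weights, index 1 holds the biases
    W = net.params['fc6'][0].data  # weights, shape (4096, 9216) per the diff below
    b = net.params['fc6'][1].data  # biases, assumed shape (4096,) with N-D blobs

    # inner product activations are now 2-D rather than padded to 4-D:
    print(net.blobs['fc6'].data.shape)  # (1, 4096) instead of (1, 4096, 1, 1)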
diff --git a/examples/filter_visualization.ipynb b/examples/filter_visualization.ipynb
index 0bfdb5ca..7125907f 100644
--- a/examples/filter_visualization.ipynb
+++ b/examples/filter_visualization.ipynb
@@ -4,7 +4,7 @@
"example_name": "Filter visualization",
"include_in_docs": true,
"priority": 2,
- "signature": "sha256:44536e4f82eb5748b6a3bb6fcfca01bc6c5815dad2641c994dab031f452b7606"
+ "signature": "sha256:64c88129e2eeaa956e4c8a26467ff6119f24ea3d7ef15f8217326249973bea8f"
},
"nbformat": 3,
"nbformat_minor": 0,
@@ -24,7 +24,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "First, import required modules and set plotting parameters"
+ "First, import required modules, set plotting parameters, and run `./scripts/download_model_binary.py models/bvlc_reference_caffenet` to get the pretrained CaffeNet model if it hasn't already been fetched."
]
},
{
@@ -44,7 +44,12 @@
"\n",
"plt.rcParams['figure.figsize'] = (10, 10)\n",
"plt.rcParams['image.interpolation'] = 'nearest'\n",
- "plt.rcParams['image.cmap'] = 'gray'"
+ "plt.rcParams['image.cmap'] = 'gray'\n",
+ "\n",
+ "import os\n",
+ "if not os.path.isfile(caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'):\n",
+ " print(\"Downloading pre-trained CaffeNet model...\")\n",
+ " !../scripts/download_model_binary.py ../models/bvlc_reference_caffenet"
],
"language": "python",
"metadata": {},
@@ -55,7 +60,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Run `./scripts/download_model_binary.py models/bvlc_reference_caffenet` to get the pretrained CaffeNet model, load the net, specify test phase and CPU mode, and configure input preprocessing."
+ "Set Caffe to CPU mode, load the net in the test phase for inference, and configure input preprocessing."
]
},
{
@@ -63,12 +68,16 @@
"collapsed": false,
"input": [
"caffe.set_mode_cpu()\n",
- "net = caffe.Classifier(caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt',\n",
- " caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel')\n",
+ "net = caffe.Net(caffe_root + 'models/bvlc_reference_caffenet/deploy.prototxt',\n",
+ " caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel',\n",
+ " caffe.TEST)\n",
+ "\n",
"# input preprocessing: 'data' is the name of the input blob == net.inputs[0]\n",
- "net.transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)) # ImageNet mean\n",
- "net.transformer.set_raw_scale('data', 255) # the reference model operates on images in [0,255] range instead of [0,1]\n",
- "net.transformer.set_channel_swap('data', (2,1,0)) # the reference model has channels in BGR order instead of RGB"
+ "transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})\n",
+ "transformer.set_transpose('data', (2,0,1))\n",
+ "transformer.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy').mean(1).mean(1)) # mean pixel\n",
+ "transformer.set_raw_scale('data', 255) # the reference model operates on images in [0,255] range instead of [0,1]\n",
+ "transformer.set_channel_swap('data', (2,1,0)) # the reference model has channels in BGR order instead of RGB"
],
"language": "python",
"metadata": {},
@@ -79,25 +88,36 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "Run a classification pass"
+ "Classify the image by reshaping the net for the single input then doing the forward pass."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
- "scores = net.predict([caffe.io.load_image(caffe_root + 'examples/images/cat.jpg')])"
+ "net.blobs['data'].reshape(1,3,227,227)\n",
+ "net.blobs['data'].data[...] = transformer.preprocess('data', caffe.io.load_image(caffe_root + 'examples/images/cat.jpg'))\n",
+ "out = net.forward()\n",
+ "print(\"Predicted class is #{}.\".format(out['prob'].argmax()))"
],
"language": "python",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "stream": "stdout",
+ "text": [
+ "Predicted class is #281.\n"
+ ]
+ }
+ ],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "The layer features and their shapes (10 is the batch size, corresponding to the the ten subcrops used by Krizhevsky et al.)"
+ "The layer features and their shapes (1 is the batch size, corresponding to the single input image in this example)."
]
},
{
@@ -114,21 +134,21 @@
"output_type": "pyout",
"prompt_number": 4,
"text": [
- "[('data', (10, 3, 227, 227)),\n",
- " ('conv1', (10, 96, 55, 55)),\n",
- " ('pool1', (10, 96, 27, 27)),\n",
- " ('norm1', (10, 96, 27, 27)),\n",
- " ('conv2', (10, 256, 27, 27)),\n",
- " ('pool2', (10, 256, 13, 13)),\n",
- " ('norm2', (10, 256, 13, 13)),\n",
- " ('conv3', (10, 384, 13, 13)),\n",
- " ('conv4', (10, 384, 13, 13)),\n",
- " ('conv5', (10, 256, 13, 13)),\n",
- " ('pool5', (10, 256, 6, 6)),\n",
- " ('fc6', (10, 4096, 1, 1)),\n",
- " ('fc7', (10, 4096, 1, 1)),\n",
- " ('fc8', (10, 1000, 1, 1)),\n",
- " ('prob', (10, 1000, 1, 1))]"
+ "[('data', (1, 3, 227, 227)),\n",
+ " ('conv1', (1, 96, 55, 55)),\n",
+ " ('pool1', (1, 96, 27, 27)),\n",
+ " ('norm1', (1, 96, 27, 27)),\n",
+ " ('conv2', (1, 256, 27, 27)),\n",
+ " ('pool2', (1, 256, 13, 13)),\n",
+ " ('norm2', (1, 256, 13, 13)),\n",
+ " ('conv3', (1, 384, 13, 13)),\n",
+ " ('conv4', (1, 384, 13, 13)),\n",
+ " ('conv5', (1, 256, 13, 13)),\n",
+ " ('pool5', (1, 256, 6, 6)),\n",
+ " ('fc6', (1, 4096)),\n",
+ " ('fc7', (1, 4096)),\n",
+ " ('fc8', (1, 1000)),\n",
+ " ('prob', (1, 1000))]"
]
}
],
@@ -138,7 +158,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "The parameters and their shapes (each of these layers also has biases which are omitted here)"
+ "The parameters and their shapes. The parameters are `net.params['name'][0]` while biases are `net.params['name'][1]`."
]
},
{
@@ -160,9 +180,9 @@
" ('conv3', (384, 256, 3, 3)),\n",
" ('conv4', (384, 192, 3, 3)),\n",
" ('conv5', (256, 192, 3, 3)),\n",
- " ('fc6', (1, 1, 4096, 9216)),\n",
- " ('fc7', (1, 1, 4096, 4096)),\n",
- " ('fc8', (1, 1, 1000, 4096))]"
+ " ('fc6', (4096, 9216)),\n",
+ " ('fc7', (4096, 4096)),\n",
+ " ('fc8', (1000, 4096))]"
]
}
],
@@ -180,7 +200,7 @@
"collapsed": false,
"input": [
"# take an array of shape (n, height, width) or (n, height, width, channels)\n",
- "# and visualize each (height, width) thing in a grid of size approx. sqrt(n) by sqrt(n)\n",
+ "# and visualize each (height, width) thing in a grid of size approx. sqrt(n) by sqrt(n)\n",
"def vis_square(data, padsize=1, padval=0):\n",
" data -= data.min()\n",
" data /= data.max()\n",
@@ -212,8 +232,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "# index four is the center crop\n",
- "plt.imshow(net.transformer.deprocess('data', net.blobs['data'].data[4]))"
+ "plt.imshow(transformer.deprocess('data', net.blobs['data'].data[0]))"
],
"language": "python",
"metadata": {},
@@ -269,7 +288,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "feat = net.blobs['conv1'].data[4, :36]\n",
+ "feat = net.blobs['conv1'].data[0, :36]\n",
"vis_square(feat, padval=1)"
],
"language": "python",
@@ -327,7 +346,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "feat = net.blobs['conv2'].data[4, :36]\n",
+ "feat = net.blobs['conv2'].data[0, :36]\n",
"vis_square(feat, padval=1)"
],
"language": "python",
@@ -355,7 +374,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "feat = net.blobs['conv3'].data[4]\n",
+ "feat = net.blobs['conv3'].data[0]\n",
"vis_square(feat, padval=0.5)"
],
"language": "python",
@@ -383,7 +402,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "feat = net.blobs['conv4'].data[4]\n",
+ "feat = net.blobs['conv4'].data[0]\n",
"vis_square(feat, padval=0.5)"
],
"language": "python",
@@ -411,7 +430,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "feat = net.blobs['conv5'].data[4]\n",
+ "feat = net.blobs['conv5'].data[0]\n",
"vis_square(feat, padval=0.5)"
],
"language": "python",
@@ -439,7 +458,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "feat = net.blobs['pool5'].data[4]\n",
+ "feat = net.blobs['pool5'].data[0]\n",
"vis_square(feat, padval=1)"
],
"language": "python",
@@ -469,7 +488,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "feat = net.blobs['fc6'].data[4]\n",
+ "feat = net.blobs['fc6'].data[0]\n",
"plt.subplot(2, 1, 1)\n",
"plt.plot(feat.flat)\n",
"plt.subplot(2, 1, 2)\n",
@@ -500,7 +519,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "feat = net.blobs['fc7'].data[4]\n",
+ "feat = net.blobs['fc7'].data[0]\n",
"plt.subplot(2, 1, 1)\n",
"plt.plot(feat.flat)\n",
"plt.subplot(2, 1, 2)\n",
@@ -531,7 +550,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
- "feat = net.blobs['prob'].data[4]\n",
+ "feat = net.blobs['prob'].data[0]\n",
"plt.plot(feat.flat)"
],
"language": "python",
@@ -576,7 +595,7 @@
" labels = np.loadtxt(imagenet_labels_filename, str, delimiter='\\t')\n",
"\n",
"# sort top k predictions from softmax output\n",
- "top_k = net.blobs['prob'].data[4].flatten().argsort()[-1:-6:-1]\n",
+ "top_k = net.blobs['prob'].data[0].flatten().argsort()[-1:-6:-1]\n",
"print labels[top_k]"
],
"language": "python",