fix pycaffe input processing

- load an image as [0,1] single / np.float32 according to Python convention
- fix input scaling during preprocessing:
  - scale input for preprocessing by `raw_scale` e.g. to map an image to [0, 255] for the CaffeNet and AlexNet ImageNet models
  - scale feature space by `input_scale` after mean subtraction
  - switch examples to raw scale for ImageNet models
- fix #525
- preserve type after resizing.
- resize 1, 3, or K channel images with special casing between skimage.transform (1 and 3) and scipy.ndimage (K) for speed
author: Evan Shelhamer <shelhamer@imaginarynumber.net> 2014-07-31 16:19:20 -0700
committer: Evan Shelhamer <shelhamer@imaginarynumber.net> 2014-08-05 23:17:59 -0700
commit: d5c3cef47155b5c2aad146465187364a2b41fd99 (patch)
tree: 8ebaf286f71556f3f182251664f95997b6661120 /examples
parent: f1eb9821ba717a55b684d42ef8c87125e855b402 (diff)
download: caffeonacl-d5c3cef47155b5c2aad146465187364a2b41fd99.tar.gz
caffeonacl-d5c3cef47155b5c2aad146465187364a2b41fd99.tar.bz2
caffeonacl-d5c3cef47155b5c2aad146465187364a2b41fd99.zip
4 files changed, 10 insertions, 19 deletions
diff --git a/examples/detection.ipynb b/examples/detection.ipynb
index 3f2cf71a..3b0a5b2e 100644
--- a/examples/detection.ipynb
+++ b/examples/detection.ipynb
@@ -36,7 +36,7 @@
      "input": [
       "!mkdir -p _temp\n",
       "!echo `pwd`/images/fish-bike.jpg > _temp/det_input.txt\n",
-      "!../python/detect.py --crop_mode=selective_search --pretrained_model=imagenet/caffe_rcnn_imagenet_model --model_def=imagenet/rcnn_imagenet_deploy.prototxt --gpu _temp/det_input.txt _temp/det_output.h5"
+      "!../python/detect.py --crop_mode=selective_search --pretrained_model=imagenet/caffe_rcnn_imagenet_model --model_def=imagenet/rcnn_imagenet_deploy.prototxt --gpu --raw_scale=255 _temp/det_input.txt _temp/det_output.h5"
      ],
      "language": "python",
      "metadata": {},
diff --git a/examples/filter_visualization.ipynb b/examples/filter_visualization.ipynb
index 0fe863bc..ea99f06b 100644
--- a/examples/filter_visualization.ipynb
+++ b/examples/filter_visualization.ipynb
@@ -66,8 +66,8 @@
       "net.set_mode_cpu()\n",
       "# input preprocessing: 'data' is the name of the input blob == net.inputs[0]\n",
       "net.set_mean('data', caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy')  # ImageNet mean\n",
-      "net.set_channel_swap('data', (2,1,0))  # the reference model has channels in BGR order instead of RGB\n",
-      "net.set_input_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]"
+      "net.set_raw_scale('data', 255)  # the reference model operates on images in [0,255] range instead of [0,1]\n",
+      "net.set_channel_swap('data', (2,1,0))  # the reference model has channels in BGR order instead of RGB"
      ],
      "language": "python",
      "metadata": {},
@@ -178,12 +178,6 @@
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "# our network takes BGR images, so we need to switch color channels\n",
-      "def showimage(im):\n",
-      "    if im.ndim == 3:\n",
-      "        im = im[:, :, ::-1]\n",
-      "    plt.imshow(im)\n",
-      "    \n",
       "# take an array of shape (n, height, width) or (n, height, width, channels)\n",
       "#  and visualize each (height, width) thing in a grid of size approx. sqrt(n) by sqrt(n)\n",
       "def vis_square(data, padsize=1, padval=0):\n",
@@ -199,7 +193,7 @@
       "    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))\n",
       "    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])\n",
       "    \n",
-      "    showimage(data)"
+      "    imshow(data)"
      ],
      "language": "python",
      "metadata": {},
@@ -218,10 +212,7 @@
      "collapsed": false,
      "input": [
       "# index four is the center crop\n",
-      "image = net.blobs['data'].data[4].copy()\n",
-      "image -= image.min()\n",
-      "image /= image.max()\n",
-      "showimage(image.transpose(1, 2, 0))"
+      "imshow(net.deprocess('data', net.blobs['data'].data[4]))"
      ],
      "language": "python",
      "metadata": {},
@@ -606,4 +597,4 @@
    "metadata": {}
   }
  ]
-}
-\ No newline at end of file
+}
diff --git a/examples/imagenet_classification.ipynb b/examples/imagenet_classification.ipynb
index 8ab65fda..60e8bd07 100644
--- a/examples/imagenet_classification.ipynb
+++ b/examples/imagenet_classification.ipynb
@@ -53,7 +53,7 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-      "Loading a network is easy. `caffe.Classifier` takes care of everything. Note the arguments for configuring input preprocessing: mean subtraction switched on by giving a mean file, input channel swapping takes care of mapping RGB into the reference ImageNet model's BGR order, and input scaling multiplies the feature scale from the input [0,1] to [0,255]."
+      "Loading a network is easy. `caffe.Classifier` takes care of everything. Note the arguments for configuring input preprocessing: mean subtraction switched on by giving a mean file, input channel swapping takes care of mapping RGB into the reference ImageNet model's BGR order, and raw scaling multiplies the feature scale from the input [0,1] to the ImageNet model's [0,255]."
      ]
     },
     {
@@ -63,7 +63,7 @@
       "net = caffe.Classifier(MODEL_FILE, PRETRAINED,\n",
       "                       mean_file=caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy',\n",
       "                       channel_swap=(2,1,0),\n",
-      "                       input_scale=255,\n",
+      "                       raw_scale=255,\n",
       "                       image_dims=(256, 256))"
      ],
      "language": "python",
diff --git a/examples/net_surgery.ipynb b/examples/net_surgery.ipynb
index bf3d1140..31847b24 100644
--- a/examples/net_surgery.ipynb
+++ b/examples/net_surgery.ipynb
@@ -277,7 +277,7 @@
       "plt.imshow(im)\n",
       "net_full_conv.set_mean('data', '../python/caffe/imagenet/ilsvrc_2012_mean.npy')\n",
       "net_full_conv.set_channel_swap('data', (2,1,0))\n",
-      "net_full_conv.set_input_scale('data', 255.0)\n",
+      "net_full_conv.set_raw_scale('data', 255.0)\n",
       "# make classification map by forward pass and show top prediction index per location\n",
       "out = net_full_conv.forward_all(data=np.asarray([net_full_conv.preprocess('data', im)]))\n",
       "out['prob'][0].argmax(axis=0)"
@@ -331,4 +331,4 @@
    "metadata": {}
   }
  ]
-}
-\ No newline at end of file
+}
author	Evan Shelhamer <shelhamer@imaginarynumber.net>	2014-07-31 16:19:20 -0700
committer	Evan Shelhamer <shelhamer@imaginarynumber.net>	2014-08-05 23:17:59 -0700
commit	d5c3cef47155b5c2aad146465187364a2b41fd99 (patch)
tree	8ebaf286f71556f3f182251664f95997b6661120 /examples
parent	f1eb9821ba717a55b684d42ef8c87125e855b402 (diff)
download	caffeonacl-d5c3cef47155b5c2aad146465187364a2b41fd99.tar.gz caffeonacl-d5c3cef47155b5c2aad146465187364a2b41fd99.tar.bz2 caffeonacl-d5c3cef47155b5c2aad146465187364a2b41fd99.zip