summary refs log tree commit diff
diff options
context:
space:
mode:
author Evan Shelhamer <shelhamer@imaginarynumber.net> 2014-05-18 17:13:05 -0700
committer Evan Shelhamer <shelhamer@imaginarynumber.net> 2014-05-19 23:55:21 -0700
commit 50d0b6d9c67d9ca6b062ddea6f5ec30189e61518 (patch)
tree 5d31effdd13790baf727825341b658ea5308efe6
parent 738c8758e30c6f8bdf9155529aeb431547af65d6 (diff)
download caffe-50d0b6d9c67d9ca6b062ddea6f5ec30189e61518.tar.gz
caffe-50d0b6d9c67d9ca6b062ddea6f5ec30189e61518.tar.bz2
caffe-50d0b6d9c67d9ca6b062ddea6f5ec30189e61518.zip
add caffe.io submodule for conversions, image loading and resizing
-rw-r--r-- python/caffe/__init__.py 1
-rw-r--r-- python/caffe/io.py (renamed from python/caffe/convert.py) 83
-rw-r--r-- python/caffe/pycaffe.py 17
3 files changed, 89 insertions, 12 deletions
diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py
index c3bb98f6..e5e1062e 100644
--- a/python/caffe/__init__.py
+++ b/python/caffe/__init__.py
@@ -1 +1,2 @@
from .pycaffe import Net, SGDSolver
+import io
diff --git a/python/caffe/convert.py b/python/caffe/io.py
index deef6577..0bd2f812 100644
--- a/python/caffe/convert.py
+++ b/python/caffe/io.py
@@ -1,9 +1,84 @@
-#!/usr/bin/env python
-"""This script converts blobproto instances to numpy arrays.
-"""
+import numpy as np
+import skimage.io
+import skimage.transform
from caffe.proto import caffe_pb2
-import numpy as np
+
+
+def load_image(filename):
+ """
+ Load an image converting from grayscale or alpha as needed.
+
+ Take
+ filename: string path handed to skimage.io.imread.
+
+ Give
+ image: an image of size (H x W x 3) with RGB channels of type
+ np.float32 (converted with skimage.img_as_float, so not uint8).
+ """
+ img = skimage.img_as_float(skimage.io.imread(filename)).astype(np.float32)
+ # 2-D (grayscale) input: repeat the single channel three times.
+ if img.ndim == 2:
+ img = np.tile(img[:, :, np.newaxis], (1, 1, 3))
+ # Four-channel input: keep the first three channels and drop the fourth
+ # (presumably alpha).
+ elif img.shape[2] == 4:
+ img = img[:, :, :3]
+ return img
+
+
+def resize_image(im, new_dims, interp_order=1):
+ """
+ Resize an image array with interpolation.
+
+ Thin wrapper around skimage.transform.resize.
+
+ Take
+ im: (H x W x K) ndarray
+ new_dims: (height, width) tuple of new dimensions.
+ interp_order: interpolation order passed on as `order`; the default of 1
+ is linear.
+
+ Give
+ im: resized ndarray with shape (new_dims[0], new_dims[1], K)
+
+ NOTE(review): the pycaffe caller rescales its input to [0, 1] before
+ calling this; confirm whether im must always be in that range.
+ """
+ return skimage.transform.resize(im, new_dims, order=interp_order)
+
+
+def oversample(images, crop_dims):
+ """
+ Crop images into the four corners, center, and their mirrored versions.
+
+ Take
+ images: indexable, sized sequence of (H x W x K) ndarrays; the shape of
+ images[0] is used to compute the crop windows for every image.
+ crop_dims: (height, width) tuple for the crops.
+
+ Give
+ crops: (10*N x crop_dims[0] x crop_dims[1] x K) float32 ndarray of crops
+ for number of inputs N.
+ """
+ # Dimensions and center.
+ im_shape = np.array(images[0].shape)
+ crop_dims = np.array(crop_dims)
+ im_center = im_shape[:2] / 2.0
+
+ # Make crop coordinates
+ # Each row of crops_ix is (top, left, bottom, right); rows 0-3 are the
+ # four corner windows and row 4 is the center window.
+ h_indices = (0, im_shape[0] - crop_dims[0])
+ w_indices = (0, im_shape[1] - crop_dims[1])
+ crops_ix = np.empty((5, 4), dtype=int)
+ curr = 0
+ for i in h_indices:
+ for j in w_indices:
+ crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
+ curr += 1
+ # Center window: the float coordinates are stored into the int array.
+ crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([
+ -crop_dims / 2.0,
+ crop_dims / 2.0
+ ])
+ # Repeat the five windows so that ten crops are taken per image; the
+ # second five are the ones mirrored below.
+ crops_ix = np.tile(crops_ix, (2, 1))
+
+ # Extract crops
+ crops = np.empty((10 * len(images), crop_dims[0], crop_dims[1],
+ im_shape[-1]), dtype=np.float32)
+ ix = 0
+ for im in images:
+ for crop in crops_ix:
+ crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
+ ix += 1
+ # Reverse the width axis of the five most recently written crops.
+ crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :] # flip for mirrors
+ return crops
def blobproto_to_array(blob, return_diff=False):
diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py
index 9b1ed807..72ae5fbb 100644
--- a/python/caffe/pycaffe.py
+++ b/python/caffe/pycaffe.py
@@ -6,9 +6,9 @@ interface.
from collections import OrderedDict
from itertools import izip_longest
import numpy as np
-from scipy.ndimage import zoom
from ._caffe import Net, SGDSolver
+import caffe.io
# We directly update methods from Net here (rather than using composition or
# inheritance) so that nets created by caffe (e.g., by SGDSolver) will
@@ -197,8 +197,11 @@ def _Net_set_mean(self, input_, mean_f, mode='elementwise'):
mean = np.load(mean_f)
if mode == 'elementwise':
if mean.shape != in_shape[1:]:
- mean = caffe.io.resize_image(mean.transpose((1,2,0)),
- in_shape[2:]).transpose((2,0,1))
+ # Resize mean (which requires H x W x K input in range [0,1]).
+ m_min, m_max = mean.min(), mean.max()
+ normal_mean = (mean - m_min) / (m_max - m_min)
+ mean = caffe.io.resize_image(normal_mean.transpose((1,2,0)),
+ in_shape[2:]).transpose((2,0,1)) * (m_max - m_min) + m_min
self.mean[input_] = mean
elif mode == 'channel':
self.mean[input_] = mean.mean(1).mean(1).reshape((in_shape[1], 1, 1))
@@ -258,11 +261,9 @@ def _Net_preprocess(self, input_name, inputs):
input_scale = self.input_scale.get(input_name)
channel_order = self.channel_swap.get(input_name)
mean = self.mean.get(input_name)
- in_dims = self.blobs[input_name].data.shape[2:]
- if caffe_in.shape[:2] != in_dims:
- scale_h = in_dims[0] / float(caffe_in.shape[0])
- scale_w = in_dims[1] / float(caffe_in.shape[1])
- caffe_in = zoom(caffe_in, (scale_h, scale_w, 1), order=1)
+ in_size = self.blobs[input_name].data.shape[2:]
+ if caffe_in.shape[:2] != in_size:
+ caffe_in = caffe.io.resize_image(caffe_in, in_size)
if input_scale:
caffe_in *= input_scale
if channel_order: