summaryrefslogtreecommitdiff
path: root/python/caffe/pycaffe.py
blob: 5275a07f52eee344d89e6518bff6077d9963bf8d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
"""
Wrap the internal caffe C++ module (_caffe.so) with a clean, Pythonic
interface.
"""

from collections import OrderedDict
import numpy as np

from ._caffe import Net, SGDSolver

# we directly update methods from Net here (rather than using composition or
# inheritance) so that nets created by caffe (e.g., by SGDSolver) will
# automatically have the improved interface

# Input preprocessing
# Per-input preprocessing settings, keyed by input blob name (the set_*
# helpers in this file only accept names found in self.inputs).
# NOTE: these dicts are attributes of the Net class itself and the set_*
# helpers mutate them in place (e.g. self.mean[input_] = ...), so an entry
# written through one Net instance is visible from every other instance.
Net.mean = {}   # image mean (ndarray, input dimensional or broadcastable)
Net.input_scale = {}  # for a model that expects data = input * input_scale
Net.channel_swap = {}  # for RGB -> BGR and the like


@property
def _Net_blobs(self):
  """
  Map every blob of the net to its name.

  Give
    an OrderedDict {blob name: blob} that keeps the order in which
    self._blobs yields the blobs (bottom to top, i.e., input to output).
  """
  by_name = OrderedDict()
  for blob in self._blobs:
    by_name[blob.name] = blob
  return by_name

Net.blobs = _Net_blobs


@property
def _Net_params(self):
  """
  Map every parameterized layer of the net to its parameter blobs.

  Give
    an OrderedDict {layer name: layer.blobs} in the order of self.layers
    (bottom to top, i.e., input to output). Layers whose blob list is
    empty are left out; each value holds several blobs (e.g., weights
    and biases).
  """
  by_layer = OrderedDict()
  for layer in self.layers:
    if len(layer.blobs) == 0:
      # nothing learnable in this layer
      continue
    by_layer[layer.name] = layer.blobs
  return by_layer

Net.params = _Net_params


def _Net_forward(self, blobs=None, **kwargs):
  """
  Forward pass: prepare inputs and run the net forward.

  Take
    blobs: iterable of blob names to return in addition to the output
           blobs (list, tuple, ...). None means no extra blobs.
    kwargs: Keys are input blob names and values are lists of inputs,
            each already formatted as a (1 x K x H x W) ndarray so that
            the list concatenates into one batch along axis 0.
            Every name in self.inputs must be given (a missing one raises
            KeyError). If no kwargs are given at all, ForwardPrefilled()
            is called instead and the outputs are read from the net blobs.

  Give
    outs: {blob name: list of ndarrays} dict with one squeezed array per
          item along the first (batch) axis of each blob.
  """
  extra_names = [] if blobs is None else list(blobs)
  output_names = list(self.outputs)

  # self.blobs is a property that rebuilds an OrderedDict over every blob
  # on each access, so look it up once instead of once per blob.
  net_blobs = self.blobs

  if not kwargs:
    # Carry out prefilled forward pass and unpack output.
    self.ForwardPrefilled()
    out_blobs = [net_blobs[out].data for out in output_names]
  else:
    # Create input and output blobs according to net defined shapes
    # and make arrays single and C-contiguous as Caffe expects.
    in_blobs = [np.ascontiguousarray(np.concatenate(kwargs[in_]),
                                     dtype=np.float32) for in_ in self.inputs]
    out_blobs = [np.empty(net_blobs[out].data.shape, dtype=np.float32)
                 for out in output_names]

    self.Forward(in_blobs, out_blobs)

  # Append the extra blobs (read after the pass has run) and split every
  # blob into per-item arrays.
  out_blobs.extend(net_blobs[name].data for name in extra_names)
  outs = {}
  for name, out_blob in zip(output_names + extra_names, out_blobs):
    outs[name] = [out_blob[ix].squeeze()
                  for ix in range(out_blob.shape[0])]
  return outs

Net.forward = _Net_forward


def _Net_backward(self, diffs=None, **kwargs):
  """
  Backward pass: prepare diffs and run the net backward.

  Take
    diffs: iterable of blob names whose diffs to return in addition to
           the input (bottom) diffs (list, tuple, ...). None means none.
    kwargs: Keys are output blob names and values are lists of diffs that
            concatenate into one batch along axis 0.
            Every name in self.outputs must be given (a missing one raises
            KeyError). If no kwargs are given at all, BackwardPrefilled()
            is called instead and the diffs are read from the net blobs.

  Give
    outs: {blob name: list of diffs} dict with one squeezed array per
          item along the first (batch) axis of each diff.
  """
  extra_names = [] if diffs is None else list(diffs)
  input_names = list(self.inputs)

  # self.blobs is a property that rebuilds an OrderedDict over every blob
  # on each access, so look it up once instead of once per blob.
  net_blobs = self.blobs

  if not kwargs:
    # Carry out backward with forward loss diffs and unpack bottom diffs.
    self.BackwardPrefilled()
    out_diffs = [net_blobs[in_].diff for in_ in input_names]
  else:
    # Create top and bottom diffs according to net defined shapes
    # and make arrays single and C-contiguous as Caffe expects.
    top_diffs = [np.ascontiguousarray(np.concatenate(kwargs[out]),
                                      dtype=np.float32) for out in self.outputs]
    out_diffs = [np.empty(net_blobs[bottom].diff.shape, dtype=np.float32)
                 for bottom in input_names]

    self.Backward(top_diffs, out_diffs)

  # Append the extra diffs (read after the pass has run) and split every
  # diff into per-item arrays.
  out_diffs.extend(net_blobs[name].diff for name in extra_names)
  outs = {}
  for name, out_diff in zip(input_names + extra_names, out_diffs):
    outs[name] = [out_diff[ix].squeeze()
                  for ix in range(out_diff.shape[0])]
  return outs

Net.backward = _Net_backward


def _Net_set_mean(self, input_, mean_f, mode='image'):
  """
  Set the mean to subtract for data centering.

  Take
    input_: which input to assign this mean.
    mean_f: path to mean .npy
    mode: image = use the whole-image mean; its shape must equal the
                  shape of the `input_` blob without the leading batch
                  axis
          channel = channel constant (i.e. mean pixel instead of mean
                    image), obtained by averaging the loaded mean over
                    its axes 1 and 2

  Raise
    Exception if `input_` is not one of self.inputs, if the image-mode
    mean does not match the input blob shape, or if `mode` is unknown.
  """
  if input_ not in self.inputs:
    raise Exception('Input not in {}'.format(self.inputs))
  mean = np.load(mean_f)
  if mode == 'image':
    # Compare against the blob of the input being configured; the net has
    # one blob per entry of self.inputs, looked up by name in self.blobs.
    in_shape = tuple(self.blobs[input_].data.shape[1:])
    if mean.shape != in_shape:
      raise Exception('The mean shape does not match the input shape.')
    self.mean[input_] = mean
  elif mode == 'channel':
    self.mean[input_] = mean.mean(1).mean(1)
  else:
    raise Exception('Mode not in {}'.format(['image', 'channel']))

Net.set_mean = _Net_set_mean


def _Net_set_input_scale(self, input_, scale):
  """
  Record the scaling factor of an input, s.t. input blob = input * scale.

  Take
    input_: name of the input this factor belongs to; must be one of
            self.inputs, otherwise an Exception is raised
    scale: scale coefficient
  """
  if input_ in self.inputs:
    self.input_scale[input_] = scale
    return
  raise Exception('Input not in {}'.format(self.inputs))

Net.set_input_scale = _Net_set_input_scale


def _Net_set_channel_swap(self, input_, order):
  """
  Record the channel order of an input, e.g. for the RGB to BGR
  conversion needed by the reference ImageNet model.

  Take
    input_: name of the input this order belongs to; must be one of
            self.inputs, otherwise an Exception is raised
    order: the order to take the channels. (2,1,0) maps RGB to BGR for example.
  """
  if input_ in self.inputs:
    self.channel_swap[input_] = order
    return
  raise Exception('Input not in {}'.format(self.inputs))

Net.set_channel_swap = _Net_set_channel_swap


def _Net_format_image(self, input_, image):
  """
  Format image for input to Caffe:
  - convert to single
  - scale feature
  - reorder channels (for instance color to BGR)
  - transpose to K x H x W
  - subtract mean
  - reshape to 1 x K x H x W

  Settings are read from self.input_scale, self.channel_swap and
  self.mean under the key `input_`; a missing entry skips that step.

  Take
    input_: name of the input whose preprocessing settings to apply
    image: (H x W x K) ndarray; it is not modified

  Give
    image: (1 x K x H x W) float32 ndarray
  """
  # astype() copies, so the in-place operations below never touch `image`.
  caf_image = image.astype(np.float32)
  input_scale = self.input_scale.get(input_)
  channel_order = self.channel_swap.get(input_)
  mean = self.mean.get(input_)
  # The scale is a scalar coefficient; None (or zero) leaves the image as is.
  if input_scale:
    caf_image *= input_scale
  # Test against None / length rather than truthiness so that an ndarray
  # order does not raise "truth value of an array is ambiguous".
  if channel_order is not None and len(channel_order) > 0:
    caf_image = caf_image[:, :, channel_order]
  caf_image = caf_image.transpose((2, 0, 1))
  if mean is not None:
    # Subtract in K x H x W layout: that is the shape set_mean() checks
    # whole-image means against. A 1-D mean holds one value per channel
    # and is broadcast over H and W.
    mean = np.asarray(mean, dtype=np.float32)
    if mean.ndim == 1:
      mean = mean.reshape((-1, 1, 1))
    caf_image -= mean
  return caf_image[np.newaxis, :, :, :]

Net.format_image = _Net_format_image


def _Net_decaffeinate_image(self, input_, image):
  """
  Invert Caffe formatting; see _Net_format_image().

  The steps are undone in reverse: add the mean, transpose back to
  H x W x K, restore the original channel order, divide by the scale.
  Settings are read from self.mean, self.channel_swap and
  self.input_scale under the key `input_`; a missing entry skips that step.

  Take
    input_: name of the input whose preprocessing settings to invert
    image: (K x H x W) ndarray, optionally with leading batch axes of
           length 1 (e.g. 1 x K x H x W); it is not modified

  Give
    image: (H x W x K) ndarray
  """
  # Work on a copy: the in-place operations below would otherwise write
  # through to the caller's array.
  decaf_image = image.copy()
  # Drop only leading length-1 batch axes; a bare squeeze() would also drop
  # a single channel or a 1-pixel side and break the transpose below.
  while decaf_image.ndim > 3 and decaf_image.shape[0] == 1:
    decaf_image = decaf_image[0]
  input_scale = self.input_scale.get(input_)
  channel_order = self.channel_swap.get(input_)
  mean = self.mean.get(input_)
  if mean is not None:
    # Add in K x H x W layout; a 1-D mean holds one value per channel.
    mean = np.asarray(mean)
    if mean.ndim == 1:
      mean = mean.reshape((-1, 1, 1))
    decaf_image += mean
  decaf_image = decaf_image.transpose((1, 2, 0))
  if channel_order is not None and len(channel_order) > 0:
    # The inverse of a permutation is its argsort; merely reversing the
    # order is wrong (e.g. (2,1,0) reversed is the identity).
    decaf_image = decaf_image[:, :, np.argsort(channel_order)]
  # The scale is a scalar coefficient; None (or zero) means nothing to undo.
  if input_scale:
    decaf_image /= input_scale
  return decaf_image

Net.decaffeinate_image = _Net_decaffeinate_image


def _Net_set_input_arrays(self, data, labels):
  """
  Hand data and labels to the net's in-memory MemoryDataLayer through
  self._set_input_arrays and return its result.
  (Note: this is only for networks declared with the memory data layer.)

  Take
    data: passed on unchanged
    labels: a 1-d array is first expanded to a C-contiguous (N x 1 x 1 x 1)
            array; any other array is passed on unchanged
  """
  is_flat = labels.ndim == 1
  if is_flat:
    column = labels.reshape((labels.shape[0], 1, 1, 1))
    labels = np.ascontiguousarray(column)
  return self._set_input_arrays(data, labels)

Net.set_input_arrays = _Net_set_input_arrays