286 files changed, 3346 insertions, 3028 deletions
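Most of the hunks below are whitespace-only PEP 8 / flake8 fixes: the setup.cfg hunk adds the ignored error codes and a new [flake8] section, and the .travis.yml hunk switches the Travis lint step from `pep8 setup.py` to plain `pep8`. One reformatted line worth a second look is in test/common.py, where get_numerical_jacobian ends with `outb.add_(-1, outa).div_(2 * perturbation)`; that is the central-difference estimate (f(x + h) - f(x - h)) / (2h). The following is a minimal standalone sketch of the same idea, with hypothetical names and plain torch tensors rather than the repository's helpers:

import torch


def numerical_grad(fn, x, eps=1e-6):
    # Central-difference gradient of a scalar-valued fn with respect to x:
    # perturb each element by +/- eps and divide the output difference by 2 * eps.
    # Hypothetical helper for illustration only; not the repository's function.
    flat_x = x.view(-1)          # shares storage with x, so edits perturb x itself
    grad = torch.zeros(flat_x.nelement()).double()
    for i in range(flat_x.nelement()):
        orig = float(flat_x[i])
        flat_x[i] = orig + eps
        f_plus = float(fn(x))
        flat_x[i] = orig - eps
        f_minus = float(fn(x))
        flat_x[i] = orig         # restore the original value
        grad[i] = (f_plus - f_minus) / (2 * eps)
    return grad.view_as(x)


# The numerical gradient of sum(x ** 2) should be close to 2 * x.
x = torch.rand(3, 3).double()
print(numerical_grad(lambda t: (t ** 2).sum(), x))

The real helper does this in place (add_, div_) on preallocated buffers; the sketch trades that for readability.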
diff --git a/.travis.yml b/.travis.yml index 781eb2db42..58a015cd1c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -44,4 +44,4 @@ matrix: python: "2.7" addons: true install: pip install pep8 - script: pep8 setup.py + script: pep8 diff --git a/docs/source/conf.py b/docs/source/conf.py index f7b76d2098..2513f8c299 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -201,12 +201,13 @@ from docutils import nodes from sphinx.util.docfields import TypedField from sphinx import addnodes + def patched_make_field(self, types, domain, items): # type: (List, unicode, Tuple) -> nodes.field def handle_item(fieldarg, content): par = nodes.paragraph() par += addnodes.literal_strong('', fieldarg) # Patch: this line added - #par.extend(self.make_xrefs(self.rolename, domain, fieldarg, + # par.extend(self.make_xrefs(self.rolename, domain, fieldarg, # addnodes.literal_strong)) if fieldarg in types: par += nodes.Text(' (') @@ -1,2 +1,7 @@ [pep8] max-line-length = 120 +ignore = E402,E721,E731 + +[flake8] +max-line-length = 120 +ignore = E305,E402,E721,E731,F401,F403,F405,F811,F812,F821,F841 diff --git a/test/common.py b/test/common.py index 46b9273e1a..48a7ae150f 100644 --- a/test/common.py +++ b/test/common.py @@ -12,6 +12,7 @@ from torch.autograd import Variable, Function torch.set_default_tensor_type('torch.DoubleTensor') + def run_tests(): parser = argparse.ArgumentParser(add_help=False) parser.add_argument('--seed', type=int, default=123) @@ -29,6 +30,7 @@ try: except ImportError: TEST_NUMPY = False + def get_cpu_type(t): assert t.__module__ == 'torch.cuda' return getattr(torch, t.__class__.__name__) @@ -155,7 +157,7 @@ def make_jacobian(input, num_out): return torch.zeros(input.nelement(), num_out) else: return type(input)(filter(lambda x: x is not None, - (make_jacobian(elem, num_out) for elem in input))) + (make_jacobian(elem, num_out) for elem in input))) def iter_tensors(x, only_requiring_grad=False): @@ -206,7 +208,7 @@ def get_numerical_jacobian(fn, input, target): outb.copy_(fn(input)) flat_tensor[i] = orig - outb.add_(-1,outa).div_(2*perturbation) + outb.add_(-1, outa).div_(2 * perturbation) d_tensor[i] = outb return jacobian diff --git a/test/common_nn.py b/test/common_nn.py index 5c43442953..174ab44e6b 100644 --- a/test/common_nn.py +++ b/test/common_nn.py @@ -25,14 +25,14 @@ module_tests = [ module_name='Linear', constructor_args=(10, 8), input_size=(4, 10), - reference_fn=lambda i,p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8) + reference_fn=lambda i, p: torch.mm(i, p[0].t()) + p[1].view(1, -1).expand(4, 8) ), dict( module_name='Linear', constructor_args=(10, 8, False), input_size=(4, 10), desc='no_bias', - reference_fn=lambda i,p: torch.mm(i, p[0].t()) + reference_fn=lambda i, p: torch.mm(i, p[0].t()) ), dict( module_name='Threshold', @@ -72,7 +72,7 @@ module_tests = [ dict( module_name='Hardtanh', input_size=(3, 2, 5), - reference_fn=lambda i,_: i.clamp(-1, 1) + reference_fn=lambda i, _: i.clamp(-1, 1) ), dict( module_name='Sigmoid', @@ -85,22 +85,22 @@ module_tests = [ dict( module_name='Softmax', input_size=(10, 20), - reference_fn=lambda i,_: torch.exp(i).div(torch.exp(i).sum(1).expand(10, 20)) + reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1).expand(10, 20)) ), dict( module_name='Softmax2d', input_size=(1, 3, 10, 20), - reference_fn=lambda i,_: torch.exp(i).div(torch.exp(i).sum(1).expand_as(i)) + reference_fn=lambda i, _: torch.exp(i).div(torch.exp(i).sum(1).expand_as(i)) ), dict( module_name='LogSoftmax', input_size=(10, 20), - reference_fn=lambda i,_: 
torch.exp(i).div_(torch.exp(i).sum(1).expand(10, 20)).log_() + reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1).expand(10, 20)).log_() ), dict( module_name='LogSoftmax', input_size=(1, 3, 10, 20), - reference_fn=lambda i,_: torch.exp(i).div_(torch.exp(i).sum(1).expand_as(i)).log_(), + reference_fn=lambda i, _: torch.exp(i).div_(torch.exp(i).sum(1).expand_as(i)).log_(), desc='multiparam' ), dict( @@ -130,18 +130,18 @@ module_tests = [ dict( module_name='LogSigmoid', input_size=(2, 3, 4), - reference_fn=lambda i,_: i.sigmoid().log() + reference_fn=lambda i, _: i.sigmoid().log() ), dict( module_name='Softplus', input_size=(10, 20), - reference_fn=lambda i,_: torch.log(1 + torch.exp(i)) + reference_fn=lambda i, _: torch.log(1 + torch.exp(i)) ), dict( module_name='Softplus', constructor_args=(2,), input_size=(10, 20), - reference_fn=lambda i,_: 1. / 2. * torch.log(1 + torch.exp(2 * i)), + reference_fn=lambda i, _: 1. / 2. * torch.log(1 + torch.exp(2 * i)), desc='beta' ), dict( @@ -172,7 +172,7 @@ module_tests = [ dict( module_name='Softsign', input_size=(3, 2, 5), - reference_fn=lambda i,_: i.div(1 + torch.abs(i)) + reference_fn=lambda i, _: i.div(1 + torch.abs(i)) ), dict( module_name='Softmin', @@ -187,11 +187,11 @@ module_tests = [ criterion_tests = [ dict(module_name='L1Loss', - input_size=(2, 3, 4), - target=torch.randn(2, 3, 4), - reference_fn=lambda i,t,_: 1./i.numel() * \ - sum((a-b).abs().sum() for a,b in zip(i, t)) - ), + input_size=(2, 3, 4), + target=torch.randn(2, 3, 4), + reference_fn=lambda i, t, _: 1. / i.numel() * + sum((a - b).abs().sum() for a, b in zip(i, t)) + ), dict( module_name='NLLLoss', input=torch.rand(15, 10).log(), @@ -213,7 +213,7 @@ criterion_tests = [ module_name='MSELoss', input=torch.randn(2, 3, 4, 5), target=torch.randn(2, 3, 4, 5), - reference_fn=lambda i,t,_: (i-t).abs().pow(2).sum() / i.numel() + reference_fn=lambda i, t, _: (i - t).abs().pow(2).sum() / i.numel() ), dict( module_name='BCELoss', @@ -370,9 +370,9 @@ class NNTestCase(TestCase): if jacobian_input: for jacobian_x, d_x in zip(flat_jacobian_input, iter_tensors(d_input)): - jacobian_x[:,i] = d_x + jacobian_x[:, i] = d_x if jacobian_parameters: - jacobian_param[:,i] = torch.cat(self._flatten_tensors(d_param), 0) + jacobian_param[:, i] = torch.cat(self._flatten_tensors(d_param), 0) res = tuple() if jacobian_input: @@ -433,7 +433,7 @@ class NNTestCase(TestCase): fx1 = self._forward_criterion(criterion, input, target) x[i] = original - eps fx2 = self._forward_criterion(criterion, input, target) - deriv = (fx1 - fx2) / (2.*eps) + deriv = (fx1 - fx2) / (2. 
* eps) d_x[i] = deriv x[i] = original @@ -447,8 +447,9 @@ class NNTestCase(TestCase): class TestBase(object): + def __init__(self, constructor, constructor_args=tuple(), input_size=None, - input=None, desc='', reference_fn=None, fullname=None, **kwargs): + input=None, desc='', reference_fn=None, fullname=None, **kwargs): if input_size is None and input is None: raise RuntimeError("Specify either an input tensor, or it's size!") self.constructor = constructor @@ -496,6 +497,7 @@ class TestBase(object): class ModuleTest(TestBase): + def __init__(self, *args, **kwargs): super(ModuleTest, self).__init__(*args, **kwargs) self.jacobian_input = kwargs.get('jacobian_input', True) @@ -568,6 +570,7 @@ class ModuleTest(TestBase): class CriterionTest(TestBase): + def __init__(self, *args, **kwargs): super(CriterionTest, self).__init__(*args, **kwargs) self.target = self._get_target(kwargs['target']) @@ -590,7 +593,7 @@ class CriterionTest(TestBase): if isinstance(target, Variable): target = target.data expected_out = self.reference_fn(deepcopy(self._unpack_input(input)), - deepcopy(target), module) + deepcopy(target), module) test_case.assertEqual(out, expected_out) test_case.check_criterion_jacobian(module, input, self.target) diff --git a/test/data/network1.py b/test/data/network1.py index 9c052a1943..68fbe37696 100644 --- a/test/data/network1.py +++ b/test/data/network1.py @@ -2,6 +2,7 @@ import torch.nn as nn class Net(nn.Module): + def __init__(self): super(Net, self).__init__() self.linear = nn.Linear(10, 20) diff --git a/test/data/network2.py b/test/data/network2.py index 8db55a11a1..862593c5fc 100644 --- a/test/data/network2.py +++ b/test/data/network2.py @@ -2,6 +2,7 @@ import torch.nn as nn class Net(nn.Module): + def __init__(self): super(Net, self).__init__() self.linear = nn.Linear(10, 20) diff --git a/test/error_messages/storage.py b/test/error_messages/storage.py index 40dd94fffa..bde3df7dec 100644 --- a/test/error_messages/storage.py +++ b/test/error_messages/storage.py @@ -1,5 +1,6 @@ import torch + def check_error(desc, fn, *required_substrings): try: fn() @@ -16,54 +17,55 @@ def check_error(desc, fn, *required_substrings): assert False, "given function ({}) didn't raise an error".format(desc) check_error( - 'Wrong argument types', - lambda: torch.FloatStorage(object()), - 'object') + 'Wrong argument types', + lambda: torch.FloatStorage(object()), + 'object') check_error('Unknown keyword argument', - lambda: torch.FloatStorage(content=1234.), - 'keyword') + lambda: torch.FloatStorage(content=1234.), + 'keyword') check_error('Invalid types inside a sequence', - lambda: torch.FloatStorage(['a', 'b']), - 'list', 'str') + lambda: torch.FloatStorage(['a', 'b']), + 'list', 'str') check_error('Invalid size type', - lambda: torch.FloatStorage(1.5), - 'float') + lambda: torch.FloatStorage(1.5), + 'float') check_error('Invalid offset', - lambda: torch.FloatStorage(torch.FloatStorage(2), 4), - '2', '4') + lambda: torch.FloatStorage(torch.FloatStorage(2), 4), + '2', '4') check_error('Negative offset', - lambda: torch.FloatStorage(torch.FloatStorage(2), -1), - '2', '-1') + lambda: torch.FloatStorage(torch.FloatStorage(2), -1), + '2', '-1') check_error('Invalid size', - lambda: torch.FloatStorage(torch.FloatStorage(3), 1, 5), - '2', '1', '5') + lambda: torch.FloatStorage(torch.FloatStorage(3), 1, 5), + '2', '1', '5') check_error('Negative size', - lambda: torch.FloatStorage(torch.FloatStorage(3), 1, -5), - '2', '1', '-5') + lambda: torch.FloatStorage(torch.FloatStorage(3), 1, -5), + '2', '1', 
'-5') check_error('Invalid index type', - lambda: torch.FloatStorage(10)['first item'], - 'str') + lambda: torch.FloatStorage(10)['first item'], + 'str') + def assign(): torch.FloatStorage(10)[1:-1] = '1' check_error('Invalid value type', - assign, - 'str') + assign, + 'str') check_error('resize_ with invalid type', - lambda: torch.FloatStorage(10).resize_(1.5), - 'float') + lambda: torch.FloatStorage(10).resize_(1.5), + 'float') check_error('fill_ with invalid type', - lambda: torch.IntStorage(10).fill_('asdf'), - 'str') + lambda: torch.IntStorage(10).fill_('asdf'), + 'str') # TODO: frombuffer diff --git a/test/optim/test.py b/test/optim/test.py index 3c6e34f40c..076b9e5d2c 100644 --- a/test/optim/test.py +++ b/test/optim/test.py @@ -3,10 +3,12 @@ import torch import torch.legacy.optim as optim from pprint import pprint + def rosenbrock(tensor): x, y = tensor return (1 - x)**2 + 100 * (y - x**2)**2 + def drosenbrock(tensor): x, y = tensor return torch.DoubleTensor((-400 * x * (y - x**2) - 2 * (1 - x), 200 * x * (y - x**2))) diff --git a/test/test_autograd.py b/test/test_autograd.py index e7fc2c56d4..1d7ddc48f6 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -8,7 +8,7 @@ from copy import deepcopy from collections import OrderedDict from common import make_jacobian, TestCase, iter_tensors, \ - get_numerical_jacobian, run_tests + get_numerical_jacobian, run_tests from torch.autograd._functions import * from torch.autograd import Variable, Function @@ -46,7 +46,7 @@ def get_analytical_jacobian(input, output): zero_gradients(input) output.backward(grad_output, retain_variables=True) for jacobian_x, d_x in zip(jacobian, iter_gradients(input)): - jacobian_x[:,i] = d_x + jacobian_x[:, i] = d_x return jacobian @@ -68,6 +68,7 @@ class TestAutograd(TestCase): y = Variable(torch.ones(5, 5) * 4, requires_grad=True) counter = [0] + def bw_hook(inc, grad): self.assertIsInstance(grad, Variable) counter[0] += inc @@ -103,6 +104,7 @@ class TestAutograd(TestCase): # WARNING: this is a test for autograd internals. # You should never have to use such things in your code. class NoneGradientFunction(Function): + def forward(self, x, y): assert self.needs_input_grad[0] assert not self.needs_input_grad[1] @@ -114,6 +116,7 @@ class TestAutograd(TestCase): fn = NoneGradientFunction() fn._backward_hooks = OrderedDict() was_called = [False] + def hook(grad_input, grad_output): self.assertIsInstance(grad_input, tuple) self.assertIsInstance(grad_output, tuple) @@ -242,6 +245,7 @@ class TestAutograd(TestCase): self.assertFalse(a.requires_grad) b = a + z self.assertTrue(b.requires_grad) + def error(): raise RuntimeError # Make sure backward isn't called on these @@ -379,6 +383,7 @@ class TestAutograd(TestCase): segfault. 
""" class CollectOnDelete(Function): + def __del__(self): gc.collect() @@ -386,7 +391,7 @@ class TestAutograd(TestCase): Variable(torch.randn(10, 10), creator=CollectOnDelete()) @unittest.skipIf(not torch.cuda.is_available() or torch.cuda.device_count() < 2, - "CUDA not available or <2 GPUs detected") + "CUDA not available or <2 GPUs detected") def test_unused_output_gpu(self): from torch.nn.parallel._functions import Broadcast x = Variable(torch.randn(5, 5).float().cuda(), requires_grad=True) @@ -436,6 +441,7 @@ class TestAutograd(TestCase): def test_return_leaf(self): class Identity(Function): + def forward(self, a, b): return a, a + b @@ -443,6 +449,7 @@ class TestAutograd(TestCase): return grad_a + grad_b, grad_b class Inplace(InplaceFunction): + def forward(self, a, b): self.mark_dirty(a) return a.add_(b), b + 2 @@ -464,6 +471,7 @@ class TestAutograd(TestCase): def test_return_leaf_inplace(self): class Inplace(InplaceFunction): + def forward(self, a, b): self.mark_dirty(a) return a.add_(b), b + 2 @@ -496,51 +504,51 @@ class TestAutograd(TestCase): self.assertEqual(z.grad.data, torch.ones(5) * 2) def test_backward_copy(self): - # This tests checks backward engine for a very subtle bug that appreared - # in one of the initial versions of autograd. Gradients tensors were - # simply stored in lists while the function waited for all its gradients - # to be computed. However, sometimes an output was used multiple times, - # so the gradients needed to be summed. Engine used to keep a need_copy - # set of tensors that will need a clone upon next addition and removed - # them from the set as soon as the clone was performed. However, this - # could lead to incorrect results if the same gradient tensor was - # buffered in three places in the graph: - # 1. When accumulating gradients in one of these places it was cloned - # and removed from need_copy set. - # 2. When accumulating in second place, it wasn't in the need_copy set, - # so the gradients were simply accumulated in-place (which already - # modified the grad in 3rd place) - # 3. When accumulating in the third place, it wasn't in the need_copy set - # as well, so the incoming gradient was summed in-place, yielding - # incorrect results in all functions, except the first one. - x = Variable(torch.ones(5, 5), requires_grad=True) - y = Variable(torch.ones(5, 5), requires_grad=True) - # Simulate that we're in the middle of the graph - a = x + 2 - b = y + 2 - c = x + 2 - # This op will just return grad_output two times in backward - add1 = a + b - add2 = add1 + c - # Simulate a long branch, so grad_output will get buffered. - for i in range(4): - a = a * 2 - b = b * 2 - c = c * 2 - branch = a + b + c - out = add2 + branch - # expected gradients are: - # for x: 34 (16 from final a, 16 from final c, 2 from add2) - # for y: 17 (16 from final b, 1 from add2) - grad_output = torch.ones(5, 5) - out.backward(grad_output) - self.assertEqual(x.grad.data, torch.ones(5, 5) * 34) - self.assertEqual(y.grad.data, torch.ones(5, 5) * 17) + # This tests checks backward engine for a very subtle bug that appreared + # in one of the initial versions of autograd. Gradients tensors were + # simply stored in lists while the function waited for all its gradients + # to be computed. However, sometimes an output was used multiple times, + # so the gradients needed to be summed. Engine used to keep a need_copy + # set of tensors that will need a clone upon next addition and removed + # them from the set as soon as the clone was performed. 
However, this + # could lead to incorrect results if the same gradient tensor was + # buffered in three places in the graph: + # 1. When accumulating gradients in one of these places it was cloned + # and removed from need_copy set. + # 2. When accumulating in second place, it wasn't in the need_copy set, + # so the gradients were simply accumulated in-place (which already + # modified the grad in 3rd place) + # 3. When accumulating in the third place, it wasn't in the need_copy set + # as well, so the incoming gradient was summed in-place, yielding + # incorrect results in all functions, except the first one. + x = Variable(torch.ones(5, 5), requires_grad=True) + y = Variable(torch.ones(5, 5), requires_grad=True) + # Simulate that we're in the middle of the graph + a = x + 2 + b = y + 2 + c = x + 2 + # This op will just return grad_output two times in backward + add1 = a + b + add2 = add1 + c + # Simulate a long branch, so grad_output will get buffered. + for i in range(4): + a = a * 2 + b = b * 2 + c = c * 2 + branch = a + b + c + out = add2 + branch + # expected gradients are: + # for x: 34 (16 from final a, 16 from final c, 2 from add2) + # for y: 17 (16 from final b, 1 from add2) + grad_output = torch.ones(5, 5) + out.backward(grad_output) + self.assertEqual(x.grad.data, torch.ones(5, 5) * 34) + self.assertEqual(y.grad.data, torch.ones(5, 5) * 17) def test_functional_blas(self): def compare(fn, *args): unpacked_args = tuple(arg.data if isinstance(arg, Variable) else arg - for arg in args) + for arg in args) self.assertEqual(fn(*args).data, fn(*unpacked_args)) def test_blas_add(fn, x, y, z): @@ -553,27 +561,29 @@ class TestAutograd(TestCase): compare(fn, x, y) test_blas(torch.mm, Variable(torch.randn(2, 10)), - Variable(torch.randn(10, 4))) + Variable(torch.randn(10, 4))) test_blas_add(torch.addmm, Variable(torch.randn(2, 4)), - Variable(torch.randn(2, 10)), Variable(torch.randn(10, 4))) + Variable(torch.randn(2, 10)), Variable(torch.randn(10, 4))) test_blas(torch.bmm, Variable(torch.randn(4, 2, 10)), - Variable(torch.randn(4, 10, 4))) + Variable(torch.randn(4, 10, 4))) test_blas_add(torch.addbmm, Variable(torch.randn(2, 4)), - Variable(torch.randn(4, 2, 10)), Variable(torch.randn(4, 10, 4))) + Variable(torch.randn(4, 2, 10)), Variable(torch.randn(4, 10, 4))) test_blas_add(torch.baddbmm, Variable(torch.randn(4, 2, 4)), - Variable(torch.randn(4, 2, 10)), Variable(torch.randn(4, 10, 4))) + Variable(torch.randn(4, 2, 10)), Variable(torch.randn(4, 10, 4))) test_blas(torch.mv, Variable(torch.randn(2, 10)), - Variable(torch.randn(10))) + Variable(torch.randn(10))) test_blas_add(torch.addmv, Variable(torch.randn(2)), - Variable(torch.randn(2, 10)), Variable(torch.randn(10))) + Variable(torch.randn(2, 10)), Variable(torch.randn(10))) test_blas(torch.ger, Variable(torch.randn(5)), - Variable(torch.randn(6))) + Variable(torch.randn(6))) test_blas_add(torch.addr, Variable(torch.randn(5, 6)), - Variable(torch.randn(5)), Variable(torch.randn(6))) + Variable(torch.randn(5)), Variable(torch.randn(6))) def test_save_none_for_backward(self): test_case = self + class MyFn(Function): + def forward(self, input): self.save_for_backward(None, input, None) return input * input @@ -591,6 +601,7 @@ class TestAutograd(TestCase): def test_too_many_grads(self): class MyFn(Function): + def forward(self, input): return input @@ -679,6 +690,7 @@ class TestAutograd(TestCase): def test_dep_nograd(self): class F1(Function): + def forward(self, input): out = torch.randn(input.size()) self.mark_non_differentiable(out) @@ 
-688,6 +700,7 @@ class TestAutograd(TestCase): return grad_output class F2(Function): + def forward(self, input, ignored): return input @@ -710,6 +723,7 @@ def index_variable(shape, max_indices): index = torch.rand(*shape).mul_(max_indices).floor_().long() return Variable(index, requires_grad=False) + def gather_variable(shape, index_dim, max_indices): assert len(shape) == 2 assert index_dim < 2 @@ -717,7 +731,7 @@ def gather_variable(shape, index_dim, max_indices): index = torch.LongTensor(*shape) for i in range(shape[index_dim]): index.select(index_dim, i).copy_( - torch.randperm(max_indices)[:shape[batch_dim]]) + torch.randperm(max_indices)[:shape[batch_dim]]) return Variable(index, requires_grad=False) @@ -725,215 +739,215 @@ L = 20 M = 10 S = 5 function_tests = [ - (Add, (), ((M, M), (M, M)) ), - (Sub, (), ((M, M), (M, M)) ), - (Mul, (), ((M, M), (M, M)) ), - (Div, (), ((M, M), torch.rand(M, M) + 5e-2) ), - (Pow, (), (torch.rand(M, M) + 1e-3, torch.rand(M, M) + 0.1)), - (AddConstant, (3.14,), ((L, L),) ), - (SubConstant, (3.14,), ((L, L),) ), - (SubConstant, (3.14, True), ((L, L),), 'from_tensor' ), - (MulConstant, (3.14,), ((L, L),) ), - (DivConstant, (3.14, True), (torch.rand(L, L) + 1e-1,), 'by_tensor' ), - (PowConstant, (3.14,), (torch.rand(L, L),) ), - (PowConstant, (3.14, True), (torch.rand(L, L),), 'tensor_power' ), - (Transpose, (0, 1), (torch.rand(L, L),) ), - (Transpose, (2, 0), (torch.rand(S, S, S),), '3d' ), - (Permute, ((0, 4, 3, 5, 1, 2),), ((1, 2, 3, 4, 5, 6),) ), - (Index, ((1, 2),), (torch.rand(S, S, S),) ), - (Index, (slice(0, 3),), (torch.rand(S, S, S),), 'slice' ), - (Index, ((slice(0, 3), 1),),(torch.rand(S, S, S),), 'slice_index' ), - (View, (S*S, S), (torch.rand(S, S, S),) ), - (Expand, ((S, 5, S, 5),), ((S, 1, S, 1),) ), - (Exp, (), (torch.rand(S, S, S),) ), - (Log, (), (torch.rand(S, S, S) + 1e-2,) ), - (Log1p, (), (torch.rand(S, S, S),) ), - (Tanh, (), ((S, S, S),) ), - (Sigmoid, (), ((S, S, S),) ), - (Sinh, (), ((S, S, S),) ), - (Cosh, (), ((S, S, S),) ), - (Abs, (), ((S, S, S),) ), - (Clamp, (0, 1), ((S, S, S),) ), - (Sqrt, (), (torch.rand(S, S, S) + 5e-4,) ), - (Sin, (), ((S, S, S),) ), - (Cos, (), ((S, S, S),) ), - (Tan, (), (torch.randn(S, S, S).clamp(-1, 1),) ), - (Asin, (), (torch.randn(S, S, S).clamp(-0.9, 0.9),) ), - (Acos, (), (torch.randn(S, S, S).clamp(-0.9, 0.9),) ), - (Atan, (), ((S, S, S),) ), - (Reciprocal, (), (torch.rand(S, S, S) + 0.1,) ), - (Cmax, (), ((S, S, S), (S, S, S)) ), - (Cmin, (), ((S, S, S), (S, S, S)) ), - (Round, (), ((S, S, S),) ), - (Sign, (), ((S, S, S),) ), - (Trunc, (), ((S, S, S),) ), - (Floor, (), ((S, S, S),) ), - (Ceil, (), ((S, S, S),) ), - (Frac, (), ((S, S, S),) ), - (Fmod, (1.5,), ((S, S, S),) ), - (Lerp, (0.2,), ((S, S, S), (S, S, S)) ), - (Rsqrt, (), (torch.rand(S, S, S) + 1e-2,) ), - (Remainder, (1.5,), ((S, S, S),) ), - (CmaxConstant, (0.5,), ((S, S, S),) ), - (CminConstant, (0.5,), ((S, S, S),) ), - (Mean, (), ((S, S, S),) ), - (Mean, (1,), ((S, S, S),), 'dim' ), - (Sum, (), ((S, S, S),) ), - (Sum, (1,), ((S, S, S),), 'dim' ), - (Prod, (), ((S, S, S),) ), - (Prod, (1,), ((S, S, S),), 'dim' ), - (Addmm, (), ((S, M), (S, S), (S, M)), ), - (Addmm, (0.1, 1), ((S, M), (S, S), (S, M)), 'coef' ), - (Addbmm, (), ((S, M), (S, S, S), (S, S, M)), ), - (Addbmm, (0.1, 0.4), ((S, M), (S, S, S), (S, S, M)), 'coef' ), - (Baddbmm, (), ((S, S, M), (S, S, S), (S, S, M)), ), - (Baddbmm, (0.1, 0.4), ((S, S, M), (S, S, S), (S, S, M)), 'coef' ), - (Addmv, (), ((S,), (S, M), (M,)), ), - (Addmv, (0.1, 0.4), ((S,), (S, M), (M,)), 
'coef' ), - (Addr, (), ((S, M), (S,), (M,)), ), - (Addr, (0.1, 0.4), ((S, M), (S,), (M,)), 'coef' ), - (Dot, (), ((L,), (L,)), ), - (Max, (), ((S, S, S),), ), - (Min, (), ((S, S, S),), ), - (Max, (0,), ((S, S, S),), 'dim' ), - (Min, (0,), ((S, S, S),), 'dim' ), - (Mode, (0,), ((S, S, S),), ), - (Kthvalue, (2, 0), ((S, S, S),), ), - (Median, (0,), ((S, S, S),), ), - (Norm, (1.5,), (torch.rand(S, S, S),), '1_5' ), - (Norm, (), ((S, S, S),), '2' ), - (Norm, (3,), ((S, S, S),), '3' ), - (Norm, (1.5, 0), (torch.rand(S, S, S),), '1_5_dim' ), - (Norm, (2, 0), ((S, S, S),), '2_dim' ), - (Norm, (3, 0), ((S, S, S),), '3_dim' ), - (Addcmul, (), ((S, S), (S, S), (S, S)) ), - (Addcmul, (0.6,), ((S, S), (S, S), (S, S)), 'scale' ), - (Addcdiv, (), ((S, S), (S, S), torch.rand(S, S) + 1e-2) ), - (Addcdiv, (0.6,), ((S, S), (S, S), torch.rand(S, S) + 1e-2), 'scale'), - (IndexAdd, (0,), ((S, S), index_variable(2, S), (2, S)) ), + (Add, (), ((M, M), (M, M))), + (Sub, (), ((M, M), (M, M))), + (Mul, (), ((M, M), (M, M))), + (Div, (), ((M, M), torch.rand(M, M) + 5e-2)), + (Pow, (), (torch.rand(M, M) + 1e-3, torch.rand(M, M) + 0.1)), + (AddConstant, (3.14,), ((L, L),)), + (SubConstant, (3.14,), ((L, L),)), + (SubConstant, (3.14, True), ((L, L),), 'from_tensor'), + (MulConstant, (3.14,), ((L, L),)), + (DivConstant, (3.14, True), (torch.rand(L, L) + 1e-1,), 'by_tensor'), + (PowConstant, (3.14,), (torch.rand(L, L),)), + (PowConstant, (3.14, True), (torch.rand(L, L),), 'tensor_power'), + (Transpose, (0, 1), (torch.rand(L, L),)), + (Transpose, (2, 0), (torch.rand(S, S, S),), '3d'), + (Permute, ((0, 4, 3, 5, 1, 2),), ((1, 2, 3, 4, 5, 6),)), + (Index, ((1, 2),), (torch.rand(S, S, S),)), + (Index, (slice(0, 3),), (torch.rand(S, S, S),), 'slice'), + (Index, ((slice(0, 3), 1),), (torch.rand(S, S, S),), 'slice_index'), + (View, (S * S, S), (torch.rand(S, S, S),)), + (Expand, ((S, 5, S, 5),), ((S, 1, S, 1),)), + (Exp, (), (torch.rand(S, S, S),)), + (Log, (), (torch.rand(S, S, S) + 1e-2,)), + (Log1p, (), (torch.rand(S, S, S),)), + (Tanh, (), ((S, S, S),)), + (Sigmoid, (), ((S, S, S),)), + (Sinh, (), ((S, S, S),)), + (Cosh, (), ((S, S, S),)), + (Abs, (), ((S, S, S),)), + (Clamp, (0, 1), ((S, S, S),)), + (Sqrt, (), (torch.rand(S, S, S) + 5e-4,)), + (Sin, (), ((S, S, S),)), + (Cos, (), ((S, S, S),)), + (Tan, (), (torch.randn(S, S, S).clamp(-1, 1),)), + (Asin, (), (torch.randn(S, S, S).clamp(-0.9, 0.9),)), + (Acos, (), (torch.randn(S, S, S).clamp(-0.9, 0.9),)), + (Atan, (), ((S, S, S),)), + (Reciprocal, (), (torch.rand(S, S, S) + 0.1,)), + (Cmax, (), ((S, S, S), (S, S, S))), + (Cmin, (), ((S, S, S), (S, S, S))), + (Round, (), ((S, S, S),)), + (Sign, (), ((S, S, S),)), + (Trunc, (), ((S, S, S),)), + (Floor, (), ((S, S, S),)), + (Ceil, (), ((S, S, S),)), + (Frac, (), ((S, S, S),)), + (Fmod, (1.5,), ((S, S, S),)), + (Lerp, (0.2,), ((S, S, S), (S, S, S))), + (Rsqrt, (), (torch.rand(S, S, S) + 1e-2,)), + (Remainder, (1.5,), ((S, S, S),)), + (CmaxConstant, (0.5,), ((S, S, S),)), + (CminConstant, (0.5,), ((S, S, S),)), + (Mean, (), ((S, S, S),)), + (Mean, (1,), ((S, S, S),), 'dim'), + (Sum, (), ((S, S, S),)), + (Sum, (1,), ((S, S, S),), 'dim'), + (Prod, (), ((S, S, S),)), + (Prod, (1,), ((S, S, S),), 'dim'), + (Addmm, (), ((S, M), (S, S), (S, M)),), + (Addmm, (0.1, 1), ((S, M), (S, S), (S, M)), 'coef'), + (Addbmm, (), ((S, M), (S, S, S), (S, S, M)),), + (Addbmm, (0.1, 0.4), ((S, M), (S, S, S), (S, S, M)), 'coef'), + (Baddbmm, (), ((S, S, M), (S, S, S), (S, S, M)),), + (Baddbmm, (0.1, 0.4), ((S, S, M), (S, S, S), (S, S, M)), 'coef'), + 
(Addmv, (), ((S,), (S, M), (M,)),), + (Addmv, (0.1, 0.4), ((S,), (S, M), (M,)), 'coef'), + (Addr, (), ((S, M), (S,), (M,)),), + (Addr, (0.1, 0.4), ((S, M), (S,), (M,)), 'coef'), + (Dot, (), ((L,), (L,)),), + (Max, (), ((S, S, S),),), + (Min, (), ((S, S, S),),), + (Max, (0,), ((S, S, S),), 'dim'), + (Min, (0,), ((S, S, S),), 'dim'), + (Mode, (0,), ((S, S, S),),), + (Kthvalue, (2, 0), ((S, S, S),),), + (Median, (0,), ((S, S, S),),), + (Norm, (1.5,), (torch.rand(S, S, S),), '1_5'), + (Norm, (), ((S, S, S),), '2'), + (Norm, (3,), ((S, S, S),), '3'), + (Norm, (1.5, 0), (torch.rand(S, S, S),), '1_5_dim'), + (Norm, (2, 0), ((S, S, S),), '2_dim'), + (Norm, (3, 0), ((S, S, S),), '3_dim'), + (Addcmul, (), ((S, S), (S, S), (S, S))), + (Addcmul, (0.6,), ((S, S), (S, S), (S, S)), 'scale'), + (Addcdiv, (), ((S, S), (S, S), torch.rand(S, S) + 1e-2)), + (Addcdiv, (0.6,), ((S, S), (S, S), torch.rand(S, S) + 1e-2), 'scale'), + (IndexAdd, (0,), ((S, S), index_variable(2, S), (2, S))), # (IndexCopy, (0,), ((S, S), index_variable(2, S), (2, S)) ), - (IndexFill, (0, 2), ((S, S), index_variable(2, S)) ), - (IndexSelect, (0,), ((S, S), index_variable(2, S)) ), - (Gather, (0,), ((M, S), gather_variable((S, S), 1, M)) ), - (Gather, (1,), ((M, S), gather_variable((M, S//2), 0, S)), 'dim1'), - (Scatter, (0,), ((M, S), gather_variable((S, S), 1, M), (S, S))), - (Scatter, (1,), ((M, S), gather_variable((M, S//2), 0, S), (M, S//2)), 'dim1'), - (Concat, (0,), ((1, S, S), (2, S, S), (3, S, S)) ), - (Resize, (S*S, S), ((S, S, S),) ), - (Diag, (), ((S, S),), '2d' ), - (Diag, (), ((S,),), '1d' ), - (Tril, (), ((S, S),) ), - (Tril, (2,), ((S, S),), 'idx' ), - (Triu, (), ((S, S),) ), - (Triu, (2,), ((S, S),), 'idx' ), - (Clone, (), ((S, M, S),) ), - (Squeeze, (), ((S, 1, M, 1),) ), - (Squeeze, (1,), ((S, 1, M, 1),), 'dim' ), - (Unsqueeze, (0,), ((S, M, S),), '0' ), - (Unsqueeze, (1,), ((S, M, S),), '1' ), + (IndexFill, (0, 2), ((S, S), index_variable(2, S))), + (IndexSelect, (0,), ((S, S), index_variable(2, S))), + (Gather, (0,), ((M, S), gather_variable((S, S), 1, M))), + (Gather, (1,), ((M, S), gather_variable((M, S // 2), 0, S)), 'dim1'), + (Scatter, (0,), ((M, S), gather_variable((S, S), 1, M), (S, S))), + (Scatter, (1,), ((M, S), gather_variable((M, S // 2), 0, S), (M, S // 2)), 'dim1'), + (Concat, (0,), ((1, S, S), (2, S, S), (3, S, S))), + (Resize, (S * S, S), ((S, S, S),)), + (Diag, (), ((S, S),), '2d'), + (Diag, (), ((S,),), '1d'), + (Tril, (), ((S, S),)), + (Tril, (2,), ((S, S),), 'idx'), + (Triu, (), ((S, S),)), + (Triu, (2,), ((S, S),), 'idx'), + (Clone, (), ((S, M, S),)), + (Squeeze, (), ((S, 1, M, 1),)), + (Squeeze, (1,), ((S, 1, M, 1),), 'dim'), + (Unsqueeze, (0,), ((S, M, S),), '0'), + (Unsqueeze, (1,), ((S, M, S),), '1'), # (MaskedCopy, (), ((S, S), Variable(torch.randn(S, S).gt(0), requires_grad=False), (S, S),)), - (MaskedFill, (10,), ((S, S), Variable(torch.randn(S, S).gt(0), requires_grad=False))), - (MaskedSelect, (), ((S, S), Variable(torch.randn(S, S).gt(0), requires_grad=False))), - (Sort, (), ((S, M, S),) ), - (Sort, (1,), ((S, M, S),), 'dim' ), - (Sort, (1, True), ((S, M, S),), 'dim_desc' ), - (Topk, (3,), ((S, M, S),) ), - (Topk, (3, 1), ((S, M, S),), 'dim' ), - (Topk, (3, 1, True), ((S, M, S),), 'dim_desc' ), - (Topk, (3, 1, True, True), ((S, M, S),), 'dim_desc_sort' ), + (MaskedFill, (10,), ((S, S), Variable(torch.randn(S, S).gt(0), requires_grad=False))), + (MaskedSelect, (), ((S, S), Variable(torch.randn(S, S).gt(0), requires_grad=False))), + (Sort, (), ((S, M, S),)), + (Sort, (1,), ((S, M, 
S),), 'dim'), + (Sort, (1, True), ((S, M, S),), 'dim_desc'), + (Topk, (3,), ((S, M, S),)), + (Topk, (3, 1), ((S, M, S),), 'dim'), + (Topk, (3, 1, True), ((S, M, S),), 'dim_desc'), + (Topk, (3, 1, True, True), ((S, M, S),), 'dim_desc_sort'), ] method_tests = [ - ('add', (S, S, S), ((S, S, S),) ), - ('add', (S, S, S), (3.14,), 'constant' ), - ('sub', (S, S, S), ((S, S, S),) ), - ('sub', (S, S, S), (3.14,), 'constant' ), - ('mul', (S, S, S), ((S, S, S),) ), - ('mul', (S, S, S), (3.14,), 'constant' ), - ('div', (S, S, S), ((S, S, S),) ), - ('div', (S, S, S), (3.14,), 'constant' ), - ('pow', (S, S, S), ((S, S, S),) ), - ('pow', (S, S, S), (3.14,), 'constant' ), - ('transpose', (1, 2, 3), (1, 2) ), - ('t', (1, 2), () ), - ('view', (S, S, S), (S*S, S), ), - ('view_as', (S, S, S), ((S*S, S),) ), - ('expand', (S, 1, S), (S, S, S) ), - ('expand', (torch.Size([S, 1, S]),), (S, S, S), 'size' ), - ('exp', (S, S, S), () ), - ('log', (S, S, S), () ), - ('log1p', (S, S, S), () ), - ('tanh', (S, S, S), () ), - ('sigmoid', (S, S, S), () ), - ('sinh', (S, S, S), () ), - ('cosh', (S, S, S), () ), - ('abs', (S, S, S), () ), - ('clamp', (S, S, S), (0, 1) ), - ('sqrt', (S, S, S), () ), - ('sin', (S, S, S), () ), - ('cos', (S, S, S), () ), - ('tan', (S, S, S), () ), - ('asin', (S, S, S), () ), - ('acos', (S, S, S), () ), - ('atan', (S, S, S), () ), - ('reciprocal', (S, S, S), () ), - ('round', (S, S, S), () ), - ('sign', (S, S, S), () ), - ('trunc', (S, S, S), () ), - ('floor', (S, S, S), () ), - ('ceil', (S, S, S), () ), - ('rsqrt', (S, S, S), () ), - ('fmod', (S, S, S), (1.5,) ), - ('remainder', (S, S, S), (1.5,) ), - ('lerp', (S, S, S), ((S, S, S), 0.4) ), - ('max', (S, S, S), () ), - ('max', (S, S, S), ((S, S, S),), 'elementwise' ), - ('min', (S, S, S), () ), - ('min', (S, S, S), ((S, S, S),), 'elementwise' ), - ('mean', (S, S, S), () ), - ('mean', (S, S, S), (1,), 'dim' ), - ('sum', (S, S, S), () ), - ('sum', (S, S, S), (1,), 'dim' ), - ('prod', (S, S, S), () ), - ('prod', (S, S, S), (1,), 'dim' ), - ('addmm', (S, M), ((S, S), (S, M)), ), - ('addmm', (S, M), (0.2, 0.6, (S, S), (S, M)), 'coef' ), - ('addbmm', (S, M), ((S, S, S), (S, S, M)), ), - ('addbmm', (S, M), (0.2, 0.6, (S, S, S), (S, S, M)), 'coef' ), - ('baddbmm', (S, S, M), ((S, S, S), (S, S, M)), ), - ('baddbmm', (S, S, M), (0.2, 0.6, (S, S, S), (S, S, M)), 'coef' ), - ('addmv', (S,), ((S, M), (M,)), ), - ('addmv', (S,), (0.2, 0.6, (S, M), (M,)), 'coef' ), - ('addr', (S, M), ((S,), (M,)), ), - ('addr', (S, M), (0.2, 0.6, (S,), (M,)), 'coef' ), - ('dot', (L,), ((L,),), ), - ('addcmul', (S, S), ((S, S), (S, S)) ), - ('addcmul', (S, S), (0.5, (S, S), (S, S)), 'scale' ), - ('addcdiv', (S, S), ((S, S), (S, S)) ), - ('addcdiv', (S, S), (0.5, (S, S), (S, S)), 'scale' ), - ('norm', (S, S, S), (2,) ), - ('norm', (S, S, S), (2, 1), 'dim' ), - ('dist', (S, S, S), ((S, S, S),) ), - ('dist', (S, S, S), ((S, S, S), 4), '4' ), - ('index_select', (S, S, S), (0, index_variable(2, S)) ), - ('diag', (M, M), (), '2d' ), - ('diag', (M,), (), '1d' ), - ('tril', (M, M), () ), - ('triu', (M, M), () ), - ('clone', (S, M, S), () ), - ('permute', (1, 2, 3, 4), (0, 2, 3, 1) ), - ('select', (S, S, S), (1, 2) ), - ('narrow', (S, S, S), (1, 2, 2) ), - ('squeeze', (S, 1, S, 1), () ), - ('squeeze', (S, 1, S, 1), (1,), '1_dim' ), - ('squeeze', (S, 1, S, 1), (2,), 'not_1_dim' ), - ('unsqueeze', (S, S, S), (0,), 'first' ), - ('unsqueeze', (S, S, S), (1,), 'middle' ), - ('unsqueeze', (S, S, S), (3,), 'last' ), - ('masked_select', (M, M), (Variable(torch.ByteTensor(M, M).bernoulli_(), 
requires_grad=False),) ), - ('masked_fill_', (M, M), (Variable(torch.ByteTensor(M, M).bernoulli_(), requires_grad=False), 10) ), - ('masked_copy_', (M, M), (Variable(torch.ByteTensor(M, M).bernoulli_(), requires_grad=False), (M, M)) ), + ('add', (S, S, S), ((S, S, S),)), + ('add', (S, S, S), (3.14,), 'constant'), + ('sub', (S, S, S), ((S, S, S),)), + ('sub', (S, S, S), (3.14,), 'constant'), + ('mul', (S, S, S), ((S, S, S),)), + ('mul', (S, S, S), (3.14,), 'constant'), + ('div', (S, S, S), ((S, S, S),)), + ('div', (S, S, S), (3.14,), 'constant'), + ('pow', (S, S, S), ((S, S, S),)), + ('pow', (S, S, S), (3.14,), 'constant'), + ('transpose', (1, 2, 3), (1, 2)), + ('t', (1, 2), ()), + ('view', (S, S, S), (S * S, S),), + ('view_as', (S, S, S), ((S * S, S),)), + ('expand', (S, 1, S), (S, S, S)), + ('expand', (torch.Size([S, 1, S]),), (S, S, S), 'size'), + ('exp', (S, S, S), ()), + ('log', (S, S, S), ()), + ('log1p', (S, S, S), ()), + ('tanh', (S, S, S), ()), + ('sigmoid', (S, S, S), ()), + ('sinh', (S, S, S), ()), + ('cosh', (S, S, S), ()), + ('abs', (S, S, S), ()), + ('clamp', (S, S, S), (0, 1)), + ('sqrt', (S, S, S), ()), + ('sin', (S, S, S), ()), + ('cos', (S, S, S), ()), + ('tan', (S, S, S), ()), + ('asin', (S, S, S), ()), + ('acos', (S, S, S), ()), + ('atan', (S, S, S), ()), + ('reciprocal', (S, S, S), ()), + ('round', (S, S, S), ()), + ('sign', (S, S, S), ()), + ('trunc', (S, S, S), ()), + ('floor', (S, S, S), ()), + ('ceil', (S, S, S), ()), + ('rsqrt', (S, S, S), ()), + ('fmod', (S, S, S), (1.5,)), + ('remainder', (S, S, S), (1.5,)), + ('lerp', (S, S, S), ((S, S, S), 0.4)), + ('max', (S, S, S), ()), + ('max', (S, S, S), ((S, S, S),), 'elementwise'), + ('min', (S, S, S), ()), + ('min', (S, S, S), ((S, S, S),), 'elementwise'), + ('mean', (S, S, S), ()), + ('mean', (S, S, S), (1,), 'dim'), + ('sum', (S, S, S), ()), + ('sum', (S, S, S), (1,), 'dim'), + ('prod', (S, S, S), ()), + ('prod', (S, S, S), (1,), 'dim'), + ('addmm', (S, M), ((S, S), (S, M)),), + ('addmm', (S, M), (0.2, 0.6, (S, S), (S, M)), 'coef'), + ('addbmm', (S, M), ((S, S, S), (S, S, M)),), + ('addbmm', (S, M), (0.2, 0.6, (S, S, S), (S, S, M)), 'coef'), + ('baddbmm', (S, S, M), ((S, S, S), (S, S, M)),), + ('baddbmm', (S, S, M), (0.2, 0.6, (S, S, S), (S, S, M)), 'coef'), + ('addmv', (S,), ((S, M), (M,)),), + ('addmv', (S,), (0.2, 0.6, (S, M), (M,)), 'coef'), + ('addr', (S, M), ((S,), (M,)),), + ('addr', (S, M), (0.2, 0.6, (S,), (M,)), 'coef'), + ('dot', (L,), ((L,),),), + ('addcmul', (S, S), ((S, S), (S, S))), + ('addcmul', (S, S), (0.5, (S, S), (S, S)), 'scale'), + ('addcdiv', (S, S), ((S, S), (S, S))), + ('addcdiv', (S, S), (0.5, (S, S), (S, S)), 'scale'), + ('norm', (S, S, S), (2,)), + ('norm', (S, S, S), (2, 1), 'dim'), + ('dist', (S, S, S), ((S, S, S),)), + ('dist', (S, S, S), ((S, S, S), 4), '4'), + ('index_select', (S, S, S), (0, index_variable(2, S))), + ('diag', (M, M), (), '2d'), + ('diag', (M,), (), '1d'), + ('tril', (M, M), ()), + ('triu', (M, M), ()), + ('clone', (S, M, S), ()), + ('permute', (1, 2, 3, 4), (0, 2, 3, 1)), + ('select', (S, S, S), (1, 2)), + ('narrow', (S, S, S), (1, 2, 2)), + ('squeeze', (S, 1, S, 1), ()), + ('squeeze', (S, 1, S, 1), (1,), '1_dim'), + ('squeeze', (S, 1, S, 1), (2,), 'not_1_dim'), + ('unsqueeze', (S, S, S), (0,), 'first'), + ('unsqueeze', (S, S, S), (1,), 'middle'), + ('unsqueeze', (S, S, S), (3,), 'last'), + ('masked_select', (M, M), (Variable(torch.ByteTensor(M, M).bernoulli_(), requires_grad=False),)), + ('masked_fill_', (M, M), (Variable(torch.ByteTensor(M, M).bernoulli_(), 
requires_grad=False), 10)), + ('masked_copy_', (M, M), (Variable(torch.ByteTensor(M, M).bernoulli_(), requires_grad=False), (M, M))), ] # TODO: mm, bmm, mv, ger # TODO: max, min with dim (problem with indices) @@ -946,6 +960,7 @@ method_tests = [ def create_input(call_args): if not isinstance(call_args, tuple): call_args = (call_args,) + def map_arg(arg): if isinstance(arg, tuple) and not isinstance(arg[0], Variable): return Variable(torch.randn(*arg).double(), requires_grad=True) @@ -976,8 +991,9 @@ ignore_inplace = set(( for test in function_tests: cls, constructor_args, call_args = test[:3] test_name = 'test_' + cls.__name__ + ('_' + test[3] if len(test) == 4 else '') + def do_test(self, cls=cls, constructor_args=constructor_args, - call_args=call_args, test_name=test_name): + call_args=call_args, test_name=test_name): input = create_input(call_args) output = cls(*constructor_args)(*input) if not isinstance(output, tuple): @@ -986,6 +1002,7 @@ for test in function_tests: if not o.requires_grad: continue analytical = get_analytical_jacobian(input, o) + def fn(input): tmp = cls(*constructor_args)(*input) if not isinstance(tmp, tuple): @@ -1032,6 +1049,7 @@ EXCLUDE_FUNCTIONAL = { for test in method_tests: name, self_size, args = test[:3] test_name = 'test_' + name + ('_' + test[3] if len(test) == 4 else '') + def do_test(self, name=name, self_size=self_size, args=args, test_name=test_name): def check(name): self_variable = create_input((self_size,))[0] @@ -1064,7 +1082,6 @@ for test in method_tests: if not 'only supports scalar' in e.args[0]: raise - assert not hasattr(TestAutograd, test_name), 'Two tests have the same name: ' + test_name setattr(TestAutograd, test_name, do_test) diff --git a/test/test_cuda.py b/test/test_cuda.py index fa359cb642..2d5f06bad2 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -14,6 +14,7 @@ if not torch.cuda.is_available(): import sys sys.exit() + def is_floating(t): return type(t) in [torch.FloatTensor, torch.DoubleTensor, torch.cuda.FloatTensor, torch.cuda.DoubleTensor] @@ -31,7 +32,8 @@ types = [ float_types = [ torch.FloatTensor, torch.DoubleTensor -] # TODO: add half... +] # TODO: add half... 
+ def number(floating, integer, t): name = type(t).__name__ @@ -44,188 +46,204 @@ def number(floating, integer, t): S = 10 M = 50 + def make_tensor(t, *sizes): return t(*sizes).copy_(torch.randn(*sizes)) + def small_2d(t): return make_tensor(t, S, S) + def small_2d_scaled(t, scale=10): return make_tensor(t, S, S).mul(scale) + def small_3d(t): return make_tensor(t, S, S, S) + def medium_1d(t): return make_tensor(t, M) + def medium_2d(t): return make_tensor(t, M, M) + def medium_2d_scaled(t, scale=10): return make_tensor(t, M, M).mul(scale) + def small_3d_ones(t): return t(S, S, S).copy_(torch.ones(S, S, S)) + def small_3d_positive(t): min_val = 1e-3 if is_floating(t) else 2 return make_tensor(t, S, S, S).clamp_(min_val, 120) + def small_3d_unique(t): - return t(S, S, S).copy_(torch.range(1, S*S*S)) + return t(S, S, S).copy_(torch.range(1, S * S * S)) + def small_1d_lapack(t): return t(1, 3).copy_(torch.range(1, 3).view(3)) + def small_2d_lapack(t): return t(3, 3).copy_(torch.range(1, 9).view(3, 3)) + def small_2d_lapack_skinny(t): return t(3, 4).copy_(torch.range(1, 12).view(3, 4)) + def small_2d_lapack_fat(t): return t(4, 3).copy_(torch.range(1, 12).view(4, 3)) + def new_t(*sizes): def tmp(t): return t(*sizes).copy_(torch.randn(*sizes)) return tmp tests = [ - ('add', small_3d, lambda t: [number(3.14, 3, t)] ), - ('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ), - ('add', small_3d, lambda t: [number(0.2, 2, t), small_3d_positive(t)], 'scalar_tensor' ), - ('sub', small_3d, lambda t: [number(3.14, 3, t)], ), - ('sub', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ), - ('mul', small_3d, lambda t: [number(3.14, 3, t)], ), - ('mul', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ), - ('div', small_3d, lambda t: [number(3.14, 3, t)], ), - ('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ), - ('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types), - ('pow', small_3d, lambda t: [small_3d(t).abs_()], 'tensor', float_types), - ('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], None, float_types), - ('addbmm', small_2d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ), - ('addbmm', small_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars' ), - ('baddbmm', small_3d, lambda t: [small_3d(t), small_3d(t)], ), - ('baddbmm', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ), - ('baddbmm', small_3d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars' ), - ('addcdiv', small_2d_lapack, lambda t: [small_2d_lapack(t).mul(2), small_2d_lapack(t)], ), - ('addcdiv', small_2d_lapack, lambda t: [number(2.8, 1, t), small_2d_lapack(t).mul(2), small_2d_lapack(t)], 'scalar' ), - ('addcmul', small_3d, lambda t: [small_3d(t), small_3d(t)], ), - ('addcmul', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar' ), - ('addmm', medium_2d, lambda t: [medium_2d(t), medium_2d(t)], ), - ('addmm', medium_2d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'scalar' ), - ('addmm', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'two_scalars' ), - ('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)], ), - ('addmv', medium_1d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'scalar' ), - ('addmv', medium_1d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'two_scalars' ), - ('addr', medium_2d, lambda t: [medium_1d(t), medium_1d(t)], 
), - ('addr', medium_2d, lambda t: [number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'scalar' ), - ('addr', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'two_scalars' ), - ('atan2', medium_2d, lambda t: [medium_2d(t)], None, float_types), - ('fmod', small_3d, lambda t: [3], 'value' ), - ('fmod', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ), - ('chunk', medium_2d, lambda t: [4], ), - ('chunk', medium_2d, lambda t: [4, 1], 'dim' ), - ('clamp', medium_2d_scaled, lambda t: [-1, 5], ), - ('clone', medium_2d, lambda t: [], ), - ('contiguous', medium_2d, lambda t: [], ), - ('cross', new_t(M, 3, M), lambda t: [new_t(M, 3, M)(t)], ), - ('cumprod', small_3d, lambda t: [1], ), - ('cumsum', small_3d, lambda t: [1], ), - ('dim', small_3d, lambda t: [], ), - ('dist', small_2d, lambda t: [small_2d(t)], ), - ('dist', small_2d, lambda t: [small_2d(t), 3], '3_norm' ), - ('dist', small_2d, lambda t: [small_2d(t), 2.5], '2_5_norm' ), - ('dot', medium_1d, lambda t: [medium_1d(t)], ), - ('element_size', medium_1d, lambda t: [], ), - ('eq', small_3d_ones, lambda t: [small_3d(t)], ), - ('eq', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal' ), - ('ne', small_3d_ones, lambda t: [small_3d(t)], ), - ('ne', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal' ), - ('equal', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal' ), - ('equal', small_3d_ones, lambda t: [small_3d(t)], ), - ('expand', new_t(M, 1, M), lambda t: [M, 4, M], ), - ('expand_as', new_t(M, 1, M), lambda t: [new_t(M, 4, M)(t)], ), - ('fill', medium_2d, lambda t: [number(3.14, 3, t)], ), - ('ge', medium_2d, lambda t: [medium_2d(t)], ), - ('le', medium_2d, lambda t: [medium_2d(t)], ), - ('gt', medium_2d, lambda t: [medium_2d(t)], ), - ('lt', medium_2d, lambda t: [medium_2d(t)], ), - ('is_contiguous', medium_2d, lambda t: [], ), + ('add', small_3d, lambda t: [number(3.14, 3, t)]), + ('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor'), + ('add', small_3d, lambda t: [number(0.2, 2, t), small_3d_positive(t)], 'scalar_tensor'), + ('sub', small_3d, lambda t: [number(3.14, 3, t)],), + ('sub', small_3d, lambda t: [small_3d_positive(t)], 'tensor'), + ('mul', small_3d, lambda t: [number(3.14, 3, t)],), + ('mul', small_3d, lambda t: [small_3d_positive(t)], 'tensor'), + ('div', small_3d, lambda t: [number(3.14, 3, t)],), + ('div', small_3d, lambda t: [small_3d_positive(t)], 'tensor'), + ('pow', small_3d, lambda t: [number(3.14, 3, t)], None, float_types), + ('pow', small_3d, lambda t: [small_3d(t).abs_()], 'tensor', float_types), + ('addbmm', small_2d, lambda t: [small_3d(t), small_3d(t)], None, float_types), + ('addbmm', small_2d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'), + ('addbmm', small_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars'), + ('baddbmm', small_3d, lambda t: [small_3d(t), small_3d(t)],), + ('baddbmm', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'), + ('baddbmm', small_3d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), small_3d(t), small_3d(t)], 'two_scalars'), + ('addcdiv', small_2d_lapack, lambda t: [small_2d_lapack(t).mul(2), small_2d_lapack(t)],), + ('addcdiv', small_2d_lapack, lambda t: [number(2.8, 1, t), + small_2d_lapack(t).mul(2), small_2d_lapack(t)], 'scalar'), + ('addcmul', small_3d, lambda t: [small_3d(t), small_3d(t)],), + ('addcmul', small_3d, lambda t: [number(0.4, 2, t), small_3d(t), small_3d(t)], 'scalar'), + ('addmm', medium_2d, lambda t: [medium_2d(t), 
medium_2d(t)],), + ('addmm', medium_2d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'scalar'), + ('addmm', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_2d(t)], 'two_scalars'), + ('addmv', medium_1d, lambda t: [medium_2d(t), medium_1d(t)],), + ('addmv', medium_1d, lambda t: [number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'scalar'), + ('addmv', medium_1d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_2d(t), medium_1d(t)], 'two_scalars'), + ('addr', medium_2d, lambda t: [medium_1d(t), medium_1d(t)],), + ('addr', medium_2d, lambda t: [number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'scalar'), + ('addr', medium_2d, lambda t: [number(0.5, 3, t), number(0.4, 2, t), medium_1d(t), medium_1d(t)], 'two_scalars'), + ('atan2', medium_2d, lambda t: [medium_2d(t)], None, float_types), + ('fmod', small_3d, lambda t: [3], 'value'), + ('fmod', small_3d, lambda t: [small_3d_positive(t)], 'tensor'), + ('chunk', medium_2d, lambda t: [4],), + ('chunk', medium_2d, lambda t: [4, 1], 'dim'), + ('clamp', medium_2d_scaled, lambda t: [-1, 5],), + ('clone', medium_2d, lambda t: [],), + ('contiguous', medium_2d, lambda t: [],), + ('cross', new_t(M, 3, M), lambda t: [new_t(M, 3, M)(t)],), + ('cumprod', small_3d, lambda t: [1],), + ('cumsum', small_3d, lambda t: [1],), + ('dim', small_3d, lambda t: [],), + ('dist', small_2d, lambda t: [small_2d(t)],), + ('dist', small_2d, lambda t: [small_2d(t), 3], '3_norm'), + ('dist', small_2d, lambda t: [small_2d(t), 2.5], '2_5_norm'), + ('dot', medium_1d, lambda t: [medium_1d(t)],), + ('element_size', medium_1d, lambda t: [],), + ('eq', small_3d_ones, lambda t: [small_3d(t)],), + ('eq', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'), + ('ne', small_3d_ones, lambda t: [small_3d(t)],), + ('ne', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'), + ('equal', small_3d_ones, lambda t: [small_3d_ones(t)], 'equal'), + ('equal', small_3d_ones, lambda t: [small_3d(t)],), + ('expand', new_t(M, 1, M), lambda t: [M, 4, M],), + ('expand_as', new_t(M, 1, M), lambda t: [new_t(M, 4, M)(t)],), + ('fill', medium_2d, lambda t: [number(3.14, 3, t)],), + ('ge', medium_2d, lambda t: [medium_2d(t)],), + ('le', medium_2d, lambda t: [medium_2d(t)],), + ('gt', medium_2d, lambda t: [medium_2d(t)],), + ('lt', medium_2d, lambda t: [medium_2d(t)],), + ('is_contiguous', medium_2d, lambda t: [],), # TODO: can't check negative case - GPU copy will be contiguous - ('is_same_size', medium_2d, lambda t: [small_3d(t)], 'negative' ), - ('is_same_size', medium_2d, lambda t: [medium_2d(t)], 'positive' ), - ('is_set_to', medium_2d, lambda t: [medium_2d(t)], ), + ('is_same_size', medium_2d, lambda t: [small_3d(t)], 'negative'), + ('is_same_size', medium_2d, lambda t: [medium_2d(t)], 'positive'), + ('is_set_to', medium_2d, lambda t: [medium_2d(t)],), # TODO: positive case - ('kthvalue', small_3d_unique, lambda t: [3], ), - ('kthvalue', small_3d_unique, lambda t: [3, 1], 'dim' ), - ('lerp', small_3d, lambda t: [small_3d(t), 0.3], ), - ('max', small_3d_unique, lambda t: [], ), - ('max', small_3d_unique, lambda t: [1], 'dim' ), - ('max', medium_2d, lambda t: [medium_2d(t)], 'elementwise' ), - ('min', small_3d_unique, lambda t: [], ), - ('min', small_3d_unique, lambda t: [1], 'dim' ), - ('min', medium_2d, lambda t: [medium_2d(t)], 'elementwise' ), - ('mean', small_3d, lambda t: [], ), - ('mean', small_3d, lambda t: [1], 'dim' ), - ('mode', small_3d, lambda t: [], ), - ('mode', small_3d, lambda t: [1], 'dim' ), - ('remainder', small_3d, lambda t: [3], 'value' 
), - ('remainder', small_3d, lambda t: [small_3d_positive(t)], 'tensor' ), - ('std', small_3d, lambda t: [], ), - ('std', small_3d, lambda t: [1], 'dim' ), - ('var', small_3d, lambda t: [], ), - ('var', small_3d, lambda t: [1], 'dim' ), - ('ndimension', small_3d, lambda t: [], ), - ('nelement', small_3d, lambda t: [], ), - ('numel', small_3d, lambda t: [], ), - ('narrow', small_3d, lambda t: [1, 3, 2], ), - ('nonzero', small_3d, lambda t: [], ), - ('norm', small_3d, lambda t: [], ), - ('norm', small_3d, lambda t: [3], '3_norm' ), - ('norm', small_3d, lambda t: [3, 0], '3_norm_dim' ), - ('ones', small_3d, lambda t: [1, 2, 3, 4, 5], ), - ('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0], ), - ('prod', small_3d, lambda t: [], ), - ('prod', small_3d, lambda t: [1], 'dim' ), - ('sum', small_2d, lambda t: [], ), - ('sum', small_3d, lambda t: [1], 'dim' ), - ('renorm', small_3d, lambda t: [2, 1, 1], '2_norm' ), - ('renorm', small_3d, lambda t: [1.5, 1, 1], '1_5_norm' ), - ('repeat', small_2d, lambda t: [2, 2, 2], ), - ('size', new_t(1, 2, 3, 4), lambda t: [], ), - ('sort', small_3d_unique, lambda t: [], ), - ('sort', small_3d_unique, lambda t: [1], 'dim' ), - ('sort', small_3d_unique, lambda t: [1, True], 'dim_descending'), - ('split', small_3d, lambda t: [2], ), - ('split', small_3d, lambda t: [2, 1], 'dim' ), - ('squeeze', new_t(1, 2, 1, 4), lambda t: [], ), - ('squeeze', new_t(1, 2, 1, 4), lambda t: [2], 'dim' ), - ('t', new_t(1, 2), lambda t: [], ), - ('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2], ), - ('to_list', small_3d, lambda t: [], ), - ('topk', small_3d, lambda t: [2, 1, False, True], 'dim_sort' ), - ('topk', small_3d, lambda t: [2, 1, True, True], 'dim_desc_sort' ), - ('trace', medium_2d, lambda t: [], ), - ('tril', medium_2d, lambda t: [], ), - ('tril', medium_2d, lambda t: [2], 'positive' ), - ('tril', medium_2d, lambda t: [-2], 'negative' ), - ('triu', medium_2d, lambda t: [], ), - ('triu', medium_2d, lambda t: [2], 'positive' ), - ('triu', medium_2d, lambda t: [-2], 'negative' ), - ('view', small_3d, lambda t: [100, 10], ), - ('view_as', small_3d, lambda t: [t(100, 10)], ), - ('zero', small_3d, lambda t: [], ), - ('zeros', small_3d, lambda t: [1, 2, 3, 4], ), - ('rsqrt', lambda t: small_3d(t) + 1, lambda t: [], None, float_types), - ('sinh', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types), - ('tan', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types), + ('kthvalue', small_3d_unique, lambda t: [3],), + ('kthvalue', small_3d_unique, lambda t: [3, 1], 'dim'), + ('lerp', small_3d, lambda t: [small_3d(t), 0.3],), + ('max', small_3d_unique, lambda t: [],), + ('max', small_3d_unique, lambda t: [1], 'dim'), + ('max', medium_2d, lambda t: [medium_2d(t)], 'elementwise'), + ('min', small_3d_unique, lambda t: [],), + ('min', small_3d_unique, lambda t: [1], 'dim'), + ('min', medium_2d, lambda t: [medium_2d(t)], 'elementwise'), + ('mean', small_3d, lambda t: [],), + ('mean', small_3d, lambda t: [1], 'dim'), + ('mode', small_3d, lambda t: [],), + ('mode', small_3d, lambda t: [1], 'dim'), + ('remainder', small_3d, lambda t: [3], 'value'), + ('remainder', small_3d, lambda t: [small_3d_positive(t)], 'tensor'), + ('std', small_3d, lambda t: [],), + ('std', small_3d, lambda t: [1], 'dim'), + ('var', small_3d, lambda t: [],), + ('var', small_3d, lambda t: [1], 'dim'), + ('ndimension', small_3d, lambda t: [],), + ('nelement', small_3d, lambda t: [],), + ('numel', small_3d, lambda t: [],), + ('narrow', small_3d, lambda t: [1, 3, 2],), + ('nonzero', small_3d, 
lambda t: [],), + ('norm', small_3d, lambda t: [],), + ('norm', small_3d, lambda t: [3], '3_norm'), + ('norm', small_3d, lambda t: [3, 0], '3_norm_dim'), + ('ones', small_3d, lambda t: [1, 2, 3, 4, 5],), + ('permute', new_t(1, 2, 3, 4), lambda t: [2, 1, 3, 0],), + ('prod', small_3d, lambda t: [],), + ('prod', small_3d, lambda t: [1], 'dim'), + ('sum', small_2d, lambda t: [],), + ('sum', small_3d, lambda t: [1], 'dim'), + ('renorm', small_3d, lambda t: [2, 1, 1], '2_norm'), + ('renorm', small_3d, lambda t: [1.5, 1, 1], '1_5_norm'), + ('repeat', small_2d, lambda t: [2, 2, 2],), + ('size', new_t(1, 2, 3, 4), lambda t: [],), + ('sort', small_3d_unique, lambda t: [],), + ('sort', small_3d_unique, lambda t: [1], 'dim'), + ('sort', small_3d_unique, lambda t: [1, True], 'dim_descending'), + ('split', small_3d, lambda t: [2],), + ('split', small_3d, lambda t: [2, 1], 'dim'), + ('squeeze', new_t(1, 2, 1, 4), lambda t: [],), + ('squeeze', new_t(1, 2, 1, 4), lambda t: [2], 'dim'), + ('t', new_t(1, 2), lambda t: [],), + ('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2],), + ('to_list', small_3d, lambda t: [],), + ('topk', small_3d, lambda t: [2, 1, False, True], 'dim_sort'), + ('topk', small_3d, lambda t: [2, 1, True, True], 'dim_desc_sort'), + ('trace', medium_2d, lambda t: [],), + ('tril', medium_2d, lambda t: [],), + ('tril', medium_2d, lambda t: [2], 'positive'), + ('tril', medium_2d, lambda t: [-2], 'negative'), + ('triu', medium_2d, lambda t: [],), + ('triu', medium_2d, lambda t: [2], 'positive'), + ('triu', medium_2d, lambda t: [-2], 'negative'), + ('view', small_3d, lambda t: [100, 10],), + ('view_as', small_3d, lambda t: [t(100, 10)],), + ('zero', small_3d, lambda t: [],), + ('zeros', small_3d, lambda t: [1, 2, 3, 4],), + ('rsqrt', lambda t: small_3d(t) + 1, lambda t: [], None, float_types), + ('sinh', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types), + ('tan', lambda t: small_3d(t).clamp(-1, 1), lambda t: [], None, float_types), # lapack tests - ('qr', small_2d_lapack, lambda t: [], 'square', float_types), - ('qr', small_2d_lapack_skinny, lambda t: [], 'skinny', float_types), - ('qr', small_2d_lapack_fat, lambda t: [], 'fat', float_types), + ('qr', small_2d_lapack, lambda t: [], 'square', float_types), + ('qr', small_2d_lapack_skinny, lambda t: [], 'skinny', float_types), + ('qr', small_2d_lapack_fat, lambda t: [], 'fat', float_types), ] @@ -275,6 +293,8 @@ for fn in simple_pointwise_float: tests.append((fn, small_3d, lambda t: [], None, float_types)) _cycles_per_ms = None + + def get_cycles_per_ms(): """Approximate number of cycles per millisecond for torch.cuda._sleep""" global _cycles_per_ms @@ -288,6 +308,7 @@ def get_cycles_per_ms(): _cycles_per_ms = 1000000 / start.elapsed_time(end) return _cycles_per_ms + def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5): def tmp(self): cpu_tensor = tensor_constructor(t) @@ -314,6 +335,7 @@ def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5): self.assertEqual(cpu_result, gpu_result, precision) return tmp + class TestCuda(TestCase): def test_autogpu(self): @@ -412,7 +434,7 @@ class TestCuda(TestCase): y_cuda = y.cuda(1) result = comm.reduce_add((x_cuda, y_cuda)) self.assertEqual(result.get_device(), 0) - self.assertEqual(result.cpu(), x+y) + self.assertEqual(result.cpu(), x + y) def _test_scatter(self, input, chunk_sizes=None, dim=0): if torch.cuda.device_count() < 2: @@ -473,7 +495,7 @@ class TestCuda(TestCase): self._test_gather(1) def test_from_sequence(self): - seq = 
[list(range(i*4,i*4+4)) for i in range(5)] + seq = [list(range(i * 4, i * 4 + 4)) for i in range(5)] reference = torch.range(0, 19).resize_(5, 4) for t in types: cuda_type = get_gpu_type(t) @@ -526,6 +548,7 @@ class TestCuda(TestCase): @unittest.skipIf(torch.cuda.device_count() < 2, "detected only one GPU") def test_multigpu_serialization_remap(self): x = [torch.randn(4, 4).cuda(0), torch.randn(4, 4).cuda(1)] + def gpu_remap(storage, location): if location == 'cuda:1': return storage.cuda(0) @@ -666,7 +689,8 @@ for decl in tests: if not hasattr(tensor, name_inner): continue if not hasattr(gpu_tensor, name_inner): - print("Ignoring {}, because it's not implemented by torch.cuda.{}".format(name_inner, gpu_tensor.__class__.__name__)) + print("Ignoring {}, because it's not implemented by torch.cuda.{}".format( + name_inner, gpu_tensor.__class__.__name__)) continue test_name = 'test_' + t.__name__ + '_' + name_inner diff --git a/test/test_dataloader.py b/test/test_dataloader.py index dc5b8bf162..23d5ac6ca6 100644 --- a/test/test_dataloader.py +++ b/test/test_dataloader.py @@ -27,11 +27,12 @@ class TestTensorDataset(TestCase): l = torch.randn(15) source = TensorDataset(t, l) for i in range(15): - self.assertEqual(t[i:i+1], source[i][0]) - self.assertEqual(l[i:i+1], source[i][1]) + self.assertEqual(t[i:i + 1], source[i][0]) + self.assertEqual(l[i:i + 1], source[i][1]) class ErrorDataset(Dataset): + def __init__(self, size): self.size = size @@ -50,9 +51,9 @@ class TestDataLoader(TestCase): batch_size = loader.batch_size for i, (sample, target) in enumerate(loader): idx = i * batch_size - self.assertEqual(sample, self.data[idx:idx+batch_size]) - self.assertEqual(target, self.labels[idx:idx+batch_size].view(-1, 1)) - self.assertEqual(i, math.floor((len(self.dataset)-1) / batch_size)) + self.assertEqual(sample, self.data[idx:idx + batch_size]) + self.assertEqual(target, self.labels[idx:idx + batch_size].view(-1, 1)) + self.assertEqual(i, math.floor((len(self.dataset) - 1) / batch_size)) def _test_shuffle(self, loader): found_data = {i: 0 for i in range(self.data.size(0))} @@ -67,9 +68,9 @@ class TestDataLoader(TestCase): break self.assertEqual(target, self.labels.narrow(0, data_point_idx, 1)) found_labels[data_point_idx] += 1 - self.assertEqual(sum(found_data.values()), (i+1) * batch_size) - self.assertEqual(sum(found_labels.values()), (i+1) * batch_size) - self.assertEqual(i, math.floor((len(self.dataset)-1) / batch_size)) + self.assertEqual(sum(found_data.values()), (i + 1) * batch_size) + self.assertEqual(sum(found_labels.values()), (i + 1) * batch_size) + self.assertEqual(i, math.floor((len(self.dataset) - 1) / batch_size)) def _test_error(self, loader): it = iter(loader) @@ -81,10 +82,9 @@ class TestDataLoader(TestCase): errors += 1 except StopIteration: self.assertEqual(errors, - math.ceil(float(len(loader.dataset))/loader.batch_size)) + math.ceil(float(len(loader.dataset)) / loader.batch_size)) return - def test_sequential(self): self._test_sequential(DataLoader(self.dataset)) diff --git a/test/test_legacy_nn.py b/test/test_legacy_nn.py index d9b0d87615..731421f3f6 100644 --- a/test/test_legacy_nn.py +++ b/test/test_legacy_nn.py @@ -9,7 +9,9 @@ from common_nn import NNTestCase, ModuleTest, CriterionTest, iter_tensors, \ module_tests, criterion_tests, TEST_CUDA, PRECISION from common import to_gpu, freeze_rng_state, run_tests + class OldModuleTest(ModuleTest): + def __init__(self, *args, **kwargs): super(OldModuleTest, self).__init__(*args, **kwargs) self.check_inplace = 
kwargs.get('check_inplace', False) @@ -45,18 +47,18 @@ class OldModuleTest(ModuleTest): # TODO: hessian tests tests = [ OldModuleTest(nn.Add, - (torch.Size([5, 4]),), - input_size=(3, 5, 4), - desc='3D'), + (torch.Size([5, 4]),), + input_size=(3, 5, 4), + desc='3D'), OldModuleTest(nn.Add, - (1, True), - input_size=(3, 1, 4), - desc='scalar'), + (1, True), + input_size=(3, 1, 4), + desc='scalar'), OldModuleTest(nn.AddConstant, - (3.5,), - input_size=(3, 5, 4), - reference_fn=lambda i,_: i + 3.5, - check_inplace=True), + (3.5,), + input_size=(3, 5, 4), + reference_fn=lambda i, _: i + 3.5, + check_inplace=True), OldModuleTest(nn.BatchNormalization, (10,), input_size=(4, 10), @@ -88,435 +90,435 @@ tests = [ input_size=(2, 3, 4, 4, 4), desc='no_affine'), OldModuleTest(nn.CMul, - (5, 6), - input_size=(10, 5, 6), - desc='3D'), + (5, 6), + input_size=(10, 5, 6), + desc='3D'), OldModuleTest(nn.CMul, - (50, 4), - input_size=(1, 50, 4), - desc='3D_single_example'), + (50, 4), + input_size=(1, 50, 4), + desc='3D_single_example'), OldModuleTest(nn.CMul, - (1, 5), - input=torch.randn(10, 3, 5)[:,1], - desc='3D_noncontiguous'), + (1, 5), + input=torch.randn(10, 3, 5)[:, 1], + desc='3D_noncontiguous'), OldModuleTest(nn.Exp, - input_size=(2, 3, 4), - reference_fn=lambda i,_: i.exp()), + input_size=(2, 3, 4), + reference_fn=lambda i, _: i.exp()), OldModuleTest(nn.Log, - input=torch.rand(2, 3, 2) + 0.1, - reference_fn=lambda i,_: i.log()), + input=torch.rand(2, 3, 2) + 0.1, + reference_fn=lambda i, _: i.log()), OldModuleTest(nn.Clamp, - (-2., 5.), - input=torch.randn(3, 2, 50) * 6, - reference_fn=lambda i,_: i.clamp(-2, 5)), + (-2., 5.), + input=torch.randn(3, 2, 50) * 6, + reference_fn=lambda i, _: i.clamp(-2, 5)), OldModuleTest(nn.Abs, - input_size=(3, 20, 5), - reference_fn=lambda i,_: i.abs()), + input_size=(3, 20, 5), + reference_fn=lambda i, _: i.abs()), OldModuleTest(nn.Bilinear, - (2, 3, 10), - input_size=[(4, 2), (4, 3)]), + (2, 3, 10), + input_size=[(4, 2), (4, 3)]), OldModuleTest(nn.Bilinear, - (5, 4, 2), - input_size=[(2, 5), (2, 4)], - desc='small_output'), + (5, 4, 2), + input_size=[(2, 5), (2, 4)], + desc='small_output'), OldModuleTest(nn.Euclidean, - (5, 7), - input_size=(10, 5)), + (5, 7), + input_size=(10, 5)), OldModuleTest(nn.WeightedEuclidean, - (5, 7), - input_size=(10, 5)), + (5, 7), + input_size=(10, 5)), OldModuleTest(nn.Cosine, - (5, 7), - input_size=(10, 5)), + (5, 7), + input_size=(10, 5)), OldModuleTest(nn.CAddTable, - input_size=[(5, 7), (5, 7)]), + input_size=[(5, 7), (5, 7)]), OldModuleTest(nn.CSubTable, - input_size=[(5, 7), (5, 7)]), + input_size=[(5, 7), (5, 7)]), OldModuleTest(nn.CDivTable, - input=[torch.randn(1, 7), torch.rand(1, 7) + 0.1]), + input=[torch.randn(1, 7), torch.rand(1, 7) + 0.1]), OldModuleTest(nn.CMulTable, - input_size=[(5, 7), (5, 7)]), + input_size=[(5, 7), (5, 7)]), OldModuleTest(nn.Square, - input_size=(10, 2, 4), - reference_fn=lambda i,_: i.mul(i)), + input_size=(10, 2, 4), + reference_fn=lambda i, _: i.mul(i)), OldModuleTest(nn.Sqrt, - input=torch.rand(10, 2, 4)+0.01, - reference_fn=lambda i,_: i.sqrt()), + input=torch.rand(10, 2, 4) + 0.01, + reference_fn=lambda i, _: i.sqrt()), OldModuleTest(nn.Squeeze, - input_size=(2, 1, 1, 4, 5), - reference_fn=lambda i,_: i.squeeze()), + input_size=(2, 1, 1, 4, 5), + reference_fn=lambda i, _: i.squeeze()), OldModuleTest(nn.Squeeze, - (1,), - input_size=(2, 1, 1, 4, 5), - reference_fn=lambda i,_: i.squeeze(1), - desc='dim'), + (1,), + input_size=(2, 1, 1, 4, 5), + reference_fn=lambda i, _: i.squeeze(1), + 
desc='dim'), OldModuleTest(nn.Unsqueeze, - (1,), - input_size=(2, 4, 5), - reference_fn=lambda i,_: i.view(2, 1, 4, 5)), + (1,), + input_size=(2, 4, 5), + reference_fn=lambda i, _: i.view(2, 1, 4, 5)), OldModuleTest(nn.Unsqueeze, - (0,), - input_size=(2, 4, 5), - reference_fn=lambda i,_: i.view(1, 2, 4, 5), - desc='fist_dim'), + (0,), + input_size=(2, 4, 5), + reference_fn=lambda i, _: i.view(1, 2, 4, 5), + desc='fist_dim'), OldModuleTest(nn.Unsqueeze, - (3,), - input_size=(2, 4, 5), - reference_fn=lambda i,_: i.view(2, 4, 5, 1), - desc='last_dim'), + (3,), + input_size=(2, 4, 5), + reference_fn=lambda i, _: i.view(2, 4, 5, 1), + desc='last_dim'), OldModuleTest(nn.View, - (-1, 2, 20), - input_size=(2, 2, 4, 5), - reference_fn=lambda i,_: i.view(-1, 2, 20), - desc='infer_batch'), + (-1, 2, 20), + input_size=(2, 2, 4, 5), + reference_fn=lambda i, _: i.view(-1, 2, 20), + desc='infer_batch'), OldModuleTest(nn.View, - (2, 2, 2, 5), - input_size=(2, 4, 5), - reference_fn=lambda i,_: i.view(2, 2, 2, 5), - desc='split_dim'), + (2, 2, 2, 5), + input_size=(2, 4, 5), + reference_fn=lambda i, _: i.view(2, 2, 2, 5), + desc='split_dim'), OldModuleTest(nn.View, - (2, -1, 2, 5), - input_size=(2, 4, 5), - reference_fn=lambda i,_: i.view(2, -1, 2, 5), - desc='infer_middle'), + (2, -1, 2, 5), + input_size=(2, 4, 5), + reference_fn=lambda i, _: i.view(2, -1, 2, 5), + desc='infer_middle'), OldModuleTest(nn.Sum, - (1,), - input_size=(2, 4, 5), - reference_fn=lambda i,_: i.sum(1).squeeze(1)), + (1,), + input_size=(2, 4, 5), + reference_fn=lambda i, _: i.sum(1).squeeze(1)), OldModuleTest(nn.Sum, - (1, True), - input_size=(2, 4, 5), - reference_fn=lambda i,_: i.sum(1).div(i.size(1)).squeeze(1), - desc='sizeAverage'), + (1, True), + input_size=(2, 4, 5), + reference_fn=lambda i, _: i.sum(1).div(i.size(1)).squeeze(1), + desc='sizeAverage'), OldModuleTest(nn.Mean, - (1,), - input_size=(2, 4, 5), - reference_fn=lambda i,_: torch.mean(i, 1).squeeze(1)), + (1,), + input_size=(2, 4, 5), + reference_fn=lambda i, _: torch.mean(i, 1).squeeze(1)), OldModuleTest(lambda: nn.Sequential().add(nn.GradientReversal()).add(nn.GradientReversal()), - input_size=(4, 3, 2, 2), - fullname='GradientReversal'), + input_size=(4, 3, 2, 2), + fullname='GradientReversal'), OldModuleTest(nn.Identity, - input_size=(4, 3, 2, 4), - reference_fn=lambda i,_: i), + input_size=(4, 3, 2, 4), + reference_fn=lambda i, _: i), OldModuleTest(nn.DotProduct, - input_size=[(10, 4), (10, 4)], - reference_fn=lambda i,_: torch.Tensor(list( - a.dot(b) for a, b in zip(i[0], i[1]))) - ), + input_size=[(10, 4), (10, 4)], + reference_fn=lambda i, _: torch.Tensor(list( + a.dot(b) for a, b in zip(i[0], i[1]))) + ), OldModuleTest(nn.CosineDistance, - input_size=[(10, 4), (10, 4)], - reference_fn=lambda i,_: torch.Tensor(list( - a.dot(b) / (a.norm(2) * b.norm(2)) for a, b in zip(i[0], i[1]))) - ), + input_size=[(10, 4), (10, 4)], + reference_fn=lambda i, _: torch.Tensor(list( + a.dot(b) / (a.norm(2) * b.norm(2)) for a, b in zip(i[0], i[1]))) + ), OldModuleTest(nn.JoinTable, - (0,), - input_size=[(10, 4), (10, 4)], - reference_fn=lambda i,_: torch.cat(i, 0), - desc='first_dim'), + (0,), + input_size=[(10, 4), (10, 4)], + reference_fn=lambda i, _: torch.cat(i, 0), + desc='first_dim'), OldModuleTest(nn.JoinTable, - (2,), - input_size=[(2, 4, 2), (2, 4, 2)], - reference_fn=lambda i,_: torch.cat(i, 2), - desc='positive_dim_index'), + (2,), + input_size=[(2, 4, 2), (2, 4, 2)], + reference_fn=lambda i, _: torch.cat(i, 2), + desc='positive_dim_index'), OldModuleTest(nn.JoinTable, 
- (-1,), - input_size=[(2, 4, 2, 4), (2, 4, 2, 4)], - reference_fn=lambda i,_: torch.cat(i, 3), - desc='negative_dim_index'), + (-1,), + input_size=[(2, 4, 2, 4), (2, 4, 2, 4)], + reference_fn=lambda i, _: torch.cat(i, 3), + desc='negative_dim_index'), OldModuleTest(nn.MM, - input_size=[(4, 5, 3), (4, 3, 2)], - reference_fn=lambda i,_: torch.bmm(*i)), + input_size=[(4, 5, 3), (4, 3, 2)], + reference_fn=lambda i, _: torch.bmm(*i)), OldModuleTest(nn.MV, - input_size=[(4, 5, 3), (4, 3)], - reference_fn=lambda i,_: torch.bmm(i[0], i[1].view(i[1].size(0), i[1].size(1), 1)).squeeze()), + input_size=[(4, 5, 3), (4, 3)], + reference_fn=lambda i, _: torch.bmm(i[0], i[1].view(i[1].size(0), i[1].size(1), 1)).squeeze()), OldModuleTest(nn.Max, - input_size=(4, 5, 3), - reference_fn=lambda i,_: torch.max(i, 0)[0].squeeze()), + input_size=(4, 5, 3), + reference_fn=lambda i, _: torch.max(i, 0)[0].squeeze()), OldModuleTest(nn.Max, - (1,), - input_size=(4, 5, 3), - reference_fn=lambda i,_: torch.max(i, 1)[0].squeeze(), - desc='with_dimension'), + (1,), + input_size=(4, 5, 3), + reference_fn=lambda i, _: torch.max(i, 1)[0].squeeze(), + desc='with_dimension'), OldModuleTest(nn.Min, - input_size=(4, 5, 3), - reference_fn=lambda i,_: torch.min(i, 0)[0].squeeze()), + input_size=(4, 5, 3), + reference_fn=lambda i, _: torch.min(i, 0)[0].squeeze()), OldModuleTest(nn.Min, - (1,), - input_size=(4, 5, 3), - reference_fn=lambda i,_: torch.min(i, 1)[0].squeeze(), - desc='with_dimension'), + (1,), + input_size=(4, 5, 3), + reference_fn=lambda i, _: torch.min(i, 1)[0].squeeze(), + desc='with_dimension'), OldModuleTest(nn.MixtureTable, - tuple(), - input_size=[(5, 3), (5, 3, 6)]), + tuple(), + input_size=[(5, 3), (5, 3, 6)]), OldModuleTest(nn.LookupTable, - (4, 3), - input=torch.randperm(2).repeat(1, 2), - jacobian_input=False), + (4, 3), + input=torch.randperm(2).repeat(1, 2), + jacobian_input=False), OldModuleTest(nn.Mul, - input_size=(2, 3, 4, 2), - reference_fn=lambda i,p: i * p[0][0]), + input_size=(2, 3, 4, 2), + reference_fn=lambda i, p: i * p[0][0]), OldModuleTest(nn.MulConstant, - (4,), - input_size=(2, 3, 4, 2), - reference_fn=lambda i,_: i * 4, - check_inplace=True), + (4,), + input_size=(2, 3, 4, 2), + reference_fn=lambda i, _: i * 4, + check_inplace=True), OldModuleTest(nn.Narrow, - (0, 0), - input_size=(2, 3, 4, 2), - reference_fn=lambda i,_: i.narrow(0, 0, 1)), + (0, 0), + input_size=(2, 3, 4, 2), + reference_fn=lambda i, _: i.narrow(0, 0, 1)), OldModuleTest(nn.Narrow, - (1, 1, 2), - input_size=(2, 3, 4, 2), - reference_fn=lambda i,_: i.narrow(1, 1, 2), - desc='length'), + (1, 1, 2), + input_size=(2, 3, 4, 2), + reference_fn=lambda i, _: i.narrow(1, 1, 2), + desc='length'), OldModuleTest(nn.Transpose, - ((1, 2), (1, 3)), - input_size=(2, 3, 4, 5), - reference_fn=lambda i,_: i.transpose(1, 2).transpose(1, 3)), + ((1, 2), (1, 3)), + input_size=(2, 3, 4, 5), + reference_fn=lambda i, _: i.transpose(1, 2).transpose(1, 3)), OldModuleTest(nn.Transpose, - ((1, 2),), - input_size=(2, 3, 4, 5), - reference_fn=lambda i,_: i.transpose(1, 2), - desc='single_arg'), + ((1, 2),), + input_size=(2, 3, 4, 5), + reference_fn=lambda i, _: i.transpose(1, 2), + desc='single_arg'), # TODO: this seems to be very slow OldModuleTest(nn.Replicate, - (2, 1), - input_size=(10, 3, 4, 5), - reference_fn=lambda i,_: i.view(10, 1, 3, 4, 5).expand(10, 2, 3, 4, 5)), + (2, 1), + input_size=(10, 3, 4, 5), + reference_fn=lambda i, _: i.view(10, 1, 3, 4, 5).expand(10, 2, 3, 4, 5)), OldModuleTest(nn.Padding, - (0, 2, -10), - input_size=(2, 3, 4, 
5)), + (0, 2, -10), + input_size=(2, 3, 4, 5)), OldModuleTest(nn.Padding, - (0, 2, -10, 1), - input_size=(2, 3, 4, 5), - desc='index'), + (0, 2, -10, 1), + input_size=(2, 3, 4, 5), + desc='index'), OldModuleTest(nn.Padding, - (0, -2, -10, 1), - input_size=(2, 3, 4, 5), - desc='negative_pad'), + (0, -2, -10, 1), + input_size=(2, 3, 4, 5), + desc='negative_pad'), OldModuleTest(nn.PartialLinear, - (5, 6), - input_size=(4, 5)), + (5, 6), + input_size=(4, 5)), OldModuleTest(lambda: nn.PartialLinear(5, 6).setPartition(torch.Tensor((2, 4))), - input_size=(4, 5), - fullname='PartialLinear_setPartition'), + input_size=(4, 5), + fullname='PartialLinear_setPartition'), OldModuleTest(nn.Power, - (2,), - input_size=(2, 3, 4, 5)), + (2,), + input_size=(2, 3, 4, 5)), OldModuleTest(nn.Power, - (1.5,), - input=torch.rand(3, 4, 5), - desc='fractional'), + (1.5,), + input=torch.rand(3, 4, 5), + desc='fractional'), OldModuleTest(nn.Reshape, - (4, 5), - input_size=(3, 4*5), - desc='add_dim'), + (4, 5), + input_size=(3, 4 * 5), + desc='add_dim'), OldModuleTest(nn.Reshape, - (4*5,), - input_size=(3, 4, 5), - desc='squash_dim'), + (4 * 5,), + input_size=(3, 4, 5), + desc='squash_dim'), OldModuleTest(nn.Select, - (1, 2), - input_size=(3, 4, 5), - reference_fn=lambda i,_: i.select(1, 2)), + (1, 2), + input_size=(3, 4, 5), + reference_fn=lambda i, _: i.select(1, 2)), OldModuleTest(nn.SelectTable, - (1,), - input_size=[(1,), (2,), (3,), (4,)], - reference_fn=lambda i,_: i[1]), + (1,), + input_size=[(1,), (2,), (3,), (4,)], + reference_fn=lambda i, _: i[1]), OldModuleTest(nn.SpatialAveragePooling, - (2, 2), - input_size=(2, 3, 6, 6)), + (2, 2), + input_size=(2, 3, 6, 6)), OldModuleTest(nn.SpatialAveragePooling, - (2, 2, 2, 2), - input_size=(2, 3, 6, 6), - desc='stride'), + (2, 2, 2, 2), + input_size=(2, 3, 6, 6), + desc='stride'), OldModuleTest(nn.SpatialAveragePooling, - (2, 2, 2, 2, 1, 1), - input_size=(2, 3, 6, 6), - desc='stride_pad'), + (2, 2, 2, 2, 1, 1), + input_size=(2, 3, 6, 6), + desc='stride_pad'), OldModuleTest(nn.SpatialAdaptiveMaxPooling, - (4, 4), - input_size=(2, 3, 8, 8), - reference_fn=lambda i,_: nn.SpatialMaxPooling(2, 2).forward(i)), + (4, 4), + input_size=(2, 3, 8, 8), + reference_fn=lambda i, _: nn.SpatialMaxPooling(2, 2).forward(i)), OldModuleTest(nn.SpatialAdaptiveMaxPooling, - (4, 4), - input_size=(2, 3, 7, 11), - desc='irregular'), + (4, 4), + input_size=(2, 3, 7, 11), + desc='irregular'), OldModuleTest(nn.SpatialConvolution, - (3, 4, 3, 3), - input_size=(2, 3, 6, 6)), + (3, 4, 3, 3), + input_size=(2, 3, 6, 6)), OldModuleTest(nn.SpatialConvolution, - (3, 4, 3, 3, 2, 2), - input_size=(2, 3, 6, 6), - desc='strided'), + (3, 4, 3, 3, 2, 2), + input_size=(2, 3, 6, 6), + desc='strided'), OldModuleTest(nn.SpatialConvolution, - (3, 4, 3, 3, 2, 2, 1, 1), - input_size=(2, 3, 6, 6), - desc='padding'), + (3, 4, 3, 3, 2, 2, 1, 1), + input_size=(2, 3, 6, 6), + desc='padding'), OldModuleTest(nn.SpatialConvolutionLocal, - (3, 2, 4, 4, 2, 2), - input_size=(1, 3, 4, 4)), + (3, 2, 4, 4, 2, 2), + input_size=(1, 3, 4, 4)), OldModuleTest(nn.SpatialConvolutionLocal, - (3, 2, 6, 6, 2, 2, 2, 2), - input_size=(2, 3, 6, 6), - desc='stride'), + (3, 2, 6, 6, 2, 2, 2, 2), + input_size=(2, 3, 6, 6), + desc='stride'), OldModuleTest(nn.SpatialConvolutionLocal, - (3, 2, 6, 6, 2, 2, 2, 2, 1, 1), - input_size=(2, 3, 6, 6), - desc='stride_pad'), + (3, 2, 6, 6, 2, 2, 2, 2, 1, 1), + input_size=(2, 3, 6, 6), + desc='stride_pad'), OldModuleTest(nn.SpatialDivisiveNormalization, - (3,), - input_size=(2, 3, 8, 8)), + (3,), + 
input_size=(2, 3, 8, 8)), OldModuleTest(nn.SpatialContrastiveNormalization, - (3,), - input_size=(2, 3, 8, 8)), + (3,), + input_size=(2, 3, 8, 8)), OldModuleTest(nn.SpatialDilatedConvolution, - (3, 2, 3, 3, 2, 2, 1, 1, 2, 2), - input_size=(2, 3, 8, 8)), + (3, 2, 3, 3, 2, 2, 1, 1, 2, 2), + input_size=(2, 3, 8, 8)), OldModuleTest(nn.SpatialDilatedConvolution, - (3, 2, 3, 3, 2, 2, 1, 1, 2, 2), - input_size=(2, 3, 8, 8), - desc='stride_pad'), + (3, 2, 3, 3, 2, 2, 1, 1, 2, 2), + input_size=(2, 3, 8, 8), + desc='stride_pad'), OldModuleTest(nn.SpatialMaxPooling, - (3, 3, 2, 2, 1, 1), - input_size=(1, 3, 7, 7)), + (3, 3, 2, 2, 1, 1), + input_size=(1, 3, 7, 7)), OldModuleTest(nn.SpatialReflectionPadding, - (1, 2, 3, 4), - input_size=(2, 3, 8, 8)), + (1, 2, 3, 4), + input_size=(2, 3, 8, 8)), OldModuleTest(nn.SpatialReplicationPadding, - (1, 2, 3, 4), - input_size=(2, 3, 4, 4)), + (1, 2, 3, 4), + input_size=(2, 3, 4, 4)), OldModuleTest(nn.SpatialZeroPadding, - (1, 2, 3, 4), - input_size=(2, 3, 4, 4)), + (1, 2, 3, 4), + input_size=(2, 3, 4, 4)), OldModuleTest(nn.SpatialConvolutionMap, - (nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3), - input_size=(3, 5, 5), - desc='oneToOne'), + (nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3), + input_size=(3, 5, 5), + desc='oneToOne'), OldModuleTest(nn.SpatialConvolutionMap, - (nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3, 2, 2), - input_size=(3, 5, 5), - desc='oneToOne_stride'), + (nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3, 2, 2), + input_size=(3, 5, 5), + desc='oneToOne_stride'), OldModuleTest(nn.SpatialConvolutionMap, - (nn.SpatialConvolutionMap.maps.full(3, 4), 3, 3), - input_size=(3, 5, 5), - desc='full'), + (nn.SpatialConvolutionMap.maps.full(3, 4), 3, 3), + input_size=(3, 5, 5), + desc='full'), OldModuleTest(nn.SpatialFullConvolutionMap, - (nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3), - input_size=(3, 5, 5), - desc='oneToOne'), + (nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3), + input_size=(3, 5, 5), + desc='oneToOne'), OldModuleTest(nn.SpatialFullConvolutionMap, - (nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3, 2, 2), - input_size=(3, 5, 5), - desc='oneToOne_stride'), + (nn.SpatialConvolutionMap.maps.oneToOne(3), 3, 3, 2, 2), + input_size=(3, 5, 5), + desc='oneToOne_stride'), OldModuleTest(nn.SpatialFullConvolutionMap, - (nn.SpatialConvolutionMap.maps.full(3, 4), 3, 3), - input_size=(3, 5, 5), - desc='full'), + (nn.SpatialConvolutionMap.maps.full(3, 4), 3, 3), + input_size=(3, 5, 5), + desc='full'), # TODO: test CUDA OldModuleTest(lambda: nn.SpatialFractionalMaxPooling(2, 2, 0.5, 0.5).fixPoolingRegions(), - input_size=(1, 3, 5, 5), - fullname='SpatialFractionalMaxPooling_ratio', - test_cuda=False), + input_size=(1, 3, 5, 5), + fullname='SpatialFractionalMaxPooling_ratio', + test_cuda=False), OldModuleTest(lambda: nn.SpatialFractionalMaxPooling(2, 2, 4, 4).fixPoolingRegions(), - input_size=(1, 3, 7, 7), - fullname='SpatialFractionalMaxPooling_size', - test_cuda=False), + input_size=(1, 3, 7, 7), + fullname='SpatialFractionalMaxPooling_size', + test_cuda=False), OldModuleTest(nn.SpatialFullConvolution, - (3, 4, 3, 3, 2, 2, 1, 1, 1, 1), - input_size=(1, 3, 7, 7)), + (3, 4, 3, 3, 2, 2, 1, 1, 1, 1), + input_size=(1, 3, 7, 7)), OldModuleTest(nn.SpatialLPPooling, - (3, 2, 2, 2, 2, 2), - input_size=(1, 3, 7, 7)), + (3, 2, 2, 2, 2, 2), + input_size=(1, 3, 7, 7)), OldModuleTest(nn.SpatialSubSampling, - (3, 3, 3, 2, 2), - input_size=(1, 3, 7, 7)), + (3, 3, 3, 2, 2), + input_size=(1, 3, 7, 7)), OldModuleTest(nn.SpatialSubtractiveNormalization, - 
(3,), - input_size=(1, 3, 7, 7)), + (3,), + input_size=(1, 3, 7, 7)), OldModuleTest(nn.SpatialSubtractiveNormalization, - (3, torch.rand(3)), - input_size=(1, 3, 7, 7), - desc='kernel'), + (3, torch.rand(3)), + input_size=(1, 3, 7, 7), + desc='kernel'), OldModuleTest(nn.SpatialUpSamplingNearest, - (2,), - input_size=(1, 3, 4, 4)), + (2,), + input_size=(1, 3, 4, 4)), OldModuleTest(nn.TemporalConvolution, - (4, 5, 3), - input_size=(2, 10, 4)), + (4, 5, 3), + input_size=(2, 10, 4)), OldModuleTest(nn.TemporalConvolution, - (4, 5, 3, 2), - input_size=(2, 10, 4), - desc='stride'), + (4, 5, 3, 2), + input_size=(2, 10, 4), + desc='stride'), # TODO: this runs in non-batch mode only OldModuleTest(nn.TemporalSubSampling, - (4, 3), - input_size=(10, 4)), + (4, 3), + input_size=(10, 4)), OldModuleTest(nn.TemporalSubSampling, - (4, 3, 2), - input_size=(10, 4), - desc='stride'), + (4, 3, 2), + input_size=(10, 4), + desc='stride'), OldModuleTest(nn.VolumetricAveragePooling, - (2, 2, 2), - input_size=(2, 3, 4, 4, 4)), + (2, 2, 2), + input_size=(2, 3, 4, 4, 4)), OldModuleTest(nn.VolumetricAveragePooling, - (2, 2, 2, 2, 2, 2), - input_size=(2, 3, 5, 5, 5), - desc='stride'), + (2, 2, 2, 2, 2, 2), + input_size=(2, 3, 5, 5, 5), + desc='stride'), OldModuleTest(nn.VolumetricConvolution, - (3, 4, 2, 2, 2), - input_size=(2, 3, 3, 3, 3)), + (3, 4, 2, 2, 2), + input_size=(2, 3, 3, 3, 3)), OldModuleTest(nn.VolumetricConvolution, - (3, 4, 2, 2, 2, 2, 2, 2), - input_size=(2, 3, 5, 5, 5), - desc='stride'), + (3, 4, 2, 2, 2, 2, 2, 2), + input_size=(2, 3, 5, 5, 5), + desc='stride'), OldModuleTest(nn.VolumetricConvolution, - (3, 4, 2, 2, 2, 2, 2, 2, 1, 1, 1), - input_size=(2, 3, 5, 5, 5), - desc='stride_padding'), + (3, 4, 2, 2, 2, 2, 2, 2, 1, 1, 1), + input_size=(2, 3, 5, 5, 5), + desc='stride_padding'), OldModuleTest(nn.VolumetricFullConvolution, - (2, 3, 2, 2, 2), - input_size=(1, 2, 4, 4, 4)), + (2, 3, 2, 2, 2), + input_size=(1, 2, 4, 4, 4)), OldModuleTest(nn.VolumetricMaxPooling, - (2, 2, 2), - input_size=(2, 3, 5, 5, 5)), + (2, 2, 2), + input_size=(2, 3, 5, 5, 5)), OldModuleTest(nn.VolumetricMaxPooling, - (2, 2, 2, 2, 2, 2), - input_size=(2, 3, 5, 5, 5), - desc='stride'), + (2, 2, 2, 2, 2, 2), + input_size=(2, 3, 5, 5, 5), + desc='stride'), OldModuleTest(nn.VolumetricMaxPooling, - (2, 2, 2, 2, 2, 2, 1, 1, 1), - input_size=(2, 3, 5, 5, 5), - desc='stride_padding'), + (2, 2, 2, 2, 2, 2, 1, 1, 1), + input_size=(2, 3, 5, 5, 5), + desc='stride_padding'), OldModuleTest(nn.VolumetricReplicationPadding, - (1, 2, 3, 4, 5, 6), - input_size=(2, 3, 5, 5, 5)), + (1, 2, 3, 4, 5, 6), + input_size=(2, 3, 5, 5, 5)), CriterionTest(nn.L1Cost, - input=torch.randn(2, 3, 4, 5), - target=None), + input=torch.randn(2, 3, 4, 5), + target=None), CriterionTest(nn.L1HingeEmbeddingCriterion, - input=[torch.randn(2, 3, 4, 5), torch.randn(2, 3, 4, 5)], - target=1), + input=[torch.randn(2, 3, 4, 5), torch.randn(2, 3, 4, 5)], + target=1), CriterionTest(nn.L1HingeEmbeddingCriterion, - (2,), - input=[torch.randn(2, 3, 4, 5), torch.randn(2, 3, 4, 5)], - target=1, - desc='margin'), + (2,), + input=[torch.randn(2, 3, 4, 5), torch.randn(2, 3, 4, 5)], + target=1, + desc='margin'), CriterionTest(nn.WeightedMSECriterion, - (torch.rand(3, 4, 5),), - input=torch.randn(2, 3, 4, 5), - target=torch.randn(2, 3, 4, 5)), + (torch.rand(3, 4, 5),), + input=torch.randn(2, 3, 4, 5), + target=torch.randn(2, 3, 4, 5)), CriterionTest(nn.MarginCriterion, - input_size=(5, 10), - target=torch.randn(5, 10).sign()), + input_size=(5, 10), + target=torch.randn(5, 10).sign()), 
CriterionTest(nn.ClassSimplexCriterion, - (30,), - input=torch.randn(5, 30).mul(10).renorm(2, 0, 1), - target=torch.rand(5).mul(30).floor().long(), - desc='margin'), + (30,), + input=torch.randn(5, 30).mul(10).renorm(2, 0, 1), + target=torch.rand(5).mul(30).floor().long(), + desc='margin'), ] # TODO: FlattenTable gradient # TODO: NarrowTable gradient @@ -527,30 +529,32 @@ tests = [ for p in (1, 2, 1.5): tests.append( OldModuleTest(nn.Normalize, - (p,), - input_size=(4, 5), - # Eh, we need to use p as a default, so it's passed by value - reference_fn=lambda i,_,p=p: i.div(i.norm(p, 1).expand_as(i)), - desc=str(p)), + (p,), + input_size=(4, 5), + # Eh, we need to use p as a default, so it's passed by value + reference_fn=lambda i, _, p=p: i.div(i.norm(p, 1).expand_as(i)), + desc=str(p)), ) -for p in range(1, 4+1): +for p in range(1, 4 + 1): tests.append( OldModuleTest(nn.PairwiseDistance, - (p,), - input_size=[(4, 10), (4, 10)], - desc=str(p)) + (p,), + input_size=[(4, 10), (4, 10)], + desc=str(p)) ) + def build_spatial_unpooling_net(): pool = nn.SpatialMaxPooling(2, 2, 2, 2) unpool = nn.SpatialMaxUnpooling(pool) return nn.Sequential().add(pool).add(unpool) tests.append( - OldModuleTest(build_spatial_unpooling_net, - input_size=(1, 3, 10, 10), - desc='SpatialMaxUnpooling') - ) + OldModuleTest(build_spatial_unpooling_net, + input_size=(1, 3, 10, 10), + desc='SpatialMaxUnpooling') +) + def build_volumetric_unpooling_net(): pool = nn.VolumetricMaxPooling(2, 2, 2, 2) @@ -558,10 +562,11 @@ def build_volumetric_unpooling_net(): return nn.Sequential().add(pool).add(unpool) tests.append( - OldModuleTest(build_volumetric_unpooling_net, - input_size=(1, 3, 10, 10), - desc='VolumetricMaxUnpooling') - ) + OldModuleTest(build_volumetric_unpooling_net, + input_size=(1, 3, 10, 10), + desc='VolumetricMaxUnpooling') +) + def prepare_tests(): def add_test(test): @@ -571,8 +576,8 @@ def prepare_tests(): raise RuntimeError('Found two tests with the same name: ' + test_name) if hasattr(TestNN, cuda_test_name): raise RuntimeError('Found two tests with the same name: ' + cuda_test_name) - setattr(TestNN, test_name, lambda self,test=test: test(self)) - setattr(TestNN, cuda_test_name, lambda self,test=test: test.test_cuda(self)) + setattr(TestNN, test_name, lambda self, test=test: test(self)) + setattr(TestNN, cuda_test_name, lambda self, test=test: test.test_cuda(self)) name_remap = { 'Conv2d': 'SpatialConvolution', 'MaxPool2d': 'SpatialMaxPooling', @@ -613,6 +618,7 @@ def prepare_tests(): test = CriterionTest(**test_params) add_test(test) + class TestNN(NNTestCase): def _forward(self, module, input): @@ -636,19 +642,19 @@ class TestNN(NNTestCase): def test_Dropout(self): p = 0.2 - input = torch.Tensor(1000).fill_(1-p) + input = torch.Tensor(1000).fill_(1 - p) module = nn.Dropout(p) output = module.forward(input) - self.assertLess(abs(output.mean() - (1-p)), 0.05) + self.assertLess(abs(output.mean() - (1 - p)), 0.05) gradInput = module.backward(input, input) - self.assertLess(abs(gradInput.mean() - (1-p)), 0.05) + self.assertLess(abs(gradInput.mean() - (1 - p)), 0.05) module = nn.Dropout(p, True) output = module.forward(input.clone()) - self.assertLess(abs(output.mean() - (1-p)), 0.05) + self.assertLess(abs(output.mean() - (1 - p)), 0.05) gradInput = module.backward(input.clone(), input.clone()) - self.assertLess(abs(gradInput.mean() - (1-p)), 0.05) + self.assertLess(abs(gradInput.mean() - (1 - p)), 0.05) # Check that these don't raise errors module.__repr__() @@ -664,9 +670,9 @@ class TestNN(NNTestCase): module = 
nn.SpatialDropout(p) module.training() output = module.forward(input) - self.assertLess(abs(output.mean() - (1-p)), 0.05) + self.assertLess(abs(output.mean() - (1 - p)), 0.05) gradInput = module.backward(input, input) - self.assertLess(abs(gradInput.mean() - (1-p)), 0.05) + self.assertLess(abs(gradInput.mean() - (1 - p)), 0.05) # Check that these don't raise errors module.__repr__() @@ -674,18 +680,18 @@ class TestNN(NNTestCase): def test_VolumetricDropout(self): p = 0.2 - bsz = random.randint(1,5) - t = random.randint(1,5) - w = random.randint(1,5) - h = random.randint(1,5) + bsz = random.randint(1, 5) + t = random.randint(1, 5) + w = random.randint(1, 5) + h = random.randint(1, 5) nfeats = 1000 input = torch.Tensor(bsz, nfeats, t, w, h).fill_(1) module = nn.VolumetricDropout(p) module.training() output = module.forward(input) - self.assertLess(abs(output.mean() - (1-p)), 0.05) + self.assertLess(abs(output.mean() - (1 - p)), 0.05) gradInput = module.backward(input, input) - self.assertLess(abs(gradInput.mean() - (1-p)), 0.05) + self.assertLess(abs(gradInput.mean() - (1 - p)), 0.05) # Check that these don't raise errors module.__repr__() @@ -706,7 +712,7 @@ class TestNN(NNTestCase): self.assertTrue(output[input.lt(0)].eq(0).all()) def test_Copy(self): - input = torch.randn(3,4).double() + input = torch.randn(3, 4).double() c = nn.Copy(torch.DoubleTensor, torch.FloatTensor) output = c.forward(input) self.assertEqual(torch.typename(output), 'torch.FloatTensor') @@ -833,9 +839,9 @@ class TestNN(NNTestCase): def test_ParallelTable(self): input = torch.randn(3, 4, 5) p = nn.ParallelTable() - p.add(nn.View(4,5,1)) - p.add(nn.View(4,5,1)) - p.add(nn.View(4,5,1)) + p.add(nn.View(4, 5, 1)) + p.add(nn.View(4, 5, 1)) + p.add(nn.View(4, 5, 1)) m = nn.Sequential() m.add(nn.SplitTable(0)) m.add(p) @@ -846,7 +852,7 @@ class TestNN(NNTestCase): str(p) output = m.forward(input) - output2 = input.transpose(0,2).transpose(0,1) + output2 = input.transpose(0, 2).transpose(0, 1) self.assertEqual(output2, output) gradInput = m.backward(input, output2) @@ -854,15 +860,15 @@ class TestNN(NNTestCase): def test_ConcatTable(self): input = [ - torch.randn(3, 4).float(), torch.randn(3, 4).float(), [torch.randn(3, 4).float()] + torch.randn(3, 4).float(), torch.randn(3, 4).float(), [torch.randn(3, 4).float()] ] _gradOutput = [ - torch.randn(3, 3,4).float(), torch.randn(3, 3,4).float(), torch.randn(3, 3,4).float() + torch.randn(3, 3, 4).float(), torch.randn(3, 3, 4).float(), torch.randn(3, 3, 4).float() ] gradOutput = [ - [_gradOutput[0][0], _gradOutput[1][0], [_gradOutput[2][0]]], - [_gradOutput[0][1], _gradOutput[1][1], [_gradOutput[2][1]]], - [_gradOutput[0][2], _gradOutput[1][2], [_gradOutput[2][2]]] + [_gradOutput[0][0], _gradOutput[1][0], [_gradOutput[2][0]]], + [_gradOutput[0][1], _gradOutput[1][1], [_gradOutput[2][1]]], + [_gradOutput[0][2], _gradOutput[1][2], [_gradOutput[2][2]]] ] module = nn.ConcatTable() module.add(nn.Identity()) @@ -878,7 +884,8 @@ class TestNN(NNTestCase): output2 = [input, input, input] self.assertEqual(output2, output) gradInput = module.backward(input, gradOutput) - gradInput2 = [_gradOutput[0].sum(0).squeeze(0), _gradOutput[1].sum(0).squeeze(0), [_gradOutput[2].sum(0).squeeze(0)]] + gradInput2 = [_gradOutput[0].sum(0).squeeze(0), _gradOutput[1].sum( + 0).squeeze(0), [_gradOutput[2].sum(0).squeeze(0)]] self.assertTrue(isinstance(gradInput, list)) self.assertFalse(isinstance(gradInput[0], list)) self.assertFalse(isinstance(gradInput[1], list)) @@ -910,25 +917,26 @@ class 
TestNN(NNTestCase): input = torch.randn(2, 3, 12, 12) gradOutput = torch.randn(2, int(outputSize.sum()), 12, 12) concat = nn.DepthConcat(1) - concat.add(nn.SpatialConvolution(3, outputSize[0], 1, 1, 1, 1)) #> 2, 5, 12, 12 - concat.add(nn.SpatialConvolution(3, outputSize[1], 3, 3, 1, 1)) #> 2, 6, 10, 10 - concat.add(nn.SpatialConvolution(3, outputSize[2], 4, 4, 1, 1)) #> 2, 7, 9, 9 - concat.add(nn.SpatialConvolution(3, outputSize[3], 5, 5, 1, 1)) #> 2, 8, 8, 8 + concat.add(nn.SpatialConvolution(3, outputSize[0], 1, 1, 1, 1)) # > 2, 5, 12, 12 + concat.add(nn.SpatialConvolution(3, outputSize[1], 3, 3, 1, 1)) # > 2, 6, 10, 10 + concat.add(nn.SpatialConvolution(3, outputSize[2], 4, 4, 1, 1)) # > 2, 7, 9, 9 + concat.add(nn.SpatialConvolution(3, outputSize[3], 5, 5, 1, 1)) # > 2, 8, 8, 8 concat.zeroGradParameters() # forward/backward outputConcat = concat.forward(input) gradInputConcat = concat.backward(input, gradOutput) # the spatial dims are the largest, the nFilters is the sum - output = torch.Tensor(2, int(outputSize.sum()), 12, 12).zero_() # zero for padding - narrows = ( (slice(None), slice(0, 5), slice(None), slice(None)), (slice(None), slice(5, 11), slice(1, 11), slice(1, 11)), (slice(None), slice(11, 18), slice(1, 10), slice(1, 10)), (slice(None), slice(18, 26), slice(2, 10), slice(2, 10)) ) + output = torch.Tensor(2, int(outputSize.sum()), 12, 12).zero_() # zero for padding + narrows = ((slice(None), slice(0, 5), slice(None), slice(None)), (slice(None), slice(5, 11), slice(1, 11), slice( + 1, 11)), (slice(None), slice(11, 18), slice(1, 10), slice(1, 10)), (slice(None), slice(18, 26), slice(2, 10), slice(2, 10))) gradInput = input.clone().zero_() for i in range(4): - conv = concat.get(i) - gradWeight = conv.gradWeight.clone() - conv.zeroGradParameters() - output[narrows[i]].copy_(conv.forward(input)) - gradInput.add_(conv.backward(input, gradOutput[narrows[i]])) - self.assertEqual(gradWeight, conv.gradWeight) + conv = concat.get(i) + gradWeight = conv.gradWeight.clone() + conv.zeroGradParameters() + output[narrows[i]].copy_(conv.forward(input)) + gradInput.add_(conv.backward(input, gradOutput[narrows[i]])) + self.assertEqual(gradWeight, conv.gradWeight) self.assertEqual(output, outputConcat) self.assertEqual(gradInput, gradInputConcat) @@ -979,7 +987,7 @@ class TestNN(NNTestCase): weight = 1 m = nn.L1Penalty(weight, False, False) - input = torch.rand(2,10).add_(-0.5) + input = torch.rand(2, 10).add_(-0.5) input[0][0] = 0 m.forward(input) @@ -988,7 +996,7 @@ class TestNN(NNTestCase): self.assertEqual(input.abs().sum() * weight, m.loss) true_grad = (input.gt(0).type_as(grad) + - input.lt(0).type_as(grad).mul_(-1)).mul_(weight) + input.lt(0).type_as(grad).mul_(-1)).mul_(weight) self.assertEqual(true_grad, grad) # Check that these don't raise errors @@ -1023,7 +1031,7 @@ class TestNN(NNTestCase): mc = nn.MultiCriterion().add(nll, 0.5).add(nll2) output = mc.forward(input, target) - output2 = nll.forward(input, target)/2 + nll2.forward(input, target) + output2 = nll.forward(input, target) / 2 + nll2.forward(input, target) self.assertEqual(output, output2) gradInput = mc.backward(input, target) @@ -1072,7 +1080,7 @@ class TestNN(NNTestCase): mse = nn.MSECriterion() pc = nn.ParallelCriterion().add(nll, 0.5).add(mse) output = pc.forward(input, target) - output2 = nll.forward(input[0], target[0])/2 + mse.forward(input[1], target[1]) + output2 = nll.forward(input[0], target[0]) / 2 + mse.forward(input[1], target[1]) self.assertEqual(output, output2) gradInput2 = [nll.backward(input[0], 
target[0]).clone().div(2), mse.backward(input[1], target[1])] gradInput = pc.backward(input, target) @@ -1096,7 +1104,7 @@ class TestNN(NNTestCase): mse = nn.MSECriterion() pc = nn.ParallelCriterion(True).add(mse, 0.5).add(nn.MSECriterion()) output = pc.forward(input, target) - output2 = mse.forward(input[0], target)/2 + mse.forward(input[1], target) + output2 = mse.forward(input[0], target) / 2 + mse.forward(input[1], target) self.assertEqual(output, output2) gradInput = pc.backward(input, target) gradInput2 = [mse.backward(input[0], target).clone().div(2), mse.backward(input[1], target)] @@ -1112,11 +1120,12 @@ class TestNN(NNTestCase): pc = nn.ParallelCriterion().add(nll, 0.5).add(mse) pc2 = nn.ParallelCriterion().add(nll2, 0.4).add(pc) output = pc2.forward(input, target) - output2 = nll2.forward(input[0], target[0])*0.4 + nll.forward(input[1][0], target[1][0])/2 + mse.forward(input[1][1], target[1][1]) + output2 = nll2.forward(input[0], target[0]) * 0.4 + nll.forward(input[1][0], + target[1][0]) / 2 + mse.forward(input[1][1], target[1][1]) self.assertEqual(output, output2) gradInput2 = [ - nll2.backward(input[0], target[0]).clone().mul(0.4), - [nll.backward(input[1][1], target[1][0]).clone().div(2), mse.backward(input[1][1], target[1][1])] + nll2.backward(input[0], target[0]).clone().mul(0.4), + [nll.backward(input[1][1], target[1][0]).clone().div(2), mse.backward(input[1][1], target[1][1])] ] gradInput = pc2.backward(input, target) self.assertEqual(gradInput[0], gradInput2[0]) @@ -1144,11 +1153,11 @@ class TestNN(NNTestCase): def _build_net(self): return (nn.Sequential() - .add(nn.Concat(0) - .add(nn.Linear(2, 5)) - .add(nn.Linear(2, 5))) - .add(nn.ReLU()) - .add(nn.Linear(10, 20))) + .add(nn.Concat(0) + .add(nn.Linear(2, 5)) + .add(nn.Linear(2, 5))) + .add(nn.ReLU()) + .add(nn.Linear(10, 20))) def test_parameters(self): net = self._build_net() @@ -1197,6 +1206,7 @@ class TestNN(NNTestCase): def test_apply(self): net = self._build_net() seen_modules = set() + def callback(module): self.assertNotIn(module, seen_modules) seen_modules.add(module) @@ -1206,6 +1216,7 @@ class TestNN(NNTestCase): def test_listModules(self): net = self._build_net() module_list = list() + def callback(module): module_list.append(module) net.apply(callback) @@ -1214,6 +1225,7 @@ class TestNN(NNTestCase): def test_replace(self): ref_net = self._build_net() net = self._build_net() + def callback(module): if isinstance(module, nn.ReLU): return nn.Tanh() diff --git a/test/test_multiprocessing.py b/test/test_multiprocessing.py index 3a0142a97a..1a6e0b3525 100644 --- a/test/test_multiprocessing.py +++ b/test/test_multiprocessing.py @@ -16,8 +16,8 @@ from common import TestCase, run_tests HAS_SHM_FILES = os.path.isdir('/dev/shm') TEST_CUDA_IPC = torch.cuda.is_available() and \ - sys.version_info[0] == 3 and \ - sys.platform != 'darwin' + sys.version_info[0] == 3 and \ + sys.platform != 'darwin' def simple_fill(queue, event): @@ -74,7 +74,7 @@ def autograd_sharing(queue, ready, master_modified): master_modified.wait() expected_var = torch.range(1, 25).view(5, 5) - expected_var[0,0] = 1000 + expected_var[0, 0] = 1000 is_ok = var.data.equal(expected_var) var.data[:] = torch.ones(5, 5) @@ -189,7 +189,7 @@ class TestMultiprocessing(TestCase): def _test_preserve_sharing(self, ctx=mp, repeat=1): def do_test(): x = torch.randn(5, 5) - data = [x.storage(), x.storage()[1:4], x, x[2], x[:,1]] + data = [x.storage(), x.storage()[1:4], x, x[2], x[:, 1]] q = ctx.Queue() q.put(data) new_data = q.get() @@ -268,6 +268,7 @@ class 
TestMultiprocessing(TestCase): def test_inherit_tensor(self): class SubProcess(mp.Process): + def __init__(self, tensor): super(SubProcess, self).__init__() self.tensor = tensor @@ -286,7 +287,6 @@ class TestMultiprocessing(TestCase): torch.cuda.FloatTensor([1]) # initialize CUDA outside of leak checker self._test_sharing(mp.get_context('spawn'), torch.cuda.FloatTensor) - @unittest.skipIf(not TEST_CUDA_IPC, 'CUDA IPC not available') def test_cuda_small_tensors(self): # Check multiple small tensors which will likely use the same @@ -359,7 +359,7 @@ class TestMultiprocessing(TestCase): queue.put(var) ready.wait() - var.data[0,0] = 1000 + var.data[0, 0] = 1000 if var.grad is not None: var.grad.data[:] = torch.ones(5, 5) * 4 master_modified.set() @@ -380,8 +380,8 @@ class TestMultiprocessing(TestCase): ] for requires_grad, volatile in configs: var = Variable(torch.range(1, 25).view(5, 5), - requires_grad=requires_grad, - volatile=volatile) + requires_grad=requires_grad, + volatile=volatile) self._test_autograd_sharing(var) def test_parameter_sharing(self): diff --git a/test/test_nn.py b/test/test_nn.py index 0e6db08fe3..e516d2170c 100644 --- a/test/test_nn.py +++ b/test/test_nn.py @@ -16,8 +16,10 @@ from common_nn import NNTestCase, ModuleTest, CriterionTest, TestBase, \ module_tests, criterion_tests, TEST_CUDA, TEST_MULTIGPU, TEST_CUDNN, PRECISION from common import freeze_rng_state, run_tests + def default_tensor_type(type): type_str = torch.typename(type) + def decorator(fn): @wraps(fn) def wrapper(*args, **kwargs): @@ -30,9 +32,12 @@ def default_tensor_type(type): return wrapper return decorator + class InputVariableMixin(object): + def _get_input(self): input = TestBase._get_input(self) + def map_variables(i): if isinstance(i, Variable): return i @@ -44,6 +49,7 @@ class InputVariableMixin(object): class NewModuleTest(InputVariableMixin, ModuleTest): + def __init__(self, *args, **kwargs): super(NewModuleTest, self).__init__(*args, **kwargs) self.cudnn = kwargs.get('cudnn', False) @@ -356,21 +362,21 @@ class TestNN(NNTestCase): def _test_dropout(self, cls, input): p = 0.2 - input.fill_(1-p) + input.fill_(1 - p) module = cls(p) input_var = Variable(input, requires_grad=True) output = module(input_var) - self.assertLess(abs(output.data.mean() - (1-p)), 0.05) + self.assertLess(abs(output.data.mean() - (1 - p)), 0.05) output.backward(input) - self.assertLess(abs(input_var.grad.data.mean() - (1-p)), 0.05) + self.assertLess(abs(input_var.grad.data.mean() - (1 - p)), 0.05) module = cls(p, True) input_var = Variable(input.clone(), requires_grad=True) output = module(input_var + 0) - self.assertLess(abs(output.data.mean() - (1-p)), 0.05) + self.assertLess(abs(output.data.mean() - (1 - p)), 0.05) output.backward(input) - self.assertLess(abs(input_var.grad.data.mean() - (1-p)), 0.05) + self.assertLess(abs(input_var.grad.data.mean() - (1 - p)), 0.05) # Check that these don't raise errors module.__repr__() @@ -379,7 +385,9 @@ class TestNN(NNTestCase): def test_parameters(self): def num_params(module): return len(list(module.parameters())) + class Net(nn.Module): + def __init__(self): super(Net, self).__init__() self.l1 = l @@ -394,6 +402,7 @@ class TestNN(NNTestCase): def test_modules(self): class Net(nn.Module): + def __init__(self): super(Net, self).__init__() self.l1 = l @@ -455,6 +464,7 @@ class TestNN(NNTestCase): def test_non_leaf_parameters(self): l1 = nn.Linear(10, 10) l2 = nn.Linear(10, 10) + def assign_weight(): l2.weight = l1.weight + 2 self.assertRaises(TypeError, assign_weight) @@ -462,8 
+472,8 @@ class TestNN(NNTestCase): l2.weight = Parameter(torch.randn(10, 10)) def test_embedding_padding_idx(self): - embedding = nn.Embedding(10, 20, padding_idx = 0) - input = Variable(torch.LongTensor([[0,2,4,5],[4,3,0,9]])) + embedding = nn.Embedding(10, 20, padding_idx=0) + input = Variable(torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]])) output = embedding(input) self.assertEqual(output[0][0].sum().data[0], 0) self.assertEqual(output[1][2].sum().data[0], 0) @@ -493,14 +503,14 @@ class TestNN(NNTestCase): def expected_indices(dim): if dim == 1: return torch.DoubleTensor([1, 3]) - lower_dim = expected_indices(dim-1) + lower_dim = expected_indices(dim - 1) lower_dim = lower_dim.view(1, *lower_dim.size()) - return torch.cat((lower_dim+4, lower_dim+12), 0) + return torch.cat((lower_dim + 4, lower_dim + 12), 0) def expected_grad(dim): if dim == 1: return torch.DoubleTensor([0, 1, 0, 1]) - lower_dim_grad = expected_grad(dim-1) + lower_dim_grad = expected_grad(dim - 1) grad = lower_dim_grad.view(1, *lower_dim_grad.size()) zero = torch.zeros(grad.size()) return torch.cat((zero, grad, zero, grad), 0) @@ -671,7 +681,9 @@ class TestNN(NNTestCase): def test_data_parallel_nested_output(self): def fn(input): return [input, (input.sin(), input.cos(), [input.add(1)]), input] + class Net(nn.Module): + def forward(self, input): return fn(input) i = Variable(torch.randn(2, 2).float().cuda(1)) @@ -690,7 +702,9 @@ class TestNN(NNTestCase): def test_data_parallel_nested_input(self): def fn(input): return input[1][0] + class Net(nn.Module): + def forward(self, input): return fn(input) i = Variable(torch.randn(20, 3).float().cuda(1)) @@ -712,7 +726,7 @@ class TestNN(NNTestCase): def test_state_dict(self): l = nn.Linear(5, 5) block = nn.Module() - block.conv=nn.Conv2d(3, 3, 3, bias=False) + block.conv = nn.Conv2d(3, 3, 3, bias=False) net = nn.Module() net.linear1 = l net.linear2 = l @@ -781,6 +795,7 @@ class TestNN(NNTestCase): def test_parameter_assignment(self): l = nn.Linear(5, 5) + def num_params(): return len(list(l.parameters())) self.assertEqual(num_params(), 2) @@ -814,9 +829,9 @@ class TestNN(NNTestCase): # These sizes require huge cuDNN workspaces. 
Make sure we choose a # reasonable algorithm that does not run out of memory sizes = [ - (1, 256, 109, 175), - (1, 256, 80, 128), - (1, 256, 120, 192), + (1, 256, 109, 175), + (1, 256, 80, 128), + (1, 256, 120, 192), ] dtype = torch.cuda.FloatTensor @@ -887,7 +902,7 @@ class TestNN(NNTestCase): small_t = torch.rand(1, 1, 5, 5) for i in range(0, 4, 2): for j in range(0, 4, 2): - small_t[:,:,i,j] = 100 + small_t[:, :, i, j] = 100 output_small, indices_small = m(Variable(small_t)) for h in range(3, 10): for w in range(3, 10): @@ -900,10 +915,11 @@ class TestNN(NNTestCase): mu(output_small, indices_small, output_size=size) else: self.assertRaises(ValueError, lambda: - mu(output_small, indices_small, (h, w))) + mu(output_small, indices_small, (h, w))) def test_container_copy(self): class Model(nn.Module): + def __init__(self): super(Model, self).__init__() self.linear = nn.Linear(4, 5) @@ -955,7 +971,7 @@ class TestNN(NNTestCase): for i in range(6): hx, cx = lstm(input, (hx, cx)) - (hx+cx).sum().backward() + (hx + cx).sum().backward() @unittest.skipIf(not TEST_CUDNN, "needs cudnn") @default_tensor_type(torch.FloatTensor) # FIXME: just until torch.cuda.DoubleTensor.sum() implemented @@ -987,9 +1003,9 @@ class TestNN(NNTestCase): output, hy = rnn(input, hx) # FIXME this is because of a pytorch bug if is_lstm: - fake_loss = 0*(hy[0] + hy[1]).sum() + fake_loss = 0 * (hy[0] + hy[1]).sum() else: - fake_loss = 0*hy.sum() + fake_loss = 0 * hy.sum() loss = output.sum() + fake_loss loss.backward() @@ -1019,11 +1035,10 @@ class TestNN(NNTestCase): for (cpu_weight, gpu_weight) in zip(cpu_layer_weight, gpu_layer_weight): self.assertEqual(cpu_weight.grad.data, gpu_weight.grad.data, prec=5e-5) - for module in (nn.RNN, nn.LSTM, nn.GRU): for bias in (True, False): for bidirectional in (False, True): - for dropout in (0, 1): # Because of dropout randomness, can only compare 0 and 1 + for dropout in (0, 1): # Because of dropout randomness, can only compare 0 and 1 for batch_first in (False, True): num_directions = 2 if bidirectional else 1 if batch_first: @@ -1038,7 +1053,7 @@ class TestNN(NNTestCase): bias=bias, dropout=dropout, bidirectional=bidirectional, - batch_first = batch_first) + batch_first=batch_first) outputs_cpu = forward_backward( False, rnn, input_val, hx_val, rnn.all_weights) @@ -1049,7 +1064,7 @@ class TestNN(NNTestCase): bias=bias, dropout=dropout, bidirectional=bidirectional, - batch_first = batch_first) + batch_first=batch_first) outputs_gpu = forward_backward( True, rnn_gpu, input_val, hx_val, rnn.all_weights) @@ -1087,8 +1102,8 @@ class TestNN(NNTestCase): rnn.weight_hh_l0.data.fill_(1) rnn.weight_ih_l1.data.fill_(1) rnn.weight_hh_l1.data.fill_(1) - input = Variable(torch.Tensor(1,1,10).fill_(1)) - hx = Variable(torch.Tensor(2,1,1000).fill_(0)) + input = Variable(torch.Tensor(1, 1, 10).fill_(1)) + hx = Variable(torch.Tensor(2, 1, 1000).fill_(0)) if cuda: input = input.cuda() hx = hx.cuda() @@ -1129,8 +1144,8 @@ class TestNN(NNTestCase): rnn.train() else: rnn.eval() - input = Variable(torch.Tensor(1,1,100).uniform_()) - hx = Variable(torch.Tensor(2,1,100).uniform_()) + input = Variable(torch.Tensor(1, 1, 100).uniform_()) + hx = Variable(torch.Tensor(2, 1, 100).uniform_()) if cuda: input = input.cuda() hx = hx.cuda() @@ -1185,8 +1200,8 @@ class TestNN(NNTestCase): module = nn.BatchNorm1d(3).type(tp) module.eval() - data = Variable(torch.rand(4,3).type(tp), requires_grad=True) - grad = torch.rand(4,3).type(tp) + data = Variable(torch.rand(4, 3).type(tp), requires_grad=True) + grad = 
torch.rand(4, 3).type(tp) # 1st pass res1 = module(data) @@ -1210,8 +1225,8 @@ def add_test(test): raise RuntimeError('Found two tests with the same name: ' + test_name) if hasattr(TestNN, cuda_test_name): raise RuntimeError('Found two tests with the same name: ' + cuda_test_name) - setattr(TestNN, test_name, lambda self,test=test: test(self)) - setattr(TestNN, cuda_test_name, lambda self,test=test: test.test_cuda(self)) + setattr(TestNN, test_name, lambda self, test=test: test(self)) + setattr(TestNN, cuda_test_name, lambda self, test=test: test.test_cuda(self)) new_module_tests = [ @@ -1528,13 +1543,15 @@ new_module_tests = [ jacobian_input=False ), dict( - constructor=lambda: nn.FractionalMaxPool2d(2, output_ratio=0.5, _random_samples=torch.DoubleTensor(1, 3, 2).uniform_()), + constructor=lambda: nn.FractionalMaxPool2d( + 2, output_ratio=0.5, _random_samples=torch.DoubleTensor(1, 3, 2).uniform_()), input_size=(1, 3, 5, 5), fullname='FractionalMaxPool2d_ratio', test_cuda=False ), dict( - constructor=lambda: nn.FractionalMaxPool2d((2, 2), output_size=(4, 4), _random_samples=torch.DoubleTensor(1, 3, 2).uniform_()), + constructor=lambda: nn.FractionalMaxPool2d((2, 2), output_size=( + 4, 4), _random_samples=torch.DoubleTensor(1, 3, 2).uniform_()), input_size=(1, 3, 7, 7), fullname='FractionalMaxPool2d_size', test_cuda=False @@ -1596,6 +1613,7 @@ for test_params in criterion_tests: class UnpoolingNet(nn.Module): + def __init__(self, pool, unpool): super(UnpoolingNet, self).__init__() self.pool = pool diff --git a/test/test_optim.py b/test/test_optim.py index 003e47cef7..3c8dfd9105 100644 --- a/test/test_optim.py +++ b/test/test_optim.py @@ -53,7 +53,7 @@ class TestOptim(TestCase): for i in range(2000): optimizer.step(eval) old_fn(lambda _: (rosenbrock(params_t), drosenbrock(params_t)), - params_t, state) + params_t, state) self.assertEqual(params.data, params_t) self.assertLessEqual(params.data.dist(solution), initial_dist) @@ -128,8 +128,8 @@ class TestOptim(TestCase): ) # non-contiguous parameters self._test_basic_cases_template( - torch.randn(10, 5, 2)[...,0], - torch.randn(10, 2)[...,0], + torch.randn(10, 5, 2)[..., 0], + torch.randn(10, 2)[..., 0], torch.randn(5), constructor ) diff --git a/test/test_sparse.py b/test/test_sparse.py index 1b0a9c52bb..11b51eaf3f 100644 --- a/test/test_sparse.py +++ b/test/test_sparse.py @@ -11,6 +11,7 @@ SparseTensor = sparse.DoubleTensor class TestSparse(TestCase): + @staticmethod def _gen_sparse(d, nnz, with_size): v = torch.randn(nnz) @@ -19,7 +20,7 @@ class TestSparse(TestCase): x = SparseTensor(i, v) else: i = torch.rand(d, nnz) * \ - torch.Tensor(with_size).repeat(nnz, 1).transpose(0, 1) + torch.Tensor(with_size).repeat(nnz, 1).transpose(0, 1) i = i.type(torch.LongTensor) x = SparseTensor(i, v, torch.Size(with_size)) @@ -74,13 +75,13 @@ class TestSparse(TestCase): def test_contig(self): i = torch.LongTensor([ - [1, 0, 35, 14, 39, 6, 71, 66, 40, 27], + [1, 0, 35, 14, 39, 6, 71, 66, 40, 27], [92, 31, 62, 50, 22, 65, 89, 74, 56, 34], ]) v = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) x = SparseTensor(i, v, torch.Size([100, 100])) exp_i = torch.LongTensor([ - [0, 1, 6, 14, 27, 35, 39, 40, 66, 71], + [0, 1, 6, 14, 27, 35, 39, 40, 66, 71], [31, 92, 65, 50, 34, 62, 22, 56, 74, 89], ]) exp_v = torch.Tensor([2, 1, 6, 4, 10, 3, 5, 9, 8, 7]) diff --git a/test/test_torch.py b/test/test_torch.py index 4a0591222e..0d92819b3c 100644 --- a/test/test_torch.py +++ b/test/test_torch.py @@ -15,6 +15,7 @@ if TEST_NUMPY: SIZE = 100 + def skipIfNoLapack(fn): @wraps(fn) 
def wrapper(*args, **kwargs): @@ -26,6 +27,7 @@ def skipIfNoLapack(fn): raise return wrapper + class TestTorch(TestCase): def test_dot(self): @@ -36,7 +38,7 @@ class TestTorch(TestCase): for tname, prec in types.items(): v1 = torch.randn(100).type(tname) v2 = torch.randn(100).type(tname) - res1 = torch.dot(v1,v2) + res1 = torch.dot(v1, v2) res2 = 0 for i, j in zip(v1, v2): res2 += i * j @@ -54,9 +56,9 @@ class TestTorch(TestCase): # non-contiguous m1 = torch.randn(*size) - res1 = torchfn(m1[:,4]) + res1 = torchfn(m1[:, 4]) res2 = res1.clone().zero_() - for i, v in enumerate(m1[:,4]): + for i, v in enumerate(m1[:, 4]): res2[i] = mathfn(v) self.assertEqual(res1, res2) @@ -112,7 +114,7 @@ class TestTorch(TestCase): def test_sigmoid(self): # TODO: why not simulate math.sigmoid like with rsqrt? - inputValues = [-1000,-1,0,0.5,1,2,1000] + inputValues = [-1000, -1, 0, 0.5, 1, 2, 1000] expectedOutput = [0.0000, 0.2689, 0.5, 0.6225, 0.7311, 0.8808, 1.000] precision_4dps = 0.0002 @@ -145,31 +147,31 @@ class TestTorch(TestCase): def _testSelection(self, torchfn, mathfn): # contiguous - m1 = torch.randn(100,100) + m1 = torch.randn(100, 100) res1 = torchfn(m1) - res2 = m1[0,0] + res2 = m1[0, 0] for i, j in iter_indices(m1): - res2 = mathfn(res2, m1[i,j]) + res2 = mathfn(res2, m1[i, j]) self.assertEqual(res1, res2) # non-contiguous - m1 = torch.randn(10,10,10) - m2 = m1[:,4] + m1 = torch.randn(10, 10, 10) + m2 = m1[:, 4] res1 = torchfn(m2) - res2 = m2[0,0] + res2 = m2[0, 0] for i, j in iter_indices(m2): res2 = mathfn(res2, m2[i][j]) self.assertEqual(res1, res2) # with indices - m1 = torch.randn(100,100) + m1 = torch.randn(100, 100) res1val, res1ind = torchfn(m1, 1) - res2val = m1[:,0:1].clone() + res2val = m1[:, 0:1].clone() res2ind = res1ind.clone().fill_(0) for i, j in iter_indices(m1): - if mathfn(res2val[i,0], m1[i,j]) != res2val[i,0]: - res2val[i,0] = m1[i,j] - res2ind[i,0] = j + if mathfn(res2val[i, 0], m1[i, j]) != res2val[i, 0]: + res2val[i, 0] = m1[i, j] + res2ind[i, 0] = j maxerr = 0 for i in range(res1val.size(0)): @@ -211,7 +213,7 @@ class TestTorch(TestCase): def test_lerp(self): def TH_lerp(a, b, weight): - return a + weight * (b-a); + return a + weight * (b - a) size = (100, 100) a = torch.rand(*size) @@ -244,10 +246,10 @@ class TestTorch(TestCase): test((5, 5)) def test_mv(self): - m1 = torch.randn(100,100) + m1 = torch.randn(100, 100) v1 = torch.randn(100) - res1 = torch.mv(m1,v1) + res1 = torch.mv(m1, v1) res2 = res1.clone().zero_() for i, j in iter_indices(m1): res2[i] += m1[i][j] * v1[j] @@ -256,51 +258,51 @@ class TestTorch(TestCase): def test_add(self): # [res] torch.add([res,] tensor1, tensor2) - m1 = torch.randn(100,100) + m1 = torch.randn(100, 100) v1 = torch.randn(100) # contiguous res1 = torch.add(m1[4], v1) res2 = res1.clone().zero_() for i in range(m1.size(1)): - res2[i] = m1[4,i] + v1[i] + res2[i] = m1[4, i] + v1[i] self.assertEqual(res1, res2) - m1 = torch.randn(100,100) + m1 = torch.randn(100, 100) v1 = torch.randn(100) # non-contiguous - res1 = torch.add(m1[:,4],v1) + res1 = torch.add(m1[:, 4], v1) res2 = res1.clone().zero_() for i in range(m1.size(0)): - res2[i] = m1[i,4] + v1[i] + res2[i] = m1[i, 4] + v1[i] self.assertEqual(res1, res2) # [res] torch.add([res,] tensor, value) - m1 = torch.randn(10,10) + m1 = torch.randn(10, 10) # contiguous res1 = m1.clone() res1[3].add_(2) res2 = m1.clone() for i in range(m1.size(1)): - res2[3,i] = res2[3,i] + 2 + res2[3, i] = res2[3, i] + 2 self.assertEqual(res1, res2) # non-contiguous - m1 = torch.randn(10,10) + m1 = torch.randn(10, 
10) res1 = m1.clone() - res1[:,3].add_(2) + res1[:, 3].add_(2) res2 = m1.clone() for i in range(m1.size(0)): - res2[i,3] = res2[i,3] + 2 + res2[i, 3] = res2[i, 3] + 2 self.assertEqual(res1, res2) # [res] torch.add([res,] tensor1, value, tensor2) def test_csub(self): # with a tensor - a = torch.randn(100,90) + a = torch.randn(100, 90) b = a.clone().normal_() res_add = torch.add(a, -1, b) @@ -309,7 +311,7 @@ class TestTorch(TestCase): self.assertEqual(res_add, res_csub) # with a scalar - a = torch.randn(100,100) + a = torch.randn(100, 100) scalar = 123.5 res_add = torch.add(a, -scalar) @@ -318,7 +320,7 @@ class TestTorch(TestCase): self.assertEqual(res_add, res_csub) def test_neg(self): - a = torch.randn(100,90) + a = torch.randn(100, 90) zeros = torch.Tensor().resize_as_(a).zero_() res_add = torch.add(zeros, -1, a) @@ -327,7 +329,7 @@ class TestTorch(TestCase): self.assertEqual(res_neg, res_add) def test_reciprocal(self): - a = torch.randn(100,89) + a = torch.randn(100, 89) zeros = torch.Tensor().resize_as_(a).zero_() res_pow = torch.pow(a, -1) @@ -336,97 +338,97 @@ class TestTorch(TestCase): self.assertEqual(res_reciprocal, res_pow) def test_mul(self): - m1 = torch.randn(10,10) + m1 = torch.randn(10, 10) res1 = m1.clone() - res1[:,3].mul_(2) + res1[:, 3].mul_(2) res2 = m1.clone() for i in range(res1.size(0)): - res2[i,3] = res2[i,3] * 2 + res2[i, 3] = res2[i, 3] * 2 self.assertEqual(res1, res2) def test_div(self): - m1 = torch.randn(10,10) + m1 = torch.randn(10, 10) res1 = m1.clone() - res1[:,3].div_(2) + res1[:, 3].div_(2) res2 = m1.clone() for i in range(m1.size(0)): - res2[i,3] = res2[i,3] / 2 + res2[i, 3] = res2[i, 3] / 2 self.assertEqual(res1, res2) def test_fmod(self): - m1 = torch.Tensor(10,10).uniform_(-10., 10.) + m1 = torch.Tensor(10, 10).uniform_(-10., 10.) res1 = m1.clone() q = 2.1 - res1[:,3].fmod_(q) + res1[:, 3].fmod_(q) res2 = m1.clone() for i in range(m1.size(1)): - res2[i,3] = math.fmod(res2[i,3], q) + res2[i, 3] = math.fmod(res2[i, 3], q) self.assertEqual(res1, res2) def test_remainder(self): m1 = torch.Tensor(10, 10).uniform_(-10., 10.) 
res1 = m1.clone() q = 2.1 - res1[:,3].remainder_(q) + res1[:, 3].remainder_(q) res2 = m1.clone() for i in range(m1.size(0)): - res2[i,3] = res2[i,3] % q + res2[i, 3] = res2[i, 3] % q self.assertEqual(res1, res2) def test_mm(self): # helper function - def matrixmultiply(mat1,mat2): + def matrixmultiply(mat1, mat2): n = mat1.size(0) m = mat1.size(1) p = mat2.size(1) - res = torch.zeros(n,p) + res = torch.zeros(n, p) for i, j in iter_indices(res): - res[i,j] = sum(mat1[i,k] * mat2[k,j] for k in range(m)) + res[i, j] = sum(mat1[i, k] * mat2[k, j] for k in range(m)) return res # contiguous case n, m, p = 10, 10, 5 - mat1 = torch.randn(n,m) - mat2 = torch.randn(m,p) - res = torch.mm(mat1,mat2) + mat1 = torch.randn(n, m) + mat2 = torch.randn(m, p) + res = torch.mm(mat1, mat2) - res2 = matrixmultiply(mat1,mat2) + res2 = matrixmultiply(mat1, mat2) self.assertEqual(res, res2) # non contiguous case 1 n, m, p = 10, 10, 5 - mat1 = torch.randn(n,m) - mat2 = torch.randn(p,m).t() - res = torch.mm(mat1,mat2) + mat1 = torch.randn(n, m) + mat2 = torch.randn(p, m).t() + res = torch.mm(mat1, mat2) - res2 = matrixmultiply(mat1,mat2) + res2 = matrixmultiply(mat1, mat2) self.assertEqual(res, res2) # non contiguous case 2 n, m, p = 10, 10, 5 - mat1 = torch.randn(m,n).t() - mat2 = torch.randn(m,p) - res = torch.mm(mat1,mat2) + mat1 = torch.randn(m, n).t() + mat2 = torch.randn(m, p) + res = torch.mm(mat1, mat2) - res2 = matrixmultiply(mat1,mat2) + res2 = matrixmultiply(mat1, mat2) self.assertEqual(res, res2) # non contiguous case 3 n, m, p = 10, 10, 5 - mat1 = torch.randn(m,n).t() - mat2 = torch.randn(p,m).t() - res = torch.mm(mat1,mat2) + mat1 = torch.randn(m, n).t() + mat2 = torch.randn(p, m).t() + res = torch.mm(mat1, mat2) - res2 = matrixmultiply(mat1,mat2) + res2 = matrixmultiply(mat1, mat2) self.assertEqual(res, res2) # test with zero stride n, m, p = 10, 10, 5 - mat1 = torch.randn(n,m) - mat2 = torch.randn(m,1).expand(m,p) - res = torch.mm(mat1,mat2) + mat1 = torch.randn(n, m) + mat2 = torch.randn(m, 1).expand(m, p) + res = torch.mm(mat1, mat2) - res2 = matrixmultiply(mat1,mat2) + res2 = matrixmultiply(mat1, mat2) self.assertEqual(res, res2) def test_bmm(self): @@ -449,25 +451,25 @@ class TestTorch(TestCase): res = torch.bmm(b1, b2) res2 = torch.Tensor().resize_as_(res[0]).zero_() - res2.addbmm_(b1,b2) + res2.addbmm_(b1, b2) self.assertEqual(res2, res.sum(0)[0]) - res2.addbmm_(1,b1,b2) - self.assertEqual(res2, res.sum(0)[0]*2) + res2.addbmm_(1, b1, b2) + self.assertEqual(res2, res.sum(0)[0] * 2) - res2.addbmm_(1.,.5,b1,b2) - self.assertEqual(res2, res.sum(0)[0]*2.5) + res2.addbmm_(1., .5, b1, b2) + self.assertEqual(res2, res.sum(0)[0] * 2.5) - res3 = torch.addbmm(1,res2,0,b1,b2) + res3 = torch.addbmm(1, res2, 0, b1, b2) self.assertEqual(res3, res2) - res4 = torch.addbmm(1,res2,.5,b1,b2) - self.assertEqual(res4, res.sum(0)[0]*3) + res4 = torch.addbmm(1, res2, .5, b1, b2) + self.assertEqual(res4, res.sum(0)[0] * 3) - res5 = torch.addbmm(0,res2,1,b1,b2) + res5 = torch.addbmm(0, res2, 1, b1, b2) self.assertEqual(res5, res.sum(0)[0]) - res6 = torch.addbmm(.1,res2,.5,b1,b2) + res6 = torch.addbmm(.1, res2, .5, b1, b2) self.assertEqual(res6, res2 * .1 + res.sum(0) * .5) def test_baddbmm(self): @@ -478,25 +480,25 @@ class TestTorch(TestCase): res = torch.bmm(b1, b2) res2 = torch.Tensor().resize_as_(res).zero_() - res2.baddbmm_(b1,b2) + res2.baddbmm_(b1, b2) self.assertEqual(res2, res) - res2.baddbmm_(1,b1,b2) - self.assertEqual(res2, res*2) + res2.baddbmm_(1, b1, b2) + self.assertEqual(res2, res * 2) - 
res2.baddbmm_(1,.5,b1,b2) - self.assertEqual(res2, res*2.5) + res2.baddbmm_(1, .5, b1, b2) + self.assertEqual(res2, res * 2.5) - res3 = torch.baddbmm(1,res2,0,b1,b2) + res3 = torch.baddbmm(1, res2, 0, b1, b2) self.assertEqual(res3, res2) - res4 = torch.baddbmm(1,res2,.5,b1,b2) - self.assertEqual(res4, res*3) + res4 = torch.baddbmm(1, res2, .5, b1, b2) + self.assertEqual(res4, res * 3) - res5 = torch.baddbmm(0,res2,1,b1,b2) + res5 = torch.baddbmm(0, res2, 1, b1, b2) self.assertEqual(res5, res) - res6 = torch.baddbmm(.1,res2,.5,b1,b2) + res6 = torch.baddbmm(.1, res2, .5, b1, b2) self.assertEqual(res6, res2 * .1 + res * .5) def test_clamp(self): @@ -531,7 +533,7 @@ class TestTorch(TestCase): # base - tensor, exponent - number # contiguous - m1 = torch.randn(100,100) + m1 = torch.randn(100, 100) res1 = torch.pow(m1[4], 3) res2 = res1.clone().zero_() for i in range(res2.size(0)): @@ -539,25 +541,25 @@ class TestTorch(TestCase): self.assertEqual(res1, res2) # non-contiguous - m1 = torch.randn(100,100) - res1 = torch.pow(m1[:,4], 3) + m1 = torch.randn(100, 100) + res1 = torch.pow(m1[:, 4], 3) res2 = res1.clone().zero_() for i in range(res2.size(0)): - res2[i] = math.pow(m1[i,4], 3) + res2[i] = math.pow(m1[i, 4], 3) self.assertEqual(res1, res2) # base - number, exponent - tensor # contiguous - m1 = torch.randn(100,100) + m1 = torch.randn(100, 100) res1 = torch.pow(3, m1[4]) res2 = res1.clone().zero_() for i in range(res2.size(0)): - res2[i] = math.pow(3, m1[4,i]) + res2[i] = math.pow(3, m1[4, i]) self.assertEqual(res1, res2) # non-contiguous - m1 = torch.randn(100,100) - res1 = torch.pow(3, m1[:,4]) + m1 = torch.randn(100, 100) + res1 = torch.pow(3, m1[:, 4]) res2 = res1.clone().zero_() for i in range(res2.size(0)): res2[i] = math.pow(3, m1[i][4]) @@ -567,7 +569,7 @@ class TestTorch(TestCase): def reference_implementation(res2): for i, j in iter_indices(sm1): idx1d = i * sm1.size(0) + j - res2[i,j] = mathfn(sm1[i,j], sm2[idx1d]) + res2[i, j] = mathfn(sm1[i, j], sm2[idx1d]) return res2 # contiguous @@ -582,8 +584,8 @@ class TestTorch(TestCase): # non-contiguous m1 = torch.randn(10, 10, 10) m2 = torch.randn(10 * 10, 10 * 10) - sm1 = m1[:,4] - sm2 = m2[:,4] + sm1 = m1[:, 4] + sm2 = m2[:, 4] res1 = torchfn(sm1, sm2) res2 = reference_implementation(res1.clone()) self.assertEqual(res1, res2) @@ -649,7 +651,7 @@ class TestTorch(TestCase): def test_histc(self): x = torch.Tensor((2, 4, 2, 2, 5, 4)) - y = torch.histc(x, 5, 1, 5) # nbins, min, max + y = torch.histc(x, 5, 1, 5) # nbins, min, max z = torch.Tensor((0, 3, 0, 2, 1)) self.assertEqual(y, z) @@ -673,7 +675,7 @@ class TestTorch(TestCase): self.assertEqual(res1, res2) def test_renorm(self): - m1 = torch.randn(10,5) + m1 = torch.randn(10, 5) res1 = torch.Tensor() def renorm(matrix, value, dim, max_norm): @@ -708,9 +710,9 @@ class TestTorch(TestCase): def test_multinomial(self): # with replacement n_row = 3 - for n_col in range(4, 5+1): + for n_col in range(4, 5 + 1): prob_dist = torch.rand(n_row, n_col) - prob_dist.select(1, n_col-1).fill_(0) #index n_col shouldn't be sampled + prob_dist.select(1, n_col - 1).fill_(0) # index n_col shouldn't be sampled n_sample = n_col sample_indices = torch.multinomial(prob_dist, n_sample, True) self.assertEqual(prob_dist.dim(), 2) @@ -720,9 +722,9 @@ class TestTorch(TestCase): # without replacement n_row = 3 - for n_col in range(4, 5+1): + for n_col in range(4, 5 + 1): prob_dist = torch.rand(n_row, n_col) - prob_dist.select(1, n_col-1).fill_(0) #index n_col shouldn't be sampled + prob_dist.select(1, n_col - 
1).fill_(0) # index n_col shouldn't be sampled n_sample = 3 sample_indices = torch.multinomial(prob_dist, n_sample, False) self.assertEqual(prob_dist.dim(), 2) @@ -730,9 +732,9 @@ class TestTorch(TestCase): for i in range(n_row): row_samples = {} for j in range(n_sample): - sample_idx = sample_indices[i,j] - self.assertNotEqual(sample_idx, n_col-1, - "sampled an index with zero probability") + sample_idx = sample_indices[i, j] + self.assertNotEqual(sample_idx, n_col - 1, + "sampled an index with zero probability") self.assertNotIn(sample_idx, row_samples, "sampled an index twice") row_samples[sample_idx] = True @@ -803,17 +805,17 @@ class TestTorch(TestCase): are_ordered = True for j, k in product(range(SIZE), range(1, SIZE)): - self.assertTrue(check_order(mxx[j][k-1], mxx[j][k]), - 'torch.sort ({}) values unordered for {}'.format(order, task)) + self.assertTrue(check_order(mxx[j][k - 1], mxx[j][k]), + 'torch.sort ({}) values unordered for {}'.format(order, task)) seen = set() indicesCorrect = True - size = x.size(x.dim()-1) + size = x.size(x.dim() - 1) for k in range(size): seen.clear() for j in range(size): self.assertEqual(x[k][ixx[k][j]], mxx[k][j], - 'torch.sort ({}) indices wrong for {}'.format(order, task)) + 'torch.sort ({}) indices wrong for {}'.format(order, task)) seen.add(ixx[k][j]) self.assertEqual(len(seen), size) @@ -840,18 +842,18 @@ class TestTorch(TestCase): ) # Test that we still have proper sorting with duplicate keys - x = torch.floor(torch.rand(SIZE, SIZE)*10) + x = torch.floor(torch.rand(SIZE, SIZE) * 10) torch.sort(x, out=(res2val, res2ind)) self.assertIsOrdered('ascending', x, res2val, res2ind, 'random with duplicate keys') # DESCENDING SORT x = torch.rand(SIZE, SIZE) - res1val, res1ind = torch.sort(x, x.dim()-1, True) + res1val, res1ind = torch.sort(x, x.dim() - 1, True) # Test use of result tensor res2val = torch.Tensor() res2ind = torch.LongTensor() - torch.sort(x, x.dim()-1, True, out=(res2val, res2ind)) + torch.sort(x, x.dim() - 1, True, out=(res2val, res2ind)) self.assertEqual(res1val, res2val, 0) self.assertEqual(res1ind, res2ind, 0) @@ -892,8 +894,8 @@ class TestTorch(TestCase): compareTensors(t, sortKVal, sortKInd, topKVal, topKInd, dim) t = torch.rand(random.randint(1, SIZE), - random.randint(1, SIZE), - random.randint(1, SIZE)) + random.randint(1, SIZE), + random.randint(1, SIZE)) for kTries in range(3): for dimTries in range(3): @@ -926,23 +928,23 @@ class TestTorch(TestCase): res1val, res1ind = torch.kthvalue(x, k) res2val, res2ind = torch.sort(x) - self.assertEqual(res1val[:,:,0], res2val[:,:,k-1], 0) - self.assertEqual(res1ind[:,:,0], res2ind[:,:,k-1], 0) + self.assertEqual(res1val[:, :, 0], res2val[:, :, k - 1], 0) + self.assertEqual(res1ind[:, :, 0], res2ind[:, :, k - 1], 0) # test use of result tensors k = random.randint(1, SIZE) res1val = torch.Tensor() res1ind = torch.LongTensor() torch.kthvalue(x, k, out=(res1val, res1ind)) res2val, res2ind = torch.sort(x) - self.assertEqual(res1val[:,:,0], res2val[:,:,k-1], 0) - self.assertEqual(res1ind[:,:,0], res2ind[:,:,k-1], 0) + self.assertEqual(res1val[:, :, 0], res2val[:, :, k - 1], 0) + self.assertEqual(res1ind[:, :, 0], res2ind[:, :, k - 1], 0) # test non-default dim k = random.randint(1, SIZE) res1val, res1ind = torch.kthvalue(x, k, 0) res2val, res2ind = torch.sort(x, 0) - self.assertEqual(res1val[0], res2val[k-1], 0) - self.assertEqual(res1ind[0], res2ind[k-1], 0) + self.assertEqual(res1val[0], res2val[k - 1], 0) + self.assertEqual(res1ind[0], res2ind[k - 1], 0) # non-contiguous y = x.narrow(1, 0, 
1) @@ -968,7 +970,7 @@ class TestTorch(TestCase): res1val, res1ind = torch.median(x) res2val, res2ind = torch.sort(x) - ind = int(math.floor((size+1)/2) - 1) + ind = int(math.floor((size + 1) / 2) - 1) self.assertEqual(res2val.select(1, ind), res1val.select(1, 0), 0) self.assertEqual(res2val.select(1, ind), res1val.select(1, 0), 0) @@ -992,15 +994,15 @@ class TestTorch(TestCase): def test_mode(self): x = torch.range(1, SIZE * SIZE).clone().resize_(SIZE, SIZE) x[:2] = 1 - x[:,:2] = 1 + x[:, :2] = 1 x0 = x.clone() # Pre-calculated results. res1val = torch.Tensor(SIZE, 1).fill_(1) # The indices are the position of the last appearance of the mode element. res1ind = torch.LongTensor(SIZE, 1).fill_(1) - res1ind[0] = SIZE-1 - res1ind[1] = SIZE-1 + res1ind[0] = SIZE - 1 + res1ind[1] = SIZE - 1 res2val, res2ind = torch.mode(x) @@ -1124,16 +1126,16 @@ class TestTorch(TestCase): @skipIfNoLapack def test_gesv(self): - a = torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23), - (-6.05, -3.30, 5.36, -4.44, 1.08), - (-0.45, 2.58, -2.70, 0.27, 9.04), - (8.32, 2.71, 4.35, -7.17, 2.14), - (-9.67, -5.14, -7.26, 6.08, -6.87))).t() - b = torch.Tensor(((4.02, 6.19, -8.22, -7.57, -3.03), - (-1.56, 4.00, -8.67, 1.75, 2.86), - (9.81, -4.09, -4.57, -8.61, 8.99))).t() - - res1 = torch.gesv(b,a)[0] + a = torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23), + (-6.05, -3.30, 5.36, -4.44, 1.08), + (-0.45, 2.58, -2.70, 0.27, 9.04), + (8.32, 2.71, 4.35, -7.17, 2.14), + (-9.67, -5.14, -7.26, 6.08, -6.87))).t() + b = torch.Tensor(((4.02, 6.19, -8.22, -7.57, -3.03), + (-1.56, 4.00, -8.67, 1.75, 2.86), + (9.81, -4.09, -4.57, -8.61, 8.99))).t() + + res1 = torch.gesv(b, a)[0] self.assertLessEqual(b.dist(torch.mm(a, res1)), 1e-12) ta = torch.Tensor() tb = torch.Tensor() @@ -1195,55 +1197,55 @@ class TestTorch(TestCase): a = torch.Tensor(((1, 2, 3), (4, 5, 6), (7, 8, 10))) expected_q = torch.Tensor(( - (-1.230914909793328e-01, 9.045340337332914e-01, 4.082482904638621e-01), - (-4.923659639173310e-01, 3.015113445777629e-01, -8.164965809277264e-01), + (-1.230914909793328e-01, 9.045340337332914e-01, 4.082482904638621e-01), + (-4.923659639173310e-01, 3.015113445777629e-01, -8.164965809277264e-01), (-8.616404368553292e-01, -3.015113445777631e-01, 4.082482904638634e-01))) expected_r = torch.Tensor(( (-8.124038404635959e+00, -9.601136296387955e+00, -1.193987e+01), - ( 0.000000000000000e+00, 9.045340337332926e-01, 1.507557e+00), - ( 0.000000000000000e+00, 0.000000000000000e+00, 4.082483e-01))) + (0.000000000000000e+00, 9.045340337332926e-01, 1.507557e+00), + (0.000000000000000e+00, 0.000000000000000e+00, 4.082483e-01))) check_qr(a, expected_q, expected_r) # check rectangular thin a = torch.Tensor(( - ( 1, 2, 3), - ( 4, 5, 6), - ( 7, 8, 9), - (10, 11, 13), - )) + (1, 2, 3), + (4, 5, 6), + (7, 8, 9), + (10, 11, 13), + )) expected_q = torch.Tensor(( - (-0.0776150525706334, -0.833052161400748 , 0.3651483716701106), + (-0.0776150525706334, -0.833052161400748, 0.3651483716701106), (-0.3104602102825332, -0.4512365874254053, -0.1825741858350556), (-0.5433053679944331, -0.0694210134500621, -0.7302967433402217), - (-0.7761505257063329, 0.3123945605252804, 0.5477225575051663) + (-0.7761505257063329, 0.3123945605252804, 0.5477225575051663) )) expected_r = torch.Tensor(( (-12.8840987267251261, -14.5916298832790581, -17.0753115655393231), - ( 0, -1.0413152017509357, -1.770235842976589 ), - ( 0, 0, 0.5477225575051664) + (0, -1.0413152017509357, -1.770235842976589), + (0, 0, 0.5477225575051664) )) check_qr(a, expected_q, expected_r) # check rectangular fat a = 
torch.Tensor(( - (1, 2, 3, 4), - (5, 6, 7, 8), - (9, 10, 11, 13) - )) + (1, 2, 3, 4), + (5, 6, 7, 8), + (9, 10, 11, 13) + )) expected_q = torch.Tensor(( - (-0.0966736489045663, 0.907737593658436 , 0.4082482904638653), - (-0.4833682445228317, 0.3157348151855452, -0.8164965809277254), - (-0.870062840141097 , -0.2762679632873518, 0.4082482904638621) + (-0.0966736489045663, 0.907737593658436, 0.4082482904638653), + (-0.4833682445228317, 0.3157348151855452, -0.8164965809277254), + (-0.870062840141097, -0.2762679632873518, 0.4082482904638621) )) expected_r = torch.Tensor(( - ( -1.0344080432788603e+01, -1.1794185166357092e+01, - -1.3244289899925587e+01, -1.5564457473635180e+01), - ( 0.0000000000000000e+00, 9.4720444555662542e-01, - 1.8944088911132546e+00, 2.5653453733825331e+00), - ( 0.0000000000000000e+00, 0.0000000000000000e+00, - 1.5543122344752192e-15, 4.0824829046386757e-01) + (-1.0344080432788603e+01, -1.1794185166357092e+01, + -1.3244289899925587e+01, -1.5564457473635180e+01), + (0.0000000000000000e+00, 9.4720444555662542e-01, + 1.8944088911132546e+00, 2.5653453733825331e+00), + (0.0000000000000000e+00, 0.0000000000000000e+00, + 1.5543122344752192e-15, 4.0824829046386757e-01) )) check_qr(a, expected_q, expected_r) @@ -1272,14 +1274,14 @@ class TestTorch(TestCase): @skipIfNoLapack def test_trtrs(self): - a = torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23), - (-6.05, -3.30, 5.36, -4.44, 1.08), - (-0.45, 2.58, -2.70, 0.27, 9.04), - (8.32, 2.71, 4.35, -7.17, 2.14), - (-9.67, -5.14, -7.26, 6.08, -6.87))).t() - b = torch.Tensor(((4.02, 6.19, -8.22, -7.57, -3.03), - (-1.56, 4.00, -8.67, 1.75, 2.86), - (9.81, -4.09, -4.57, -8.61, 8.99))).t() + a = torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23), + (-6.05, -3.30, 5.36, -4.44, 1.08), + (-0.45, 2.58, -2.70, 0.27, 9.04), + (8.32, 2.71, 4.35, -7.17, 2.14), + (-9.67, -5.14, -7.26, 6.08, -6.87))).t() + b = torch.Tensor(((4.02, 6.19, -8.22, -7.57, -3.03), + (-1.56, 4.00, -8.67, 1.75, 2.86), + (9.81, -4.09, -4.57, -8.61, 8.99))).t() U = torch.triu(a) L = torch.tril(a) @@ -1317,7 +1319,7 @@ class TestTorch(TestCase): self.assertLessEqual(x.dist(y), 1e-12) # test reuse - res1 = torch.trtrs(b,a)[0] + res1 = torch.trtrs(b, a)[0] ta = torch.Tensor() tb = torch.Tensor() torch.trtrs(b, a, out=(tb, ta)) @@ -1352,42 +1354,42 @@ class TestTorch(TestCase): # basic test expectedNorm = 0 - a = torch.Tensor(((1.44, -9.96, -7.55, 8.34), - (-7.84, -0.28, 3.24, 8.09), - (-4.39, -3.24, 6.27, 5.28), - (4.53, 3.83, -6.64, 2.06))).t() - b = torch.Tensor(((8.58, 8.26, 8.48, -5.28), - (9.35, -4.43, -0.70, -0.26))).t() + a = torch.Tensor(((1.44, -9.96, -7.55, 8.34), + (-7.84, -0.28, 3.24, 8.09), + (-4.39, -3.24, 6.27, 5.28), + (4.53, 3.83, -6.64, 2.06))).t() + b = torch.Tensor(((8.58, 8.26, 8.48, -5.28), + (9.35, -4.43, -0.70, -0.26))).t() _test(a, b, expectedNorm) # test overderemined expectedNorm = 17.390200628863 - a = torch.Tensor(((1.44, -9.96, -7.55, 8.34, 7.08, -5.45), - (-7.84, -0.28, 3.24, 8.09, 2.52, -5.70), - (-4.39, -3.24, 6.27, 5.28, 0.74, -1.19), - (4.53, 3.83, -6.64, 2.06, -2.47, 4.70))).t() - b = torch.Tensor(((8.58, 8.26, 8.48, -5.28, 5.72, 8.93), - (9.35, -4.43, -0.70, -0.26, -7.36, -2.52))).t() + a = torch.Tensor(((1.44, -9.96, -7.55, 8.34, 7.08, -5.45), + (-7.84, -0.28, 3.24, 8.09, 2.52, -5.70), + (-4.39, -3.24, 6.27, 5.28, 0.74, -1.19), + (4.53, 3.83, -6.64, 2.06, -2.47, 4.70))).t() + b = torch.Tensor(((8.58, 8.26, 8.48, -5.28, 5.72, 8.93), + (9.35, -4.43, -0.70, -0.26, -7.36, -2.52))).t() _test(a, b, expectedNorm) # test underdetermined expectedNorm = 0 a = 
torch.Tensor(((1.44, -9.96, -7.55), - (-7.84, -0.28, 3.24), - (-4.39, -3.24, 6.27), - (4.53, 3.83, -6.64))).t() - b = torch.Tensor(((8.58, 8.26, 8.48), - (9.35, -4.43, -0.70))).t() + (-7.84, -0.28, 3.24), + (-4.39, -3.24, 6.27), + (4.53, 3.83, -6.64))).t() + b = torch.Tensor(((8.58, 8.26, 8.48), + (9.35, -4.43, -0.70))).t() _test(a, b, expectedNorm) # test reuse expectedNorm = 0 - a = torch.Tensor(((1.44, -9.96, -7.55, 8.34), - (-7.84, -0.28, 3.24, 8.09), - (-4.39, -3.24, 6.27, 5.28), - (4.53, 3.83, -6.64, 2.06))).t() - b = torch.Tensor(((8.58, 8.26, 8.48, -5.28), - (9.35, -4.43, -0.70, -0.26))).t() + a = torch.Tensor(((1.44, -9.96, -7.55, 8.34), + (-7.84, -0.28, 3.24, 8.09), + (-4.39, -3.24, 6.27, 5.28), + (4.53, 3.83, -6.64, 2.06))).t() + b = torch.Tensor(((8.58, 8.26, 8.48, -5.28), + (9.35, -4.43, -0.70, -0.26))).t() ta = torch.Tensor() tb = torch.Tensor() torch.gels(b, a, out=(tb, ta)) @@ -1399,11 +1401,11 @@ class TestTorch(TestCase): @skipIfNoLapack def test_eig(self): - a = torch.Tensor(((1.96, 0.00, 0.00, 0.00, 0.00), - (-6.49, 3.80, 0.00, 0.00, 0.00), - (-0.47, -6.39, 4.17, 0.00, 0.00), - (-7.20, 1.50, -1.51, 5.70, 0.00), - (-0.65, -6.34, 2.67, 1.80, -7.10))).t().contiguous() + a = torch.Tensor(((1.96, 0.00, 0.00, 0.00, 0.00), + (-6.49, 3.80, 0.00, 0.00, 0.00), + (-0.47, -6.39, 4.17, 0.00, 0.00), + (-7.20, 1.50, -1.51, 5.70, 0.00), + (-0.65, -6.34, 2.67, 1.80, -7.10))).t().contiguous() e = torch.eig(a)[0] ee, vv = torch.eig(a, True) te = torch.Tensor() @@ -1416,9 +1418,9 @@ class TestTorch(TestCase): self.assertEqual(vv, tv, 1e-12) # test reuse - X = torch.randn(4,4) + X = torch.randn(4, 4) X = torch.mm(X.t(), X) - e, v = torch.zeros(4,2), torch.zeros(4,4) + e, v = torch.zeros(4, 2), torch.zeros(4, 4) torch.eig(X, True, out=(e, v)) Xhat = torch.mm(torch.mm(v, torch.diag(e.select(1, 0))), v.t()) self.assertEqual(X, Xhat, 1e-8, 'VeV\' wrong') @@ -1432,8 +1434,8 @@ class TestTorch(TestCase): # test non-contiguous X = torch.randn(4, 4) X = torch.mm(X.t(), X) - e = torch.zeros(4, 2, 2)[:,1] - v = torch.zeros(4, 2, 4)[:,1] + e = torch.zeros(4, 2, 2)[:, 1] + v = torch.zeros(4, 2, 4)[:, 1] self.assertFalse(v.is_contiguous(), 'V is contiguous') self.assertFalse(e.is_contiguous(), 'E is contiguous') torch.eig(X, True, out=(e, v)) @@ -1442,10 +1444,10 @@ class TestTorch(TestCase): @skipIfNoLapack def test_symeig(self): - xval = torch.rand(100,3) + xval = torch.rand(100, 3) cov = torch.mm(xval.t(), xval) rese = torch.zeros(3) - resv = torch.zeros(3,3) + resv = torch.zeros(3, 3) # First call to symeig self.assertTrue(resv.is_contiguous(), 'resv is not contiguous') @@ -1463,7 +1465,7 @@ class TestTorch(TestCase): X = torch.rand(5, 5) X = X.t() * X e = torch.zeros(4, 2).select(1, 1) - v = torch.zeros(4, 2, 4)[:,1] + v = torch.zeros(4, 2, 4)[:, 1] self.assertFalse(v.is_contiguous(), 'V is contiguous') self.assertFalse(e.is_contiguous(), 'E is contiguous') torch.symeig(X, True, out=(e, v)) @@ -1472,11 +1474,11 @@ class TestTorch(TestCase): @skipIfNoLapack def test_svd(self): - a=torch.Tensor(((8.79, 6.11, -9.15, 9.57, -3.49, 9.84), - (9.93, 6.91, -7.93, 1.64, 4.02, 0.15), - (9.83, 5.04, 4.86, 8.83, 9.80, -8.99), - (5.45, -0.27, 4.85, 0.74, 10.00, -6.02), - (3.16, 7.98, 3.01, 5.80, 4.27, -5.31))).t().clone() + a = torch.Tensor(((8.79, 6.11, -9.15, 9.57, -3.49, 9.84), + (9.93, 6.91, -7.93, 1.64, 4.02, 0.15), + (9.83, 5.04, 4.86, 8.83, 9.80, -8.99), + (5.45, -0.27, 4.85, 0.74, 10.00, -6.02), + (3.16, 7.98, 3.01, 5.80, 4.27, -5.31))).t().clone() u, s, v = torch.svd(a) uu = torch.Tensor() ss = 
torch.Tensor() @@ -1502,9 +1504,9 @@ class TestTorch(TestCase): # test non-contiguous X = torch.randn(5, 5) - U = torch.zeros(5, 2, 5)[:,1] - S = torch.zeros(5, 2)[:,1] - V = torch.zeros(5, 2, 5)[:,1] + U = torch.zeros(5, 2, 5)[:, 1] + S = torch.zeros(5, 2)[:, 1] + V = torch.zeros(5, 2, 5)[:, 1] self.assertFalse(U.is_contiguous(), 'U is contiguous') self.assertFalse(S.is_contiguous(), 'S is contiguous') @@ -1515,7 +1517,7 @@ class TestTorch(TestCase): @skipIfNoLapack def test_inverse(self): - M = torch.randn(5,5) + M = torch.randn(5, 5) MI = torch.inverse(M) E = torch.eye(5) self.assertFalse(MI.is_contiguous(), 'MI is contiguous') @@ -1542,9 +1544,9 @@ class TestTorch(TestCase): ki = k.clone() ks = k.storage() kis = ki.storage() - for i in range(ks.size()-1, 0, -1): - kis[ks.size()-i+1] = ks[i] - #for i=ks.size(), 1, -1 do kis[ks.size()-i+1]=ks[i] end + for i in range(ks.size() - 1, 0, -1): + kis[ks.size() - i + 1] = ks[i] + # for i=ks.size(), 1, -1 do kis[ks.size()-i+1]=ks[i] end imvx = torch.xcorr2(x, ki) imvx2 = torch.xcorr2(x, ki, 'V') imfx = torch.xcorr2(x, ki, 'F') @@ -1575,20 +1577,20 @@ class TestTorch(TestCase): @unittest.skip("Not implemented yet") def test_conv3(self): x = torch.rand(math.floor(torch.uniform(20, 40)), - math.floor(torch.uniform(20, 40)), - math.floor(torch.uniform(20, 40))) + math.floor(torch.uniform(20, 40)), + math.floor(torch.uniform(20, 40))) k = torch.rand(math.floor(torch.uniform(5, 10)), - math.floor(torch.uniform(5, 10)), - math.floor(torch.uniform(5, 10))) + math.floor(torch.uniform(5, 10)), + math.floor(torch.uniform(5, 10))) imvc = torch.conv3(x, k) imvc2 = torch.conv3(x, k, 'V') imfc = torch.conv3(x, k, 'F') - ki = k.clone(); + ki = k.clone() ks = k.storage() kis = ki.storage() - for i in range(ks.size()-1, 0, -1): - kis[ks.size()-i+1] = ks[i] + for i in range(ks.size() - 1, 0, -1): + kis[ks.size() - i + 1] = ks[i] imvx = torch.xcorr3(x, ki) imvx2 = torch.xcorr3(x, ki, 'V') imfx = torch.xcorr3(x, ki, 'F') @@ -1638,7 +1640,7 @@ class TestTorch(TestCase): def reference(x, k, o3, o32): for i in range(o3.size(1)): for j in range(k.size(1)): - o32[i].add(torch.xcorr2(x[i+j-1], k[j])) + o32[i].add(torch.xcorr2(x[i + j - 1], k[j])) self._test_conv_corr_eq(lambda x, k: torch.xcorr3(x, k), reference) @unittest.skip("Not implemented yet") @@ -1654,7 +1656,7 @@ class TestTorch(TestCase): def reference(x, k, o3, o32): for i in range(o3.size(1)): for j in range(k.size(1)): - o32[i].add(torch.conv2(x[i+j-1], k[k.size(1)-j+1])) + o32[i].add(torch.conv2(x[i + j - 1], k[k.size(1) - j + 1])) self._test_conv_corr_eq(lambda x, k: torch.conv3(x, k), reference) @unittest.skip("Not implemented yet") @@ -1662,7 +1664,7 @@ class TestTorch(TestCase): def reference(x, k, o3, o32): for i in range(o3.size(1)): for j in range(k.size(1)): - o32[i+j-1].add(torch.conv2(x[i], k[j], 'F')) + o32[i + j - 1].add(torch.conv2(x[i], k[j], 'F')) self._test_conv_corr_eq(lambda x, k: torch.conv3(x, k, 'F'), reference) def test_logical(self): @@ -1714,9 +1716,9 @@ class TestTorch(TestCase): torch.manual_seed(123) reseeded = torch.randn(odd_number) self.assertEqual(midstream, repeat_midstream, 0, - 'get_rng_state/set_rng_state not generating same sequence of normally distributed numbers') + 'get_rng_state/set_rng_state not generating same sequence of normally distributed numbers') self.assertEqual(seeded, reseeded, 0, - 'repeated calls to manual_seed not generating same sequence of normally distributed numbers') + 'repeated calls to manual_seed not generating same sequence of normally 
distributed numbers') def test_manual_seed(self): rng_state = torch.get_rng_state() @@ -1750,14 +1752,14 @@ class TestTorch(TestCase): @skipIfNoLapack def test_potrs(self): - a=torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23), - (-6.05, -3.30, 5.36, -4.44, 1.08), - (-0.45, 2.58, -2.70, 0.27, 9.04), - (8.32, 2.71, 4.35, -7.17, 2.14), - (-9.67, -5.14, -7.26, 6.08, -6.87))).t() - b=torch.Tensor(((4.02, 6.19, -8.22, -7.57, -3.03), - (-1.56, 4.00, -8.67, 1.75, 2.86), - (9.81, -4.09, -4.57, -8.61, 8.99))).t() + a = torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23), + (-6.05, -3.30, 5.36, -4.44, 1.08), + (-0.45, 2.58, -2.70, 0.27, 9.04), + (8.32, 2.71, 4.35, -7.17, 2.14), + (-9.67, -5.14, -7.26, 6.08, -6.87))).t() + b = torch.Tensor(((4.02, 6.19, -8.22, -7.57, -3.03), + (-1.56, 4.00, -8.67, 1.75, 2.86), + (9.81, -4.09, -4.57, -8.61, 8.99))).t() # make sure 'a' is symmetric PSD a = torch.mm(a, a.t()) @@ -1774,11 +1776,11 @@ class TestTorch(TestCase): @skipIfNoLapack def tset_potri(self): - a=torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23), - (-6.05, -3.30, 5.36, -4.44, 1.08), - (-0.45, 2.58, -2.70, 0.27, 9.04), - (8.32, 2.71, 4.35, -7.17, 2.14), - (-9.67, -5.14, -7.26, 6.08, -6.87))).t() + a = torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23), + (-6.05, -3.30, 5.36, -4.44, 1.08), + (-0.45, 2.58, -2.70, 0.27, 9.04), + (8.32, 2.71, 4.35, -7.17, 2.14), + (-9.67, -5.14, -7.26, 6.08, -6.87))).t() # make sure 'a' is symmetric PSD a = a * a.t() @@ -1839,8 +1841,8 @@ class TestTorch(TestCase): def test_numel(self): b = torch.ByteTensor(3, 100, 100) - self.assertEqual(b.nelement(), 3*100*100) - self.assertEqual(b.numel(), 3*100*100) + self.assertEqual(b.nelement(), 3 * 100 * 100) + self.assertEqual(b.numel(), 3 * 100 * 100) def _consecutive(self, size, start=1): sequence = torch.ones(int(torch.Tensor(size).prod(0)[0])).cumsum(0) @@ -1889,6 +1891,7 @@ class TestTorch(TestCase): def test_newindex(self): reference = self._consecutive((3, 3, 3)) # This relies on __index__() being correct - but we have separate tests for that + def checkPartialAssign(index): reference = torch.zeros(3, 3, 3) reference[index] = self._consecutive((3, 3, 3))[index] @@ -1968,7 +1971,7 @@ class TestTorch(TestCase): for j in range(1 if dim == 1 else n): for k in range(1 if dim == 2 else o): ii = [i, j, k] - ii[dim] = slice(0, idx.size(dim)+1) + ii[dim] = slice(0, idx.size(dim) + 1) idx[tuple(ii)] = torch.randperm(dim_size)[0:elems_per_row] def test_gather(self): @@ -1988,8 +1991,8 @@ class TestTorch(TestCase): for j in range(idx_size[1]): for k in range(idx_size[2]): ii = [i, j, k] - ii[dim] = idx[i,j,k] - expected[i,j,k] = src[tuple(ii)] + ii[dim] = idx[i, j, k] + expected[i, j, k] = src[tuple(ii)] self.assertEqual(actual, expected, 0) idx[0][0][0] = 23 @@ -2017,8 +2020,8 @@ class TestTorch(TestCase): for j in range(idx_size[1]): for k in range(idx_size[2]): ii = [i, j, k] - ii[dim] = idx[i,j,k] - expected[tuple(ii)] = src[i,j,k] + ii[dim] = idx[i, j, k] + expected[tuple(ii)] = src[i, j, k] self.assertEqual(actual, expected, 0) idx[0][0][0] = 34 @@ -2041,7 +2044,7 @@ class TestTorch(TestCase): for j in range(idx_size[1]): for k in range(idx_size[2]): ii = [i, j, k] - ii[dim] = idx[i,j,k] + ii[dim] = idx[i, j, k] expected[tuple(ii)] = val self.assertEqual(actual, expected, 0) @@ -2109,7 +2112,7 @@ class TestTorch(TestCase): self.assertEqual(res.abs(), data, 1e-16) # Checking that the right abs function is called for LongTensor - bignumber = 2^31 + 1 + bignumber = 2 ^ 31 + 1 res = torch.LongTensor((-bignumber,)) self.assertGreater(res.abs()[0], 0) 
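Review note (an aside for readers of this patch, not part of the diff itself): two pre-existing quirks are visible in the hunks above and are only re-indented by this cleanup, not changed. `def tset_potri(self):` appears to be a typo for `test_potri`, so unittest's default `test*` method discovery never collects that LAPACK case. And `bignumber = 2 ^ 31 + 1` uses `^`, which in Python is bitwise XOR and binds more loosely than `+`, so the expression evaluates to `2 ^ 32 == 34` rather than the large value the surrounding comment suggests. A minimal plain-Python illustration of the precedence point (the intended value of 2**31 + 1 is an assumption based on that comment):

    # '+' binds tighter than '^', and '^' is XOR, so this is 2 XOR 32 == 34
    assert 2 ^ 31 + 1 == 34
    # exponentiation is '**'; this is the magnitude the comment presumably intends
    assert 2 ** 31 + 1 == 2147483649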
@@ -2125,7 +2128,7 @@ class TestTorch(TestCase): self.assertEqual(tensor.view(3, -1).size(), target) tensor_view = tensor.view(5, 3) tensor_view.fill_(random.uniform(0, 1)) - self.assertEqual((tensor_view-tensor).abs().max(), 0) + self.assertEqual((tensor_view - tensor).abs().max(), 0) self.assertEqual(empty.view_as(empty), empty) self.assertEqual(empty.view(0), empty) @@ -2150,7 +2153,7 @@ class TestTorch(TestCase): self.assertEqual(result.size(), target, 'Error in repeat using result') result = tensor.repeat(torchSize) self.assertEqual(result.size(), target, 'Error in repeat using result and LongStorage') - self.assertEqual((result.mean(0).view(8, 4)-tensor).abs().max(), 0, 'Error in repeat (not equal)') + self.assertEqual((result.mean(0).view(8, 4) - tensor).abs().max(), 0, 'Error in repeat (not equal)') def test_is_same_size(self): t1 = torch.Tensor(3, 4, 9, 10) @@ -2172,8 +2175,8 @@ class TestTorch(TestCase): self.assertTrue(t3.is_set_to(t1), "is_set_to should be symmetric") self.assertFalse(t1.is_set_to(t4)) self.assertFalse(torch.Tensor().is_set_to(torch.Tensor()), - "Tensors with no storages should not appear to be set " - "to each other") + "Tensors with no storages should not appear to be set " + "to each other") def test_tensor_set(self): t1 = torch.Tensor() @@ -2208,7 +2211,7 @@ class TestTorch(TestCase): # Non contiguous, 2D s = torch.Tensor(((1, 2, 3, 4), (5, 6, 7, 8))) - s1 = s[:,1:3] + s1 = s[:, 1:3] s2 = s1.clone() s3 = torch.Tensor(((2, 3), (6, 7))) s4 = torch.Tensor(((0, 0), (0, 0))) @@ -2222,20 +2225,20 @@ class TestTorch(TestCase): self.assertFalse(torch.equal(s1, s4)) def test_element_size(self): - byte = torch.ByteStorage().element_size() - char = torch.CharStorage().element_size() - short = torch.ShortStorage().element_size() - int = torch.IntStorage().element_size() - long = torch.LongStorage().element_size() - float = torch.FloatStorage().element_size() + byte = torch.ByteStorage().element_size() + char = torch.CharStorage().element_size() + short = torch.ShortStorage().element_size() + int = torch.IntStorage().element_size() + long = torch.LongStorage().element_size() + float = torch.FloatStorage().element_size() double = torch.DoubleStorage().element_size() - self.assertEqual(byte, torch.ByteTensor().element_size()) - self.assertEqual(char, torch.CharTensor().element_size()) - self.assertEqual(short, torch.ShortTensor().element_size()) - self.assertEqual(int, torch.IntTensor().element_size()) - self.assertEqual(long, torch.LongTensor().element_size()) - self.assertEqual(float, torch.FloatTensor().element_size()) + self.assertEqual(byte, torch.ByteTensor().element_size()) + self.assertEqual(char, torch.CharTensor().element_size()) + self.assertEqual(short, torch.ShortTensor().element_size()) + self.assertEqual(int, torch.IntTensor().element_size()) + self.assertEqual(long, torch.LongTensor().element_size()) + self.assertEqual(float, torch.FloatTensor().element_size()) self.assertEqual(double, torch.DoubleTensor().element_size()) self.assertGreater(byte, 0) @@ -2366,12 +2369,12 @@ class TestTorch(TestCase): # This test will allow through some False positives. It only checks # that the elements flagged positive are indeed non-zero. for i in range(dst1.size(0)): - self.assertNotEqual(tensor[dst1[i,0], dst1[i,1]], 0) + self.assertNotEqual(tensor[dst1[i, 0], dst1[i, 1]], 0) elif len(shape) == 3: - # This test will allow through some False positives. It only checks - # that the elements flagged positive are indeed non-zero. 
+ # This test will allow through some False positives. It only checks + # that the elements flagged positive are indeed non-zero. for i in range(dst1.size(0)): - self.assertNotEqual(tensor[dst1[i,0], dst1[i,1], dst1[i,2]], 0) + self.assertNotEqual(tensor[dst1[i, 0], dst1[i, 1], dst1[i, 2]], 0) def test_deepcopy(self): from copy import deepcopy @@ -2444,8 +2447,8 @@ class TestTorch(TestCase): std = torch.Tensor(100, 100) mean[:50] = 0 mean[50:] = 1 - std[:,:50] = 4 - std[:,50:] = 1 + std[:, :50] = 4 + std[:, 50:] = 1 r = torch.normal(mean) self.assertEqual(r[:50].mean(), 0, 0.2) @@ -2459,14 +2462,14 @@ class TestTorch(TestCase): r = torch.normal(2, std) self.assertEqual(r.mean(), 2, 0.2) - self.assertEqual(r[:,:50].std(), 4, 0.3) - self.assertEqual(r[:,50:].std(), 1, 0.2) + self.assertEqual(r[:, :50].std(), 4, 0.3) + self.assertEqual(r[:, 50:].std(), 1, 0.2) r = torch.normal(mean, std) self.assertEqual(r[:50].mean(), 0, 0.2) self.assertEqual(r[50:].mean(), 1, 0.2) - self.assertEqual(r[:,:50].std(), 4, 0.3) - self.assertEqual(r[:,50:].std(), 1, 0.2) + self.assertEqual(r[:, :50].std(), 4, 0.3) + self.assertEqual(r[:, 50:].std(), 1, 0.2) def test_serialization(self): a = [torch.randn(5, 5).float() for i in range(2)] @@ -2552,7 +2555,7 @@ class TestTorch(TestCase): obj.__repr__() str(obj) for t in torch._storage_classes: - if t.is_cuda and not torch.cuda.is_available(): + if t.is_cuda and not torch.cuda.is_available(): continue obj = t(100).fill_(1) obj.__repr__() @@ -2633,7 +2636,7 @@ class TestTorch(TestCase): # 1D > 0 storage offset xm = torch.randn(sz * 2).mul(255).type(tp) - x = xm.narrow(0, sz-1, sz) + x = xm.narrow(0, sz - 1, sz) self.assertTrue(x.storage_offset() > 0) y = x.numpy() for i in range(sz): @@ -2658,7 +2661,7 @@ class TestTorch(TestCase): # with storage offset xm = torch.randn(sz1 * 2, sz2).mul(255).type(tp) - x = xm.narrow(0, sz1-1, sz1) + x = xm.narrow(0, sz1 - 1, sz1) y = x.numpy() self.assertTrue(x.storage_offset() > 0) check2d(x, y) @@ -2670,14 +2673,14 @@ class TestTorch(TestCase): # with storage offset xm = torch.randn(sz2 * 2, sz1).mul(255).type(tp) - x = xm.narrow(0, sz2-1, sz2).t() + x = xm.narrow(0, sz2 - 1, sz2).t() y = x.numpy() self.assertTrue(x.storage_offset() > 0) check2d(x, y) # non-contiguous 2D with holes xm = torch.randn(sz2 * 2, sz1 * 2).mul(255).type(tp) - x = xm.narrow(0, sz2-1, sz2).narrow(1, sz1-1, sz1).t() + x = xm.narrow(0, sz2 - 1, sz2).narrow(1, sz1 - 1, sz1).t() y = x.numpy() self.assertTrue(x.storage_offset() > 0) check2d(x, y) diff --git a/test/test_utils.py b/test/test_utils.py index 2b842a9837..bf6946e67a 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -28,7 +28,9 @@ try: except ImportError: HAS_CFFI = False + class SimplePlugin(Plugin): + def __init__(self, interval): super(SimplePlugin, self).__init__(interval) self.trainer = None @@ -58,6 +60,7 @@ class SimplePlugin(Plugin): class ModelMock(object): + def __init__(self): self.num_calls = 0 self.output = Variable(torch.ones(1, 1), requires_grad=True) @@ -68,6 +71,7 @@ class ModelMock(object): class CriterionMock(object): + def __init__(self): self.num_calls = 0 @@ -95,6 +99,7 @@ class OptimizerMock(object): class DatasetMock(object): + def __iter__(self): for i in range(10): yield torch.randn(2, 10), torch.randperm(10)[:2] @@ -183,6 +188,7 @@ class TestTrainer(TestCase): test_dir = os.path.abspath(os.path.dirname(str(__file__))) + class TestFFI(TestCase): def setUp(self): @@ -196,13 +202,13 @@ class TestFFI(TestCase): @unittest.skipIf(not HAS_CFFI, "ffi tests require cffi 
package") def test_cpu(self): compile_extension( - name='test_extensions.cpulib', - header=test_dir + '/ffi/src/cpu/lib.h', - sources=[ - test_dir + '/ffi/src/cpu/lib1.c', - test_dir + '/ffi/src/cpu/lib2.c', - ], - verbose=False, + name='test_extensions.cpulib', + header=test_dir + '/ffi/src/cpu/lib.h', + sources=[ + test_dir + '/ffi/src/cpu/lib1.c', + test_dir + '/ffi/src/cpu/lib2.c', + ], + verbose=False, ) from test_extensions import cpulib tensor = torch.ones(2, 2).float() @@ -217,20 +223,20 @@ class TestFFI(TestCase): self.assertIs(type(f), float) self.assertRaises(TypeError, - lambda: cpulib.good_func(tensor.double(), 2, 1.5)) + lambda: cpulib.good_func(tensor.double(), 2, 1.5)) self.assertRaises(torch.FatalError, - lambda: cpulib.bad_func(tensor, 2, 1.5)) + lambda: cpulib.bad_func(tensor, 2, 1.5)) @unittest.skipIf(not HAS_CFFI or not HAS_CUDA, "ffi tests require cffi package") def test_gpu(self): compile_extension( - name='gpulib', - header=test_dir + '/ffi/src/cuda/cudalib.h', - sources=[ - test_dir + '/ffi/src/cuda/cudalib.c', - ], - with_cuda=True, - verbose=False, + name='gpulib', + header=test_dir + '/ffi/src/cuda/cudalib.h', + sources=[ + test_dir + '/ffi/src/cuda/cudalib.c', + ], + with_cuda=True, + verbose=False, ) import gpulib tensor = torch.ones(2, 2).float() @@ -243,9 +249,9 @@ class TestFFI(TestCase): self.assertEqual(ctensor, torch.ones(2, 2) * 2 + 1.5) self.assertRaises(TypeError, - lambda: gpulib.cuda_func(tensor, 2, 1.5)) + lambda: gpulib.cuda_func(tensor, 2, 1.5)) self.assertRaises(TypeError, - lambda: gpulib.cuda_func(ctensor.storage(), 2, 1.5)) + lambda: gpulib.cuda_func(ctensor.storage(), 2, 1.5)) class TestLuaReader(TestCase): @@ -320,7 +326,7 @@ class TestLuaReader(TestCase): cls._download_data(test_file_path) except urllib.URLError as e: warnings.warn(("Couldn't download the test file for TestLuaReader! 
" - "Tests will be incomplete!"), RuntimeWarning) + "Tests will be incomplete!"), RuntimeWarning) return tests = load_lua(test_file_path) diff --git a/tools/cwrap/cwrap.py b/tools/cwrap/cwrap.py index 0f5f26a7fb..b763a0ed59 100644 --- a/tools/cwrap/cwrap.py +++ b/tools/cwrap/cwrap.py @@ -20,13 +20,14 @@ class cwrap(object): """) OPTION_CODE_TEMPLATE = [ - '$call', - '$return_result', + '$call', + '$return_result', ] FUNCTION_CALL_TEMPLATE = Template("$capture_result$cname($arg_unpack);") - DEFAULT_PLUGIN_CLASSES = [ArgcountChecker, ConstantArguments, OptionalArguments, ArgumentReferences, BeforeAfterCall, ReturnArguments, GILRelease] + DEFAULT_PLUGIN_CLASSES = [ArgcountChecker, ConstantArguments, OptionalArguments, + ArgumentReferences, BeforeAfterCall, ReturnArguments, GILRelease] def __init__(self, source, destination=None, plugins=[], default_plugins=True): if destination is None: @@ -87,7 +88,7 @@ class cwrap(object): with open(fname, 'r') as f: included = f.read().split('\n') # insert it into lines at position i+1 - lines[i+1:i+1] = included + lines[i + 1:i + 1] = included else: output.append(line) i += 1 @@ -136,10 +137,10 @@ class cwrap(object): return fallback(*args) def get_type_check(self, arg, option): - return self.search_plugins('get_type_check', (arg, option), lambda arg,_: None) + return self.search_plugins('get_type_check', (arg, option), lambda arg, _: None) def get_type_unpack(self, arg, option): - return self.search_plugins('get_type_unpack', (arg, option), lambda arg,_: None) + return self.search_plugins('get_type_unpack', (arg, option), lambda arg, _: None) def get_return_wrapper(self, option): return self.search_plugins('get_return_wrapper', (option,), lambda _: self.RETURN_WRAPPERS[option['return']]) @@ -193,14 +194,14 @@ class cwrap(object): # Generate checks arg_checks = self.map_selected_arguments('get_type_check', - 'process_single_check', option, checked_args) + 'process_single_check', option, checked_args) arg_checks = ' &&\n '.join(arg_checks) for plugin in self.plugins: arg_checks = plugin.process_all_checks(arg_checks, option) # Generate unpacks arg_unpack = self.map_selected_arguments('get_type_unpack', - 'process_single_unpack', option, option['arguments']) + 'process_single_unpack', option, option['arguments']) arg_unpack = ', '.join(arg_unpack) for plugin in self.plugins: arg_unpack = plugin.process_all_unpacks(arg_unpack, option) @@ -209,16 +210,16 @@ class cwrap(object): try: return_result = self.get_return_wrapper(option).substitute() call = self.FUNCTION_CALL_TEMPLATE.substitute(capture_result='', - cname=option['cname'], arg_unpack=arg_unpack) + cname=option['cname'], arg_unpack=arg_unpack) except KeyError: return_result = self.get_return_wrapper(option).substitute(result='__result') call = self.FUNCTION_CALL_TEMPLATE.substitute(capture_result=(option['return'] + ' __result = '), - cname=option['cname'], arg_unpack=arg_unpack) + cname=option['cname'], arg_unpack=arg_unpack) code_template = deepcopy(self.OPTION_CODE_TEMPLATE) for plugin in self.plugins: code_template = plugin.process_option_code_template(code_template, - option) + option) code_template = Template('\n'.join(code_template)) code = code_template.substitute(call=call, return_result=return_result) code_lines = map(lambda s: s.strip(), code.split('\n')) diff --git a/tools/cwrap/plugins/ArgcountChecker.py b/tools/cwrap/plugins/ArgcountChecker.py index 2572327cc2..5852dc9f3f 100644 --- a/tools/cwrap/plugins/ArgcountChecker.py +++ b/tools/cwrap/plugins/ArgcountChecker.py @@ -1,5 +1,6 @@ from 
. import CWrapPlugin + class ArgcountChecker(CWrapPlugin): def process_all_checks(self, checks, option): diff --git a/tools/cwrap/plugins/ArgcountSortPlugin.py b/tools/cwrap/plugins/ArgcountSortPlugin.py index b77bd95666..d82b9a0b33 100644 --- a/tools/cwrap/plugins/ArgcountSortPlugin.py +++ b/tools/cwrap/plugins/ArgcountSortPlugin.py @@ -1,5 +1,6 @@ from . import CWrapPlugin + class ArgcountSortPlugin(CWrapPlugin): def __init__(self, descending=True): @@ -11,4 +12,3 @@ class ArgcountSortPlugin(CWrapPlugin): for declaration in declarations: declaration['options'].sort(key=num_checked_args, reverse=self.descending) return declarations - diff --git a/tools/cwrap/plugins/ArgumentReferences.py b/tools/cwrap/plugins/ArgumentReferences.py index fa1900e780..ab341b8dc0 100644 --- a/tools/cwrap/plugins/ArgumentReferences.py +++ b/tools/cwrap/plugins/ArgumentReferences.py @@ -1,6 +1,7 @@ from . import CWrapPlugin from string import Template + class ArgumentReferences(CWrapPlugin): def initialize(self, cwrap): diff --git a/tools/cwrap/plugins/AutoGPU.py b/tools/cwrap/plugins/AutoGPU.py index f3ff6a2c47..65ffc69cd2 100644 --- a/tools/cwrap/plugins/AutoGPU.py +++ b/tools/cwrap/plugins/AutoGPU.py @@ -1,5 +1,6 @@ from . import CWrapPlugin + class AutoGPU(CWrapPlugin): def __init__(self, has_self=True, condition=None): diff --git a/tools/cwrap/plugins/BeforeAfterCall.py b/tools/cwrap/plugins/BeforeAfterCall.py index e6b5584552..407b88d65e 100644 --- a/tools/cwrap/plugins/BeforeAfterCall.py +++ b/tools/cwrap/plugins/BeforeAfterCall.py @@ -1,6 +1,7 @@ from . import CWrapPlugin from string import Template + class BeforeAfterCall(CWrapPlugin): def initialize(self, cwrap): @@ -13,7 +14,7 @@ class BeforeAfterCall(CWrapPlugin): if '$' in prepend_str: before_call_template = Template(option[name]) args = {'arg' + str(i): self.cwrap.get_arg_accessor(arg, option) for i, arg - in enumerate(option['arguments'])} + in enumerate(option['arguments'])} prepend_str = before_call_template.substitute(args) template.insert(offset, prepend_str) @@ -23,5 +24,5 @@ class BeforeAfterCall(CWrapPlugin): self.insert_snippet(template, option, call_idx, 'before_call') # call position might have changed call_idx = template.index('$call') - self.insert_snippet(template, option, call_idx+1, 'after_call') + self.insert_snippet(template, option, call_idx + 1, 'after_call') return template diff --git a/tools/cwrap/plugins/BoolOption.py b/tools/cwrap/plugins/BoolOption.py index ba81e3b509..c686aa4ffd 100644 --- a/tools/cwrap/plugins/BoolOption.py +++ b/tools/cwrap/plugins/BoolOption.py @@ -1,6 +1,7 @@ from . import CWrapPlugin from string import Template + class BoolOption(CWrapPlugin): UNPACK_TEMPLATE = Template('$arg == Py_True ? $if_true : $if_false') @@ -16,4 +17,3 @@ class BoolOption(CWrapPlugin): if self.is_bool_option(arg): return Template(self.UNPACK_TEMPLATE.safe_substitute( if_true=arg['if_true'], if_false=arg['if_false'])) - diff --git a/tools/cwrap/plugins/ConstantArguments.py b/tools/cwrap/plugins/ConstantArguments.py index 4d30345e9e..7a67ebe621 100644 --- a/tools/cwrap/plugins/ConstantArguments.py +++ b/tools/cwrap/plugins/ConstantArguments.py @@ -1,6 +1,7 @@ from . 
import CWrapPlugin from string import Template + class ConstantArguments(CWrapPlugin): def process_declarations(self, declarations): @@ -18,5 +19,3 @@ class ConstantArguments(CWrapPlugin): def get_arg_accessor(self, arg, option): if arg['type'] == 'CONSTANT': return arg['name'] - - diff --git a/tools/cwrap/plugins/CuDNNPlugin.py b/tools/cwrap/plugins/CuDNNPlugin.py index 76432fbfce..6ee7395e0a 100644 --- a/tools/cwrap/plugins/CuDNNPlugin.py +++ b/tools/cwrap/plugins/CuDNNPlugin.py @@ -3,30 +3,31 @@ from copy import deepcopy from . import CWrapPlugin from itertools import product + class CuDNNPlugin(CWrapPlugin): TYPE_UNPACK = { - 'THTensor*': Template('((THPVoidTensor*)$arg)->cdata'), - 'int': Template('THPUtils_unpackLong($arg)'), + 'THTensor*': Template('((THPVoidTensor*)$arg)->cdata'), + 'int': Template('THPUtils_unpackLong($arg)'), 'std::vector<int>': Template('THPUtils_unpackIntTuple($arg)'), - 'cudnnDataType_t': Template('$arg'), - 'cudnnHandle_t': Template('$arg'), - 'Convolution*': Template('(Convolution*)THPWrapper_get($arg)'), - 'bool': Template('$arg == Py_True'), - 'double': Template('THPDoubleUtils_unpackReal($arg)'), + 'cudnnDataType_t': Template('$arg'), + 'cudnnHandle_t': Template('$arg'), + 'Convolution*': Template('(Convolution*)THPWrapper_get($arg)'), + 'bool': Template('$arg == Py_True'), + 'double': Template('THPDoubleUtils_unpackReal($arg)'), } TYPE_CHECK = { - 'Convolution*': Template('THPWrapper_check($arg)'), - 'THTensor*': Template('(PyObject*)Py_TYPE($arg) == tensorClass'), - 'int': Template('THPUtils_checkLong($arg)'), + 'Convolution*': Template('THPWrapper_check($arg)'), + 'THTensor*': Template('(PyObject*)Py_TYPE($arg) == tensorClass'), + 'int': Template('THPUtils_checkLong($arg)'), 'std::vector<int>': Template('THPUtils_checkIntTuple($arg)'), - 'bool': Template('PyBool_Check($arg)'), - 'double': Template('THPDoubleUtils_checkReal($arg)'), + 'bool': Template('PyBool_Check($arg)'), + 'double': Template('THPDoubleUtils_checkReal($arg)'), } RETURN_WRAPPER = { - 'Convolution*': Template('return THPWrapper_New($result, [](void* arg) { delete (Convolution*)arg; });'), + 'Convolution*': Template('return THPWrapper_New($result, [](void* arg) { delete (Convolution*)arg; });'), } METHODS_DECLARATION = Template(""" @@ -151,8 +152,8 @@ static PyObject * $name(PyObject *self, PyObject *args, PyObject *kwargs) if not declaration.get('only_register'): extra_flags += ' | METH_KEYWORDS' entry = Template(' {"$python_name", (PyCFunction)$name, METH_VARARGS$extra_flags, NULL},\n').substitute( - python_name=declaration['python_name'], name=declaration['name'], extra_flags=extra_flags - ) + python_name=declaration['python_name'], name=declaration['name'], extra_flags=extra_flags + ) if 'defined_if' in declaration: entry = self.preprocessor_guard(entry, declaration['defined_if']) methods += entry diff --git a/tools/cwrap/plugins/GILRelease.py b/tools/cwrap/plugins/GILRelease.py index 0be754847b..f6f435cc34 100644 --- a/tools/cwrap/plugins/GILRelease.py +++ b/tools/cwrap/plugins/GILRelease.py @@ -1,6 +1,7 @@ from . 
import CWrapPlugin from string import Template + class GILRelease(CWrapPlugin): OPTION_START = [ @@ -24,6 +25,5 @@ class GILRelease(CWrapPlugin): def process_option_code_template(self, template, option): call_idx = template.index('$call') template.insert(call_idx, self.BEFORE_CALL) - template.insert(call_idx+2, self.AFTER_CALL) + template.insert(call_idx + 2, self.AFTER_CALL) return self.OPTION_START + template + self.OPTION_END - diff --git a/tools/cwrap/plugins/KwargsPlugin.py b/tools/cwrap/plugins/KwargsPlugin.py index 85e7afabac..e4f598cc44 100644 --- a/tools/cwrap/plugins/KwargsPlugin.py +++ b/tools/cwrap/plugins/KwargsPlugin.py @@ -1,6 +1,7 @@ from . import CWrapPlugin from string import Template + class KwargsPlugin(CWrapPlugin): ACCESSOR_TEMPLATE = Template('(__tuplecount > $idx ? PyTuple_GET_ITEM(args, $idx) : __kw_$name)') @@ -53,7 +54,8 @@ class KwargsPlugin(CWrapPlugin): seen_args.add(name) args.append(name) declarations = '\n '.join(['PyObject *__kw_{} = NULL;'.format(name) for name in args]) - lookups = '\n '.join(['__kw_{name} = PyDict_GetItemString(kwargs, "{name}");'.format(name=name) for name in args]) + lookups = '\n '.join( + ['__kw_{name} = PyDict_GetItemString(kwargs, "{name}");'.format(name=name) for name in args]) start_idx = code.find('{') + 1 new_code = self.WRAPPER_TEMPLATE.substitute(declarations=declarations, lookups=lookups) return code[:start_idx] + new_code + code[start_idx:] diff --git a/tools/cwrap/plugins/NullableArguments.py b/tools/cwrap/plugins/NullableArguments.py index 47c1b0c0ab..b69c96fa74 100644 --- a/tools/cwrap/plugins/NullableArguments.py +++ b/tools/cwrap/plugins/NullableArguments.py @@ -1,6 +1,8 @@ from . import CWrapPlugin + class NullableArguments(CWrapPlugin): + def process_single_check(self, code, arg, arg_accessor): if 'nullable' in arg and arg['nullable']: return '({} || {} == Py_None)'.format(code, arg_accessor) @@ -10,5 +12,3 @@ class NullableArguments(CWrapPlugin): if 'nullable' in arg and arg['nullable']: return '({} == Py_None ? NULL : {})'.format(arg_accessor, code) return code - - diff --git a/tools/cwrap/plugins/OptionalArguments.py b/tools/cwrap/plugins/OptionalArguments.py index 0bbc12d6d4..6cb24e1aee 100644 --- a/tools/cwrap/plugins/OptionalArguments.py +++ b/tools/cwrap/plugins/OptionalArguments.py @@ -2,6 +2,7 @@ from copy import deepcopy from . 
import CWrapPlugin from itertools import product + class OptionalArguments(CWrapPlugin): def process_declarations(self, declarations): @@ -32,20 +33,20 @@ class OptionalArguments(CWrapPlugin): else: kwarg_only_count = -kwarg_only_count arg_signature = '#'.join( - arg['type'] - for arg in option['arguments'][:kwarg_only_count] - if not arg.get('ignore_check')) + arg['type'] + for arg in option['arguments'][:kwarg_only_count] + if not arg.get('ignore_check')) if kwarg_only_count is None: return arg_signature kwarg_only_signature = '#'.join( - arg['name'] + '#' + arg['type'] - for arg in option['arguments'][kwarg_only_count:] - if not arg.get('ignore_check')) + arg['name'] + '#' + arg['type'] + for arg in option['arguments'][kwarg_only_count:] + if not arg.get('ignore_check')) return arg_signature + "#-#" + kwarg_only_signature seen_signatures = set() unique = [] for option in options: - for num_kwarg_only in range(0, len(option['arguments'])+1): + for num_kwarg_only in range(0, len(option['arguments']) + 1): sig = signature(option, num_kwarg_only) if sig not in seen_signatures: if num_kwarg_only > 0: @@ -55,4 +56,3 @@ class OptionalArguments(CWrapPlugin): seen_signatures.add(sig) break return unique - diff --git a/tools/cwrap/plugins/ReturnArguments.py b/tools/cwrap/plugins/ReturnArguments.py index 43a85a558f..651d617d5a 100644 --- a/tools/cwrap/plugins/ReturnArguments.py +++ b/tools/cwrap/plugins/ReturnArguments.py @@ -1,9 +1,10 @@ from . import CWrapPlugin from string import Template + class ReturnArguments(CWrapPlugin): - ARGUMENT_RETURN_TEMPLATE = Template("Py_INCREF($arg);\nreturn (PyObject*)($arg);") - TUPLE_RETURN_TEMPLATE = Template("return PyTuple_Pack($num_args, $args);") + ARGUMENT_RETURN_TEMPLATE = Template("Py_INCREF($arg);\nreturn (PyObject*)($arg);") + TUPLE_RETURN_TEMPLATE = Template("return PyTuple_Pack($num_args, $args);") def initialize(self, cwrap): self.cwrap = cwrap diff --git a/tools/cwrap/plugins/StandaloneExtension.py b/tools/cwrap/plugins/StandaloneExtension.py index 3442c80251..26407cf655 100644 --- a/tools/cwrap/plugins/StandaloneExtension.py +++ b/tools/cwrap/plugins/StandaloneExtension.py @@ -26,41 +26,41 @@ $METHODS class StandaloneExtension(CWrapPlugin): TYPE_UNPACK = { - 'THFloatTensor*': Template('THPFloatTensor_CData((THPFloatTensor*)$arg)'), - 'THDoubleTensor*': Template('THPDoubleTensor_CData((THPDoubleTensor*)$arg)'), - 'THLongTensor*': Template('THPLongTensor_CData((THPLongTensor*)$arg)'), - 'THIntTensor*': Template('THPIntTensor_CData((THPIntTensor*)$arg)'), + 'THFloatTensor*': Template('THPFloatTensor_CData((THPFloatTensor*)$arg)'), + 'THDoubleTensor*': Template('THPDoubleTensor_CData((THPDoubleTensor*)$arg)'), + 'THLongTensor*': Template('THPLongTensor_CData((THPLongTensor*)$arg)'), + 'THIntTensor*': Template('THPIntTensor_CData((THPIntTensor*)$arg)'), 'THCudaHalfTensor*': Template('THCPHalfTensor_CData((THCPHalfTensor*)$arg)'), - 'THCudaTensor*': Template('THCPFloatTensor_CData((THCPFloatTensor*)$arg)'), + 'THCudaTensor*': Template('THCPFloatTensor_CData((THCPFloatTensor*)$arg)'), 'THCudaDoubleTensor*': Template('THCPDoubleTensor_CData((THCPDoubleTensor*)$arg)'), 'THCudaLongTensor*': Template('THCPLongTensor_CData((THCPLongTensor*)$arg)'), - 'half': Template('THPHalfUtils_unpackReal($arg)'), - 'float': Template('THPFloatUtils_unpackReal($arg)'), - 'double': Template('THPDoubleUtils_unpackReal($arg)'), - 'bool': Template('($arg == Py_True ? 
true : false)'), - 'int': Template('THPUtils_unpackLong($arg)'), - 'long': Template('THPUtils_unpackLong($arg)'), - 'void*': Template('(void*)THPUtils_unpackLong($arg)'), - 'THGenerator*': Template('THPGenerator_CData((THPGenerator*)$arg)'), + 'half': Template('THPHalfUtils_unpackReal($arg)'), + 'float': Template('THPFloatUtils_unpackReal($arg)'), + 'double': Template('THPDoubleUtils_unpackReal($arg)'), + 'bool': Template('($arg == Py_True ? true : false)'), + 'int': Template('THPUtils_unpackLong($arg)'), + 'long': Template('THPUtils_unpackLong($arg)'), + 'void*': Template('(void*)THPUtils_unpackLong($arg)'), + 'THGenerator*': Template('THPGenerator_CData((THPGenerator*)$arg)'), } TYPE_CHECK = { - 'THDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THPDoubleTensorClass'), - 'THFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THPFloatTensorClass'), - 'THLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THPLongTensorClass'), - 'THIntTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIntTensorClass'), + 'THDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THPDoubleTensorClass'), + 'THFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THPFloatTensorClass'), + 'THLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THPLongTensorClass'), + 'THIntTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIntTensorClass'), 'THCudaHalfTensor*': Template('THCPHalfTensor_Check($arg)'), - 'THCudaTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPFloatTensorClass'), + 'THCudaTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPFloatTensorClass'), 'THCudaDoubleTensor*': Template('THCPDoubleTensor_Check($arg)'), 'THCudaLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPLongTensorClass'), - 'half': Template('THPHalfUtils_checkReal($arg)'), - 'float': Template('THPFloatUtils_checkReal($arg)'), - 'double': Template('THPDoubleUtils_checkReal($arg)'), - 'bool': Template('PyBool_Check($arg)'), - 'int': Template('THPUtils_checkLong($arg)'), - 'long': Template('THPUtils_checkLong($arg)'), - 'void*': Template('THPUtils_checkLong($arg)'), - 'THGenerator*': Template('(PyObject*)Py_TYPE($arg) == THPGeneratorClass'), + 'half': Template('THPHalfUtils_checkReal($arg)'), + 'float': Template('THPFloatUtils_checkReal($arg)'), + 'double': Template('THPDoubleUtils_checkReal($arg)'), + 'bool': Template('PyBool_Check($arg)'), + 'int': Template('THPUtils_checkLong($arg)'), + 'long': Template('THPUtils_checkLong($arg)'), + 'void*': Template('THPUtils_checkLong($arg)'), + 'THGenerator*': Template('(PyObject*)Py_TYPE($arg) == THPGeneratorClass'), } WRAPPER_TEMPLATE = Template(""" @@ -131,6 +131,7 @@ PyObject * $name(PyObject *_unused, PyObject *args) def get_wrapper_template(self, declaration): arg_desc = [] + def describe_arg(arg): desc = self.TYPE_NAMES[arg['type']] + ' ' + arg['name'] if arg.get('nullable'): @@ -138,8 +139,8 @@ PyObject * $name(PyObject *_unused, PyObject *args) return desc for option in declaration['options']: option_desc = [describe_arg(arg) - for arg in option['arguments'] - if not arg.get('ignore_check', False)] + for arg in option['arguments'] + if not arg.get('ignore_check', False)] if option_desc: arg_desc.append('({})'.format(', '.join(option_desc))) else: diff --git a/tools/cwrap/plugins/THPPlugin.py b/tools/cwrap/plugins/THPPlugin.py index caeab51360..1c4f5d2194 100644 --- a/tools/cwrap/plugins/THPPlugin.py +++ b/tools/cwrap/plugins/THPPlugin.py @@ -4,85 +4,86 @@ from . 
import CWrapPlugin from itertools import product, chain from collections import OrderedDict + class THPPlugin(CWrapPlugin): TYPE_UNPACK = { - 'THFloatTensor*': Template('((THPFloatTensor*)$arg)->cdata'), - 'THDoubleTensor*': Template('((THPDoubleTensor*)$arg)->cdata'), - 'THLongTensor*': Template('((THPLongTensor*)$arg)->cdata'), - 'THIntTensor*': Template('((THPIntTensor*)$arg)->cdata'), - 'THTensor*': Template('((THPTensor*)$arg)->cdata'), - 'THBoolTensor*': Template('((THPBoolTensor*)$arg)->cdata'), - 'THIndexTensor*': Template('((THPIndexTensor*)$arg)->cdata'), - - 'THSFloatTensor*': Template('((THSPFloatTensor*)$arg)->cdata'), + 'THFloatTensor*': Template('((THPFloatTensor*)$arg)->cdata'), + 'THDoubleTensor*': Template('((THPDoubleTensor*)$arg)->cdata'), + 'THLongTensor*': Template('((THPLongTensor*)$arg)->cdata'), + 'THIntTensor*': Template('((THPIntTensor*)$arg)->cdata'), + 'THTensor*': Template('((THPTensor*)$arg)->cdata'), + 'THBoolTensor*': Template('((THPBoolTensor*)$arg)->cdata'), + 'THIndexTensor*': Template('((THPIndexTensor*)$arg)->cdata'), + + 'THSFloatTensor*': Template('((THSPFloatTensor*)$arg)->cdata'), 'THSDoubleTensor*': Template('((THSPDoubleTensor*)$arg)->cdata'), - 'THSLongTensor*': Template('((THSPLongTensor*)$arg)->cdata'), - 'THSIntTensor*': Template('((THSPIntTensor*)$arg)->cdata'), - 'THSTensor*': Template('((THSPTensor*)$arg)->cdata'), - 'THSBoolTensor*': Template('((THSPBoolTensor*)$arg)->cdata'), - 'THSIndexTensor*': Template('((THSPIndexTensor*)$arg)->cdata'), - - 'THLongStorage*': Template('((THPLongStorage*)$arg)->cdata'), - 'THStorage*': Template('((THPStorage*)$arg)->cdata'), - 'THGenerator*': Template('((THPGenerator*)$arg)->cdata'), - 'THSize*': Template('__size.get()'), - 'THStride*': Template('__stride.get()'), - 'void*': Template('THPUtils_unpackLong($arg)'), - 'long': Template('THPUtils_unpackLong($arg)'), - 'int': Template('THPUtils_unpackLong($arg)'), - 'bool': Template('($arg == Py_True ? true : false)'), - 'float': Template('THPFloatUtils_unpackReal($arg)'), - 'double': Template('THPDoubleUtils_unpackReal($arg)'), - 'real': Template('THPUtils_(unpackReal)($arg)'), - 'accreal': Template('THPUtils_(unpackAccreal)($arg)'), + 'THSLongTensor*': Template('((THSPLongTensor*)$arg)->cdata'), + 'THSIntTensor*': Template('((THSPIntTensor*)$arg)->cdata'), + 'THSTensor*': Template('((THSPTensor*)$arg)->cdata'), + 'THSBoolTensor*': Template('((THSPBoolTensor*)$arg)->cdata'), + 'THSIndexTensor*': Template('((THSPIndexTensor*)$arg)->cdata'), + + 'THLongStorage*': Template('((THPLongStorage*)$arg)->cdata'), + 'THStorage*': Template('((THPStorage*)$arg)->cdata'), + 'THGenerator*': Template('((THPGenerator*)$arg)->cdata'), + 'THSize*': Template('__size.get()'), + 'THStride*': Template('__stride.get()'), + 'void*': Template('THPUtils_unpackLong($arg)'), + 'long': Template('THPUtils_unpackLong($arg)'), + 'int': Template('THPUtils_unpackLong($arg)'), + 'bool': Template('($arg == Py_True ? 
true : false)'), + 'float': Template('THPFloatUtils_unpackReal($arg)'), + 'double': Template('THPDoubleUtils_unpackReal($arg)'), + 'real': Template('THPUtils_(unpackReal)($arg)'), + 'accreal': Template('THPUtils_(unpackAccreal)($arg)'), } TYPE_CHECK = { - 'THDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THPDoubleTensorClass'), - 'THFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THPFloatTensorClass'), - 'THLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THPLongTensorClass'), - 'THIntTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIntTensorClass'), - 'THCudaTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPFloatTensorClass'), - 'THTensor*': Template('(PyObject*)Py_TYPE($arg) == THPTensorClass'), - 'THBoolTensor*': Template('(PyObject*)Py_TYPE($arg) == THPBoolTensorClass'), - 'THIndexTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIndexTensorClass'), + 'THDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THPDoubleTensorClass'), + 'THFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THPFloatTensorClass'), + 'THLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THPLongTensorClass'), + 'THIntTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIntTensorClass'), + 'THCudaTensor*': Template('(PyObject*)Py_TYPE($arg) == THCPFloatTensorClass'), + 'THTensor*': Template('(PyObject*)Py_TYPE($arg) == THPTensorClass'), + 'THBoolTensor*': Template('(PyObject*)Py_TYPE($arg) == THPBoolTensorClass'), + 'THIndexTensor*': Template('(PyObject*)Py_TYPE($arg) == THPIndexTensorClass'), 'THSDoubleTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPDoubleTensorClass'), - 'THSFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPFloatTensorClass'), - 'THSLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPLongTensorClass'), - 'THSIntTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPIntTensorClass'), - 'THSTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPTensorClass'), - 'THSBoolTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPBoolTensorClass'), - 'THSIndexTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPIndexTensorClass'), - - 'THLongStorage*': Template('(PyObject*)Py_TYPE($arg) == THPLongStorageClass'), - 'THStorage*': Template('(PyObject*)Py_TYPE($arg) == THPStorageClass'), - 'THGenerator*': Template('(PyObject*)Py_TYPE($arg) == THPGeneratorClass'), - 'THSize*': Template('THPUtils_tryUnpackLongs($arg, __size)'), - 'THStride*': Template('THPUtils_tryUnpackLongs($arg, __stride)'), - 'void*': Template('THPUtils_checkLong($arg)'), - 'long': Template('THPUtils_checkLong($arg)'), - 'int': Template('THPUtils_checkLong($arg)'), - 'bool': Template('PyBool_Check($arg)'), - 'float': Template('THPFloatUtils_checkReal($arg)'), - 'double': Template('THPDoubleUtils_checkReal($arg)'), - 'real': Template('THPUtils_(checkReal)($arg)'), - 'accreal': Template('THPUtils_(checkAccreal)($arg)'), + 'THSFloatTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPFloatTensorClass'), + 'THSLongTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPLongTensorClass'), + 'THSIntTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPIntTensorClass'), + 'THSTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPTensorClass'), + 'THSBoolTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPBoolTensorClass'), + 'THSIndexTensor*': Template('(PyObject*)Py_TYPE($arg) == THSPIndexTensorClass'), + + 'THLongStorage*': Template('(PyObject*)Py_TYPE($arg) == THPLongStorageClass'), + 'THStorage*': Template('(PyObject*)Py_TYPE($arg) == THPStorageClass'), + 'THGenerator*': Template('(PyObject*)Py_TYPE($arg) == 
THPGeneratorClass'), + 'THSize*': Template('THPUtils_tryUnpackLongs($arg, __size)'), + 'THStride*': Template('THPUtils_tryUnpackLongs($arg, __stride)'), + 'void*': Template('THPUtils_checkLong($arg)'), + 'long': Template('THPUtils_checkLong($arg)'), + 'int': Template('THPUtils_checkLong($arg)'), + 'bool': Template('PyBool_Check($arg)'), + 'float': Template('THPFloatUtils_checkReal($arg)'), + 'double': Template('THPDoubleUtils_checkReal($arg)'), + 'real': Template('THPUtils_(checkReal)($arg)'), + 'accreal': Template('THPUtils_(checkAccreal)($arg)'), } SIZE_VARARG_CHECK = Template('THPUtils_tryUnpackLongVarArgs(args, $idx, __size)') RETURN_WRAPPER = { - 'THTensor*': Template('return THPTensor_(New)($result);'), - 'THSTensor*': Template('return THSPTensor_(New)($result);'), - 'THLongTensor*': Template('return THPLongTensor_New($result);'), - 'THLongStorage*': Template('return THPLongStorage_New($result);'), + 'THTensor*': Template('return THPTensor_(New)($result);'), + 'THSTensor*': Template('return THSPTensor_(New)($result);'), + 'THLongTensor*': Template('return THPLongTensor_New($result);'), + 'THLongStorage*': Template('return THPLongStorage_New($result);'), # TODO: make it smarter - it should return python long if result doesn't fit into an int - 'long': Template('return PyInt_FromLong($result);'), - 'accreal': Template('return THPUtils_(newAccreal)($result);'), - 'self': Template('Py_INCREF(self);\nreturn (PyObject*)self;'), - 'real': Template('return THPUtils_(newReal)($result);'), + 'long': Template('return PyInt_FromLong($result);'), + 'accreal': Template('return THPUtils_(newAccreal)($result);'), + 'self': Template('Py_INCREF(self);\nreturn (PyObject*)self;'), + 'real': Template('return THPUtils_(newReal)($result);'), } TENSOR_METHODS_DECLARATION = Template(""" @@ -138,13 +139,13 @@ ${cpu} return Template(code) ALLOCATE_TYPE = { - 'THTensor*': _allocate('', ALLOCATE_TMPL), - 'THLongTensor*': _allocate('Long', ALLOCATE_TMPL), - 'THIntTensor*': _allocate('Int', ALLOCATE_TMPL), - 'THBoolTensor*': _allocate('Byte', ALLOCATE_TMPL, ALLOCATE_CUDA), - 'THIndexTensor*': _allocate('Long', ALLOCATE_TMPL, ALLOCATE_CUDA), + 'THTensor*': _allocate('', ALLOCATE_TMPL), + 'THLongTensor*': _allocate('Long', ALLOCATE_TMPL), + 'THIntTensor*': _allocate('Int', ALLOCATE_TMPL), + 'THBoolTensor*': _allocate('Byte', ALLOCATE_TMPL, ALLOCATE_CUDA), + 'THIndexTensor*': _allocate('Long', ALLOCATE_TMPL, ALLOCATE_CUDA), - 'THSTensor*': _allocate('', ALLOCATE_TMPL, sparse=True), + 'THSTensor*': _allocate('', ALLOCATE_TMPL, sparse=True), } TYPE_NAMES = { @@ -205,7 +206,7 @@ ${cpu} if len(output_args) > 1: out_type = 'tuple[' out_type += ', '.join( - self.TYPE_NAMES[arg['type']] for arg in output_args) + self.TYPE_NAMES[arg['type']] for arg in output_args) out_type += ']' option_desc += ['#' + out_type + ' out'] else: @@ -287,7 +288,7 @@ ${cpu} if not output_provided: arg['ignore_check'] = True else: - option_copy['argcount_offset'] = -len(out_idx) + 1 + option_copy['argcount_offset'] = -len(out_idx) + 1 arg['no_kwargs'] = True arg['no_idx'] = True new_options.append(option_copy) @@ -345,7 +346,6 @@ ${cpu} if arg['name'] == 'self': arg['ignore_check'] = True - declarations = [d for d in declarations if not d.get('only_stateless', False)] self.declarations.extend(filter(lambda x: not x.get('only_stateless', False), register_only)) self.stateless_declarations.extend(filter(lambda x: x.get('only_stateless', False), register_only)) @@ -377,9 +377,9 @@ ${cpu} if declaration.get('override_method_flags'): flags = 
declaration['override_method_flags'] entry = Template(' {"$python_name", (PyCFunction)$name, $flags, $docstring},\n').substitute( - python_name=declaration['python_name'], name=declaration['name'], flags=flags, - docstring=declaration.get('docstring_var', 'NULL') - ) + python_name=declaration['python_name'], name=declaration['name'], flags=flags, + docstring=declaration.get('docstring_var', 'NULL') + ) if 'defined_if' in declaration: entry = self.preprocessor_guard(entry, declaration['defined_if']) tensor_methods += entry @@ -401,7 +401,7 @@ ${cpu} ) def preprocessor_guard(self, code, condition): - return '#if ' + condition + '\n' + code + '#endif\n' + return '#if ' + condition + '\n' + code + '#endif\n' def process_wrapper(self, code, declaration): if 'defined_if' in declaration: @@ -419,7 +419,7 @@ ${cpu} if option['output_count'] > 1: checks += "PyTuple_Check(__out) &&\n" + indent length_check = "PyTuple_GET_SIZE(__out) == {} &&\n".format( - option['output_count']) + option['output_count']) checks += length_check + indent code = checks + code else: @@ -443,13 +443,13 @@ ${cpu} def generate_docstrings_cpp(self): template = Template('char* $name = "$content";') return '\n\n'.join( - template.substitute(name=decl['docstring_var'], content=decl['docstring_content']) - for decl in chain(self.declarations, self.stateless_declarations) - if 'docstring_var' in decl) + template.substitute(name=decl['docstring_var'], content=decl['docstring_content']) + for decl in chain(self.declarations, self.stateless_declarations) + if 'docstring_var' in decl) def generate_docstrings_h(self): template = Template('extern char* $name;') return '\n\n'.join( - template.substitute(name=decl['docstring_var']) - for decl in chain(self.declarations, self.stateless_declarations) - if 'docstring_var' in decl) + template.substitute(name=decl['docstring_var']) + for decl in chain(self.declarations, self.stateless_declarations) + if 'docstring_var' in decl) diff --git a/tools/nnwrap/generate_wrappers.py b/tools/nnwrap/generate_wrappers.py index 9520cfcac7..d5ca74d9e0 100644 --- a/tools/nnwrap/generate_wrappers.py +++ b/tools/nnwrap/generate_wrappers.py @@ -8,6 +8,7 @@ BASE_PATH = os.path.realpath(os.path.join(__file__, '..', '..', '..')) WRAPPER_PATH = os.path.join(BASE_PATH, 'torch', 'csrc', 'nn') THNN_UTILS_PATH = os.path.join(BASE_PATH, 'torch', '_thnn', 'utils.py') + def import_module(name, path): if sys.version_info >= (3, 5): import importlib.util @@ -81,7 +82,8 @@ for t in ['CudaHalf', 'Cuda', 'CudaDouble']: def wrap_function(name, type, arguments): cname = 'THNN_' + type + name declaration = '' - declaration += 'extern "C" void ' + cname + '(' + ', '.join(TYPE_TRANSFORMS[type].get(arg.type, arg.type) for arg in arguments) + ');\n' + declaration += 'extern "C" void ' + cname + \ + '(' + ', '.join(TYPE_TRANSFORMS[type].get(arg.type, arg.type) for arg in arguments) + ');\n' declaration += FUNCTION_TEMPLATE.substitute(name=type + name, cname=cname) indent = ' ' * 4 dict_indent = ' ' * 6 @@ -92,15 +94,17 @@ def wrap_function(name, type, arguments): else: t = TYPE_TRANSFORMS[type].get(arg.type, arg.type) declaration += prefix + 'type: ' + t + '\n' + \ - dict_indent + 'name: ' + arg.name + '\n' + \ - dict_indent + 'nullable: True' + '\n' + dict_indent + 'name: ' + arg.name + '\n' + \ + dict_indent + 'nullable: True' + '\n' declaration += ']]\n\n\n' return declaration + def generate_wrappers(): wrap_nn() wrap_cunn() + def wrap_nn(): wrapper = '#include <TH/TH.h>\n\n\n' nn_functions = 
thnn_utils.parse_header(thnn_utils.THNN_H_PATH) @@ -114,6 +118,7 @@ def wrap_nn(): NullableArguments(), ]) + def wrap_cunn(): wrapper = '#include <TH/TH.h>\n' wrapper += '#include <THC/THC.h>\n\n\n' diff --git a/tools/setup_helpers/env.py b/tools/setup_helpers/env.py index 70db036899..b7a555b651 100644 --- a/tools/setup_helpers/env.py +++ b/tools/setup_helpers/env.py @@ -1,4 +1,5 @@ import os + def check_env_flag(name): return os.getenv(name) in ['ON', '1', 'YES', 'TRUE', 'Y'] diff --git a/torch/__init__.py b/torch/__init__.py index b75a8c0478..021fee57a7 100644 --- a/torch/__init__.py +++ b/torch/__init__.py @@ -56,6 +56,7 @@ del old_flags # Define basic utilities ################################################################################ + def typename(o): module = '' class_name = '' @@ -91,7 +92,7 @@ def set_default_tensor_type(t): def set_rng_state(new_state): r"""Sets the random number generator state. - + Args: new_state (torch.ByteTensor): The desired state """ @@ -106,7 +107,7 @@ def get_rng_state(): def manual_seed(seed): r"""Sets the seed for generating random numbers. And returns a `torch._C.Generator` object. - + Args: seed (int or long): The desired seed. """ @@ -130,61 +131,101 @@ from ._tensor_str import set_printoptions from .storage import _StorageBase from .tensor import _TensorBase + class DoubleStorage(_C.DoubleStorageBase, _StorageBase): pass + + class FloatStorage(_C.FloatStorageBase, _StorageBase): pass + + class LongStorage(_C.LongStorageBase, _StorageBase): pass + + class IntStorage(_C.IntStorageBase, _StorageBase): pass + + class ShortStorage(_C.ShortStorageBase, _StorageBase): pass + + class CharStorage(_C.CharStorageBase, _StorageBase): pass + + class ByteStorage(_C.ByteStorageBase, _StorageBase): pass + class DoubleTensor(_C.DoubleTensorBase, _TensorBase): + def is_signed(self): return True + @classmethod def storage_type(cls): return DoubleStorage + + class FloatTensor(_C.FloatTensorBase, _TensorBase): + def is_signed(self): return True + @classmethod def storage_type(cls): return FloatStorage + + class LongTensor(_C.LongTensorBase, _TensorBase): + def is_signed(self): return True + @classmethod def storage_type(cls): return LongStorage + + class IntTensor(_C.IntTensorBase, _TensorBase): + def is_signed(self): return True + @classmethod def storage_type(cls): return IntStorage + + class ShortTensor(_C.ShortTensorBase, _TensorBase): + def is_signed(self): return True + @classmethod def storage_type(cls): return ShortStorage + + class CharTensor(_C.CharTensorBase, _TensorBase): + def is_signed(self): # TODO return False + @classmethod def storage_type(cls): return CharStorage + + class ByteTensor(_C.ByteTensorBase, _TensorBase): + def is_signed(self): return False + @classmethod def storage_type(cls): return ByteStorage diff --git a/torch/_tensor_docs.py b/torch/_tensor_docs.py index a4e6cfc808..caae3905ef 100644 --- a/torch/_tensor_docs.py +++ b/torch/_tensor_docs.py @@ -4,133 +4,133 @@ import torch._C from torch._C import _add_docstr as add_docstr add_docstr(torch._C.FloatTensorBase.abs, -""" + """ abs() -> Tensor See :func:`torch.abs` """) add_docstr(torch._C.FloatTensorBase.abs_, -""" + """ abs_() -> Tensor In-place version of :meth:`~Tensor.abs` """) add_docstr(torch._C.FloatTensorBase.acos, -""" + """ acos() -> Tensor See :func:`torch.acos` """) add_docstr(torch._C.FloatTensorBase.acos_, -""" + """ acos_() -> Tensor In-place version of :meth:`~Tensor.acos` """) add_docstr(torch._C.FloatTensorBase.add, -""" + """ add(value) See :func:`torch.add` """) 
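For context, the hunks in torch/_tensor_docs.py above and below only re-indent the string literal passed as the second argument to `add_docstr`; the calls themselves are unchanged. A minimal sketch of that pattern (illustrative only; the real registrations run once when torch/_tensor_docs.py is imported):

    import torch._C
    from torch._C import _add_docstr as add_docstr

    # Attach a docstring to a method implemented in the C extension. The
    # multi-line string is an ordinary continuation argument, which is why
    # these hunks adjust its indentation rather than its contents.
    add_docstr(torch._C.FloatTensorBase.abs,
               """
    abs() -> Tensor

    See :func:`torch.abs`
    """)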
add_docstr(torch._C.FloatTensorBase.add_, -""" + """ add_(value) In-place version of :meth:`~Tensor.add` """) add_docstr(torch._C.FloatTensorBase.addbmm, -""" + """ addbmm(beta=1, mat, alpha=1, batch1, batch2) -> Tensor See :func:`torch.addbmm` """) add_docstr(torch._C.FloatTensorBase.addbmm_, -""" + """ addbmm_(beta=1, mat, alpha=1, batch1, batch2) -> Tensor In-place version of :meth:`~Tensor.addbmm` """) add_docstr(torch._C.FloatTensorBase.addcdiv, -""" + """ addcdiv(value=1, tensor1, tensor2) -> Tensor See :func:`torch.addcdiv` """) add_docstr(torch._C.FloatTensorBase.addcdiv_, -""" + """ addcdiv_(value=1, tensor1, tensor2) -> Tensor In-place version of :meth:`~Tensor.addcdiv` """) add_docstr(torch._C.FloatTensorBase.addcmul, -""" + """ addcmul(value=1, tensor1, tensor2) -> Tensor See :func:`torch.addcmul` """) add_docstr(torch._C.FloatTensorBase.addcmul_, -""" + """ addcmul_(value=1, tensor1, tensor2) -> Tensor In-place version of :meth:`~Tensor.addcmul` """) add_docstr(torch._C.FloatTensorBase.addmm, -""" + """ addmm(beta=1, mat, alpha=1, mat1, mat2) -> Tensor See :func:`torch.addmm` """) add_docstr(torch._C.FloatTensorBase.addmm_, -""" + """ addmm_(beta=1, mat, alpha=1, mat1, mat2) -> Tensor In-place version of :meth:`~Tensor.addmm` """) add_docstr(torch._C.FloatTensorBase.addmv, -""" + """ addmv(beta=1, tensor, alpha=1, mat, vec) -> Tensor See :func:`torch.addmv` """) add_docstr(torch._C.FloatTensorBase.addmv_, -""" + """ addmv_(beta=1, tensor, alpha=1, mat, vec) -> Tensor In-place version of :meth:`~Tensor.addmv` """) add_docstr(torch._C.FloatTensorBase.addr, -""" + """ addr(beta=1, alpha=1, vec1, vec2) -> Tensor See :func:`torch.addr` """) add_docstr(torch._C.FloatTensorBase.addr_, -""" + """ addr_(beta=1, alpha=1, vec1, vec2) -> Tensor In-place version of :meth:`~Tensor.addr` """) add_docstr(torch._C.FloatTensorBase.apply_, -""" + """ apply_(callable) -> Tensor Applies the function :attr:`callable` to each element in the tensor, replacing @@ -143,84 +143,84 @@ each element with the value returned by :attr:`callable`. 
""") add_docstr(torch._C.FloatTensorBase.asin, -""" + """ asin() -> Tensor See :func:`torch.asin` """) add_docstr(torch._C.FloatTensorBase.asin_, -""" + """ asin_() -> Tensor In-place version of :meth:`~Tensor.asin` """) add_docstr(torch._C.FloatTensorBase.atan, -""" + """ atan() -> Tensor See :func:`torch.atan` """) add_docstr(torch._C.FloatTensorBase.atan2, -""" + """ atan2(other) -> Tensor See :func:`torch.atan2` """) add_docstr(torch._C.FloatTensorBase.atan2_, -""" + """ atan2_(other) -> Tensor In-place version of :meth:`~Tensor.atan2` """) add_docstr(torch._C.FloatTensorBase.atan_, -""" + """ atan_() -> Tensor In-place version of :meth:`~Tensor.atan` """) add_docstr(torch._C.FloatTensorBase.baddbmm, -""" + """ baddbmm(beta=1, alpha=1, batch1, batch2) -> Tensor See :func:`torch.baddbmm` """) add_docstr(torch._C.FloatTensorBase.baddbmm_, -""" + """ baddbmm_(beta=1, alpha=1, batch1, batch2) -> Tensor In-place version of :meth:`~Tensor.baddbmm` """) add_docstr(torch._C.FloatTensorBase.bernoulli, -""" + """ bernoulli() -> Tensor See :func:`torch.bernoulli` """) add_docstr(torch._C.FloatTensorBase.bernoulli_, -""" + """ bernoulli_() -> Tensor In-place version of :meth:`~Tensor.bernoulli` """) add_docstr(torch._C.FloatTensorBase.bmm, -""" + """ bmm(batch2) -> Tensor See :func:`torch.bmm` """) add_docstr(torch._C.FloatTensorBase.cauchy_, -""" + """ cauchy_(generator=None, median=0, sigma=1) -> Tensor Fills the tensor with numbers drawn from the Cauchy distribution: @@ -231,35 +231,35 @@ Fills the tensor with numbers drawn from the Cauchy distribution: """) add_docstr(torch._C.FloatTensorBase.ceil, -""" + """ ceil() -> Tensor See :func:`torch.ceil` """) add_docstr(torch._C.FloatTensorBase.ceil_, -""" + """ ceil_() -> Tensor In-place version of :meth:`~Tensor.ceil` """) add_docstr(torch._C.FloatTensorBase.clamp, -""" + """ clamp(min, max) -> Tensor See :func:`torch.clamp` """) add_docstr(torch._C.FloatTensorBase.clamp_, -""" + """ clamp_(min, max) -> Tensor In-place version of :meth:`~Tensor.clamp` """) add_docstr(torch._C.FloatTensorBase.clone, -""" + """ clone() -> Tensor Returns a copy of the tensor. The copy has the same size and data type as the @@ -267,7 +267,7 @@ original tensor. """) add_docstr(torch._C.FloatTensorBase.contiguous, -""" + """ contiguous() -> Tensor Returns a contiguous Tensor containing the same data as this tensor. If this @@ -275,7 +275,7 @@ tensor is contiguous, this function returns the original tensor. """) add_docstr(torch._C.FloatTensorBase.copy_, -""" + """ copy_(src, async=False) -> Tensor Copies the elements from :attr:`src` into this tensor and returns this tensor. 
@@ -291,112 +291,112 @@ Args: """) add_docstr(torch._C.FloatTensorBase.cos, -""" + """ cos() -> Tensor See :func:`torch.cos` """) add_docstr(torch._C.FloatTensorBase.cos_, -""" + """ cos_() -> Tensor In-place version of :meth:`~Tensor.cos` """) add_docstr(torch._C.FloatTensorBase.cosh, -""" + """ cosh() -> Tensor See :func:`torch.cosh` """) add_docstr(torch._C.FloatTensorBase.cosh_, -""" + """ cosh_() -> Tensor In-place version of :meth:`~Tensor.cosh` """) add_docstr(torch._C.FloatTensorBase.cross, -""" + """ cross(other, dim=-1) -> Tensor See :func:`torch.cross` """) add_docstr(torch._C.FloatTensorBase.cumprod, -""" + """ cumprod(dim) -> Tensor See :func:`torch.cumprod` """) add_docstr(torch._C.FloatTensorBase.cumsum, -""" + """ cumsum(dim) -> Tensor See :func:`torch.cumsum` """) add_docstr(torch._C.FloatTensorBase.data_ptr, -""" + """ data_ptr() -> int Returns the address of the first element of this tensor. """) add_docstr(torch._C.FloatTensorBase.diag, -""" + """ diag(diagonal=0) -> Tensor See :func:`torch.diag` """) add_docstr(torch._C.FloatTensorBase.dim, -""" + """ dim() -> int Returns the number of dimensions of this tensor. """) add_docstr(torch._C.FloatTensorBase.dist, -""" + """ dist(other, p=2) -> Tensor See :func:`torch.dist` """) add_docstr(torch._C.FloatTensorBase.div, -""" + """ div(value) See :func:`torch.div` """) add_docstr(torch._C.FloatTensorBase.div_, -""" + """ div_(value) In-place version of :meth:`~Tensor.div` """) add_docstr(torch._C.FloatTensorBase.dot, -""" + """ dot(tensor2) -> float See :func:`torch.dot` """) add_docstr(torch._C.FloatTensorBase.eig, -""" + """ eig(eigenvectors=False) -> (Tensor, Tensor) See :func:`torch.eig` """) add_docstr(torch._C.FloatTensorBase.element_size, -""" + """ element_size() -> int Returns the size in bytes of an individual element. @@ -409,42 +409,42 @@ Example: """) add_docstr(torch._C.FloatTensorBase.eq, -""" + """ eq(other) -> Tensor See :func:`torch.eq` """) add_docstr(torch._C.FloatTensorBase.eq_, -""" + """ eq_(other) -> Tensor In-place version of :meth:`~Tensor.eq` """) add_docstr(torch._C.FloatTensorBase.equal, -""" + """ equal(other) -> bool See :func:`torch.equal` """) add_docstr(torch._C.FloatTensorBase.exp, -""" + """ exp() -> Tensor See :func:`torch.exp` """) add_docstr(torch._C.FloatTensorBase.exp_, -""" + """ exp_() -> Tensor In-place version of :meth:`~Tensor.exp` """) add_docstr(torch._C.FloatTensorBase.exponential_, -""" + """ exponential_(generator=None, lambd=1) -> Tensor Fills this tensor with elements drawn from the exponential distribution: @@ -455,84 +455,84 @@ Fills this tensor with elements drawn from the exponential distribution: """) add_docstr(torch._C.FloatTensorBase.fill_, -""" + """ fill_(value) -> Tensor Fills this tensor with the specified value. 
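As a quick usage sketch of the in-place `fill_` documented just above (throughout these docstrings a trailing underscore marks an in-place operation that also returns the tensor itself):

    import torch

    t = torch.Tensor(2, 3)   # uninitialized 2x3 FloatTensor
    t.fill_(1.5)             # every element becomes 1.5, in place
    print(t)                 # fill_ also returns t, so calls can be chained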
""") add_docstr(torch._C.FloatTensorBase.floor, -""" + """ floor() -> Tensor See :func:`torch.floor` """) add_docstr(torch._C.FloatTensorBase.floor_, -""" + """ floor_() -> Tensor In-place version of :meth:`~Tensor.floor` """) add_docstr(torch._C.FloatTensorBase.fmod, -""" + """ fmod(divisor) -> Tensor See :func:`torch.fmod` """) add_docstr(torch._C.FloatTensorBase.fmod_, -""" + """ fmod_(divisor) -> Tensor In-place version of :meth:`~Tensor.fmod` """) add_docstr(torch._C.FloatTensorBase.frac, -""" + """ frac() -> Tensor See :func:`torch.frac` """) add_docstr(torch._C.FloatTensorBase.frac_, -""" + """ frac_() -> Tensor In-place version of :meth:`~Tensor.frac` """) add_docstr(torch._C.FloatTensorBase.gather, -""" + """ gather(dim, index) -> Tensor See :func:`torch.gather` """) add_docstr(torch._C.FloatTensorBase.ge, -""" + """ ge(other) -> Tensor See :func:`torch.ge` """) add_docstr(torch._C.FloatTensorBase.ge_, -""" + """ ge_(other) -> Tensor In-place version of :meth:`~Tensor.ge` """) add_docstr(torch._C.FloatTensorBase.gels, -""" + """ gels(A) -> Tensor See :func:`torch.gels` """) add_docstr(torch._C.FloatTensorBase.geometric_, -""" + """ geometric_(generator=None, p) -> Tensor Fills this tensor with elements drawn from the geometric distribution: @@ -544,49 +544,49 @@ Fills this tensor with elements drawn from the geometric distribution: """) add_docstr(torch._C.FloatTensorBase.geqrf, -""" + """ geqrf() -> (Tensor, Tensor) See :func:`torch.geqrf` """) add_docstr(torch._C.FloatTensorBase.ger, -""" + """ ger(vec2) -> Tensor See :func:`torch.ger` """) add_docstr(torch._C.FloatTensorBase.gesv, -""" + """ gesv(A) -> Tensor, Tensor See :func:`torch.gesv` """) add_docstr(torch._C.FloatTensorBase.gt, -""" + """ gt(other) -> Tensor See :func:`torch.gt` """) add_docstr(torch._C.FloatTensorBase.gt_, -""" + """ gt_(other) -> Tensor In-place version of :meth:`~Tensor.gt` """) add_docstr(torch._C.FloatTensorBase.histc, -""" + """ histc(bins=100, min=0, max=0) -> Tensor See :func:`torch.histc` """) add_docstr(torch._C.FloatTensorBase.index, -""" + """ index(m) -> Tensor Selects elements from this tensor using a binary mask or along a given @@ -597,7 +597,7 @@ Args: """) add_docstr(torch._C.FloatTensorBase.index_add_, -""" + """ index_add_(dim, index, tensor) -> Tensor Accumulate the elements of tensor into the original tensor by adding to the @@ -622,7 +622,7 @@ Example: """) add_docstr(torch._C.FloatTensorBase.index_copy_, -""" + """ index_copy_(dim, index, tensor) -> Tensor Copies the elements of tensor into the original tensor by selecting the @@ -647,7 +647,7 @@ Example: """) add_docstr(torch._C.FloatTensorBase.index_fill_, -""" + """ index_fill_(dim, index, tensor) -> Tensor Fills the elements of the original tensor with value :attr:`val` by selecting @@ -670,28 +670,28 @@ Example: """) add_docstr(torch._C.FloatTensorBase.index_select, -""" + """ index_select(dim, index) -> Tensor See :func:`torch.index_select` """) add_docstr(torch._C.FloatTensorBase.inverse, -""" + """ inverse() -> Tensor See :func:`torch.inverse` """) add_docstr(torch._C.FloatTensorBase.is_contiguous, -""" + """ is_contiguous() -> bool Returns True if this tensor is contiguous in memory in C order. """) add_docstr(torch._C.FloatTensorBase.is_set_to, -""" + """ is_set_to(tensor) -> bool Returns True if this object refers to the same ``THTensor`` object from the @@ -699,56 +699,56 @@ Torch C API as the given tensor. 
""") add_docstr(torch._C.FloatTensorBase.kthvalue, -""" + """ kthvalue(k, dim=None) -> (Tensor, LongTensor) See :func:`torch.kthvalue` """) add_docstr(torch._C.FloatTensorBase.le, -""" + """ le(other) -> Tensor See :func:`torch.le` """) add_docstr(torch._C.FloatTensorBase.le_, -""" + """ le_(other) -> Tensor In-place version of :meth:`~Tensor.le` """) add_docstr(torch._C.FloatTensorBase.lerp, -""" + """ lerp(start, end, weight) See :func:`torch.lerp` """) add_docstr(torch._C.FloatTensorBase.lerp_, -""" + """ lerp_(start, end, weight) In-place version of :meth:`~Tensor.lerp` """) add_docstr(torch._C.FloatTensorBase.log, -""" + """ log() -> Tensor See :func:`torch.log` """) add_docstr(torch._C.FloatTensorBase.log1p, -""" + """ log1p() -> Tensor See :func:`torch.log1p` """) add_docstr(torch._C.FloatTensorBase.log1p_, -""" + """ log1p_() -> Tensor In-place version of :meth:`~Tensor.log1p` @@ -774,21 +774,21 @@ underlying normal distribution, and not of the returned distribution: """) add_docstr(torch._C.FloatTensorBase.lt, -""" + """ lt(other) -> Tensor See :func:`torch.lt` """) add_docstr(torch._C.FloatTensorBase.lt_, -""" + """ lt_(other) -> Tensor In-place version of :meth:`~Tensor.lt` """) add_docstr(torch._C.FloatTensorBase.map_, -""" + """ map_(tensor, callable) Applies :attr:`callable` for each element in this tensor and the given tensor @@ -799,7 +799,7 @@ signature:: """) add_docstr(torch._C.FloatTensorBase.masked_copy_, -""" + """ masked_copy_(mask, source) Copies elements from :attr:`source` into this tensor at positions where the @@ -818,7 +818,7 @@ Args: """) add_docstr(torch._C.FloatTensorBase.masked_fill_, -""" + """ masked_fill_(mask, value) Fills elements of this tensor with :attr:`value` where :attr:`mask` is one. @@ -831,84 +831,84 @@ Args: """) add_docstr(torch._C.FloatTensorBase.masked_select, -""" + """ masked_select(mask) -> Tensor See :func:`torch.masked_select` """) add_docstr(torch._C.FloatTensorBase.max, -""" + """ max(dim=None) -> float or (Tensor, Tensor) See :func:`torch.max` """) add_docstr(torch._C.FloatTensorBase.mean, -""" + """ mean(dim=None) -> float or (Tensor, Tensor) See :func:`torch.mean` """) add_docstr(torch._C.FloatTensorBase.median, -""" + """ median(dim=-1, values=None, indices=None) -> (Tensor, LongTensor) See :func:`torch.median` """) add_docstr(torch._C.FloatTensorBase.min, -""" + """ min(dim=None) -> float or (Tensor, Tensor) See :func:`torch.min` """) add_docstr(torch._C.FloatTensorBase.mm, -""" + """ mm(mat2) -> Tensor See :func:`torch.mm` """) add_docstr(torch._C.FloatTensorBase.mode, -""" + """ mode(dim=-1, values=None, indices=None) -> (Tensor, LongTensor) See :func:`torch.mode` """) add_docstr(torch._C.FloatTensorBase.mul, -""" + """ mul(value) -> Tensor See :func:`torch.mul` """) add_docstr(torch._C.FloatTensorBase.mul_, -""" + """ mul_(value) In-place version of :meth:`~Tensor.mul` """) add_docstr(torch._C.FloatTensorBase.multinomial, -""" + """ multinomial(generator=None, num_samples, replacement=False) See :func:`torch.multinomial` """) add_docstr(torch._C.FloatTensorBase.mv, -""" + """ mv(vec) -> Tensor See :func:`torch.mv` """) add_docstr(torch._C.FloatTensorBase.narrow, -""" + """ narrow(dimension, start, length) -> Tensor Returns a new tensor that is a narrowed version of this tensor. 
The dimension @@ -934,63 +934,63 @@ Example: """) add_docstr(torch._C.FloatTensorBase.ndimension, -""" + """ ndimension() -> int Alias for :meth:`~Tensor.dim()` """) add_docstr(torch._C.FloatTensorBase.ne, -""" + """ ne(other) -> Tensor See :func:`torch.ne` """) add_docstr(torch._C.FloatTensorBase.ne_, -""" + """ ne_(other) -> Tensor In-place version of :meth:`~Tensor.ne` """) add_docstr(torch._C.FloatTensorBase.neg, -""" + """ neg() -> Tensor See :func:`torch.neg` """) add_docstr(torch._C.FloatTensorBase.neg_, -""" + """ neg_() -> Tensor In-place version of :meth:`~Tensor.neg` """) add_docstr(torch._C.FloatTensorBase.nelement, -""" + """ nelement() -> int Alias for :meth:`~Tensor.numel` """) add_docstr(torch._C.FloatTensorBase.nonzero, -""" + """ nonzero() -> LongTensor See :func:`torch.nonzero` """) add_docstr(torch._C.FloatTensorBase.norm, -""" + """ norm(p=2) -> float See :func:`torch.norm` """) add_docstr(torch._C.FloatTensorBase.normal_, -""" + """ normal_(generator=None, mean=0, var=1) Fills this tensor with elements samples from the normal distribution @@ -998,14 +998,14 @@ parameterized by :attr:`mean` and :attr:`var`. """) add_docstr(torch._C.FloatTensorBase.numel, -""" + """ numel() -> int See :func:`torch.numel` """) add_docstr(torch._C.FloatTensorBase.numpy, -""" + """ numpy() -> ndarray Returns this tensor as a NumPy :class:`ndarray`. This tensor and the returned @@ -1014,77 +1014,77 @@ be reflected in the :class:`ndarray` and vice versa. """) add_docstr(torch._C.FloatTensorBase.orgqr, -""" + """ orgqr(input2) -> Tensor See :func:`torch.orgqr` """) add_docstr(torch._C.FloatTensorBase.ormqr, -""" + """ ormqr(input2, input3, left=True, transpose=False) -> Tensor See :func:`torch.ormqr` """) add_docstr(torch._C.FloatTensorBase.potrf, -""" + """ potrf(upper=True) -> Tensor See :func:`torch.potrf` """) add_docstr(torch._C.FloatTensorBase.potri, -""" + """ potri(upper=True) -> Tensor See :func:`torch.potri` """) add_docstr(torch._C.FloatTensorBase.potrs, -""" + """ potrs(input2, upper=True) -> Tensor See :func:`torch.potrs` """) add_docstr(torch._C.FloatTensorBase.pow, -""" + """ pow(exponent) See :func:`torch.pow` """) add_docstr(torch._C.FloatTensorBase.pow_, -""" + """ pow_(exponent) In-place version of :meth:`~Tensor.pow` """) add_docstr(torch._C.FloatTensorBase.prod, -""" + """ prod() -> float See :func:`torch.prod` """) add_docstr(torch._C.FloatTensorBase.pstrf, -""" + """ pstrf(upper=True, tol=-1) -> (Tensor, IntTensor) See :func:`torch.pstrf` """) add_docstr(torch._C.FloatTensorBase.qr, -""" + """ qr() -> (Tensor, Tensor) See :func:`torch.qr` """) add_docstr(torch._C.FloatTensorBase.random_, -""" + """ random_(generator=None, from=0, to=None) Fills this tensor with numbers sampled from the uniform distribution or @@ -1093,49 +1093,49 @@ defaults to the largest value representable by this tensor's data type. 
""") add_docstr(torch._C.FloatTensorBase.reciprocal, -""" + """ reciprocal() -> Tensor See :func:`torch.reciprocal` """) add_docstr(torch._C.FloatTensorBase.reciprocal_, -""" + """ reciprocal_() -> Tensor In-place version of :meth:`~Tensor.reciprocal` """) add_docstr(torch._C.FloatTensorBase.remainder, -""" + """ remainder(divisor) -> Tensor See :func:`torch.remainder` """) add_docstr(torch._C.FloatTensorBase.remainder_, -""" + """ remainder_(divisor) -> Tensor In-place version of :meth:`~Tensor.remainder` """) add_docstr(torch._C.FloatTensorBase.renorm, -""" + """ renorm(p, dim, maxnorm) -> Tensor See :func:`torch.renorm` """) add_docstr(torch._C.FloatTensorBase.renorm_, -""" + """ renorm_(p, dim, maxnorm) -> Tensor In-place version of :meth:`~Tensor.renorm` """) add_docstr(torch._C.FloatTensorBase.resize_, -""" + """ resize_(*sizes) Resizes this tensor to the specified size. If the number of elements is @@ -1157,7 +1157,7 @@ Example: """) add_docstr(torch._C.FloatTensorBase.resize_as_, -""" + """ resize_as_(tensor) Resizes the current tensor to be the same size as the specified tensor. This is @@ -1167,35 +1167,35 @@ equivalent to:: """) add_docstr(torch._C.FloatTensorBase.round, -""" + """ round() -> Tensor See :func:`torch.round` """) add_docstr(torch._C.FloatTensorBase.round_, -""" + """ round_() -> Tensor In-place version of :meth:`~Tensor.round` """) add_docstr(torch._C.FloatTensorBase.rsqrt, -""" + """ rsqrt() -> Tensor See :func:`torch.rsqrt` """) add_docstr(torch._C.FloatTensorBase.rsqrt_, -""" + """ rsqrt_() -> Tensor In-place version of :meth:`~Tensor.rsqrt` """) add_docstr(torch._C.FloatTensorBase.scatter_, -""" + """ scatter_(input, dim, index, src) -> Tensor Writes all values from the Tensor :attr:`src` into self at the indices specified @@ -1237,7 +1237,7 @@ Example:: """) add_docstr(torch._C.FloatTensorBase.select, -""" + """ select(dim, index) -> Tensor or number Slices the tensor along the selected dimension at the given index. If this @@ -1256,7 +1256,7 @@ Args: """) add_docstr(torch._C.FloatTensorBase.set_, -""" + """ set_(source=None, storage_offset=0, size=None, stride=None) Sets the underlying storage, size, and strides. If :attr:`source` is a tensor, @@ -1275,70 +1275,70 @@ Args: """) add_docstr(torch._C.FloatTensorBase.set_index, -""" + """ set_index(index, value) Alias for ``self[index] = value`` """) add_docstr(torch._C.FloatTensorBase.sigmoid, -""" + """ sigmoid() -> Tensor See :func:`torch.sigmoid` """) add_docstr(torch._C.FloatTensorBase.sigmoid_, -""" + """ sigmoid_() -> Tensor In-place version of :meth:`~Tensor.sigmoid` """) add_docstr(torch._C.FloatTensorBase.sign, -""" + """ sign() -> Tensor See :func:`torch.sign` """) add_docstr(torch._C.FloatTensorBase.sign_, -""" + """ sign_() -> Tensor In-place version of :meth:`~Tensor.sign` """) add_docstr(torch._C.FloatTensorBase.sin, -""" + """ sin() -> Tensor See :func:`torch.sin` """) add_docstr(torch._C.FloatTensorBase.sin_, -""" + """ sin_() -> Tensor In-place version of :meth:`~Tensor.sin` """) add_docstr(torch._C.FloatTensorBase.sinh, -""" + """ sinh() -> Tensor See :func:`torch.sinh` """) add_docstr(torch._C.FloatTensorBase.sinh_, -""" + """ sinh_() -> Tensor In-place version of :meth:`~Tensor.sinh` """) add_docstr(torch._C.FloatTensorBase.size, -""" + """ size() -> torch.Size Returns the size of the tensor. 
The returned value is a subclass of @@ -1350,56 +1350,56 @@ Example: """) add_docstr(torch._C.FloatTensorBase.sort, -""" + """ sort(dim=None, descending=False) -> (Tensor, LongTensor) See :func:`torch.sort` """) add_docstr(torch._C.FloatTensorBase.sqrt, -""" + """ sqrt() -> Tensor See :func:`torch.sqrt` """) add_docstr(torch._C.FloatTensorBase.sqrt_, -""" + """ sqrt_() -> Tensor In-place version of :meth:`~Tensor.sqrt` """) add_docstr(torch._C.FloatTensorBase.squeeze, -""" + """ squeeze(dim=None) See :func:`torch.squeeze` """) add_docstr(torch._C.FloatTensorBase.squeeze_, -""" + """ squeeze_(dim=None) In-place version of :meth:`~Tensor.squeeze` """) add_docstr(torch._C.FloatTensorBase.std, -""" + """ std() -> float See :func:`torch.std` """) add_docstr(torch._C.FloatTensorBase.storage, -""" + """ storage() -> torch.Storage Returns the underlying storage """) add_docstr(torch._C.FloatTensorBase.storage_offset, -""" + """ storage_offset() -> int Returns this tensor's offset in the underlying storage in terms of number of @@ -1414,14 +1414,14 @@ Example: """) add_docstr(torch._C.FloatTensorBase.stride, -""" + """ stride() -> tuple Returns the stride of the tensor. """) add_docstr(torch._C.FloatTensorBase.sub, -""" + """ sub(value, other) -> Tensor Subtracts a scalar or tensor from this tensor. If both :attr:`value` and @@ -1430,154 +1430,154 @@ Subtracts a scalar or tensor from this tensor. If both :attr:`value` and """) add_docstr(torch._C.FloatTensorBase.sub_, -""" + """ sub_(x) -> Tensor In-place version of :meth:`~Tensor.sub` """) add_docstr(torch._C.FloatTensorBase.sum, -""" + """ sum(dim=None) -> float See :func:`torch.sum` """) add_docstr(torch._C.FloatTensorBase.svd, -""" + """ svd(some=True) -> (Tensor, Tensor, Tensor) See :func:`torch.svd` """) add_docstr(torch._C.FloatTensorBase.symeig, -""" + """ symeig(eigenvectors=False, upper=True) -> (Tensor, Tensor) See :func:`torch.symeig` """) add_docstr(torch._C.FloatTensorBase.t, -""" + """ t() -> Tensor See :func:`torch.t` """) add_docstr(torch._C.FloatTensorBase.t_, -""" + """ t_() -> Tensor In-place version of :meth:`~Tensor.t` """) add_docstr(torch._C.FloatTensorBase.tan, -""" + """ tan() -> Tensor See :func:`torch.tan` """) add_docstr(torch._C.FloatTensorBase.tan_, -""" + """ tan_() -> Tensor In-place version of :meth:`~Tensor.tan` """) add_docstr(torch._C.FloatTensorBase.tanh, -""" + """ tanh() -> Tensor See :func:`torch.tanh` """) add_docstr(torch._C.FloatTensorBase.tanh_, -""" + """ tanh_() -> Tensor In-place version of :meth:`~Tensor.tanh` """) add_docstr(torch._C.FloatTensorBase.topk, -""" + """ topk(k, dim=None, largest=True, sorted=True) -> (Tensor, LongTensor) See :func:`torch.topk` """) add_docstr(torch._C.FloatTensorBase.trace, -""" + """ trace() -> float See :func:`torch.trace` """) add_docstr(torch._C.FloatTensorBase.transpose, -""" + """ transpose(dim0, dim1) -> Tensor See :func:`torch.transpose` """) add_docstr(torch._C.FloatTensorBase.transpose_, -""" + """ transpose_(dim0, dim1) -> Tensor In-place version of :meth:`~Tensor.transpose` """) add_docstr(torch._C.FloatTensorBase.tril, -""" + """ tril(k=0) -> Tensor See :func:`torch.tril` """) add_docstr(torch._C.FloatTensorBase.tril_, -""" + """ tril_(k=0) -> Tensor In-place version of :meth:`~Tensor.tril` """) add_docstr(torch._C.FloatTensorBase.triu, -""" + """ triu(k=0) -> Tensor See :func:`torch.triu` """) add_docstr(torch._C.FloatTensorBase.triu_, -""" + """ triu_(k=0) -> Tensor In-place version of :meth:`~Tensor.triu` """) add_docstr(torch._C.FloatTensorBase.trtrs, 
-""" + """ trtrs(A, upper=True, transpose=False, unitriangular=False) -> (Tensor, Tensor) See :func:`torch.trtrs` """) add_docstr(torch._C.FloatTensorBase.trunc, -""" + """ trunc() -> Tensor See :func:`torch.trunc` """) add_docstr(torch._C.FloatTensorBase.trunc_, -""" + """ trunc_() -> Tensor In-place version of :meth:`~Tensor.trunc` """) add_docstr(torch._C.FloatTensorBase.unfold, -""" + """ unfold(dim, size, step) -> Tensor Returns a tensor which contains all slices of size :attr:`size` in @@ -1629,7 +1629,7 @@ Example:: """) add_docstr(torch._C.FloatTensorBase.uniform_, -""" + """ uniform_(from=0, to=1) -> Tensor Fills this tensor with numbers sampled from the uniform distribution: @@ -1640,14 +1640,14 @@ Fills this tensor with numbers sampled from the uniform distribution: """) add_docstr(torch._C.FloatTensorBase.var, -""" + """ var() -> float See :func:`torch.var` """) add_docstr(torch._C.FloatTensorBase.zero_, -""" + """ zero_() Fills this tensor with zeros. diff --git a/torch/_tensor_str.py b/torch/_tensor_str.py index 3909f0989c..1ccf46e5b3 100644 --- a/torch/_tensor_str.py +++ b/torch/_tensor_str.py @@ -22,7 +22,7 @@ def set_printoptions( edgeitems=None, linewidth=None, profile=None, - ): +): """Set options for printing. Items shamelessly taken from Numpy Args: @@ -119,7 +119,7 @@ def _number_format(tensor, min_sz=-1): else: if exp_max > prec + 1 or exp_max < 0: sz = max(min_sz, 7) - scale = math.pow(10, exp_max-1) + scale = math.pow(10, exp_max - 1) else: if exp_max == 0: sz = 7 @@ -132,19 +132,19 @@ def _number_format(tensor, min_sz=-1): def _tensor_str(self): n = PRINT_OPTS.edgeitems - has_hdots = self.size()[-1] > 2*n - has_vdots = self.size()[-2] > 2*n + has_hdots = self.size()[-1] > 2 * n + has_vdots = self.size()[-2] > 2 * n print_full_mat = not has_hdots and not has_vdots formatter = _number_format(self, min_sz=3 if not print_full_mat else 0) print_dots = self.numel() >= PRINT_OPTS.threshold dim_sz = max(2, max(len(str(x)) for x in self.size())) dim_fmt = "{:^" + str(dim_sz) + "}" - dot_fmt = u"{:^" + str(dim_sz+1) + "}" + dot_fmt = u"{:^" + str(dim_sz + 1) + "}" counter_dim = self.ndimension() - 2 counter = torch.LongStorage(counter_dim).fill_(0) - counter[counter.size()-1] = -1 + counter[counter.size() - 1] = -1 finished = False strt = '' while True: @@ -152,7 +152,7 @@ def _tensor_str(self): nskipped = [False for i in counter] for i in _range(counter_dim - 1, -1, -1): counter[i] += 1 - if print_dots and counter[i] == n and self.size(i) > 2*n: + if print_dots and counter[i] == n and self.size(i) > 2 * n: counter[i] = self.size(i) - n nskipped[i] = True if counter[i] == self.size(i): @@ -188,18 +188,18 @@ def __repr_row(row, indent, fmt, scale, sz, truncate=None): if truncate is not None: dotfmt = " {:^5} " return (indent + - ' '.join(fmt.format(val/scale) for val in row[:truncate]) + + ' '.join(fmt.format(val / scale) for val in row[:truncate]) + dotfmt.format('...') + - ' '.join(fmt.format(val/scale) for val in row[-truncate:]) + + ' '.join(fmt.format(val / scale) for val in row[-truncate:]) + '\n') else: - return indent + ' '.join(fmt.format(val/scale) for val in row) + '\n' + return indent + ' '.join(fmt.format(val / scale) for val in row) + '\n' def _matrix_str(self, indent='', formatter=None, force_truncate=False): n = PRINT_OPTS.edgeitems - has_hdots = self.size(1) > 2*n - has_vdots = self.size(0) > 2*n + has_hdots = self.size(1) > 2 * n + has_vdots = self.size(0) > 2 * n print_full_mat = not has_hdots and not has_vdots if formatter is None: @@ -207,14 +207,14 @@ 
def _matrix_str(self, indent='', formatter=None, force_truncate=False): min_sz=5 if not print_full_mat else 0) else: fmt, scale, sz = formatter - nColumnPerLine = int(math.floor((PRINT_OPTS.linewidth-len(indent))/(sz+1))) + nColumnPerLine = int(math.floor((PRINT_OPTS.linewidth - len(indent)) / (sz + 1))) strt = '' firstColumn = 0 if not force_truncate and \ (self.numel() < PRINT_OPTS.threshold or print_full_mat): while firstColumn < self.size(1): - lastColumn = min(firstColumn + nColumnPerLine - 1, self.size(1)-1) + lastColumn = min(firstColumn + nColumnPerLine - 1, self.size(1) - 1) if nColumnPerLine < self.size(1): strt += '\n' if firstColumn != 1 else '' strt += 'Columns {} to {} \n{}'.format( @@ -223,15 +223,15 @@ def _matrix_str(self, indent='', formatter=None, force_truncate=False): strt += SCALE_FORMAT.format(scale) for l in _range(self.size(0)): strt += indent + (' ' if scale != 1 else '') - row_slice = self[l, firstColumn:lastColumn+1] - strt += ' '.join(fmt.format(val/scale) for val in row_slice) + row_slice = self[l, firstColumn:lastColumn + 1] + strt += ' '.join(fmt.format(val / scale) for val in row_slice) strt += '\n' firstColumn = lastColumn + 1 else: if scale != 1: strt += SCALE_FORMAT.format(scale) if has_vdots and has_hdots: - vdotfmt = "{:^" + str((sz+1)*n-1) + "}" + vdotfmt = "{:^" + str((sz + 1) * n - 1) + "}" ddotfmt = u"{:^5}" for row in self[:n]: strt += __repr_row(row, indent, fmt, scale, sz, n) @@ -245,8 +245,8 @@ def _matrix_str(self, indent='', formatter=None, force_truncate=False): strt += __repr_row(row, indent, fmt, scale, sz, n) elif has_vdots and not has_hdots: vdotfmt = u"{:^" + \ - str(len(__repr_row(self[0], '', fmt, scale, sz))) + \ - "}\n" + str(len(__repr_row(self[0], '', fmt, scale, sz))) + \ + "}\n" for row in self[:n]: strt += __repr_row(row, indent, fmt, scale, sz) strt += vdotfmt.format(u'\u22EE') @@ -269,13 +269,13 @@ def _vector_str(self): ident = ' ' if self.numel() < PRINT_OPTS.threshold: return (strt + - '\n'.join(ident + fmt.format(val/scale) for val in self) + + '\n'.join(ident + fmt.format(val / scale) for val in self) + '\n') else: return (strt + - '\n'.join(ident + fmt.format(val/scale) for val in self[:n]) + + '\n'.join(ident + fmt.format(val / scale) for val in self[:n]) + '\n' + (ident + dotfmt.format(u"\u22EE")) + - '\n'.join(ident + fmt.format(val/scale) for val in self[-n:]) + + '\n'.join(ident + fmt.format(val / scale) for val in self[-n:]) + '\n') @@ -295,4 +295,3 @@ def _str(self): strt += '[{} of size {}{}]\n'.format(torch.typename(self), size_str, device_str) return '\n' + strt - diff --git a/torch/_thnn/__init__.py b/torch/_thnn/__init__.py index 97474692eb..dd41e47a27 100644 --- a/torch/_thnn/__init__.py +++ b/torch/_thnn/__init__.py @@ -2,7 +2,9 @@ import threading import torch.cuda from .utils import THNN_H_PATH, THCUNN_H_PATH, parse_header, load_backend + class Backends(object): + def __init__(self): self.backends = {} @@ -14,6 +16,7 @@ class Backends(object): class Backend(object): + def __init__(self, lib_prefix, lib_name, functions, mixins=tuple()): self.lib_prefix = lib_prefix self.lib_name = lib_name @@ -32,11 +35,12 @@ class Backend(object): with self.loading_lock: if self.backend is None: self.backend = load_backend(self.lib_prefix, self.lib_name, - self.functions, self.mixins) + self.functions, self.mixins) return self.backend class THNNCudaBackendStateMixin(object): + @property def library_state(self): return torch.cuda._state_cdata diff --git a/torch/_thnn/utils.py b/torch/_thnn/utils.py index 
c62fc2a29e..66d527a704 100644 --- a/torch/_thnn/utils.py +++ b/torch/_thnn/utils.py @@ -12,6 +12,7 @@ def _unpickle_backend(backend_name): class THNNBackendBase(object): + def __init__(self): self.methods = {} @@ -33,6 +34,7 @@ class THNNBackendBase(object): class Function(object): + def __init__(self, name): self.name = name self.arguments = [] @@ -46,6 +48,7 @@ class Function(object): class Argument(object): + def __init__(self, _type, name, is_optional): self.type = _type self.name = name diff --git a/torch/_torch_docs.py b/torch/_torch_docs.py index ed46cc8ab8..8641c188a9 100644 --- a/torch/_torch_docs.py +++ b/torch/_torch_docs.py @@ -4,7 +4,7 @@ import torch._C from torch._C import _add_docstr as add_docstr add_docstr(torch._C.abs, -"""abs(input, out=None) -> Tensor + """abs(input, out=None) -> Tensor Computes the element-wise absolute value of the given :attr:`input` a tensor. @@ -15,7 +15,7 @@ Example:: """) add_docstr(torch._C.acos, -""" + """ acos(input, out=None) -> Tensor Returns a new `Tensor` with the arccosine of the elements of :attr:`input`. @@ -44,7 +44,7 @@ Example:: """) add_docstr(torch._C.add, -""" + """ .. function:: add(input, value, out=None) Adds the scalar :attr:`value` to each element of the input :attr:`input` @@ -127,7 +127,7 @@ Example:: """) add_docstr(torch._C.addbmm, -""" + """ addbmm(beta=1, mat, alpha=1, batch1, batch2, out=None) -> Tensor Performs a batch matrix-matrix product of matrices stored @@ -167,7 +167,7 @@ Example:: """) add_docstr(torch._C.addcdiv, -""" + """ addcdiv(tensor, value=1, tensor1, tensor2, out=None) -> Tensor Performs the element-wise division of :attr:`tensor1` by :attr:`tensor2`, @@ -195,7 +195,7 @@ Example:: """) add_docstr(torch._C.addcmul, -""" + """ addcmul(tensor, value=1, tensor1, tensor2, out=None) -> Tensor Performs the element-wise multiplication of :attr:`tensor1` @@ -224,7 +224,7 @@ Example:: """) add_docstr(torch._C.addmm, -""" + """ addmm(beta=1, mat, alpha=1, mat1, mat2, out=None) -> Tensor Performs a matrix multiplication of the matrices :attr:`mat1` and :attr:`mat2`. @@ -259,7 +259,7 @@ Example:: """) add_docstr(torch._C.addmv, -""" + """ addmv(beta=1, tensor, alpha=1, mat, vec, out=None) -> Tensor Performs a matrix-vector product of the matrix :attr:`mat` and @@ -296,7 +296,7 @@ Example:: """) add_docstr(torch._C.addr, -r""" + r""" addr(beta=1, mat, alpha=1, vec1, vec2, out=None) -> Tensor Performs the outer-product of vectors :attr:`vec1` and :attr:`vec2` @@ -332,7 +332,7 @@ Example:: """) add_docstr(torch._C.asin, -""" + """ asin(input, out=None) -> Tensor Returns a new `Tensor` with the arcsine of the elements of :attr:`input`. @@ -360,7 +360,7 @@ Example:: """) add_docstr(torch._C.atan, -""" + """ atan(input, out=None) -> Tensor Returns a new `Tensor` with the arctangent of the elements of :attr:`input`. @@ -388,7 +388,7 @@ Example:: """) add_docstr(torch._C.atan2, -""" + """ atan2(input1, input2, out=None) -> Tensor Returns a new `Tensor` with the arctangent of the elements of :attr:`input1` @@ -418,7 +418,7 @@ Example:: """) add_docstr(torch._C.baddbmm, -r""" + r""" baddbmm(beta=1, mat, alpha=1, batch1, batch2, out=None) -> Tensor Performs a batch matrix-matrix product of matrices in :attr:`batch1` @@ -452,7 +452,7 @@ Example:: """) add_docstr(torch._C.bernoulli, -""" + """ bernoulli(input, out=None) -> Tensor Draws binary random numbers (0 or 1) from a bernoulli distribution. 
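A short usage sketch for `torch.bernoulli` as documented here: each entry of the input tensor is treated as the probability of drawing a 1 at that position.

    import torch

    p = torch.rand(3, 3)        # probabilities, uniform in [0, 1)
    draws = torch.bernoulli(p)  # same shape as p; every entry is 0 or 1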
@@ -508,7 +508,7 @@ Example:: """) add_docstr(torch._C.bmm, -""" + """ bmm(batch1, batch2, out=None) -> Tensor Performs a batch matrix-matrix product of matrices stored in :attr:`batch1` and :attr:`batch2`. @@ -533,7 +533,7 @@ Example:: """) add_docstr(torch._C.cat, -""" + """ cat(inputs, dimension=0) -> Tensor Concatenates the given sequence of :attr:`inputs` Tensors in the given dimension. @@ -574,7 +574,7 @@ Example:: """) add_docstr(torch._C.ceil, -""" + """ ceil(input, out=None) -> Tensor Returns a new `Tensor` with the ceil of the elements of :attr:`input`, the smallest integer greater than or equal to each element. @@ -605,7 +605,7 @@ Example:: """) add_docstr(torch._C.reciprocal, -""" + """ reciprocal(input, out=None) -> Tensor Returns a new `Tensor` with the reciprocal of the elements of :attr:`input`, i.e. :math:`1.0 / x` @@ -636,7 +636,7 @@ Example:: """) add_docstr(torch._C.clamp, -""" + """ clamp(input, min, max, out=None) -> Tensor Clamp all elements in :attr:`input` into the range `[min, max]` and return a resulting Tensor. @@ -731,7 +731,7 @@ Example:: """) add_docstr(torch._C.cos, -""" + """ cos(input, out=None) -> Tensor Returns a new `Tensor` with the cosine of the elements of :attr:`input`. @@ -759,7 +759,7 @@ Example:: """) add_docstr(torch._C.cosh, -""" + """ cosh(input, out=None) -> Tensor Returns a new `Tensor` with the hyperbolic cosine of the elements of :attr:`input`. @@ -787,7 +787,7 @@ Example:: """) add_docstr(torch._C.cross, -""" + """ cross(input, other, dim=-1, out=None) -> Tensor @@ -841,7 +841,7 @@ Example:: """) add_docstr(torch._C.cumprod, -""" + """ cumprod(input, dim, out=None) -> Tensor Returns the cumulative product of elements of :attr:`input` in the dimension :attr:`dim`. @@ -903,7 +903,7 @@ Example:: """) add_docstr(torch._C.cumsum, -""" + """ cumsum(input, dim, out=None) -> Tensor Returns the cumulative sum of elements of :attr:`input` in the dimension :attr:`dim`. @@ -951,7 +951,7 @@ Example:: """) add_docstr(torch._C.diag, -""" + """ diag(input, diagonal=0, out=None) -> Tensor - If :attr:`input` is a vector (1D Tensor), then returns a 2D square Tensor with the elements of :attr:`input` as the diagonal. @@ -1022,7 +1022,7 @@ Get the k-th diagonal of a given matrix:: """) add_docstr(torch._C.dist, -""" + """ dist(input, other, p=2, out=None) -> Tensor Returns the p-norm of (:attr:`input` - :attr:`other`) @@ -1066,7 +1066,7 @@ Example:: """) add_docstr(torch._C.div, -""" + """ .. function:: div(input, value, out=None) Divides each element of the input :attr:`input` with the scalar :attr:`value` and returns a new resulting tensor. @@ -1150,7 +1150,7 @@ Example:: """) add_docstr(torch._C.dot, -""" + """ dot(tensor1, tensor2) -> float Computes the dot product (inner product) of two tensors. Both tensors are @@ -1163,7 +1163,7 @@ Example:: """) add_docstr(torch._C.eig, -""" + """ eig(a, eigenvectors=False, out=None) -> (Tensor, Tensor) Computes the eigenvalues and eigenvectors of a real square matrix. @@ -1183,7 +1183,7 @@ Returns: """) add_docstr(torch._C.eq, -""" + """ eq(input, other, out=None) -> Tensor Computes element-wise equality @@ -1208,7 +1208,7 @@ Example:: """) add_docstr(torch._C.equal, -""" + """ equal(tensor1, tensor2) -> bool True if two tensors have the same size and elements, False otherwise. @@ -1220,7 +1220,7 @@ Example:: """) add_docstr(torch._C.exp, -""" + """ exp(tensor, out=None) -> Tensor Computes the exponential of each element. 
@@ -1232,7 +1232,7 @@ Example:: """) add_docstr(torch._C.eye, -""" + """ eye(n, m=None, out=None) Returns a 2-D tensor with ones on the diagonal and zeros elsewhere. @@ -1255,7 +1255,7 @@ Example:: """) add_docstr(torch._C.floor, -""" + """ floor(input, out=None) -> Tensor Returns a new `Tensor` with the floor of the elements of :attr:`input`, the largest integer less than or equal to each element. @@ -1287,7 +1287,7 @@ Example:: """) add_docstr(torch._C.fmod, -""" + """ fmod(input, divisor, out=None) -> Tensor Computes the element-wise remainder of division. @@ -1315,7 +1315,7 @@ Example:: """) add_docstr(torch._C.frac, -""" + """ frac(tensor, out=None) -> Tensor Computes the fractional portion of each element in `tensor`. @@ -1327,7 +1327,7 @@ Example:: """) add_docstr(torch._C.from_numpy, -""" + """ from_numpy(ndarray) -> Tensor Creates a :class:`Tensor` from a :class:`numpy.ndarray`. @@ -1348,7 +1348,7 @@ Example:: """) add_docstr(torch._C.gather, -""" + """ gather(input, dim, index, out=None) -> Tensor Gathers values along an axis specified by `dim`. @@ -1375,7 +1375,7 @@ Example:: """) add_docstr(torch._C.ge, -""" + """ ge(input, other, out=None) -> Tensor Computes `tensor >= other` element-wise. @@ -1400,7 +1400,7 @@ Example:: """) add_docstr(torch._C.gels, -r""" + r""" gels(B, A, out=None) -> Tensor Computes the solution to the least squares and least norm problems for a full @@ -1466,7 +1466,7 @@ Example:: """) add_docstr(torch._C.geqrf, -r""" + r""" geqrf(input, out=None) -> (Tensor, Tensor) This is a low-level function for calling LAPACK directly. @@ -1489,7 +1489,7 @@ Args: """) add_docstr(torch._C.ger, -""" + """ ger(vec1, vec2, out=None) -> Tensor Outer product of :attr:`vec1` and :attr:`vec2`. If :attr:`vec1` is a vector of size `n` and :attr:`vec2` is a vector of size `m`, then :attr:`out` must be a matrix of size `n x m`. @@ -1513,7 +1513,7 @@ Example:: """) add_docstr(torch._C.gesv, -""" + """ gesv(B, A, out=None) -> (Tensor, Tensor) `X, LU = torch.gesv(B, A)` returns the solution to the system of linear @@ -1552,14 +1552,14 @@ Example:: """) add_docstr(torch._C.get_num_threads, -""" + """ get_num_threads() -> int Gets the number of OpenMP threads used for parallelizing CPU operations """) add_docstr(torch._C.gt, -""" + """ gt(input, other, out=None) -> Tensor Computes `tensor > other` element-wise. @@ -1584,7 +1584,7 @@ Example:: """) add_docstr(torch._C.histc, -""" + """ histc(input, bins=100, min=0, max=0, out=None) -> Tensor Computes the histogram of a tensor. @@ -1610,7 +1610,7 @@ Example:: """) add_docstr(torch._C.index_select, -""" + """ index_select(input, dim, index, out=None) -> Tensor Returns a new `Tensor` which indexes the :attr:`input` `Tensor` along dimension :attr:`dim` @@ -1653,7 +1653,7 @@ Example:: """) add_docstr(torch._C.inverse, -""" + """ inverse(input, out=None) -> Tensor Takes the inverse of the square matrix :attr:`input`. @@ -1704,7 +1704,7 @@ Example:: """) add_docstr(torch._C.kthvalue, -""" + """ kthvalue(input, k, dim=None, out=None) -> (Tensor, LongTensor) Returns the :attr:`k`th smallest element of the given :attr:`input` Tensor along a given dimension. @@ -1745,7 +1745,7 @@ Example:: """) add_docstr(torch._C.le, -""" + """ le(input, other, out=None) -> Tensor Computes `tensor <= other` element-wise. 
@@ -1770,7 +1770,7 @@ Example:: """) add_docstr(torch._C.lerp, -""" + """ lerp(start, end, weight, out=None) Does a linear interpolation of two tensors :attr:`start` and :attr:`end` based on a scalar :attr:`weight`: and returns the resulting :attr:`out` Tensor. @@ -1814,7 +1814,7 @@ Example:: """) add_docstr(torch._C.linspace, -""" + """ linspace(start, end, steps=100, out=None) -> Tensor Returns a one-dimensional Tensor of :attr:`steps` @@ -1860,7 +1860,7 @@ Example:: """) add_docstr(torch._C.log, -""" + """ log(input, out=None) -> Tensor Returns a new `Tensor` with the natural logarithm of the elements of :attr:`input`. @@ -1893,7 +1893,7 @@ Example:: """) add_docstr(torch._C.log1p, -""" + """ log1p(input, out=None) -> Tensor Returns a new `Tensor` with the natural logarithm of (1 + :attr:`input`). @@ -1930,7 +1930,7 @@ Example:: """) add_docstr(torch._C.logspace, -""" + """ logspace(start, end, steps=100, out=None) -> Tensor Returns a one-dimensional Tensor of :attr:`steps` points @@ -1967,7 +1967,7 @@ Example:: """) add_docstr(torch._C.lt, -""" + """ lt(input, other, out=None) -> Tensor Computes `tensor < other` element-wise. @@ -1992,7 +1992,7 @@ Example:: """) add_docstr(torch._C.masked_select, -""" + """ masked_select(input, mask, out=None) -> Tensor Returns a new 1D `Tensor` which indexes the :attr:`input` `Tensor` according to the binary mask :attr:`mask` which is a `ByteTensor`. @@ -2038,7 +2038,7 @@ Example:: """) add_docstr(torch._C.max, -""" + """ .. function:: max(input) -> float Returns the maximum value of all elements in the :attr:`input` Tensor. @@ -2144,7 +2144,7 @@ Example:: """) add_docstr(torch._C.mean, -""" + """ .. function:: mean(input) -> float Returns the mean value of all elements in the :attr:`input` Tensor. @@ -2197,7 +2197,7 @@ Example:: """) add_docstr(torch._C.median, -""" + """ median(input, dim=-1, values=None, indices=None) -> (Tensor, LongTensor) Returns the median value of each row of the :attr:`input` Tensor in the given dimension :attr:`dim`. @@ -2252,7 +2252,7 @@ Example:: """) add_docstr(torch._C.min, -""" + """ .. function:: min(input) -> float Returns the minimum value of all elements in the :attr:`input` Tensor. @@ -2357,7 +2357,7 @@ Example:: """) add_docstr(torch._C.mm, -""" + """ mm(mat1, mat2, out=None) -> Tensor Performs a matrix multiplication of the matrices :attr:`mat1` and :attr:`mat2`. @@ -2380,7 +2380,7 @@ Example:: """) add_docstr(torch._C.mode, -""" + """ mode(input, dim=-1, values=None, indices=None) -> (Tensor, LongTensor) Returns the mode value of each row of the :attr:`input` Tensor in the given dimension :attr:`dim`. @@ -2435,7 +2435,7 @@ Example:: """) add_docstr(torch._C.mul, -""" + """ .. function:: mul(input, value, out=None) Multiplies each element of the input :attr:`input` with the scalar :attr:`value` and returns a new resulting tensor. @@ -2508,7 +2508,7 @@ Example:: """) add_docstr(torch._C.multinomial, -u""" + u""" multinomial(input, num_samples, replacement=False, out=None) -> LongTensor Returns a Tensor where each row @@ -2562,7 +2562,7 @@ Example:: """) add_docstr(torch._C.mv, -""" + """ mv(mat, vec, out=None) -> Tensor Performs a matrix-vector product of the matrix :attr:`mat` and the vector :attr:`vec`. @@ -2585,7 +2585,7 @@ Example:: """) add_docstr(torch._C.ne, -""" + """ ne(input, other, out=None) -> Tensor Computes `tensor != other` element-wise. 
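The elementwise comparison functions in this block (`eq`, `ge`, `gt`, `le`, `lt`, `ne`) all return a 0/1 mask of the same shape as their inputs (a `ByteTensor` here), which pairs naturally with `masked_select` documented above. A minimal sketch:

    import torch

    a = torch.Tensor([1, 2, 3])
    b = torch.Tensor([1, 0, 3])

    mask = torch.ne(a, b)                 # 1 where a != b, else 0
    diffs = torch.masked_select(a, mask)  # 1D tensor of the differing entries of a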
@@ -2610,7 +2610,7 @@ Example:: """) add_docstr(torch._C.neg, -""" + """ neg(input, out=None) -> Tensor Returns a new `Tensor` with the negative of the elements of :attr:`input`. @@ -2645,7 +2645,7 @@ Example:: """) add_docstr(torch._C.nonzero, -""" + """ nonzero(input, out=None) -> LongTensor Returns a tensor containing the indices of all non-zero elements of :attr:`input`. @@ -2681,7 +2681,7 @@ Example:: """) add_docstr(torch._C.norm, -""" + """ .. function:: norm(input, p=2) -> float Returns the p-norm of the :attr:`input` Tensor. @@ -2743,7 +2743,7 @@ Example:: """) add_docstr(torch._C.normal, -""" + """ .. function:: normal(means, stddevs, out=None) Returns a Tensor of random numbers drawn from separate normal distributions @@ -2825,7 +2825,7 @@ Example:: """) add_docstr(torch._C.numel, -""" + """ numel(input) -> int Returns the total number of elements in the :attr:`input` Tensor. @@ -2845,7 +2845,7 @@ Example:: """) add_docstr(torch._C.ones, -""" + """ ones(*sizes, out=None) -> Tensor Returns a Tensor filled with the scalar value `1`, with the shape defined @@ -2896,7 +2896,7 @@ Example:: # """) add_docstr(torch._C.pow, -""" + """ .. function:: pow(input, exponent, out=None) Takes the power of each element in :attr:`input` with :attr:`exponent` and returns a Tensor with the result. @@ -2991,7 +2991,7 @@ Example:: """) add_docstr(torch._C.prod, -""" + """ .. function:: prod(input) -> float Returns the product of all elements in the :attr:`input` Tensor. @@ -3049,7 +3049,7 @@ Example:: # """) add_docstr(torch._C.qr, -""" + """ qr(input, out=None) -> (Tensor, Tensor) Computes the QR decomposition of a matrix :attr:`input`: returns matrices @@ -3106,7 +3106,7 @@ Example:: """) add_docstr(torch._C.rand, -""" + """ rand(*sizes, out=None) -> Tensor Returns a Tensor filled with random numbers from a uniform distribution @@ -3137,7 +3137,7 @@ Example:: """) add_docstr(torch._C.randn, -""" + """ randn(*sizes, out=None) -> Tensor Returns a Tensor filled with random numbers from a normal distribution @@ -3168,7 +3168,7 @@ Example:: """) add_docstr(torch._C.randperm, -""" + """ randperm(n, out=None) -> LongTensor Returns a random permutation of integers from ``0`` to ``n - 1``. @@ -3188,7 +3188,7 @@ Example:: """) add_docstr(torch._C.range, -""" + """ range(start, end, step=1, out=None) -> Tensor returns a 1D Tensor of size :math:`floor((end - start) / step) + 1` with values @@ -3225,7 +3225,7 @@ Example:: """) add_docstr(torch._C.remainder, -""" + """ remainder(input, divisor, out=None) -> Tensor Computes the element-wise remainder of division. @@ -3253,7 +3253,7 @@ Example:: """) add_docstr(torch._C.renorm, -""" + """ renorm(input, p, dim, maxnorm, out=None) -> Tensor Returns a Tensor where each sub-tensor of :attr:`input` along dimension :attr:`dim` @@ -3290,7 +3290,7 @@ Example:: """) add_docstr(torch._C.round, -""" + """ round(input, out=None) -> Tensor Returns a new `Tensor` with each of the elements of :attr:`input` rounded to the closest integer. @@ -3321,7 +3321,7 @@ Example:: """) add_docstr(torch._C.rsqrt, -""" + """ rsqrt(input, out=None) -> Tensor Returns a new `Tensor` with the reciprocal of the square-root of each of the elements of :attr:`input`. @@ -3352,14 +3352,14 @@ Example:: """) add_docstr(torch._C.set_num_threads, -""" + """ set_num_threads(int) Sets the number of OpenMP threads used for parallelizing CPU operations """) add_docstr(torch._C.sigmoid, -""" + """ sigmoid(input, out=None) -> Tensor Returns a new `Tensor` with the sigmoid of the elements of :attr:`input`. 
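Both fmod (earlier in this file) and remainder above carry the same one-line summary, "Computes the element-wise remainder of division"; the practical difference is whose sign the result takes. A short comparison, assuming integer-valued float tensors for readability::

    import torch

    x = torch.Tensor([-3, -2, -1, 1, 2, 3])
    torch.fmod(x, 2)       # follows the dividend's sign (C fmod):   [-1, 0, -1, 1, 0, 1]
    torch.remainder(x, 2)  # follows the divisor's sign (Python %):  [ 1, 0,  1, 1, 0, 1]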
@@ -3390,7 +3390,7 @@ Example:: """) add_docstr(torch._C.sign, -""" + """ sign(input, out=None) -> Tensor Returns a new `Tensor` with the sign of the elements of :attr:`input`. @@ -3420,7 +3420,7 @@ Example:: """) add_docstr(torch._C.sin, -""" + """ sin(input, out=None) -> Tensor Returns a new `Tensor` with the sine of the elements of :attr:`input`. @@ -3448,7 +3448,7 @@ Example:: """) add_docstr(torch._C.sinh, -""" + """ sinh(input, out=None) -> Tensor Returns a new `Tensor` with the hyperbolic sine of the elements of :attr:`input`. @@ -3476,7 +3476,7 @@ Example:: """) add_docstr(torch._C.sort, -""" + """ sort(input, dim=None, descending=False, out=None) -> (Tensor, LongTensor) Sorts the elements of the :attr:`input` Tensor along a given dimension in ascending order by value. @@ -3530,7 +3530,7 @@ Example:: """) add_docstr(torch._C.sqrt, -""" + """ sqrt(input, out=None) -> Tensor Returns a new `Tensor` with the square-root of the elements of :attr:`input`. @@ -3561,7 +3561,7 @@ Example:: """) add_docstr(torch._C.squeeze, -""" + """ squeeze(input, dim=None, out=None) Returns a `Tensor` with all the dimensions of :attr:`input` of size `1` removed. @@ -3599,7 +3599,7 @@ Example:: """) add_docstr(torch._C.std, -""" + """ .. function:: std(input) -> float Returns the standard-deviation of all elements in the :attr:`input` Tensor. @@ -3652,7 +3652,7 @@ Example:: """) add_docstr(torch._C.sum, -""" + """ .. function:: sum(input) -> float Returns the sum of all elements in the :attr:`input` Tensor. @@ -3705,7 +3705,7 @@ Example:: """) add_docstr(torch._C.svd, -""" + """ svd(input, some=True, out=None) -> (Tensor, Tensor, Tensor) `U, S, V = torch.svd(A)` returns the singular value decomposition of a @@ -3780,7 +3780,7 @@ Example:: """) add_docstr(torch._C.symeig, -""" + """ symeig(input, eigenvectors=False, upper=True, out=None) -> (Tensor, Tensor) `e, V = torch.symeig(input)` returns eigenvalues and eigenvectors @@ -3842,7 +3842,7 @@ Examples:: """) add_docstr(torch._C.t, -""" + """ t(input, out=None) -> Tensor Expects :attr:`input` to be a matrix (2D Tensor) and transposes dimensions 0 and 1. @@ -3872,7 +3872,7 @@ Example:: """) add_docstr(torch._C.tan, -""" + """ tan(input, out=None) -> Tensor Returns a new `Tensor` with the tangent of the elements of :attr:`input`. @@ -3900,7 +3900,7 @@ Example:: """) add_docstr(torch._C.tanh, -""" + """ tanh(input, out=None) -> Tensor Returns a new `Tensor` with the hyperbolic tangent of the elements of :attr:`input`. @@ -3928,7 +3928,7 @@ Example:: """) add_docstr(torch._C.topk, -""" + """ topk(input, k, dim=None, largest=True, sorted=True, out=None) -> (Tensor, LongTensor) Returns the :attr:`k` largest elements of the given :attr:`input` Tensor along a given dimension. @@ -3992,7 +3992,7 @@ Example:: """) add_docstr(torch._C.trace, -""" + """ trace(input) -> float Returns the sum of the elements of the diagonal of the input 2D matrix. @@ -4013,7 +4013,7 @@ Example:: """) add_docstr(torch._C.transpose, -""" + """ transpose(input, dim0, dim1, out=None) -> Tensor Returns a `Tensor` that is a transposed version of :attr:`input`. The given dimensions :attr:`dim0` and :attr:`dim1` are swapped. 
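Of the entries just above, squeeze is worth a quick shape check: with no dim argument every size-1 dimension is dropped, while with a dim argument only that dimension is considered. A minimal sketch::

    import torch

    x = torch.zeros(2, 1, 2, 1, 2)
    torch.squeeze(x).size()      # all size-1 dims removed      -> 2x2x2
    torch.squeeze(x, 0).size()   # dim 0 has size 2, unchanged  -> 2x1x2x1x2
    torch.squeeze(x, 1).size()   # only dim 1 removed           -> 2x2x1x2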
@@ -4044,7 +4044,7 @@ Example:: """) add_docstr(torch._C.tril, -""" + """ tril(input, k=0, out=None) -> Tensor Returns the lower triangular part of the matrix (2D Tensor) :attr:`input`, @@ -4097,7 +4097,7 @@ Example:: """) add_docstr(torch._C.triu, -""" + """ triu(input, k=0, out=None) -> Tensor Returns the upper triangular part of the matrix (2D Tensor) :attr:`input`, @@ -4155,7 +4155,7 @@ Example:: # """) add_docstr(torch._C.trunc, -""" + """ trunc(input, out=None) -> Tensor Returns a new `Tensor` with the truncated integer values of the elements of :attr:`input`. @@ -4186,7 +4186,7 @@ Example:: """) add_docstr(torch._C.var, -""" + """ .. function:: var(input) -> float Returns the variance of all elements in the :attr:`input` Tensor. @@ -4239,7 +4239,7 @@ Example:: """) add_docstr(torch._C.zeros, -""" + """ zeros(*sizes, out=None) -> Tensor Returns a Tensor filled with the scalar value `0`, with the shape defined diff --git a/torch/autograd/__init__.py b/torch/autograd/__init__.py index 6a7614dab9..7786a4ee8f 100644 --- a/torch/autograd/__init__.py +++ b/torch/autograd/__init__.py @@ -12,6 +12,7 @@ from .stochastic_function import StochasticFunction __all__ = ['Variable', 'Function', 'StochasticFunction', 'backward'] + def backward(variables, grad_variables, retain_variables=False): """Computes the sum of gradients of given variables w.r.t. graph leaves. @@ -37,6 +38,6 @@ def backward(variables, grad_variables, retain_variables=False): times. """ Variable._execution_engine.run_backward( - tuple(variables), tuple(grad_variables), retain_variables) + tuple(variables), tuple(grad_variables), retain_variables) assert torch._C._autograd_init() diff --git a/torch/autograd/_functions/__init__.py b/torch/autograd/_functions/__init__.py index 9f82344c28..2c07aafabd 100644 --- a/torch/autograd/_functions/__init__.py +++ b/torch/autograd/_functions/__init__.py @@ -5,4 +5,3 @@ from .reduce import * from .linalg import * from .blas import * from .stochastic import * - diff --git a/torch/autograd/_functions/basic_ops.py b/torch/autograd/_functions/basic_ops.py index c8ff8bcfbd..1a405efbc5 100644 --- a/torch/autograd/_functions/basic_ops.py +++ b/torch/autograd/_functions/basic_ops.py @@ -59,7 +59,7 @@ class Pow(Function): def backward(self, grad_output): a, b = self.saved_tensors - return grad_output.mul(b).mul_(a.pow(b-1)), grad_output.mul(a.pow(b)).mul_(a.log()) + return grad_output.mul(b).mul_(a.pow(b - 1)), grad_output.mul(a.pow(b)).mul_(a.log()) class AddConstant(InplaceFunction): @@ -174,7 +174,7 @@ class PowConstant(Function): return grad_output.mul(self.fw_result).mul_(math.log(self.constant)) else: a = self.saved_tensors[0] - return grad_output.mul(self.constant).mul_(a.pow(self.constant-1)) + return grad_output.mul(self.constant).mul_(a.pow(self.constant - 1)) class Negate(InplaceFunction): diff --git a/torch/autograd/_functions/blas.py b/torch/autograd/_functions/blas.py index 0f584b6044..5738c92458 100644 --- a/torch/autograd/_functions/blas.py +++ b/torch/autograd/_functions/blas.py @@ -25,7 +25,7 @@ class Addmm(_BlasBase): self.save_for_backward(matrix1, matrix2) output = self._get_output(add_matrix) return torch.addmm(self.alpha, add_matrix, self.beta, - matrix1, matrix2, out=output) + matrix1, matrix2, out=output) def backward(self, grad_output): matrix1, matrix2 = self.saved_tensors @@ -55,7 +55,7 @@ class Addbmm(_BlasBase): self.save_for_backward(batch1, batch2) output = self._get_output(add_matrix) return torch.addbmm(self.alpha, add_matrix, self.beta, - batch1, batch2, out=output) 
+ batch1, batch2, out=output) def backward(self, grad_output): batch1, batch2 = self.saved_tensors @@ -68,8 +68,8 @@ class Addbmm(_BlasBase): if any(self.needs_input_grad[1:]): batch_grad_output = (grad_output - .unsqueeze(0) - .expand(batch1.size(0), batch1.size(1), batch2.size(2))) + .unsqueeze(0) + .expand(batch1.size(0), batch1.size(1), batch2.size(2))) if self.needs_input_grad[1]: grad_batch1 = torch.bmm(batch_grad_output, batch2.transpose(1, 2)) @@ -90,7 +90,7 @@ class Baddbmm(_BlasBase): self.save_for_backward(batch1, batch2) output = self._get_output(add_batch) return torch.baddbmm(self.alpha, add_batch, self.beta, - batch1, batch2, out=output) + batch1, batch2, out=output) def backward(self, grad_output): batch1, batch2 = self.saved_tensors @@ -120,7 +120,7 @@ class Addmv(_BlasBase): self.save_for_backward(matrix, vector) output = self._get_output(add_vector) return torch.addmv(self.alpha, add_vector, self.beta, - matrix, vector, out=output) + matrix, vector, out=output) def backward(self, grad_output): matrix, vector = self.saved_tensors @@ -150,7 +150,7 @@ class Addr(_BlasBase): self.save_for_backward(vector1, vector2) output = self._get_output(add_matrix) return torch.addr(self.alpha, add_matrix, self.beta, - vector1, vector2, out=output) + vector1, vector2, out=output) def backward(self, grad_output): vector1, vector2 = self.saved_tensors @@ -199,4 +199,3 @@ class Dot(Function): # TODO: trace # TODO: tril # TODO: triu - diff --git a/torch/autograd/_functions/linalg.py b/torch/autograd/_functions/linalg.py index 93c69062f8..7c610b56e7 100644 --- a/torch/autograd/_functions/linalg.py +++ b/torch/autograd/_functions/linalg.py @@ -42,4 +42,3 @@ class Triu(Function): return grad_output.triu(self.diagonal_idx) # TODO: trace - diff --git a/torch/autograd/_functions/pointwise.py b/torch/autograd/_functions/pointwise.py index c9b95628ae..cdc3db9e96 100644 --- a/torch/autograd/_functions/pointwise.py +++ b/torch/autograd/_functions/pointwise.py @@ -165,6 +165,7 @@ class Tan(Function): class Asin(Function): + def forward(self, i): self.save_for_backward(i) return i.asin() @@ -175,6 +176,7 @@ class Asin(Function): class Acos(Function): + def forward(self, i): self.save_for_backward(i) return i.acos() @@ -185,6 +187,7 @@ class Acos(Function): class Atan(Function): + def forward(self, i): self.save_for_backward(i) return i.atan() diff --git a/torch/autograd/_functions/reduce.py b/torch/autograd/_functions/reduce.py index 314ae3aef5..07253cc42f 100644 --- a/torch/autograd/_functions/reduce.py +++ b/torch/autograd/_functions/reduce.py @@ -4,6 +4,7 @@ from ..function import Function class _DimReduceFunction(Function): + def __init__(self, dim=None): super(_DimReduceFunction, self).__init__() self.dim = dim @@ -139,6 +140,7 @@ class Kthvalue(_SelectionFunction): class Norm(Function): + def __init__(self, norm_type=2, dim=None): super(Norm, self).__init__() self.norm_type = norm_type diff --git a/torch/autograd/_functions/stochastic.py b/torch/autograd/_functions/stochastic.py index 2290e35245..4a4eb64d2e 100644 --- a/torch/autograd/_functions/stochastic.py +++ b/torch/autograd/_functions/stochastic.py @@ -65,7 +65,7 @@ class Normal(StochasticFunction): output.mul_(stddevs) else: raise RuntimeError("Normal function requires specifying a common " - "stddev, or per-sample stddev") + "stddev, or per-sample stddev") output.add_(means) self.save_for_backward(output, means, stddevs) self.mark_non_differentiable(output) @@ -74,7 +74,7 @@ class Normal(StochasticFunction): def backward(self, reward): 
output, means, stddevs = self.saved_tensors grad_stddevs = None - grad_means = means - output # == -(output - means) + grad_means = means - output # == -(output - means) assert self.stddev is not None or stddevs is not None if self.stddev is not None: grad_means /= 1e-6 + self.stddev ** 2 @@ -88,4 +88,3 @@ class Normal(StochasticFunction): grad_means /= stddevs_sq grad_means *= reward return grad_means, grad_stddevs - diff --git a/torch/autograd/_functions/tensor.py b/torch/autograd/_functions/tensor.py index fa9d81167e..d52eac50a6 100644 --- a/torch/autograd/_functions/tensor.py +++ b/torch/autograd/_functions/tensor.py @@ -103,6 +103,7 @@ class View(Function): class Expand(Function): + def __init__(self, sizes): super(Expand, self).__init__() self.sizes = sizes @@ -110,8 +111,8 @@ class Expand(Function): def forward(self, i): self.expanded_dims = [dim for dim, (expanded, original) - in enumerate(zip(self.sizes, i.size())) - if expanded != original] + in enumerate(zip(self.sizes, i.size())) + if expanded != original] result = i.expand(*self.sizes) self.mark_shared_storage((i, result)) return result @@ -304,8 +305,8 @@ class Concat(Function): return torch.cat(inputs, self.dim) def backward(self, grad_output): - return tuple(grad_output.narrow(self.dim, end-size, size) for size, end - in zip(self.input_sizes, _accumulate(self.input_sizes))) + return tuple(grad_output.narrow(self.dim, end - size, size) for size, end + in zip(self.input_sizes, _accumulate(self.input_sizes))) class Resize(Function): @@ -318,11 +319,11 @@ class Resize(Function): def forward(self, tensor): if tensor.numel() != self.numel: raise RuntimeError(("requested resize to {} ({} elements in total), " - "but the given tensor has a size of {} ({} elements). " - "autograd's resize can only change the shape of a given " - "tensor, while preserving the number of elements. ").format( - 'x'.join(map(str, self.sizes)), self.numel, - 'x'.join(map(str, tensor.size())), tensor.numel())) + "but the given tensor has a size of {} ({} elements). " + "autograd's resize can only change the shape of a given " + "tensor, while preserving the number of elements. 
").format( + 'x'.join(map(str, self.sizes)), self.numel, + 'x'.join(map(str, tensor.size())), tensor.numel())) self.input_sizes = tensor.size() result = tensor.new(tensor).resize_(*self.sizes) self.mark_shared_storage((tensor, result)) @@ -493,7 +494,7 @@ class Topk(_MultiSelectionFunction): self.sort = sort def forward(self, input): - dim = self.dim if self.dim is not None else input.dim()-1 + dim = self.dim if self.dim is not None else input.dim() - 1 self.args = (self.k, dim, self.largest, self.sort) return super(Topk, self).forward(input) diff --git a/torch/autograd/engine.py b/torch/autograd/engine.py index fd264bd85c..0865aca6df 100644 --- a/torch/autograd/engine.py +++ b/torch/autograd/engine.py @@ -71,8 +71,8 @@ class BasicEngine(object): else: if prev_fn.num_outputs != 1: raise RuntimeError("one of the function outputs " - "wasn't used - this is an error not, but " - "it's going to be fixed soon") + "wasn't used - this is an error not, but " + "it's going to be fixed soon") prev_grad = (d_prev_fn,) ready.appendleft((prev_fn, prev_grad)) else: diff --git a/torch/autograd/function.py b/torch/autograd/function.py index 74bc5024ac..e31d2758f8 100644 --- a/torch/autograd/function.py +++ b/torch/autograd/function.py @@ -154,9 +154,10 @@ def _nested_map(condition, fn): return type(obj)(_map(x) for x in obj) else: raise ValueError("NestedIOFunction doesn't know how to process " - "an input object of type " + torch.typename(obj)) + "an input object of type " + torch.typename(obj)) return _map + def _iter_filter(condition): def _iter(obj): if condition(obj): @@ -169,7 +170,7 @@ def _iter_filter(condition): yield var else: raise ValueError("NestedIOFunction doesn't know how to process " - "an input object of type " + torch.typename(obj)) + "an input object of type " + torch.typename(obj)) return _iter @@ -178,8 +179,10 @@ _iter_tensors = _iter_filter(torch.is_tensor) _iter_None_tensors = _iter_filter(lambda o: o is None or torch.is_tensor(o)) _map_variable_tensor = _nested_map(lambda o: isinstance(o, torch.autograd.Variable), lambda o: o.data) + def _map_tensor_fromiter(itr): - return _nested_map(lambda o: torch.is_tensor(o), lambda o: next(itr)) + return _nested_map(lambda o: torch.is_tensor(o), lambda o: next(itr)) + class NestedIOFunction(Function): diff --git a/torch/autograd/stochastic_function.py b/torch/autograd/stochastic_function.py index 74d598263a..cc81248727 100644 --- a/torch/autograd/stochastic_function.py +++ b/torch/autograd/stochastic_function.py @@ -2,6 +2,7 @@ from .function import Function _NOT_PROVIDED = object() + class StochasticFunction(Function): def __init__(self): @@ -10,7 +11,7 @@ class StochasticFunction(Function): def _do_backward(self, grad_output, retain_variables): if self.reward is _NOT_PROVIDED: raise RuntimeError("differentiating stochastic functions requires " - "providing a reward") + "providing a reward") result = super(StochasticFunction, self)._do_backward((self.reward,), retain_variables) if not retain_variables: self.reward = None @@ -18,4 +19,3 @@ class StochasticFunction(Function): def _reinforce(self, reward): self.reward = reward - diff --git a/torch/autograd/variable.py b/torch/autograd/variable.py index c48957a5a3..e03d8c1eef 100644 --- a/torch/autograd/variable.py +++ b/torch/autograd/variable.py @@ -72,12 +72,12 @@ class Variable(_C._VariableBase): if self.creator is not None: if value is False: hint = (" If you want to use a computed variable in a subgraph " - "that doesn't require differentiation use " - "var_no_grad = var.detach().") + 
"that doesn't require differentiation use " + "var_no_grad = var.detach().") else: hint = '' raise RuntimeError("you can only change requires_grad flags of " - "leaf variables." + hint) + "leaf variables." + hint) self._requires_grad = value def __getattr__(self, name): @@ -87,13 +87,13 @@ class Variable(_C._VariableBase): def __getitem__(self, key): if (isinstance(key, Variable) and - type(key.data).__name__ == 'ByteTensor'): + type(key.data).__name__ == 'ByteTensor'): return MaskedSelect()(self, key) return Index(key)(self) def __setitem__(self, key, value): if (isinstance(key, Variable) and - type(key.data).__name__ == 'ByteTensor'): + type(key.data).__name__ == 'ByteTensor'): if isinstance(value, Variable): return MaskedCopy(inplace=True)(self, key, value) else: @@ -107,9 +107,9 @@ class Variable(_C._VariableBase): def __deepcopy__(self, memo): if self.creator is not None: raise RuntimeError("Only Variables created explicitly by the user " - "(graph leaves) support the deepcopy protocol at the moment") + "(graph leaves) support the deepcopy protocol at the moment") result = type(self)(self.data.clone(), requires_grad=self.requires_grad, - volatile=self.volatile) + volatile=self.volatile) memo[id(self)] = result return result @@ -151,7 +151,8 @@ class Variable(_C._VariableBase): raise RuntimeError('calling backward on a volatile variable') if gradient is None and self.requires_grad: if self.data.numel() != 1: - raise RuntimeError('backward should be called only on a scalar (i.e. 1-element tensor) or with gradient w.r.t. the variable') + raise RuntimeError( + 'backward should be called only on a scalar (i.e. 1-element tensor) or with gradient w.r.t. the variable') gradient = self.data.new().resize_as_(self.data).fill_(1) self._execution_engine.run_backward((self,), (gradient,), retain_variables) @@ -219,7 +220,7 @@ class Variable(_C._VariableBase): """ if not isinstance(self.creator, StochasticFunction): raise RuntimeError("reinforce() can be only called on outputs " - "of stochastic functions") + "of stochastic functions") self.creator._reinforce(reward) def detach(self): @@ -392,7 +393,7 @@ class Variable(_C._VariableBase): def clamp(self, min=None, max=None): if min is None and max is None: raise ValueError("clamp requires specifying at least one of " - "min and max arguments") + "min and max arguments") elif min is None and max is not None: return CminConstant(max)(self) elif min is not None and max is None: @@ -503,7 +504,7 @@ class Variable(_C._VariableBase): def bmm(self, batch): output = Variable(self.data.new(self.data.size(0), self.data.size(1), - batch.data.size(2))) + batch.data.size(2))) return self._static_blas(Baddbmm, (output, 0, 1, self, batch), False) def mv(self, vector): @@ -622,7 +623,7 @@ class Variable(_C._VariableBase): if isinstance(sizes[0], torch.Size): if len(sizes) > 1: raise ValueError("expand expects a several ints or a single " - "torch.Size argument") + "torch.Size argument") sizes = sizes[0] return Expand(sizes)(self) @@ -641,7 +642,7 @@ class Variable(_C._VariableBase): def narrow(self, dim, start_index, length): index = tuple(slice(None, None) for _ in range(dim)) + \ - (slice(start_index, start_index+length),) + (slice(start_index, start_index + length),) return Index(index)(self) @@ -710,7 +711,7 @@ class Variable(_C._VariableBase): elif dim_self == 2 and dim_other == 2: return self.mm(other) raise ValueError("both arguments to __matmul__ need to be 1D or 2D, " - "but they are {}D and {}D".format(dim_self, dim_other)) + "but they are {}D and 
{}D".format(dim_self, dim_other)) def __div__(self, other): return self.div(other) diff --git a/torch/backends/cudnn/__init__.py b/torch/backends/cudnn/__init__.py index 4fda6123c3..8c2344ff81 100644 --- a/torch/backends/cudnn/__init__.py +++ b/torch/backends/cudnn/__init__.py @@ -20,6 +20,7 @@ elif sys.platform == 'darwin': else: libnames = [] + def _loadlib(): global lib loaded = False @@ -39,6 +40,7 @@ def _loadlib(): lib = None raise OSError("Could not load cuDNN") + def is_acceptable(tensor): if not enabled: return False @@ -58,13 +60,15 @@ def is_acceptable(tensor): return False if not _C.has_cudnn: warnings.warn("cuDNN library has been detected, but your pytorch " - "installation was compiled without support for it. You " - "might want to rebuild pytorch, making sure the library " - "is visible to the build system.") + "installation was compiled without support for it. You " + "might want to rebuild pytorch, making sure the library " + "is visible to the build system.") return False return True __cudnn_version = [] + + def version(): if not lib: raise RuntimeError("cuDNN not initialized") @@ -108,7 +112,9 @@ CUDNN_GRU = 3 CUDNN_LINEAR_INPUT = 0 CUDNN_SKIP_INPUT = 1 + class CuDNNHandle: + def __init__(self): ptr = ctypes.c_void_p() check_error(lib.cudnnCreate(ctypes.byref(ptr))) @@ -117,7 +123,9 @@ class CuDNNHandle: def __del__(self): check_error(lib.cudnnDestroy(self)) + class CuDNNError(RuntimeError): + def __init__(self, status): self.status = status msg = '{}: {}'.format(status, get_error_string(status)) @@ -125,6 +133,7 @@ class CuDNNError(RuntimeError): class TensorDescriptor(object): + def __init__(self): ptr = ctypes.c_void_p() check_error(lib.cudnnCreateTensorDescriptor(ctypes.byref(ptr))) @@ -147,6 +156,7 @@ class TensorDescriptor(object): class TensorDescriptorArray(object): + def __init__(self, N): self.ptrs = (ctypes.c_void_p * N)() for i in range(N): @@ -175,6 +185,7 @@ class TensorDescriptorArray(object): class ConvolutionDescriptor(object): + def __init__(self): ptr = ctypes.c_void_p() check_error(lib.cudnnCreateConvolutionDescriptor(ctypes.byref(ptr))) @@ -195,7 +206,9 @@ class ConvolutionDescriptor(object): def as_tuple(self): return (self._pad, self._stride) + class FilterDescriptor(object): + def __init__(self): ptr = ctypes.c_void_p() check_error(lib.cudnnCreateFilterDescriptor(ctypes.byref(ptr))) @@ -216,6 +229,7 @@ class FilterDescriptor(object): class DropoutDescriptor(object): + def __init__(self, handle, dropout, seed): ptr = ctypes.c_void_p() check_error(lib.cudnnCreateDropoutDescriptor(ctypes.byref(ptr))) @@ -241,10 +255,10 @@ class DropoutDescriptor(object): check_error(lib.cudnnDestroyDropoutDescriptor(self)) - class RNNDescriptor(object): + def __init__(self, hidden_size, num_layers, dropout_desc, input_mode, - bidirectional, mode, datatype): + bidirectional, mode, datatype): ptr = ctypes.c_void_p() check_error(lib.cudnnCreateRNNDescriptor(ctypes.byref(ptr))) self._as_parameter_ = ptr @@ -272,13 +286,16 @@ class ConvolutionAlgoPerf(ctypes.Structure): ("memory", ctypes.c_size_t), ] + def check_error(status): if status is not 0: raise CuDNNError(status) + def get_error_string(status): return lib.cudnnGetErrorString(status) + def get_handle(): if lib is None: _loadlib() @@ -296,11 +313,12 @@ _typemap = { } _sizeofmap = { - CUDNN_DATA_HALF : 2, - CUDNN_DATA_FLOAT : 4, - CUDNN_DATA_DOUBLE : 8, + CUDNN_DATA_HALF: 2, + CUDNN_DATA_FLOAT: 4, + CUDNN_DATA_DOUBLE: 8, } + def c_type(tensor): if isinstance(tensor, torch.cuda.HalfTensor): return ctypes.c_float @@ 
-311,10 +329,12 @@ def c_type(tensor): else: raise ValueError("unknown type '{}'".format(type(tensor))) + def int_array(itr): array_type = ctypes.c_int * len(itr) return array_type(*itr) + def descriptor(tensor, N=None): if N is not None: descriptor = TensorDescriptorArray(N) @@ -331,9 +351,11 @@ _autotuner_forward = {} _autotuner_backward_data = {} _autotuner_backward_filter = {} + def convolution_autotuner_key(idesc, weight_desc, conv_desc): return (idesc.as_tuple(), weight_desc.as_tuple(), conv_desc.as_tuple()) + def convolution_forward_algorithm(idesc, weight_desc, conv_desc, odesc): k = convolution_autotuner_key(idesc, weight_desc, conv_desc) if k in _autotuner_forward: @@ -360,15 +382,19 @@ def convolution_forward_algorithm(idesc, weight_desc, conv_desc, odesc): wlimit, ctypes.byref(fwd_alg))) return fwd_alg + def convolution_forward_workspace_size(*args): check_error(lib.cudnnGetConvolutionForwardWorkspaceSize(*args)) + def convolution_forward(*args): check_error(lib.cudnnConvolutionForward(*args)) + def convolution_backward_data(*args): return check_error(lib.cudnnConvolutionBackwardData(*args)) + def convolution_backward_data_algorithm(weight_desc, odesc, conv_desc, idesc): k = convolution_autotuner_key(idesc, weight_desc, conv_desc) if k in _autotuner_backward_data: @@ -395,12 +421,15 @@ def convolution_backward_data_algorithm(weight_desc, odesc, conv_desc, idesc): wlimit, ctypes.byref(bwd_data_alg))) return bwd_data_alg + def convolution_backward_data_workspace_size(*args): return check_error(lib.cudnnGetConvolutionBackwardDataWorkspaceSize(*args)) + def convolution_backward_filter(*args): return check_error(lib.cudnnConvolutionBackwardFilter(*args)) + def convolution_backward_filter_algorithm(idesc, odesc, conv_desc, weight_desc): k = convolution_autotuner_key(idesc, weight_desc, conv_desc) if k in _autotuner_backward_filter: @@ -427,11 +456,14 @@ def convolution_backward_filter_algorithm(idesc, odesc, conv_desc, weight_desc): wlimit, ctypes.byref(bwd_filter_alg))) return bwd_filter_alg + def convolution_backward_filter_workspace_size(*args): return check_error(lib.cudnnGetConvolutionBackwardFilterWorkspaceSize(*args)) + def convolution_backward_bias(*args): check_error(lib.cudnnConvolutionBackwardBias(*args)) + def add_tensor(*args): check_error(lib.cudnnAddTensor(*args)) diff --git a/torch/backends/cudnn/rnn.py b/torch/backends/cudnn/rnn.py index 19ec7a288a..9d3cfce57d 100644 --- a/torch/backends/cudnn/rnn.py +++ b/torch/backends/cudnn/rnn.py @@ -3,6 +3,7 @@ import torch.backends.cudnn as cudnn from torch.backends.cudnn import check_error import ctypes + def get_cudnn_mode(mode): if mode == 'RNN_RELU': return cudnn.CUDNN_RNN_RELU @@ -17,9 +18,10 @@ def get_cudnn_mode(mode): class Unserializable(object): + def __init__(self, inner): self.inner = inner - + def get(self): return self.inner @@ -39,6 +41,7 @@ def init_dropout_descriptor(fn, handle): fn.dropout_seed ) + def init_rnn_descriptor(fn): return cudnn.RNNDescriptor( fn.hidden_size, @@ -161,7 +164,6 @@ def get_parameters(fn, handle, weight_buf): cur_offset = offset + filter_dim_a[0] - params.append(layer_params) return params @@ -237,7 +239,7 @@ def forward(fn, input, hx, weight, output, hy): if tuple(hx.size()) != hidden_size: raise RuntimeError('Expected hidden size {}, got {}'.format( - hidden_size, tuple(hx.size()))) + hidden_size, tuple(hx.size()))) if cx is not None and tuple(cx.size()) != hidden_size: raise RuntimeError('Expected cell size {}, got {}'.format( hidden_size, tuple(cx.size()))) @@ -295,7 +297,6 @@ def 
forward(fn, input, hx, weight, output, hy): output = output.transpose_(0, 1) - def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_input, grad_hx): with torch.cuda.device_of(input): handle = cudnn.get_handle() diff --git a/torch/cuda/__init__.py b/torch/cuda/__init__.py index cd947b5b24..fab8cbc36f 100644 --- a/torch/cuda/__init__.py +++ b/torch/cuda/__init__.py @@ -51,9 +51,9 @@ def _load_cudart(): except OSError: pass raise RuntimeError("couldn't find libcudart. Make sure CUDA libraries " - "are installed in a default location, or that they're in " + - ("DYLD_LIBRARY_PATH" if system == 'Darwin' else "LD_LIBRARY_PATH") + - ".") + "are installed in a default location, or that they're in " + + ("DYLD_LIBRARY_PATH" if system == 'Darwin' else "LD_LIBRARY_PATH") + + ".") def _check_driver(): @@ -259,67 +259,112 @@ class _CudaBase(object): class DoubleStorage(_CudaBase, torch._C.CudaDoubleStorageBase, _StorageBase): pass + + class FloatStorage(_CudaBase, torch._C.CudaFloatStorageBase, _StorageBase): pass + + class LongStorage(_CudaBase, torch._C.CudaLongStorageBase, _StorageBase): pass + + class IntStorage(_CudaBase, torch._C.CudaIntStorageBase, _StorageBase): pass + + class ShortStorage(_CudaBase, torch._C.CudaShortStorageBase, _StorageBase): pass + + class CharStorage(_CudaBase, torch._C.CudaCharStorageBase, _StorageBase): pass + + class ByteStorage(_CudaBase, torch._C.CudaByteStorageBase, _StorageBase): pass + + class HalfStorage(_CudaBase, torch._C.CudaHalfStorageBase, _StorageBase): pass + class DoubleTensor(_CudaBase, torch._C.CudaDoubleTensorBase, _TensorBase): + def is_signed(self): return True + @classmethod def storage_type(cls): return DoubleStorage + + class FloatTensor(_CudaBase, torch._C.CudaFloatTensorBase, _TensorBase): + def is_signed(self): return True + @classmethod def storage_type(cls): return FloatStorage + + class LongTensor(_CudaBase, torch._C.CudaLongTensorBase, _TensorBase): + def is_signed(self): return True + @classmethod def storage_type(cls): return LongStorage + + class IntTensor(_CudaBase, torch._C.CudaIntTensorBase, _TensorBase): + def is_signed(self): return True + @classmethod def storage_type(cls): return IntStorage + + class ShortTensor(_CudaBase, torch._C.CudaShortTensorBase, _TensorBase): + def is_signed(self): return True + @classmethod def storage_type(cls): return ShortStorage + + class CharTensor(_CudaBase, torch._C.CudaCharTensorBase, _TensorBase): + def is_signed(self): # TODO return False + @classmethod def storage_type(cls): return CharStorage + + class ByteTensor(_CudaBase, torch._C.CudaByteTensorBase, _TensorBase): + def is_signed(self): return False + @classmethod def storage_type(cls): return ByteStorage + + class HalfTensor(_CudaBase, torch._C.CudaHalfTensorBase, _TensorBase): + def is_signed(self): return True + @classmethod def storage_type(): return HalfStorage diff --git a/torch/cuda/comm.py b/torch/cuda/comm.py index d0607152bf..e6650a7bb6 100644 --- a/torch/cuda/comm.py +++ b/torch/cuda/comm.py @@ -4,6 +4,7 @@ from torch._utils import _accumulate # TODO: sync streams when implemented + def broadcast(tensor, devices): """Broadcasts a tensor to a number of GPUs. 
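The cuDNN wrapper classes reworked above (TensorDescriptor, FilterDescriptor, RNNDescriptor and friends in torch/backends/cudnn/__init__.py) all lean on the same ctypes idiom: the raw handle is stored in _as_parameter_, so the Python object can be handed directly to the C API, and __del__ releases it. A stripped-down sketch of that pattern, with hypothetical fooCreate/fooDestroy symbols rather than the real cuDNN entry points::

    import ctypes

    def check_status(status):
        if status != 0:
            raise RuntimeError('call failed with status {}'.format(status))

    class Handle(object):

        def __init__(self, lib):
            ptr = ctypes.c_void_p()
            check_status(lib.fooCreate(ctypes.byref(ptr)))   # C side fills in the opaque handle
            self._as_parameter_ = ptr                        # ctypes passes `self` as this pointer
            self._lib = lib

        def __del__(self):
            check_status(self._lib.fooDestroy(self))         # `self` converts via _as_parameter_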
diff --git a/torch/cuda/nccl.py b/torch/cuda/nccl.py index 78c1a01c92..5e9de84709 100644 --- a/torch/cuda/nccl.py +++ b/torch/cuda/nccl.py @@ -92,6 +92,7 @@ nccl_types = { class NcclError(RuntimeError): + def __init__(self, status): self.status = status msg = '{0} ({1})'.format(status_codes.get(status), status) @@ -103,6 +104,7 @@ class NcclComm(ctypes.c_void_p): class NcclCommList(object): + def __init__(self, devices): self.devices = devices ptrs = (NcclComm * len(devices))() @@ -141,7 +143,7 @@ def communicator(inputs, outputs=None): def cudaStream(): # TODO: return the current stream - #ffi.C.THCState_getCurrentStream(cutorch.getState()) + # ffi.C.THCState_getCurrentStream(cutorch.getState()) return None @@ -202,7 +204,7 @@ def all_gather(inputs, outputs): def reduce_scatter(inputs, outputs, op=SUM): - _check_inputs(inputs, outputs, 1.0/len(inputs)) + _check_inputs(inputs, outputs, 1.0 / len(inputs)) comm = communicator(inputs, outputs) count = inputs[0].numel() // len(inputs) data_type = nccl_types[inputs[0].type()] diff --git a/torch/cuda/random.py b/torch/cuda/random.py index 1b4bc54fbc..8d49587c1d 100644 --- a/torch/cuda/random.py +++ b/torch/cuda/random.py @@ -35,4 +35,3 @@ def seed_all(): def initial_seed(): _lazy_init() return _C._cuda_initialSeed() - diff --git a/torch/cuda/streams.py b/torch/cuda/streams.py index a4cca7fe15..042c3e0cc7 100644 --- a/torch/cuda/streams.py +++ b/torch/cuda/streams.py @@ -8,6 +8,7 @@ ERROR_NOT_READY = 34 class CudaError(RuntimeError): + def __init__(self, code): msg = cudart().cudaGetErrorString(code).decode('utf-8') super(CudaError, self).__init__('{0} ({1})'.format(msg, code)) diff --git a/torch/functional.py b/torch/functional.py index a26cda6283..bcbceffca8 100644 --- a/torch/functional.py +++ b/torch/functional.py @@ -1,16 +1,18 @@ import torch from ._utils import _range + def split(tensor, split_size, dim=0): if dim < 0: dim += tensor.dim() dim_size = tensor.size(dim) num_splits = (dim_size + split_size - 1) // split_size last_split_size = split_size - (split_size * num_splits - dim_size) + def get_split_size(i): - return split_size if i < num_splits-1 else last_split_size - return tuple(tensor.narrow(int(dim), int(i*split_size), int(get_split_size(i))) for i - in _range(0, num_splits)) + return split_size if i < num_splits - 1 else last_split_size + return tuple(tensor.narrow(int(dim), int(i * split_size), int(get_split_size(i))) for i + in _range(0, num_splits)) def chunk(tensor, n_chunks, dim=0): diff --git a/torch/legacy/nn/Abs.py b/torch/legacy/nn/Abs.py index 475180af66..4b61c32041 100644 --- a/torch/legacy/nn/Abs.py +++ b/torch/legacy/nn/Abs.py @@ -1,24 +1,25 @@ import torch from .Module import Module + class Abs(Module): + def __init__(self): super(Abs, self).__init__() def updateOutput(self, input): self._backend.Abs_updateOutput( - self._backend.library_state, - input, - self.output + self._backend.library_state, + input, + self.output ) return self.output def updateGradInput(self, input, gradOutput): self._backend.Abs_updateGradInput( - self._backend.library_state, - input, - gradOutput, - self.gradInput + self._backend.library_state, + input, + gradOutput, + self.gradInput ) return self.gradInput - diff --git a/torch/legacy/nn/AbsCriterion.py b/torch/legacy/nn/AbsCriterion.py index 9c440faee0..a7cb79b69e 100644 --- a/torch/legacy/nn/AbsCriterion.py +++ b/torch/legacy/nn/AbsCriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class AbsCriterion(Criterion): def __init__(self, sizeAverage=True): @@ -10,7 
+11,7 @@ class AbsCriterion(Criterion): def updateOutput(self, input, target): if self.output_tensor is None: - self.output_tensor = input.new(1) + self.output_tensor = input.new(1) self._backend.AbsCriterion_updateOutput( self._backend.library_state, input, @@ -21,7 +22,6 @@ class AbsCriterion(Criterion): self.output = self.output_tensor[0] return self.output - def updateGradInput(self, input, target): self._backend.AbsCriterion_updateGradInput( self._backend.library_state, @@ -31,4 +31,3 @@ class AbsCriterion(Criterion): self.sizeAverage ) return self.gradInput - diff --git a/torch/legacy/nn/Add.py b/torch/legacy/nn/Add.py index 847c4cad1d..09847f9a5a 100644 --- a/torch/legacy/nn/Add.py +++ b/torch/legacy/nn/Add.py @@ -2,6 +2,7 @@ import math import torch from .Module import Module + class Add(Module): def __init__(self, inputSize, scalar=False): @@ -19,16 +20,16 @@ class Add(Module): def reset(self, stdv=None): if stdv is not None: - stdv = stdv * math.sqrt(3) + stdv = stdv * math.sqrt(3) else: - stdv = 1./math.sqrt(self.bias.size(0)) + stdv = 1. / math.sqrt(self.bias.size(0)) self.bias.uniform_(-stdv, stdv) def updateOutput(self, input): self.output.resize_as_(input).copy_(input) if self.scalar: - self.output.add_(self.bias[0]); + self.output.add_(self.bias[0]) else: batchSize = input.size(0) if self._ones.size(0) != batchSize: @@ -42,16 +43,15 @@ class Add(Module): def updateGradInput(self, input, gradOutput): if self.gradInput is not None: - self.gradInput.resize_as_(gradOutput).copy_(gradOutput) - return self.gradInput + self.gradInput.resize_as_(gradOutput).copy_(gradOutput) + return self.gradInput def accGradParameters(self, input, gradOutput, scale=1): if self.gradBias.size(0) == 1: - self.gradBias[0] = self.gradBias[0] + scale*gradOutput.sum(); + self.gradBias[0] = self.gradBias[0] + scale * gradOutput.sum() else: - if input.is_same_size(self.bias): - self.gradBias.add_(scale, gradOutput) - else: - gradOutput = gradOutput.view(input.size(0), -1) - self.gradBias.view(-1).addmv_(scale, gradOutput.t(), self._ones) - + if input.is_same_size(self.bias): + self.gradBias.add_(scale, gradOutput) + else: + gradOutput = gradOutput.view(input.size(0), -1) + self.gradBias.view(-1).addmv_(scale, gradOutput.t(), self._ones) diff --git a/torch/legacy/nn/AddConstant.py b/torch/legacy/nn/AddConstant.py index 8582d947b8..4e9f10dcbf 100644 --- a/torch/legacy/nn/AddConstant.py +++ b/torch/legacy/nn/AddConstant.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class AddConstant(Module): def __init__(self, constant_scalar, inplace=False): @@ -29,4 +30,3 @@ class AddConstant(Module): self.gradInput.copy_(gradOutput) return self.gradInput - diff --git a/torch/legacy/nn/BCECriterion.py b/torch/legacy/nn/BCECriterion.py index 1e3642daa0..94ca51a0bc 100644 --- a/torch/legacy/nn/BCECriterion.py +++ b/torch/legacy/nn/BCECriterion.py @@ -2,6 +2,8 @@ import torch from .Criterion import Criterion # TODO: use THNN + + class BCECriterion(Criterion): eps = 1e-12 @@ -20,7 +22,7 @@ class BCECriterion(Criterion): raise RuntimeError("input and target size mismatch") if self.buffer is None: - self.buffer = input.new() + self.buffer = input.new() buffer = self.buffer weights = self.weights @@ -38,7 +40,7 @@ class BCECriterion(Criterion): output = torch.dot(target, buffer) # log(1 - input) * (1 - target) - torch.mul(input, -1, out=buffer).add_(1+self.eps).log_() + torch.mul(input, -1, out=buffer).add_(1 + self.eps).log_() if weights is not None: buffer.mul_(weights) @@ -52,42 +54,39 @@ class 
BCECriterion(Criterion): return self.output - def updateGradInput(self, input, target): # - (target - input) / ( input (1 - input) ) # The gradient is slightly incorrect: # It should have be divided by (input + self.eps) (1 - input + self.eps) # but it is divided by input (1 - input + self.eps) + self.eps # This modification requires less memory to be computed. - if input.nelement() != target.nelement(): + if input.nelement() != target.nelement(): raise RuntimeError("input and target size mismatch") - if self.buffer is None: - self.buffer = input.new() - - buffer = self.buffer - weights = self.weights - gradInput = self.gradInput - - if weights is not None and target.dim() != 1: - weights = self.weights.view(1, target.size(1)).expand_as(target) + if self.buffer is None: + self.buffer = input.new() + buffer = self.buffer + weights = self.weights + gradInput = self.gradInput - buffer.resize_as_(input) - # - x ( 1 + self.eps -x ) + self.eps - torch.add(input, -1, out=buffer).add_(-self.eps).mul_(input).add_(-self.eps) + if weights is not None and target.dim() != 1: + weights = self.weights.view(1, target.size(1)).expand_as(target) - gradInput.resize_as_(input) - # y - x - torch.add(target, -1, input, out=gradInput) - # - (y - x) / ( x ( 1 + self.eps -x ) + self.eps ) - gradInput.div_(buffer) + buffer.resize_as_(input) + # - x ( 1 + self.eps -x ) + self.eps + torch.add(input, -1, out=buffer).add_(-self.eps).mul_(input).add_(-self.eps) - if weights is not None: - gradInput.mul_(weights) + gradInput.resize_as_(input) + # y - x + torch.add(target, -1, input, out=gradInput) + # - (y - x) / ( x ( 1 + self.eps -x ) + self.eps ) + gradInput.div_(buffer) - if self.sizeAverage: - gradInput.div_(target.nelement()) + if weights is not None: + gradInput.mul_(weights) - return gradInput + if self.sizeAverage: + gradInput.div_(target.nelement()) + return gradInput diff --git a/torch/legacy/nn/BatchNormalization.py b/torch/legacy/nn/BatchNormalization.py index f7a18e8d24..3c17c6481a 100644 --- a/torch/legacy/nn/BatchNormalization.py +++ b/torch/legacy/nn/BatchNormalization.py @@ -32,6 +32,7 @@ import torch from .Module import Module from .utils import clear + class BatchNormalization(Module): # expected dimension of input nDim = 2 @@ -51,44 +52,45 @@ class BatchNormalization(Module): self.save_std = None if self.affine: - self.weight = torch.Tensor(nOutput) - self.bias = torch.Tensor(nOutput) - self.gradWeight = torch.Tensor(nOutput) - self.gradBias = torch.Tensor(nOutput) - self.reset() + self.weight = torch.Tensor(nOutput) + self.bias = torch.Tensor(nOutput) + self.gradWeight = torch.Tensor(nOutput) + self.gradBias = torch.Tensor(nOutput) + self.reset() else: - self.weight = None - self.bias = None - self.gradWeight = None - self.gradBias = None + self.weight = None + self.bias = None + self.gradWeight = None + self.gradBias = None def reset(self): if self.weight is not None: - self.weight.uniform_() + self.weight.uniform_() if self.bias is not None: - self.bias.zero_() + self.bias.zero_() self.running_mean.zero_() self.running_var.fill_(1) def _checkInputDim(self, input): if input.dim() != self.nDim: - raise RuntimeError('only mini-batch supported ({}D tensor), got {}D tensor instead'.format(self.nDim, input.dim())) + raise RuntimeError( + 'only mini-batch supported ({}D tensor), got {}D tensor instead'.format(self.nDim, input.dim())) if input.size(1) != self.running_mean.nelement(): raise RuntimeError('got {}-feature tensor, expected {}'.format(input.size(1), self.running_mean.nelement())) def 
_makeContiguous(self, input, gradOutput=None): if not input.is_contiguous(): if self._input is None: - self._input = input.new() + self._input = input.new() self._input.resize_as_(input).copy_(input) input = self._input if gradOutput is not None: if not gradOutput.is_contiguous(): if self._gradOutput is None: - self._gradOutput = gradOutput.new() + self._gradOutput = gradOutput.new() self._gradOutput.resize_as_(gradOutput).copy_(gradOutput) gradOutput = self._gradOutput @@ -101,10 +103,10 @@ class BatchNormalization(Module): self.output.resize_as_(input) if self.save_mean is None: - self.save_mean = input.new() + self.save_mean = input.new() self.save_mean.resize_as_(self.running_mean) if self.save_std is None: - self.save_std = input.new() + self.save_std = input.new() self.save_std.resize_as_(self.running_var) self._backend.BatchNormalization_updateOutput( @@ -124,7 +126,6 @@ class BatchNormalization(Module): return self.output - def _backward(self, input, gradOutput, scale, gradInput=None, gradWeight=None, gradBias=None): self._checkInputDim(input) self._checkInputDim(gradOutput) @@ -135,8 +136,7 @@ class BatchNormalization(Module): scale = scale or 1. if gradInput is not None: - gradInput.resize_as_(gradOutput) - + gradInput.resize_as_(gradOutput) self._backend.BatchNormalization_backward( self._backend.library_state, @@ -177,15 +177,14 @@ class BatchNormalization(Module): # first 5 buffers are not present in the current implementation, # but we keep them for cleaning old saved models clear(self, [ - 'buffer', - 'buffer2', - 'centered', - 'std', - 'normalized', - '_input', - '_gradOutput', - 'save_mean', - 'save_std', + 'buffer', + 'buffer2', + 'centered', + 'std', + 'normalized', + '_input', + '_gradOutput', + 'save_mean', + 'save_std', ]) return super(BatchNormalization, self).clearState() - diff --git a/torch/legacy/nn/Bilinear.py b/torch/legacy/nn/Bilinear.py index 2d699216b0..e4e0049262 100644 --- a/torch/legacy/nn/Bilinear.py +++ b/torch/legacy/nn/Bilinear.py @@ -3,6 +3,7 @@ import torch from .Module import Module from .utils import clear + class Bilinear(Module): def _assertInput(self, input): @@ -23,14 +24,13 @@ class Bilinear(Module): if gradOutput.size(1) != self.weight.size(0): raise RuntimeError('number of columns in gradOutput does not match layer\'s output size') - def __init__(self, inputSize1, inputSize2, outputSize, bias=True): # set up model: super(Bilinear, self).__init__() - self.weight = torch.Tensor(outputSize, inputSize1, inputSize2) + self.weight = torch.Tensor(outputSize, inputSize1, inputSize2) self.gradWeight = torch.Tensor(outputSize, inputSize1, inputSize2) if bias: - self.bias = torch.Tensor(outputSize) + self.bias = torch.Tensor(outputSize) self.gradBias = torch.Tensor(outputSize) else: self.bias = None @@ -53,13 +53,12 @@ class Bilinear(Module): self.bias.uniform_(-stdv, stdv) return self - def updateOutput(self, input): self._assertInput(input) # set up buffer: if self.buff2 is None: - self.buff2 = input[0].new() + self.buff2 = input[0].new() self.buff2.resize_as_(input[1]) # compute output scores: @@ -74,7 +73,6 @@ class Bilinear(Module): return self.output - def updateGradInput(self, input, gradOutput): if self.gradInput is None: return @@ -87,38 +85,36 @@ class Bilinear(Module): #: first slice of weight tensor (k = 1) self.gradInput[0].addmm_(input[1], self.weight[0].t()) self.gradInput[0].mul_(gradOutput.narrow(1, 0, 1).expand(self.gradInput[0].size(0), - self.gradInput[0].size(1))) + self.gradInput[0].size(1))) self.gradInput[1].addmm_(input[0], 
self.weight[0]) self.gradInput[1].mul_(gradOutput.narrow(1, 0, 1).expand(self.gradInput[1].size(0), - self.gradInput[1].size(1))) + self.gradInput[1].size(1))) #: remaining slices of weight tensor if self.weight.size(0) > 1: if self.buff1 is None: - self.buff1 = input[0].new() + self.buff1 = input[0].new() self.buff1.resize_as_(input[0]) for k in range(1, self.weight.size(0)): torch.mm(input[1], self.weight[k].t(), out=self.buff1) self.buff1.mul_(gradOutput.narrow(1, k, 1).expand(self.gradInput[0].size(0), - self.gradInput[0].size(1))) + self.gradInput[0].size(1))) self.gradInput[0].add_(self.buff1) torch.mm(input[0], self.weight[k], out=self.buff2) self.buff2.mul_(gradOutput.narrow(1, k, 1).expand(self.gradInput[1].size(0), - self.gradInput[1].size(1))) + self.gradInput[1].size(1))) self.gradInput[1].add_(self.buff2) return self.gradInput - - def accGradParameters(self, input, gradOutput, scale=1): self._assertInputGradOutput(input, gradOutput) # make sure we have buffer: if self.buff1 is None: - self.buff1 = input[0].new() + self.buff1 = input[0].new() self.buff1.resize_as_(input[0]) # accumulate parameter gradients: @@ -129,15 +125,13 @@ class Bilinear(Module): if self.bias is not None: self.gradBias.add_(scale, gradOutput.sum(0)) - def __repr__(self): return str(type(self)) + \ - '({}x{} -> {}) {}'.format( - self.weight.size(1), self.weight.size(2), self.weight.size(0), - (' without bias' if self.bias is None else '') - ) + '({}x{} -> {}) {}'.format( + self.weight.size(1), self.weight.size(2), self.weight.size(0), + (' without bias' if self.bias is None else '') + ) def clearState(self): clear(self, 'buff1', 'buff2') return super(Bilinear, self).clearState() - diff --git a/torch/legacy/nn/CAddTable.py b/torch/legacy/nn/CAddTable.py index 9b8481ec50..bcefa11f2a 100644 --- a/torch/legacy/nn/CAddTable.py +++ b/torch/legacy/nn/CAddTable.py @@ -1,25 +1,25 @@ import torch from .Module import Module + class CAddTable(Module): + def __init__(self, inplace=False): super(CAddTable, self).__init__() self.inplace = inplace self.gradInput = [] - def updateOutput(self, input): if self.inplace: - self.output.set_(input[0]) + self.output.set_(input[0]) else: - self.output.resize_as_(input[0]).copy_(input[0]) + self.output.resize_as_(input[0]).copy_(input[0]) for i in range(1, len(input)): - self.output.add_(input[i]) + self.output.add_(input[i]) return self.output - def updateGradInput(self, input, gradOutput): for i in range(len(input)): if i >= len(self.gradInput): @@ -34,4 +34,3 @@ class CAddTable(Module): del self.gradInput[len(input):] return self.gradInput - diff --git a/torch/legacy/nn/CDivTable.py b/torch/legacy/nn/CDivTable.py index 790944786f..c60a5bb927 100644 --- a/torch/legacy/nn/CDivTable.py +++ b/torch/legacy/nn/CDivTable.py @@ -1,7 +1,9 @@ import torch from .Module import Module + class CDivTable(Module): + def __init__(self, ): super(CDivTable, self).__init__() self.gradInput = [] @@ -20,4 +22,3 @@ class CDivTable(Module): del self.gradInput[len(input):] return self.gradInput - diff --git a/torch/legacy/nn/CMul.py b/torch/legacy/nn/CMul.py index 40998ad43d..4880d25d34 100644 --- a/torch/legacy/nn/CMul.py +++ b/torch/legacy/nn/CMul.py @@ -4,6 +4,7 @@ import torch from .Module import Module from .utils import clear, contiguousView + class CMul(Module): def __init__(self, *args): @@ -33,11 +34,10 @@ class CMul(Module): if stdv is not None: stdv = stdv * math.sqrt(3) else: - stdv = 1./math.sqrt(self.weight.nelement()) + stdv = 1. 
/ math.sqrt(self.weight.nelement()) self.weight.uniform_(-stdv, stdv) - def updateOutput(self, input): # lazy-initialize if self._output is None: @@ -61,10 +61,9 @@ class CMul(Module): return self.output - def updateGradInput(self, input, gradOutput): if self.gradInput is None: - return + return if self._gradOutput is None: self._gradOutput = input.new() @@ -85,7 +84,6 @@ class CMul(Module): return self.gradInput - def accGradParameters(self, input, gradOutput, scale=1): if self._input is None: self._input = input.new() @@ -103,17 +101,17 @@ class CMul(Module): def type(self, type=None, tensorCache=None): if type: - self.clearState() + self.clearState() return super(CMul, self).type(type, tensorCache) def clearState(self): clear(self, [ - '_input', - '_output', - '_weight', - '_gradWeight', - '_expand', - '_repeat', - '_sum', + '_input', + '_output', + '_weight', + '_gradWeight', + '_expand', + '_repeat', + '_sum', ]) return super(CMul, self).clearState() diff --git a/torch/legacy/nn/CMulTable.py b/torch/legacy/nn/CMulTable.py index f79114c33f..64a58f0c79 100644 --- a/torch/legacy/nn/CMulTable.py +++ b/torch/legacy/nn/CMulTable.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class CMulTable(Module): def __init__(self, ): @@ -17,7 +18,7 @@ class CMulTable(Module): def updateGradInput_efficient(self, input, gradOutput): if self.tout is None: - self.tout = input[0].new() + self.tout = input[0].new() self.tout.resize_as_(self.output) for i in range(len(input)): if len(self.gradInput) <= i: diff --git a/torch/legacy/nn/CSubTable.py b/torch/legacy/nn/CSubTable.py index b8ee7ab22d..85d8527f8c 100644 --- a/torch/legacy/nn/CSubTable.py +++ b/torch/legacy/nn/CSubTable.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class CSubTable(Module): def __init__(self, ): @@ -14,12 +15,11 @@ class CSubTable(Module): def updateGradInput(self, input, gradOutput): if self.gradInput[0] is None: - self.gradInput[0] = input[0].new() + self.gradInput[0] = input[0].new() if self.gradInput[1] is None: - self.gradInput[1] = input[1].new() + self.gradInput[1] = input[1].new() self.gradInput[0].resize_as_(input[0]).copy_(gradOutput) self.gradInput[1].resize_as_(input[1]).copy_(gradOutput).mul_(-1) self.gradInput = self.gradInput[:2] return self.gradInput - diff --git a/torch/legacy/nn/Clamp.py b/torch/legacy/nn/Clamp.py index 90eec28bb0..0bfcac3266 100644 --- a/torch/legacy/nn/Clamp.py +++ b/torch/legacy/nn/Clamp.py @@ -1,6 +1,8 @@ import torch from .HardTanh import HardTanh + class Clamp(HardTanh): + def __init__(self, min_value, max_value): super(Clamp, self,).__init__(min_value, max_value) diff --git a/torch/legacy/nn/ClassNLLCriterion.py b/torch/legacy/nn/ClassNLLCriterion.py index 60aec29f60..c9a67f7ff0 100644 --- a/torch/legacy/nn/ClassNLLCriterion.py +++ b/torch/legacy/nn/ClassNLLCriterion.py @@ -1,7 +1,9 @@ import torch from .Criterion import Criterion + class ClassNLLCriterion(Criterion): + def __init__(self, weights=None, sizeAverage=True): super(ClassNLLCriterion, self).__init__() self.sizeAverage = sizeAverage @@ -27,7 +29,6 @@ class ClassNLLCriterion(Criterion): self.output = self.output_tensor[0] return self.output - def updateGradInput(self, input, target): self.gradInput.resize_as_(input).zero_() target = target.long() diff --git a/torch/legacy/nn/ClassSimplexCriterion.py b/torch/legacy/nn/ClassSimplexCriterion.py index 17bf77f354..a6e7329630 100644 --- a/torch/legacy/nn/ClassSimplexCriterion.py +++ b/torch/legacy/nn/ClassSimplexCriterion.py @@ -12,19 
+12,20 @@ from .MSECriterion import MSECriterion Reference: http.//arxiv.org/abs/1506.08230 """ + class ClassSimplexCriterion(MSECriterion): def __init__(self, nClasses): - super(ClassSimplexCriterion, self).__init__() - self.nClasses = nClasses + super(ClassSimplexCriterion, self).__init__() + self.nClasses = nClasses - # embedding the simplex in a space of dimension strictly greater than - # the minimum possible (nClasses-1) is critical for effective training. - simp = self._regsplex(nClasses - 1) - self.simplex = torch.cat((simp, torch.zeros(simp.size(0), nClasses - simp.size(1))), 1) - self._target = torch.Tensor(nClasses) + # embedding the simplex in a space of dimension strictly greater than + # the minimum possible (nClasses-1) is critical for effective training. + simp = self._regsplex(nClasses - 1) + self.simplex = torch.cat((simp, torch.zeros(simp.size(0), nClasses - simp.size(1))), 1) + self._target = torch.Tensor(nClasses) - self.output_tensor = None + self.output_tensor = None def _regsplex(self, n): """ @@ -51,11 +52,11 @@ class ClassSimplexCriterion(MSECriterion): if k == 0: a[k][k] = 1 else: - a[k][k] = math.sqrt(1 - a[k:k+1, 0:k+1].norm()**2) + a[k][k] = math.sqrt(1 - a[k:k + 1, 0:k + 1].norm()**2) # fill_ the k-th coordinates for the vectors of the remaining vertices - c = (a[k][k]**2 - 1 - 1/n) / a[k][k] - a[k+1:n+2, k:k+1].fill_(c) + c = (a[k][k]**2 - 1 - 1 / n) / a[k][k] + a[k + 1:n + 2, k:k + 1].fill_(c) return a @@ -69,20 +70,20 @@ class ClassSimplexCriterion(MSECriterion): self._target[i].copy_(self.simplex[int(target[i])]) def updateOutput(self, input, target): - self._transformTarget(target) + self._transformTarget(target) - assert input.nelement() == self._target.nelement() - if self.output_tensor is None: - self.output_tensor = input.new(1) - self._backend.MSECriterion_updateOutput( + assert input.nelement() == self._target.nelement() + if self.output_tensor is None: + self.output_tensor = input.new(1) + self._backend.MSECriterion_updateOutput( self._backend.library_state, input, self._target, self.output_tensor, self.sizeAverage - ) - self.output = self.output_tensor[0] - return self.output + ) + self.output = self.output_tensor[0] + return self.output def updateGradInput(self, input, target): assert input.nelement() == self._target.nelement() @@ -100,6 +101,5 @@ class ClassSimplexCriterion(MSECriterion): def getTopPrediction(self, input): prod = self.getPredictions(input) - _, maxs = prod.max(prod.ndimension()-1) + _, maxs = prod.max(prod.ndimension() - 1) return maxs.view(-1) - diff --git a/torch/legacy/nn/Concat.py b/torch/legacy/nn/Concat.py index e9e924ce93..cb54d7674c 100644 --- a/torch/legacy/nn/Concat.py +++ b/torch/legacy/nn/Concat.py @@ -1,6 +1,7 @@ import torch from .Container import Container + class Concat(Container): def __init__(self, dimension): @@ -22,9 +23,9 @@ class Concat(Container): offset = 0 for i, module in enumerate(self.modules): - currentOutput = outs[i] - self.output.narrow(self.dimension, offset, currentOutput.size(self.dimension)).copy_(currentOutput) - offset = offset + currentOutput.size(self.dimension) + currentOutput = outs[i] + self.output.narrow(self.dimension, offset, currentOutput.size(self.dimension)).copy_(currentOutput) + offset = offset + currentOutput.size(self.dimension) return self.output @@ -34,9 +35,11 @@ class Concat(Container): offset = 0 for i, module in enumerate(self.modules): currentOutput = module.output - currentGradInput = module.updateGradInput(input, gradOutput.narrow(self.dimension, offset, 
currentOutput.size(self.dimension))) + currentGradInput = module.updateGradInput(input, gradOutput.narrow( + self.dimension, offset, currentOutput.size(self.dimension))) - if currentGradInput: # if the module does not produce a gradInput (for example first layer),: ignore it and move on. + # if the module does not produce a gradInput (for example first layer),: ignore it and move on. + if currentGradInput: if i == 0: self.gradInput.copy_(currentGradInput) else: @@ -46,24 +49,25 @@ class Concat(Container): return self.gradInput - def accGradParameters(self, input, gradOutput, scale=1): offset = 0 for i, module in enumerate(self.modules): - currentOutput = module.output - module.accGradParameters( - input, - gradOutput.narrow(self.dimension, offset, currentOutput.size(self.dimension)), - scale) - offset = offset + currentOutput.size(self.dimension) + currentOutput = module.output + module.accGradParameters( + input, + gradOutput.narrow(self.dimension, offset, currentOutput.size(self.dimension)), + scale) + offset = offset + currentOutput.size(self.dimension) def backward(self, input, gradOutput, scale=1): self.gradInput.resize_as_(input) offset = 0 for i, module in enumerate(self.modules): currentOutput = module.output - currentGradInput = module.backward(input, gradOutput.narrow(self.dimension, offset, currentOutput.size(self.dimension)), scale) - if currentGradInput is not None: # if the module.es not produce a gradInput (for example first layer),: ignore it and move on. + currentGradInput = module.backward(input, gradOutput.narrow( + self.dimension, offset, currentOutput.size(self.dimension)), scale) + # if the module.es not produce a gradInput (for example first layer),: ignore it and move on. + if currentGradInput is not None: if i == 0: self.gradInput.copy_(currentGradInput) else: @@ -75,12 +79,12 @@ class Concat(Container): def accUpdateGradParameters(self, input, gradOutput, lr): offset = 0 for i, module in enumerate(self.modules): - currentOutput = module.output - module.accUpdateGradParameters( - input, - gradOutput.narrow(self.dimension, offset, currentOutput.size(self.dimension)), - lr) - offset = offset + currentOutput.size(self.dimension) + currentOutput = module.output + module.accUpdateGradParameters( + input, + gradOutput.narrow(self.dimension, offset, currentOutput.size(self.dimension)), + lr) + offset = offset + currentOutput.size(self.dimension) def __tostring__(self): tab = ' ' @@ -92,7 +96,7 @@ class Concat(Container): res = torch.type(self) res += ' {' + line + tab + 'input' for i in range(len(self.modules)): - if i == len(self.modules)-1: + if i == len(self.modules) - 1: res += line + tab + next + '(' + i + '): ' + str(self.modules[i]).replace(line, line + tab + extlast) else: res += line + tab + next + '(' + i + '): ' + str(self.modules[i]).replace(line, line + tab + ext) diff --git a/torch/legacy/nn/ConcatTable.py b/torch/legacy/nn/ConcatTable.py index 628b9595fe..afebf8c296 100644 --- a/torch/legacy/nn/ConcatTable.py +++ b/torch/legacy/nn/ConcatTable.py @@ -1,6 +1,7 @@ import torch from .Container import Container + class ConcatTable(Container): def __init__(self, ): @@ -23,7 +24,7 @@ class ConcatTable(Container): l1[i] = res else: f(l1, i, v) - for i in range(len(l1)-1, len(l2)-1, -1): + for i in range(len(l1) - 1, len(l2) - 1, -1): del l1[i] return l1 @@ -44,6 +45,7 @@ class ConcatTable(Container): if i == 0: self.gradInput = self.gradInput if wasTable else [] + def fn(l, i, v): if i >= len(l): assert len(l) == i @@ -82,11 +84,11 @@ class ConcatTable(Container): 
def accGradParameters(self, input, gradOutput, scale=1): for i, module in ipairs(self.modules): - self.rethrowErrors(module, i, 'accGradParameters', input, gradOutput[i], scale) + self.rethrowErrors(module, i, 'accGradParameters', input, gradOutput[i], scale) def accUpdateGradParameters(self, input, gradOutput, lr): for i, module in ipairs(self.modules): - self.rethrowErrors(module, i, 'accUpdateGradParameters', input, gradOutput[i], lr) + self.rethrowErrors(module, i, 'accUpdateGradParameters', input, gradOutput[i], lr) def __repr__(self): tab = ' ' @@ -98,14 +100,13 @@ class ConcatTable(Container): res = torch.typename(self) res = res + ' {' + line + tab + 'input' for i in range(len(self.modules)): - if i == len(self.modules)-1: - res = res + line + tab + next + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab + extlast) - else: - res = res + line + tab + next + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab + ext) - + if i == len(self.modules) - 1: + res = res + line + tab + next + '(' + str(i) + '): ' + \ + str(self.modules[i]).replace(line, line + tab + extlast) + else: + res = res + line + tab + next + '(' + str(i) + '): ' + \ + str(self.modules[i]).replace(line, line + tab + ext) res = res + line + tab + last + 'output' res = res + line + '}' return res - - diff --git a/torch/legacy/nn/Container.py b/torch/legacy/nn/Container.py index dba0f78802..84a726e003 100644 --- a/torch/legacy/nn/Container.py +++ b/torch/legacy/nn/Container.py @@ -4,11 +4,12 @@ from .utils import clear from functools import wraps import sys + class Container(Module): def __init__(self, *args): - super(Container, self).__init__(*args) - self.modules = [] + super(Container, self).__init__(*args) + self.modules = [] def add(self, module): self.modules.append(module) @@ -18,11 +19,11 @@ class Container(Module): return self.modules[index] def size(self): - return len(self.modules) + return len(self.modules) def applyToModules(self, func): - for module in self.modules: - func(module) + for module in self.modules: + func(module) def zeroGradParameters(self): self.applyToModules(lambda m: m.zeroGradParameters()) @@ -46,16 +47,16 @@ class Container(Module): self.applyToModules(lambda m: m.reset(stdv)) def parameters(self): - w = [] - gw = [] - for module in self.modules: - mparam = module.parameters() - if mparam is not None: - w.extend(mparam[0]) - gw.extend(mparam[1]) - if not w: - return - return w, gw + w = [] + gw = [] + for module in self.modules: + mparam = module.parameters() + if mparam is not None: + w.extend(mparam[0]) + gw.extend(mparam[1]) + if not w: + return + return w, gw def clearState(self): clear('output') @@ -63,4 +64,3 @@ class Container(Module): for module in self.modules: module.clearState() return self - diff --git a/torch/legacy/nn/Contiguous.py b/torch/legacy/nn/Contiguous.py index 0371ceb6f4..aacadb05e5 100644 --- a/torch/legacy/nn/Contiguous.py +++ b/torch/legacy/nn/Contiguous.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Contiguous(Module): def updateOutput(self, input): @@ -11,7 +12,6 @@ class Contiguous(Module): return self.output - def updateGradInput(self, input, gradOutput): if not gradOutput.is_contiguous(): self.gradInput.resize_as_(gradOutput).copy_(gradOutput) @@ -19,4 +19,3 @@ class Contiguous(Module): self.gradInput.set_(gradOutput) return self.gradInput - diff --git a/torch/legacy/nn/Copy.py b/torch/legacy/nn/Copy.py index 4f9a9c9c72..71c8682cc9 100644 --- a/torch/legacy/nn/Copy.py +++ b/torch/legacy/nn/Copy.py @@ 
-1,6 +1,7 @@ import torch from .Module import Module + class Copy(Module): def __init__(self, intype, outtype, dontCast=False): @@ -13,15 +14,12 @@ class Copy(Module): self.output.resize_(input.size()).copy_(input) return self.output - def updateGradInput(self, input, gradOutput): self.gradInput.resize_(gradOutput.size()).copy_(gradOutput) return self.gradInput - def type(self, type=None, tensorCache=None): if type and self.dontCast: - return self + return self return super(Copy, self).type(self, type, tensorCache) - diff --git a/torch/legacy/nn/Cosine.py b/torch/legacy/nn/Cosine.py index 71888b3797..cda75c5467 100644 --- a/torch/legacy/nn/Cosine.py +++ b/torch/legacy/nn/Cosine.py @@ -3,6 +3,7 @@ import torch from .Module import Module from .utils import clear + class Cosine(Module): def __init__(self, inputSize, outputSize): @@ -22,7 +23,7 @@ class Cosine(Module): if stdv is not None: stdv = stdv * math.sqrt(3) else: - stdv = 1./math.sqrt(self.weight.size(0)) + stdv = 1. / math.sqrt(self.weight.size(0)) self.weight.uniform_(-stdv, stdv) def updateOutput(self, input): @@ -32,9 +33,9 @@ class Cosine(Module): outputSize = self.weight.size(0) if self._weightNorm is None: - self._weightNorm = self.weight.new() + self._weightNorm = self.weight.new() if self._inputNorm is None: - self._inputNorm = self.weight.new() + self._inputNorm = self.weight.new() # y_j = (w_j * x) / ( || w_j || * || x || ) @@ -53,12 +54,11 @@ class Cosine(Module): self.output.div_(self._inputNorm.expand_as(self.output)) return self.output - def updateGradInput(self, input, gradOutput): assert input.dim() == 2 if self.gradInput is None: - return + return inputSize = self.weight.size(1) outputSize = self.weight.size(0) @@ -72,15 +72,15 @@ class Cosine(Module): nelement = self.gradInput.nelement() self.gradInput.resize_as_(input) if self.gradInput.nelement() != nelement: - self.gradInput.zero_() + self.gradInput.zero_() inputNorm = self._inputNorm.expand_as(input) weightNorm = self._weightNorm.view(1, outputSize).expand_as(gradOutput) if self._gradOutput is None: - self._gradOutput = gradOutput.new() + self._gradOutput = gradOutput.new() if self._sum is None: - self._sum = input.new() + self._sum = input.new() self.gradInput.copy_(input).div_(inputNorm) self._gradOutput.resize_as_(gradOutput).copy_(gradOutput) @@ -107,13 +107,13 @@ class Cosine(Module): """ if self._weight is None: - self._weight = self.weight.new() + self._weight = self.weight.new() if self._sum is None: - self._sum = input.new() + self._sum = input.new() self._weight.resize_as_(self.weight).copy_(self.weight) if self._gradOutput is None: - self._gradOutput = gradOutput.new() + self._gradOutput = gradOutput.new() self._gradOutput.resize_as_(gradOutput).copy_(gradOutput) self._gradOutput.mul_(self.output) torch.sum(self._gradOutput, 0, out=self._sum) @@ -131,25 +131,23 @@ class Cosine(Module): def type(self, type=None, tensorCache=None): if type is not None: - # prevent premature memory allocations - self._input = None - self._weight = None - self._inputNorm = None - self._weightNorm = None - self._gradOutput = None - self._sum = None + # prevent premature memory allocations + self._input = None + self._weight = None + self._inputNorm = None + self._weightNorm = None + self._gradOutput = None + self._sum = None return super(Cosine, self).type(type, tensorCache) - def clearState(self): clear(self, [ - '_input', - '_weight', - '_gradOutput', - '_sum', - '_inputNorm', - '_weightNorm', + '_input', + '_weight', + '_gradOutput', + '_sum', + '_inputNorm', + 
'_weightNorm', ]) return super(Cosine, self).clearState() - diff --git a/torch/legacy/nn/CosineDistance.py b/torch/legacy/nn/CosineDistance.py index 0bea8ee341..b13b96408a 100644 --- a/torch/legacy/nn/CosineDistance.py +++ b/torch/legacy/nn/CosineDistance.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class CosineDistance(Module): def __init__(self, ): @@ -11,39 +12,38 @@ class CosineDistance(Module): self._input1 = None self._input2 = None self.buffer = None - self.w1 = None + self.w1 = None self.w22 = None - self.w = None + self.w = None self.w32 = None self.ones = None def _makeContiguous(self, input1, input2): if not input1.is_contiguous(): - if self._input1 is None: - self._input1 = input1.new() - self._input1.resize_as_(input1).copy_(input1) - input1 = self._input1 + if self._input1 is None: + self._input1 = input1.new() + self._input1.resize_as_(input1).copy_(input1) + input1 = self._input1 if not input2.is_contiguous(): - if self._input2 is None: - self._input2 = input2.new() - self._input2.resize_as_(input2).copy_(input2) - input2 = self._input2 + if self._input2 is None: + self._input2 = input2.new() + self._input2.resize_as_(input2).copy_(input2) + input2 = self._input2 return input1, input2 - def updateOutput(self, input): input1, input2 = input[0], input[1] input1, input2 = self._makeContiguous(input1, input2) if self.buffer is None: - self.buffer = input1.new() - self.w1 = input1.new() - self.w22 = input1.new() - self.w = input1.new() - self.w32 = input1.new() - self.ones = input1.new() + self.buffer = input1.new() + self.w1 = input1.new() + self.w22 = input1.new() + self.w = input1.new() + self.w32 = input1.new() + self.ones = input1.new() torch.mul(input1, input2, out=self.buffer) torch.sum(self.buffer, 1, out=self.w1) @@ -65,18 +65,17 @@ class CosineDistance(Module): return self.output - def updateGradInput(self, input, gradOutput): - v1 = input[0] - v2 = input[1] + v1 = input[0] + v2 = input[1] v1, v2 = self._makeContiguous(v1, v2) if len(self.gradInput) != 2: - if self.gradInput[0] is None: - self.gradInput[0] = v1.new() - if self.gradInput[1] is None: - self.gradInput[1] = v1.new() - self.gradInput = self.gradInput[:2] + if self.gradInput[0] is None: + self.gradInput[0] = v1.new() + if self.gradInput[1] is None: + self.gradInput[1] = v1.new() + self.gradInput = self.gradInput[:2] gw1 = self.gradInput[0] gw2 = self.gradInput[1] @@ -97,15 +96,13 @@ class CosineDistance(Module): return self.gradInput - def clearState(self): clear(self, [ - 'buffer', - 'w1', - 'w22', - 'w', - 'w32', - 'ones', + 'buffer', + 'w1', + 'w22', + 'w', + 'w32', + 'ones', ]) return super(CosineDistance, self).clearState() - diff --git a/torch/legacy/nn/CosineEmbeddingCriterion.py b/torch/legacy/nn/CosineEmbeddingCriterion.py index cafbe41d76..271842faae 100644 --- a/torch/legacy/nn/CosineEmbeddingCriterion.py +++ b/torch/legacy/nn/CosineEmbeddingCriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class CosineEmbeddingCriterion(Criterion): def __init__(self, margin=0, sizeAverage=True): @@ -9,23 +10,22 @@ class CosineEmbeddingCriterion(Criterion): self.sizeAverage = sizeAverage self.gradInput = [torch.Tensor(), torch.Tensor()] self.buffer = None - self.w1 = None + self.w1 = None self.w22 = None - self.w = None + self.w = None self.w32 = None self._outputs = None self._idx = None - def updateOutput(self, input, y): input1, input2 = input[0], input[1] # keep backward compatibility if self.buffer is None: self.buffer = input1.new() - 
self.w1 = input1.new() + self.w1 = input1.new() self.w22 = input1.new() - self.w = input1.new() + self.w = input1.new() self.w32 = input1.new() self._outputs = input1.new() @@ -64,14 +64,13 @@ class CosineEmbeddingCriterion(Criterion): self.output = self._outputs.sum() if self.sizeAverage: - self.output = self.output / y.size(0) + self.output = self.output / y.size(0) return self.output - def updateGradInput(self, input, y): - v1 = input[0] - v2 = input[1] + v1 = input[0] + v2 = input[1] gw1 = self.gradInput[0] gw2 = self.gradInput[1] @@ -98,22 +97,21 @@ class CosineEmbeddingCriterion(Criterion): gw2[self._idx] = gw2[self._idx].mul_(-1) if self.sizeAverage: - gw1.div_(y.size(0)) - gw2.div_(y.size(0)) + gw1.div_(y.size(0)) + gw2.div_(y.size(0)) return self.gradInput def type(self, type=None, tensorCache=None): if not type: - return self._type + return self._type self._idx = None super(CosineEmbeddingCriterion, self).type(type, tensorCache) # comparison operators behave differently from cuda/c implementations if type == 'torch.cuda.FloatTensor': - self._idx = torch.cuda.ByteTensor() + self._idx = torch.cuda.ByteTensor() else: - self._idx = torch.ByteTensor() + self._idx = torch.ByteTensor() return self - diff --git a/torch/legacy/nn/Criterion.py b/torch/legacy/nn/Criterion.py index 4da81e6bfd..0c74470709 100644 --- a/torch/legacy/nn/Criterion.py +++ b/torch/legacy/nn/Criterion.py @@ -3,6 +3,7 @@ from .Module import Module from .utils import recursiveType import torch._thnn + class Criterion(object): def __init__(self): diff --git a/torch/legacy/nn/CriterionTable.py b/torch/legacy/nn/CriterionTable.py index f7a6479342..7e20a8fb85 100644 --- a/torch/legacy/nn/CriterionTable.py +++ b/torch/legacy/nn/CriterionTable.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class CriterionTable(Module): def __init__(self, criterion): @@ -15,4 +16,3 @@ class CriterionTable(Module): def updateGradInput(self, input, grad_output): self.criterion.updateGradInput(*input) return self.gradInput - diff --git a/torch/legacy/nn/CrossEntropyCriterion.py b/torch/legacy/nn/CrossEntropyCriterion.py index f6042cc14b..67e8b0d9ab 100644 --- a/torch/legacy/nn/CrossEntropyCriterion.py +++ b/torch/legacy/nn/CrossEntropyCriterion.py @@ -3,6 +3,7 @@ from .Criterion import Criterion from .LogSoftMax import LogSoftMax from .ClassNLLCriterion import ClassNLLCriterion + class CrossEntropyCriterion(Criterion): def __init__(self, weights=None): @@ -26,4 +27,3 @@ class CrossEntropyCriterion(Criterion): self.lsm.updateGradInput(input, self.nll.gradInput) self.gradInput = self.lsm.gradInput.view(size) return self.gradInput - diff --git a/torch/legacy/nn/DepthConcat.py b/torch/legacy/nn/DepthConcat.py index b7abe9ad5e..19c31873ff 100644 --- a/torch/legacy/nn/DepthConcat.py +++ b/torch/legacy/nn/DepthConcat.py @@ -14,18 +14,19 @@ import math import torch from .Concat import Concat + class DepthConcat(Concat): def windowNarrow(self, output, currentOutput, offset): outputWindow = output.narrow(self.dimension, offset, currentOutput.size(self.dimension)) for dim in range(len(self.outputSize)): - currentSize = currentOutput.size(dim) - if dim != self.dimension and self.outputSize[dim] != currentSize: - # 5x5 vs 3x3 -> start = [(5-3)/2] + 1 = 2 (1 pad each side) - # 9x9 vs 5x5 -> start = [(9-5)/2] + 1 = 3 (2 pad each side) - # 9x9 vs 4x4 -> start = [(9-4)/2] + 1 = 3.5 (2 pad, 3 pad) - start = int(math.floor(((self.outputSize[dim] - currentSize) / 2))) - outputWindow = outputWindow.narrow(dim, start, currentSize) + currentSize = 
currentOutput.size(dim) + if dim != self.dimension and self.outputSize[dim] != currentSize: + # 5x5 vs 3x3 -> start = [(5-3)/2] + 1 = 2 (1 pad each side) + # 9x9 vs 5x5 -> start = [(9-5)/2] + 1 = 3 (2 pad each side) + # 9x9 vs 4x4 -> start = [(9-4)/2] + 1 = 3.5 (2 pad, 3 pad) + start = int(math.floor(((self.outputSize[dim] - currentSize) / 2))) + outputWindow = outputWindow.narrow(dim, start, currentSize) return outputWindow def updateOutput(self, input): diff --git a/torch/legacy/nn/DistKLDivCriterion.py b/torch/legacy/nn/DistKLDivCriterion.py index 984895f1de..074e1db1ff 100644 --- a/torch/legacy/nn/DistKLDivCriterion.py +++ b/torch/legacy/nn/DistKLDivCriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class DistKLDivCriterion(Criterion): def __init__(self, sizeAverage=True): @@ -11,7 +12,7 @@ class DistKLDivCriterion(Criterion): def updateOutput(self, input, target): assert input.is_same_size(target) if self.output_tensor is None: - self.output_tensor = input.new(1) + self.output_tensor = input.new(1) self._backend.DistKLDivCriterion_updateOutput( self._backend.library_state, input, @@ -32,4 +33,3 @@ class DistKLDivCriterion(Criterion): self.sizeAverage ) return self.gradInput - diff --git a/torch/legacy/nn/DotProduct.py b/torch/legacy/nn/DotProduct.py index 6584cc59ba..c91d5c738a 100644 --- a/torch/legacy/nn/DotProduct.py +++ b/torch/legacy/nn/DotProduct.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class DotProduct(Module): def __init__(self): @@ -13,7 +14,7 @@ class DotProduct(Module): input1, input2 = input[0], input[1] if self.buffer is None: - self.buffer = input1.new() + self.buffer = input1.new() torch.mul(input1, input2, out=self.buffer) torch.sum(self.buffer, 1, out=self.output) @@ -26,11 +27,11 @@ class DotProduct(Module): not_batch = False if len(self.gradInput) != 2: - if self.gradInput[0] is None: - self.gradInput[0] = input[0].new() - if self.gradInput[1] is None: - self.gradInput[1] = input[1].new() - self.gradInput = self.gradInput[:2] + if self.gradInput[0] is None: + self.gradInput[0] = input[0].new() + if self.gradInput[1] is None: + self.gradInput[1] = input[1].new() + self.gradInput = self.gradInput[:2] gw1 = self.gradInput[0] gw2 = self.gradInput[1] @@ -46,4 +47,3 @@ class DotProduct(Module): def clearState(self): clear(self, 'buffer') return super(DotProduct, self).clearState() - diff --git a/torch/legacy/nn/Dropout.py b/torch/legacy/nn/Dropout.py index b1be2c8d8a..41330e503b 100644 --- a/torch/legacy/nn/Dropout.py +++ b/torch/legacy/nn/Dropout.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class Dropout(Module): def __init__(self, p=0.5, inplace=False): @@ -19,8 +20,8 @@ class Dropout(Module): if self.p > 0 and self.train: self.noise.resize_as_(input) - self.noise.bernoulli_(1-self.p) - self.noise.div_(1-self.p) + self.noise.bernoulli_(1 - self.p) + self.noise.div_(1 - self.p) self.output.mul_(self.noise) return self.output @@ -32,7 +33,7 @@ class Dropout(Module): self.gradInput.resize_as_(gradOutput).copy_(gradOutput) if self.p > 0 and self.train: - self.gradInput.mul_(self.noise) # simply mask the gradients with the noise vector + self.gradInput.mul_(self.noise) # simply mask the gradients with the noise vector return self.gradInput @@ -45,4 +46,3 @@ class Dropout(Module): def clearState(self): clear(self, 'noise') return super(Dropout, self).clearState() - diff --git a/torch/legacy/nn/ELU.py b/torch/legacy/nn/ELU.py index b84562cd19..56e3a85352 100644 --- 
a/torch/legacy/nn/ELU.py +++ b/torch/legacy/nn/ELU.py @@ -2,6 +2,7 @@ import torch from .Module import Module + class ELU(Module): """ Djork-Arné Clevert, Thomas Unterthiner, Sepp Hochreiter @@ -39,4 +40,3 @@ class ELU(Module): def __repr__(self): return '{}(alpha={:.3f})'.format(str(type(self)), self.alpha) - diff --git a/torch/legacy/nn/Euclidean.py b/torch/legacy/nn/Euclidean.py index b52b077492..d529982eaa 100644 --- a/torch/legacy/nn/Euclidean.py +++ b/torch/legacy/nn/Euclidean.py @@ -3,6 +3,7 @@ import torch from .Module import Module from .utils import clear + class Euclidean(Module): def __init__(self, inputSize, outputSize): @@ -18,11 +19,11 @@ class Euclidean(Module): self.fastBackward = True self.reset() - self._input = None - self._weight = None - self._expand = None + self._input = None + self._weight = None + self._expand = None self._expand2 = None - self._repeat = None + self._repeat = None self._repeat2 = None self._div = None self._output = None @@ -32,32 +33,32 @@ class Euclidean(Module): def reset(self, stdv=None): if stdv is not None: - stdv = stdv * math.sqrt(3) + stdv = stdv * math.sqrt(3) else: - stdv = 1./math.sqrt(self.weight.size(0)) + stdv = 1. / math.sqrt(self.weight.size(0)) self.weight.uniform_(-stdv, stdv) def _view(self, res, src, *args): if src.is_contiguous(): - res.set_(src.view(*args)) + res.set_(src.view(*args)) else: - res.set_(src.contiguous().view(*args)) + res.set_(src.contiguous().view(*args)) def updateOutput(self, input): # lazy initialize buffers if self._input is None: - self._input = input.new() + self._input = input.new() if self._weight is None: - self._weight = self.weight.new() + self._weight = self.weight.new() if self._expand is None: - self._expand = self.output.new() + self._expand = self.output.new() if self._expand2 is None: - self._expand2 = self.output.new() + self._expand2 = self.output.new() if self._repeat is None: - self._repeat = self.output.new() + self._repeat = self.output.new() if self._repeat2 is None: - self._repeat2 = self.output.new() + self._repeat2 = self.output.new() inputSize, outputSize = self.weight.size(0), self.weight.size(1) @@ -88,19 +89,19 @@ class Euclidean(Module): def updateGradInput(self, input, gradOutput): if self.gradInput is None: - return + return if self._div is None: - self._div = input.new() + self._div = input.new() if self._output is None: - self._output = self.output.new() + self._output = self.output.new() if self._gradOutput is None: - self._gradOutput = input.new() + self._gradOutput = input.new() if self._expand3 is None: - self._expand3 = input.new() + self._expand3 = input.new() if not self.fastBackward: - self.updateOutput(input) + self.updateOutput(input) inputSize, outputSize = self.weight.size(0), self.weight.size(1) @@ -126,13 +127,11 @@ class Euclidean(Module): else: torch.mul(self._repeat, self._expand3, out=self._repeat2) - torch.sum(self._repeat2, 2, out=self.gradInput) self.gradInput.resize_as_(input) return self.gradInput - def accGradParameters(self, input, gradOutput, scale=1): inputSize, outputSize = self.weight.size(0), self.weight.size(1) @@ -144,32 +143,30 @@ class Euclidean(Module): # assumes a preceding call to updateGradInput assert input.dim() == 2 if self._sum is None: - self._sum = input.new() + self._sum = input.new() torch.sum(self._repeat2, 0, out=self._sum) self._sum.resize_(inputSize, outputSize) self.gradWeight.add_(-scale, self._sum) def type(self, type=None, tensorCache=None): if type: - # prevent premature memory allocations - self.clearState() + # 
prevent premature memory allocations + self.clearState() return super(Euclidean, self).type(type, tensorCache) - def clearState(self): clear(self, [ - '_input', - '_output', - '_gradOutput', - '_weight', - '_div', - '_sum', - '_expand', - '_expand2', - '_expand3', - '_repeat', - '_repeat2', + '_input', + '_output', + '_gradOutput', + '_weight', + '_div', + '_sum', + '_expand', + '_expand2', + '_expand3', + '_repeat', + '_repeat2', ]) return super(Euclidean, self).clearState() - diff --git a/torch/legacy/nn/Exp.py b/torch/legacy/nn/Exp.py index 97cd9c4e57..7156a99eb9 100644 --- a/torch/legacy/nn/Exp.py +++ b/torch/legacy/nn/Exp.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Exp(Module): def updateOutput(self, input): @@ -8,4 +9,3 @@ class Exp(Module): def updateGradInput(self, input, gradOutput): return torch.mul(self.output, gradOutput, out=self.gradInput) - diff --git a/torch/legacy/nn/FlattenTable.py b/torch/legacy/nn/FlattenTable.py index dbb1950abb..1468f0cedd 100644 --- a/torch/legacy/nn/FlattenTable.py +++ b/torch/legacy/nn/FlattenTable.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class FlattenTable(Module): def __init__(self): @@ -59,7 +60,6 @@ class FlattenTable(Module): return self.output - def updateGradInput(self, input, gradOutput): assert isinstance(input, list) assert isinstance(gradOutput, list) @@ -69,11 +69,10 @@ class FlattenTable(Module): # However, we should check that the gradInput is valid: if not self._checkMapping(gradOutput, self.gradInput, self.input_map): - self.gradInput = self._inverseFlatten(gradOutput, self.input_map) + self.gradInput = self._inverseFlatten(gradOutput, self.input_map) return self.gradInput - def type(self, type=None, tensorCache=None): if not type: return self._type @@ -81,8 +80,6 @@ class FlattenTable(Module): # conversions. Just force the tables to be empty. 
self.clearState() - def clearState(self): self.input_map = [] return super(FlattenTable, self).clearState() - diff --git a/torch/legacy/nn/GradientReversal.py b/torch/legacy/nn/GradientReversal.py index 033d60a448..36c048b3b0 100644 --- a/torch/legacy/nn/GradientReversal.py +++ b/torch/legacy/nn/GradientReversal.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class GradientReversal(Module): def __init__(self, lambd=1): @@ -19,4 +20,3 @@ class GradientReversal(Module): self.gradInput.copy_(gradOutput) self.gradInput.mul_(-self.lambd) return self.gradInput - diff --git a/torch/legacy/nn/HardShrink.py b/torch/legacy/nn/HardShrink.py index 4015ab8030..99b3bb2292 100644 --- a/torch/legacy/nn/HardShrink.py +++ b/torch/legacy/nn/HardShrink.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class HardShrink(Module): def __init__(self, lambd=0.5): @@ -26,4 +27,3 @@ class HardShrink(Module): self.lambd ) return self.gradInput - diff --git a/torch/legacy/nn/HardTanh.py b/torch/legacy/nn/HardTanh.py index d7ce767bea..b8bae62f9b 100644 --- a/torch/legacy/nn/HardTanh.py +++ b/torch/legacy/nn/HardTanh.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class HardTanh(Module): def __init__(self, min_value=-1, max_value=1, inplace=False): @@ -32,4 +33,3 @@ class HardTanh(Module): self.inplace ) return self.gradInput - diff --git a/torch/legacy/nn/HingeEmbeddingCriterion.py b/torch/legacy/nn/HingeEmbeddingCriterion.py index fb36467c9d..d94bd14bbd 100644 --- a/torch/legacy/nn/HingeEmbeddingCriterion.py +++ b/torch/legacy/nn/HingeEmbeddingCriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class HingeEmbeddingCriterion(Criterion): def __init__(self, margin=1, sizeAverage=True): @@ -11,7 +12,7 @@ class HingeEmbeddingCriterion(Criterion): def updateOutput(self, input, y): if self.buffer is None: - self.buffer = input.new() + self.buffer = input.new() self.buffer.resize_as_(input).copy_(input) self.buffer[torch.eq(y, -1.)] = 0 self.output = self.buffer.sum() @@ -34,4 +35,3 @@ class HingeEmbeddingCriterion(Criterion): self.gradInput.mul_(1. 
/ input.nelement()) return self.gradInput - diff --git a/torch/legacy/nn/Identity.py b/torch/legacy/nn/Identity.py index ed9f302f28..09c7e1b8e8 100644 --- a/torch/legacy/nn/Identity.py +++ b/torch/legacy/nn/Identity.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class Identity(Module): def updateOutput(self, input): @@ -14,8 +15,7 @@ class Identity(Module): def clearState(self): clear(self, [ - 'output', - 'gradInput', + 'output', + 'gradInput', ]) return super(Identity, self).clearState() - diff --git a/torch/legacy/nn/Index.py b/torch/legacy/nn/Index.py index 0a792d6e73..e88c454bff 100644 --- a/torch/legacy/nn/Index.py +++ b/torch/legacy/nn/Index.py @@ -1,25 +1,25 @@ import torch from .Module import Module + class Index(Module): def __init__(self, dimension): - super(Index, self).__init__() - self.dimension = dimension - self.gradInput = [self.gradInput] + super(Index, self).__init__() + self.dimension = dimension + self.gradInput = [self.gradInput] def updateOutput(self, input): - t = input[0] - index = input[1] - torch.index_select(t, self.dimension, index, out=self.output) - return self.output + t = input[0] + index = input[1] + torch.index_select(t, self.dimension, index, out=self.output) + return self.output def updateGradInput(self, input, gradOutput): - t = input[0] - index = input[1] - - gradInput = self.gradInput[0] # no gradient for the index variable - gradInput.resize_as_(t).zero_() - gradInput.index_add_(self.dimension, index, gradOutput) - return self.gradInput + t = input[0] + index = input[1] + gradInput = self.gradInput[0] # no gradient for the index variable + gradInput.resize_as_(t).zero_() + gradInput.index_add_(self.dimension, index, gradOutput) + return self.gradInput diff --git a/torch/legacy/nn/JoinTable.py b/torch/legacy/nn/JoinTable.py index e0df22f9e1..0031945d08 100644 --- a/torch/legacy/nn/JoinTable.py +++ b/torch/legacy/nn/JoinTable.py @@ -43,7 +43,7 @@ class JoinTable(Module): dim = self._getPositiveDimension(input) for i in range(len(input)): - if len(self.gradInput) < i+1: + if len(self.gradInput) < i + 1: self.gradInput.append(input[i].new()) self.gradInput[i].resize_as_(input[i]) self.gradInput = self.gradInput[:len(input)] diff --git a/torch/legacy/nn/L1Cost.py b/torch/legacy/nn/L1Cost.py index 60f0096b83..1a41588f94 100644 --- a/torch/legacy/nn/L1Cost.py +++ b/torch/legacy/nn/L1Cost.py @@ -2,6 +2,7 @@ import torch from .Criterion import Criterion from .utils import clear + class L1Cost(Criterion): def __init__(self): @@ -11,7 +12,7 @@ class L1Cost(Criterion): def updateOutput(self, input, target=None): assert target is None if self.output_tensor is None: - self.output_tensor = input.new(1) + self.output_tensor = input.new(1) self._backend.L1Cost_updateOutput( self._backend.library_state, input, @@ -33,4 +34,3 @@ class L1Cost(Criterion): def clearState(self): clear(self, 'output_tensor') return super(L1Cost, self).clearState() - diff --git a/torch/legacy/nn/L1HingeEmbeddingCriterion.py b/torch/legacy/nn/L1HingeEmbeddingCriterion.py index 8c985a0610..c414934221 100644 --- a/torch/legacy/nn/L1HingeEmbeddingCriterion.py +++ b/torch/legacy/nn/L1HingeEmbeddingCriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class L1HingeEmbeddingCriterion(Criterion): def __init__(self, margin=1): @@ -9,9 +10,9 @@ class L1HingeEmbeddingCriterion(Criterion): self.gradInput = [torch.Tensor(), torch.Tensor()] def updateOutput(self, input, y): - self.output = input[0].dist(input[1], 1); + self.output = 
input[0].dist(input[1], 1) if y == -1: - self.output = max(0, self.margin - self.output); + self.output = max(0, self.margin - self.output) return self.output @@ -33,4 +34,3 @@ class L1HingeEmbeddingCriterion(Criterion): self.gradInput[1].zero_().add_(-1, self.gradInput[0]) return self.gradInput - diff --git a/torch/legacy/nn/L1Penalty.py b/torch/legacy/nn/L1Penalty.py index 8f2af01276..05472d75f6 100644 --- a/torch/legacy/nn/L1Penalty.py +++ b/torch/legacy/nn/L1Penalty.py @@ -5,6 +5,7 @@ from .Module import Module # [gradOutput] to the gradient of the L1 loss. The [input] is copied to # the [output]. + class L1Penalty(Module): def __init__(self, l1weight, sizeAverage=False, provideOutput=True): @@ -34,4 +35,3 @@ class L1Penalty(Module): self.gradInput.add_(gradOutput) return self.gradInput - diff --git a/torch/legacy/nn/LeakyReLU.py b/torch/legacy/nn/LeakyReLU.py index c3175946ca..ca3a5cc6b2 100644 --- a/torch/legacy/nn/LeakyReLU.py +++ b/torch/legacy/nn/LeakyReLU.py @@ -1,15 +1,16 @@ import torch from .Module import Module + class LeakyReLU(Module): - def __init__(self, negval=1/100, inplace=False): + def __init__(self, negval=1 / 100, inplace=False): super(LeakyReLU, self).__init__() if isinstance(negval, bool): - inplace = negval - self.negval = 1/100 + inplace = negval + self.negval = 1 / 100 else: - self.negval = negval + self.negval = negval # default for inplace is False self.inplace = inplace @@ -27,7 +28,6 @@ class LeakyReLU(Module): ) return self.output - def updateGradInput(self, input, gradOutput): self._backend.LeakyReLU_updateGradInput( self._backend.library_state, @@ -39,7 +39,5 @@ class LeakyReLU(Module): ) return self.gradInput - def __repr__(self): return str(type(self)) + '({:.4f})'.format(self.negval) - diff --git a/torch/legacy/nn/Linear.py b/torch/legacy/nn/Linear.py index 1f27ff5158..eb69a63e70 100644 --- a/torch/legacy/nn/Linear.py +++ b/torch/legacy/nn/Linear.py @@ -3,6 +3,7 @@ import torch from .Module import Module from .utils import clear + class Linear(Module): def __init__(self, inputSize, outputSize, bias=True): @@ -24,7 +25,7 @@ class Linear(Module): if stdv is not None: stdv = stdv * math.sqrt(3) else: - stdv = 1./math.sqrt(self.weight.size(1)) + stdv = 1. 
/ math.sqrt(self.weight.size(1)) self.weight.uniform_(-stdv, stdv) if self.bias is not None: @@ -35,7 +36,7 @@ class Linear(Module): def _updateAddBuffer(self, input): nframe = input.size(0) if self.addBuffer is None: - self.addBuffer = input.new() + self.addBuffer = input.new() if self.addBuffer.nelement() != nframe: self.addBuffer.resize_(nframe).fill_(1) @@ -80,9 +81,7 @@ class Linear(Module): clear(self, 'addBuffer') return super(Linear, self).clearState() - def __repr__(self): return super(Linear, self).__repr__() + \ - '({} -> {})'.format(self.weight.size(1), self.weight.size(0)) + \ - (' without bias' if self.bias is None else '') - + '({} -> {})'.format(self.weight.size(1), self.weight.size(0)) + \ + (' without bias' if self.bias is None else '') diff --git a/torch/legacy/nn/Log.py b/torch/legacy/nn/Log.py index a036688905..1f5e4bd206 100644 --- a/torch/legacy/nn/Log.py +++ b/torch/legacy/nn/Log.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Log(Module): def updateOutput(self, input): @@ -9,10 +10,9 @@ class Log(Module): self.output.log_() return self.output - def updateGradInput(self, input, gradOutput) : + def updateGradInput(self, input, gradOutput): self.gradInput.resize_as_(input) self.gradInput.fill_(1) self.gradInput.div_(input) self.gradInput.mul_(gradOutput) return self.gradInput - diff --git a/torch/legacy/nn/LogSigmoid.py b/torch/legacy/nn/LogSigmoid.py index 3373b83725..d6b8761729 100644 --- a/torch/legacy/nn/LogSigmoid.py +++ b/torch/legacy/nn/LogSigmoid.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class LogSigmoid(Module): def __init__(self): @@ -10,7 +11,7 @@ class LogSigmoid(Module): def updateOutput(self, input): if self.buffer is None: - self.buffer = input.new() + self.buffer = input.new() self._backend.LogSigmoid_updateOutput( self._backend.library_state, input, @@ -32,5 +33,3 @@ class LogSigmoid(Module): def clearState(self): clear(self, 'buffer') return super(LogSigmoid, self).clearState() - - diff --git a/torch/legacy/nn/LogSoftMax.py b/torch/legacy/nn/LogSoftMax.py index b10f483a1f..948e9512ac 100644 --- a/torch/legacy/nn/LogSoftMax.py +++ b/torch/legacy/nn/LogSoftMax.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class LogSoftMax(Module): def updateOutput(self, input): @@ -11,7 +12,6 @@ class LogSoftMax(Module): ) return self.output - def updateGradInput(self, input, gradOutput): self._backend.LogSoftMax_updateGradInput( self._backend.library_state, @@ -21,4 +21,3 @@ class LogSoftMax(Module): self.output ) return self.gradInput - diff --git a/torch/legacy/nn/LookupTable.py b/torch/legacy/nn/LookupTable.py index 6413521cc9..dc610b7444 100644 --- a/torch/legacy/nn/LookupTable.py +++ b/torch/legacy/nn/LookupTable.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class LookupTable(Module): def __init__(self, nIndex, nOutput, paddingValue=-1, maxNorm=None, normType=None): @@ -59,16 +60,15 @@ class LookupTable(Module): self.renorm(input) input = self._makeInputContiguous(input) if input.dim() == 1: - torch.index_select(self.weight, 0, input, out=self.output) + torch.index_select(self.weight, 0, input, out=self.output) elif input.dim() == 2: - torch.index_select(self.weight, 0, input.view(-1), out=self.output) - self.output = self.output.view(input.size(0), input.size(1), self.weight.size(1)) + torch.index_select(self.weight, 0, input.view(-1), out=self.output) + self.output = self.output.view(input.size(0), input.size(1), self.weight.size(1)) else: - 
raise RuntimeError("input must be a vector or matrix") + raise RuntimeError("input must be a vector or matrix") return self.output - def updateGradInput(self, input, gradOutput): # the input can be of any type (as in the forward it's # converted anyway to LongTensor) thus, need to allocate @@ -81,7 +81,6 @@ class LookupTable(Module): return self.gradInput - def accGradParameters(self, input, gradOutput, scale=1): input = self._input if self.copiedInput else input if input.dim() == 2: @@ -110,16 +109,16 @@ class LookupTable(Module): def renorm(self, input): if self.maxNorm is None: - return + return # copy input into _input, so _input is continous. # The copied _input will be modified in the C code. self._input.resize_(input.size()).copy_(input) row_idx = self._input if row_idx.dim() == 2: - row_idx = row_idx.view(-1) + row_idx = row_idx.view(-1) elif row_idx.dim() != 1: - raise RuntimeError("input must be a vector or matrix") + raise RuntimeError("input must be a vector or matrix") # "row_idx" and "weight" will be modified in the C code self._backend.LookupTable_renorm( @@ -151,4 +150,3 @@ class LookupTable(Module): def clearState(self): clear(self, '_count', '_input', '_sorted', '_indices', '_gradOutput') return super(LookupTable, self).clearState() - diff --git a/torch/legacy/nn/MM.py b/torch/legacy/nn/MM.py index 35077edc15..30b0bc6d40 100644 --- a/torch/legacy/nn/MM.py +++ b/torch/legacy/nn/MM.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class MM(Module): def __init__(self, transA=False, transB=False): @@ -35,9 +36,9 @@ class MM(Module): def updateGradInput(self, input, gradOutput): if self.gradInput[0] is None: - self.gradInput[0] = input[0].new() + self.gradInput[0] = input[0].new() if self.gradInput[1] is None: - self.gradInput[1] = input[1].new() + self.gradInput[1] = input[1].new() assert len(input) == 2 a, b = input @@ -69,4 +70,3 @@ class MM(Module): getattr(torch, f)(a, gradOutput, out=self.gradInput[1]) return self.gradInput - diff --git a/torch/legacy/nn/MSECriterion.py b/torch/legacy/nn/MSECriterion.py index 5897d32ee1..05b3ee2a4e 100644 --- a/torch/legacy/nn/MSECriterion.py +++ b/torch/legacy/nn/MSECriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class MSECriterion(Criterion): def __init__(self, sizeAverage=True): @@ -10,7 +11,7 @@ class MSECriterion(Criterion): def updateOutput(self, input, target): if self.output_tensor is None: - self.output_tensor = input.new(1) + self.output_tensor = input.new(1) self._backend.MSECriterion_updateOutput( self._backend.library_state, input, @@ -30,4 +31,3 @@ class MSECriterion(Criterion): self.sizeAverage ) return self.gradInput - diff --git a/torch/legacy/nn/MV.py b/torch/legacy/nn/MV.py index ff87422083..bebe1b8578 100644 --- a/torch/legacy/nn/MV.py +++ b/torch/legacy/nn/MV.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class MV(Module): """Module to perform matrix vector multiplication on two minibatch inputs, producing a minibatch. 
@@ -63,4 +64,3 @@ class MV(Module): self.gradInput[1] = M.t() * gradOutput return self.gradInput - diff --git a/torch/legacy/nn/MarginCriterion.py b/torch/legacy/nn/MarginCriterion.py index 628ca2a36f..23d3aed3d6 100644 --- a/torch/legacy/nn/MarginCriterion.py +++ b/torch/legacy/nn/MarginCriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class MarginCriterion(Criterion): def __init__(self, margin=1, sizeAverage=True): @@ -11,7 +12,7 @@ class MarginCriterion(Criterion): def updateOutput(self, input, target): if self.output_tensor is None: - self.output_tensor = input.new(1) + self.output_tensor = input.new(1) self._backend.MarginCriterion_updateOutput( self._backend.library_state, input, @@ -33,4 +34,3 @@ class MarginCriterion(Criterion): self.margin ) return self.gradInput - diff --git a/torch/legacy/nn/MarginRankingCriterion.py b/torch/legacy/nn/MarginRankingCriterion.py index 31de4660cb..b68c7444f3 100644 --- a/torch/legacy/nn/MarginRankingCriterion.py +++ b/torch/legacy/nn/MarginRankingCriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class MarginRankingCriterion(Criterion): def __init__(self, margin=1, sizeAverage=True): @@ -15,29 +16,29 @@ class MarginRankingCriterion(Criterion): def updateOutput(self, input, y): if input[0].size(0) == 1: - self.output = max(0, -y*(input[0][0]-input[1][0]) + self.margin) + self.output = max(0, -y * (input[0][0] - input[1][0]) + self.margin) else: - if self._output is None: - self._output = input[0].clone() - self._output.resize_as_(input[0]) - self._output.copy_(input[0]) + if self._output is None: + self._output = input[0].clone() + self._output.resize_as_(input[0]) + self._output.copy_(input[0]) - self._output.add_(-1, input[1]) - self._output.mul_(-1).mul_(y) - self._output.add_(self.margin) + self._output.add_(-1, input[1]) + self._output.mul_(-1).mul_(y) + self._output.add_(self.margin) - self._output.clamp_(min=0) + self._output.clamp_(min=0) - self.output = self._output.sum() + self.output = self._output.sum() - if self.sizeAverage: - self.output = self.output / y.size(0) + if self.sizeAverage: + self.output = self.output / y.size(0) return self.output def updateGradInput(self, input, y): if input[0].size(0) == 1: - dist = -y * (input[0][0]-input[1][0]) + self.margin + dist = -y * (input[0][0] - input[1][0]) + self.margin if dist < 0: self.gradInput[0][0] = 0 self.gradInput[1][0] = 0 @@ -46,7 +47,7 @@ class MarginRankingCriterion(Criterion): self.gradInput[1][0] = y else: if self.dist is None: - self.dist = input[0].new() + self.dist = input[0].new() self.dist = self.dist.resize_as_(input[0]).copy_(input[0]) dist = self.dist @@ -55,7 +56,7 @@ class MarginRankingCriterion(Criterion): dist.add_(self.margin) if self.mask is None: - self.mask = input[0].new() + self.mask = input[0].new() self.mask = self.mask.resize_as_(input[0]).copy_(dist) mask = self.mask @@ -74,4 +75,3 @@ class MarginRankingCriterion(Criterion): self.gradInput[1].div_(y.size(0)) return self.gradInput - diff --git a/torch/legacy/nn/MaskedSelect.py b/torch/legacy/nn/MaskedSelect.py index c1abfff4d8..39be82d70e 100644 --- a/torch/legacy/nn/MaskedSelect.py +++ b/torch/legacy/nn/MaskedSelect.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class MaskedSelect(Module): def __init__(self): @@ -20,10 +21,10 @@ class MaskedSelect(Module): def updateGradInput(self, input, gradOutput): input, mask = input if input.type() == 'torch.cuda.FloatTensor': - torch.range(0, mask.nelement()-1, 
out=self._maskIndexBufferCPU).resize_(mask.size()) + torch.range(0, mask.nelement() - 1, out=self._maskIndexBufferCPU).resize_(mask.size()) self._maskIndexBuffer.resize_(self._maskIndexBufferCPU.size()).copy_(self._maskIndexBufferCPU) else: - torch.range(0, mask.nelement()-1, out=self._maskIndexBuffer).resize_(mask.size()) + torch.range(0, mask.nelement() - 1, out=self._maskIndexBuffer).resize_(mask.size()) torch.masked_select(self._maskIndexBuffer, mask, out=self._maskIndices) self._gradBuffer.resize_(input.nelement()).zero_() @@ -42,13 +43,13 @@ class MaskedSelect(Module): # These casts apply when switching between cuda/non-cuda types if type != 'torch.cuda.FloatTensor': - self._maskIndexBuffer = self._maskIndexBuffer.long() - self._maskIndices = self._maskIndices.long() - self._gradMask = self._gradMask.byte() + self._maskIndexBuffer = self._maskIndexBuffer.long() + self._maskIndices = self._maskIndices.long() + self._gradMask = self._gradMask.byte() else: - self._maskIndexBuffer = self._maskIndexBuffer.cuda() - self._maskIndices = self._maskIndices.cuda() - self._gradMask = self._gradMask.cuda() + self._maskIndexBuffer = self._maskIndexBuffer.cuda() + self._maskIndices = self._maskIndices.cuda() + self._gradMask = self._gradMask.cuda() self._type = type return self @@ -61,4 +62,3 @@ class MaskedSelect(Module): '_maskIndices', '_gradBuffer', '_gradMask']) - diff --git a/torch/legacy/nn/Max.py b/torch/legacy/nn/Max.py index 615532f0f7..eab9bcae02 100644 --- a/torch/legacy/nn/Max.py +++ b/torch/legacy/nn/Max.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear, addSingletondimension + class Max(Module): def __init__(self, dimension=0): @@ -13,25 +14,25 @@ class Max(Module): def _getPositiveDimension(self, input): dimension = self.dimension if dimension < 0: - dimension = input.dim() + dimension + dimension = input.dim() + dimension return dimension def _lazyInit(self): if self._output is None: - self._output = self.output.new() + self._output = self.output.new() if self._indices is None: - self._indices = \ - (torch.cuda.LongTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' else torch.LongTensor()) + self._indices = \ + (torch.cuda.LongTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' else torch.LongTensor()) def updateOutput(self, input): self._lazyInit() dimension = self._getPositiveDimension(input) torch.max(input, dimension, out=(self._output, self._indices)) if input.dim() > 1: - self.output.set_(self._output.select(dimension, 0)) + self.output.set_(self._output.select(dimension, 0)) else: - self.output.set_(self._output) + self.output.set_(self._output) return self.output @@ -39,9 +40,9 @@ class Max(Module): self._lazyInit() dimension = self._getPositiveDimension(input) if input.dim() > 1: - gradOutputView = addSingletondimension(gradOutput, dimension) + gradOutputView = addSingletondimension(gradOutput, dimension) else: - gradOutputView = gradOutput + gradOutputView = gradOutput self.gradInput.resize_as_(input).zero_().scatter_(dimension, self._indices, gradOutputView) return self.gradInput @@ -64,4 +65,3 @@ class Max(Module): def clearState(self): clear(self, '_indices', '_output') return super(Max, self).clearState() - diff --git a/torch/legacy/nn/Mean.py b/torch/legacy/nn/Mean.py index 905d91dbbf..67048d2aa6 100644 --- a/torch/legacy/nn/Mean.py +++ b/torch/legacy/nn/Mean.py @@ -9,8 +9,8 @@ Please use instead "nn.Sum(dimension, nInputDims, sizeAverage)" """ + class Mean(Sum): def __init__(self, dimension): 
super(Mean, self).__init__(dimension, True) - diff --git a/torch/legacy/nn/Min.py b/torch/legacy/nn/Min.py index 89809ea984..88967a09d0 100644 --- a/torch/legacy/nn/Min.py +++ b/torch/legacy/nn/Min.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear, addSingletondimension + class Min(Module): def __init__(self, dimension=0): @@ -13,25 +14,25 @@ class Min(Module): def _getPositiveDimension(self, input): dimension = self.dimension if dimension < 0: - dimension = input.dim() + dimension + dimension = input.dim() + dimension return dimension def _lazyInit(self): if self._output is None: - self._output = self.output.new() + self._output = self.output.new() if self._indices is None: - self._indices = \ - (torch.cuda.LongTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' else torch.LongTensor()) + self._indices = \ + (torch.cuda.LongTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' else torch.LongTensor()) def updateOutput(self, input): self._lazyInit() dimension = self._getPositiveDimension(input) torch.min(input, dimension, out=(self._output, self._indices)) if input.dim() > 1: - self.output.set_(self._output.select(dimension, 0)) + self.output.set_(self._output.select(dimension, 0)) else: - self.output.set_(self._output) + self.output.set_(self._output) return self.output @@ -39,9 +40,9 @@ class Min(Module): self._lazyInit() dimension = self._getPositiveDimension(input) if input.dim() > 1: - gradOutputView = addSingletondimension(gradOutput, dimension) + gradOutputView = addSingletondimension(gradOutput, dimension) else: - gradOutputView = gradOutput + gradOutputView = gradOutput self.gradInput.resize_as_(input).zero_().scatter_(dimension, self._indices, gradOutputView) return self.gradInput @@ -64,4 +65,3 @@ class Min(Module): def clearState(self): clear(self, '_indices', '_output') return super(Min, self).clearState() - diff --git a/torch/legacy/nn/MixtureTable.py b/torch/legacy/nn/MixtureTable.py index b70df50619..12fc3ea830 100644 --- a/torch/legacy/nn/MixtureTable.py +++ b/torch/legacy/nn/MixtureTable.py @@ -27,11 +27,11 @@ class MixtureTable(Module): # buffers if self._gaterView is None: - self._gaterView = input[0].new() + self._gaterView = input[0].new() if self._expert is None: - self._expert = input[0].new() + self._expert = input[0].new() if self._expertView is None: - self._expertView = input[0].new() + self._expertView = input[0].new() self.dimG = 1 batchSize = gaterInput.size(0) @@ -43,7 +43,7 @@ class MixtureTable(Module): expertInput = expertInputs[0] if self.batchSize != batchSize: - size = [1] * (expertInput.dim()+1) + size = [1] * (expertInput.dim() + 1) if self.dimG > 0: size[0] = gaterInput.size(0) size[self.dim] = gaterInput.size(self.dimG) @@ -83,11 +83,11 @@ class MixtureTable(Module): # buffers if self._sum is None: - self._sum = input[0].new() + self._sum = input[0].new() if self._expertView2 is None: - self._expertView2 = input[0].new() + self._expertView2 = input[0].new() if self._expert2 is None: - self._expert2 = input[0].new() + self._expert2 = input[0].new() if self.table: if not self.backwardSetup: @@ -99,7 +99,6 @@ class MixtureTable(Module): gaterGradInput.resize_as_(gaterInput) self.backwardSetup = True - # like CMulTable, but with broadcasting for i, expertGradInput in enumerate(expertGradInputs): # gater updateGradInput @@ -140,7 +139,7 @@ class MixtureTable(Module): else: self._expertView2 = expert.view(gaterInput.size(0), gaterInput.size(1), -1) - torch.sum(self._expertView2, self.dimG+1, 
out=gaterGradInput) + torch.sum(self._expertView2, self.dimG + 1, out=gaterGradInput) gaterGradInput.resize_as_(gaterInput) # expert updateGradInput @@ -159,11 +158,11 @@ class MixtureTable(Module): def clearState(self, ): clear(self, [ - '_gaterView', - '_expert', - '_expertView', - '_sum', - '_expert2', - '_expertView2', + '_gaterView', + '_expert', + '_expertView', + '_sum', + '_expert2', + '_expertView2', ]) return super(MixtureTable, self).clearState() diff --git a/torch/legacy/nn/Module.py b/torch/legacy/nn/Module.py index 6bee40db2c..5d599a109d 100644 --- a/torch/legacy/nn/Module.py +++ b/torch/legacy/nn/Module.py @@ -2,6 +2,7 @@ import torch import torch._thnn from .utils import clear, recursiveType + class Module(object): def __init__(self): @@ -36,13 +37,11 @@ class Module(object): self.accGradParameters(input, gradOutput, scale) return self.gradInput - def backwardUpdate(self, input, gradOutput, lr): self.updateGradInput(input, gradOutput) self.accUpdateGradParameters(input, gradOutput, lr) return self.gradInput - def updateGradInput(self, input, gradOutput): return self.gradInput @@ -58,7 +57,6 @@ class Module(object): self.gradWeight = gradWeight self.gradBias = gradBias - def sharedAccUpdateGradParameters(self, input, gradOutput, lr): if self.parameters(): self.zeroGradParameters() @@ -92,7 +90,7 @@ class Module(object): def type(self, type=None, tensorCache=None): if type is None: - return self._type + return self._type tensorCache = tensorCache or {} @@ -146,6 +144,7 @@ class Module(object): # # TODO: This logically belongs to torch.Tensor, not nn. _flattenTensorBuffer = {} + def _flatten(self, parameters=[]): # returns True if tensor occupies a contiguous region of memory (no holes) @@ -155,14 +154,14 @@ class Module(object): sortedSize = torch.LongTensor(list(tensor.size())).index_select(0, perm) nRealDim = int(torch.clamp(sortedStride, 0, 1).sum()) sortedStride = sortedStride.narrow(0, 0, nRealDim).clone() - sortedSize = sortedSize.narrow(0, 0, nRealDim).clone() + sortedSize = sortedSize.narrow(0, 0, nRealDim).clone() t = tensor.new().set_(tensor.storage(), 0, - tuple(sortedSize), - tuple(sortedStride)) + tuple(sortedSize), + tuple(sortedStride)) return t.is_contiguous() if not parameters: - return torch.Tensor() + return torch.Tensor() Tensor = parameters[0].new BufferTensor = Module._flattenTensorBuffer.get(type(parameters[0]), Tensor) @@ -179,14 +178,12 @@ class Module(object): storages[key] = (storage, num_parameters) num_parameters = num_parameters + storage.size() - parameterMeta.append({ - 'storage_offset': param.storage_offset() + storages[key][1], - 'size' : param.size(), - 'stride' : param.stride() + 'storage_offset': param.storage_offset() + storages[key][1], + 'size': param.size(), + 'stride': param.stride() }) - # 2. construct a single tensor that will hold all the parameters flatParameters = BufferTensor(num_parameters).zero_() @@ -198,14 +195,14 @@ class Module(object): tmp.fill_(1) tensorsCompact = tensorsCompact and isCompact(tmp) - maskParameters = flatParameters.byte().clone() - compactOffsets = flatParameters.long().cumsum(0) + maskParameters = flatParameters.byte().clone() + compactOffsets = flatParameters.long().cumsum(0) used_parameters = compactOffsets[-1] # 4. 
copy storages into the flattened parameter tensor for storageAndOffset in storages.values(): storage, offset = storageAndOffset - flatParameters[slice(offset, offset+storage.size())].copy_(Tensor().set_(storage)) + flatParameters[slice(offset, offset + storage.size())].copy_(Tensor().set_(storage)) # 5. allow garbage collection storages = None @@ -214,22 +211,22 @@ class Module(object): # 6. compact the flattened parameters if there were holes if used_parameters != num_parameters: - assert tensorsCompact + assert tensorsCompact - flatParameters = BufferTensor(used_parameters).copy_( - flatParameters.masked_select(maskParameters)) - for meta in parameterMeta: - meta['storage_offset'] = compactOffsets[meta['storage_offset']] + flatParameters = BufferTensor(used_parameters).copy_( + flatParameters.masked_select(maskParameters)) + for meta in parameterMeta: + meta['storage_offset'] = compactOffsets[meta['storage_offset']] if BufferTensor != Tensor: - flatParameters = Tensor(flatParameters.nelement()).copy_(flatParameters) + flatParameters = Tensor(flatParameters.nelement()).copy_(flatParameters) # 7. fix up the parameter tensors to point at the flattened parameters for param, meta in zip(parameters, parameterMeta): - param.set_(flatParameters.storage(), - meta['storage_offset'], - meta['size'], - meta['stride']) + param.set_(flatParameters.storage(), + meta['storage_offset'], + meta['size'], + meta['stride']) return flatParameters @@ -290,4 +287,3 @@ class Module(object): for i, module in enumerate(self.modules): self.modules[i] = module.replace(callback) return out - diff --git a/torch/legacy/nn/Mul.py b/torch/legacy/nn/Mul.py index d1d1cfb0d8..4ba0567506 100644 --- a/torch/legacy/nn/Mul.py +++ b/torch/legacy/nn/Mul.py @@ -2,6 +2,7 @@ import math import torch from .Module import Module + class Mul(Module): def __init__(self): @@ -12,9 +13,9 @@ class Mul(Module): def reset(self, stdv=None): if stdv is not None: - stdv = stdv * math.sqrt(3) + stdv = stdv * math.sqrt(3) else: - stdv = 1./math.sqrt(self.weight.size(0)) + stdv = 1. 
/ math.sqrt(self.weight.size(0)) self.weight.uniform_(-stdv, stdv) def updateOutput(self, input): @@ -28,5 +29,4 @@ class Mul(Module): return self.gradInput def accGradParameters(self, input, gradOutput, scale=1): - self.gradWeight[0] = self.gradWeight[0] + scale*input.dot(gradOutput); - + self.gradWeight[0] = self.gradWeight[0] + scale * input.dot(gradOutput) diff --git a/torch/legacy/nn/MulConstant.py b/torch/legacy/nn/MulConstant.py index 1865b29110..6652ffbaac 100644 --- a/torch/legacy/nn/MulConstant.py +++ b/torch/legacy/nn/MulConstant.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class MulConstant(Module): def __init__(self, constant_scalar, inplace=False): @@ -19,7 +20,6 @@ class MulConstant(Module): return self.output - def updateGradInput(self, input, gradOutput): if self.gradInput is None: return @@ -35,4 +35,3 @@ class MulConstant(Module): self.gradInput.mul_(self.constant_scalar) return self.gradInput - diff --git a/torch/legacy/nn/MultiCriterion.py b/torch/legacy/nn/MultiCriterion.py index 969894036a..455b32cf92 100644 --- a/torch/legacy/nn/MultiCriterion.py +++ b/torch/legacy/nn/MultiCriterion.py @@ -2,6 +2,7 @@ import torch from .Criterion import Criterion from .utils import recursiveResizeAs, recursiveFill, recursiveAdd + class MultiCriterion(Criterion): def __init__(self, ): @@ -14,7 +15,7 @@ class MultiCriterion(Criterion): new_weights = torch.DoubleStorage(len(self.criterions)) for i, v in enumerate(self.weights): new_weights[i] = v - new_weights[len(self.criterions)-1] = weight + new_weights[len(self.criterions) - 1] = weight self.weights = new_weights return self @@ -29,13 +30,12 @@ class MultiCriterion(Criterion): self.gradInput = recursiveResizeAs(self.gradInput, input)[0] recursiveFill(self.gradInput, 0) for i in range(len(self.criterions)): - recursiveAdd(self.gradInput, self.weights[i], self.criterions[i].updateGradInput(input, target)) + recursiveAdd(self.gradInput, self.weights[i], self.criterions[i].updateGradInput(input, target)) return self.gradInput def type(self, type): for criterion in self.criterions: - criterion.type(type) + criterion.type(type) return super(MultiCriterion, self).type(type) - diff --git a/torch/legacy/nn/MultiLabelMarginCriterion.py b/torch/legacy/nn/MultiLabelMarginCriterion.py index 4fbed6d1a5..42d6f7ac91 100644 --- a/torch/legacy/nn/MultiLabelMarginCriterion.py +++ b/torch/legacy/nn/MultiLabelMarginCriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class MultiLabelMarginCriterion(Criterion): def __init__(self, sizeAverage=True): @@ -11,7 +12,7 @@ class MultiLabelMarginCriterion(Criterion): def updateOutput(self, input, target): if self.output_tensor is None: - self.output_tensor = input.new(1) + self.output_tensor = input.new(1) target = target.long() self._backend.MultiLabelMarginCriterion_updateOutput( self._backend.library_state, @@ -35,4 +36,3 @@ class MultiLabelMarginCriterion(Criterion): self.sizeAverage ) return self.gradInput - diff --git a/torch/legacy/nn/MultiLabelSoftMarginCriterion.py b/torch/legacy/nn/MultiLabelSoftMarginCriterion.py index 4cf509efe4..59b2b29b86 100644 --- a/torch/legacy/nn/MultiLabelSoftMarginCriterion.py +++ b/torch/legacy/nn/MultiLabelSoftMarginCriterion.py @@ -3,6 +3,7 @@ from .Criterion import Criterion from .Sigmoid import Sigmoid from .BCECriterion import BCECriterion + class MultiLabelSoftMarginCriterion(Criterion): """ A MultiLabel multiclass criterion based on sigmoid: @@ -18,24 +19,23 @@ class MultiLabelSoftMarginCriterion(Criterion): """ def 
__init__(self, weights=None): - super(MultiLabelSoftMarginCriterion, self).__init__() - self.lsm = Sigmoid() - self.nll = BCECriterion(weights) + super(MultiLabelSoftMarginCriterion, self).__init__() + self.lsm = Sigmoid() + self.nll = BCECriterion(weights) def updateOutput(self, input, target): - input = input if input.nelement() == 1 else input.squeeze() - target = target if target.nelement() == 1 else target.squeeze() - self.lsm.updateOutput(input) - self.nll.updateOutput(self.lsm.output, target) - self.output = self.nll.output - return self.output + input = input if input.nelement() == 1 else input.squeeze() + target = target if target.nelement() == 1 else target.squeeze() + self.lsm.updateOutput(input) + self.nll.updateOutput(self.lsm.output, target) + self.output = self.nll.output + return self.output def updateGradInput(self, input, target): - size = input.size() - input = input if input.nelement() == 1 else input.squeeze() - target = target if target.nelement() == 1 else target.squeeze() - self.nll.updateGradInput(self.lsm.output, target) - self.lsm.updateGradInput(input, self.nll.gradInput) - self.gradInput = self.lsm.gradInput.view(size) - return self.gradInput - + size = input.size() + input = input if input.nelement() == 1 else input.squeeze() + target = target if target.nelement() == 1 else target.squeeze() + self.nll.updateGradInput(self.lsm.output, target) + self.lsm.updateGradInput(input, self.nll.gradInput) + self.gradInput = self.lsm.gradInput.view(size) + return self.gradInput diff --git a/torch/legacy/nn/MultiMarginCriterion.py b/torch/legacy/nn/MultiMarginCriterion.py index a2e11d3ebb..f6b636fee0 100644 --- a/torch/legacy/nn/MultiMarginCriterion.py +++ b/torch/legacy/nn/MultiMarginCriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class MultiMarginCriterion(Criterion): def __init__(self, p=1, weights=None, margin=1, sizeAverage=True): @@ -17,7 +18,7 @@ class MultiMarginCriterion(Criterion): def updateOutput(self, input, target): if self.output_tensor is None: - self.output_tensor = input.new(1) + self.output_tensor = input.new(1) target = target.long() self._backend.MultiMarginCriterion_updateOutput( self._backend.library_state, @@ -32,7 +33,6 @@ class MultiMarginCriterion(Criterion): self.output = self.output_tensor[0] return self.output - def updateGradInput(self, input, target): target = target.long() self._backend.MultiMarginCriterion_updateGradInput( @@ -46,4 +46,3 @@ class MultiMarginCriterion(Criterion): self.margin ) return self.gradInput - diff --git a/torch/legacy/nn/Narrow.py b/torch/legacy/nn/Narrow.py index 65997922d3..419be6cb2b 100644 --- a/torch/legacy/nn/Narrow.py +++ b/torch/legacy/nn/Narrow.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Narrow(Module): def __init__(self, dimension, offset, length=1): @@ -12,21 +13,19 @@ class Narrow(Module): def updateOutput(self, input): length = self.length if length < 0: - length = input.size(self.dimension) - self.index + self.length + 1 + length = input.size(self.dimension) - self.index + self.length + 1 output = input.narrow(self.dimension, self.index, length) self.output = self.output.type_as(output) self.output.resize_as_(output).copy_(output) return self.output - def updateGradInput(self, input, gradOutput): length = self.length if length < 0: - length = input.size(self.dimension) - self.index + self.length + 1 + length = input.size(self.dimension) - self.index + self.length + 1 self.gradInput = self.gradInput.type_as(input) 
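The MultiLabelSoftMarginCriterion hunk a little further up only re-indents the method bodies, but the composition it encodes is easy to miss in a whitespace diff: the loss is a Sigmoid feeding a BCECriterion. A minimal standard-library sketch of that composition follows; the helper name is mine and the plain mean assumes the default size-averaging, so treat it as illustrative rather than the legacy module's exact reduction semantics.

import math

def multilabel_soft_margin(scores, targets):
    """Mean of -[y*log(sigmoid(x)) + (1-y)*log(1-sigmoid(x))] over all elements."""
    total = 0.0
    for x, y in zip(scores, targets):
        p = 1.0 / (1.0 + math.exp(-x))                            # Sigmoid step (self.lsm)
        total += -(y * math.log(p) + (1 - y) * math.log(1 - p))   # BCE step (self.nll)
    return total / len(scores)

print(round(multilabel_soft_margin([2.0, -1.0, 0.5], [1, 0, 1]), 4))   # ~0.3048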
self.gradInput.resize_as_(input).zero_() self.gradInput.narrow(self.dimension, self.index, length).copy_(gradOutput) return self.gradInput - diff --git a/torch/legacy/nn/NarrowTable.py b/torch/legacy/nn/NarrowTable.py index 5176259f83..48d8a03f55 100644 --- a/torch/legacy/nn/NarrowTable.py +++ b/torch/legacy/nn/NarrowTable.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear, recursiveResizeAs, recursiveFill + class NarrowTable(Module): def __init__(self, offset, length=1): @@ -11,7 +12,6 @@ class NarrowTable(Module): self.output = [] self.gradInput = [] - def updateOutput(self, input): self.output[:] = [input[self.offset + i] for i in range(self.length)] return self.output @@ -34,10 +34,8 @@ class NarrowTable(Module): return self.gradInput - def type(self, type=None, tensorCache=None): if not type: return self._type clear(self, 'output', 'gradInput') return super(NarrowTable, self).type(self, type, tensorCache) - diff --git a/torch/legacy/nn/Normalize.py b/torch/legacy/nn/Normalize.py index 81cd4f9d4d..a96fcfcff1 100644 --- a/torch/legacy/nn/Normalize.py +++ b/torch/legacy/nn/Normalize.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class Normalize(Module): def __init__(self, p, eps=1e-10): @@ -19,17 +20,16 @@ class Normalize(Module): self.cross = None self.buffer2 = None - def updateOutput(self, input): assert input.dim() == 2 input_size = input.size() if self._output is None: - self._output = input.new() + self._output = input.new() if self.norm is None: - self.norm = input.new() + self.norm = input.new() if self.buffer is None: - self.buffer = input.new() + self.buffer = input.new() self._output.resize_as_(input) @@ -44,14 +44,14 @@ class Normalize(Module): self.norm.add_(self.eps) else: if self.normp is None: - self.normp = input.new() + self.normp = input.new() if self.p % 2 != 0: torch.abs(input, out=self.buffer).pow_(self.p) else: torch.pow(input, self.p, out=self.buffer) torch.sum(self.buffer, 1, out=self.normp).add_(self.eps) - torch.pow(self.normp, 1./self.p, out=self.norm) + torch.pow(self.normp, 1. 
/ self.p, out=self.norm) torch.div(input, self.norm.view(-1, 1).expand_as(input), out=self._output) @@ -63,40 +63,40 @@ class Normalize(Module): assert gradOutput.dim() == 2 input_size = input.size() - n = input.size(0) # batch size - d = input.size(1) # dimensionality of vectors + n = input.size(0) # batch size + d = input.size(1) # dimensionality of vectors if self._gradInput is None: - self._gradInput = input.new() + self._gradInput = input.new() if self.cross is None: - self.cross = input.new() + self.cross = input.new() # compute diagonal term with gradOutput self._gradInput.resize_(n, d) if self.p == float('inf'): # specialization for the inf case - torch.mul(self.norm.view(n, 1,1).expand(n, d,1), gradOutput, out=self._gradInput) - self.buffer.resize_as_(input).zero_() - self.cross.resize_(n, 1) - torch.gather(input, 1, self._indices, out=self.cross) - self.cross.div_(self.norm) - self.buffer.scatter_(1, self._indices, self.cross) + torch.mul(self.norm.view(n, 1, 1).expand(n, d, 1), gradOutput, out=self._gradInput) + self.buffer.resize_as_(input).zero_() + self.cross.resize_(n, 1) + torch.gather(input, 1, self._indices, out=self.cross) + self.cross.div_(self.norm) + self.buffer.scatter_(1, self._indices, self.cross) else: - torch.mul(self.normp.view(n, 1).expand(n, d), gradOutput, out=self._gradInput) - # small optimizations for different p - # buffer = input*|input|^(p-2) - # for non-even p, need to add absolute value - if self.p % 2 != 0: - if self.p < 2: - # add eps to avoid possible division by 0 - torch.abs(input, out=self.buffer).add_(self.eps).pow_(self.p-2).mul_(input) - else: - torch.abs(input, out=self.buffer).pow_(self.p-2).mul_(input) - # special case for p == 2, pow(x, 0) = 1 - elif self.p == 2: - self.buffer.copy_(input) + torch.mul(self.normp.view(n, 1).expand(n, d), gradOutput, out=self._gradInput) + # small optimizations for different p + # buffer = input*|input|^(p-2) + # for non-even p, need to add absolute value + if self.p % 2 != 0: + if self.p < 2: + # add eps to avoid possible division by 0 + torch.abs(input, out=self.buffer).add_(self.eps).pow_(self.p - 2).mul_(input) else: - # p is even and > 2, pow(x, p) is always positive - torch.pow(input, self.p-2, out=self.buffer).mul_(input) + torch.abs(input, out=self.buffer).pow_(self.p - 2).mul_(input) + # special case for p == 2, pow(x, 0) = 1 + elif self.p == 2: + self.buffer.copy_(input) + else: + # p is even and > 2, pow(x, p) is always positive + torch.pow(input, self.p - 2, out=self.buffer).mul_(input) # compute cross term in two steps self.cross.resize_(n, 1) @@ -105,7 +105,7 @@ class Normalize(Module): #: the computations as b1*(b2*gradOutput). 
This avoids redundant # computation and also a huge buffer of size n*d^2 if self.buffer2 is None: - self.buffer2 = input.new() # nxd + self.buffer2 = input.new() # nxd torch.mul(input, gradOutput, out=self.buffer2) torch.sum(self.buffer2, 1, out=self.cross) @@ -143,13 +143,12 @@ class Normalize(Module): def clearState(self): clear(self, [ - '_output', - '_indices', - '_gradInput', - 'buffer', - 'norm', - 'normp', - 'cross', + '_output', + '_indices', + '_gradInput', + 'buffer', + 'norm', + 'normp', + 'cross', ]) return super(Normalize, self).clearState() - diff --git a/torch/legacy/nn/PReLU.py b/torch/legacy/nn/PReLU.py index 5f2e1946af..f11d1bcaec 100644 --- a/torch/legacy/nn/PReLU.py +++ b/torch/legacy/nn/PReLU.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class PReLU(Module): def __init__(self, nOutputPlane=0): @@ -36,9 +37,9 @@ class PReLU(Module): def accGradParameters(self, input, gradOutput, scale=1): if self.gradWeightBuf is None: - self.gradWeightBuf = input.new() + self.gradWeightBuf = input.new() if self.gradWeightBuf2 is None: - self.gradWeightBuf2 = input.new() + self.gradWeightBuf2 = input.new() self._backend.PReLU_accGradParameters( self._backend.library_state, input, @@ -56,4 +57,3 @@ class PReLU(Module): def clearState(self): clear(self, 'gradWeightBuf', 'gradWeightBuf2') return super(PReLU, self).clearState() - diff --git a/torch/legacy/nn/Padding.py b/torch/legacy/nn/Padding.py index aa13362e24..db5fd83467 100644 --- a/torch/legacy/nn/Padding.py +++ b/torch/legacy/nn/Padding.py @@ -35,7 +35,8 @@ class Padding(Module): self.output.narrow(dim, 0, input.size(dim)).copy_(input) else: self.output.narrow(dim, 0, index).copy_(input.narrow(dim, 0, index)) - self.output.narrow(dim, index + pad, input.size(dim) - index).copy_(input.narrow(dim, index, input.size(dim) - index)) + self.output.narrow(dim, index + pad, input.size(dim) - + index).copy_(input.narrow(dim, index, input.size(dim) - index)) return self.output @@ -56,6 +57,7 @@ class Padding(Module): self.gradInput.copy_(gradOutput.narrow(dim, 0, input.size(dim))) else: self.gradInput.narrow(dim, 0, index).copy_(gradOutput.narrow(dim, 0, index)) - self.gradInput.narrow(dim, index, input.size(dim) - index).copy_(gradOutput.narrow(dim, index + pad, input.size(dim) - index)) + self.gradInput.narrow(dim, index, input.size( + dim) - index).copy_(gradOutput.narrow(dim, index + pad, input.size(dim) - index)) return self.gradInput diff --git a/torch/legacy/nn/PairwiseDistance.py b/torch/legacy/nn/PairwiseDistance.py index d4a7571a6f..cf083daf1d 100644 --- a/torch/legacy/nn/PairwiseDistance.py +++ b/torch/legacy/nn/PairwiseDistance.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class PairwiseDistance(Module): def __init__(self, p): @@ -20,14 +21,14 @@ class PairwiseDistance(Module): assert input[0].dim() == 2 if self.diff is None: - self.diff = input[0].new() + self.diff = input[0].new() torch.add(input[0], -1, input[1], out=self.diff).abs_() self.output.resize_(input[0].size(0)) self.output.zero_() self.output.add_(self.diff.pow_(self.norm).sum(1)) - self.output.pow_(1./self.norm) + self.output.pow_(1. 
/ self.norm) return self.output @@ -38,10 +39,10 @@ class PairwiseDistance(Module): self.gradInput[:] = [None, None] if self.gradInput[0] is None: - self.gradInput[0] = input[0].new() + self.gradInput[0] = input[0].new() self.gradInput[0].resize_(input[0].size()) if self.gradInput[1] is None: - self.gradInput[1] = input[1].new() + self.gradInput[1] = input[1].new() self.gradInput[1].resize_(input[1].size()) self.gradInput[0].copy_(input[0]) self.gradInput[0].add_(-1, input[1]) @@ -52,21 +53,21 @@ class PairwiseDistance(Module): # Note: derivative of p-norm: # d/dx_k(||x||_p) = (x_k * abs(x_k)^(p-2)) / (||x||_p)^(p-1) if self.norm > 2: - self.gradInput[0].mul_(self.gradInput[0].abs().pow_(self.norm-2)) + self.gradInput[0].mul_(self.gradInput[0].abs().pow_(self.norm - 2)) if self.outExpand is None: - self.outExpand = self.output.new() + self.outExpand = self.output.new() self.outExpand.resize_(self.output.size(0), 1) self.outExpand.copy_(self.output) self.outExpand.add_(1e-6) # Prevent divide by zero errors - self.outExpand.pow_(-(self.norm-1)) + self.outExpand.pow_(-(self.norm - 1)) self.gradInput[0].mul_(self.outExpand.expand(self.gradInput[0].size(0), - self.gradInput[0].size(1))) + self.gradInput[0].size(1))) if self.grad is None: - self.grad = gradOutput.new() + self.grad = gradOutput.new() if self.ones is None: - self.ones = gradOutput.new() + self.ones = gradOutput.new() self.grad.resize_as_(input[0]).zero_() self.ones.resize_(input[0].size(1)).fill_(1) @@ -80,4 +81,3 @@ class PairwiseDistance(Module): def clearState(self): clear(self, 'diff', 'outExpand', 'grad', 'ones') return super(PairwiseDistance, self).clearState() - diff --git a/torch/legacy/nn/Parallel.py b/torch/legacy/nn/Parallel.py index df94084c6e..6db1c060e3 100644 --- a/torch/legacy/nn/Parallel.py +++ b/torch/legacy/nn/Parallel.py @@ -94,8 +94,9 @@ class Parallel(Container): res = torch.typename(self) res += ' {' + line + tab + 'input' for i in range(len(self.modules)): - if i == len(self.modules)-1: - res += line + tab + next + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab + extlast) + if i == len(self.modules) - 1: + res += line + tab + next + '(' + str(i) + '): ' + \ + str(self.modules[i]).replace(line, line + tab + extlast) else: res += line + tab + next + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab + ext) diff --git a/torch/legacy/nn/ParallelCriterion.py b/torch/legacy/nn/ParallelCriterion.py index a44541caab..7ecfd95c6b 100644 --- a/torch/legacy/nn/ParallelCriterion.py +++ b/torch/legacy/nn/ParallelCriterion.py @@ -2,6 +2,7 @@ import torch from .Criterion import Criterion from .utils import recursiveResizeAs, recursiveFill, recursiveAdd + class ParallelCriterion(Criterion): def __init__(self, repeatTarget=False): @@ -36,4 +37,3 @@ class ParallelCriterion(Criterion): def type(self, type=None, tensorCache=None): self.gradInput = [] return super(ParallelCriterion, self).type(type, tensorCache) - diff --git a/torch/legacy/nn/ParallelTable.py b/torch/legacy/nn/ParallelTable.py index c3a78a16ba..41912a6a10 100644 --- a/torch/legacy/nn/ParallelTable.py +++ b/torch/legacy/nn/ParallelTable.py @@ -1,6 +1,7 @@ import torch from .Container import Container + class ParallelTable(Container): def __init__(self, ): @@ -9,7 +10,6 @@ class ParallelTable(Container): self.output = [] self.gradInput = [] - def updateOutput(self, input): for i in range(len(self.modules)): tmp = self.modules[i].updateOutput(input[i]) @@ -48,13 +48,13 @@ class ParallelTable(Container): res = 
torch.typename(self) res = res + ' {' + line + tab + 'input' for i in range(len(self.modules)): - if i == len(self.modules)-1: - res = res + line + tab + next + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab + extlast) - else: - res = res + line + tab + next + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab + ext) - + if i == len(self.modules) - 1: + res = res + line + tab + next + '(' + str(i) + '): ' + \ + str(self.modules[i]).replace(line, line + tab + extlast) + else: + res = res + line + tab + next + '(' + str(i) + '): ' + \ + str(self.modules[i]).replace(line, line + tab + ext) res = res + line + tab + last + 'output' res = res + line + '}' return res - diff --git a/torch/legacy/nn/PartialLinear.py b/torch/legacy/nn/PartialLinear.py index 4f9ca847a3..d4e9030e02 100644 --- a/torch/legacy/nn/PartialLinear.py +++ b/torch/legacy/nn/PartialLinear.py @@ -6,6 +6,7 @@ from .Sequential import Sequential from .ParallelTable import ParallelTable from .MM import MM + class PartialLinear(Module): """ PartialLinear is a Linear layer that allows the user to a set a collection of @@ -27,15 +28,15 @@ class PartialLinear(Module): pt.add(Identity()).add(LookupTable(outputsize, inputsize)) self.network = Sequential().add(pt).add(MM(False, True)) if bias: - self.bias = torch.zeros(1, outputsize) + self.bias = torch.zeros(1, outputsize) self.gradBias = torch.zeros(1, outputsize) else: self.bias = self.gradBias = None # set partition: - self.inputsize = inputsize + self.inputsize = inputsize self.outputsize = outputsize - self.allcolumns = torch.range(0, self.outputsize-1).long() + self.allcolumns = torch.range(0, self.outputsize - 1).long() self.resetPartition() self.addBuffer = None self.buffer = None @@ -58,7 +59,7 @@ class PartialLinear(Module): if self.bias is not None: self.output.add_(torch.index_select(self.bias, 1, self.partition).expand_as(self.output)) if self.addBuffer is None: - self.addBuffer = input.new() + self.addBuffer = input.new() if self.addBuffer.nelement() != input.size(0): self.addBuffer.resize_(input.size(0)).fill_(1) @@ -66,8 +67,8 @@ class PartialLinear(Module): def updateGradInput(self, input, gradOutput): if self.gradInput is not None: - self.network.updateGradInput([input, self.partition], gradOutput) - self.gradInput.set_(self.network.gradInput[0]) + self.network.updateGradInput([input, self.partition], gradOutput) + self.gradInput.set_(self.network.gradInput[0]) return self.gradInput @@ -110,6 +111,5 @@ class PartialLinear(Module): def __repr__(self): return super(ParallelTable, self).__repr__() + \ - '({} -> {})'.format(self.inputsize, self.outputsize) + \ - ' without bias' if self.bias is None else '' - + '({} -> {})'.format(self.inputsize, self.outputsize) + \ + ' without bias' if self.bias is None else '' diff --git a/torch/legacy/nn/Power.py b/torch/legacy/nn/Power.py index f86f5e6235..20b23baefd 100644 --- a/torch/legacy/nn/Power.py +++ b/torch/legacy/nn/Power.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Power(Module): def __init__(self, p): @@ -17,4 +18,3 @@ class Power(Module): self.gradInput.pow_(self.pow - 1) self.gradInput.mul_(gradOutput).mul_(self.pow) return self.gradInput - diff --git a/torch/legacy/nn/RReLU.py b/torch/legacy/nn/RReLU.py index e1c9c83a52..237d927da7 100644 --- a/torch/legacy/nn/RReLU.py +++ b/torch/legacy/nn/RReLU.py @@ -2,9 +2,10 @@ import torch from .Module import Module from .utils import clear + class RReLU(Module): - def __init__(self, lower=1./8, upper=1./3, inplace=False): 
+ def __init__(self, lower=1. / 8, upper=1. / 3, inplace=False): super(RReLU, self).__init__() self.lower = lower self.upper = upper @@ -48,4 +49,3 @@ class RReLU(Module): def clearState(self): clear(self, 'noise') return super(RReLU, self).clearState() - diff --git a/torch/legacy/nn/ReLU.py b/torch/legacy/nn/ReLU.py index 617ade9ba8..2674f47cf9 100644 --- a/torch/legacy/nn/ReLU.py +++ b/torch/legacy/nn/ReLU.py @@ -1,8 +1,8 @@ import torch from .Threshold import Threshold + class ReLU(Threshold): def __init__(self, inplace=False): super(ReLU, self).__init__(0, 0, inplace) - diff --git a/torch/legacy/nn/ReLU6.py b/torch/legacy/nn/ReLU6.py index d833d6139f..cb8b59d2b5 100644 --- a/torch/legacy/nn/ReLU6.py +++ b/torch/legacy/nn/ReLU6.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class ReLU6(Module): def __init__(self, inplace=False): @@ -16,7 +17,6 @@ class ReLU6(Module): ) return self.output - def updateGradInput(self, input, gradOutput): self._backend.HardTanh_updateGradInput( self._backend.library_state, @@ -26,4 +26,3 @@ class ReLU6(Module): 0, 6, self.inplace ) return self.gradInput - diff --git a/torch/legacy/nn/Replicate.py b/torch/legacy/nn/Replicate.py index 3923b06bc4..10f4d80884 100644 --- a/torch/legacy/nn/Replicate.py +++ b/torch/legacy/nn/Replicate.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Replicate(Module): def __init__(self, nf, dim=0): diff --git a/torch/legacy/nn/Reshape.py b/torch/legacy/nn/Reshape.py index 49c7029736..23d5ad9b8e 100644 --- a/torch/legacy/nn/Reshape.py +++ b/torch/legacy/nn/Reshape.py @@ -23,7 +23,7 @@ class Reshape(Module): def updateOutput(self, input): if not input.is_contiguous(): if self._input is None: - self._input = input.new() + self._input = input.new() self._input.resize_as_(input) self._input.copy_(input) input = self._input @@ -36,7 +36,7 @@ class Reshape(Module): def updateGradInput(self, input, gradOutput): if not gradOutput.is_contiguous(): if self._gradOutput is None: - self._gradOutput = gradOutput.new() + self._gradOutput = gradOutput.new() self._gradOutput.resize_as_(gradOutput) self._gradOutput.copy_(gradOutput) gradOutput = self._gradOutput @@ -46,7 +46,7 @@ class Reshape(Module): def __repr__(self): return super(Reshape, self).__repr__() + \ - '({})'.format('x'.join(map(lambda x: str(x), self.size))) + '({})'.format('x'.join(map(lambda x: str(x), self.size))) def clearState(self): clear(self, '_input', '_gradOutput') diff --git a/torch/legacy/nn/Select.py b/torch/legacy/nn/Select.py index cb6d77fb51..287cb000e4 100644 --- a/torch/legacy/nn/Select.py +++ b/torch/legacy/nn/Select.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Select(Module): def __init__(self, dimension, index): @@ -20,4 +21,3 @@ class Select(Module): self.gradInput.zero_() self.gradInput.select(self.dimension, index).copy_(gradOutput) return self.gradInput - diff --git a/torch/legacy/nn/SelectTable.py b/torch/legacy/nn/SelectTable.py index fe6e8b3585..7389a33bc3 100644 --- a/torch/legacy/nn/SelectTable.py +++ b/torch/legacy/nn/SelectTable.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import recursiveCopy + class SelectTable(Module): def __init__(self, index): @@ -48,7 +49,5 @@ class SelectTable(Module): del self.output[:] return super(SelectTable, self).type(type, tensorCache) - def __repr__(self): return super(SelectTable, self).__repr__() + '({})'.format(self.index) - diff --git a/torch/legacy/nn/Sequential.py b/torch/legacy/nn/Sequential.py index 04974e61c4..e3c4a0034a 100644 --- 
a/torch/legacy/nn/Sequential.py +++ b/torch/legacy/nn/Sequential.py @@ -1,6 +1,7 @@ import torch from .Container import Container + class Sequential(Container): def __len__(self): @@ -8,7 +9,7 @@ class Sequential(Container): def add(self, module): if len(self.modules) == 0: - self.gradInput = module.gradInput + self.gradInput = module.gradInput self.modules.append(module) self.output = module.output @@ -75,12 +76,11 @@ class Sequential(Container): res = 'nn.Sequential' res = res + ' {' + line + tab + '[input' for i in range(len(self.modules)): - res = res + next + '(' + str(i) + ')' + res = res + next + '(' + str(i) + ')' res = res + next + 'output]' for i in range(len(self.modules)): - res = res + line + tab + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab) + res = res + line + tab + '(' + str(i) + '): ' + str(self.modules[i]).replace(line, line + tab) res = res + line + '}' return res - diff --git a/torch/legacy/nn/Sigmoid.py b/torch/legacy/nn/Sigmoid.py index 40d849f61b..6e6343e6b0 100644 --- a/torch/legacy/nn/Sigmoid.py +++ b/torch/legacy/nn/Sigmoid.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Sigmoid(Module): def updateOutput(self, input): @@ -20,4 +21,3 @@ class Sigmoid(Module): self.output ) return self.gradInput - diff --git a/torch/legacy/nn/SmoothL1Criterion.py b/torch/legacy/nn/SmoothL1Criterion.py index 04748b0545..b16309e6d1 100644 --- a/torch/legacy/nn/SmoothL1Criterion.py +++ b/torch/legacy/nn/SmoothL1Criterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class SmoothL1Criterion(Criterion): def __init__(self, sizeAverage=True): @@ -10,7 +11,7 @@ class SmoothL1Criterion(Criterion): def updateOutput(self, input, target): if self.output_tensor is None: - self.output_tensor = input.new(1) + self.output_tensor = input.new(1) self._backend.SmoothL1Criterion_updateOutput( self._backend.library_state, input, @@ -30,4 +31,3 @@ class SmoothL1Criterion(Criterion): self.sizeAverage ) return self.gradInput - diff --git a/torch/legacy/nn/SoftMarginCriterion.py b/torch/legacy/nn/SoftMarginCriterion.py index 1e8a89f731..612e4e0c03 100644 --- a/torch/legacy/nn/SoftMarginCriterion.py +++ b/torch/legacy/nn/SoftMarginCriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class SoftMarginCriterion(Criterion): def __init__(self, ): @@ -10,7 +11,7 @@ class SoftMarginCriterion(Criterion): def updateOutput(self, input, target): if self.output_tensor is None: - self.output_tensor = input.new(1) + self.output_tensor = input.new(1) self._backend.SoftMarginCriterion_updateOutput( self._backend.library_state, input, @@ -30,4 +31,3 @@ class SoftMarginCriterion(Criterion): self.sizeAverage ) return self.gradInput - diff --git a/torch/legacy/nn/SoftMax.py b/torch/legacy/nn/SoftMax.py index e924554624..24d5fa5967 100644 --- a/torch/legacy/nn/SoftMax.py +++ b/torch/legacy/nn/SoftMax.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class SoftMax(Module): def updateOutput(self, input): @@ -20,4 +21,3 @@ class SoftMax(Module): self.output ) return self.gradInput - diff --git a/torch/legacy/nn/SoftMin.py b/torch/legacy/nn/SoftMin.py index a6e8737fe7..7c1bbbff3f 100644 --- a/torch/legacy/nn/SoftMin.py +++ b/torch/legacy/nn/SoftMin.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class SoftMin(Module): def __init__(self): @@ -10,7 +11,7 @@ class SoftMin(Module): def updateOutput(self, input): if self.mininput is None: - self.mininput = input.new() + self.mininput = input.new() 
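The Sigmoid hunk above ends its backward call by passing the saved self.output into the backend, which works because the derivative of the sigmoid can be written purely in terms of its output: d(sigmoid)/dx = out * (1 - out). A short sanity-check sketch (helper name and the test point are mine, illustrative only):

import math

def sigmoid(x):
    return 1.0 / (1.0 + math.exp(-x))

x = 0.7
out = sigmoid(x)
analytic = out * (1.0 - out)                                 # gradient from the output alone
eps = 1e-6
numeric = (sigmoid(x + eps) - sigmoid(x - eps)) / (2 * eps)  # central-difference check
print(round(analytic, 6), round(numeric, 6))                 # both ~0.221713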
self.mininput.resize_as_(input).copy_(input).mul_(-1) self._backend.SoftMax_updateOutput( self._backend.library_state, @@ -21,7 +22,7 @@ class SoftMin(Module): def updateGradInput(self, input, gradOutput): if self.mininput is None: - self.mininput = input.new() + self.mininput = input.new() self.mininput.resize_as_(input).copy_(input).mul_(-1) self._backend.SoftMax_updateGradInput( self._backend.library_state, @@ -37,4 +38,3 @@ class SoftMin(Module): def clearState(self): clear(self, 'mininput') return super(SoftMin, self).clearState() - diff --git a/torch/legacy/nn/SoftPlus.py b/torch/legacy/nn/SoftPlus.py index b8f46d030a..854bc8d4fe 100644 --- a/torch/legacy/nn/SoftPlus.py +++ b/torch/legacy/nn/SoftPlus.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class SoftPlus(Module): def __init__(self, beta=1): @@ -35,4 +36,3 @@ class SoftPlus(Module): self.threshold ) return self.gradInput - diff --git a/torch/legacy/nn/SoftShrink.py b/torch/legacy/nn/SoftShrink.py index b663c54f47..a3ac316650 100644 --- a/torch/legacy/nn/SoftShrink.py +++ b/torch/legacy/nn/SoftShrink.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class SoftShrink(Module): def __init__(self, lambd=0.5): @@ -25,4 +26,3 @@ class SoftShrink(Module): self.lambd ) return self.gradInput - diff --git a/torch/legacy/nn/SoftSign.py b/torch/legacy/nn/SoftSign.py index c5e1bcb2aa..9aa58c1f7b 100644 --- a/torch/legacy/nn/SoftSign.py +++ b/torch/legacy/nn/SoftSign.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class SoftSign(Module): def __init__(self): @@ -11,14 +12,14 @@ class SoftSign(Module): def updateOutput(self, input): if self.temp is None: - self.temp = input.new() + self.temp = input.new() self.temp.resize_as_(input).copy_(input).abs_().add_(1) self.output.resize_as_(input).copy_(input).div_(self.temp) return self.output def updateGradInput(self, input, gradOutput): if self.tempgrad is None: - self.tempgrad = input.new() + self.tempgrad = input.new() self.tempgrad.resize_as_(self.output).copy_(input).abs_().add_(1).mul_(self.tempgrad) self.gradInput.resize_as_(input).copy_(gradOutput).div_(self.tempgrad) return self.gradInput @@ -26,4 +27,3 @@ class SoftSign(Module): def clearState(self): clear(self, 'temp', 'tempgrad') return super(SoftSign, self).clearState() - diff --git a/torch/legacy/nn/SpatialAdaptiveMaxPooling.py b/torch/legacy/nn/SpatialAdaptiveMaxPooling.py index f97a849e54..b8ed87492c 100644 --- a/torch/legacy/nn/SpatialAdaptiveMaxPooling.py +++ b/torch/legacy/nn/SpatialAdaptiveMaxPooling.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class SpatialAdaptiveMaxPooling(Module): def __init__(self, w, h): @@ -12,7 +13,7 @@ class SpatialAdaptiveMaxPooling(Module): def updateOutput(self, input): if self.indices is None: - self.indices = input.new() + self.indices = input.new() self.indices = self.indices.long() self._backend.SpatialAdaptiveMaxPooling_updateOutput( self._backend.library_state, @@ -37,4 +38,3 @@ class SpatialAdaptiveMaxPooling(Module): def clearState(self): clear(self, 'indices') return super(SpatialAdaptiveMaxPooling, self).clearState() - diff --git a/torch/legacy/nn/SpatialAveragePooling.py b/torch/legacy/nn/SpatialAveragePooling.py index 7be1b6b8e0..acf4c64083 100644 --- a/torch/legacy/nn/SpatialAveragePooling.py +++ b/torch/legacy/nn/SpatialAveragePooling.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class SpatialAveragePooling(Module): def __init__(self, kW, kH, dW=1, dH=1, padW=0, padH=0): 
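The SoftMin hunks above show the whole trick behind the module: copy the input into self.mininput, negate it, and let the SoftMax backend do the rest, since softmin(x) is just softmax(-x). A small pure-Python sketch of that identity (function names are mine, illustrative only):

import math

def softmax(xs):
    m = max(xs)                                   # subtract the max for numerical stability
    exps = [math.exp(x - m) for x in xs]
    s = sum(exps)
    return [e / s for e in exps]

def softmin(xs):
    return softmax([-x for x in xs])              # the mul_(-1) step, then SoftMax

print([round(v, 4) for v in softmin([1.0, 2.0, 3.0])])    # [0.6652, 0.2447, 0.09]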
@@ -46,7 +47,7 @@ class SpatialAveragePooling(Module): # for backward compatibility with saved models # which are not supposed to have "divide" field if not self.divide: - self.output.mul_(self.kW*self.kH) + self.output.mul_(self.kW * self.kH) return self.output @@ -65,7 +66,7 @@ class SpatialAveragePooling(Module): ) # for backward compatibility if not self.divide: - self.gradInput.mul_(self.kW*self.kH) + self.gradInput.mul_(self.kW * self.kH) return self.gradInput @@ -76,4 +77,3 @@ class SpatialAveragePooling(Module): s += ', {}, {}'.format(self.padW, self.padH) s += ')' return s - diff --git a/torch/legacy/nn/SpatialBatchNormalization.py b/torch/legacy/nn/SpatialBatchNormalization.py index 3fc70ed0dd..725ebfffc6 100644 --- a/torch/legacy/nn/SpatialBatchNormalization.py +++ b/torch/legacy/nn/SpatialBatchNormalization.py @@ -1,6 +1,7 @@ import torch from .BatchNormalization import BatchNormalization + class SpatialBatchNormalization(BatchNormalization): """ This class implements Batch Normalization as described in the paper: diff --git a/torch/legacy/nn/SpatialClassNLLCriterion.py b/torch/legacy/nn/SpatialClassNLLCriterion.py index af1223ad53..95b9b5d084 100644 --- a/torch/legacy/nn/SpatialClassNLLCriterion.py +++ b/torch/legacy/nn/SpatialClassNLLCriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class SpatialClassNLLCriterion(Criterion): def __init__(self, weights=None, sizeAverage=True): diff --git a/torch/legacy/nn/SpatialContrastiveNormalization.py b/torch/legacy/nn/SpatialContrastiveNormalization.py index 35e9e0bafc..d7e9251707 100644 --- a/torch/legacy/nn/SpatialContrastiveNormalization.py +++ b/torch/legacy/nn/SpatialContrastiveNormalization.py @@ -4,6 +4,7 @@ from .Sequential import Sequential from .SpatialSubtractiveNormalization import SpatialSubtractiveNormalization from .SpatialDivisiveNormalization import SpatialDivisiveNormalization + class SpatialContrastiveNormalization(Module): def __init__(self, nInputPlane=1, kernel=None, threshold=1e-4, thresval=1e-4): @@ -12,23 +13,23 @@ class SpatialContrastiveNormalization(Module): # get args self.nInputPlane = nInputPlane if kernel is None: - self.kernel = torch.Tensor(9, 9).fill_(1) + self.kernel = torch.Tensor(9, 9).fill_(1) self.threshold = threshold self.thresval = thresval or threshold kdim = self.kernel.ndimension() # check args if kdim != 2 and kdim != 1: - raise ValueError('SpatialContrastiveNormalization averaging kernel must be 2D or 1D') + raise ValueError('SpatialContrastiveNormalization averaging kernel must be 2D or 1D') if self.kernel.size(0) % 2 == 0 or (kdim == 2 and (self.kernel.size(1) % 2) == 0): - raise ValueError('SpatialContrastiveNormalization averaging kernel must have ODD dimensions') + raise ValueError('SpatialContrastiveNormalization averaging kernel must have ODD dimensions') # instantiate sub+div normalization self.normalizer = Sequential() self.normalizer.add(SpatialSubtractiveNormalization(self.nInputPlane, self.kernel)) self.normalizer.add(SpatialDivisiveNormalization(self.nInputPlane, self.kernel, - self.threshold, self.thresval)) + self.threshold, self.thresval)) def updateOutput(self, input): self.output = self.normalizer.forward(input) @@ -37,4 +38,3 @@ class SpatialContrastiveNormalization(Module): def updateGradInput(self, input, gradOutput): self.gradInput = self.normalizer.backward(input, gradOutput) return self.gradInput - diff --git a/torch/legacy/nn/SpatialConvolution.py b/torch/legacy/nn/SpatialConvolution.py index aa9b4c1b6a..d5d8163128 100644 --- 
a/torch/legacy/nn/SpatialConvolution.py +++ b/torch/legacy/nn/SpatialConvolution.py @@ -3,6 +3,7 @@ import torch from .Module import Module from .utils import clear + class SpatialConvolution(Module): def __init__(self, nInputPlane, nOutputPlane, kW, kH, dW=1, dH=1, padW=0, padH=None): @@ -36,9 +37,9 @@ class SpatialConvolution(Module): def reset(self, stdv=None): if stdv is not None: - stdv = stdv * math.sqrt(3) + stdv = stdv * math.sqrt(3) else: - stdv = 1. / math.sqrt(self.kW*self.kH*self.nInputPlane) + stdv = 1. / math.sqrt(self.kW * self.kH * self.nInputPlane) self.weight.uniform_(-stdv, stdv) if self.bias is not None: @@ -46,15 +47,15 @@ class SpatialConvolution(Module): def _makeContiguous(self, input, gradOutput=None): if not input.is_contiguous(): - if self._input is None: - self._input = input.new() - self._input.resize_as_(input).copy_(input) - input = self._input + if self._input is None: + self._input = input.new() + self._input.resize_as_(input).copy_(input) + input = self._input if gradOutput is not None: if not gradOutput.is_contiguous(): if self._gradOutput is None: - self._gradOutput = gradOutput.new() + self._gradOutput = gradOutput.new() self._gradOutput.resize_as_(gradOutput).copy_(gradOutput) gradOutput = self._gradOutput return input, gradOutput @@ -97,7 +98,6 @@ class SpatialConvolution(Module): self._unviewWeight() return self.output - def updateGradInput(self, input, gradOutput): if self.gradInput is None: return @@ -157,10 +157,9 @@ class SpatialConvolution(Module): s += ')' if self.bias is None: - s += ' without bias' + s += ' without bias' return s def clearState(self): clear(self, 'finput', 'fgradInput', '_input', '_gradOutput') return super(SpatialConvolution, self).clearState() - diff --git a/torch/legacy/nn/SpatialConvolutionLocal.py b/torch/legacy/nn/SpatialConvolutionLocal.py index cdc5d4a395..c87ff5f671 100644 --- a/torch/legacy/nn/SpatialConvolutionLocal.py +++ b/torch/legacy/nn/SpatialConvolutionLocal.py @@ -3,9 +3,10 @@ import torch from .Module import Module from .utils import clear + class SpatialConvolutionLocal(Module): - def __init__(self, nInputPlane, nOutputPlane, iW, iH ,kW, kH, dW=1, dH=1, padW=0, padH=None): + def __init__(self, nInputPlane, nOutputPlane, iW, iH, kW, kH, dW=1, dH=1, padW=0, padH=None): super(SpatialConvolutionLocal, self).__init__() self.nInputPlane = nInputPlane @@ -34,19 +35,19 @@ class SpatialConvolutionLocal(Module): def reset(self, stdv=None): if stdv is not None: - stdv = stdv * math.sqrt(3) + stdv = stdv * math.sqrt(3) else: - stdv = 1. / math.sqrt(self.kW*self.kH*self.nInputPlane) + stdv = 1. 
/ math.sqrt(self.kW * self.kH * self.nInputPlane) self.weight.uniform_(-stdv, stdv) self.bias.uniform_(-stdv, stdv) def _makeContiguous(self, input, gradOutput=None): if not input.is_contiguous(): - if self._input is None: - self._input = input.new() - self._input.resize_as_(input).copy_(input) - input = self._input + if self._input is None: + self._input = input.new() + self._input.resize_as_(input).copy_(input) + input = self._input if gradOutput is not None: if not gradOutput.is_contiguous(): @@ -61,22 +62,24 @@ class SpatialConvolutionLocal(Module): def _viewWeight(self): self.weight = self.weight.view(self.oH * self.oW, self.nOutputPlane, self.nInputPlane * self.kH * self.kW) if self.gradWeight is not None and self.gradWeight.dim() > 0: - self.gradWeight = self.gradWeight.view(self.oH * self.oW, self.nOutputPlane, self.nInputPlane * self.kH * self.kW) + self.gradWeight = self.gradWeight.view( + self.oH * self.oW, self.nOutputPlane, self.nInputPlane * self.kH * self.kW) def _unviewWeight(self): self.weight = self.weight.view(self.oH, self.oW, self.nOutputPlane, self.nInputPlane, self.kH, self.kW) if self.gradWeight is not None and self.gradWeight.dim() > 0: - self.gradWeight = self.gradWeight.view(self.oH, self.oW, self.nOutputPlane, self.nInputPlane, self.kH, self.kW) + self.gradWeight = self.gradWeight.view( + self.oH, self.oW, self.nOutputPlane, self.nInputPlane, self.kH, self.kW) def _checkInputSize(self, input): if input.ndimension() == 3: if input.size(0) != self.nInputPlane or input.size(1) != self.iH or input.size(1) != self.iW: raise RuntimeError('Given input size: ({}x{}x{}) inconsistent with expected input size: ({}x{}x{}).'.format( - input.size(0), input.size(1), input.size(2), self.nInputPlane, self.iH, self.iW)) + input.size(0), input.size(1), input.size(2), self.nInputPlane, self.iH, self.iW)) elif input.ndimension() == 4: if input.size(1) != self.nInputPlane or input.size(2) != self.iH or input.size(3) != self.iW: raise RuntimeError('Given input size: ({}x{}x{}x{}) inconsistent with expected input size: (*x{}x{}x{}).'.format( - input.size(0), input.size(1), input.size(2), input.size(3), self.nInputPlane, self.iH, self.iW)) + input.size(0), input.size(1), input.size(2), input.size(3), self.nInputPlane, self.iH, self.iW)) else: raise RuntimeError('3D or 4D (batch mode) tensor expected') @@ -87,19 +90,19 @@ class SpatialConvolutionLocal(Module): if output.ndimension() == 3: if output.size(0) != self.nOutputPlane or output.size(1) != self.oH or output.size(2) != self.oW: raise RuntimeError('Given output size: ({}x{}x{}) inconsistent with expected output size: ({}x{}x{}).'.format( - output.size(0), output.size(1), output.size(2), self.nOutputPlane, self.oH, self.oW)) + output.size(0), output.size(1), output.size(2), self.nOutputPlane, self.oH, self.oW)) elif output.ndimension() == 4: if output.size(1) != self.nOutputPlane or output.size(2) != self.oH or output.size(3) != self.oW: raise RuntimeError('Given output size: ({}x{}x{}x{}) inconsistent with expected output size: (batchsize x{}x{}x{}).'.format( - output.size(0), output.size(1), output.size(2), output.size(3), self.nOutputPlane, self.oH, self.oW)) + output.size(0), output.size(1), output.size(2), output.size(3), self.nOutputPlane, self.oH, self.oW)) else: raise RuntimeError('3D or 4D(batch mode) tensor expected') def updateOutput(self, input): if self.finput is None: - self.finput = input.new() + self.finput = input.new() if self.fgradInput is None: - self.fgradInput = input.new() + self.fgradInput = input.new() 
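Both reset() hunks above (SpatialConvolution and SpatialConvolutionLocal) normalise the spacing of the same fan-in initialisation: the uniform bound is 1 / sqrt(kW * kH * nInputPlane), and a standard deviation passed explicitly is scaled by sqrt(3), because U(-b, b) has standard deviation b / sqrt(3). A small sketch of that bound (helper name and the example sizes are mine, illustrative only):

import math
import random

def init_bound(kW, kH, nInputPlane, stdv=None):
    if stdv is not None:
        return stdv * math.sqrt(3)                 # so U(-b, b) ends up with std = stdv
    return 1.0 / math.sqrt(kW * kH * nInputPlane)  # default fan-in bound

b = init_bound(kW=3, kH=3, nInputPlane=64)
sample = [random.uniform(-b, b) for _ in range(4)]     # what weight.uniform_(-stdv, stdv) draws
print(round(b, 5), [round(w, 5) for w in sample])      # b == 1/24 ~ 0.04167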
self._checkInputSize(input) self._viewWeight() input = self._makeContiguous(input) @@ -190,4 +193,3 @@ class SpatialConvolutionLocal(Module): def clearState(self): clear(self, 'finput', 'fgradInput', '_input', '_gradOutput') return super(SpatialConvolutionLocal, self).clearState() - diff --git a/torch/legacy/nn/SpatialConvolutionMap.py b/torch/legacy/nn/SpatialConvolutionMap.py index 7f4f7e6b22..e901140a52 100644 --- a/torch/legacy/nn/SpatialConvolutionMap.py +++ b/torch/legacy/nn/SpatialConvolutionMap.py @@ -5,13 +5,14 @@ from .Module import Module # TODO fix THNN... + class SpatialConvolutionMap(Module): class maps(object): @staticmethod def full(nin, nout): - ft = torch.Tensor(nin*nout, 2) + ft = torch.Tensor(nin * nout, 2) p = 0 for j in range(nout): for i in range(nin): @@ -34,19 +35,19 @@ class SpatialConvolutionMap(Module): tbl = torch.Tensor(nker, 2) fi = torch.randperm(nin) frcntr = 0 - nfi = math.floor(nin / nto) # number of distinct nto chunks + nfi = math.floor(nin / nto) # number of distinct nto chunks totbl = tbl.select(1, 1) frtbl = tbl.select(1, 0) - fitbl = fi.narrow(0, 0, (nfi * nto)) # part of fi that covers distinct chunks + fitbl = fi.narrow(0, 0, (nfi * nto)) # part of fi that covers distinct chunks ufrtbl = frtbl.unfold(0, nto, nto) utotbl = totbl.unfold(0, nto, nto) ufitbl = fitbl.unfold(0, nto, nto) # start fill_ing frtbl - for i in range(nout): # fro each unit in target map + for i in range(nout): # fro each unit in target map ufrtbl.select(0, i).copy_(ufitbl.select(0, frcntr)) frcntr += 1 - if frcntr-1 == nfi: # reset fi + if frcntr - 1 == nfi: # reset fi fi.copy_(torch.randperm(nin)) frcntr = 1 @@ -80,11 +81,11 @@ class SpatialConvolutionMap(Module): else: ninp = torch.Tensor(self.nOutputPlane).zero_() for i in range(self.connTable.size(0)): - idx = int(self.connTable[i,1]) + idx = int(self.connTable[i, 1]) ninp[idx] += 1 for k in range(self.connTable.size(0)): - idx = int(self.connTable[k,1]) - stdv = 1. / math.sqrt(self.kW*self.kH*ninp[idx]) + idx = int(self.connTable[k, 1]) + stdv = 1. / math.sqrt(self.kW * self.kH * ninp[idx]) self.weight.select(0, k).uniform_(-stdv, stdv) for k in range(self.bias.size(0)): stdv = 1. 
/ math.sqrt(self.kW * self.kH * ninp[k]) @@ -133,4 +134,3 @@ class SpatialConvolutionMap(Module): self.dW, self.dH, scale ) - diff --git a/torch/legacy/nn/SpatialCrossMapLRN.py b/torch/legacy/nn/SpatialCrossMapLRN.py index 7fa34c92d2..4b7402a46d 100644 --- a/torch/legacy/nn/SpatialCrossMapLRN.py +++ b/torch/legacy/nn/SpatialCrossMapLRN.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class SpatialCrossMapLRN(Module): def __init__(self, size, alpha=1e-4, beta=0.75, k=1): @@ -19,7 +20,7 @@ class SpatialCrossMapLRN(Module): assert input.dim() == 4 if self.scale is None: - self.scale = input.new() + self.scale = input.new() if input.type() == 'torch.cuda.FloatTensor': self._backend.SpatialCrossMapLRN_updateOutput( self._backend.library_state, @@ -32,10 +33,10 @@ class SpatialCrossMapLRN(Module): self.k ) else: - batchSize = input.size(0) - channels = input.size(1) + batchSize = input.size(0) + channels = input.size(1) inputHeight = input.size(2) - inputWidth = input.size(3) + inputWidth = input.size(3) self.output.resize_as_(input) self.scale.resize_as_(input) @@ -44,7 +45,7 @@ class SpatialCrossMapLRN(Module): inputSquare = self.output torch.pow(input, 2, out=inputSquare) - prePad = int((self.size - 1)/2 + 1) + prePad = int((self.size - 1) / 2 + 1) prePadCrop = channels if prePad > channels else prePad scaleFirst = self.scale.select(1, 0) @@ -57,10 +58,10 @@ class SpatialCrossMapLRN(Module): # by adding the next feature map and removing the previous for c in range(1, channels): scalePrevious = self.scale.select(1, c - 1) - scaleCurrent = self.scale.select(1, c) + scaleCurrent = self.scale.select(1, c) scaleCurrent.copy_(scalePrevious) if c < channels - prePad + 1: - squareNext = inputSquare.select(1, c + prePad - 1) + squareNext = inputSquare.select(1, c + prePad - 1) scaleCurrent.add_(1, squareNext) if c > prePad: @@ -91,15 +92,15 @@ class SpatialCrossMapLRN(Module): self.k ) else: - batchSize = input.size(0) - channels = input.size(1) + batchSize = input.size(0) + channels = input.size(1) inputHeight = input.size(2) - inputWidth = input.size(3) + inputWidth = input.size(3) if self.paddedRatio is None: - self.paddedRatio = input.new() + self.paddedRatio = input.new() if self.accumRatio is None: - self.accumRatio = input.new() + self.accumRatio = input.new() self.paddedRatio.resize_(channels + self.size - 1, inputHeight, inputWidth) self.accumRatio.resize_(inputHeight, inputWidth) @@ -114,9 +115,9 @@ class SpatialCrossMapLRN(Module): for n in range(batchSize): torch.mul(gradOutput[n], self.output[n], out=paddedRatioCenter) paddedRatioCenter.div_(self.scale[n]) - torch.sum(self.paddedRatio.narrow(0, 0,self.size-1), 0, out=self.accumRatio) + torch.sum(self.paddedRatio.narrow(0, 0, self.size - 1), 0, out=self.accumRatio) for c in range(channels): - self.accumRatio.add_(self.paddedRatio[c+self.size-1]) + self.accumRatio.add_(self.paddedRatio[c + self.size - 1]) self.gradInput[n][c].addcmul_(-cacheRatioValue, input[n][c], self.accumRatio) self.accumRatio.add_(-1, self.paddedRatio[c]) @@ -125,4 +126,3 @@ class SpatialCrossMapLRN(Module): def clearState(self): clear(self, 'scale', 'paddedRatio', 'accumRatio') return super(SpatialCrossMapLRN, self).clearState() - diff --git a/torch/legacy/nn/SpatialDilatedConvolution.py b/torch/legacy/nn/SpatialDilatedConvolution.py index 0953638af3..73056c8966 100644 --- a/torch/legacy/nn/SpatialDilatedConvolution.py +++ b/torch/legacy/nn/SpatialDilatedConvolution.py @@ -1,6 +1,7 @@ import torch from .SpatialConvolution import 
SpatialConvolution + class SpatialDilatedConvolution(SpatialConvolution): def __init__(self, nInputPlane, nOutputPlane, kW, kH, dW=1, dH=1, padW=0, padH=None, dilationH=1, dilationW=None): @@ -11,9 +12,9 @@ class SpatialDilatedConvolution(SpatialConvolution): def updateOutput(self, input): if self.finput is None: - self.finput = self.weight.new() + self.finput = self.weight.new() if self.fgradInput is None: - self.fgradInput = self.weight.new() + self.fgradInput = self.weight.new() input = self._makeContiguous(input) self._backend.SpatialDilatedConvolution_updateOutput( self._backend.library_state, @@ -36,7 +37,7 @@ class SpatialDilatedConvolution(SpatialConvolution): input, gradOutput = self._makeContiguous(input, gradOutput) if self.fgradInput is None: - self.fgradInput = self.weight.new() + self.fgradInput = self.weight.new() self._backend.SpatialDilatedConvolution_updateGradInput( self._backend.library_state, input, @@ -54,7 +55,7 @@ class SpatialDilatedConvolution(SpatialConvolution): def accGradParameters(self, input, gradOutput, scale=1): input, gradOutput = self._makeContiguous(input, gradOutput) if self.fgradInput is None: - self.fgradInput = self.weight.new() + self.fgradInput = self.weight.new() self._backend.SpatialDilatedConvolution_accGradParameters( self._backend.library_state, input, @@ -83,6 +84,5 @@ class SpatialDilatedConvolution(SpatialConvolution): s += ')' if self.bias is None: - s += ' without bias' + s += ' without bias' return s - diff --git a/torch/legacy/nn/SpatialDivisiveNormalization.py b/torch/legacy/nn/SpatialDivisiveNormalization.py index 262a4aacb6..7d1b7246df 100644 --- a/torch/legacy/nn/SpatialDivisiveNormalization.py +++ b/torch/legacy/nn/SpatialDivisiveNormalization.py @@ -35,10 +35,10 @@ class SpatialDivisiveNormalization(Module): raise ValueError('SpatialDivisiveNormalization averaging kernel must have ODD dimensions') # padding values - padH = int(math.floor(self.kernel.size(0)/2)) + padH = int(math.floor(self.kernel.size(0) / 2)) padW = padH if kdim == 2: - padW = int(math.floor(self.kernel.size(1)/2)) + padW = int(math.floor(self.kernel.size(1) / 2)) # create convolutional mean estimator self.meanestimator = Sequential() @@ -46,7 +46,8 @@ class SpatialDivisiveNormalization(Module): if kdim == 2: self.meanestimator.add(SpatialConvolution(self.nInputPlane, 1, self.kernel.size(1), self.kernel.size(0))) else: - self.meanestimator.add(SpatialConvolutionMap(SpatialConvolutionMap.maps.oneToOne(self.nInputPlane), self.kernel.size(0), 1)) + self.meanestimator.add(SpatialConvolutionMap( + SpatialConvolutionMap.maps.oneToOne(self.nInputPlane), self.kernel.size(0), 1)) self.meanestimator.add(SpatialConvolution(self.nInputPlane, 1, 1, self.kernel.size(0))) self.meanestimator.add(Replicate(self.nInputPlane, 1)) @@ -58,7 +59,8 @@ class SpatialDivisiveNormalization(Module): if kdim == 2: self.stdestimator.add(SpatialConvolution(self.nInputPlane, 1, self.kernel.size(1), self.kernel.size(0))) else: - self.stdestimator.add(SpatialConvolutionMap(SpatialContolutionMap.maps.oneToOne(self.nInputPlane), self.kernel.size(0), 1)) + self.stdestimator.add(SpatialConvolutionMap( + SpatialContolutionMap.maps.oneToOne(self.nInputPlane), self.kernel.size(0), 1)) self.stdestimator.add(SpatialConvolution(self.nInputPlane, 1, 1, self.kernel.size(0))) self.stdestimator.add(Replicate(self.nInputPlane, 1)) @@ -102,14 +104,14 @@ class SpatialDivisiveNormalization(Module): # compute side coefficients dim = input.dim() - if self.localstds.dim() != self.coef.dim() or (input.size(dim-1) != 
self.coef.size(dim-1)) or (input.size(dim-2) != self.coef.size(dim-2)): + if self.localstds.dim() != self.coef.dim() or (input.size(dim - 1) != self.coef.size(dim - 1)) or (input.size(dim - 2) != self.coef.size(dim - 2)): if self.ones is None: - self.ones = input.new() + self.ones = input.new() self.ones.resize_as_(input[0:1]).fill_(1) coef = self.meanestimator.updateOutput(self.ones).squeeze(0) if self._coef is None: - self._coef = input.new() - self._coef.resize_as_(coef).copy_(coef) # make contiguous for view + self._coef = input.new() + self._coef.resize_as_(coef).copy_(coef) # make contiguous for view self.coef = self._coef.view(1, *self._coef.size()).expand_as(self.localstds) # normalize std dev diff --git a/torch/legacy/nn/SpatialDropout.py b/torch/legacy/nn/SpatialDropout.py index 7cfb987adb..fc05fcf27f 100644 --- a/torch/legacy/nn/SpatialDropout.py +++ b/torch/legacy/nn/SpatialDropout.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class SpatialDropout(Module): def __init__(self, p=0.5): @@ -18,20 +19,20 @@ class SpatialDropout(Module): else: raise RuntimeError('Input must be 4D (nbatch, nfeat, h, w)') - self.noise.bernoulli_(1-self.p) + self.noise.bernoulli_(1 - self.p) # We expand the random dropouts to the entire feature map because the # features are likely correlated accross the map and so the dropout # should also be correlated. self.output.mul_(self.noise.expand_as(input)) else: - self.output.mul_(1-self.p) + self.output.mul_(1 - self.p) return self.output def updateGradInput(self, input, gradOutput): if self.train: self.gradInput.resize_as_(gradOutput).copy_(gradOutput) - self.gradInput.mul_(self.noise.expand_as(input)) # simply mask the gradients with the noise vector + self.gradInput.mul_(self.noise.expand_as(input)) # simply mask the gradients with the noise vector else: raise RuntimeError('backprop only defined while training') @@ -46,4 +47,3 @@ class SpatialDropout(Module): def clearState(self): clear(self, 'noise') return super(SpatialDropout, self).clearState() - diff --git a/torch/legacy/nn/SpatialFractionalMaxPooling.py b/torch/legacy/nn/SpatialFractionalMaxPooling.py index 9888b842de..9bc55b7bb2 100644 --- a/torch/legacy/nn/SpatialFractionalMaxPooling.py +++ b/torch/legacy/nn/SpatialFractionalMaxPooling.py @@ -2,6 +2,7 @@ import math import torch from .Module import Module + class SpatialFractionalMaxPooling(Module): # Usage: # nn.SpatialFractionalMaxPooling(poolSizeW, poolSizeH, outW, outH) @@ -34,21 +35,21 @@ class SpatialFractionalMaxPooling(Module): self.indices = None if arg1 >= 1 and arg2 >= 1: - # Desired output size: the input tensor will determine the reduction - # ratio - self.outW = arg1 - self.outH = arg2 - self.ratioW = self.ratioH = None + # Desired output size: the input tensor will determine the reduction + # ratio + self.outW = arg1 + self.outH = arg2 + self.ratioW = self.ratioH = None else: - # Reduction ratio specified per each input - # This is the reduction ratio that we use - self.ratioW = arg1 - self.ratioH = arg2 - self.outW = self.outH = None + # Reduction ratio specified per each input + # This is the reduction ratio that we use + self.ratioW = arg1 + self.ratioH = arg2 + self.outW = self.outH = None - # The reduction ratio must be between 0 and 1 - assert self.ratioW > 0 and self.ratioW < 1 - assert self.ratioH > 0 and self.ratioH < 1 + # The reduction ratio must be between 0 and 1 + assert self.ratioW > 0 and self.ratioW < 1 + assert self.ratioH > 0 and self.ratioH < 1 def _getBufferSize(self, 
input): assert input.ndimension() == 4 @@ -57,7 +58,6 @@ class SpatialFractionalMaxPooling(Module): return torch.Size([batchSize, planeSize, 2]) - def _initSampleBuffer(self, input): sampleBufferSize = self._getBufferSize(input) @@ -93,7 +93,7 @@ class SpatialFractionalMaxPooling(Module): def updateOutput(self, input): if self.indices is None: - self.indices = input.new() + self.indices = input.new() self.indices = self.indices.long() self._initSampleBuffer(input) outW, outH = self._getOutputSizes(input) @@ -130,6 +130,6 @@ class SpatialFractionalMaxPooling(Module): def __repr__(self): return super(SpatialFractionalMaxPooling, self).__repr__() + \ - '({}x{}, {}, {})'.format(self.outW or self.ratioW, - self.outH or self.ratioH, - self.poolSizeW, self.poolSizeH) + '({}x{}, {}, {})'.format(self.outW or self.ratioW, + self.outH or self.ratioH, + self.poolSizeW, self.poolSizeH) diff --git a/torch/legacy/nn/SpatialFullConvolution.py b/torch/legacy/nn/SpatialFullConvolution.py index 212ea4401c..a230dba646 100644 --- a/torch/legacy/nn/SpatialFullConvolution.py +++ b/torch/legacy/nn/SpatialFullConvolution.py @@ -3,6 +3,7 @@ import torch from .Module import Module from .utils import clear + class SpatialFullConvolution(Module): def __init__(self, nInputPlane, nOutputPlane, kW, kH, dW=1, dH=1, padW=0, padH=None, adjW=0, adjH=0): @@ -46,7 +47,7 @@ class SpatialFullConvolution(Module): nInputPlane = self.nInputPlane kH = self.kH kW = self.kW - stdv = 1/math.sqrt(kW*kH*nInputPlane) + stdv = 1 / math.sqrt(kW * kH * nInputPlane) self.weight.uniform_(-stdv, stdv) if self.bias is not None: @@ -54,15 +55,15 @@ class SpatialFullConvolution(Module): def _makeContiguous(self, input, gradOutput=None): if not input.is_contiguous(): - if self._input is None: - self._input = input.new() - self._input.resize_as_(input).copy_(input) - input = self._input + if self._input is None: + self._input = input.new() + self._input.resize_as_(input).copy_(input) + input = self._input if gradOutput is not None: if not gradOutput.is_contiguous(): if self._gradOutput is None: - self._gradOutput = gradOutput.new() + self._gradOutput = gradOutput.new() self._gradOutput.resize_as_(gradOutput).copy_(gradOutput) gradOutput = self._gradOutput return input, gradOutput @@ -82,20 +83,19 @@ class SpatialFullConvolution(Module): inputTensor = input[0] targetTensor = input[1] tDims = targetTensor.dim() - tH = targetTensor.size(tDims-2) - tW = targetTensor.size(tDims-1) + tH = targetTensor.size(tDims - 2) + tW = targetTensor.size(tDims - 1) adjW = self._calculateAdj(tW, self.kW, self.padW, self.dW) adjH = self._calculateAdj(tH, self.kH, self.padH, self.dH) if self.finput is None: - self.finput = input[0].new() + self.finput = input[0].new() if self.fgradInput is None: - self.fgradInput = input[0].new() + self.fgradInput = input[0].new() else: if self.finput is None: - self.finput = input.new() + self.finput = input.new() if self.fgradInput is None: - self.fgradInput = input.new() - + self.fgradInput = input.new() inputTensor = self._makeContiguous(inputTensor) self._backend.SpatialFullConvolution_updateOutput( @@ -125,8 +125,8 @@ class SpatialFullConvolution(Module): inputTensor = input[0] targetTensor = input[1] tDims = targetTensor.dim() - tH = targetTensor.size(tDims-2) - tW = targetTensor.size(tDims-1) + tH = targetTensor.size(tDims - 2) + tW = targetTensor.size(tDims - 1) adjW = self._calculateAdj(tW, self.kW, self.padW, self.dW) adjH = self._calculateAdj(tH, self.kH, self.padH, self.dH) # Momentarily extract the gradInput tensor @@ 
-150,9 +150,9 @@ class SpatialFullConvolution(Module): if isinstance(input, list): # Create a zero tensor to be expanded and used as gradInput[1]. if self.zeroScalar is None: - self.zeroScalar = input[1].new(1).zero_() + self.zeroScalar = input[1].new(1).zero_() self.ones.resize_(input[1].dim()).fill_(1) - zeroTensor = self.zeroScalar.view_as(self.ones).expand_as(input[1]) + zeroTensor = self.zeroScalar.view_as(self.ones).expand_as(input[1]) self.gradInput = [self.gradInput, zeroTensor] return self.gradInput @@ -167,8 +167,8 @@ class SpatialFullConvolution(Module): inputTensor = input[0] targetTensor = input[1] tDims = targetTensor.dim() - tH = targetTensor.size(tDims-2) - tW = targetTensor.size(tDims-1) + tH = targetTensor.size(tDims - 2) + tW = targetTensor.size(tDims - 1) adjW = calculateAdj(tW, self.kW, self.padW, self.dW) adjH = calculateAdj(tH, self.kH, self.padH, self.dH) @@ -215,5 +215,3 @@ class SpatialFullConvolution(Module): def clearState(self): clear(self, 'finput', 'fgradInput', '_input', '_gradOutput') return super(SpatialFullConvolution, self).clearState() - - diff --git a/torch/legacy/nn/SpatialFullConvolutionMap.py b/torch/legacy/nn/SpatialFullConvolutionMap.py index 9e2309977e..b4981f3fd5 100644 --- a/torch/legacy/nn/SpatialFullConvolutionMap.py +++ b/torch/legacy/nn/SpatialFullConvolutionMap.py @@ -3,6 +3,7 @@ import math import torch from .Module import Module + class SpatialFullConvolutionMap(Module): def __init__(self, conMatrix, kW, kH, dW=1, dH=1): @@ -36,10 +37,10 @@ class SpatialFullConvolutionMap(Module): ninp[idx] += 1 for k in range(self.connTable.size(0)): idx = int(self.connTable[k][1]) - stdv = 1. / math.sqrt(self.kW*self.kH*ninp[idx]) + stdv = 1. / math.sqrt(self.kW * self.kH * ninp[idx]) self.weight[k].uniform_(-stdv, stdv) for k in range(self.bias.size(0)): - stdv = 1. / math.sqrt(self.kW*self.kH*ninp[k]) + stdv = 1. / math.sqrt(self.kW * self.kH * ninp[k]) # TODO: torch.uniform self.bias[k] = random.uniform(-stdv, stdv) @@ -57,7 +58,6 @@ class SpatialFullConvolutionMap(Module): ) return self.output - def updateGradInput(self, input, gradOutput): self._backend.SpatialFullConvolutionMap_updateGradInput( self._backend.library_state, @@ -73,7 +73,6 @@ class SpatialFullConvolutionMap(Module): ) return self.gradInput - def accGradParameters(self, input, gradOutput, scale=1): self._backend.SpatialFullConvolutionMap_accGradParameters( self._backend.library_state, @@ -87,4 +86,3 @@ class SpatialFullConvolutionMap(Module): self.dW, self.dH, scale ) - diff --git a/torch/legacy/nn/SpatialLPPooling.py b/torch/legacy/nn/SpatialLPPooling.py index 82b8f04a66..cf84593da1 100644 --- a/torch/legacy/nn/SpatialLPPooling.py +++ b/torch/legacy/nn/SpatialLPPooling.py @@ -7,6 +7,7 @@ from .SpatialAveragePooling import SpatialAveragePooling from .MulConstant import MulConstant from .Sqrt import Sqrt + class SpatialLPPooling(Sequential): def __init__(self, nInputPlane, pnorm, kW, kH, dW=None, dH=None): @@ -21,16 +22,16 @@ class SpatialLPPooling(Sequential): self.dH = dH if pnorm == 2: - self.add(Square()) + self.add(Square()) else: - self.add(Power(pnorm)) + self.add(Power(pnorm)) self.add(SpatialAveragePooling(kW, kH, dW, dH)) - self.add(MulConstant(kW*kH)) + self.add(MulConstant(kW * kH)) if pnorm == 2: - self.add(Sqrt()) + self.add(Sqrt()) else: - self.add(Power(1./pnorm)) + self.add(Power(1. / pnorm)) # the module is a Sequential: by default, it'll try to learn the parameters # of the sub sampler: we avoid that by redefining its methods. 
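
For reference (not part of the diff): the Square/Power -> SpatialAveragePooling -> MulConstant(kW*kH) -> Sqrt/Power(1/pnorm) chain assembled in the hunk above effectively computes an Lp pooling, i.e. for every kW x kH window it returns (sum of x**p over the window) ** (1/p), because scaling the window average by kW*kH recovers the window sum. A minimal pure-Python sketch of that identity, on hypothetical values:

    # One 2x2 window, p = 2 (the Square()/Sqrt() branch of the module above).
    window = [1.0, 2.0, 3.0, 4.0]
    p, k = 2, len(window)
    avg_of_powers = sum(v ** p for v in window) / k      # what SpatialAveragePooling produces
    lp = (avg_of_powers * k) ** (1.0 / p)                # MulConstant(kW*kH), then the p-th root
    assert abs(lp - sum(v ** p for v in window) ** (1.0 / p)) < 1e-12
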
@@ -48,4 +49,3 @@ class SpatialLPPooling(Sequential): def updateParameters(self, learningRate): pass - diff --git a/torch/legacy/nn/SpatialMaxPooling.py b/torch/legacy/nn/SpatialMaxPooling.py index d53b2e2ca2..83eca1f257 100644 --- a/torch/legacy/nn/SpatialMaxPooling.py +++ b/torch/legacy/nn/SpatialMaxPooling.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class SpatialMaxPooling(Module): def __init__(self, kW, kH, dW=None, dH=None, padW=0, padH=0): @@ -31,12 +32,12 @@ class SpatialMaxPooling(Module): def updateOutput(self, input): if self.indices is None: - self.indices = input.new() + self.indices = input.new() self.indices = self.indices.long() dims = input.dim() - self.iheight = input.size(dims-2) - self.iwidth = input.size(dims-1) + self.iheight = input.size(dims - 2) + self.iwidth = input.size(dims - 1) self._backend.SpatialMaxPooling_updateOutput( self._backend.library_state, diff --git a/torch/legacy/nn/SpatialMaxUnpooling.py b/torch/legacy/nn/SpatialMaxUnpooling.py index 789046bb7a..477ef43124 100644 --- a/torch/legacy/nn/SpatialMaxUnpooling.py +++ b/torch/legacy/nn/SpatialMaxUnpooling.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .SpatialMaxPooling import SpatialMaxPooling + class SpatialMaxUnpooling(Module): def __init__(self, poolingModule): @@ -41,4 +42,3 @@ class SpatialMaxUnpooling(Module): def __repr__(self): return 'nn.SpatialMaxUnpooling associated to ' + self.pooling.__repr__() - diff --git a/torch/legacy/nn/SpatialReflectionPadding.py b/torch/legacy/nn/SpatialReflectionPadding.py index d9f2cb32e8..b8f3d15ba3 100644 --- a/torch/legacy/nn/SpatialReflectionPadding.py +++ b/torch/legacy/nn/SpatialReflectionPadding.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class SpatialReflectionPadding(Module): def __init__(self, pad_l, pad_r=None, pad_t=None, pad_b=None): @@ -24,9 +25,9 @@ class SpatialReflectionPadding(Module): def updateGradInput(self, input, gradOutput): assert input.dim() == 4 and gradOutput.dim() == 4 assert input.size(0) == gradOutput.size(0) and \ - input.size(1) == gradOutput.size(1) and \ - input.size(2) + self.pad_t + self.pad_b == gradOutput.size(2) and \ - input.size(3) + self.pad_l + self.pad_r == gradOutput.size(3) + input.size(1) == gradOutput.size(1) and \ + input.size(2) + self.pad_t + self.pad_b == gradOutput.size(2) and \ + input.size(3) + self.pad_l + self.pad_r == gradOutput.size(3) self._backend.SpatialReflectionPadding_updateGradInput( self._backend.library_state, @@ -41,4 +42,3 @@ class SpatialReflectionPadding(Module): s = super(SpatialReflectionPadding, self).__repr__() s += '({}, {}, {}, {})'.format(self.pad_l, self.pad_r, self.pad_t, self.pad_b) return s - diff --git a/torch/legacy/nn/SpatialReplicationPadding.py b/torch/legacy/nn/SpatialReplicationPadding.py index 340f74d66b..67a79a965f 100644 --- a/torch/legacy/nn/SpatialReplicationPadding.py +++ b/torch/legacy/nn/SpatialReplicationPadding.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class SpatialReplicationPadding(Module): def __init__(self, pad_l, pad_r=None, pad_t=None, pad_b=None): @@ -24,9 +25,9 @@ class SpatialReplicationPadding(Module): def updateGradInput(self, input, gradOutput): assert input.dim() == 4 and gradOutput.dim() == 4 assert input.size(0) == gradOutput.size(0) and \ - input.size(1) == gradOutput.size(1) and \ - input.size(2) + self.pad_t + self.pad_b == gradOutput.size(2) and \ - input.size(3) + self.pad_l + self.pad_r == gradOutput.size(3) + input.size(1) == gradOutput.size(1) and \ 
+ input.size(2) + self.pad_t + self.pad_b == gradOutput.size(2) and \ + input.size(3) + self.pad_l + self.pad_r == gradOutput.size(3) self._backend.SpatialReplicationPadding_updateGradInput( self._backend.library_state, @@ -42,4 +43,3 @@ class SpatialReplicationPadding(Module): s = super(SpatialReplicationPadding, self).__repr__() s += '({}, {}, {}, {})'.format(self.pad_l, self.pad_r, self.pad_t, self.pad_b) return s - diff --git a/torch/legacy/nn/SpatialSoftMax.py b/torch/legacy/nn/SpatialSoftMax.py index 7e2341a226..526e6d47dc 100644 --- a/torch/legacy/nn/SpatialSoftMax.py +++ b/torch/legacy/nn/SpatialSoftMax.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class SpatialSoftMax(Module): def updateOutput(self, input): @@ -11,7 +12,6 @@ class SpatialSoftMax(Module): ) return self.output - def updateGradInput(self, input, gradOutput): self._backend.SoftMax_updateGradInput( self._backend.library_state, @@ -21,4 +21,3 @@ class SpatialSoftMax(Module): self.output ) return self.gradInput - diff --git a/torch/legacy/nn/SpatialSubSampling.py b/torch/legacy/nn/SpatialSubSampling.py index 1b2f7e9fd9..2429800f07 100644 --- a/torch/legacy/nn/SpatialSubSampling.py +++ b/torch/legacy/nn/SpatialSubSampling.py @@ -2,6 +2,7 @@ import math import torch from .Module import Module + class SpatialSubSampling(Module): def __init__(self, nInputPlane, kW, kH, dW=1, dH=1): @@ -20,12 +21,11 @@ class SpatialSubSampling(Module): self.reset() - def reset(self, stdv=None): if stdv is not None: stdv = stdv * math.sqrt(3) else: - stdv = 1. / math.sqrt(self.kW*self.kH) + stdv = 1. / math.sqrt(self.kW * self.kH) self.weight.uniform_(-stdv, stdv) self.bias.uniform_(-stdv, stdv) @@ -42,7 +42,6 @@ class SpatialSubSampling(Module): ) return self.output - def updateGradInput(self, input, gradOutput): if self.gradInput is None: return @@ -58,8 +57,6 @@ class SpatialSubSampling(Module): ) return self.gradInput - - def accGradParameters(self, input, gradOutput, scale=1): self._backend.SpatialSubSampling_accGradParameters( self._backend.library_state, @@ -71,4 +68,3 @@ class SpatialSubSampling(Module): self.dW, self.dH, scale ) - diff --git a/torch/legacy/nn/SpatialSubtractiveNormalization.py b/torch/legacy/nn/SpatialSubtractiveNormalization.py index 0e68558dd7..2685bb4705 100644 --- a/torch/legacy/nn/SpatialSubtractiveNormalization.py +++ b/torch/legacy/nn/SpatialSubtractiveNormalization.py @@ -10,6 +10,7 @@ from .CSubTable import CSubTable from .CDivTable import CDivTable from .utils import clear + class SpatialSubtractiveNormalization(Module): def __init__(self, nInputPlane=1, kernel=None): @@ -24,19 +25,19 @@ class SpatialSubtractiveNormalization(Module): # check args if kdim != 2 and kdim != 1: - raise ValueError('SpatialSubtractiveNormalization averaging kernel must be 2D or 1D') + raise ValueError('SpatialSubtractiveNormalization averaging kernel must be 2D or 1D') if (self.kernel.size(0) % 2) == 0 or (kdim == 2 and (self.kernel.size(1) % 2) == 0): - raise ValueError('SpatialSubtractiveNormalization averaging kernel must have ODD dimensions') + raise ValueError('SpatialSubtractiveNormalization averaging kernel must have ODD dimensions') # normalize kernel self.kernel.div_(self.kernel.sum() * self.nInputPlane) # padding values - padH = int(math.floor(self.kernel.size(0)/2)) + padH = int(math.floor(self.kernel.size(0) / 2)) padW = padH if kdim == 2: - padW = int(math.floor(self.kernel.size(1)/2)) + padW = int(math.floor(self.kernel.size(1) / 2)) # create convolutional mean extractor self.meanestimator = 
Sequential() @@ -45,7 +46,8 @@ class SpatialSubtractiveNormalization(Module): self.meanestimator.add(SpatialConvolution(self.nInputPlane, 1, self.kernel.size(1), self.kernel.size(0))) else: # TODO: map - self.meanestimator.add(SpatialConvolutionMap(SpatialConvolutionMap.maps.oneToOne(self.nInputPlane), self.kernel.size(0), 1)) + self.meanestimator.add(SpatialConvolutionMap( + SpatialConvolutionMap.maps.oneToOne(self.nInputPlane), self.kernel.size(0), 1)) self.meanestimator.add(SpatialConvolution(self.nInputPlane, 1, 1, self.kernel.size(0))) self.meanestimator.add(Replicate(self.nInputPlane, 0)) @@ -76,7 +78,7 @@ class SpatialSubtractiveNormalization(Module): def updateOutput(self, input): # compute side coefficients dim = input.dim() - if input.dim() + 1 != self.coef.dim() or (input.size(dim-1) != self.coef.size(dim-1)) or (input.size(dim-2) != self.coef.size(dim-2)): + if input.dim() + 1 != self.coef.dim() or (input.size(dim - 1) != self.coef.size(dim - 1)) or (input.size(dim - 2) != self.coef.size(dim - 2)): if self.ones is None: self.ones = input.new() if self._coef is None: @@ -84,7 +86,7 @@ class SpatialSubtractiveNormalization(Module): self.ones.resize_as_(input[0:1]).fill_(1) coef = self.meanestimator.updateOutput(self.ones).squeeze(0) - self._coef.resize_as_(coef).copy_(coef) # make contiguous for view + self._coef.resize_as_(coef).copy_(coef) # make contiguous for view size = list(coef.size()) size = [input.size(0)] + size self.coef = self._coef.view(1, *self._coef.size()).expand(*size) diff --git a/torch/legacy/nn/SpatialUpSamplingNearest.py b/torch/legacy/nn/SpatialUpSamplingNearest.py index 4fcf9ab0bb..11d6524be5 100644 --- a/torch/legacy/nn/SpatialUpSamplingNearest.py +++ b/torch/legacy/nn/SpatialUpSamplingNearest.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class SpatialUpSamplingNearest(Module): """ Applies a 2D up-sampling over an input image composed of several input planes. 
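
For reference (not part of the diff): nearest-neighbour up-sampling by an integer factor replicates each input pixel into a scale x scale block, i.e. out[y][x] = inp[y // scale][x // scale]. A minimal pure-Python sketch with a hypothetical helper name, illustrating the mapping the module applies per plane:

    def upsample_nearest_2d(inp, scale):
        # inp is a 2D list of values; each value is copied into a scale x scale block.
        h, w = len(inp), len(inp[0])
        return [[inp[y // scale][x // scale] for x in range(w * scale)]
                for y in range(h * scale)]

    assert upsample_nearest_2d([[1, 2]], 2) == [[1, 1, 2, 2], [1, 1, 2, 2]]
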
@@ -41,7 +42,6 @@ class SpatialUpSamplingNearest(Module): ) return self.output - def updateGradInput(self, input, gradOutput): self.gradInput.resize_as_(input) self._backend.SpatialUpSamplingNearest_updateGradInput( diff --git a/torch/legacy/nn/SpatialZeroPadding.py b/torch/legacy/nn/SpatialZeroPadding.py index 18d6783082..a97d151336 100644 --- a/torch/legacy/nn/SpatialZeroPadding.py +++ b/torch/legacy/nn/SpatialZeroPadding.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class SpatialZeroPadding(Module): def __init__(self, pad_l, pad_r=None, pad_t=None, pad_b=None): @@ -78,4 +79,3 @@ class SpatialZeroPadding(Module): s = super(SpatialZeroPadding, self).__repr__() s += '({}, {}, {}, {})'.foramat(self.pad_l, self.pad_r, self.pad_t, self.pad_b) return s - diff --git a/torch/legacy/nn/SplitTable.py b/torch/legacy/nn/SplitTable.py index 6f2f12ed46..c93079d574 100644 --- a/torch/legacy/nn/SplitTable.py +++ b/torch/legacy/nn/SplitTable.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class SplitTable(Module): def __init__(self, dimension): @@ -10,7 +11,7 @@ class SplitTable(Module): def _getPositiveDimension(self, input): dimension = self.dimension if dimension < 0: - dimension = input.dim() + dimension + dimension = input.dim() + dimension return dimension @@ -36,4 +37,3 @@ class SplitTable(Module): self.gradInput.select(dimension, i).copy_(gradOutput[i]) return self.gradInput - diff --git a/torch/legacy/nn/Sqrt.py b/torch/legacy/nn/Sqrt.py index f68d93e3fe..e046594be2 100644 --- a/torch/legacy/nn/Sqrt.py +++ b/torch/legacy/nn/Sqrt.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Sqrt(Module): def __init__(self, b=0, eps=0): @@ -26,4 +27,3 @@ class Sqrt(Module): self.output ) return self.gradInput - diff --git a/torch/legacy/nn/Square.py b/torch/legacy/nn/Square.py index ec05c19377..9ebaa371ed 100644 --- a/torch/legacy/nn/Square.py +++ b/torch/legacy/nn/Square.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Square(Module): def updateOutput(self, input): @@ -19,4 +20,3 @@ class Square(Module): self.gradInput ) return self.gradInput - diff --git a/torch/legacy/nn/Squeeze.py b/torch/legacy/nn/Squeeze.py index a9b1372c8d..2a4578f1f7 100644 --- a/torch/legacy/nn/Squeeze.py +++ b/torch/legacy/nn/Squeeze.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Squeeze(Module): def __init__(self, dim=None): @@ -12,9 +13,7 @@ class Squeeze(Module): self.output.set_(input.squeeze(dim) if dim is not None else input.squeeze()) return self.output - def updateGradInput(self, input, gradOutput): assert input.nelement() == gradOutput.nelement() self.gradInput.set_(gradOutput.view_as(input)) return self.gradInput - diff --git a/torch/legacy/nn/Sum.py b/torch/legacy/nn/Sum.py index df89552bf7..84adb89b6e 100644 --- a/torch/legacy/nn/Sum.py +++ b/torch/legacy/nn/Sum.py @@ -37,7 +37,7 @@ class Sum(Module): size[dimension] = 1 if not gradOutput.is_contiguous(): if self._gradOutput is None: - self._gradOutput = gradOutput.new() + self._gradOutput = gradOutput.new() self._gradOutput.resize_as_(gradOutput).copy_(gradOutput) gradOutput = self._gradOutput diff --git a/torch/legacy/nn/Tanh.py b/torch/legacy/nn/Tanh.py index 57e524e44f..bcee876b38 100644 --- a/torch/legacy/nn/Tanh.py +++ b/torch/legacy/nn/Tanh.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Tanh(Module): def updateOutput(self, input): @@ -20,4 +21,3 @@ class Tanh(Module): self.output ) return self.gradInput - diff --git a/torch/legacy/nn/TanhShrink.py 
b/torch/legacy/nn/TanhShrink.py index 2faf98b8f6..36ef0f1689 100644 --- a/torch/legacy/nn/TanhShrink.py +++ b/torch/legacy/nn/TanhShrink.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .Tanh import Tanh + class TanhShrink(Module): def __init__(self): @@ -19,4 +20,3 @@ class TanhShrink(Module): self.gradInput.resize_as_(input).copy_(gradOutput) self.gradInput.add_(-1, dth) return self.gradInput - diff --git a/torch/legacy/nn/TemporalConvolution.py b/torch/legacy/nn/TemporalConvolution.py index 3b9383e710..4ac04f264e 100644 --- a/torch/legacy/nn/TemporalConvolution.py +++ b/torch/legacy/nn/TemporalConvolution.py @@ -2,6 +2,7 @@ import math import torch from .Module import Module + class TemporalConvolution(Module): def __init__(self, inputFrameSize, outputFrameSize, kW, dW=1): @@ -12,18 +13,18 @@ class TemporalConvolution(Module): self.kW = kW self.dW = dW - self.weight = torch.Tensor(outputFrameSize, inputFrameSize*kW) + self.weight = torch.Tensor(outputFrameSize, inputFrameSize * kW) self.bias = torch.Tensor(outputFrameSize) - self.gradWeight = torch.Tensor(outputFrameSize, inputFrameSize*kW) + self.gradWeight = torch.Tensor(outputFrameSize, inputFrameSize * kW) self.gradBias = torch.Tensor(outputFrameSize) self.reset() def reset(self, stdv=None): if stdv is not None: - stdv = stdv * math.sqrt(3) + stdv = stdv * math.sqrt(3) else: - stdv = 1. / math.sqrt(self.kW*self.inputFrameSize) + stdv = 1. / math.sqrt(self.kW * self.inputFrameSize) self.weight.uniform_(-stdv, stdv) self.bias.uniform_(-stdv, stdv) @@ -67,4 +68,3 @@ class TemporalConvolution(Module): self.dW, scale ) - diff --git a/torch/legacy/nn/TemporalMaxPooling.py b/torch/legacy/nn/TemporalMaxPooling.py index 01ad2e3e69..d3088ca4f3 100644 --- a/torch/legacy/nn/TemporalMaxPooling.py +++ b/torch/legacy/nn/TemporalMaxPooling.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class TemporalMaxPooling(Module): def __init__(self, kW, dW=None): @@ -12,7 +13,7 @@ class TemporalMaxPooling(Module): def updateOutput(self, input): if self.indices is None: - self.indices = input.new() + self.indices = input.new() self._backend.TemporalMaxPooling_updateOutput( self._backend.library_state, input, @@ -23,10 +24,9 @@ class TemporalMaxPooling(Module): ) return self.output - def updateGradInput(self, input, gradOutput): if self.gradInput is None: - return + return self._backend.TemporalMaxPooling_updateGradInput( self._backend.library_state, input, @@ -41,4 +41,3 @@ class TemporalMaxPooling(Module): def clearState(self): clear(self, 'indices') return super(TemporalMaxPooling, self).clearState() - diff --git a/torch/legacy/nn/TemporalSubSampling.py b/torch/legacy/nn/TemporalSubSampling.py index af662f4751..823070bbcc 100644 --- a/torch/legacy/nn/TemporalSubSampling.py +++ b/torch/legacy/nn/TemporalSubSampling.py @@ -2,6 +2,7 @@ import math import torch from .Module import Module + class TemporalSubSampling(Module): def __init__(self, inputFrameSize, kW, dW=1): @@ -20,9 +21,9 @@ class TemporalSubSampling(Module): def reset(self, stdv=None): if stdv is not None: - stdv = stdv * math.sqrt(3) + stdv = stdv * math.sqrt(3) else: - stdv = 1. / math.sqrt(self.kW) + stdv = 1. 
/ math.sqrt(self.kW) self.weight.uniform_(-stdv, stdv) self.bias.uniform_(-stdv, stdv) @@ -40,7 +41,6 @@ class TemporalSubSampling(Module): ) return self.output - def updateGradInput(self, input, gradOutput): if self.gradInput is None: return @@ -66,4 +66,3 @@ class TemporalSubSampling(Module): self.dW, scale ) - diff --git a/torch/legacy/nn/Threshold.py b/torch/legacy/nn/Threshold.py index 178cb8f27a..f151d023e8 100644 --- a/torch/legacy/nn/Threshold.py +++ b/torch/legacy/nn/Threshold.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Threshold(Module): def __init__(self, threshold=0, value=0, inplace=False): @@ -41,7 +42,4 @@ class Threshold(Module): if self.inplace: if self.value > self.threshold: raise RuntimeError('in-place processing requires value ({}) to not ' - 'exceed threshold ({})'.format(self.value, self.threshold)) - - - + 'exceed threshold ({})'.format(self.value, self.threshold)) diff --git a/torch/legacy/nn/Transpose.py b/torch/legacy/nn/Transpose.py index 70bf068758..4478c251e0 100644 --- a/torch/legacy/nn/Transpose.py +++ b/torch/legacy/nn/Transpose.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class Transpose(Module): # transpose dimensions: # n = nn.Transpose({1, 4}, {1, 3}) @@ -12,14 +13,12 @@ class Transpose(Module): def updateOutput(self, input): for perm in self.permutations: - input = input.transpose(*perm) + input = input.transpose(*perm) self.output.resize_as_(input).copy_(input) return self.output def updateGradInput(self, input, gradOutput): for perm in self.permutations[::-1]: - gradOutput = gradOutput.transpose(*perm) + gradOutput = gradOutput.transpose(*perm) self.gradInput.resize_as_(gradOutput).copy_(gradOutput) return self.gradInput - - diff --git a/torch/legacy/nn/Unsqueeze.py b/torch/legacy/nn/Unsqueeze.py index 089a07113e..a78ac3022c 100644 --- a/torch/legacy/nn/Unsqueeze.py +++ b/torch/legacy/nn/Unsqueeze.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import addSingletondimension + class Unsqueeze(Module): def __init__(self, dim): @@ -19,4 +20,3 @@ class Unsqueeze(Module): def __repr__(self): return super(Unsqueeze, self).__repr__() + '({})'.format(self.dim) - diff --git a/torch/legacy/nn/View.py b/torch/legacy/nn/View.py index 7bcef91f34..d228fb15a0 100644 --- a/torch/legacy/nn/View.py +++ b/torch/legacy/nn/View.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class View(Module): def resetSize(self, *args): @@ -28,14 +29,13 @@ class View(Module): def updateOutput(self, input): if self.output is None: - self.output = input.new() + self.output = input.new() self.output = input.view(self.size) return self.output - def updateGradInput(self, input, gradOutput): if self.gradInput is None: - self.gradInput = gradOutput.new() + self.gradInput = gradOutput.new() self.gradInput = gradOutput.view(input.size()) return self.gradInput diff --git a/torch/legacy/nn/VolumetricAveragePooling.py b/torch/legacy/nn/VolumetricAveragePooling.py index e6190b9ec0..a89ba4cade 100644 --- a/torch/legacy/nn/VolumetricAveragePooling.py +++ b/torch/legacy/nn/VolumetricAveragePooling.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class VolumetricAveragePooling(Module): def __init__(self, kT, kW, kH, dT=None, dW=None, dH=None): @@ -38,4 +39,3 @@ class VolumetricAveragePooling(Module): s += '({}x{}x{}, {}, {}, {}'.format(self.kT, self.kW, self.kH, self.dT, self.dW, self.dH) s += ')' return s - diff --git a/torch/legacy/nn/VolumetricBatchNormalization.py b/torch/legacy/nn/VolumetricBatchNormalization.py 
index 2ebd14c359..61bab4c6ef 100644 --- a/torch/legacy/nn/VolumetricBatchNormalization.py +++ b/torch/legacy/nn/VolumetricBatchNormalization.py @@ -2,5 +2,6 @@ import torch from .Module import Module from .BatchNormalization import BatchNormalization + class VolumetricBatchNormalization(BatchNormalization): nDim = 5 diff --git a/torch/legacy/nn/VolumetricConvolution.py b/torch/legacy/nn/VolumetricConvolution.py index c28f21ba30..a4060d95a4 100644 --- a/torch/legacy/nn/VolumetricConvolution.py +++ b/torch/legacy/nn/VolumetricConvolution.py @@ -3,6 +3,7 @@ import torch from .Module import Module from .utils import clear + class VolumetricConvolution(Module): def __init__(self, nInputPlane, nOutputPlane, kT, kW, kH, dT=1, dW=1, dH=1, padT=0, padW=None, padH=None): @@ -31,24 +32,24 @@ class VolumetricConvolution(Module): def reset(self, stdv=None): if stdv is not None: - stdv = stdv * math.sqrt(3) + stdv = stdv * math.sqrt(3) else: - stdv = 1. / math.sqrt(self.kT*self.kW*self.kH*self.nInputPlane) + stdv = 1. / math.sqrt(self.kT * self.kW * self.kH * self.nInputPlane) self.weight.uniform_(-stdv, stdv) self.bias.uniform_(-stdv, stdv) def _makeContiguous(self, input, gradOutput=None): if not input.is_contiguous(): - if self._input is None: - self._input = input.new() - self._input.resize_as_(input).copy_(input) - input = self._input + if self._input is None: + self._input = input.new() + self._input.resize_as_(input).copy_(input) + input = self._input if gradOutput is not None: if not gradOutput.is_contiguous(): if self._gradOutput is None: - self._gradOutput = gradOutput.new() + self._gradOutput = gradOutput.new() self._gradOutput.resize_as_(gradOutput).copy_(gradOutput) gradOutput = self._gradOutput return input, gradOutput @@ -68,9 +69,9 @@ class VolumetricConvolution(Module): def updateOutput(self, input): if self.finput is None: - self.finput = input.new() + self.finput = input.new() if self.fgradInput is None: - self.fgradInput = input.new() + self.fgradInput = input.new() if input.type() == 'torch.cuda.FloatTensor': self._backend.VolumetricConvolution_updateOutput( self._backend.library_state, @@ -178,10 +179,10 @@ class VolumetricConvolution(Module): s += '({} -> {}, {}x{}x{}'.format(self.nInputPlane, self.nOutputPlane, self.kT, self.kW, self.kH) if self.dT != 1 or self.dW != 1 or self.dH != 1 or \ self.padT != 0 or self.padW != 0 or self.padH != 0: - s += ', {}, {}, {}'.format(self.dT, self.dW, self.dH) + s += ', {}, {}, {}'.format(self.dT, self.dW, self.dH) if self.padT != 0 or self.padW != 0 or self.padH != 0: - s += ', {}, {}, {}'.format(self.padT, self.padW, self.padH) + s += ', {}, {}, {}'.format(self.padT, self.padW, self.padH) s += ')' return s diff --git a/torch/legacy/nn/VolumetricDropout.py b/torch/legacy/nn/VolumetricDropout.py index dda60c82f7..4d3c244a10 100644 --- a/torch/legacy/nn/VolumetricDropout.py +++ b/torch/legacy/nn/VolumetricDropout.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class VolumetricDropout(Module): def __init__(self, p=0.5): @@ -16,20 +17,20 @@ class VolumetricDropout(Module): assert input.dim() == 5 self.noise.resize_(input.size(0), input.size(1), 1, 1, 1) - self.noise.bernoulli_(1-self.p) + self.noise.bernoulli_(1 - self.p) # We expand the random dropouts to the entire feature map because the # features are likely correlated accross the map and so the dropout # should also be correlated. 
self.output.mul_(self.noise.expand_as(input)) else: - self.output.mul_(1-self.p) + self.output.mul_(1 - self.p) return self.output def updateGradInput(self, input, gradOutput): if self.train: self.gradInput.resize_as_(gradOutput).copy_(gradOutput) - self.gradInput.mul_(self.noise.expand_as(input)) # simply mask the gradients with the noise vector + self.gradInput.mul_(self.noise.expand_as(input)) # simply mask the gradients with the noise vector else: raise RuntimeError('backprop only defined while training') @@ -44,4 +45,3 @@ class VolumetricDropout(Module): def clearState(self): clear(self, 'noise') return super(VolumetricDropout, self).clearState() - diff --git a/torch/legacy/nn/VolumetricFullConvolution.py b/torch/legacy/nn/VolumetricFullConvolution.py index 84ee984471..5f5fd01552 100644 --- a/torch/legacy/nn/VolumetricFullConvolution.py +++ b/torch/legacy/nn/VolumetricFullConvolution.py @@ -2,13 +2,14 @@ import math import torch from .Module import Module + class VolumetricFullConvolution(Module): def __init__(self, nInputPlane, nOutputPlane, - kT, kW, kH, # kernel size - dT=1, dW=1, dH=1, # stride - padT=0, padW=0, padH=0, # padding - adjT=0, adjW=0, adjH=0): # extra output adjustment + kT, kW, kH, # kernel size + dT=1, dW=1, dH=1, # stride + padT=0, padW=0, padH=0, # padding + adjT=0, adjW=0, adjH=0): # extra output adjustment super(VolumetricFullConvolution, self).__init__() self.nInputPlane = nInputPlane @@ -28,7 +29,7 @@ class VolumetricFullConvolution(Module): if self.adjW > self.dW - 1 or self.adjH > self.dH - 1 or self.adjT > self.dT - 1: raise RuntimeError('adjW, adjH and adjT must be smaller than self.dW - 1, ' - ' self.dH - 1 and self.dT - 1 respectively') + ' self.dH - 1 and self.dT - 1 respectively') self.weight = torch.Tensor(nInputPlane, nOutputPlane, kT, kH, kW) self.gradWeight = torch.Tensor(nInputPlane, nOutputPlane, kT, kH, kW) @@ -41,7 +42,6 @@ class VolumetricFullConvolution(Module): self.reset() - def reset(self, stdv=None): if stdv is not None: stdv = stdv * math.sqrt(3) @@ -50,22 +50,22 @@ class VolumetricFullConvolution(Module): kT = self.kT kH = self.kH kW = self.kW - stdv = 1. / math.sqrt(kW*kH*kT*nInputPlane) + stdv = 1. 
/ math.sqrt(kW * kH * kT * nInputPlane) self.weight.uniform_(-stdv, stdv) self.bias.uniform_(-stdv, stdv) def _makeContiguous(self, input, gradOutput=None): if not input.is_contiguous(): - if self._input is None: - self._input = input.new() - self._input.resize_as_(input).copy_(input) - input = self._input + if self._input is None: + self._input = input.new() + self._input.resize_as_(input).copy_(input) + input = self._input if gradOutput is not None: if not gradOutput.is_contiguous(): if self._gradOutput is None: - self._gradOutput = gradOutput.new() + self._gradOutput = gradOutput.new() self._gradOutput.resize_as_(gradOutput).copy_(gradOutput) gradOutput = self._gradOutput return input, gradOutput @@ -85,9 +85,9 @@ class VolumetricFullConvolution(Module): inputTensor = input[0] targetTensor = input[1] tDims = targetTensor.dim() - tT = targetTensor.size(tDims-3) - tH = targetTensor.size(tDims-2) - tW = targetTensor.size(tDims-1) + tT = targetTensor.size(tDims - 3) + tH = targetTensor.size(tDims - 2) + tW = targetTensor.size(tDims - 1) adjT = self._calculateAdj(tT, self.kT, self.padT, self.dT) adjW = self._calculateAdj(tW, self.kW, self.padW, self.dW) adjH = self._calculateAdj(tH, self.kH, self.padH, self.dH) @@ -118,9 +118,9 @@ class VolumetricFullConvolution(Module): inputTensor = input[0] targetTensor = input[1] tDims = targetTensor.dim() - tT = targetTensor.size(tDims-3) - tH = targetTensor.size(tDims-2) - tW = targetTensor.size(tDims-1) + tT = targetTensor.size(tDims - 3) + tH = targetTensor.size(tDims - 2) + tW = targetTensor.size(tDims - 1) adjT = self._calculateAdj(tT, self.kT, self.padT, self.dT) adjW = self._calculateAdj(tW, self.kW, self.padW, self.dW) adjH = self._calculateAdj(tH, self.kH, self.padH, self.dH) @@ -145,9 +145,9 @@ class VolumetricFullConvolution(Module): if isinstance(input, list): # Create a zero tensor to be expanded and used as gradInput[1]. 
if self.zeroScalar is None: - self.zeroScalar = input[1].new(1).zero_() + self.zeroScalar = input[1].new(1).zero_() self.ones.resize_(input[1].dim()).fill_(1) - zeroTensor = self.zeroScalar.view(self.ones.tolist()).expand_as(input[1]) + zeroTensor = self.zeroScalar.view(self.ones.tolist()).expand_as(input[1]) self.gradInput = [self.gradInput, zeroTensor] return self.gradInput @@ -162,9 +162,9 @@ class VolumetricFullConvolution(Module): inputTensor = input[0] targetTensor = input[1] tDims = targetTensor.dim() - tT = targetTensor.size(tDims-3) - tH = targetTensor.size(tDims-2) - tW = targetTensor.size(tDims-1) + tT = targetTensor.size(tDims - 3) + tH = targetTensor.size(tDims - 2) + tW = targetTensor.size(tDims - 1) adjT = self._calculateAdj(tT, self.kT, self.padT, self.dT) adjW = self._calculateAdj(tW, self.kW, self.padW, self.dW) adjH = self._calculateAdj(tH, self.kH, self.padH, self.dH) @@ -193,12 +193,12 @@ class VolumetricFullConvolution(Module): s = super(VolumetricFullConvolution, self).__repr__() s += '({} -> {}, {}x{}x{}'.format(self.nInputPlane, self.nOutputPlane, self.kT, self.kW, self.kH) if self.dT != 1 or self.dW != 1 or self.dH != 1 or \ - self.padT != 0 or self.padW != 0 or self.padH != 0 or \ - self.adjT != 0 or self.adjW != 0 or self.adjH != 0: + self.padT != 0 or self.padW != 0 or self.padH != 0 or \ + self.adjT != 0 or self.adjW != 0 or self.adjH != 0: s += ', {}, {}, {}'.format(self.dT, self.dW, self.dH) if self.padT != 0 or self.padW != 0 or self.padH != 0 or \ - self.adjT != 0 or self.adjW != 0 or self.adjH != 0: + self.adjT != 0 or self.adjW != 0 or self.adjH != 0: s += ', {}, {}, {}'.format(self.padT, self.padW, self.padH) if self.adjT != 0 or self.adjW != 0 or self.adjH != 0: @@ -206,4 +206,3 @@ class VolumetricFullConvolution(Module): s += ')' return s - diff --git a/torch/legacy/nn/VolumetricMaxPooling.py b/torch/legacy/nn/VolumetricMaxPooling.py index edac36fb18..823ab05846 100644 --- a/torch/legacy/nn/VolumetricMaxPooling.py +++ b/torch/legacy/nn/VolumetricMaxPooling.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .utils import clear + class VolumetricMaxPooling(Module): def __init__(self, kT, kW, kH, dT=None, dW=None, dH=None, padT=0, padW=0, padH=0): @@ -22,21 +23,21 @@ class VolumetricMaxPooling(Module): self.indices = torch.LongTensor() def ceil(self): - self.ceil_mode = True - return self + self.ceil_mode = True + return self def floor(self): - self.ceil_mode = False - return self + self.ceil_mode = False + return self def updateOutput(self, input): dims = input.dim() - self.itime = input.size(dims-3) - self.iheight = input.size(dims-2) - self.iwidth = input.size(dims-1) + self.itime = input.size(dims - 3) + self.iheight = input.size(dims - 2) + self.iwidth = input.size(dims - 1) if self.indices is None: - self.indices = input.new() + self.indices = input.new() self.indices = self.indices.long() self._backend.VolumetricMaxPooling_updateOutput( self._backend.library_state, diff --git a/torch/legacy/nn/VolumetricMaxUnpooling.py b/torch/legacy/nn/VolumetricMaxUnpooling.py index 45432ccb98..4de3b52607 100644 --- a/torch/legacy/nn/VolumetricMaxUnpooling.py +++ b/torch/legacy/nn/VolumetricMaxUnpooling.py @@ -2,6 +2,7 @@ import torch from .Module import Module from .VolumetricMaxPooling import VolumetricMaxPooling + class VolumetricMaxUnpooling(Module): def __init__(self, poolingModule): @@ -53,4 +54,3 @@ class VolumetricMaxUnpooling(Module): def __repr__(self): return 'nn.VolumetricMaxUnpooling associated to ' + self.pooling.__repr__() - diff 
--git a/torch/legacy/nn/VolumetricReplicationPadding.py b/torch/legacy/nn/VolumetricReplicationPadding.py index b43d42c396..16cc7a1c09 100644 --- a/torch/legacy/nn/VolumetricReplicationPadding.py +++ b/torch/legacy/nn/VolumetricReplicationPadding.py @@ -1,6 +1,7 @@ import torch from .Module import Module + class VolumetricReplicationPadding(Module): def __init__(self, pleft, pright=None, ptop=None, pbottom=None, pfront=None, pback=None): @@ -48,8 +49,7 @@ class VolumetricReplicationPadding(Module): def __repr__(self): s = super(VolumetricReplicationPadding, self).__repr__() s += '({}, {}, {}, {}, {}, {})'.format(self.pleft, self.pright, - self.ptop, self.pbottom, - self.pfront, self.pback - ) + self.ptop, self.pbottom, + self.pfront, self.pback + ) return s - diff --git a/torch/legacy/nn/WeightedEuclidean.py b/torch/legacy/nn/WeightedEuclidean.py index 38256f1137..f171bbf020 100644 --- a/torch/legacy/nn/WeightedEuclidean.py +++ b/torch/legacy/nn/WeightedEuclidean.py @@ -2,6 +2,7 @@ import math import torch from .Module import Module + class WeightedEuclidean(Module): def __init__(self, inputSize, outputSize): @@ -36,40 +37,40 @@ class WeightedEuclidean(Module): def reset(self, stdv=None): if stdv is not None: - stdv = stdv * math.sqrt(3) + stdv = stdv * math.sqrt(3) else: - stdv = 1. / math.sqrt(self.weight.size(1)) + stdv = 1. / math.sqrt(self.weight.size(1)) self.weight.uniform_(-stdv, stdv) self.diagCov.fill_(1) def _view(self, res, src, *args): if src.is_contiguous(): - res.set_(src.view(*args)) + res.set_(src.view(*args)) else: - res.set_(src.contiguous().view(*args)) + res.set_(src.contiguous().view(*args)) def updateOutput(self, input): # lazy-initialize if self._diagCov is None: - self._diagCov = self.output.new() + self._diagCov = self.output.new() if self._input is None: - self._input = input.new() + self._input = input.new() if self._weight is None: - self._weight = self.weight.new() + self._weight = self.weight.new() if self._expand is None: - self._expand = self.output.new() + self._expand = self.output.new() if self._expand2 is None: - self._expand2 = self.output.new() + self._expand2 = self.output.new() if self._expand3 is None: - self._expand3 = self.output.new() + self._expand3 = self.output.new() if self._repeat is None: - self._repeat = self.output.new() + self._repeat = self.output.new() if self._repeat2 is None: - self._repeat2 = self.output.new() + self._repeat2 = self.output.new() if self._repeat3 is None: - self._repeat3 = self.output.new() + self._repeat3 = self.output.new() inputSize, outputSize = self.weight.size(0), self.weight.size(1) @@ -106,29 +107,28 @@ class WeightedEuclidean(Module): self._repeat.add_(-1, self._expand2) self._repeat.mul_(self._expand3) - torch.norm(self._repeat, 2, 1, out=self.output) self.output.resize_(batchSize, outputSize) else: - raise RuntimeError("1D or 2D input expected") + raise RuntimeError("1D or 2D input expected") return self.output def updateGradInput(self, input, gradOutput): if self.gradInput is None: - return + return if self._div is None: - self._div = input.new() + self._div = input.new() if self._output is None: - self._output = self.output.new() + self._output = self.output.new() if self._expand4 is None: - self._expand4 = input.new() + self._expand4 = input.new() if self._gradOutput is None: - self._gradOutput = input.new() + self._gradOutput = input.new() if not self.fastBackward: - self.updateOutput(input) + self.updateOutput(input) inputSize, outputSize = self.weight.size(0), self.weight.size(1) @@ -169,7 +169,6 @@ 
class WeightedEuclidean(Module): torch.mul(self._repeat, self._expand4, out=self._repeat2) self._repeat2.mul_(self._expand3) - torch.sum(self._repeat2, 2, out=self.gradInput) self.gradInput.resize_as_(input) else: @@ -203,11 +202,10 @@ class WeightedEuclidean(Module): else: torch.mul(self._repeat, self._expand4, out=self._repeat2) - self.gradDiagCov.add_(self._repeat2) elif input.dim() == 2: if self._sum is None: - self._sum = input.new() + self._sum = input.new() torch.sum(self._repeat2, 0, out=self._sum) self._sum.resize_(inputSize, outputSize) self.gradWeight.add_(-scale, self._sum) @@ -225,7 +223,6 @@ class WeightedEuclidean(Module): self._repeat.mul_(self._expand3) self._repeat.mul_(self._expand4) - torch.sum(self._repeat, 0, out=self._sum) self._sum.resize_(inputSize, outputSize) self.gradDiagCov.add_(scale, self._sum) @@ -261,4 +258,3 @@ class WeightedEuclidean(Module): self.accGradParameters(input, gradOutput, -lr) self.gradWeight = gradWeight self.gradDiagCov = gradDiagCov - diff --git a/torch/legacy/nn/WeightedMSECriterion.py b/torch/legacy/nn/WeightedMSECriterion.py index 36d8f75dcc..eb1a4dee33 100644 --- a/torch/legacy/nn/WeightedMSECriterion.py +++ b/torch/legacy/nn/WeightedMSECriterion.py @@ -1,6 +1,7 @@ import torch from .Criterion import Criterion + class WeightedMSECriterion(Criterion): def __init__(self, weight, sizeAverage=True): @@ -12,7 +13,7 @@ class WeightedMSECriterion(Criterion): def updateOutput(self, input, target): if self.buffer is None: - self.buffer = input.new() + self.buffer = input.new() self.buffer.resize_as_(input).copy_(target) if input.dim() - 1 == self.weight.dim(): for i in range(input.size(0)): @@ -21,7 +22,7 @@ class WeightedMSECriterion(Criterion): self.buffer.mul_(self.weight) if self.output_tensor is None: - self.output_tensor = input.new(1) + self.output_tensor = input.new(1) self._backend.MSECriterion_updateOutput( self._backend.library_state, input, @@ -48,4 +49,3 @@ class WeightedMSECriterion(Criterion): self.sizeAverage ) return self.gradInput - diff --git a/torch/legacy/nn/__init__.py b/torch/legacy/nn/__init__.py index 929601ebdc..7e2507ac0a 100644 --- a/torch/legacy/nn/__init__.py +++ b/torch/legacy/nn/__init__.py @@ -80,7 +80,7 @@ from .PairwiseDistance import PairwiseDistance from .ParallelCriterion import ParallelCriterion from .PartialLinear import PartialLinear from .Power import Power -from .RReLU import RReLU # TODO implement +from .RReLU import RReLU # TODO implement from .ReLU6 import ReLU6 from .Replicate import Replicate from .Reshape import Reshape diff --git a/torch/legacy/nn/utils.py b/torch/legacy/nn/utils.py index 8a76117f03..0432a6e3a0 100644 --- a/torch/legacy/nn/utils.py +++ b/torch/legacy/nn/utils.py @@ -13,6 +13,8 @@ import torch # > net1:type('torch.cuda.FloatTensor', tensorCache) # > net2:type('torch.cuda.FloatTensor', tensorCache) # > nn.utils.recursiveType(anotherTensor, 'torch.cuda.FloatTensor', tensorCache) + + def recursiveType(param, type, tensorCache={}): from .Criterion import Criterion from .Module import Module @@ -28,12 +30,13 @@ def recursiveType(param, type, tensorCache={}): newparam = tensorCache[key] else: newparam = torch.Tensor().type(type) - storageType = type.replace('Tensor','Storage') + storageType = type.replace('Tensor', 'Storage') param_storage = param.storage() if param_storage: storage_key = param_storage._cdata if storage_key not in tensorCache: - tensorCache[storage_key] = torch._import_dotted_name(storageType)(param_storage.size()).copy_(param_storage) + tensorCache[storage_key] = 
torch._import_dotted_name( + storageType)(param_storage.size()).copy_(param_storage) newparam.set_( tensorCache[storage_key], param.storage_offset(), @@ -44,6 +47,7 @@ def recursiveType(param, type, tensorCache={}): param = newparam return param + def recursiveResizeAs(t1, t2): if isinstance(t2, list): t1 = t1 if isinstance(t1, list) else [t1] @@ -56,20 +60,22 @@ def recursiveResizeAs(t1, t2): t1 = t1 if torch.is_tensor(t1) else t2.new() t1.resize_as_(t2) else: - raise RuntimeError("Expecting nested tensors or tables. Got " + \ - type(t1).__name__ + " and " + type(t2).__name__ + "instead") + raise RuntimeError("Expecting nested tensors or tables. Got " + + type(t1).__name__ + " and " + type(t2).__name__ + "instead") return t1, t2 + def recursiveFill(t2, val): if isinstance(t2, list): t2 = [recursiveFill(x, val) for x in t2] elif torch.is_tensor(t2): t2.fill_(val) else: - raise RuntimeError("expecting tensor or table thereof. Got " + \ - type(t2).__name__ + " instead") + raise RuntimeError("expecting tensor or table thereof. Got " + + type(t2).__name__ + " instead") return t2 + def recursiveAdd(t1, val=1, t2=None): if t2 is None: t2 = val @@ -81,10 +87,11 @@ def recursiveAdd(t1, val=1, t2=None): elif torch.is_tensor(t1) and torch.is_tensor(t2): t1.add_(val, t2) else: - raise RuntimeError("expecting nested tensors or tables. Got " + \ - type(t1).__name__ + " and " + type(t2).__name__ + " instead") + raise RuntimeError("expecting nested tensors or tables. Got " + + type(t1).__name__ + " and " + type(t2).__name__ + " instead") return t1, t2 + def recursiveCopy(t1, t2): if isinstance(t2, list): t1 = t1 if isinstance(t1, list) else [t1] @@ -94,10 +101,11 @@ def recursiveCopy(t1, t2): t1 = t1 if torch.is_tensor(t1) else t2.new() t1.resize_as_(t2).copy_(t2) else: - raise RuntimeError("expecting nested tensors or tables. Got " + \ - type(t1).__name__ + " and " + type(t2).__name__ + " instead") + raise RuntimeError("expecting nested tensors or tables. Got " + + type(t1).__name__ + " and " + type(t2).__name__ + " instead") return t1, t2 + def addSingletondimension(*args): view = None if len(args) < 3: @@ -109,6 +117,7 @@ def addSingletondimension(*args): view.set_(t) return view.unsqueeze_(dim) + def contiguousView(output, input, *args): if output is None: output = input.new() @@ -123,9 +132,12 @@ def contiguousView(output, input, *args): # go over specified fields and clear them. 
accepts # nn.clearState(self, ['_buffer', '_buffer2']) and # nn.clearState(self, '_buffer', '_buffer2') + + def clear(self, *args): if len(args) == 1 and isinstance(args[0], list): args = args[0] + def _clear(f): if not hasattr(self, f): return diff --git a/torch/legacy/optim/adadelta.py b/torch/legacy/optim/adadelta.py index 569634f1ad..1edd237560 100644 --- a/torch/legacy/optim/adadelta.py +++ b/torch/legacy/optim/adadelta.py @@ -32,7 +32,7 @@ def adadelta(opfunc, x, config, state=None): # (2) weight decay if wd != 0: - dfdx.add_(wd, x) + dfdx.add_(wd, x) # (3) parameter update if not 'paramVariance' in state: @@ -43,7 +43,8 @@ def adadelta(opfunc, x, config, state=None): state['paramVariance'].mul_(rho).addcmul_(1 - rho, dfdx, dfdx) state['paramStd'].resize_as_(state['paramVariance']).copy_(state['paramVariance']).add_(eps).sqrt_() - state['delta'].resize_as_(state['paramVariance']).copy_(state['accDelta']).add_(eps).sqrt_().div_(state['paramStd']).mul_(dfdx) + state['delta'].resize_as_(state['paramVariance']).copy_( + state['accDelta']).add_(eps).sqrt_().div_(state['paramStd']).mul_(dfdx) x.add_(-1, state['delta']) state['accDelta'].mul_(rho).addcmul_(1 - rho, state['delta'], state['delta']) diff --git a/torch/legacy/optim/adagrad.py b/torch/legacy/optim/adagrad.py index 3c4daee4fb..29757904c5 100644 --- a/torch/legacy/optim/adagrad.py +++ b/torch/legacy/optim/adagrad.py @@ -32,7 +32,6 @@ def adagrad(opfunc, x, config, state=None): if wd != 0: dfdx.add_(wd, x) - # (3) learning rate decay (annealing) clr = lr / (1 + state['evalCounter'] * lrd) @@ -50,4 +49,3 @@ def adagrad(opfunc, x, config, state=None): # return x*, f(x) before optimization return x, fx - diff --git a/torch/legacy/optim/adam.py b/torch/legacy/optim/adam.py index cb81225fbd..607735ab62 100644 --- a/torch/legacy/optim/adam.py +++ b/torch/legacy/optim/adam.py @@ -1,5 +1,6 @@ import math + def adam(opfunc, x, config, state=None): """ An implementation of Adam http://arxiv.org/pdf/1412.6980.pdf @@ -59,7 +60,7 @@ def adam(opfunc, x, config, state=None): biasCorrection1 = 1 - beta1 ** state['t'] biasCorrection2 = 1 - beta2 ** state['t'] - stepSize = lr * math.sqrt(biasCorrection2)/biasCorrection1 + stepSize = lr * math.sqrt(biasCorrection2) / biasCorrection1 # (3) update x x.addcdiv_(-stepSize, state['m'], state['denom']) diff --git a/torch/legacy/optim/adamax.py b/torch/legacy/optim/adamax.py index ebeba9f4b4..5f67ced452 100644 --- a/torch/legacy/optim/adamax.py +++ b/torch/legacy/optim/adamax.py @@ -1,5 +1,6 @@ import torch + def adamax(opfunc, x, config, state=None): """ An implementation of AdaMax http://arxiv.org/pdf/1412.6980.pdf diff --git a/torch/legacy/optim/asgd.py b/torch/legacy/optim/asgd.py index e503eb9057..eb44366e74 100644 --- a/torch/legacy/optim/asgd.py +++ b/torch/legacy/optim/asgd.py @@ -1,5 +1,6 @@ import math + def asgd(opfunc, x, config, state=None): """ An implementation of ASGD @@ -60,12 +61,11 @@ def asgd(opfunc, x, config, state=None): state['tmp'] = state.get('tmp', state['ax'].new().resize_as_(state['ax'])) if state['mu_t'] != 1: state['tmp'].copy_(x) - state['tmp'].add_(-1,state['ax']).mul_(state['mu_t']) + state['tmp'].add_(-1, state['ax']).mul_(state['mu_t']) state['ax'].add_(state['tmp']) else: state['ax'].copy_(x) - # (5) update eta_t and mu_t state['t'] += 1 state['eta_t'] = config['eta0'] / math.pow((1 + config['lambda'] * config['eta0'] * state['t']), config['alpha']) @@ -73,4 +73,3 @@ def asgd(opfunc, x, config, state=None): # return x*, f(x) before optimization, and 
average(x_t0,x_t1,x_t2,...) return x, fx, state['ax'] - diff --git a/torch/legacy/optim/cg.py b/torch/legacy/optim/cg.py index 9926a9c093..118de3bd96 100644 --- a/torch/legacy/optim/cg.py +++ b/torch/legacy/optim/cg.py @@ -2,9 +2,11 @@ import math INFINITY = float('inf') + def sqrt_nothrow(x): return math.sqrt(x) if x >= 0 else float('nan') + def cg(opfunc, x, config, state=None): """ @@ -45,11 +47,11 @@ def cg(opfunc, x, config, state=None): if config is None and state is None: raise ValueError("cg requires a dictionary to retain state between iterations") state = state if state is not None else config - rho = config.get('rho', 0.01) - sig = config.get('sig', 0.5) - _int = config.get('int', 0.1) - ext = config.get('ext', 3.0) - maxIter = config.get('maxIter', 20) + rho = config.get('rho', 0.01) + sig = config.get('sig', 0.5) + _int = config.get('int', 0.1) + ext = config.get('ext', 3.0) + maxIter = config.get('maxIter', 20) ratio = config.get('ratio', 100) maxEval = config.get('maxEval', maxIter * 1.25) red = 1 @@ -86,13 +88,13 @@ def cg(opfunc, x, config, state=None): f1, tdf = opfunc(x) fx.append(f1) df1.copy_(tdf) - i = i+1 + i = i + 1 # initial search direction s.copy_(df1).mul_(-1) - d1 = -s.dot(s ) # slope - z1 = red/(1-d1) # initial step + d1 = -s.dot(s) # slope + z1 = red / (1 - d1) # initial step while i < abs(maxEval): x0.copy_(x) @@ -113,16 +115,16 @@ def cg(opfunc, x, config, state=None): while (f2 > f1 + z1 * rho * d1 or d2 > -sig * d1) and m > 0: limit = z1 if f2 > f1: - z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3) + z2 = z3 - (0.5 * d3 * z3 * z3) / (d3 * z3 + f2 - f3) else: - A = 6*(f2-f3)/z3+3*(d2+d3) - B = 3*(f3-f2)-z3*(d3+2*d2) - z2 = (sqrt_nothrow(B*B-A*d2*z3*z3)-B)/A + A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3) + B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2) + z2 = (sqrt_nothrow(B * B - A * d2 * z3 * z3) - B) / A if z2 != z2 or z2 == INFINITY or z2 == -INFINITY: - z2 = z3/2 + z2 = z3 / 2 - z2 = max(min(z2, _int*z3), (1-_int)*z3) + z2 = max(min(z2, _int * z3), (1 - _int) * z3) z1 = z1 + z2 x.add_(z2, s) f2, tdf = opfunc(x) @@ -134,40 +136,40 @@ def cg(opfunc, x, config, state=None): if f2 > f1 + z1 * rho * d1 or d2 > -sig * d1: break - elif d2 > sig*d1: + elif d2 > sig * d1: success = 1 break elif m == 0: break - A = 6*(f2-f3)/z3+3*(d2+d3) - B = 3*(f3-f2)-z3*(d3+2*d2) - _denom = (B+sqrt_nothrow(B*B-A*d2*z3*z3)) - z2 = -d2*z3*z3/_denom if _denom != 0 else float('nan') + A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3) + B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2) + _denom = (B + sqrt_nothrow(B * B - A * d2 * z3 * z3)) + z2 = -d2 * z3 * z3 / _denom if _denom != 0 else float('nan') if z2 != z2 or z2 == INFINITY or z2 == -INFINITY or z2 < 0: if limit < -0.5: - z2 = z1 * (ext -1) + z2 = z1 * (ext - 1) else: - z2 = (limit-z1)/2 - elif (limit > -0.5) and (z2+z1) > limit: - z2 = (limit-z1)/2 - elif limit < -0.5 and (z2+z1) > z1*ext: - z2 = z1*(ext-1) - elif z2 < -z3*_int: - z2 = -z3*_int - elif limit > -0.5 and z2 < (limit-z1)*(1-_int): - z2 = (limit-z1)*(1-_int) + z2 = (limit - z1) / 2 + elif (limit > -0.5) and (z2 + z1) > limit: + z2 = (limit - z1) / 2 + elif limit < -0.5 and (z2 + z1) > z1 * ext: + z2 = z1 * (ext - 1) + elif z2 < -z3 * _int: + z2 = -z3 * _int + elif limit > -0.5 and z2 < (limit - z1) * (1 - _int): + z2 = (limit - z1) * (1 - _int) f3 = f2 d3 = d2 z3 = -z2 - z1 = z1+z2 + z1 = z1 + z2 x.add_(z2, s) f2, tdf = opfunc(x) df2.copy_(tdf) - i = i+1 + i = i + 1 m = m - 1 d2 = df2.dot(s) @@ -212,4 +214,3 @@ def cg(opfunc, x, config, state=None): state['x0'] = x0 state['s'] = s return x, fx, i - diff 
--git a/torch/legacy/optim/lbfgs.py b/torch/legacy/optim/lbfgs.py index e8f377f7d0..d6d48e6b2f 100644 --- a/torch/legacy/optim/lbfgs.py +++ b/torch/legacy/optim/lbfgs.py @@ -1,5 +1,6 @@ import torch + def lbfgs(opfunc, x, config, state=None): """ An implementation of L-BFGS, heavily inspired by minFunc (Mark Schmidt) @@ -80,8 +81,8 @@ def lbfgs(opfunc, x, config, state=None): if 'dir_bufs' not in state: # reusable buffers for y's and s's, and their histories verbose('creating recyclable direction/step/history buffers') - state['dir_bufs'] = list(g.new(nCorrection+1, p).split(1)) - state['stp_bufs'] = list(g.new(nCorrection+1, p).split(1)) + state['dir_bufs'] = list(g.new(nCorrection + 1, p).split(1)) + state['stp_bufs'] = list(g.new(nCorrection + 1, p).split(1)) for i in range(len(state['dir_bufs'])): state['dir_bufs'][i] = state['dir_bufs'][i].squeeze(0) state['stp_bufs'][i] = state['stp_bufs'][i].squeeze(0) @@ -155,7 +156,7 @@ def lbfgs(opfunc, x, config, state=None): al = state['al'] torch.mul(g, -1, out=q) - for i in range(k-1, -1, -1): + for i in range(k - 1, -1, -1): al[i] = old_dirs[i].dot(q) * ro[i] q.add_(-al[i], old_stps[i]) @@ -193,7 +194,7 @@ def lbfgs(opfunc, x, config, state=None): lsFuncEval = 0 if lineSearch is not None: # perform line search, using user function - f,g,x,t,lsFuncEval = lineSearch(opfunc,x,t,d,f,g,gtd,lineSearchOpts) + f, g, x, t, lsFuncEval = lineSearch(opfunc, x, t, d, f, g, gtd, lineSearchOpts) f_hist.append(f) else: # no line search, simply move with fixed-step @@ -250,4 +251,4 @@ def lbfgs(opfunc, x, config, state=None): state['d'] = d # return optimal x, and history of f(x) - return x,f_hist,currentFuncEval + return x, f_hist, currentFuncEval diff --git a/torch/legacy/optim/nag.py b/torch/legacy/optim/nag.py index 9be9a106aa..8a8196fe7c 100644 --- a/torch/legacy/optim/nag.py +++ b/torch/legacy/optim/nag.py @@ -40,7 +40,6 @@ def nag(opfunc, x, config, state=None): if mom <= 0: raise ValueError('Momentum must be positive for Nesterov Accelerated Gradient') - # (1) evaluate f(x) and df/dx # first step in the direction of the momentum vector @@ -55,7 +54,6 @@ def nag(opfunc, x, config, state=None): if wd != 0: dfdx.add_(wd, x) - # (3) learning rate decay (annealing) clr = lr / (1 + state['evalCounter'] * lrd) @@ -65,7 +63,6 @@ def nag(opfunc, x, config, state=None): else: state['dfdx'].mul_(mom) - # (5) parameter update with single or individual learning rates if lrs is not None: if 'deltaParameters' in state: @@ -78,10 +75,8 @@ def nag(opfunc, x, config, state=None): x.add_(-clr, dfdx) state['dfdx'].add_(-clr, dfdx) - # (6) update evaluation counter state['evalCounter'] += 1 # return x, f(x) before optimization return x, fx - diff --git a/torch/legacy/optim/rmsprop.py b/torch/legacy/optim/rmsprop.py index 039b9cf399..351c8c3fe6 100644 --- a/torch/legacy/optim/rmsprop.py +++ b/torch/legacy/optim/rmsprop.py @@ -1,5 +1,6 @@ import torch + def rmsprop(opfunc, x, config, state=None): """ An implementation of RMSprop @@ -44,10 +45,9 @@ def rmsprop(opfunc, x, config, state=None): state['m'] = x.new().resize_as_(dfdx).zero_() state['tmp'] = x.new().resize_as_(dfdx) - # (4) calculate new (leaky) mean squared values state['m'].mul_(alpha) - state['m'].addcmul_(1.0-alpha, dfdx, dfdx) + state['m'].addcmul_(1.0 - alpha, dfdx, dfdx) # (5) perform update torch.sqrt(state['m'], out=state['tmp']).add_(epsilon) diff --git a/torch/legacy/optim/rprop.py b/torch/legacy/optim/rprop.py index 6d879b2610..691ef3c208 100644 --- a/torch/legacy/optim/rprop.py +++ 
b/torch/legacy/optim/rprop.py @@ -1,5 +1,6 @@ import torch + def rprop(opfunc, x, config, state=None): """ A plain implementation of RPROP @@ -42,22 +43,20 @@ def rprop(opfunc, x, config, state=None): # init temp storage if not 'delta' in state: - state['delta'] = dfdx.new(dfdx.size()).zero_() + state['delta'] = dfdx.new(dfdx.size()).zero_() state['stepsize'] = dfdx.new(dfdx.size()).fill_(stepsize) - state['sign'] = dfdx.new(dfdx.size()) + state['sign'] = dfdx.new(dfdx.size()) state['bytesign'] = torch.ByteTensor(dfdx.size()) - state['psign'] = torch.ByteTensor(dfdx.size()) - state['nsign'] = torch.ByteTensor(dfdx.size()) - state['zsign'] = torch.ByteTensor(dfdx.size()) - state['dminmax'] = torch.ByteTensor(dfdx.size()) + state['psign'] = torch.ByteTensor(dfdx.size()) + state['nsign'] = torch.ByteTensor(dfdx.size()) + state['zsign'] = torch.ByteTensor(dfdx.size()) + state['dminmax'] = torch.ByteTensor(dfdx.size()) if str(type(x)).find('Cuda') > -1: # Push to GPU - state['psign'] = state['psign'].cuda() - state['nsign'] = state['nsign'].cuda() - state['zsign'] = state['zsign'].cuda() - state['dminmax'] = state['dminmax'].cuda() - - + state['psign'] = state['psign'].cuda() + state['nsign'] = state['nsign'].cuda() + state['zsign'] = state['zsign'].cuda() + state['dminmax'] = state['dminmax'].cuda() # sign of derivative from last step to this one torch.mul(dfdx, state['delta'], out=state['sign']).sign_() @@ -98,4 +97,3 @@ def rprop(opfunc, x, config, state=None): # return x*, table of f(x) values from each step return x, hfx - diff --git a/torch/legacy/optim/sgd.py b/torch/legacy/optim/sgd.py index a6a6e817d0..69b756d6e2 100644 --- a/torch/legacy/optim/sgd.py +++ b/torch/legacy/optim/sgd.py @@ -1,5 +1,6 @@ import torch + def sgd(opfunc, x, config, state=None): """A plain implementation of SGD @@ -62,7 +63,7 @@ def sgd(opfunc, x, config, state=None): if 'dfdx' not in state: state['dfdx'] = torch.Tensor().type_as(dfdx).resize_as_(dfdx).copy_(dfdx) else: - state['dfdx'].mul_(mom).add_(1-damp, dfdx) + state['dfdx'].mul_(mom).add_(1 - damp, dfdx) if nesterov: dfdx.add_(mom, state['dfdx']) @@ -82,7 +83,6 @@ def sgd(opfunc, x, config, state=None): else: x.add_(-clr, dfdx) - # (6) update evaluation counter state['evalCounter'] += 1 diff --git a/torch/multiprocessing/queue.py b/torch/multiprocessing/queue.py index cc83b536bc..ad0a32b08b 100644 --- a/torch/multiprocessing/queue.py +++ b/torch/multiprocessing/queue.py @@ -26,6 +26,7 @@ class ConnectionWrapper(object): class Queue(multiprocessing.queues.Queue): + def __init__(self, *args, **kwargs): super(Queue, self).__init__(*args, **kwargs) self._reader = ConnectionWrapper(self._reader) @@ -35,6 +36,7 @@ class Queue(multiprocessing.queues.Queue): class SimpleQueue(multiprocessing.queues.SimpleQueue): + def _make_methods(self): if not isinstance(self._reader, ConnectionWrapper): self._reader = ConnectionWrapper(self._reader) diff --git a/torch/multiprocessing/reductions.py b/torch/multiprocessing/reductions.py index e7e384b25a..4c2799c264 100644 --- a/torch/multiprocessing/reductions.py +++ b/torch/multiprocessing/reductions.py @@ -20,6 +20,7 @@ except ImportError: class StorageRef(object): # An object with a cdata field which may be set to None. We subclass object # instead of using a dict() to support weak references. 
+ def __init__(self, ptr): self.cdata = ptr diff --git a/torch/nn/_functions/activation.py b/torch/nn/_functions/activation.py index 4caaf0de82..433e8eee62 100644 --- a/torch/nn/_functions/activation.py +++ b/torch/nn/_functions/activation.py @@ -15,4 +15,3 @@ class Softsign(Function): self.buffer_squared = True grad_input = grad_output.clone().div_(self.buffer) return grad_input - diff --git a/torch/nn/_functions/batchnorm.py b/torch/nn/_functions/batchnorm.py index 6d80eb5d5e..ecf125f1ca 100644 --- a/torch/nn/_functions/batchnorm.py +++ b/torch/nn/_functions/batchnorm.py @@ -5,6 +5,7 @@ import torch.backends.cudnn as cudnn class BatchNorm(Function): + def __init__(self, running_mean, running_var, training, momentum, eps): super(BatchNorm, self).__init__() self.running_mean = running_mean diff --git a/torch/nn/_functions/conv.py b/torch/nn/_functions/conv.py index 6ba0b989d5..f9adca22f6 100644 --- a/torch/nn/_functions/conv.py +++ b/torch/nn/_functions/conv.py @@ -9,6 +9,7 @@ _thnn_convs = {} class ConvNd(Function): + def __init__(self, stride, padding, dilation, transposed, output_padding, groups): super(ConvNd, self).__init__() @@ -161,7 +162,7 @@ class ConvNd(Function): res.append(impl[fn_name](self, self._bufs[g], *grouped_args)) if fn_name == 'grad_params': return [torch.cat(t, 0) if t[0] is not None else None - for t in zip(*res)] + for t in zip(*res)] else: return torch.cat(res, 1) diff --git a/torch/nn/_functions/dropout.py b/torch/nn/_functions/dropout.py index 734eedc478..49ab427a8c 100644 --- a/torch/nn/_functions/dropout.py +++ b/torch/nn/_functions/dropout.py @@ -9,7 +9,7 @@ class Dropout(InplaceFunction): super(Dropout, self).__init__() if p < 0 or p > 1: raise ValueError("dropout probability has to be between 0 and 1, " - "but got {}".format(p)) + "but got {}".format(p)) self.p = p self.train = train self.inplace = inplace @@ -26,7 +26,7 @@ class Dropout(InplaceFunction): if self.p > 0 and self.train: self.noise = self._make_noise(input) - self.noise.bernoulli_(1-self.p).div_(1-self.p) + self.noise.bernoulli_(1 - self.p).div_(1 - self.p) if self.p == 1: self.noise.fill_(0) self.noise = self.noise.expand_as(input) @@ -45,4 +45,4 @@ class FeatureDropout(Dropout): def _make_noise(self, input): return input.new().resize_(input.size(0), input.size(1), - *repeat(1, input.dim()-2)) + *repeat(1, input.dim() - 2)) diff --git a/torch/nn/_functions/linear.py b/torch/nn/_functions/linear.py index 279ea9c2b5..43bae458d5 100644 --- a/torch/nn/_functions/linear.py +++ b/torch/nn/_functions/linear.py @@ -29,4 +29,3 @@ class Linear(Function): return grad_input, grad_weight, grad_bias else: return grad_input, grad_weight - diff --git a/torch/nn/_functions/loss.py b/torch/nn/_functions/loss.py index 3a54f7be51..433b0e5e44 100644 --- a/torch/nn/_functions/loss.py +++ b/torch/nn/_functions/loss.py @@ -16,9 +16,9 @@ class CosineEmbeddingLoss(Function): return torch.ByteTensor() def forward(self, input1, input2, y): - self.w1 = input1.new() + self.w1 = input1.new() self.w22 = input1.new() - self.w = input1.new() + self.w = input1.new() self.w32 = input1.new() self._outputs = input1.new() @@ -98,6 +98,7 @@ class CosineEmbeddingLoss(Function): class HingeEmbeddingLoss(Function): + def __init__(self, margin=1, size_average=True): super(HingeEmbeddingLoss, self).__init__() self.margin = margin @@ -176,4 +177,3 @@ class MarginRankingLoss(Function): grad_input2.div_(y.size(0)) return grad_input1, grad_input2, None - diff --git a/torch/nn/_functions/rnn.py b/torch/nn/_functions/rnn.py index 
00e7b70897..eab7a5d65a 100644 --- a/torch/nn/_functions/rnn.py +++ b/torch/nn/_functions/rnn.py @@ -8,43 +8,43 @@ except ImportError: def RNNReLUCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None): - hy = F.relu(F.linear(input, w_ih, b_ih) + F.linear(hidden, w_hh, b_hh)) - return hy + hy = F.relu(F.linear(input, w_ih, b_ih) + F.linear(hidden, w_hh, b_hh)) + return hy def RNNTanhCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None): - hy = F.tanh(F.linear(input, w_ih, b_ih) + F.linear(hidden, w_hh, b_hh)) - return hy + hy = F.tanh(F.linear(input, w_ih, b_ih) + F.linear(hidden, w_hh, b_hh)) + return hy def LSTMCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None): - hx, cx = hidden - gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh) - ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1) + hx, cx = hidden + gates = F.linear(input, w_ih, b_ih) + F.linear(hx, w_hh, b_hh) + ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1) - ingate = F.sigmoid(ingate) - forgetgate = F.sigmoid(forgetgate) - cellgate = F.tanh(cellgate) - outgate = F.sigmoid(outgate) + ingate = F.sigmoid(ingate) + forgetgate = F.sigmoid(forgetgate) + cellgate = F.tanh(cellgate) + outgate = F.sigmoid(outgate) - cy = (forgetgate * cx) + (ingate * cellgate) - hy = outgate * F.tanh(cy) + cy = (forgetgate * cx) + (ingate * cellgate) + hy = outgate * F.tanh(cy) - return hy, cy + return hy, cy def GRUCell(input, hidden, w_ih, w_hh, b_ih=None, b_hh=None): - gi = F.linear(input, w_ih, b_ih) - gh = F.linear(hidden, w_hh, b_hh) - i_r, i_i, i_n = gi.chunk(3, 1) - h_r, h_i, h_n = gh.chunk(3, 1) + gi = F.linear(input, w_ih, b_ih) + gh = F.linear(hidden, w_hh, b_hh) + i_r, i_i, i_n = gi.chunk(3, 1) + h_r, h_i, h_n = gh.chunk(3, 1) - resetgate = F.sigmoid(i_r + h_r) - inputgate = F.sigmoid(i_i + h_i) - newgate = F.tanh(i_n + resetgate * h_n) - hy = newgate + inputgate * (hidden - newgate) + resetgate = F.sigmoid(i_r + h_r) + inputgate = F.sigmoid(i_i + h_i) + newgate = F.tanh(i_n + resetgate * h_n) + hy = newgate + inputgate * (hidden - newgate) - return hy + return hy def StackedRNN(inners, num_layers, lstm=False, dropout=0, train=True): @@ -87,6 +87,7 @@ def StackedRNN(inners, num_layers, lstm=False, dropout=0, train=True): return forward + def Recurrent(inner, reverse=False): def forward(input, hidden, weight): output = [] @@ -144,7 +145,8 @@ def AutogradRNN(mode, input_size, hidden_size, num_layers=1, batch_first=False, class CudnnRNN(NestedIOFunction): - def __init__(self, mode, input_size, hidden_size, num_layers=1, batch_first=False, dropout=0, train=True, bidirectional=False, dropout_state=None): + + def __init__(self, mode, input_size, hidden_size, num_layers=1, batch_first=False, dropout=0, train=True, bidirectional=False, dropout_state=None): super(CudnnRNN, self).__init__() if dropout_state is None: dropout_state = {} @@ -177,7 +179,6 @@ class CudnnRNN(NestedIOFunction): self.save_for_backward(input, hx, weight, output) return output, hy - def backward_extended(self, grad_output, grad_hy): input, hx, weight, output = self.saved_tensors diff --git a/torch/nn/_functions/thnn/activation.py b/torch/nn/_functions/thnn/activation.py index 3ac85044fc..a37f004341 100644 --- a/torch/nn/_functions/thnn/activation.py +++ b/torch/nn/_functions/thnn/activation.py @@ -129,4 +129,3 @@ class Softmin(Function): _all_functions.append(PReLU) _all_functions.append(RReLU) _all_functions.append(Softmin) - diff --git a/torch/nn/_functions/thnn/auto.py b/torch/nn/_functions/thnn/auto.py index be8fef82dc..f3ef7057ac 100644 --- 
a/torch/nn/_functions/thnn/auto.py +++ b/torch/nn/_functions/thnn/auto.py @@ -32,20 +32,20 @@ def _make_function_class_criterion(class_name, update_output, update_grad_input, self._backend = type2backend[type(input)] self.save_for_backward(input, target) if weight_arg_idx >= 0: - insert_idx = weight_arg_idx - 4 # state, input, target, output + insert_idx = weight_arg_idx - 4 # state, input, target, output self.additional_args.insert(insert_idx, self.weight) for idx in buffers_idx: self.additional_args.insert(idx, input.new(1)) output = input.new(1) getattr(self._backend, update_output.name)(self._backend.library_state, input, target, - output, *self.additional_args) + output, *self.additional_args) return output def backward(self, grad_output): input, target = self.saved_tensors grad_input = grad_output.new().resize_as_(input).zero_() getattr(self._backend, update_grad_input.name)(self._backend.library_state, input, target, - grad_input, *self.additional_args) + grad_input, *self.additional_args) grad_output_expanded = grad_output.view(*repeat(1, grad_input.dim())) grad_input.mul_(grad_output_expanded.expand_as(grad_input)) return grad_input, None @@ -76,15 +76,15 @@ def _make_function_class(class_name, update_output, update_grad_input, acc_grad_ param_args = {'weight', 'bias'} ignored_args = {'weight', 'bias', 'gradWeight', 'gradBias', 'output'} expected_params = [arg for arg in update_output.arguments[3:] - if arg.name in param_args] + if arg.name in param_args] buffers = {} buffers['update_output'] = _find_buffers(update_output.arguments[3:], - ignored_args) + ignored_args) buffers['update_grad_input'] = _find_buffers( - update_grad_input.arguments[4:], ignored_args) + update_grad_input.arguments[4:], ignored_args) if acc_grad_parameters is not None: buffers['acc_grad_parameters'] = _find_buffers( - acc_grad_parameters.arguments[3:], ignored_args) + acc_grad_parameters.arguments[3:], ignored_args) # This and __init__ assume that only the last argument can be # an inplace flag @@ -112,8 +112,8 @@ def _make_function_class(class_name, update_output, update_grad_input, acc_grad_ for param in params: if type(param) != type(input): raise RuntimeError("input type ({}) doesn't match the type of " - "a parameter tensor ({})".format(torch.typename(input), - torch.typename(param))) + "a parameter tensor ({})".format(torch.typename(input), + torch.typename(param))) # Allocate temporary buffers and insert them into additional_args self.buffers = defaultdict(type(input)) @@ -246,10 +246,10 @@ def _generate_function_classes(scope_dict): # This has to call a function to retain correct references to functions if 'Criterion' in fn: cls = _make_function_class_criterion(class_name, update_output, - update_grad_input, acc_grad_parameters) + update_grad_input, acc_grad_parameters) else: cls = _make_function_class(class_name, update_output, - update_grad_input, acc_grad_parameters) + update_grad_input, acc_grad_parameters) scope_dict[class_name] = cls if not class_name.startswith('_'): _all_functions.append(cls) diff --git a/torch/nn/_functions/thnn/loss.py b/torch/nn/_functions/thnn/loss.py index 270414a3d8..72cc1d1b0f 100644 --- a/torch/nn/_functions/thnn/loss.py +++ b/torch/nn/_functions/thnn/loss.py @@ -34,4 +34,3 @@ class BCELoss(_BCELoss): _all_functions.append(BCELoss) - diff --git a/torch/nn/_functions/thnn/normalization.py b/torch/nn/_functions/thnn/normalization.py index 13ff3e2e1a..a80ad84cc2 100644 --- a/torch/nn/_functions/thnn/normalization.py +++ b/torch/nn/_functions/thnn/normalization.py @@ 
-42,10 +42,10 @@ class CrossMapLRN2d(Function): self.k ) else: - batch_size = input.size(0) - channels = input.size(1) + batch_size = input.size(0) + channels = input.size(1) input_height = input.size(2) - input_width = input.size(3) + input_width = input.size(3) output.resize_as_(input) self.scale.resize_as_(input) @@ -54,7 +54,7 @@ class CrossMapLRN2d(Function): input_square = output torch.pow(input, 2, out=input_square) - pre_pad = int((self.size - 1)/2 + 1) + pre_pad = int((self.size - 1) / 2 + 1) pre_pad_crop = channels if pre_pad > channels else pre_pad scale_first = self.scale.select(1, 0) @@ -67,7 +67,7 @@ class CrossMapLRN2d(Function): # by adding the next feature map and removing the previous for c in range(1, channels): scale_previous = self.scale.select(1, c - 1) - scale_current = self.scale.select(1, c) + scale_current = self.scale.select(1, c) scale_current.copy_(scale_previous) if c < channels - pre_pad + 1: square_next = input_square.select(1, c + pre_pad - 1) @@ -103,13 +103,13 @@ class CrossMapLRN2d(Function): self.k ) else: - batch_size = input.size(0) - channels = input.size(1) + batch_size = input.size(0) + channels = input.size(1) input_height = input.size(2) - input_width = input.size(3) + input_width = input.size(3) paddded_ratio = input.new(channels + self.size - 1, input_height, - input_width) + input_width) accum_ratio = input.new(input_height, input_width) cache_ratio_value = 2 * self.alpha * self.beta / self.size @@ -120,16 +120,16 @@ class CrossMapLRN2d(Function): paddded_ratio.zero_() padded_ratio_center = paddded_ratio.narrow(0, inversePrePad, - channels) + channels) for n in range(batch_size): torch.mul(grad_output[n], output[n], out=padded_ratio_center) padded_ratio_center.div_(self.scale[n]) torch.sum( - paddded_ratio.narrow(0, 0, self.size-1), 0, out=accum_ratio) + paddded_ratio.narrow(0, 0, self.size - 1), 0, out=accum_ratio) for c in range(channels): - accum_ratio.add_(paddded_ratio[c+self.size-1]) + accum_ratio.add_(paddded_ratio[c + self.size - 1]) grad_input[n][c].addcmul_(-cache_ratio_value, input[n][c], - accum_ratio) + accum_ratio) accum_ratio.add_(-1, paddded_ratio[c]) return grad_input diff --git a/torch/nn/_functions/thnn/pooling.py b/torch/nn/_functions/thnn/pooling.py index b771f5ff34..701101c76f 100644 --- a/torch/nn/_functions/thnn/pooling.py +++ b/torch/nn/_functions/thnn/pooling.py @@ -6,6 +6,7 @@ from torch.nn.modules.utils import _pair, _triple class MaxPool1d(Function): + def __init__(self, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False): self.kernel_size = kernel_size @@ -19,12 +20,12 @@ class MaxPool1d(Function): backend = type2backend[type(input)] indices, output = input.new().long(), input.new() backend.SpatialDilatedMaxPooling_updateOutput(backend.library_state, - input, output, indices, - self.kernel_size, 1, - self.stride, 1, - self.pad, 0, - self.dilation, 1, - self.ceil_mode) + input, output, indices, + self.kernel_size, 1, + self.stride, 1, + self.pad, 0, + self.dilation, 1, + self.ceil_mode) if indices.dim() == 4: # TODO: fix when THCUNN handles 3D indices properly indices = indices.squeeze(0) @@ -49,15 +50,17 @@ class MaxPool1d(Function): grad_input = grad_output.new() backend = type2backend[type(input)] backend.SpatialDilatedMaxPooling_updateGradInput(backend.library_state, - input, grad_output, grad_input, indices, - self.kernel_size, 1, - self.stride, 1, - self.pad, 0, - self.dilation, 1, - self.ceil_mode) + input, grad_output, grad_input, indices, + self.kernel_size, 1, + 
self.stride, 1, + self.pad, 0, + self.dilation, 1, + self.ceil_mode) return grad_input + class MaxPool2d(Function): + def __init__(self, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False): self.kernel_size = _pair(kernel_size) @@ -71,12 +74,12 @@ class MaxPool2d(Function): backend = type2backend[type(input)] indices, output = input.new().long(), input.new() backend.SpatialDilatedMaxPooling_updateOutput(backend.library_state, - input, output, indices, - self.kernel_size[1], self.kernel_size[0], - self.stride[1], self.stride[0], - self.padding[1], self.padding[0], - self.dilation[1], self.dilation[0], - self.ceil_mode) + input, output, indices, + self.kernel_size[1], self.kernel_size[0], + self.stride[1], self.stride[0], + self.padding[1], self.padding[0], + self.dilation[1], self.dilation[0], + self.ceil_mode) if self.return_indices: self.save_for_backward(input, indices) self.mark_non_differentiable(indices) @@ -95,21 +98,22 @@ class MaxPool2d(Function): grad_input = grad_output.new() backend = type2backend[type(input)] backend.SpatialDilatedMaxPooling_updateGradInput(backend.library_state, - input, grad_output, grad_input, indices, - self.kernel_size[1], self.kernel_size[0], - self.stride[1], self.stride[0], - self.padding[1], self.padding[0], - self.dilation[1], self.dilation[0], - self.ceil_mode) + input, grad_output, grad_input, indices, + self.kernel_size[1], self.kernel_size[0], + self.stride[1], self.stride[0], + self.padding[1], self.padding[0], + self.dilation[1], self.dilation[0], + self.ceil_mode) return grad_input + class MaxPool3d(Function): def __init__(self, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False): self.kernel_size = _triple(kernel_size) self.stride = _triple(stride if stride is not None else kernel_size) - self.padding = _triple(padding) + self.padding = _triple(padding) self.dilation = _triple(dilation) self.return_indices = return_indices self.ceil_mode = ceil_mode @@ -118,12 +122,12 @@ class MaxPool3d(Function): backend = type2backend[type(input)] indices, output = input.new().long(), input.new() backend.VolumetricDilatedMaxPooling_updateOutput(backend.library_state, - input, output, indices, - self.kernel_size[0], self.kernel_size[2], self.kernel_size[1], - self.stride[0], self.stride[2], self.stride[1], - self.padding[0], self.padding[2], self.padding[1], - self.dilation[0], self.dilation[2], self.dilation[1], - self.ceil_mode) + input, output, indices, + self.kernel_size[0], self.kernel_size[2], self.kernel_size[1], + self.stride[0], self.stride[2], self.stride[1], + self.padding[0], self.padding[2], self.padding[1], + self.dilation[0], self.dilation[2], self.dilation[1], + self.ceil_mode) if self.return_indices: self.save_for_backward(input, indices) self.mark_non_differentiable(indices) @@ -142,16 +146,18 @@ class MaxPool3d(Function): grad_input = grad_output.new() backend = type2backend[type(input)] backend.VolumetricDilatedMaxPooling_updateGradInput(backend.library_state, - input, grad_output, grad_input, indices, - self.kernel_size[0], self.kernel_size[2], self.kernel_size[1], - self.stride[0], self.stride[2], self.stride[1], - self.padding[0], self.padding[2], self.padding[1], - self.dilation[0], self.dilation[2], self.dilation[1], - self.ceil_mode) + input, grad_output, grad_input, indices, + self.kernel_size[0], self.kernel_size[ + 2], self.kernel_size[1], + self.stride[0], self.stride[2], self.stride[1], + self.padding[0], self.padding[2], self.padding[1], + self.dilation[0], 
self.dilation[2], self.dilation[1], + self.ceil_mode) return grad_input class MaxUnpool2d(Function): + def __init__(self, output_size): super(MaxUnpool2d, self).__init__() self.output_size = output_size @@ -175,6 +181,7 @@ class MaxUnpool2d(Function): class MaxUnpool3d(Function): + def __init__(self, output_size, stride, padding): super(MaxUnpool3d, self).__init__() self.output_size = output_size @@ -206,7 +213,7 @@ class MaxUnpool3d(Function): class FractionalMaxPool2d(Function): def __init__(self, kh, kw, output_size=None, output_ratio=None, - return_indices=False, _random_samples=None): + return_indices=False, _random_samples=None): super(FractionalMaxPool2d, self).__init__() # Pool size (how wide the pooling for each output unit is) @@ -234,7 +241,7 @@ class FractionalMaxPool2d(Function): def forward(self, input): if self.random_samples is None: random_samples = input.new().resize_(input.size(0), - input.size(1), 2).uniform_() + input.size(1), 2).uniform_() else: random_samples = self.random_samples self.random_samples = None @@ -257,7 +264,7 @@ class FractionalMaxPool2d(Function): random_samples ) - self.random_samples = None # Free unnecessary buffers + self.random_samples = None # Free unnecessary buffers if self.return_indices: self.save_for_backward(input, indices) return output, indices @@ -287,6 +294,7 @@ class FractionalMaxPool2d(Function): class AvgPool2d(Function): + def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True): self.kernel_size = _pair(kernel_size) @@ -322,7 +330,9 @@ class AvgPool2d(Function): self.ceil_mode, self.count_include_pad) return grad_input + class AvgPool3d(Function): + def __init__(self, kernel_size, stride=None): self.kernel_size = _triple(kernel_size) self.stride = _triple(stride if stride is not None else kernel_size) @@ -333,9 +343,9 @@ class AvgPool3d(Function): # can avoid this with cudnn self.save_for_backward(input) backend.VolumetricAveragePooling_updateOutput(backend.library_state, - input, output, - self.kernel_size[0], self.kernel_size[2], self.kernel_size[1], - self.stride[0], self.stride[2], self.stride[1]) + input, output, + self.kernel_size[0], self.kernel_size[2], self.kernel_size[1], + self.stride[0], self.stride[2], self.stride[1]) return output def backward(self, grad_output): @@ -343,9 +353,9 @@ class AvgPool3d(Function): input, = self.saved_tensors grad_input = grad_output.new() backend.VolumetricAveragePooling_updateGradInput(backend.library_state, - input, grad_output, grad_input, - self.kernel_size[0], self.kernel_size[2], self.kernel_size[1], - self.stride[0], self.stride[2], self.stride[1]) + input, grad_output, grad_input, + self.kernel_size[0], self.kernel_size[2], self.kernel_size[1], + self.stride[0], self.stride[2], self.stride[1]) return grad_input _all_functions.append(AvgPool2d) diff --git a/torch/nn/_functions/thnn/sparse.py b/torch/nn/_functions/thnn/sparse.py index e717cd26cc..0d4e55e6a4 100644 --- a/torch/nn/_functions/thnn/sparse.py +++ b/torch/nn/_functions/thnn/sparse.py @@ -33,7 +33,7 @@ class Embedding(Function): def _make_sparse(self, indices): i = torch.LongTensor(2, indices.numel()) v = torch.ones(indices.numel()) - i[1].copy_(torch.range(0, indices.numel()-1)) + i[1].copy_(torch.range(0, indices.numel() - 1)) i[0].copy_(indices) return sparse.FloatTensor(i, v, torch.Size( [self._weight_size[0], indices.numel()])).contiguous() diff --git a/torch/nn/_functions/thnn/upsampling.py b/torch/nn/_functions/thnn/upsampling.py index 3ffbd999de..9faa8ad7c6 100644 --- 
a/torch/nn/_functions/thnn/upsampling.py +++ b/torch/nn/_functions/thnn/upsampling.py @@ -5,7 +5,9 @@ from torch._thnn import type2backend from . import _all_functions + class _UpsamplingBase(Function): + def __init__(self, size=None, scale_factor=None): super(_UpsamplingBase, self).__init__() if size is None and scale_factor is None: @@ -25,15 +27,15 @@ class UpsamplingNearest2d(_UpsamplingBase): if self.scale_factor is None: if (self.size[0] % input.size(2) != 0 or - self.size[1] % input.size(3) != 0): + self.size[1] % input.size(3) != 0): raise RuntimeError("output size specified in UpSamplingNearest " - "({}) has to be divisible by the input size, but got: " - "{}".format('x'.join(map(str, self.size)), - 'x'.join(map(str, input.size())))) + "({}) has to be divisible by the input size, but got: " + "{}".format('x'.join(map(str, self.size)), + 'x'.join(map(str, input.size())))) self.scale_factor = self.size[0] // input.size(2) if self.scale_factor != self.size[1] // input.size(3): raise RuntimeError("input aspect ratio doesn't match the " - "output ratio") + "output ratio") output = input.new() backend = type2backend[type(input)] diff --git a/torch/nn/backends/__init__.py b/torch/nn/backends/__init__.py index 8b13789179..e69de29bb2 100644 --- a/torch/nn/backends/__init__.py +++ b/torch/nn/backends/__init__.py @@ -1 +0,0 @@ - diff --git a/torch/nn/backends/backend.py b/torch/nn/backends/backend.py index 104d4a3eba..fb5424b67a 100644 --- a/torch/nn/backends/backend.py +++ b/torch/nn/backends/backend.py @@ -1,5 +1,6 @@ class FunctionBackend(object): + def __init__(self): self.function_classes = {} @@ -13,4 +14,3 @@ class FunctionBackend(object): if self.function_classes.get(name): raise RuntimeError("Trying to register second function under name " + name + " in " + type(self).__name__) self.function_classes[name] = function_class - diff --git a/torch/nn/backends/thnn.py b/torch/nn/backends/thnn.py index 4a761eff0e..9e2f14b54a 100644 --- a/torch/nn/backends/thnn.py +++ b/torch/nn/backends/thnn.py @@ -1,5 +1,6 @@ from .backend import FunctionBackend + class THNNFunctionBackend(FunctionBackend): def __reduce__(self): diff --git a/torch/nn/functional.py b/torch/nn/functional.py index a46d932985..b4a16fd450 100644 --- a/torch/nn/functional.py +++ b/torch/nn/functional.py @@ -289,7 +289,7 @@ def max_unpool3d(input, indices, kernel_size, stride=None, padding=0, def lp_pool2d(input, norm_type, kernel_size, stride=None, ceil_mode=False): kw, kh = utils._pair(kernel_size) out = avg_pool2d(input.pow(norm_type), kernel_size, stride, 0, ceil_mode) - return out.mul(kw * kh).pow(1./norm_type) + return out.mul(kw * kh).pow(1. / norm_type) # Activation functions @@ -326,7 +326,7 @@ def prelu(input, weight): return _functions.thnn.PReLU()(input, weight) -def rrelu(input, lower=1./8, upper=1./3, training=False, inplace=False): +def rrelu(input, lower=1. / 8, upper=1. 
/ 3, training=False, inplace=False): return _functions.thnn.RReLU(lower, upper, training, inplace)(input) diff --git a/torch/nn/modules/__init__.py b/torch/nn/modules/__init__.py index 2703c03c34..059706a9d4 100644 --- a/torch/nn/modules/__init__.py +++ b/torch/nn/modules/__init__.py @@ -6,9 +6,9 @@ from .activation import Threshold, ReLU, Hardtanh, ReLU6, Sigmoid, Tanh, \ Softmax, Softmax2d, LogSoftmax, ELU, Hardshrink, LeakyReLU, LogSigmoid, \ Softplus, Softshrink, PReLU, Softsign, Softmin, Tanhshrink, RReLU from .loss import L1Loss, NLLLoss, KLDivLoss, MSELoss, BCELoss, NLLLoss2d, \ - CosineEmbeddingLoss, HingeEmbeddingLoss, MarginRankingLoss, \ - MultiLabelMarginLoss, MultiLabelSoftMarginLoss, MultiMarginLoss, \ - SmoothL1Loss, SoftMarginLoss, CrossEntropyLoss + CosineEmbeddingLoss, HingeEmbeddingLoss, MarginRankingLoss, \ + MultiLabelMarginLoss, MultiLabelSoftMarginLoss, MultiMarginLoss, \ + SmoothL1Loss, SoftMarginLoss, CrossEntropyLoss from .container import Container, Sequential from .pooling import AvgPool1d, AvgPool2d, AvgPool3d, MaxPool1d, MaxPool2d, MaxPool3d, \ MaxUnpool1d, MaxUnpool2d, MaxUnpool3d, FractionalMaxPool2d, LPPool2d @@ -18,6 +18,6 @@ from .padding import ReflectionPad2d, ReplicationPad2d, ReplicationPad3d from .normalization import CrossMapLRN2d from .sparse import Embedding from .rnn import RNNBase, RNN, LSTM, GRU, \ - RNNCell, LSTMCell, GRUCell + RNNCell, LSTMCell, GRUCell from .pixelshuffle import PixelShuffle from .upsampling import UpsamplingNearest2d, UpsamplingBillinear2d diff --git a/torch/nn/modules/activation.py b/torch/nn/modules/activation.py index 4c90bfd9ec..be3f17f7f8 100644 --- a/torch/nn/modules/activation.py +++ b/torch/nn/modules/activation.py @@ -9,7 +9,7 @@ class Threshold(Module): """Thresholds each element of the input Tensor Threshold is defined as:: - + y = x if x >= threshold value if x < threshold @@ -29,6 +29,7 @@ class Threshold(Module): >>> print(input) >>> print(m(input)) """ + def __init__(self, threshold, value, inplace=False): super(Threshold, self).__init__() self.threshold = threshold @@ -40,7 +41,7 @@ class Threshold(Module): return F.threshold(input, self.threshold, self.value, self.inplace) def __repr__(self): - inplace_str=', inplace' if self.inplace else '' + inplace_str = ', inplace' if self.inplace else '' return self.__class__.__name__ + ' (' \ + str(self.threshold) \ + ', ' + str(self.value) \ @@ -64,17 +65,19 @@ class ReLU(Threshold): >>> print(input) >>> print(m(input)) """ + def __init__(self, inplace=False): super(ReLU, self).__init__(0, 0, inplace) def __repr__(self): - inplace_str='inplace' if self.inplace else '' + inplace_str = 'inplace' if self.inplace else '' return self.__class__.__name__ + ' (' \ + inplace_str + ')' class RReLU(Module): - def __init__(self, lower=1./8, upper=1./3, inplace=False): + + def __init__(self, lower=1. / 8, upper=1. 
/ 3, inplace=False): super(RReLU, self).__init__() self.lower = lower self.upper = upper @@ -84,7 +87,7 @@ class RReLU(Module): return F.rrelu(input, self.lower, self.upper, self.training, self.inplace) def __repr__(self): - inplace_str=', inplace' if self.inplace else '' + inplace_str = ', inplace' if self.inplace else '' return self.__class__.__name__ + ' (' \ + str(self.lower) \ + ', ' + str(self.upper) \ @@ -95,7 +98,7 @@ class Hardtanh(Module): """Applies the HardTanh function element-wise HardTanh is defined as:: - + f(x) = +1, if x > 1 f(x) = -1, if x < -1 f(x) = x, otherwise @@ -118,6 +121,7 @@ class Hardtanh(Module): >>> print(input) >>> print(m(input)) """ + def __init__(self, min_value=-1, max_value=1, inplace=False): super(Hardtanh, self).__init__() self.min_val = min_value @@ -129,12 +133,13 @@ class Hardtanh(Module): return F.hardtanh(input, self.min_val, self.max_val, self.inplace) def __repr__(self): - inplace_str=', inplace' if self.inplace else '' + inplace_str = ', inplace' if self.inplace else '' return self.__class__.__name__ + ' (' \ + 'min_val=' + str(self.min_val) \ + ', max_val=' + str(self.max_val) \ + inplace_str + ')' + class ReLU6(Hardtanh): """Applies the element-wise function :math:`{ReLU6}(x) = min(max(0,x), 6)` @@ -152,14 +157,16 @@ class ReLU6(Hardtanh): >>> print(input) >>> print(m(input)) """ + def __init__(self, inplace=False): super(ReLU6, self).__init__(0, 6, inplace) def __repr__(self): - inplace_str='inplace' if self.inplace else '' + inplace_str = 'inplace' if self.inplace else '' return self.__class__.__name__ + ' (' \ + inplace_str + ')' + class Sigmoid(Module): """Applies the element-wise function :math:`f(x) = 1 / ( 1 + exp(-x))` @@ -174,6 +181,7 @@ class Sigmoid(Module): >>> print(input) >>> print(m(input)) """ + def forward(self, input): return torch.sigmoid(input) @@ -181,7 +189,6 @@ class Sigmoid(Module): return self.__class__.__name__ + ' ()' - class Tanh(Module): """Applies element-wise, :math:`f(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))` @@ -196,12 +203,14 @@ class Tanh(Module): >>> print(input) >>> print(m(input)) """ + def forward(self, input): return torch.tanh(input) def __repr__(self): return self.__class__.__name__ + ' ()' + class ELU(Module): """Applies element-wise, :math:`f(x) = max(0,x) + min(0, alpha * (exp(x) - 1))` @@ -220,6 +229,7 @@ class ELU(Module): >>> print(input) >>> print(m(input)) """ + def __init__(self, alpha=1., inplace=False): super(ELU, self).__init__() self.alpha = alpha @@ -229,7 +239,7 @@ class ELU(Module): return F.elu(input, self.alpha, self.inplace) def __repr__(self): - inplace_str=', inplace' if self.inplace else '' + inplace_str = ', inplace' if self.inplace else '' return self.__class__.__name__ + ' (' \ + 'alpha=' + str(self.alpha) \ + inplace_str + ')' @@ -256,6 +266,7 @@ class Hardshrink(Module): >>> print(input) >>> print(m(input)) """ + def __init__(self, lambd=0.5): super(Hardshrink, self).__init__() self.lambd = lambd @@ -286,6 +297,7 @@ class LeakyReLU(Module): >>> print(input) >>> print(m(input)) """ + def __init__(self, negative_slope=1e-2, inplace=False): super(LeakyReLU, self).__init__() self.negative_slope = negative_slope @@ -295,11 +307,12 @@ class LeakyReLU(Module): return F.leaky_relu(input, self.negative_slope, self.inplace) def __repr__(self): - inplace_str=', inplace' if self.inplace else '' + inplace_str = ', inplace' if self.inplace else '' return self.__class__.__name__ + ' (' \ + str(self.negative_slope) \ + inplace_str + ')' + class LogSigmoid(Module): """Applies 
element-wise :math:`LogSigmoid(x) = log( 1 / (1 + exp(-x_i)))` @@ -314,12 +327,14 @@ class LogSigmoid(Module): >>> print(input) >>> print(m(input)) """ + def forward(self, input): return F.logsigmoid(input) def __repr__(self): return self.__class__.__name__ + ' ()' + class Softplus(Module): """Applies element-wise :math:`f(x) = 1/beta * log(1 + exp(beta * x_i))` @@ -344,6 +359,7 @@ class Softplus(Module): >>> print(input) >>> print(m(input)) """ + def __init__(self, beta=1, threshold=20): super(Softplus, self).__init__() self.beta = beta @@ -357,6 +373,7 @@ class Softplus(Module): + 'beta=' + str(self.beta) \ + ', threshold=' + str(self.threshold) + ')' + class Softshrink(Module): """Applies the soft shrinkage function elementwise @@ -379,6 +396,7 @@ class Softshrink(Module): >>> print(input) >>> print(m(input)) """ + def __init__(self, lambd=0.5): super(Softshrink, self).__init__() self.lambd = lambd @@ -398,7 +416,7 @@ class PReLU(Module): across all input channels. If called with nn.PReLU(nChannels), a separate "a" is used for each input channel. - + .. note:: weight decay should not be used when learning "a" for good performance. @@ -417,6 +435,7 @@ class PReLU(Module): >>> print(input) >>> print(m(input)) """ + def __init__(self, num_parameters=1, init=0.25): self.num_parameters = num_parameters super(PReLU, self).__init__() @@ -444,6 +463,7 @@ class Softsign(Module): >>> print(input) >>> print(m(input)) """ + def forward(self, input): return F.softsign(input) @@ -465,19 +485,21 @@ class Tanhshrink(Module): >>> print(input) >>> print(m(input)) """ + def forward(self, input): return F.tanhshrink(input) def __repr__(self): return self.__class__.__name__ + ' ()' + class Softmin(Module): """Applies the Softmin function to an n-dimensional input Tensor rescaling them so that the elements of the n-dimensional output Tensor lie in the range `(0, 1)` and sum to 1 :math:`f(x) = exp(-x_i - {shift}) / sum_j exp(-x_j - {shift})` - + where :math:`{shift} = max_i - x_i` Shape: @@ -487,7 +509,7 @@ class Softmin(Module): Returns: a Tensor of the same dimension and shape as the input, with values in the range [0, 1] - + Examples:: >>> m = nn.Softmin() @@ -495,12 +517,14 @@ class Softmin(Module): >>> print(input) >>> print(m(input)) """ + def forward(self, input): return F.softmin(input) def __repr__(self): return self.__class__.__name__ + ' ()' + class Softmax(Module): """Applies the Softmax function to an n-dimensional input Tensor rescaling them so that the elements of the n-dimensional output Tensor @@ -521,7 +545,7 @@ class Softmax(Module): This module doesn't work directly with NLLLoss, which expects the Log to be computed between the Softmax and itself. Use Logsoftmax instead (it's faster). 
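As an aside on the note just above about NLLLoss and LogSoftmax: computing log(softmax(x)) in one fused pass is both faster and numerically safer than chaining the two modules, because the max-shift and the log cancel most of the exponentiation. A pure-Python sketch (illustrative only, not the library's implementation)::

    import math

    def log_softmax(xs):
        # log(softmax(x)) computed directly as x_i - shift - log(sum_j exp(x_j - shift)).
        # The shift (the max) only serves numerical stability and cancels out exactly.
        shift = max(xs)
        log_z = math.log(sum(math.exp(x - shift) for x in xs))
        return [x - shift - log_z for x in xs]

    print(log_softmax([1.0, 2.0, 3.0]))   # ~[-2.408, -1.408, -0.408]; exp() of these sums to 1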
- + Examples:: >>> m = nn.Softmax() @@ -529,6 +553,7 @@ class Softmax(Module): >>> print(input) >>> print(m(input)) """ + def forward(self, input): assert input.dim() == 2, 'Softmax requires a 2D tensor as input' return F.softmax(input) @@ -536,6 +561,7 @@ class Softmax(Module): def __repr__(self): return self.__class__.__name__ + ' ()' + class Softmax2d(Module): """Applies SoftMax over features to each spatial location @@ -550,7 +576,7 @@ class Softmax2d(Module): Returns: a Tensor of the same dimension and shape as the input with values in the range [0, 1] - + Examples:: >>> m = nn.Softmax2d() @@ -559,6 +585,7 @@ class Softmax2d(Module): >>> print(input) >>> print(m(input)) """ + def forward(self, input): assert input.dim() == 4, 'Softmax2d requires a 4D tensor as input' return F.softmax(input) @@ -566,10 +593,11 @@ class Softmax2d(Module): def __repr__(self): return self.__class__.__name__ + ' ()' + class LogSoftmax(Module): """Applies the Log(Softmax(x)) function to an n-dimensional input Tensor. The LogSoftmax formulation can be simplified as - + :math:`f_i(x) = log(1 / a * exp(x_i))` where :math:`a = sum_j exp(x_j)` Shape: @@ -587,9 +615,9 @@ class LogSoftmax(Module): >>> print(input) >>> print(m(input)) """ + def forward(self, input): return F.log_softmax(input) def __repr__(self): return self.__class__.__name__ + ' ()' - diff --git a/torch/nn/modules/batchnorm.py b/torch/nn/modules/batchnorm.py index 851489082b..268d8263d9 100644 --- a/torch/nn/modules/batchnorm.py +++ b/torch/nn/modules/batchnorm.py @@ -61,7 +61,7 @@ class BatchNorm1d(_BatchNorm): During training, this layer keeps a running estimate of its computed mean and variance. The running sum is kept with a default momentum of 0.1. - + During evaluation, this running mean/variance is used for normalization. Args: @@ -82,6 +82,7 @@ class BatchNorm1d(_BatchNorm): >>> input = autograd.Variable(torch.randn(20, 100)) >>> output = m(input) """ + def _check_input_dim(self, input): if input.dim() != 2 and input.dim() != 3: raise ValueError('expected 2D or 3D input (got {}D input)' @@ -102,7 +103,7 @@ class BatchNorm2d(_BatchNorm): During training, this layer keeps a running estimate of its computed mean and variance. The running sum is kept with a default momentum of 0.1. - + During evaluation, this running mean/variance is used for normalization. Args: @@ -123,6 +124,7 @@ class BatchNorm2d(_BatchNorm): >>> input = autograd.Variable(torch.randn(20, 100, 35, 45)) >>> output = m(input) """ + def _check_input_dim(self, input): if input.dim() != 4: raise ValueError('expected 4D input (got {}D input)' @@ -143,7 +145,7 @@ class BatchNorm3d(_BatchNorm): During training, this layer keeps a running estimate of its computed mean and variance. The running sum is kept with a default momentum of 0.1. - + During evaluation, this running mean/variance is used for normalization. 
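To make the running-statistics behaviour repeated in these BatchNorm docstrings concrete: during training the layer blends each batch's statistics into its stored estimates, and at evaluation time those stored estimates replace the batch statistics. A hedged pure-Python sketch of one common convention (helper names are illustrative, not the module's API)::

    import math

    def update_running_stats(running_mean, running_var, batch_mean, batch_var, momentum=0.1):
        # With momentum = 0.1 each new batch contributes 10% and the old estimate keeps 90%.
        new_mean = (1.0 - momentum) * running_mean + momentum * batch_mean
        new_var = (1.0 - momentum) * running_var + momentum * batch_var
        return new_mean, new_var

    def batchnorm_eval(x, running_mean, running_var, gamma=1.0, beta=0.0, eps=1e-5):
        # Evaluation-time normalisation uses the stored running estimates.
        return gamma * (x - running_mean) / math.sqrt(running_var + eps) + beta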
Args: @@ -164,6 +166,7 @@ class BatchNorm3d(_BatchNorm): >>> input = autograd.Variable(torch.randn(20, 100, 35, 45, 10)) >>> output = m(input) """ + def _check_input_dim(self, input): if input.dim() != 5: raise ValueError('expected 5D input (got {}D input)' diff --git a/torch/nn/modules/container.py b/torch/nn/modules/container.py index 64a80bfa7c..5f2c4cc3a4 100644 --- a/torch/nn/modules/container.py +++ b/torch/nn/modules/container.py @@ -4,13 +4,14 @@ import torch import warnings from .module import Module + class Container(Module): def __init__(self, **kwargs): super(Container, self).__init__() # DeprecationWarning is ignored by default <sigh> warnings.warn("nn.Container is deprecated. All of it's functionality " - "is now implemented in nn.Module. Subclass that instead.") + "is now implemented in nn.Module. Subclass that instead.") for key, value in kwargs.items(): self.add_module(key, value) diff --git a/torch/nn/modules/conv.py b/torch/nn/modules/conv.py index 3cf8fe910b..8892ec77d1 100644 --- a/torch/nn/modules/conv.py +++ b/torch/nn/modules/conv.py @@ -7,6 +7,7 @@ from .utils import _single, _pair, _triple class _ConvNd(Module): + def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, transposed, output_padding, groups, bias): super(_ConvNd, self).__init__() @@ -328,6 +329,7 @@ class Conv3d(_ConvNd): class _ConvTransposeMixin(object): + def forward(self, input, output_size=None): output_padding = self._output_padding(input, output_size) func = self._backend.ConvNd( @@ -400,6 +402,7 @@ class ConvTranspose1d(_ConvTransposeMixin, _ConvNd): weight (Tensor): the learnable weights of the module of shape (in_channels, out_channels, kernel_size[0], kernel_size[1]) bias (Tensor): the learnable bias of the module of shape (out_channels) """ + def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, output_padding=0, groups=1, bias=True): kernel_size = _single(kernel_size) diff --git a/torch/nn/modules/dropout.py b/torch/nn/modules/dropout.py index 904c1da5a9..f24e7e47ae 100644 --- a/torch/nn/modules/dropout.py +++ b/torch/nn/modules/dropout.py @@ -1,6 +1,7 @@ from .module import Module from .. import functional as F + class Dropout(Module): r"""Randomly zeroes some of the elements of the input tensor. The elements to zero are randomized on every forward call. @@ -19,11 +20,12 @@ class Dropout(Module): >>> input = autograd.Variable(torch.randn(20, 16)) >>> output = m(input) """ + def __init__(self, p=0.5, inplace=False): super(Dropout, self).__init__() if p < 0 or p > 1: raise ValueError("dropout probability has to be between 0 and 1, " - "but got {}".format(p)) + "but got {}".format(p)) self.p = p self.inplace = inplace @@ -70,11 +72,12 @@ class Dropout2d(Module): .. _Efficient Object Localization Using Convolutional Networks: http://arxiv.org/abs/1411.4280 """ + def __init__(self, p=0.5, inplace=False): super(Dropout2d, self).__init__() if p < 0 or p > 1: raise ValueError("dropout probability has to be between 0 and 1, " - "but got {}".format(p)) + "but got {}".format(p)) self.p = p self.inplace = inplace @@ -82,11 +85,12 @@ class Dropout2d(Module): return self._backend.Dropout2d(self.p, self.training, self.inplace)(input) def __repr__(self): - inplace_str=', inplace' if self.inplace else '' + inplace_str = ', inplace' if self.inplace else '' return self.__class__.__name__ + ' (' \ + 'p=' + str(self.p) \ + inplace_str + ')' + class Dropout3d(Module): r"""Randomly zeroes whole channels of the input tensor. 
The channels to zero are randomized on every forward call. @@ -120,11 +124,12 @@ class Dropout3d(Module): .. _Efficient Object Localization Using Convolutional Networks: http://arxiv.org/abs/1411.4280 """ + def __init__(self, p=0.5, inplace=False): super(Dropout3d, self).__init__() if p < 0 or p > 1: raise ValueError("dropout probability has to be between 0 and 1, " - "but got {}".format(p)) + "but got {}".format(p)) self.p = p self.inplace = inplace @@ -132,8 +137,7 @@ class Dropout3d(Module): return self._backend.Dropout3d(self.p, self.training, self.inplace)(input) def __repr__(self): - inplace_str=', inplace' if self.inplace else '' + inplace_str = ', inplace' if self.inplace else '' return self.__class__.__name__ + ' (' \ + 'p=' + str(self.p) \ + inplace_str + ')' - diff --git a/torch/nn/modules/linear.py b/torch/nn/modules/linear.py index e07be24b60..63aa420839 100644 --- a/torch/nn/modules/linear.py +++ b/torch/nn/modules/linear.py @@ -29,6 +29,7 @@ class Linear(Module): >>> output = m(input) >>> print(output.size()) """ + def __init__(self, in_features, out_features, bias=True): super(Linear, self).__init__() self.in_features = in_features @@ -41,7 +42,7 @@ class Linear(Module): self.reset_parameters() def reset_parameters(self): - stdv = 1./math.sqrt(self.weight.size(1)) + stdv = 1. / math.sqrt(self.weight.size(1)) self.weight.data.uniform_(-stdv, stdv) if self.bias is not None: self.bias.data.uniform_(-stdv, stdv) diff --git a/torch/nn/modules/loss.py b/torch/nn/modules/loss.py index fb2e581ab9..c676866b80 100644 --- a/torch/nn/modules/loss.py +++ b/torch/nn/modules/loss.py @@ -5,6 +5,7 @@ from .container import Sequential from .activation import LogSoftmax from .. import functional as F + def _assert_no_grad(variable): assert not variable.requires_grad, \ "nn criterions don't compute the gradient w.r.t. targets - please " \ @@ -52,7 +53,7 @@ class L1Loss(_Loss): class NLLLoss(_WeighedLoss): r"""The negative log likelihood loss. It is useful to train a classication problem with n classes - + If provided, the optional argument `weights` should be a 1D Tensor assigning weight to each of the classes. @@ -65,7 +66,7 @@ class NLLLoss(_WeighedLoss): adding a `LogSoftmax` layer in the last layer of your network. You may use `CrossEntropyLoss` instead, if you prefer not to add an extra layer. - + The target that this loss expects is a class index `(0 to N-1, where N = number of classes)` The loss can be described as:: @@ -201,14 +202,14 @@ class HingeEmbeddingLoss(_Loss): This is usually used for measuring whether two inputs are similar or dissimilar, e.g. using the L1 pairwise distance, and is typically used for learning nonlinear embeddings or semi-supervised learning:: - + { x_i, if y_i == 1 loss(x, y) = 1/n { { max(0, margin - x_i), if y_i == -1 - + `x` and `y` arbitrary shapes with a total of `n` elements each the sum operation still operates over all the elements, and divides by `n`. - + The division by `n` can be avoided if one sets the internal variable `sizeAverage=False`. The `margin` has a default value of `1`, or can be set in the constructor. @@ -221,9 +222,9 @@ class MultiLabelMarginLoss(_Loss): hinge loss (margin-based loss) between input `x` (a 2D mini-batch `Tensor`) and output `y` (which is a 2D `Tensor` of target class indices). For each sample in the mini-batch:: - + loss(x, y) = sum_ij(max(0, 1 - (x[y[j]] - x[i]))) / x.size(0) - + where `i == 0` to `x.size(0)`, `j == 0` to `y.size(0)`, `y[j] != 0`, and `i != y[j]` for all `i` and `j`. 
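The HingeEmbeddingLoss formula quoted in one of the hunks above is compact enough to transcribe directly; a pure-Python sketch with plain lists standing in for tensors (illustrative only)::

    def hinge_embedding_loss(xs, ys, margin=1.0, size_average=True):
        # x_i when y_i == 1, max(0, margin - x_i) when y_i == -1, as in the docstring.
        terms = [x if y == 1 else max(0.0, margin - x) for x, y in zip(xs, ys)]
        total = sum(terms)
        return total / len(terms) if size_average else total

    print(hinge_embedding_loss([0.2, 2.5], [1, -1]))   # (0.2 + 0.0) / 2 = 0.1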
@@ -242,11 +243,11 @@ class SmoothL1Loss(_Loss): It is less sensitive to outliers than the `MSELoss` and in some cases prevents exploding gradients (e.g. see "Fast R-CNN" paper by Ross Girshick). Also known as the Huber loss:: - + { 0.5 * (x_i - y_i)^2, if |x_i - y_i| < 1 loss(x, y) = 1/n \sum { { |x_i - y_i| - 0.5, otherwise - + `x` and `y` arbitrary shapes with a total of `n` elements each the sum operation still operates over all the elements, and divides by `n`. @@ -260,11 +261,11 @@ class SoftMarginLoss(_Loss): r"""Creates a criterion that optimizes a two-class classification logistic loss between input `x` (a 2D mini-batch Tensor) and target `y` (which is a tensor containing either `1` or `-1`). - + :: - + loss(x, y) = sum_i (log(1 + exp(-y[i]*x[i]))) / x.nelement() - + The normalization by the number of elements in the input can be disabled by setting `self.sizeAverage` to `False`. """ @@ -287,7 +288,7 @@ class CrossEntropyLoss(_WeighedLoss): `target` for each value of a 1D tensor of size `n` The loss can be described as:: - + loss(x, class) = -log(exp(x[class]) / (\sum_j exp(x[j]))) = -x[class] + log(\sum_j exp(x[j])) @@ -302,25 +303,28 @@ class CrossEntropyLoss(_WeighedLoss): - Target: :math:`(N)` where each value is `0 <= targets[i] <= C-1` """ + def forward(self, input, target): _assert_no_grad(target) return F.cross_entropy(input, target, - self.weight, self.size_average) + self.weight, self.size_average) + class MultiLabelSoftMarginLoss(_WeighedLoss): r"""Creates a criterion that optimizes a multi-label one-versus-all loss based on max-entropy, between input `x` (a 2D mini-batch `Tensor`) and target `y` (a binary 2D `Tensor`). For each sample in the minibatch:: - + loss(x, y) = - sum_i (y[i] log( exp(x[i]) / (1 + exp(x[i]))) + (1-y[i]) log(1/(1+exp(x[i])))) / x:nElement() - + where `i == 0` to `x.nElement()-1`, `y[i] in {0,1}`. `y` and `x` must have the same size. """ + def forward(self, input, target): return F.binary_cross_entropy(torch.sigmoid(input), target, - self.weight, self.size_average) + self.weight, self.size_average) class CosineEmbeddingLoss(Module): @@ -334,16 +338,17 @@ class CosineEmbeddingLoss(Module): If `margin` is missing, the default value is `0`. The loss function for each sample is:: - + { 1 - cos(x1, x2), if y == 1 loss(x, y) = { { max(0, cos(x1, x2) - margin), if y == -1 - + If the internal variable `sizeAverage` is equal to `True`, the loss function averages the loss over the batch samples; if `sizeAverage` is `False`, then the loss function sums over the batch samples. By default, `sizeAverage = True`. """ + def __init__(self, margin=0, size_average=True): super(CosineEmbeddingLoss, self).__init__() self.margin = margin @@ -351,7 +356,7 @@ class CosineEmbeddingLoss(Module): def forward(self, input1, input2, target): return self._backend.CosineEmbeddingLoss(self.margin, - self.size_average)(input1, input2, target) + self.size_average)(input1, input2, target) class MarginRankingLoss(Module): @@ -363,14 +368,15 @@ class MarginRankingLoss(Module): (have a larger value) than the second input, and vice-versa for `y == -1`. The loss function for each sample in the mini-batch is:: - + loss(x, y) = max(0, -y * (x1 - x2) + margin) - + if the internal variable `sizeAverage = True`, the loss function averages the loss over the batch samples; if `sizeAverage = False`, then the loss function sums over the batch samples. By default, `sizeAverage` equals to `True`. 
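The MarginRankingLoss formula at the end of the hunk above is likewise easy to spell out; a minimal pure-Python transcription (illustrative names only)::

    def margin_ranking_loss(x1s, x2s, ys, margin=0.0, size_average=True):
        # max(0, -y * (x1 - x2) + margin) per sample, averaged (or summed) over the batch.
        terms = [max(0.0, -y * (a - b) + margin) for a, b, y in zip(x1s, x2s, ys)]
        total = sum(terms)
        return total / len(terms) if size_average else total

    # y == 1 asks for x1 to be ranked above x2; the second pair violates that and is penalised.
    print(margin_ranking_loss([0.9, 0.1], [0.2, 0.8], [1, 1]))   # (0.0 + 0.7) / 2 = 0.35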
""" + def __init__(self, margin=0, size_average=True): super(MarginRankingLoss, self).__init__() self.margin = margin @@ -378,7 +384,7 @@ class MarginRankingLoss(Module): def forward(self, input1, input2, target): return self._backend.MarginRankingLoss(self.margin, - self.size_average)(input1, input2, target) + self.size_average)(input1, input2, target) class MultiMarginLoss(Module): @@ -401,6 +407,7 @@ class MultiMarginLoss(Module): However, if the field `sizeAverage` is set to `False`, the losses are instead summed. """ + def __init__(self, p=1, margin=1, weight=None, size_average=True): super(MultiMarginLoss, self).__init__() if p != 1 and p != 2: @@ -413,7 +420,7 @@ class MultiMarginLoss(Module): def forward(self, input, target): return self._backend.MultiMarginLoss(self.size_average, self.p, - self.margin, weight=self.weight)(input, target) + self.margin, weight=self.weight)(input, target) # TODO: L1HingeEmbeddingCriterion diff --git a/torch/nn/modules/module.py b/torch/nn/modules/module.py index ef085ad80c..737fd625ab 100644 --- a/torch/nn/modules/module.py +++ b/torch/nn/modules/module.py @@ -60,7 +60,7 @@ class Module(object): if not isinstance(param, Parameter): if isinstance(param, Variable): raise TypeError("can't use a Variable as a module " - "parameter. Convert it to torch.nn.Parameter first.") + "parameter. Convert it to torch.nn.Parameter first.") if param is not None: param = Parameter(param) self._parameters[name] = param @@ -398,6 +398,6 @@ class Module(object): for key, module in self._modules.items(): modstr = module.__repr__() modstr = _addindent(modstr, 2) - tmpstr = tmpstr + ' (' + key + '): ' + modstr + '\n' + tmpstr = tmpstr + ' (' + key + '): ' + modstr + '\n' tmpstr = tmpstr + ')' return tmpstr diff --git a/torch/nn/modules/normalization.py b/torch/nn/modules/normalization.py index 74e29ce599..e2bb96c1c6 100644 --- a/torch/nn/modules/normalization.py +++ b/torch/nn/modules/normalization.py @@ -2,6 +2,7 @@ from .module import Module class CrossMapLRN2d(Module): + def __init__(self, size, alpha=1e-4, beta=0.75, k=1): super(CrossMapLRN2d, self).__init__() self.size = size diff --git a/torch/nn/modules/padding.py b/torch/nn/modules/padding.py index 31127481a4..e02c3fd05f 100644 --- a/torch/nn/modules/padding.py +++ b/torch/nn/modules/padding.py @@ -2,6 +2,8 @@ from .module import Module from .utils import _quadruple, _ntuple # TODO: grad_output size asserts in THNN + + class ReflectionPad2d(Module): def __init__(self, padding): @@ -14,6 +16,7 @@ class ReflectionPad2d(Module): def __repr__(self): return self.__class__.__name__ + ' ' + str(self.padding) + class ReplicationPad2d(Module): def __init__(self, padding): @@ -26,6 +29,7 @@ class ReplicationPad2d(Module): def __repr__(self): return self.__class__.__name__ + ' ' + str(self.padding) + class ReplicationPad3d(Module): def __init__(self, padding): @@ -39,4 +43,3 @@ class ReplicationPad3d(Module): return self.__class__.__name__ + ' ' + str(self.padding) # TODO: ZeroPad2d - diff --git a/torch/nn/modules/pixelshuffle.py b/torch/nn/modules/pixelshuffle.py index 67f7ac13ab..cfc6df2e5b 100644 --- a/torch/nn/modules/pixelshuffle.py +++ b/torch/nn/modules/pixelshuffle.py @@ -5,7 +5,7 @@ from .. import functional as F class PixelShuffle(Module): r"""Rearranges elements in a Tensor of shape :math:`(*, C * r^2, H, W]` to a tensor of shape :math:`(C, H * r, W * r)`. - + This is useful for implementing efficient sub-pixel convolution with a stride of :math:`1/r`. 
diff --git a/torch/nn/modules/pooling.py b/torch/nn/modules/pooling.py index aae6ab0989..26fbba916c 100644 --- a/torch/nn/modules/pooling.py +++ b/torch/nn/modules/pooling.py @@ -48,6 +48,7 @@ class MaxPool1d(Module): .. _link: https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md """ + def __init__(self, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False): super(MaxPool1d, self).__init__() @@ -60,8 +61,8 @@ class MaxPool1d(Module): def forward(self, input): return F.max_pool1d(input, self.kernel_size, self.stride, - self.padding, self.dilation, self.ceil_mode, - self.return_indices) + self.padding, self.dilation, self.ceil_mode, + self.return_indices) def __repr__(self): return self.__class__.__name__ + ' (' \ @@ -71,6 +72,7 @@ class MaxPool1d(Module): + ', dilation=' + str(self.dilation) \ + ', ceil_mode=' + str(self.ceil_mode) + ')' + class MaxPool2d(Module): r"""Applies a 2D max pooling over an input signal composed of several input planes. @@ -124,6 +126,7 @@ class MaxPool2d(Module): .. _link: https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md """ + def __init__(self, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False): super(MaxPool2d, self).__init__() @@ -136,17 +139,17 @@ class MaxPool2d(Module): def forward(self, input): return F.max_pool2d(input, self.kernel_size, self.stride, - self.padding, self.dilation, self.ceil_mode, - self.return_indices) + self.padding, self.dilation, self.ceil_mode, + self.return_indices) def __repr__(self): kh, kw = _pair(self.kernel_size) dh, dw = _pair(self.stride) padh, padw = _pair(self.padding) dilh, dilw = _pair(self.dilation) - padding_str=', padding=(' + str(padh) + ', ' + str(padw) + ')' \ - if padh != 0 and padw !=0 else '' - dilation_str=(', dilation=(' + str(dilh) + ', ' + str(dilw) + ')' \ + padding_str = ', padding=(' + str(padh) + ', ' + str(padw) + ')' \ + if padh != 0 and padw != 0 else '' + dilation_str = (', dilation=(' + str(dilh) + ', ' + str(dilw) + ')' if dilh != 0 and dilw != 0 else '') return self.__class__.__name__ + ' (' \ + 'size=(' + str(kh) + ', ' + str(kw) + ')' \ @@ -185,6 +188,7 @@ class MaxUnpool1d(Module): 0 2 0 4 0 6 0 8 [torch.FloatTensor of size 1x1x8] """ + def __init__(self, kernel_size, stride=None, padding=0): super(MaxUnpool1d, self).__init__() self.kernel_size = _single(kernel_size) @@ -234,6 +238,7 @@ class MaxUnpool2d(Module): 0 14 0 16 [torch.FloatTensor of size 1x1x4x4] """ + def __init__(self, kernel_size, stride=None, padding=0): super(MaxUnpool2d, self).__init__() self.kernel_size = _pair(kernel_size) @@ -276,6 +281,7 @@ class MaxUnpool3d(Module): >>> unpooled_output.size() torch.Size([20, 16, 51, 33, 15]) """ + def __init__(self, kernel_size, stride=None, padding=0): super(MaxUnpool3d, self).__init__() self.kernel_size = _triple(kernel_size) @@ -330,6 +336,7 @@ class AvgPool1d(Module): 2 4 6 [torch.FloatTensor of size 1x1x3] """ + def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True): super(AvgPool1d, self).__init__() @@ -391,8 +398,9 @@ class AvgPool2d(Module): >>> input = autograd.Variable(torch.randn(20, 16, 50, 32)) >>> output = m(input) """ + def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False, - count_include_pad=True): + count_include_pad=True): super(AvgPool2d, self).__init__() self.kernel_size = kernel_size self.stride = stride or kernel_size @@ -402,7 +410,7 @@ class AvgPool2d(Module): def forward(self, input): return 
F.avg_pool2d(input, self.kernel_size, self.stride, - self.padding, self.ceil_mode, self.count_include_pad) + self.padding, self.ceil_mode, self.count_include_pad) class MaxPool3d(Module): @@ -459,8 +467,9 @@ class MaxPool3d(Module): .. _link: https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md """ + def __init__(self, kernel_size, stride=None, padding=0, dilation=1, - return_indices=False, ceil_mode=False): + return_indices=False, ceil_mode=False): super(MaxPool3d, self).__init__() self.kernel_size = kernel_size self.stride = stride or kernel_size @@ -471,8 +480,9 @@ class MaxPool3d(Module): def forward(self, input): return F.max_pool3d(input, self.kernel_size, self.stride, - self.padding, self.dilation, self.ceil_mode, - self.return_indices) + self.padding, self.dilation, self.ceil_mode, + self.return_indices) + class AvgPool3d(Module): r"""Applies a 3D average pooling over an input signal composed of several input @@ -515,6 +525,7 @@ class AvgPool3d(Module): >>> input = autograd.Variable(torch.randn(20, 16, 50,44, 31)) >>> output = m(input) """ + def __init__(self, kernel_size, stride=None): super(AvgPool3d, self).__init__() self.kernel_size = kernel_size @@ -554,8 +565,9 @@ class FractionalMaxPool2d(Module): .. _Fractional MaxPooling: http://arxiv.org/abs/1412.6071 """ + def __init__(self, kernel_size, output_size=None, output_ratio=None, - return_indices=False, _random_samples=None): + return_indices=False, _random_samples=None): super(FractionalMaxPool2d, self).__init__() self.kh, self.kw = _pair(kernel_size) self.return_indices = return_indices @@ -572,7 +584,7 @@ class FractionalMaxPool2d(Module): assert 0 < self.rw < 1 else: raise ValueError("FractionalMaxPool2d requires specifying either " - "an output size, or a pooling ratio") + "an output size, or a pooling ratio") def forward(self, input): kwargs = {} @@ -581,8 +593,8 @@ class FractionalMaxPool2d(Module): else: kwargs['output_ratio'] = self.rh, self.rw func = self._backend.FractionalMaxPool2d(self.kw, self.kh, - return_indices=self.return_indices, - _random_samples=self._random_samples, **kwargs) + return_indices=self.return_indices, + _random_samples=self._random_samples, **kwargs) return func(input) @@ -632,7 +644,7 @@ class LPPool2d(Module): def forward(self, input): return F.lp_pool2d(input, self.norm_type, self.kernel_size, - self.stride, self.ceil_mode) + self.stride, self.ceil_mode) # TODO: AdaptiveMaxPool2d diff --git a/torch/nn/modules/rnn.py b/torch/nn/modules/rnn.py index b67edb49db..78ddb0c6f0 100644 --- a/torch/nn/modules/rnn.py +++ b/torch/nn/modules/rnn.py @@ -206,6 +206,7 @@ class LSTM(RNNBase): >>> c0 = Variable(torch.randn(2, 3, 20)) >>> output, hn = rnn(input, (h0, c0)) """ + def __init__(self, *args, **kwargs): super(LSTM, self).__init__('LSTM', *args, **kwargs) @@ -403,11 +404,11 @@ class LSTMCell(RNNCellBase): self.input_size = input_size self.hidden_size = hidden_size self.bias = bias - self.weight_ih = Parameter(torch.Tensor(4*hidden_size, input_size)) - self.weight_hh = Parameter(torch.Tensor(4*hidden_size, hidden_size)) + self.weight_ih = Parameter(torch.Tensor(4 * hidden_size, input_size)) + self.weight_hh = Parameter(torch.Tensor(4 * hidden_size, hidden_size)) if bias: - self.bias_ih = Parameter(torch.Tensor(4*hidden_size)) - self.bias_hh = Parameter(torch.Tensor(4*hidden_size)) + self.bias_ih = Parameter(torch.Tensor(4 * hidden_size)) + self.bias_hh = Parameter(torch.Tensor(4 * hidden_size)) else: self.register_parameter('bias_ih', None) self.register_parameter('bias_hh', None) @@ 
-428,7 +429,7 @@ class LSTMCell(RNNCellBase): class GRUCell(RNNCellBase): r"""A gated recurrent unit (GRU) cell - + .. math:: \begin{array}{ll} @@ -472,11 +473,11 @@ class GRUCell(RNNCellBase): self.input_size = input_size self.hidden_size = hidden_size self.bias = bias - self.weight_ih = Parameter(torch.Tensor(3*hidden_size, input_size)) - self.weight_hh = Parameter(torch.Tensor(3*hidden_size, hidden_size)) + self.weight_ih = Parameter(torch.Tensor(3 * hidden_size, input_size)) + self.weight_hh = Parameter(torch.Tensor(3 * hidden_size, hidden_size)) if bias: - self.bias_ih = Parameter(torch.Tensor(3*hidden_size)) - self.bias_hh = Parameter(torch.Tensor(3*hidden_size)) + self.bias_ih = Parameter(torch.Tensor(3 * hidden_size)) + self.bias_hh = Parameter(torch.Tensor(3 * hidden_size)) else: self.register_parameter('bias_ih', None) self.register_parameter('bias_hh', None) diff --git a/torch/nn/modules/sparse.py b/torch/nn/modules/sparse.py index f56ced86c3..6af3c90616 100644 --- a/torch/nn/modules/sparse.py +++ b/torch/nn/modules/sparse.py @@ -62,6 +62,7 @@ class Embedding(Module): [torch.FloatTensor of size 1x4x3] """ + def __init__(self, num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2, scale_grad_by_freq=False, sparse=False): @@ -89,7 +90,7 @@ class Embedding(Module): return self._backend.Embedding( padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse - )(input, self.weight) + )(input, self.weight) def __repr__(self): s = '{name}({num_embeddings}, {embedding_dim}' diff --git a/torch/nn/modules/upsampling.py b/torch/nn/modules/upsampling.py index 95bb0c8857..96ec32fd9e 100644 --- a/torch/nn/modules/upsampling.py +++ b/torch/nn/modules/upsampling.py @@ -6,6 +6,7 @@ from .utils import _pair class _UpsamplingBase(Module): + def __init__(self, size=None, scale_factor=None): super(_UpsamplingBase, self).__init__() if size is None and scale_factor is None: @@ -17,10 +18,12 @@ class _UpsamplingBase(Module): class UpsamplingNearest2d(_UpsamplingBase): + def forward(self, input): return F.upsample_nearest(input, self.size, self.scale_factor) class UpsamplingBillinear2d(_UpsamplingBase): + def forward(self, input): return F.upsample_billinear(input, self.size, self.scale_factor) diff --git a/torch/nn/modules/utils.py b/torch/nn/modules/utils.py index feb6c56d98..c3dc5126e6 100644 --- a/torch/nn/modules/utils.py +++ b/torch/nn/modules/utils.py @@ -1,6 +1,7 @@ import collections from itertools import repeat + def _ntuple(n): def parse(x): if isinstance(x, collections.Iterable): diff --git a/torch/nn/parallel/_functions.py b/torch/nn/parallel/_functions.py index 5c40af2b81..c815c08eae 100644 --- a/torch/nn/parallel/_functions.py +++ b/torch/nn/parallel/_functions.py @@ -33,7 +33,7 @@ class Gather(Function): def backward(self, grad_output): return comm.scatter(grad_output, self.input_gpus, self.input_sizes, - self.dim) + self.dim) class Scatter(Function): diff --git a/torch/nn/parallel/parallel_apply.py b/torch/nn/parallel/parallel_apply.py index fc601cef74..32df2b1ff5 100644 --- a/torch/nn/parallel/parallel_apply.py +++ b/torch/nn/parallel/parallel_apply.py @@ -32,7 +32,7 @@ def parallel_apply(modules, inputs): threads = [threading.Thread(target=_worker, args=(module, input, results, lock)) - for module, input in zip(modules, inputs)] + for module, input in zip(modules, inputs)] for thread in threads: thread.start() @@ -45,4 +45,3 @@ def parallel_apply(modules, inputs): raise output outputs.append(output) return outputs - diff --git 
a/torch/optim/adadelta.py b/torch/optim/adadelta.py index 0a555f2b70..f1d7b3f809 100644 --- a/torch/optim/adadelta.py +++ b/torch/optim/adadelta.py @@ -1,5 +1,6 @@ from .optimizer import Optimizer + class Adadelta(Optimizer): """Implements Adadelta algorithm. @@ -61,4 +62,3 @@ class Adadelta(Optimizer): acc_delta.mul_(rho).addcmul_(1 - rho, delta, delta) return loss - diff --git a/torch/optim/adagrad.py b/torch/optim/adagrad.py index d273f113a5..9d7448c4c1 100644 --- a/torch/optim/adagrad.py +++ b/torch/optim/adagrad.py @@ -1,5 +1,6 @@ from .optimizer import Optimizer + class Adagrad(Optimizer): """Implements Adagrad algorithm. @@ -53,4 +54,3 @@ class Adagrad(Optimizer): p.data.addcdiv_(-clr, grad, std) return loss - diff --git a/torch/optim/adam.py b/torch/optim/adam.py index 5b77e42c78..38756edc73 100644 --- a/torch/optim/adam.py +++ b/torch/optim/adam.py @@ -1,6 +1,7 @@ import math from .optimizer import Optimizer + class Adam(Optimizer): """Implements Adam algorithm. @@ -21,9 +22,9 @@ class Adam(Optimizer): """ def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, - weight_decay=0): + weight_decay=0): defaults = dict(lr=lr, betas=betas, eps=eps, - weight_decay=weight_decay) + weight_decay=weight_decay) super(Adam, self).__init__(params, defaults) def step(self, closure=None): @@ -71,4 +72,3 @@ class Adam(Optimizer): p.data.addcdiv_(-step_size, exp_avg, denom) return loss - diff --git a/torch/optim/adamax.py b/torch/optim/adamax.py index d939f8729d..93997be24a 100644 --- a/torch/optim/adamax.py +++ b/torch/optim/adamax.py @@ -1,6 +1,7 @@ import torch from .optimizer import Optimizer + class Adamax(Optimizer): """Implements Adamax algorithm (a variant of Adam based on infinity norm). @@ -21,7 +22,7 @@ class Adamax(Optimizer): """ def __init__(self, params, lr=1e-2, betas=(0.9, 0.999), eps=1e-38, - weight_decay=0): + weight_decay=0): defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay) super(Adamax, self).__init__(params, defaults) @@ -71,5 +72,3 @@ class Adamax(Optimizer): p.data.addcdiv_(-clr, exp_avg, exp_inf) return loss - - diff --git a/torch/optim/asgd.py b/torch/optim/asgd.py index 37fe336c8a..0abdd6a80f 100644 --- a/torch/optim/asgd.py +++ b/torch/optim/asgd.py @@ -22,7 +22,7 @@ class ASGD(Optimizer): def __init__(self, params, lr=1e-2, lambd=1e-4, alpha=0.75, t0=1e6, weight_decay=0): defaults = dict(lr=lr, lambd=lambd, alpha=alpha, t0=t0, - weight_decay=weight_decay) + weight_decay=weight_decay) super(ASGD, self).__init__(params, defaults) def step(self, closure=None): @@ -67,8 +67,7 @@ class ASGD(Optimizer): # update eta and mu state['eta'] = (group['lr'] / - math.pow((1 + group['lambd'] * group['lr'] * state['step']), group['alpha'])) + math.pow((1 + group['lambd'] * group['lr'] * state['step']), group['alpha'])) state['mu'] = 1 / max(1, state['step'] - group['t0']) return loss - diff --git a/torch/optim/lbfgs.py b/torch/optim/lbfgs.py index 6971d8edaf..36bd4d29e8 100644 --- a/torch/optim/lbfgs.py +++ b/torch/optim/lbfgs.py @@ -33,8 +33,8 @@ class LBFGS(Optimizer): """ def __init__(self, params, lr=1, max_iter=20, max_eval=None, - tolerance_grad=1e-5, tolerance_change=1e-9, history_size=100, - line_search_fn=None): + tolerance_grad=1e-5, tolerance_change=1e-9, history_size=100, + line_search_fn=None): if max_eval is None: max_eval = max_iter * 5 // 4 defaults = dict(lr=lr, max_iter=max_iter, max_eval=max_eval, @@ -44,7 +44,7 @@ class LBFGS(Optimizer): if len(self.param_groups) != 1: raise ValueError("LBFGS doesn't support per-parameter options " - 
"(parameter groups)") + "(parameter groups)") self._params = self.param_groups[0]['params'] self._numel_cache = None @@ -56,13 +56,13 @@ class LBFGS(Optimizer): def _gather_flat_grad(self): return torch.cat( - tuple(param.grad.data.view(-1) for param in self._params), 0) + tuple(param.grad.data.view(-1) for param in self._params), 0) def _add_grad(self, step_size, update): offset = 0 for p in self._params: numel = p.numel() - p.data.add_(step_size, update[offset:offset+numel]) + p.data.add_(step_size, update[offset:offset + numel]) offset += numel assert offset == self._numel() @@ -158,7 +158,7 @@ class LBFGS(Optimizer): # iteration in L-BFGS loop collapsed to use just one buffer q = flat_grad.neg() - for i in range(num_old-1, -1, -1): + for i in range(num_old - 1, -1, -1): al[i] = old_dirs[i].dot(q) * ro[i] q.add_(-al[i], old_stps[i]) diff --git a/torch/optim/optimizer.py b/torch/optim/optimizer.py index 6eaf68e9d4..599143a6d9 100644 --- a/torch/optim/optimizer.py +++ b/torch/optim/optimizer.py @@ -21,8 +21,8 @@ class Optimizer(object): def __init__(self, params, defaults): if isinstance(params, Variable) or torch.is_tensor(params): raise TypeError("params argument given to the optimizer should be " - "an iterable of Variables or dicts, but got " + - torch.typename(params)) + "an iterable of Variables or dicts, but got " + + torch.typename(params)) self.state = defaultdict(dict) self.param_groups = list(params) @@ -37,15 +37,15 @@ class Optimizer(object): group_set = set(group['params']) if not param_set.isdisjoint(group_set): raise ValueError("some parameters appear in more than one " - "parameter group") + "parameter group") param_set.update(group_set) for name, default in defaults.items(): for i, group in enumerate(self.param_groups): if default is required and name not in group: raise ValueError("parameter group " + str(i) + " didn't " - "specify a value of required optimization parameter " - + name) + "specify a value of required optimization parameter " + + name) else: group.setdefault(name, default) @@ -53,10 +53,10 @@ class Optimizer(object): for param in group['params']: if not isinstance(param, Variable): raise TypeError("optimizer can only optimize Variables, " - "but one of the params is " + torch.typename(param)) + "but one of the params is " + torch.typename(param)) if not param.requires_grad: raise ValueError("optimizing a parameter that doesn't " - "require gradients") + "require gradients") if param.creator is not None: raise ValueError("can't optimize a non-leaf Variable") @@ -104,17 +104,17 @@ class Optimizer(object): if len(groups) != len(saved_groups): raise ValueError("loaded state dict has a different number of " - "parameter groups") + "parameter groups") param_lens = (len(g['params']) for g in groups) saved_lens = (len(g['params']) for g in saved_groups) if any(p_len != s_len for p_len, s_len in zip(param_lens, saved_lens)): raise ValueError("loaded state dict contains a parameter group " - "that doesn't match the size of optimizer's group") + "that doesn't match the size of optimizer's group") # Update the state id_map = {old_id: p for old_id, p in - zip(chain(*(g['params'] for g in saved_groups)), - chain(*(g['params'] for g in groups)))} + zip(chain(*(g['params'] for g in saved_groups)), + chain(*(g['params'] for g in groups)))} self.state = {id_map.get(k, k): v for k, v in state_dict['state'].items()} # Update parameter groups, setting their 'params' value diff --git a/torch/optim/rmsprop.py b/torch/optim/rmsprop.py index 1b1af293f7..00742d1e98 100644 --- 
a/torch/optim/rmsprop.py +++ b/torch/optim/rmsprop.py @@ -1,5 +1,6 @@ from .optimizer import Optimizer + class RMSprop(Optimizer): """Implements RMSprop algorithm. @@ -52,6 +53,3 @@ class RMSprop(Optimizer): p.data.addcdiv_(-group['lr'], grad, avg) return loss - - - diff --git a/torch/optim/rprop.py b/torch/optim/rprop.py index 98c0939f47..93403d1f78 100644 --- a/torch/optim/rprop.py +++ b/torch/optim/rprop.py @@ -1,6 +1,7 @@ import math from .optimizer import Optimizer + class Rprop(Optimizer): """Implements the resilient backpropagation algorithm. @@ -65,4 +66,3 @@ class Rprop(Optimizer): state['prev'].copy_(grad) return loss - diff --git a/torch/serialization.py b/torch/serialization.py index 1724839a5d..e41c137427 100644 --- a/torch/serialization.py +++ b/torch/serialization.py @@ -83,7 +83,7 @@ def location_tag(storage): if location: return location raise RuntimeError("don't know how to determine data location of " + - torch.typename(storage)) + torch.typename(storage)) def default_restore_location(storage, location): @@ -92,7 +92,7 @@ def default_restore_location(storage, location): if result is not None: return result raise RuntimeError("don't know how to restore data location of " + - torch.typename(storage) + " (tagged with " + location + ")") + torch.typename(storage) + " (tagged with " + location + ")") def normalize_storage_type(storage_type): @@ -143,8 +143,8 @@ def _save(obj, f, pickle_module, pickle_protocol): source = inspect.getsource(obj) except (TypeError, IOError): warnings.warn("Couldn't retrieve source code for container of " - "type " + obj.__name__ + ". It won't be checked " - "for correctness upon loading.") + "type " + obj.__name__ + ". It won't be checked " + "for correctness upon loading.") return (obj, source_file, source) if torch.is_tensor(obj): serialized_tensors[obj._cdata] = obj @@ -165,7 +165,7 @@ def _save(obj, f, pickle_module, pickle_protocol): storage_id = None pickle_module.dump((key, storage_id, type(tensor)), f, - protocol=pickle_protocol) + protocol=pickle_protocol) f.flush() tensor._write_metadata(f) @@ -178,7 +178,7 @@ def _save(obj, f, pickle_module, pickle_protocol): if root is not storage: storage_views_roots[root._cdata] = root storage_views.append((storage._cdata, root._cdata, offset, - storage.size())) + storage.size())) for view_info in storage_views: del serialized_storages[view_info[0]] serialized_storages.update(storage_views_roots) @@ -188,7 +188,7 @@ def _save(obj, f, pickle_module, pickle_protocol): location = location_tag(storage) storage_type = normalize_storage_type(type(storage)) pickle_module.dump((key, location, storage_type), f, - protocol=pickle_protocol) + protocol=pickle_protocol) f.flush() storage._write_file(f) @@ -203,7 +203,7 @@ def _save(obj, f, pickle_module, pickle_protocol): sys_info = dict( protocol_version=1000, little_endian=sys.byteorder == 'little', - type_sizes = dict( + type_sizes=dict( short=SHORT_SIZE, int=INT_SIZE, long=LONG_SIZE, @@ -273,10 +273,10 @@ def _load(f, map_location, pickle_module): if container_type.dump_patches: file_name = container_type.__name__ + '.patch' diff = difflib.unified_diff( - current_source.split('\n'), - original_source.split('\n'), - source_file, - source_file, lineterm="") + current_source.split('\n'), + original_source.split('\n'), + source_file, + source_file, lineterm="") lines = '\n'.join(diff) try: with open(file_name, 'a+') as f: @@ -312,7 +312,7 @@ def _load(f, map_location, pickle_module): return deserialized_objects[int(saved_id)] with closing(tarfile.open(fileobj=f, 
mode='r:', format=tarfile.PAX_FORMAT)) as tar, \ - mkdtemp() as tmpdir: + mkdtemp() as tmpdir: tar.extract('storages', path=tmpdir) with open(os.path.join(tmpdir, 'storages'), 'rb', 0) as f: @@ -327,7 +327,7 @@ def _load(f, map_location, pickle_module): storage_views = pickle_module.load(f) for target_cdata, root_cdata, offset, size in storage_views: root = deserialized_objects[root_cdata] - deserialized_objects[target_cdata] = root[offset:offset+size] + deserialized_objects[target_cdata] = root[offset:offset + size] tar.extract('tensors', path=tmpdir) with open(os.path.join(tmpdir, 'tensors'), 'rb', 0) as f: diff --git a/torch/sparse/__init__.py b/torch/sparse/__init__.py index 3c5a50ed8a..b4c8bbe3fb 100644 --- a/torch/sparse/__init__.py +++ b/torch/sparse/__init__.py @@ -5,26 +5,46 @@ import sys _sparse_tensor_classes = set() + class DoubleTensor(_C.SparseDoubleTensorBase): + def is_signed(self): return True + + class FloatTensor(_C.SparseFloatTensorBase): + def is_signed(self): return True + + class LongTensor(_C.SparseLongTensorBase): + def is_signed(self): return True + + class IntTensor(_C.SparseIntTensorBase): + def is_signed(self): return True + + class ShortTensor(_C.SparseShortTensorBase): + def is_signed(self): return True + + class CharTensor(_C.SparseCharTensorBase): + def is_signed(self): # TODO return False + + class ByteTensor(_C.SparseByteTensorBase): + def is_signed(self): return False diff --git a/torch/tensor.py b/torch/tensor.py index 14147391df..787c5dc35f 100644 --- a/torch/tensor.py +++ b/torch/tensor.py @@ -343,7 +343,7 @@ class _TensorBase(object): xtensor = src.new().set_(src) xsize = list(xtensor.size()) - for i in _range(len(repeats)-src.dim()): + for i in _range(len(repeats) - src.dim()): xsize = [1] + xsize size = torch.Size([a * b for a, b in zip(xsize, repeats)]) @@ -351,8 +351,8 @@ class _TensorBase(object): result.resize_(size) urtensor = result.new(result) for i in _range(xtensor.dim()): - urtensor = urtensor.unfold(i,xtensor.size(i),xtensor.size(i)) - for i in _range(urtensor.dim()-xtensor.dim()): + urtensor = urtensor.unfold(i, xtensor.size(i), xtensor.size(i)) + for i in _range(urtensor.dim() - xtensor.dim()): xsize = [1] + xsize xtensor.resize_(torch.Size(xsize)) xxtensor = xtensor.expand_as(urtensor) @@ -391,7 +391,7 @@ class _TensorBase(object): return self.set_(self.storage(), self.storage_offset(), torch.Size(sizes), tuple(strides)) - #TODO: add tests for operators + # TODO: add tests for operators def __add__(self, other): return self.add(other) __radd__ = __add__ @@ -430,7 +430,7 @@ class _TensorBase(object): elif dim_self == 2 and dim_other == 2: return self.mm(other) raise ValueError("both arguments to __matmul__ need to be 1D or 2D, " - "but they are {}D and {}D".format(dim_self, dim_other)) + "but they are {}D and {}D".format(dim_self, dim_other)) def __pow__(self, other): return self.pow(other) diff --git a/torch/utils/__init__.py b/torch/utils/__init__.py index 8b13789179..e69de29bb2 100644 --- a/torch/utils/__init__.py +++ b/torch/utils/__init__.py @@ -1 +0,0 @@ - diff --git a/torch/utils/data/dataset.py b/torch/utils/data/dataset.py index bdf8d52915..cd9506d3cf 100644 --- a/torch/utils/data/dataset.py +++ b/torch/utils/data/dataset.py @@ -39,4 +39,3 @@ class TensorDataset(Dataset): def __len__(self): return self.data_tensor.size(0) - diff --git a/torch/utils/ffi/__init__.py b/torch/utils/ffi/__init__.py index 4c5b620e0e..78c48c8e0b 100644 --- a/torch/utils/ffi/__init__.py +++ b/torch/utils/ffi/__init__.py @@ -14,9 +14,9 @@ except 
ImportError: raise ImportError("torch.utils.ffi requires the cffi package") -if cffi.__version_info__ < (1,4,0): +if cffi.__version_info__ < (1, 4, 0): raise ImportError("torch.utils.ffi requires cffi version >= 1.4, but " - "got " + '.'.join(map(str, cffi.__version_info__))) + "got " + '.'.join(map(str, cffi.__version_info__))) def _generate_typedefs(): @@ -106,13 +106,13 @@ def _build_extension(ffi, cffi_wrapper_name, target_dir, verbose): def _make_python_wrapper(name, cffi_wrapper_name, target_dir): py_source = PY_MODULE_TEMPLATE.substitute(name=name, - cffi_wrapper_name=cffi_wrapper_name) + cffi_wrapper_name=cffi_wrapper_name) with open(os.path.join(target_dir, '__init__.py'), 'w') as f: f.write(py_source) def create_extension(name, headers, sources, verbose=True, with_cuda=False, - package=False, relative_to='.', **kwargs): + package=False, relative_to='.', **kwargs): """Creates and configures a cffi.FFI object, that builds PyTorch extension. Arguments: @@ -159,6 +159,7 @@ def create_extension(name, headers, sources, verbose=True, with_cuda=False, ffi.cdef(_typedefs + all_headers_source) _make_python_wrapper(name_suffix, '_' + name_suffix, target_dir) + def build(): _build_extension(ffi, cffi_wrapper_name, target_dir, verbose) ffi.build = build @@ -169,9 +170,9 @@ def _wrap_function(function, ffi): @wraps(function) def safe_call(*args, **kwargs): args = tuple(ffi.cast(_torch_to_cffi.get(type(arg), 'void') + '*', arg._cdata) - if torch.is_tensor(arg) or torch.is_storage(arg) - else arg - for arg in args) + if torch.is_tensor(arg) or torch.is_storage(arg) + else arg + for arg in args) args = (function,) + args result = torch._C._safe_call(*args, **kwargs) if isinstance(result, ffi.CData): @@ -183,4 +184,3 @@ def _wrap_function(function, ffi): return _cffi_to_torch[cname](cdata=cdata) return result return safe_call - diff --git a/torch/utils/hooks.py b/torch/utils/hooks.py index 0a09505825..8d9ab4890b 100644 --- a/torch/utils/hooks.py +++ b/torch/utils/hooks.py @@ -3,6 +3,7 @@ import weakref class RemovableHandle(object): """A handle which provides the capability to remove a hook.""" + def __init__(self, hooks_dict): self.hooks_dict_ref = weakref.ref(hooks_dict) diff --git a/torch/utils/model_zoo.py b/torch/utils/model_zoo.py index 791ed3ed74..ddc16b4b9a 100644 --- a/torch/utils/model_zoo.py +++ b/torch/utils/model_zoo.py @@ -92,6 +92,7 @@ def _download_url_to_file(url, dst, hash_prefix): if tqdm is None: # fake tqdm if it's not installed class tqdm(object): + def __init__(self, total): self.total = total self.n = 0 diff --git a/torch/utils/serialization/read_lua_file.py b/torch/utils/serialization/read_lua_file.py index 4d654a0072..1466d32e0b 100644 --- a/torch/utils/serialization/read_lua_file.py +++ b/torch/utils/serialization/read_lua_file.py @@ -130,6 +130,7 @@ def get_python_class(typename): def make_tensor_reader(typename): python_class = get_python_class(typename) + def read_tensor(reader, version): # source: # https://github.com/torch/torch7/blob/master/generic/Tensor.c#L1243 @@ -156,6 +157,7 @@ def make_storage_reader(typename): python_class = get_python_class(typename) # TODO: be smarter about this element_size = python_class().element_size() + def read_storage(reader, version): # source: # https://github.com/torch/torch7/blob/master/generic/Storage.c#L244 @@ -185,6 +187,7 @@ register_torch_class('Tensor', make_tensor_reader) # Reader function for tds.Vector and tds.Hash ################################################################################ + def 
tds_Vec_reader(reader, version): length = reader.read_long() return [reader.read() for i in range(length)] @@ -207,6 +210,7 @@ reader_registry['tds.Hash'] = tds_Hash_reader # Reader function for nn modules ################################################################################ + def _load_backend(obj): if hasattr(obj, '_type'): obj._backend = type2backend[obj._type] @@ -221,6 +225,7 @@ def _load_backend(obj): pass # Monkey patch the forward to capture the type of input updateOutput_orig = obj.updateOutput + def updateOutput_patch(*args): input = args[0] while not torch.is_tensor(input): @@ -242,13 +247,14 @@ def nn_reader(cls): reader_registry.update({('nn.' + name): nn_reader(module) - for name, module in nn.__dict__.items() - if name[0] != '_' and name[0].upper() == name[0]}) + for name, module in nn.__dict__.items() + if name[0] != '_' and name[0].upper() == name[0]}) def custom_reader(cls): def reader_factory(fn): base = nn_reader(cls) + def wrapper(reader, version): obj = base(reader, version) fn(reader, version, obj) @@ -271,7 +277,7 @@ for prefix in ['', 'Spatial', 'Volumetric']: @custom_reader(nn.Transpose) def Transpose_reader(reader, version, obj): obj.permutations = list( - map(lambda swap: [swap[0]-1, swap[1]-1], obj.permutations)) + map(lambda swap: [swap[0] - 1, swap[1] - 1], obj.permutations)) @custom_reader(nn.SpatialDivisiveNormalization) @@ -299,6 +305,7 @@ def registry_addon(fn): def wrapper_factory(module_name, *args, **kwargs): module_name = 'nn.' + module_name build_fn = reader_registry[module_name] + def wrapper(reader, version): obj = build_fn(reader, version) fn(obj, *args, **kwargs) @@ -306,6 +313,7 @@ def registry_addon(fn): reader_registry[module_name] = wrapper return wrapper_factory + @registry_addon def attr_map(obj, attribute_map): for src, dst in attribute_map.items(): @@ -521,9 +529,9 @@ class T7Reader: if self.unknown_classes: return TorchObject(cls_name, self.read()) raise T7ReaderException(("don't know how to deserialize Lua class " - "{}. If you want to ignore this error and load this object " - "as a dict, specify unknown_classes=True in reader's " - "constructor").format(cls_name)) + "{}. If you want to ignore this error and load this object " + "as a dict, specify unknown_classes=True in reader's " + "constructor").format(cls_name)) def _can_be_list(self, table): def is_natural(key): @@ -546,7 +554,7 @@ class T7Reader: v = self.read() table[k] = v if self.list_heuristic and self._can_be_list(table): - return [table[i] for i in range(1, len(table)+1)] + return [table[i] for i in range(1, len(table) + 1)] return table def read(self): @@ -569,7 +577,7 @@ class T7Reader: return self.read_table() else: raise T7ReaderException("unknown type id {}. 
The file may be " - "corrupted.".format(typeidx)) + "corrupted.".format(typeidx)) def load_lua(filename, **kwargs): @@ -580,4 +588,3 @@ def load_lua(filename, **kwargs): with open(filename, 'rb') as f: reader = T7Reader(f, **kwargs) return reader.read() - diff --git a/torch/utils/trainer/plugins/__init__.py b/torch/utils/trainer/plugins/__init__.py index 4258e05ebe..e8d10f48ae 100644 --- a/torch/utils/trainer/plugins/__init__.py +++ b/torch/utils/trainer/plugins/__init__.py @@ -3,4 +3,3 @@ from .accuracy import AccuracyMonitor from .time import TimeMonitor from .loss import LossMonitor from .logger import Logger - diff --git a/torch/utils/trainer/plugins/accuracy.py b/torch/utils/trainer/plugins/accuracy.py index c1431e0817..f6f393c16d 100644 --- a/torch/utils/trainer/plugins/accuracy.py +++ b/torch/utils/trainer/plugins/accuracy.py @@ -1,5 +1,6 @@ from .monitor import Monitor + class AccuracyMonitor(Monitor): stat_name = 'accuracy' @@ -16,4 +17,3 @@ class AccuracyMonitor(Monitor): correct = correct.cpu() correct = correct.sum() return 100. * correct / batch_size - diff --git a/torch/utils/trainer/plugins/logger.py b/torch/utils/trainer/plugins/logger.py index f132a11e54..9bc2dfc6a4 100644 --- a/torch/utils/trainer/plugins/logger.py +++ b/torch/utils/trainer/plugins/logger.py @@ -1,6 +1,7 @@ from collections import defaultdict from .plugin import Plugin + class Logger(Plugin): alignment = 4 separator = '#' * 80 @@ -58,7 +59,7 @@ class Logger(Plugin): for f in field: parent, stat = stat, stat[f] name, output = self._gather_outputs(field, log_fields, - parent, stat, require_dict) + parent, stat, require_dict) if not output: continue self._align_output(field_idx, output) @@ -77,7 +78,6 @@ class Logger(Plugin): def epoch(self, epoch_idx): self._log_all('log_epoch_fields', - prefix=self.separator + '\nEpoch summary:', - suffix=self.separator, - require_dict=True) - + prefix=self.separator + '\nEpoch summary:', + suffix=self.separator, + require_dict=True) diff --git a/torch/utils/trainer/plugins/loss.py b/torch/utils/trainer/plugins/loss.py index 320158c856..eea44ca81f 100644 --- a/torch/utils/trainer/plugins/loss.py +++ b/torch/utils/trainer/plugins/loss.py @@ -1,8 +1,8 @@ from .monitor import Monitor + class LossMonitor(Monitor): stat_name = 'loss' def _get_value(self, iteration, input, target, output, loss): return loss[0] - diff --git a/torch/utils/trainer/plugins/monitor.py b/torch/utils/trainer/plugins/monitor.py index 80bdf37e75..cb8da2e6e7 100644 --- a/torch/utils/trainer/plugins/monitor.py +++ b/torch/utils/trainer/plugins/monitor.py @@ -41,7 +41,7 @@ class Monitor(Plugin): if self.with_epoch_average: stats['epoch_stats'] = tuple(sum(t) for t in - zip(stats['epoch_stats'], (stats['last'], 1))) + zip(stats['epoch_stats'], (stats['last'], 1))) if self.with_running_average: previous_avg = stats.get('running_avg', 0) @@ -54,4 +54,3 @@ class Monitor(Plugin): epoch_stats = stats['epoch_stats'] stats['epoch_mean'] = epoch_stats[0] / epoch_stats[1] stats['epoch_stats'] = (0, 0) - diff --git a/torch/utils/trainer/plugins/plugin.py b/torch/utils/trainer/plugins/plugin.py index 145c6b93d1..e1ac25101f 100644 --- a/torch/utils/trainer/plugins/plugin.py +++ b/torch/utils/trainer/plugins/plugin.py @@ -8,4 +8,3 @@ class Plugin(object): def register(self, trainer): raise NotImplementedError - diff --git a/torch/utils/trainer/plugins/progress.py b/torch/utils/trainer/plugins/progress.py index 582f087eb5..06a3c3f92d 100644 --- a/torch/utils/trainer/plugins/progress.py +++ 
b/torch/utils/trainer/plugins/progress.py @@ -26,4 +26,3 @@ class ProgressMonitor(Plugin): stats = self.trainer.stats.setdefault(self.stat_name, {}) stats['samples_used'] = 0 stats['percent'] = 0 - diff --git a/torch/utils/trainer/plugins/time.py b/torch/utils/trainer/plugins/time.py index 8b79fa3e82..ffdc1988d5 100644 --- a/torch/utils/trainer/plugins/time.py +++ b/torch/utils/trainer/plugins/time.py @@ -22,4 +22,3 @@ class TimeMonitor(Monitor): else: self.last_time = time.time() return 0 - diff --git a/torch/utils/trainer/trainer.py b/torch/utils/trainer/trainer.py index d5157b7b06..9cdf5643c5 100644 --- a/torch/utils/trainer/trainer.py +++ b/torch/utils/trainer/trainer.py @@ -58,6 +58,7 @@ class Trainer(object): target_var = Variable(batch_target) plugin_data = [None, None] + def closure(): batch_output = self.model(input_var) loss = self.criterion(batch_output, target_var) @@ -70,7 +71,7 @@ class Trainer(object): self.optimizer.zero_grad() self.optimizer.step(closure) self.call_plugins('iteration', i, batch_input, batch_target, - *plugin_data) + *plugin_data) self.call_plugins('update', i, self.model) self.iterations += i
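Most of the hunks above apply the same PEP 8-style layout rules: continuation lines are aligned under the opening parenthesis of the call or definition they belong to, binary operators such as * and - get a space on each side, a blank line separates each method definition from the docstring above it, and stray trailing blank lines at end of file are dropped. A minimal sketch of that layout, using a hypothetical TinyPool module rather than code taken from the patch (assuming a recent PyTorch build):

import torch
import torch.nn.functional as F
from torch.nn import Module


class TinyPool(Module):
    """Hypothetical module written in the layout the patch converges on."""

    def __init__(self, kernel_size, stride=None, padding=0):
        super(TinyPool, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride or kernel_size
        self.padding = padding

    def forward(self, input):
        # Continuation arguments line up under the opening parenthesis
        # instead of carrying an arbitrary hanging indent.
        return F.max_pool2d(input, self.kernel_size, self.stride,
                            self.padding)


print(TinyPool(2)(torch.randn(1, 3, 8, 8)).shape)  # torch.Size([1, 3, 4, 4])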
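The pooling and unpooling hunks also pass kernel_size, stride, padding and dilation through the _single/_pair/_triple helpers produced by _ntuple in torch/nn/modules/utils.py before forwarding them or building the __repr__ strings. A short usage sketch (the helpers are private but importable in a recent PyTorch build):

from torch.nn.modules.utils import _single, _pair, _triple

# A bare int is broadcast to every spatial dimension; an explicit tuple
# is passed through unchanged.
print(_single(3))      # (3,)
print(_pair(3))        # (3, 3)
print(_pair((2, 5)))   # (2, 5)
print(_triple(1))      # (1, 1, 1)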
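The rnn.py hunks only add spaces around the multiplications, but the 4 * hidden_size and 3 * hidden_size factors they touch come from LSTMCell stacking its input, forget, cell and output gate transforms into a single weight matrix, while GRUCell stacks its reset, update and candidate transforms the same way. This can be checked directly against the public modules (a sketch assuming a current torch.nn build):

import torch.nn as nn

input_size, hidden_size = 10, 20
lstm_cell = nn.LSTMCell(input_size, hidden_size)
gru_cell = nn.GRUCell(input_size, hidden_size)

# Four stacked gate transforms for the LSTM, three for the GRU.
print(lstm_cell.weight_ih.shape)  # torch.Size([80, 10]) == (4 * hidden_size, input_size)
print(gru_cell.weight_ih.shape)   # torch.Size([60, 10]) == (3 * hidden_size, input_size)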