author | Adam Paszke <adam.paszke@gmail.com> | 2017-01-21 12:18:36 +0100
---|---|---
committer | Soumith Chintala <soumith@gmail.com> | 2017-01-22 18:02:40 -0500
commit | f8ae34706e14fe7bb7826d9723dda4bb6a960a4a (patch) |
tree | b90051e4229b73a69e9be6fd4c839f4417a938e2 /test/test_optim.py |
parent | f8e89fbe1123f6788992b70361f13ad498665327 (diff) |
Port L-BFGS from Lua optim
Diffstat (limited to 'test/test_optim.py')
-rw-r--r-- | test/test_optim.py | 45
1 file changed, 33 insertions, 12 deletions
diff --git a/test/test_optim.py b/test/test_optim.py
index 6172887ac7..ddd33c9a58 100644
--- a/test/test_optim.py
+++ b/test/test_optim.py
@@ -36,12 +36,19 @@ class TestOptim(TestCase):
         initial_dist = params.data.dist(solution)
 
         def eval():
+            optimizer.zero_grad()
             loss = rosenbrock(params)
             loss.backward()
+            # loss.backward() will give **slightly** different
+            # gradients than drosenbrock, because of a different ordering
+            # of floating point operations. In most cases it doesn't matter,
+            # but some optimizers are so sensitive that they can temporarily
+            # diverge up to 1e-4, just to converge again. This makes the
+            # comparison more stable.
+            params.grad.data.copy_(drosenbrock(params.data))
             return loss
 
         for i in range(2000):
-            optimizer.zero_grad()
             optimizer.step(eval)
             old_fn(lambda _: (rosenbrock(params_t), drosenbrock(params_t)),
                    params_t, state)
@@ -56,21 +63,21 @@ class TestOptim(TestCase):
         optimizer = constructor(weight, bias)
 
         def fn():
+            optimizer.zero_grad()
             y = weight.mv(input)
             if y.is_cuda and bias.is_cuda and y.get_device() != bias.get_device():
                 y = y.cuda(bias.get_device())
-            return (y + bias).abs().sum()
+            loss = (y + bias).pow(2).sum()
+            loss.backward()
+            return loss
 
         initial_value = fn().data[0]
         for i in range(200):
-            weight.grad.data.zero_()
-            bias.grad.data.zero_()
-            fn().backward()
-            optimizer.step()
+            optimizer.step(fn)
 
-        self.assertLessEqual(fn().data[0], initial_value)
+        self.assertLess(fn().data[0], initial_value)
 
-    def _test_basic_cases(self, constructor):
+    def _test_basic_cases(self, constructor, ignore_multidevice=False):
         self._test_basic_cases_template(
             torch.randn(10, 5),
             torch.randn(10),
@@ -94,12 +101,12 @@ class TestOptim(TestCase):
             constructor
         )
         # Multi-GPU
-        if not torch.cuda.device_count() > 1:
+        if not torch.cuda.device_count() > 1 or ignore_multidevice:
             return
         self._test_basic_cases_template(
-            torch.randn(10, 5).cuda(),
-            torch.randn(10).cuda(),
-            torch.randn(5).cuda(),
+            torch.randn(10, 5).cuda(0),
+            torch.randn(10).cuda(1),
+            torch.randn(5).cuda(0),
             constructor
         )
 
@@ -275,6 +282,20 @@ class TestOptim(TestCase):
                      lr=1e-3)
         )
 
+    def test_lbfgs(self):
+        self._test_rosenbrock(
+            lambda params: optim.LBFGS(params),
+            wrap_old_fn(old_optim.lbfgs)
+        )
+        self._test_rosenbrock(
+            lambda params: optim.LBFGS(params, lr=5e-2, max_iter=5),
+            wrap_old_fn(old_optim.lbfgs, learningRate=5e-2, maxIter=5)
+        )
+        self._test_basic_cases(
+            lambda weight, bias: optim.LBFGS([weight, bias]),
+            ignore_multidevice=True
+        )
+
     def test_invalid_param_type(self):
         with self.assertRaises(TypeError):
             optim.SGD(Variable(torch.randn(5, 5)), lr=3)
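The key API change these tests exercise is that `optimizer.step()` accepts a closure: L-BFGS has to re-evaluate the objective (and its gradient) several times per step, so `zero_grad()`, the forward pass, and `backward()` all move inside the function passed to `step()`. Below is a minimal usage sketch of `optim.LBFGS` with such a closure. It mirrors the least-squares setup from `_test_basic_cases_template` but is written against the present-day tensor API rather than the `Variable`-era API shown in the diff; the tensor shapes, iteration count, and the `closure` name are illustrative, not taken from the test file.

```python
import torch
import torch.optim as optim

# Illustrative parameters and input; shapes follow the test's weight.mv(input) + bias setup.
weight = torch.randn(10, 5, requires_grad=True)
bias = torch.randn(10, requires_grad=True)
input = torch.randn(5)

optimizer = optim.LBFGS([weight, bias], lr=5e-2, max_iter=5)

def closure():
    # L-BFGS may call this several times per step, so the whole
    # zero_grad / forward / backward sequence lives inside it.
    optimizer.zero_grad()
    loss = (weight.mv(input) + bias).pow(2).sum()
    loss.backward()
    return loss

for _ in range(20):
    optimizer.step(closure)
```

This is also why the test's training loop collapses from four lines (zeroing both gradients, calling `backward()`, then `step()`) into a single `optimizer.step(fn)` call: optimizers that only need one gradient evaluation ignore the closure's extra capabilities, while L-BFGS relies on them.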