diff options
Diffstat (limited to 'test/test_mirror.py')
-rwxr-xr-x[-rw-r--r--] | test/test_mirror.py | 207 |
1 files changed, 163 insertions, 44 deletions
diff --git a/test/test_mirror.py b/test/test_mirror.py index 70fe069..a175977 100644..100755 --- a/test/test_mirror.py +++ b/test/test_mirror.py @@ -11,9 +11,9 @@ # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the -# Free Software Foundation, Inc., -# 59 Temple Place, Suite 330, +# License along with this library; if not, write to the +# Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, # Boston, MA 02111-1307 USA # This file is part of urlgrabber, a high-level cross-protocol url-grabber @@ -21,14 +21,12 @@ """mirror.py tests""" -# $Id: test_mirror.py,v 1.12 2005/10/22 21:57:27 mstenner Exp $ - import sys import os -import string, tempfile, random, cStringIO, os +import tempfile, random, os import urlgrabber.grabber -from urlgrabber.grabber import URLGrabber, URLGrabError +from urlgrabber.grabber import URLGrabber, URLGrabError, URLGrabberOptions import urlgrabber.mirror from urlgrabber.mirror import MirrorGroup, MGRandomStart, MGRandomOrder @@ -53,9 +51,7 @@ class BasicTests(TestCase): url = 'short_reference' self.mg.urlgrab(url, filename) - fo = open(filename) - data = fo.read() - fo.close() + data = open(filename, 'rb').read() self.assertEqual(data, short_reference_data) @@ -87,9 +83,7 @@ class SubclassTests(TestCase): url = 'short_reference' self.mg.urlgrab(url, filename) - fo = open(filename) - data = fo.read() - fo.close() + data = open(filename, 'rb').read() self.assertEqual(data, short_reference_data) @@ -106,8 +100,11 @@ class CallbackTests(TestCase): self.g = URLGrabber() fullmirrors = [base_mirror_url + m + '/' for m in \ (bad_mirrors + good_mirrors)] + if hasattr(urlgrabber.grabber, '_TH'): + # test assumes mirrors are not re-ordered + urlgrabber.grabber._TH.hosts.clear() self.mg = MirrorGroup(self.g, fullmirrors) - + def test_failure_callback(self): "test that MG executes the failure callback correctly" tricky_list = [] @@ -115,9 +112,9 @@ class CallbackTests(TestCase): tl.append(str(cb_obj.exception)) self.mg.failure_callback = failure_callback, (tricky_list, ), {} data = self.mg.urlread('reference') - self.assert_(data == reference_data) - self.assertEquals(tricky_list[0][:25], - '[Errno 14] HTTP Error 403') + self.assertTrue(data == reference_data) + self.assertEqual(tricky_list[0][:25], + '[Errno 14] HTTP Error 404') def test_callback_reraise(self): "test that the callback can correctly re-raise the exception" @@ -152,10 +149,8 @@ class FailoverTests(TestCase): def cb(e, elist=elist): elist.append(e) self.mg.urlgrab(url, filename, failure_callback=cb) - fo = open(filename) - contents = fo.read() - fo.close() - + contents = open(filename, 'rb').read() + # first be sure that the first mirror failed and that the # callback was called self.assertEqual(len(elist), 1) @@ -168,7 +163,8 @@ class FakeGrabber: self.resultlist = resultlist or [] self.index = 0 self.calls = [] - + self.opts = URLGrabberOptions() + def urlgrab(self, url, filename=None, **kwargs): self.calls.append( (url, filename) ) res = self.resultlist[self.index] @@ -187,11 +183,11 @@ class ActionTests(TestCase): def tearDown(self): urlgrabber.mirror.DEBUG = self.db - + def test_defaults(self): 'test default action policy' self.mg.urlgrab('somefile') - expected_calls = [ (m + '/' + 'somefile', None) \ + expected_calls = [ (m.encode('utf8') + b'/somefile', None) for m in self.mirrors[:3] ] expected_logs = \ ['MIRROR: trying somefile -> a/somefile', @@ -203,15 +199,15 @@ class ActionTests(TestCase): 'GR mirrors: [c d e f] 0', 'MAIN mirrors: [a b c d e f] 2', 'MIRROR: trying somefile -> c/somefile'] - - self.assertEquals(self.g.calls, expected_calls) - self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs) - + + self.assertEqual(self.g.calls, expected_calls) + self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs) + def test_instance_action(self): 'test the effects of passed-in default_action' self.mg.default_action = {'remove_master': 1} self.mg.urlgrab('somefile') - expected_calls = [ (m + '/' + 'somefile', None) \ + expected_calls = [ (m.encode('utf8') + b'/somefile', None) for m in self.mirrors[:3] ] expected_logs = \ ['MIRROR: trying somefile -> a/somefile', @@ -223,14 +219,14 @@ class ActionTests(TestCase): 'GR mirrors: [c d e f] 0', 'MAIN mirrors: [c d e f] 0', 'MIRROR: trying somefile -> c/somefile'] - - self.assertEquals(self.g.calls, expected_calls) - self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs) - + + self.assertEqual(self.g.calls, expected_calls) + self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs) + def test_method_action(self): 'test the effects of method-level default_action' self.mg.urlgrab('somefile', default_action={'remove_master': 1}) - expected_calls = [ (m + '/' + 'somefile', None) \ + expected_calls = [ (m.encode('utf8') + b'/somefile', None) for m in self.mirrors[:3] ] expected_logs = \ ['MIRROR: trying somefile -> a/somefile', @@ -242,18 +238,18 @@ class ActionTests(TestCase): 'GR mirrors: [c d e f] 0', 'MAIN mirrors: [c d e f] 0', 'MIRROR: trying somefile -> c/somefile'] - - self.assertEquals(self.g.calls, expected_calls) - self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs) - + + self.assertEqual(self.g.calls, expected_calls) + self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs) + def callback(self, e): return {'fail': 1} - + def test_callback_action(self): 'test the effects of a callback-returned action' self.assertRaises(URLGrabError, self.mg.urlgrab, 'somefile', failure_callback=self.callback) - expected_calls = [ (m + '/' + 'somefile', None) \ + expected_calls = [ (m.encode('utf8') + b'/somefile', None) for m in self.mirrors[:1] ] expected_logs = \ ['MIRROR: trying somefile -> a/somefile', @@ -261,9 +257,133 @@ class ActionTests(TestCase): 'GR mirrors: [b c d e f] 0', 'MAIN mirrors: [a b c d e f] 1'] - self.assertEquals(self.g.calls, expected_calls) - self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs) - + self.assertEqual(self.g.calls, expected_calls) + self.assertEqual(urlgrabber.mirror.DEBUG.logs, expected_logs) + +import threading, socket + +class HttpReplyCode(TestCase): + def setUp(self): + # start the server + self.exit = False + self.process = lambda data: None + + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + s.bind(('localhost', 0)); s.listen(1) + self.port = s.getsockname()[1] + + def server(): + while True: + c, a = s.accept() + if self.exit: c.close(); break + data = b'' + while not data.endswith(b'\r\n\r\n'): + data = c.recv(4096) + self.process(data) + c.sendall(b'HTTP/1.1 %d %s\r\n' % self.reply) + if self.content is not None: + c.sendall(b'Content-Length: %d\r\n\r\n' % len(self.content)) + c.sendall(self.content) + c.close() + s.close() + self.exit = False + + self.thread = threading.Thread(target=server) + self.thread.start() + + # create grabber and mirror group objects + def failure(obj): + self.code = getattr(obj.exception, 'code', None) + return {} + self.g = URLGrabber() + self.mg = MirrorGroup(self.g, ['http://localhost:%d' % self.port], + failure_callback = failure) + + def tearDown(self): + # shut down the server + self.exit = True + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + s.connect(('localhost', self.port)) # wake it up + except ConnectionRefusedError: + # already gone? + pass + s.close() + self.thread.join() + + def test_grab(self): + 'tests the propagation of HTTP reply code' + self.reply = 503, b'Busy' + self.content = None + + # single + self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo') + self.assertEqual(self.code, 503); del self.code + + # multi + err = [] + self.mg.urlgrab('foo', async_=True, failfunc=err.append) + urlgrabber.grabber.parallel_wait() + self.assertEqual([e.exception.errno for e in err], [256]) + self.assertEqual(self.code, 503); del self.code + + def test_range(self): + 'test client-side processing of HTTP ranges' + # server does not process ranges + self.reply = 200, b'OK' + self.content = b'ABCDEF' + + # no range specified + data = self.mg.urlread('foo') + self.assertEqual(data, b'ABCDEF') + + data = self.mg.urlread('foo', range = (3, 5)) + self.assertEqual(data, b'DE') + + def test_retry_no_cache(self): + 'test bypassing proxy cache on failure' + def process(data): + if b'Pragma:no-cache' in data: + self.content = b'version2' + else: + self.content = b'version1' + + def checkfunc_read(obj): + if obj.data == b'version1': + raise URLGrabError(-1, 'Outdated version of foo') + elif obj.data != b'version2': + self.fail('Unexpected file content') + + def checkfunc_grab(obj): + with open('foo') as f: + data = f.read() + if data == 'version1': + raise URLGrabError(-1, 'Outdated version of foo') + elif data != 'version2': + self.fail('Unexpected file content') + + self.process = process + self.reply = 200, b'OK' + + opts = self.g.opts + opts.retry = 3 + opts.retry_no_cache = True + + # single + opts.checkfunc = checkfunc_read + try: + self.mg.urlread('foo') + except URLGrabError as e: + self.fail(str(e)) + + # multi + opts.checkfunc = checkfunc_grab + self.mg.urlgrab('foo', async_=True) + try: + urlgrabber.grabber.parallel_wait() + except URLGrabError as e: + self.fail(str(e)) def suite(): tl = TestLoader() @@ -272,4 +392,3 @@ def suite(): if __name__ == '__main__': runner = TextTestRunner(stream=sys.stdout,descriptions=1,verbosity=2) runner.run(suite()) - |