Diffstat (limited to 'test/test_grabber.py')
 test/test_grabber.py | 327 (mode changed from -rw-r--r-- to -rwxr-xr-x)
 1 file changed, 172 insertions(+), 155 deletions(-)
diff --git a/test/test_grabber.py b/test/test_grabber.py
index eecdbcf..465e5f5 100644..100755
--- a/test/test_grabber.py
+++ b/test/test_grabber.py
@@ -11,23 +11,37 @@
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
+# License along with this library; if not, write to the
+# Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-"""grabber.py tests"""
+from __future__ import print_function
-# $Id: test_grabber.py,v 1.31 2006/12/08 00:14:16 mstenner Exp $
+"""grabber.py tests"""
import sys
import os
-import string, tempfile, random, cStringIO, os
-import urllib2
+import tempfile, random, os
import socket
+from io import BytesIO
+from six import string_types
+
+if sys.version_info >= (3,):
+ # We do an explicit version check here because python2
+ # also has an io module with StringIO, but it is incompatible,
+ # and returns str instead of unicode somewhere.
+ from io import StringIO
+else:
+ from cStringIO import StringIO
+
+try:
+ from urllib.request import urlopen, OpenerDirector
+except ImportError:
+ from urllib2 import urlopen, OpenerDirector
from base_test_code import *
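The shim above is the standard straddle-both-majors import pattern; as a standalone sketch (stdlib modules only, nothing urlgrabber-specific):

    import sys

    # BytesIO is the same object on both majors; only StringIO moved around.
    from io import BytesIO

    if sys.version_info >= (3,):
        from io import StringIO         # py3: io.StringIO for unicode text
    else:
        from cStringIO import StringIO  # py2: C implementation, byte strings

    try:
        from urllib.request import urlopen, OpenerDirector  # py3 layout
    except ImportError:
        from urllib2 import urlopen, OpenerDirector         # py2 layout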
@@ -38,15 +52,14 @@ from urlgrabber.grabber import URLGrabber, URLGrabError, CallbackObject, \
from urlgrabber.progress import text_progress_meter
class FileObjectTests(TestCase):
-
+
def setUp(self):
self.filename = tempfile.mktemp()
- fo = file(self.filename, 'wb')
- fo.write(reference_data)
- fo.close()
+ with open(self.filename, 'wb') as fo:
+ fo.write(reference_data)
- self.fo_input = cStringIO.StringIO(reference_data)
- self.fo_output = cStringIO.StringIO()
+ self.fo_input = BytesIO(reference_data)
+ self.fo_output = BytesIO()
(url, parts) = grabber.default_grabber.opts.urlparser.parse(
self.filename, grabber.default_grabber.opts)
self.wrapper = grabber.PyCurlFileObject(
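The fixture switch from cStringIO.StringIO to io.BytesIO is what lets the comparisons against reference_data (a bytes object) keep working on Python 3. A minimal illustration of the difference, with a hypothetical stand-in payload:

    from io import BytesIO, StringIO

    reference_data = b'A' * 16     # hypothetical stand-in for the real fixture

    buf = BytesIO(reference_data)  # accepts and returns bytes on py2 and py3
    assert buf.read() == reference_data

    # StringIO(reference_data) would raise TypeError on py3:
    # initial_value must be str or None, not bytes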
@@ -60,41 +73,39 @@ class FileObjectTests(TestCase):
"PYCurlFileObject .read() method"
s = self.wrapper.read()
self.fo_output.write(s)
- self.assert_(reference_data == self.fo_output.getvalue())
+ self.assertTrue(reference_data == self.fo_output.getvalue())
def test_readline(self):
"PyCurlFileObject .readline() method"
- while 1:
+ while True:
s = self.wrapper.readline()
self.fo_output.write(s)
if not s: break
- self.assert_(reference_data == self.fo_output.getvalue())
+ self.assertTrue(reference_data == self.fo_output.getvalue())
def test_readlines(self):
"PyCurlFileObject .readlines() method"
li = self.wrapper.readlines()
- self.fo_output.write(string.join(li, ''))
- self.assert_(reference_data == self.fo_output.getvalue())
+ self.fo_output.write(b''.join(li))
+ self.assertTrue(reference_data == self.fo_output.getvalue())
def test_smallread(self):
"PyCurlFileObject .read(N) with small N"
- while 1:
+ while True:
s = self.wrapper.read(23)
self.fo_output.write(s)
if not s: break
- self.assert_(reference_data == self.fo_output.getvalue())
-
+ self.assertTrue(reference_data == self.fo_output.getvalue())
+
class HTTPTests(TestCase):
def test_reference_file(self):
- "download refernce file via HTTP"
+ "download reference file via HTTP"
filename = tempfile.mktemp()
grabber.urlgrab(ref_http, filename)
- fo = file(filename, 'rb')
- contents = fo.read()
- fo.close()
+ contents = open(filename, 'rb').read()
- self.assert_(contents == reference_data)
+ self.assertTrue(contents == reference_data)
def test_post(self):
"do an HTTP post"
@@ -109,46 +120,46 @@ class URLGrabberModuleTestCase(TestCase):
"""Test module level functions defined in grabber.py"""
def setUp(self):
pass
-
+
def tearDown(self):
pass
-
+
def test_urlopen(self):
"module-level urlopen() function"
fo = urlgrabber.urlopen('http://www.python.org')
fo.close()
-
+
def test_urlgrab(self):
"module-level urlgrab() function"
outfile = tempfile.mktemp()
- filename = urlgrabber.urlgrab('http://www.python.org',
+ filename = urlgrabber.urlgrab('http://www.python.org',
filename=outfile)
os.unlink(outfile)
-
+
def test_urlread(self):
"module-level urlread() function"
s = urlgrabber.urlread('http://www.python.org')
-
+
class URLGrabberTestCase(TestCase):
"""Test grabber.URLGrabber class"""
-
+
def setUp(self):
-
- self.meter = text_progress_meter( fo=cStringIO.StringIO() )
+
+ self.meter = text_progress_meter( fo=StringIO() )
pass
-
+
def tearDown(self):
pass
-
+
def testKeywordArgs(self):
"""grabber.URLGrabber.__init__() **kwargs handling.
-
+
This is a simple test that just passes some arbitrary
values into the URLGrabber constructor and checks that
they've been set properly.
"""
- opener = urllib2.OpenerDirector()
+ opener = OpenerDirector()
g = URLGrabber( progress_obj=self.meter,
throttle=0.9,
bandwidth=20,
@@ -160,78 +171,81 @@ class URLGrabberTestCase(TestCase):
proxies={'http' : 'http://www.proxy.com:9090'},
opener=opener )
opts = g.opts
- self.assertEquals( opts.progress_obj, self.meter )
- self.assertEquals( opts.throttle, 0.9 )
- self.assertEquals( opts.bandwidth, 20 )
- self.assertEquals( opts.retry, 20 )
- self.assertEquals( opts.retrycodes, [5,6,7] )
- self.assertEquals( opts.copy_local, 1 )
- self.assertEquals( opts.close_connection, 1 )
- self.assertEquals( opts.user_agent, 'test ua/1.0' )
- self.assertEquals( opts.proxies, {'http' : 'http://www.proxy.com:9090'} )
- self.assertEquals( opts.opener, opener )
-
- nopts = grabber.URLGrabberOptions(delegate=opts, throttle=0.5,
+ self.assertEqual( opts.progress_obj, self.meter )
+ self.assertEqual( opts.throttle, 0.9 )
+ self.assertEqual( opts.bandwidth, 20 )
+ self.assertEqual( opts.retry, 20 )
+ self.assertEqual( opts.retrycodes, [5,6,7] )
+ self.assertEqual( opts.copy_local, 1 )
+ self.assertEqual( opts.close_connection, 1 )
+ self.assertEqual( opts.user_agent, 'test ua/1.0' )
+ self.assertEqual( opts.proxies, {'http' : 'http://www.proxy.com:9090'} )
+ self.assertEqual( opts.opener, opener )
+
+ nopts = grabber.URLGrabberOptions(delegate=opts, throttle=0.5,
copy_local=0)
- self.assertEquals( nopts.progress_obj, self.meter )
- self.assertEquals( nopts.throttle, 0.5 )
- self.assertEquals( nopts.bandwidth, 20 )
- self.assertEquals( nopts.retry, 20 )
- self.assertEquals( nopts.retrycodes, [5,6,7] )
- self.assertEquals( nopts.copy_local, 0 )
- self.assertEquals( nopts.close_connection, 1 )
- self.assertEquals( nopts.user_agent, 'test ua/1.0' )
- self.assertEquals( nopts.proxies, {'http' : 'http://www.proxy.com:9090'} )
+ self.assertEqual( nopts.progress_obj, self.meter )
+ self.assertEqual( nopts.throttle, 0.5 )
+ self.assertEqual( nopts.bandwidth, 20 )
+ self.assertEqual( nopts.retry, 20 )
+ self.assertEqual( nopts.retrycodes, [5,6,7] )
+ self.assertEqual( nopts.copy_local, 0 )
+ self.assertEqual( nopts.close_connection, 1 )
+ self.assertEqual( nopts.user_agent, 'test ua/1.0' )
+ self.assertEqual( nopts.proxies, {'http' : 'http://www.proxy.com:9090'} )
nopts.opener = None
- self.assertEquals( nopts.opener, None )
-
+ self.assertEqual( nopts.opener, None )
+
def test_make_callback(self):
"""grabber.URLGrabber._make_callback() tests"""
def cb(e): pass
tup_cb = (cb, ('stuff'), {'some': 'dict'})
g = URLGrabber()
- self.assertEquals(g._make_callback(cb), (cb, (), {}))
- self.assertEquals(g._make_callback(tup_cb), tup_cb)
+ self.assertEqual(g._make_callback(cb), (cb, (), {}))
+ self.assertEqual(g._make_callback(tup_cb), tup_cb)
class URLParserTestCase(TestCase):
def setUp(self):
pass
-
+
def tearDown(self):
pass
def test_parse_url_with_prefix(self):
"""grabber.URLParser.parse() with opts.prefix"""
- base = 'http://foo.com/dir'
- bases = [base, base+'/']
- filename = 'bar/baz'
- target = base + '/' + filename
-
+ base = b'http://foo.com/dir'
+ bases = [base, base + b'/']
+ filename = b'bar/baz'
+ target = base + b'/' + filename
+
for b in bases:
g = URLGrabber(prefix=b)
(url, parts) = g.opts.urlparser.parse(filename, g.opts)
- self.assertEquals(url, target)
+ self.assertEqual(url, target)
def _test_url(self, urllist):
g = URLGrabber()
try: quote = urllist[3]
except IndexError: quote = None
g.opts.quote = quote
- (url, parts) = g.opts.urlparser.parse(urllist[0], g.opts)
-
+ url = urllist[0].encode('utf8')
+ expected_url = urllist[1].encode('utf8')
+ expected_parts = tuple(part.encode('utf8') for part in urllist[2])
+ (url, parts) = g.opts.urlparser.parse(url, g.opts)
+
if 1:
- self.assertEquals(url, urllist[1])
- self.assertEquals(parts, urllist[2])
+ self.assertEqual(url, expected_url)
+ self.assertEqual(parts, expected_parts)
else:
if url == urllist[1] and parts == urllist[2]:
- print 'OK: %s' % urllist[0]
+ print('OK: %s' % urllist[0])
else:
- print 'ERROR: %s' % urllist[0]
- print ' ' + urllist[1]
- print ' ' + url
- print ' ' + urllist[2]
- print ' ' + parts
-
+ print('ERROR: %s' % urllist[0])
+ print(' ' + urllist[1])
+ print(' ' + url)
+ print(' ' + urllist[2])
+ print(' ' + parts)
+
url_tests_all = (
['http://host.com/path/basename.ext?arg1=val1&arg2=val2#hash',
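The pair of assertion blocks in testKeywordArgs pins down the delegate behavior: nopts overrides throttle and copy_local but inherits every other option from opts. A rough sketch of that fall-through lookup (not urlgrabber's actual URLGrabberOptions code, just the shape the test implies):

    class Options(object):
        def __init__(self, delegate=None, **kwargs):
            self._delegate = delegate
            self.__dict__.update(kwargs)

        def __getattr__(self, name):
            # Only reached for names not set locally: fall through to delegate.
            if self._delegate is not None:
                return getattr(self._delegate, name)
            raise AttributeError(name)

    base = Options(throttle=0.9, bandwidth=20)
    derived = Options(delegate=base, throttle=0.5)
    assert derived.throttle == 0.5   # overridden locally
    assert derived.bandwidth == 20   # inherited from the delegate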
@@ -251,13 +265,13 @@ class URLParserTestCase(TestCase):
'http://host.com/Should%2520Not',
('http', 'host.com', '/Should%2520Not', '', '', ''), 1],
)
-
+
url_tests_posix = (
['/etc/passwd',
'file:///etc/passwd',
('file', '', '/etc/passwd', '', '', '')],
)
-
+
url_tests_nt = (
[r'\\foo.com\path\file.ext',
'file://foo.com/path/file.ext',
@@ -295,7 +309,7 @@ class FailureTestCase(TestCase):
self.obj = obj
self.args = args
self.kwargs = kwargs
-
+
def test_failure_callback_called(self):
"failure callback is called on retry"
self.failure_callback_called = 0
@@ -303,7 +317,7 @@ class FailureTestCase(TestCase):
failure_callback=self._failure_callback)
try: g.urlgrab(ref_404)
except URLGrabError: pass
- self.assertEquals(self.failure_callback_called, 1)
+ self.assertEqual(self.failure_callback_called, 1)
def test_failure_callback_args(self):
"failure callback is called with the proper args"
@@ -312,14 +326,17 @@ class FailureTestCase(TestCase):
failure_callback=fc)
try: g.urlgrab(ref_404)
except URLGrabError: pass
- self.assert_(hasattr(self, 'obj'))
- self.assert_(hasattr(self, 'args'))
- self.assert_(hasattr(self, 'kwargs'))
- self.assertEquals(self.args, ('foo',))
- self.assertEquals(self.kwargs, {'bar': 'baz'})
- self.assert_(isinstance(self.obj, CallbackObject))
- self.assertEquals(self.obj.url, ref_404)
- self.assert_(isinstance(self.obj.exception, URLGrabError))
+ self.assertTrue(hasattr(self, 'obj'))
+ self.assertTrue(hasattr(self, 'args'))
+ self.assertTrue(hasattr(self, 'kwargs'))
+ self.assertEqual(self.args, ('foo',))
+ self.assertEqual(self.kwargs, {'bar': 'baz'})
+ self.assertTrue(isinstance(self.obj, CallbackObject))
+ url = self.obj.url
+ if not isinstance(url, string_types):
+ url = url.decode('utf8')
+ self.assertEqual(url, ref_404)
+ self.assertTrue(isinstance(self.obj.exception, URLGrabError))
del self.obj
class InterruptTestCase(TestCase):
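The isinstance/decode dance appears because on Python 3 the grabber may report the URL as bytes while the fixtures are native strings. The same normalization as a small helper (string_types is from six, as in the imports above; the URLs are hypothetical):

    from six import string_types

    def as_native_str(url, encoding='utf8'):
        """Return url as a native string, decoding bytes if necessary."""
        if not isinstance(url, string_types):
            url = url.decode(encoding)
        return url

    assert as_native_str(b'http://example.com/404') == 'http://example.com/404'
    assert as_native_str('http://example.com/404') == 'http://example.com/404'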
@@ -339,7 +356,7 @@ class InterruptTestCase(TestCase):
self.kwargs = kwargs
if kwargs.get('exception', None):
raise kwargs['exception']
-
+
def test_interrupt_callback_called(self):
"interrupt callback is called on retry"
self.interrupt_callback_called = 0
@@ -348,7 +365,7 @@ class InterruptTestCase(TestCase):
interrupt_callback=ic)
try: g.urlgrab(ref_http)
except KeyboardInterrupt: pass
- self.assertEquals(self.interrupt_callback_called, 1)
+ self.assertEqual(self.interrupt_callback_called, 1)
def test_interrupt_callback_raises(self):
"interrupt callback raises an exception"
@@ -366,12 +383,12 @@ class CheckfuncTestCase(TestCase):
self.g = grabber.URLGrabber(checkfunc=cf)
self.filename = tempfile.mktemp()
self.data = short_reference_data
-
+
def tearDown(self):
try: os.unlink(self.filename)
except: pass
if hasattr(self, 'obj'): del self.obj
-
+
def _checkfunc(self, obj, *args, **kwargs):
self.obj = obj
self.args = args
@@ -379,37 +396,38 @@ class CheckfuncTestCase(TestCase):
if hasattr(obj, 'filename'):
# we used urlgrab
- fo = file(obj.filename)
- data = fo.read()
- fo.close()
+ data = open(obj.filename, 'rb').read()
else:
# we used urlread
data = obj.data
if data == self.data: return
else: raise URLGrabError(-2, "data doesn't match")
-
+
def _check_common_args(self):
"check the args that are common to both urlgrab and urlread"
- self.assert_(hasattr(self, 'obj'))
- self.assert_(hasattr(self, 'args'))
- self.assert_(hasattr(self, 'kwargs'))
- self.assertEquals(self.args, ('foo',))
- self.assertEquals(self.kwargs, {'bar': 'baz'})
- self.assert_(isinstance(self.obj, CallbackObject))
- self.assertEquals(self.obj.url, short_ref_http)
+ self.assertTrue(hasattr(self, 'obj'))
+ self.assertTrue(hasattr(self, 'args'))
+ self.assertTrue(hasattr(self, 'kwargs'))
+ self.assertEqual(self.args, ('foo',))
+ self.assertEqual(self.kwargs, {'bar': 'baz'})
+ self.assertTrue(isinstance(self.obj, CallbackObject))
+ url = self.obj.url
+ if not isinstance(url, string_types):
+ url = url.decode()
+ self.assertEqual(url, short_ref_http)
def test_checkfunc_urlgrab_args(self):
"check for proper args when used with urlgrab"
self.g.urlgrab(short_ref_http, self.filename)
self._check_common_args()
- self.assertEquals(self.obj.filename, self.filename)
+ self.assertEqual(self.obj.filename, self.filename)
def test_checkfunc_urlread_args(self):
"check for proper args when used with urlread"
self.g.urlread(short_ref_http)
self._check_common_args()
- self.assertEquals(self.obj.data, short_reference_data)
+ self.assertEqual(self.obj.data, short_reference_data)
def test_checkfunc_urlgrab_success(self):
"check success with urlgrab checkfunc"
@@ -425,20 +443,20 @@ class CheckfuncTestCase(TestCase):
"check failure with urlgrab checkfunc"
self.data = 'other data'
self.assertRaises(URLGrabError, self.g.urlgrab,
- short_ref_http, self.filename)
+ ref_404, self.filename)
def test_checkfunc_urlread_failure(self):
"check failure with urlread checkfunc"
self.data = 'other data'
self.assertRaises(URLGrabError, self.g.urlread,
- short_ref_http)
+ ref_404)
class RegetTestBase:
def setUp(self):
self.ref = short_reference_data
self.grabber = grabber.URLGrabber(reget='check_timestamp')
self.filename = tempfile.mktemp()
- self.hl = len(self.ref) / 2
+ self.hl = len(self.ref) // 2
self.url = 'OVERRIDE THIS'
def tearDown(self):
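The / to // switch in setUp is one of the classic porting fixes: on Python 3, / between two ints produces a float, which then fails anywhere an integer offset is required. For instance:

    ref = b'0123456789'
    hl = len(ref) // 2   # 5 on both majors; plain '/' would give 5.0 on py3
    assert ref[:hl] == b'01234'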
@@ -446,16 +464,13 @@ class RegetTestBase:
except: pass
def _make_half_zero_file(self):
- fo = file(self.filename, 'wb')
- fo.write('0'*self.hl)
- fo.close()
+ with open(self.filename, 'wb') as fo:
+ fo.write(b'0' * self.hl)
def _read_file(self):
- fo = file(self.filename, 'rb')
- data = fo.read()
- fo.close()
+ data = open(self.filename, 'rb').read()
return data
-
+
class CommonRegetTests(RegetTestBase, TestCase):
def test_bad_reget_type(self):
"exception raised for illegal reget mode"
@@ -469,7 +484,7 @@ class FTPRegetTests(RegetTestBase, TestCase):
# this tests to see if the server is available. If it's not,
# then these tests will be skipped
try:
- fo = urllib2.urlopen(self.url).close()
+ fo = urlopen(self.url).close()
except IOError:
self.skip()
@@ -479,52 +494,55 @@ class FTPRegetTests(RegetTestBase, TestCase):
self.grabber.urlgrab(self.url, self.filename, reget='simple')
data = self._read_file()
- self.assertEquals(data[:self.hl], '0'*self.hl)
- self.assertEquals(data[self.hl:], self.ref[self.hl:])
+ self.assertEqual(data[:self.hl], b'0'*self.hl)
+ self.assertEqual(data[self.hl:], self.ref[self.hl:])
class HTTPRegetTests(FTPRegetTests):
def setUp(self):
RegetTestBase.setUp(self)
self.url = short_ref_http
-
+
def test_older_check_timestamp(self):
+ # define this here rather than in the FTP tests because currently,
+ # we get no timestamp information back from ftp servers.
+ self._make_half_zero_file()
+ ts = 1600000000 # set local timestamp to 2020
+ os.utime(self.filename, (ts, ts))
+
try:
- # define this here rather than in the FTP tests because currently,
- # we get no timestamp information back from ftp servers.
- self._make_half_zero_file()
- ts = 1600000000 # set local timestamp to 2020
- os.utime(self.filename, (ts, ts))
self.grabber.urlgrab(self.url, self.filename, reget='check_timestamp')
- data = self._read_file()
-
- self.assertEquals(data[:self.hl], '0'*self.hl)
- self.assertEquals(data[self.hl:], self.ref[self.hl:])
except NotImplementedError:
self.skip()
-
+
+ data = self._read_file()
+
+ self.assertEqual(data[:self.hl], b'0'*self.hl)
+ self.assertEqual(data[self.hl:], self.ref[self.hl:])
+
def test_newer_check_timestamp(self):
+ # define this here rather than in the FTP tests because currently,
+ # we get no timestamp information back from ftp servers.
+ self._make_half_zero_file()
+ ts = 1 # set local timestamp to 1969
+ os.utime(self.filename, (ts, ts))
+
try:
- # define this here rather than in the FTP tests because currently,
- # we get no timestamp information back from ftp servers.
- self._make_half_zero_file()
- ts = 1 # set local timestamp to 1969
- os.utime(self.filename, (ts, ts))
self.grabber.urlgrab(self.url, self.filename, reget='check_timestamp')
- data = self._read_file()
-
- self.assertEquals(data, self.ref)
- except:
+ except NotImplementedError:
self.skip()
-
+
+ data = self._read_file()
+
+ self.assertEqual(data, self.ref)
+
class FileRegetTests(HTTPRegetTests):
def setUp(self):
self.ref = short_reference_data
tmp = tempfile.mktemp()
- tmpfo = file(tmp, 'wb')
- tmpfo.write(self.ref)
- tmpfo.close()
+ with open(tmp, 'wb') as tmpfo:
+ tmpfo.write(self.ref)
self.tmp = tmp
-
+
(url, parts) = grabber.default_grabber.opts.urlparser.parse(
tmp, grabber.default_grabber.opts)
self.url = url
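Read together, the reget tests fix the resume semantics: reget='simple' always appends whatever lies past the current file length, while 'check_timestamp' resumes only when the local copy is not newer than the remote. What the half-zero-file assertions amount to, in isolation:

    ref = b'abcdefghij'        # hypothetical reference payload
    hl = len(ref) // 2

    local = b'0' * hl          # _make_half_zero_file(): first half is zeros
    resumed = local + ref[hl:] # reget appends only the missing tail

    assert resumed[:hl] == b'0' * hl   # local prefix untouched
    assert resumed[hl:] == ref[hl:]    # remote suffix filled in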
@@ -532,7 +550,7 @@ class FileRegetTests(HTTPRegetTests):
self.grabber = grabber.URLGrabber(reget='check_timestamp',
copy_local=1)
self.filename = tempfile.mktemp()
- self.hl = len(self.ref) / 2
+ self.hl = len(self.ref) // 2
def tearDown(self):
try: os.unlink(self.filename)
@@ -544,14 +562,14 @@ class ProFTPDSucksTests(TestCase):
def setUp(self):
self.url = ref_proftp
try:
- fo = urllib2.urlopen(self.url).close()
+ fo = urlopen(self.url).close()
except IOError:
self.skip()
def test_restart_workaround(self):
inst = grabber.URLGrabber()
rslt = inst.urlread(self.url, range=(500, 1000))
-
+
class BaseProxyTests(TestCase):
good_p = '%s://%s:%s@%s:%i' % (proxy_proto, proxy_user,
good_proxy_pass, proxy_host, proxy_port)
@@ -591,8 +609,8 @@ class ProxyFTPAuthTests(ProxyHTTPAuthTests):
if not self.have_proxy():
self.skip()
try:
- fo = urllib2.urlopen(self.url).close()
- except IOError:
+ fo = urlopen(self.url).close()
+ except URLError:
self.skip()
self.g = URLGrabber()
@@ -604,4 +622,3 @@ if __name__ == '__main__':
grabber.DEBUG = 0
runner = TextTestRunner(stream=sys.stdout,descriptions=1,verbosity=2)
runner.run(suite())
-