diff options
Diffstat (limited to 'scripts/urlgrabber')
-rwxr-xr-x[-rw-r--r--] | scripts/urlgrabber | 75 |
1 files changed, 43 insertions, 32 deletions
diff --git a/scripts/urlgrabber b/scripts/urlgrabber index 518e512..1b1e077 100644..100755 --- a/scripts/urlgrabber +++ b/scripts/urlgrabber @@ -19,6 +19,8 @@ # This file is part of urlgrabber, a high-level cross-protocol url-grabber # Copyright 2002-2006 Michael D. Stenner, Ryan Tomayko +from __future__ import print_function + """NAME urlgrabber - a simple client for the urlgrabber python package @@ -115,6 +117,7 @@ options: including quotes in the case of strings. e.g. --user_agent='"foobar/2.0"' + --output FILE -o FILE write output to FILE, otherwise the basename of the url will be used -O print the names of saved files to STDOUT @@ -130,8 +133,6 @@ options: --profile profile the actual fetching and print the results """ -# $Id: urlgrabber,v 1.7 2006/12/08 00:14:16 mstenner Exp $ - import sys import getopt import re @@ -170,12 +171,17 @@ class client_options: return ug_options, ug_defaults def process_command_line(self): - short_options = 'vd:hoOpD' + short_options = 'vd:ho:OpD' long_options = ['profile', 'repeat=', 'verbose=', - 'debug=', 'help', 'progress'] + 'debug=', 'help', 'progress', 'output='] ug_long = [ o + '=' for o in self.ug_options ] - optlist, args = getopt.getopt(sys.argv[1:], short_options, - long_options + ug_long) + try: + optlist, args = getopt.getopt(sys.argv[1:], short_options, + long_options + ug_long) + except getopt.GetoptError as e: + print("Error:", e, file=sys.stderr) + self.help([], ret=1) + self.verbose = 0 self.debug = None self.outputfile = None @@ -193,6 +199,7 @@ class client_options: if o == '--verbose': self.verbose = v if o == '-v': self.verbose += 1 if o == '-o': self.outputfile = v + if o == '--output': self.outputfile = v if o == '-p' or o == '--progress': self.progress = 1 if o == '-d' or o == '--debug': self.debug = v if o == '--profile': self.profile = 1 @@ -202,7 +209,7 @@ class client_options: self.repeat = int(v) if self.repeat < 1: raise ValueError() except ValueError: - print 'ERROR: repeat value must be an int >= 1' + print('ERROR: repeat value must be an int >= 1') sys.exit(1) if o == '-D': self.verbose = 3 @@ -211,20 +218,20 @@ class client_options: if o in ug_dash: try: val = eval(v) - except Exception, e: - print "error processing option value: %s" % v - print e + except Exception as e: + print("error processing option value: %s" % v) + print(e) sys.exit(1) else: self.ugops[o[2:]] = val if len(self.args) > 1 and self.outputfile is not None: - print "ERROR: cannot use -o when grabbing multiple files" + print("ERROR: cannot use -o when grabbing multiple files") sys.exit(1) - def help(self, args): + def help(self, args, ret=0): if not args: - print MAINHELP + print(MAINHELP) else: for a in args: m = getattr(self, 'help_'+a, None) @@ -233,20 +240,20 @@ class client_options: elif a in self.ug_options: self.help_ug_option(a) else: - print 'ERROR: no help on command "%s"' % a - sys.exit(0) + print('ERROR: no help on command "%s"' % a) + sys.exit(ret) def help_doc(self): - print __doc__ + print(__doc__) def help_options(self): width = max(map(len, self.ug_options)) format = ' %-' + str(width) + 's = %s' hformat = ' %-' + str(width) + 's %s' - print hformat % ('OPTION', 'DEFAULT') - print '-'*(width + 20) + print(hformat % ('OPTION', 'DEFAULT')) + print('-'*(width + 20)) for k in self.ug_options: - print format % (k, self.ug_defaults[k]) + print(format % (k, self.ug_defaults[k])) def help_all(self): for k in self.ug_options: @@ -257,21 +264,21 @@ class client_options: m = re.search(r'^( '+option+'.*?)\s*^ {,2}\S', urlgrabber.grabber.__doc__, re.M|re.S) if m: - print m.group(1) + print(m.group(1)) else: - print ' %s: no help found for this option' % option - print '' + print(' %s: no help found for this option' % option) + print('') class ugclient: def __init__(self): op = client_options() self.op = op if op.verbose >= 2 and op.ugops: - print "Module Options:" + print("Module Options:") width = max(map(len, op.ugops.keys())) format = " %-" + str(width) + "s = %s" for k, v in op.ugops.items(): - print format % (k, repr(v)) + print(format % (k, repr(v))) if op.debug: self.set_debug_logger(op.debug) @@ -287,22 +294,26 @@ class ugclient: def run(self): for url in self.op.args: - if self.op.verbose: print 'grabbing: %s' % url + if self.op.verbose: print('grabbing: %s' % url) try: for i in range(0, self.op.repeat): f = self.g.urlgrab(url, self.op.outputfile) - if self.op.localfile: print f - except URLGrabError, e: - print e - + if self.op.localfile: print(f) + except URLGrabError as e: + print(e) + sys.exit(1) + def set_debug_logger(self, dbspec): try: dbinfo = dbspec.split(',') import logging - level = logging._levelNames.get(dbinfo[0], None) - if level is None: level = int(dbinfo[0]) + if sys.version_info.major == 2: + level = logging._levelNames.get(dbinfo[0], None) + else: + level = logging.getLevelName(dbinfo[0]) + if level is None or not isinstance(level, int): level = int(dbinfo[0]) if level < 1: raise ValueError() - + formatter = logging.Formatter('%(asctime)s %(message)s') if len(dbinfo) > 1: filename = dbinfo[1] else: filename = '' |