summary | refs | log | tree | commit | diff
path: root/scripts/urlgrabber
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/urlgrabber')
-rwxr-xr-x [-rw-r--r--] scripts/urlgrabber 75
1 files changed, 43 insertions, 32 deletions
diff --git a/scripts/urlgrabber b/scripts/urlgrabber
index 518e512..1b1e077 100644..100755
--- a/scripts/urlgrabber
+++ b/scripts/urlgrabber
@@ -19,6 +19,8 @@
# This file is part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2006 Michael D. Stenner, Ryan Tomayko
+from __future__ import print_function
+
"""NAME
urlgrabber - a simple client for the urlgrabber python package
@@ -115,6 +117,7 @@ options:
including quotes in the case of strings.
e.g. --user_agent='"foobar/2.0"'
+ --output FILE
-o FILE write output to FILE, otherwise the basename of the
url will be used
-O print the names of saved files to STDOUT
@@ -130,8 +133,6 @@ options:
--profile profile the actual fetching and print the results
"""
-# $Id: urlgrabber,v 1.7 2006/12/08 00:14:16 mstenner Exp $
-
import sys
import getopt
import re
@@ -170,12 +171,17 @@ class client_options:
return ug_options, ug_defaults
def process_command_line(self):
- short_options = 'vd:hoOpD'
+ short_options = 'vd:ho:OpD'
long_options = ['profile', 'repeat=', 'verbose=',
- 'debug=', 'help', 'progress']
+ 'debug=', 'help', 'progress', 'output=']
ug_long = [ o + '=' for o in self.ug_options ]
- optlist, args = getopt.getopt(sys.argv[1:], short_options,
- long_options + ug_long)
+ try:
+ optlist, args = getopt.getopt(sys.argv[1:], short_options,
+ long_options + ug_long)
+ except getopt.GetoptError as e:
+ print("Error:", e, file=sys.stderr)
+ self.help([], ret=1)
+
self.verbose = 0
self.debug = None
self.outputfile = None
@@ -193,6 +199,7 @@ class client_options:
if o == '--verbose': self.verbose = v
if o == '-v': self.verbose += 1
if o == '-o': self.outputfile = v
+ if o == '--output': self.outputfile = v
if o == '-p' or o == '--progress': self.progress = 1
if o == '-d' or o == '--debug': self.debug = v
if o == '--profile': self.profile = 1
@@ -202,7 +209,7 @@ class client_options:
self.repeat = int(v)
if self.repeat < 1: raise ValueError()
except ValueError:
- print 'ERROR: repeat value must be an int >= 1'
+ print('ERROR: repeat value must be an int >= 1')
sys.exit(1)
if o == '-D':
self.verbose = 3
@@ -211,20 +218,20 @@ class client_options:
if o in ug_dash:
try:
val = eval(v)
- except Exception, e:
- print "error processing option value: %s" % v
- print e
+ except Exception as e:
+ print("error processing option value: %s" % v)
+ print(e)
sys.exit(1)
else:
self.ugops[o[2:]] = val
if len(self.args) > 1 and self.outputfile is not None:
- print "ERROR: cannot use -o when grabbing multiple files"
+ print("ERROR: cannot use -o when grabbing multiple files")
sys.exit(1)
- def help(self, args):
+ def help(self, args, ret=0):
if not args:
- print MAINHELP
+ print(MAINHELP)
else:
for a in args:
m = getattr(self, 'help_'+a, None)
@@ -233,20 +240,20 @@ class client_options:
elif a in self.ug_options:
self.help_ug_option(a)
else:
- print 'ERROR: no help on command "%s"' % a
- sys.exit(0)
+ print('ERROR: no help on command "%s"' % a)
+ sys.exit(ret)
def help_doc(self):
- print __doc__
+ print(__doc__)
def help_options(self):
width = max(map(len, self.ug_options))
format = ' %-' + str(width) + 's = %s'
hformat = ' %-' + str(width) + 's %s'
- print hformat % ('OPTION', 'DEFAULT')
- print '-'*(width + 20)
+ print(hformat % ('OPTION', 'DEFAULT'))
+ print('-'*(width + 20))
for k in self.ug_options:
- print format % (k, self.ug_defaults[k])
+ print(format % (k, self.ug_defaults[k]))
def help_all(self):
for k in self.ug_options:
@@ -257,21 +264,21 @@ class client_options:
m = re.search(r'^( '+option+'.*?)\s*^ {,2}\S',
urlgrabber.grabber.__doc__, re.M|re.S)
if m:
- print m.group(1)
+ print(m.group(1))
else:
- print ' %s: no help found for this option' % option
- print ''
+ print(' %s: no help found for this option' % option)
+ print('')
class ugclient:
def __init__(self):
op = client_options()
self.op = op
if op.verbose >= 2 and op.ugops:
- print "Module Options:"
+ print("Module Options:")
width = max(map(len, op.ugops.keys()))
format = " %-" + str(width) + "s = %s"
for k, v in op.ugops.items():
- print format % (k, repr(v))
+ print(format % (k, repr(v)))
if op.debug:
self.set_debug_logger(op.debug)
@@ -287,22 +294,26 @@ class ugclient:
def run(self):
for url in self.op.args:
- if self.op.verbose: print 'grabbing: %s' % url
+ if self.op.verbose: print('grabbing: %s' % url)
try:
for i in range(0, self.op.repeat):
f = self.g.urlgrab(url, self.op.outputfile)
- if self.op.localfile: print f
- except URLGrabError, e:
- print e
-
+ if self.op.localfile: print(f)
+ except URLGrabError as e:
+ print(e)
+ sys.exit(1)
+
def set_debug_logger(self, dbspec):
try:
dbinfo = dbspec.split(',')
import logging
- level = logging._levelNames.get(dbinfo[0], None)
- if level is None: level = int(dbinfo[0])
+ if sys.version_info.major == 2:
+ level = logging._levelNames.get(dbinfo[0], None)
+ else:
+ level = logging.getLevelName(dbinfo[0])
+ if level is None or not isinstance(level, int): level = int(dbinfo[0])
if level < 1: raise ValueError()
-
+
formatter = logging.Formatter('%(asctime)s %(message)s')
if len(dbinfo) > 1: filename = dbinfo[1]
else: filename = ''