Diffstat (limited to 'test/grabberperf.py')
-rw-r--r-- | test/grabberperf.py | 137
1 files changed, 137 insertions, 0 deletions
diff --git a/test/grabberperf.py b/test/grabberperf.py
new file mode 100644
index 0000000..820da2c
--- /dev/null
+++ b/test/grabberperf.py
@@ -0,0 +1,137 @@
+#!/usr/bin/python -t
+
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the
+#      Free Software Foundation, Inc.,
+#      59 Temple Place, Suite 330,
+#      Boston, MA  02111-1307  USA
+
+# This file is part of urlgrabber, a high-level cross-protocol url-grabber
+# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
+
+import sys
+import os
+from os.path import dirname, join as joinpath
+import tempfile
+import time
+
+import urlgrabber.grabber as grabber
+from urlgrabber.grabber import URLGrabber, urlgrab, urlopen, urlread
+from urlgrabber.progress import text_progress_meter
+
+tempsrc = '/tmp/ug-test-src'
+tempdst = '/tmp/ug-test-dst'
+
+# this isn't used but forces a proxy handler to be
+# added when creating the urllib2 opener.
+proxies = { 'http' : 'http://localhost' }
+DEBUG=0
+
+def main():
+    speedtest(1024)          # 1 KB
+    speedtest(10 * 1024)     # 10 KB
+    speedtest(100 * 1024)    # 100 KB
+    speedtest(1000 * 1024)   # 1,000 KB (almost 1 MB)
+    #speedtest(10000 * 1024) # 10,000 KB (almost 10 MB)
+    # remove temp files
+    os.unlink(tempsrc)
+    os.unlink(tempdst)
+
+def setuptemp(size):
+    if DEBUG: print 'writing %d KB to temporary file (%s).' % (size / 1024, tempsrc)
+    file = open(tempsrc, 'w', 1024)
+    chars = '0123456789'
+    for i in range(size):
+        file.write(chars[i % 10])
+    file.flush()
+    file.close()
+
+def speedtest(size):
+    setuptemp(size)
+    full_times = []
+    raw_times = []
+    none_times = []
+    throttle = 2**40 # throttle to 1 TB/s :)
+
+    try:
+        from urlgrabber.progress import text_progress_meter
+    except ImportError, e:
+        tpm = None
+        print 'not using progress meter'
+    else:
+        tpm = text_progress_meter(fo=open('/dev/null', 'w'))
+
+    # to address concerns that the overhead from the progress meter
+    # and throttling slow things down, we do this little test.
+    #
+    # using this test, you get the FULL overhead of the progress
+    # meter and throttling, without the benefit: the meter is directed
+    # to /dev/null and the throttle bandwidth is set EXTREMELY high.
+    #
+    # note: it _is_ even slower to direct the progress meter to a real
+    # tty or file, but I'm just interested in the overhead from _this_
+    # module.
+
+    # get it nicely cached before we start comparing
+    if DEBUG: print 'pre-caching'
+    for i in range(100):
+        urlgrab(tempsrc, tempdst, copy_local=1, throttle=None, proxies=proxies)
+
+    if DEBUG: print 'running speed test.'
+
+    reps = 500
+    for i in range(reps):
+        if DEBUG:
+            print '\r%4i/%-4i' % (i+1, reps),
+            sys.stdout.flush()
+        t = time.time()
+        urlgrab(tempsrc, tempdst,
+                copy_local=1, progress_obj=tpm,
+                throttle=throttle, proxies=proxies)
+        full_times.append(1000 * (time.time() - t))
+
+        t = time.time()
+        urlgrab(tempsrc, tempdst,
+                copy_local=1, progress_obj=None,
+                throttle=None, proxies=proxies)
+        raw_times.append(1000 * (time.time() - t))
+
+        t = time.time()
+        in_fo = open(tempsrc)
+        out_fo = open(tempdst, 'wb')
+        while 1:
+            s = in_fo.read(1024 * 8)
+            if not s: break
+            out_fo.write(s)
+        in_fo.close()
+        out_fo.close()
+        none_times.append(1000 * (time.time() - t))
+
+    if DEBUG: print '\r'
+
+    print "%d KB Results:" % (size / 1024)
+    print_result('full', full_times)
+    print_result('raw', raw_times)
+    print_result('none', none_times)
+
+def print_result(label, result_list):
+    format = '[%4s] mean: %6.3f ms, median: %6.3f ms, ' \
+             'min: %6.3f ms, max: %6.3f ms'
+    result_list.sort()
+    mean = 0.0
+    for i in result_list: mean += i
+    mean = mean/len(result_list)
+    median = result_list[int(len(result_list)/2)]
+    print format % (label, mean, median, result_list[0], result_list[-1])
+
+if __name__ == '__main__':
+    main()
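
For context, the core measurement the patch repeats is simply timing urlgrab() against a small local file. Below is a minimal standalone sketch of that pattern, not part of the patch itself; it assumes urlgrabber is importable under Python 2 and that /tmp is writable, and it reuses the script's temp-file names purely for illustration:

    import time
    from urlgrabber.grabber import urlgrab

    src = '/tmp/ug-test-src'                  # scratch files, mirroring the script above
    dst = '/tmp/ug-test-dst'
    open(src, 'w').write('0123456789' * 100)  # small local source to copy

    t = time.time()
    # copy_local=1 forces a real copy even though the source is a local file
    urlgrab(src, dst, copy_local=1)
    print 'one local grab took %.3f ms' % (1000 * (time.time() - t))

The 'full', 'raw', and 'none' rows printed by print_result() compare that call with the progress meter and throttle enabled (meter sent to /dev/null, throttle set absurdly high), with both disabled, and with a plain read/write loop as the baseline.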