1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
- """Compare the speed of downloading URLs sequentially vs. using futures."""
- import functools
- import time
- import timeit
- import sys
- try:
- from urllib2 import urlopen
- except ImportError:
- from urllib.request import urlopen
- from concurrent.futures import (as_completed, ThreadPoolExecutor,
- ProcessPoolExecutor)
# Sites fetched by every download strategy in this benchmark.
# NOTE(review): the "thisurlprobablydoesnotexist" entry looks like a
# deliberate failure case (the downloaders skip URLs that raise) -- confirm.
URLS = [
    'http://www.google.com/',
    'http://www.apple.com/',
    'http://www.ibm.com',
    'http://www.thisurlprobablydoesnotexist.com',
    'http://www.slashdot.org/',
    'http://www.python.org/',
    'http://www.bing.com/',
    'http://www.facebook.com/',
    'http://www.yahoo.com/',
    'http://www.youtube.com/',
    'http://www.blogger.com/',
]
def load_url(url, timeout):
    """Fetch ``url`` and return the full response body.

    Args:
        url: The URL to open with ``urlopen``.
        timeout: Timeout in seconds handed to ``urlopen``. It is only
            forwarded on Python 2.6 and newer; on older interpreters it is
            ignored because the keyword is not passed at all.

    Returns:
        Whatever ``read()`` on the opened response returns (the raw body).

    Raises:
        Any exception raised by ``urlopen`` or by reading the response is
        propagated to the caller unchanged.
    """
    kwargs = {'timeout': timeout} if sys.version_info >= (2, 6) else {}
    response = urlopen(url, **kwargs)
    try:
        return response.read()
    finally:
        # Release the underlying connection deterministically instead of
        # waiting for garbage collection; try/finally (rather than ``with``)
        # keeps this working with Python 2 response objects as well.
        response.close()
def download_urls_sequential(urls, timeout=60):
    """Download ``urls`` one after another in the calling thread.

    Args:
        urls: Iterable of URLs; each is passed to ``load_url``.
        timeout: Per-request timeout in seconds forwarded to ``load_url``.

    Returns:
        A dict mapping each URL that was fetched successfully to its
        content. URLs whose download raised are left out.
    """
    url_to_content = {}
    for url in urls:
        try:
            url_to_content[url] = load_url(url, timeout=timeout)
        except Exception:
            # Best-effort: a failed download is simply omitted from the
            # result. Only ``Exception`` is caught so that Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the run.
            continue
    return url_to_content
def download_urls_with_executor(urls, executor, timeout=60):
    """Download ``urls`` concurrently by submitting them to ``executor``.

    Args:
        urls: Iterable of URLs; one ``load_url`` call is submitted per URL.
        executor: An object providing ``submit`` and ``shutdown`` (the
            callers in this file pass concurrent.futures executors). It is
            always shut down before this function returns or raises, so it
            cannot be reused afterwards.
        timeout: Per-request timeout in seconds forwarded to ``load_url``.

    Returns:
        A dict mapping each URL that was fetched successfully to its
        content. URLs whose future raised are left out.
    """
    try:
        # Remember which URL each future belongs to so results can be
        # attributed as they complete, in whatever order that happens.
        future_to_url = {}
        for url in urls:
            future_to_url[executor.submit(load_url, url, timeout)] = url

        url_to_content = {}
        for future in as_completed(future_to_url):
            try:
                url_to_content[future_to_url[future]] = future.result()
            except Exception:
                # Best-effort: a failed download is simply omitted. Only
                # ``Exception`` is caught so that Ctrl-C (KeyboardInterrupt)
                # and SystemExit are not silently swallowed.
                continue
        return url_to_content
    finally:
        executor.shutdown()
def main():
    """Time each download strategy over ``URLS`` and print one line per run.

    Runs, in order, the sequential downloader, the executor-based downloader
    with a 10-worker process pool, and the same with a 10-worker thread
    pool. For each, writes the strategy name, the elapsed seconds, and how
    many of the URLs were downloaded to standard output.
    """
    strategies = [
        ('sequential',
         functools.partial(download_urls_sequential, URLS)),
        ('processes',
         functools.partial(download_urls_with_executor,
                           URLS,
                           ProcessPoolExecutor(10))),
        ('threads',
         functools.partial(download_urls_with_executor,
                           URLS,
                           ThreadPoolExecutor(10))),
    ]
    for name, fn in strategies:
        sys.stdout.write('%s: ' % name.ljust(12))
        # The label has no trailing newline, so flush it explicitly;
        # otherwise it may not appear until the download has finished.
        sys.stdout.flush()
        # timeit.default_timer is the clock intended for benchmarking;
        # time.time() can jump if the system clock is adjusted mid-run.
        start = timeit.default_timer()
        url_map = fn()
        elapsed = timeit.default_timer() - start
        sys.stdout.write('%.2f seconds (%d of %d downloaded)\n' %
                         (elapsed, len(url_map), len(URLS)))


if __name__ == '__main__':
    main()
|