|  | #!/usr/bin/env python | 
|  | # Copyright (c) 2011, the Dart project authors.  Please see the AUTHORS file | 
|  | # for details. All rights reserved. Use of this source code is governed by a | 
|  | # BSD-style license that can be found in the LICENSE file. | 
|  | ''' | 
This script finds all HTML pages under a folder (searching subfolders too) and
downloads all of their images, replacing the image URLs with local ones.
|  | ''' | 
|  | import os, sys, optparse, subprocess, multiprocessing | 
|  | from os.path import abspath, basename, dirname, join | 
|  |  | 
# Directory containing this script.
SWARM_PATH = dirname(abspath(__file__))
# Two directory levels above SWARM_PATH.
CLIENT_PATH = dirname(dirname(SWARM_PATH))
# The 'tools' directory directly under CLIENT_PATH.
CLIENT_TOOLS_PATH = join(CLIENT_PATH, 'tools')

# Add the client tools directory so we can find htmlconverter.py.
# The sys.path change has to happen before the import that follows it.
sys.path.append(CLIENT_TOOLS_PATH)
import htmlconverter
# Path of the htmlconverter.py file itself (built with a literal '/' rather
# than os.path.join).
converter = CLIENT_TOOLS_PATH + '/htmlconverter.py'
|  |  | 
|  | # This has to be a top level function to use with multiprocessing | 
def convertImgs(infile):
    """Run htmlconverter.convertForOffline on a single HTML file.

    ``infile`` is passed as both the first and the second argument of
    convertForOffline (presumably input and output path, i.e. an in-place
    conversion -- confirm against htmlconverter). The ``verbose`` and
    ``inline_images`` settings are read from the module-level ``options``
    global, which main() assigns before any conversion starts.

    Args:
        infile: Path of the HTML file to convert.

    Returns:
        None. Success and failure are both reported on stdout; no exception
        is propagated to the caller.
    """
    global options
    try:
        htmlconverter.convertForOffline(
            infile, infile,
            verbose=options.verbose,
            encode_images=options.inline_images)
        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print('Converted ' + infile)
    # BaseException also covers KeyboardInterrupt and SystemExit, so nothing
    # raised during the conversion escapes this function. The ``as`` spelling
    # is valid from Python 2.6 on, unlike the Python-2-only comma form.
    except BaseException as e:
        print('Caught error: %s' % e)
|  |  | 
def Flags():
    """ Builds the optparse parser for this script's command-line flags.

    Both flags are booleans that default to False and are switched on by
    their presence on the command line.
    """
    # (flag name, help text) pairs, registered in this order.
    boolean_flags = (
        ("--inline_images",
         "Encode img payloads as data:// URLs rather than local files."),
        ("--verbose",
         "Print verbose output"),
    )

    flag_parser = optparse.OptionParser()
    for flag_name, help_text in boolean_flags:
        flag_parser.add_option(
            flag_name, help=help_text, default=False, action="store_true")
    return flag_parser
|  |  | 
def main():
    """Convert every .html file found under the DIRECTORY argument.

    Parses the command line with the parser from Flags(), walks the given
    directory recursively collecting files whose names end in '.html', and
    runs convertImgs on each of them in a multiprocessing pool.

    Returns:
        1 after printing a usage line when no directory argument was given
        or the argument is the literal word 'help'; 0 otherwise.

    Raises:
        multiprocessing.TimeoutError: if the conversions do not finish
            within one hour.
    """
    # convertImgs reads the parsed flags from this module-level global.
    # NOTE(review): pool workers presumably only see this value when they
    # are forked after the assignment -- confirm on spawn-based platforms.
    global options
    parser = Flags()
    options, args = parser.parse_args()
    print("args: %s" % args)
    # Only the literal word 'help' asks for usage. A substring test would
    # also reject real directories such as 'docs/helpcenter'.
    if not args or args[0] == 'help':
        print('Usage: %s DIRECTORY' % basename(sys.argv[0]))
        return 1

    # Not called 'dirname', so the imported os.path.dirname is not shadowed.
    search_root = args[0]
    print('Searching directory ' + search_root)

    html_files = []
    for root, _dirs, fnames in os.walk(search_root):
        html_files.extend(
            join(root, fname) for fname in fnames if fname.endswith('.html'))

    if not html_files:
        # Nothing to convert, so do not start any worker processes.
        print('No .html files found in ' + search_root)
        return 0

    # Four workers per CPU, but never more workers than files.
    worker_count = min(4 * multiprocessing.cpu_count(), len(html_files))
    pool = multiprocessing.Pool(processes=worker_count)
    try:
        # Note: need a timeout to get keyboard interrupt due to a Python bug
        pool.map_async(convertImgs, html_files).get(3600)  # one hour
    except BaseException:
        # Timeout or Ctrl-C: stop the workers rather than leaving them
        # running, then let the original exception propagate.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        # Valid after either close() or terminate(); reaps the workers.
        pool.join()
    return 0
|  |  | 
if __name__ == '__main__':
    # Hand main()'s return value to the shell as the exit status, so the
    # usage-error path (return 1) is visible to callers; a None return
    # still exits with status 0.
    sys.exit(main())