#!/usr/bin/env python3
# Copyright (c) 2011, the Dart project authors.  Please see the AUTHORS file
# for details. All rights reserved. Use of this source code is governed by a
# BSD-style license that can be found in the LICENSE file.
'''
This script finds all HTML pages in a folder and downloads all images, replacing
the urls with local ones.
'''
import os, sys, optparse, subprocess, multiprocessing
from os.path import abspath, basename, dirname, join

# Directory that contains this script.
SWARM_PATH = dirname(abspath(__file__))
# Two directory levels above SWARM_PATH.
CLIENT_PATH = dirname(dirname(SWARM_PATH))
CLIENT_TOOLS_PATH = join(CLIENT_PATH, 'tools')

# Add the client tools directory so we can find htmlconverter.py.
# The path must be appended before the import below can succeed.
sys.path.append(CLIENT_TOOLS_PATH)
import htmlconverter
# Path of the htmlconverter.py script itself.
# NOTE(review): not referenced in the visible part of this file -- confirm it
# is still needed.
converter = CLIENT_TOOLS_PATH + '/htmlconverter.py'


# This has to be a top level function to use with multiprocessing
def convertImgs(infile):
    """Run htmlconverter.convertForOffline on a single HTML file.

    The same path is passed as both the first and second argument of
    convertForOffline (presumably input and output, i.e. an in-place
    conversion -- confirm against htmlconverter). The `verbose` and
    `inline_images` flags are read from the module-global `options`.

    Any error, including a missing `options` global, is reported on stdout
    instead of being raised, so one failing file does not abort the batch.

    Args:
        infile: Path of the HTML file to convert.
    """
    global options
    try:
        convert = htmlconverter.convertForOffline
        flag_kwargs = {
            'verbose': options.verbose,
            'encode_images': options.inline_images,
        }
        convert(infile, infile, **flag_kwargs)
        print('Converted ' + infile)
    except BaseException as err:
        print('Caught error: %s' % err)


def Flags():
    """Build the optparse parser for this script's command-line flags.

    Both flags are boolean switches that default to False:
      --inline_images
      --verbose

    Returns:
        The configured optparse.OptionParser.
    """
    switches = (
        ("--inline_images",
         "Encode img payloads as data:// URLs rather than local files."),
        ("--verbose", "Print verbose output"),
    )
    parser = optparse.OptionParser()
    for flag, description in switches:
        parser.add_option(flag,
                          help=description,
                          default=False,
                          action='store_true')
    return parser


def _initWorker(parsed_options):
    """Store the parsed flags in the module-global `options` of a worker.

    Runs once in every pool worker. Under the 'spawn' and 'forkserver' start
    methods a worker does not inherit globals assigned at runtime in the
    parent, so without this convertImgs would fail in the worker with a
    NameError on `options`.

    Args:
        parsed_options: The options object returned by parse_args().
    """
    global options
    options = parsed_options


def main():
    """Convert every .html file found under the directory given on argv.

    Parses the command-line flags, walks the directory recursively and hands
    each '.html' file to convertImgs in a pool of worker processes.

    Returns:
        1 if no directory was given (or the argument is the word 'help'),
        0 after the batch has been processed.

    Raises:
        multiprocessing.TimeoutError: if the batch takes longer than an hour.
    """
    global options
    parser = Flags()
    options, args = parser.parse_args()
    print("args: %s" % args)
    # Exact match: a substring test would reject any directory whose path
    # merely contains "help". optparse handles -h/--help itself.
    if len(args) < 1 or args[0] == 'help':
        print('Usage: %s DIRECTORY' % basename(sys.argv[0]))
        return 1

    # Named search_dir so the imported os.path.dirname is not shadowed.
    search_dir = args[0]
    print('Searching directory ' + search_dir)

    files = [
        join(root, fname)
        for root, _dirs, fnames in os.walk(search_dir)
        for fname in fnames
        if fname.endswith('.html')
    ]

    count = 4 * multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count,
                                initializer=_initWorker,
                                initargs=(options,))
    try:
        # Note: need a timeout to get keyboard interrupt due to a Python bug
        pool.map_async(convertImgs, files).get(3600)  # one hour
    finally:
        # On success every result is already in; on timeout or Ctrl-C we do
        # not want to wait for outstanding work. Either way, stop the workers
        # and reap them so the pool is not leaked.
        pool.terminate()
        pool.join()
    return 0


if __name__ == '__main__':
    # Propagate main()'s return value (1 on a usage error) as the process
    # exit status instead of always exiting with 0.
    sys.exit(main())
