blob: 40d5ca58d197a51ebea3106ccf5fb457baad3470 [file] [log] [blame]
// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
/// Transformer that inlines polymer-element definitions from html imports.
library polymer.src.build.import_inliner;
import 'dart:async';
import 'dart:convert';
import 'package:barback/barback.dart';
import 'package:path/path.dart' as path;
import 'package:html5lib/dom.dart' show
Document, DocumentFragment, Element, Node;
import 'package:html5lib/dom_parsing.dart' show TreeVisitor;
import 'package:source_maps/span.dart';
import 'code_extractor.dart'; // import just for documentation.
import 'common.dart';
/// Inlines the HTML imports and stylesheets referenced by the primary input of
/// a [Transform], and records the Dart scripts found in imported documents.
///
/// A new instance is created for each transformed asset; the work is done by
/// [apply].
class _HtmlInliner extends PolymerTransformer {
  final TransformOptions options;
  final Transform transform;
  final TransformLogger logger;

  /// Id of the primary input of [transform].
  final AssetId docId;

  /// Ids of the documents visited so far; an import whose id is already in
  /// this set is dropped instead of being inlined a second time.
  final seen = new Set<AssetId>();

  /// Ids of the Dart scripts removed from imported documents, in the order in
  /// which they were encountered.
  final scriptIds = <AssetId>[];

  static const TYPE_DART = 'application/dart';
  static const TYPE_JS = 'text/javascript';

  _HtmlInliner(this.options, Transform transform)
      : transform = transform,
        logger = transform.logger,
        docId = transform.primaryInput.id;

  /// Processes the primary input and emits two outputs:
  ///
  ///  * the document itself (re-serialized only if [_visitImports] reports a
  ///    change, otherwise the primary input is passed through untouched), and
  ///  * a `<docId>.scriptUrls` asset holding the JSON-encoded [scriptIds].
  Future apply() {
    seen.add(docId);

    return readPrimaryAsHtml(transform).then((document) =>
        _visitImports(document, docId).then((importsFound) {
          var output = transform.primaryInput;
          if (importsFound) {
            output = new Asset.fromString(docId, document.outerHtml);
          }
          transform.addOutput(output);

          // We produce a secondary asset with extra information for later
          // phases.
          transform.addOutput(new Asset.fromString(
              docId.addExtension('.scriptUrls'),
              JSON.encode(scriptIds, toEncodable: (id) => id.serialize())));
        }));
  }

  /// Visits the `<link>` tags of [document], inlining html imports and
  /// stylesheets in place. Links are processed one after the other, in
  /// document order, and transitive imports are inlined before the document
  /// that imports them.
  ///
  /// [sourceId] is the asset that [document] was read from; link urls are
  /// resolved against it.
  ///
  /// Returns `true` if and only if the document was changed and should be
  /// written out.
  Future<bool> _visitImports(Document document, AssetId sourceId) {
    bool changed = false;

    _moveHeadToBody(document);

    // Note: we need to preserve the import order in the generated output.
    return Future.forEach(document.querySelectorAll('link'), (Element tag) {
      var rel = tag.attributes['rel'];
      if (rel != 'import' && rel != 'stylesheet') return null;

      var href = tag.attributes['href'];
      var id = resolve(sourceId, href, transform.logger, tag.sourceSpan,
          allowAbsolute: rel == 'stylesheet');

      if (rel == 'import') {
        changed = true;
        // An import that can't be resolved, or that was already inlined, is
        // simply dropped from the output.
        if (id == null || !seen.add(id)) {
          tag.remove();
          return null;
        }
        return _inlineImport(id, tag);
      } else if (rel == 'stylesheet') {
        // Stylesheets without a resolved id are left as regular links.
        if (id == null) return null;
        changed = true;
        return _inlineStylesheet(id, tag);
      }
    }).then((_) => changed);
  }

  /// To preserve the order of scripts with respect to inlined
  /// link rel=import, we move both of those into the body before we do any
  /// inlining.
  ///
  /// Note: we do this for stylesheets as well to preserve ordering with
  /// respect to each other, because stylesheets can be pulled in transitively
  /// from imports.
  // TODO(jmesserly): vulcanizer doesn't need this because they inline JS
  // scripts, causing them to be naturally moved as part of the inlining.
  // Should we do the same? Alternatively could we inline head into head and
  // body into body and avoid this whole thing?
  void _moveHeadToBody(Document doc) {
    // Everything is inserted before the node that was first in the body when
    // we started, so the moved nodes keep their relative order.
    var insertionPoint = doc.body.firstChild;

    // Iterate over a snapshot of the head's children, since the loop body
    // moves nodes out of the head.
    for (var node in doc.head.nodes.toList(growable: false)) {
      if (node is! Element) continue;
      var tag = node.tagName;
      var type = node.attributes['type'];
      var rel = node.attributes['rel'];

      var isStyle = tag == 'style';
      var isScript = tag == 'script' &&
          (type == null || type == TYPE_JS || type == TYPE_DART);
      var isInlinableLink =
          tag == 'link' && (rel == 'stylesheet' || rel == 'import');

      if (isStyle || isScript || isInlinableLink) {
        // Move the node into the body, where its contents will be placed.
        doc.body.insertBefore(node, insertionPoint);
      }
    }
  }

  /// Loads the asset identified by [id], visits its imports and collects its
  /// html imports. Then inlines it into the main document, replacing [link].
  Future _inlineImport(AssetId id, Element link) =>
      readAsHtml(id, transform).then((doc) => _visitImports(doc, id).then((_) {
            // Urls are normalized first, so the `src` attributes seen by
            // [_extractScripts] have already gone through [_UrlNormalizer].
            new _UrlNormalizer(transform, id).visit(doc);
            _extractScripts(doc);

            // TODO(jmesserly): figure out how this is working in vulcanizer.
            // Do they produce a <body> tag with a <head> and <body> inside?
            var imported = new DocumentFragment();
            imported.nodes..addAll(doc.head.nodes)..addAll(doc.body.nodes);
            link.replaceWith(imported);
          }));

  /// Reads the stylesheet identified by [id], normalizes the urls in its text
  /// and replaces [link] with a `<style>` element holding the result.
  Future _inlineStylesheet(AssetId id, Element link) {
    return transform.readInputAsString(id).then((css) {
      var url = spanUrlFor(id, transform);
      css = new _UrlNormalizer(transform, id).visitCss(css, url);
      link.replaceWith(new Element.tag('style')..text = css);
    });
  }

  /// Split Dart script tags from all the other elements. Now that Dartium
  /// only allows a single script tag per page, we can't inline script
  /// tags. Instead, we collect the urls of each script tag so we import
  /// them directly from the Dart bootstrap code.
  ///
  /// Every Dart script tag is removed from [document]. Only the first one is
  /// considered for [scriptIds]; any further one is reported with a warning.
  void _extractScripts(Document document) {
    bool first = true;
    for (var script in document.querySelectorAll('script')) {
      if (script.attributes['type'] != TYPE_DART) continue;
      script.remove();

      // only one Dart script per document is supported in Dartium.
      if (!first) {
        // TODO(jmesserly): remove this when we are running linter.
        logger.warning('more than one Dart script per HTML '
            'document is not supported. Script will be ignored.',
            span: script.sourceSpan);
        continue;
      }
      first = false;

      var src = script.attributes['src'];
      if (src == null) {
        logger.warning('unexpected script without a src url. The '
            'ImportInliner transformer should run after running the '
            'InlineCodeExtractor', span: script.sourceSpan);
        continue;
      }

      // Every other call to `resolve` in this file treats a null result as
      // "could not be resolved". Do the same here, otherwise a null entry ends
      // up in the '.scriptUrls' asset.
      // NOTE(review): `resolve` is handed the logger and span, so presumably
      // it has already reported the problem - confirm.
      var scriptId = resolve(docId, src, logger, script.sourceSpan);
      if (scriptId != null) scriptIds.add(scriptId);
    }
  }
}
/// Transformer that recursively inlines the contents of HTML imports.
///
/// For each entry point it produces a single HTML file with the
/// polymer-element definitions inlined, plus a text file listing, in order,
/// the URIs of the libraries that were sourced through script tags.
///
/// All script tags are assumed to point to external files. To support script
/// tags with inlined code, run [InlineCodeExtractor] in an earlier phase.
class ImportInliner extends Transformer {
  final TransformOptions options;

  ImportInliner(this.options);

  /// Only run on entry point .html files.
  Future<bool> isPrimary(Asset input) {
    var isEntryPoint = options.isHtmlEntryPoint(input.id);
    return new Future.value(isEntryPoint);
  }

  /// Hands the actual work over to a fresh [_HtmlInliner] for [transform].
  Future apply(Transform transform) {
    var inliner = new _HtmlInliner(options, transform);
    return inliner.apply();
  }
}
/// Internally adjusts urls in the html that we are about to inline.
///
/// Urls found in element attributes ([visitElement]) and in CSS text
/// ([visitCss]) are rewritten by [_newUrl].
class _UrlNormalizer extends TreeVisitor {
  final Transform transform;

  /// Asset where the original content (and original url) was found.
  final AssetId sourceId;

  _UrlNormalizer(this.transform, this.sourceId);

  /// Rewrites every attribute of [node] listed in [_urlAttributes] whose value
  /// is non-empty and does not start with `{{`, then defers to the superclass
  /// implementation.
  visitElement(Element node) {
    for (var key in node.attributes.keys) {
      if (_urlAttributes.contains(key)) {
        var url = node.attributes[key];
        if (url != null && url != '' && !url.startsWith('{{')) {
          node.attributes[key] = _newUrl(url, node.sourceSpan);
        }
      }
    }
    super.visitElement(node);
  }

  static final _URL = new RegExp(r'url\(([^)]*)\)', multiLine: true);
  static final _QUOTE = new RegExp('["\']', multiLine: true);

  /// Visit the CSS text and replace any relative URLs so we can inline it.
  ///
  /// [url] is the name given to the source file used to build the spans that
  /// are handed to [_newUrl]. Returns [cssText] with every `url(...)`
  /// occurrence rewritten.
  // Ported from:
  // https://github.com/Polymer/vulcanize/blob/c14f63696797cda18dc3d372b78aa3378acc691f/lib/vulcan.js#L149
  // TODO(jmesserly): use csslib here instead? Parsing with RegEx is sadness.
  // Maybe it's reliable enough for finding URLs in CSS? I'm not sure.
  String visitCss(String cssText, String url) {
    var src = new SourceFile.text(url, cssText);
    return cssText.replaceAllMapped(_URL, (match) {
      var span = src.span(match.start, match.end);
      // Extract the URL, without any surrounding quotes. CSS also allows
      // whitespace between the parentheses and the url (`url( "a.png" )`);
      // it is not part of the url and must not leak into the resolved path.
      var href = match[1].replaceAll(_QUOTE, '').trim();
      href = _newUrl(href, span);
      return 'url($href)';
    });
  }

  /// Computes the url to use in the inlined output for [href], a url that
  /// appeared in [sourceId] at [span].
  ///
  /// [href] is returned unchanged when it has a scheme or a host, when its
  /// path is empty, when it is an absolute path, or when it can't be resolved
  /// to an asset id. Otherwise:
  ///
  ///  * an asset under `lib/` maps to `packages/<package>/<rest>`,
  ///  * an asset under `asset/` maps to `assets/<package>/<rest>`,
  ///  * any other asset of the primary input's package maps to a path relative
  ///    to the directory of the primary input,
  ///  * any other asset of a different package is reported as an error and
  ///    [href] is returned unchanged.
  _newUrl(String href, Span span) {
    var uri = Uri.parse(href);
    if (uri.isAbsolute) return href;
    if (!uri.scheme.isEmpty) return href;
    if (!uri.host.isEmpty) return href;
    if (uri.path.isEmpty) return href; // Implies standalone ? or # in URI.

    // [href] is a url, so it has to be tested with url semantics. The
    // platform default context follows the host OS rules, under which a
    // root-relative url such as `/a.png` is not absolute on Windows.
    var builder = path.url;
    if (builder.isAbsolute(href)) return href;

    var id = resolve(sourceId, href, transform.logger, span);
    if (id == null) return href;
    var primaryId = transform.primaryInput.id;

    if (id.path.startsWith('lib/')) {
      return 'packages/${id.package}/${id.path.substring(4)}';
    }

    if (id.path.startsWith('asset/')) {
      return 'assets/${id.package}/${id.path.substring(6)}';
    }

    if (primaryId.package != id.package) {
      // Technically we shouldn't get there
      transform.logger.error("don't know how to include $id from $primaryId",
          span: span);
      return href;
    }

    // Both paths are anchored at '/' so that `relative` compares them from a
    // common root.
    return builder.relative(builder.join('/', id.path),
        from: builder.join('/', builder.dirname(primaryId.path)));
  }
}
/// Names of the HTML attributes whose value is a URL.
/// <http://dev.w3.org/html5/spec/section-index.html#attributes-1>
///
/// Each attribute listed here holds a URL in every DOM context where it may
/// appear. The elements that accept each attribute are noted above its entry.
const _urlAttributes = const <String>[
  // form
  'action',
  // body
  'background',
  // blockquote, del, ins, q
  'cite',
  // object
  'data',
  // button, input
  'formaction',
  // a, area, link, base, command
  'href',
  // command
  'icon',
  // html
  'manifest',
  // video
  'poster',
  // audio, embed, iframe, img, input, script, source, track, video
  'src',
];