// blob: 1fbafbd9bc5dc8402a8a30f01c6e8fe2a7f7d860 [file] [log] [blame]
// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
part of dart.dom.html;
/// Decides which elements and attributes are accepted while HTML strings are
/// parsed into DOM nodes.
///
/// Validation behavior should normally be customized through the
/// [NodeValidatorBuilder] class rather than by implementing this interface
/// directly, to reduce the chance of writing incorrect validation rules.
abstract class NodeValidator {
  /// Creates the default validator, which accepts only whitelisted HTML5
  /// elements and attributes.
  ///
  /// When [uriPolicy] is not given, the default URI policy is used.
  factory NodeValidator({UriPolicy? uriPolicy}) {
    return _Html5NodeValidator(uriPolicy: uriPolicy);
  }

  /// Creates a validator that delegates every decision to [base] but throws
  /// an [ArgumentError] where [base] would have answered `false`.
  factory NodeValidator.throws(NodeValidator base) {
    return _ThrowsNodeValidator(base);
  }

  /// Returns true if [element] is an accepted element.
  bool allowsElement(Element element);

  /// Returns true if the attribute [attributeName] with the given [value] is
  /// allowed on [element].
  ///
  /// [attributeName] is always passed in lowercase.
  bool allowsAttribute(Element element, String attributeName, String value);
}
/// Sanitizes an already-constructed node tree so that it contains no
/// disallowed elements or attributes.
///
/// Custom implementations should rarely be needed: validation rules belong in
/// custom [NodeValidator]s. Implement this class only when more complex tree
/// sanitization is required.
abstract class NodeTreeSanitizer {
  /// Creates the default tree sanitizer, which removes every element and
  /// attribute that [validator] does not allow.
  factory NodeTreeSanitizer(NodeValidator validator) {
    return _ValidatingTreeSanitizer(validator);
  }

  /// Sanitizes the tree rooted at [node].
  ///
  /// Implementations must walk the entire tree and either remove the elements
  /// and attributes that are not recognized as safe, or throw an exception,
  /// which marks the entire tree as unsafe.
  void sanitizeTree(Node node);

  /// A sanitizer for trees that are trusted.
  ///
  /// It performs no validation and allows any element. It is also more
  /// efficient, since it can pass the text directly through to the underlying
  /// APIs without creating a document fragment to be sanitized.
  static const trusted = _TrustedHtmlTreeSanitizer();
}
/// The sanitizer used for trusted trees: it validates nothing and allows any
/// element.
class _TrustedHtmlTreeSanitizer implements NodeTreeSanitizer {
  const _TrustedHtmlTreeSanitizer();

  /// Deliberately does nothing; [node] and its descendants are left as they
  /// are.
  void sanitizeTree(Node node) {}
}
/// The policy that decides which URIs are allowed as attribute values.
///
/// Custom policies can express rules such as allowing all http:// URIs for
/// image attributes while restricting anchor tags to same-origin URIs.
abstract class UriPolicy {
  /// Creates the default policy, which only allows URIs on the same origin
  /// that the application was launched from.
  ///
  /// This blocks all ftp: and mailto: URIs. It also blocks
  /// https://example.com when the app is running from http://example.com.
  factory UriPolicy() {
    return _SameOriginUriPolicy();
  }

  /// Returns whether [uri] is allowed.
  ///
  /// [uri] may or may not be a relative path.
  bool allowsUri(String uri);
}
/// A [UriPolicy] that only allows URIs on the origin the current application
/// was loaded from (such as https://example.com:80).
class _SameOriginUriPolicy implements UriPolicy {
  final AnchorElement _hiddenAnchor = AnchorElement();
  final Location _loc = window.location;

  /// Returns true when [uri] has the same hostname, port and protocol as the
  /// window location, or when all three come back empty from the anchor.
  bool allowsUri(String uri) {
    // Assign the URI to the private anchor and read its parsed parts back.
    _hiddenAnchor.href = uri;

    final matchesPageOrigin = _hiddenAnchor.hostname == _loc.hostname &&
        _hiddenAnchor.port == _loc.port &&
        _hiddenAnchor.protocol == _loc.protocol;
    if (matchesPageOrigin) {
      return true;
    }

    // IE leaves an empty hostname for same-origin URIs.
    if (_hiddenAnchor.hostname != '' || _hiddenAnchor.port != '') {
      return false;
    }
    return _hiddenAnchor.protocol == ':' || _hiddenAnchor.protocol == '';
  }
}
/// A [NodeValidator] wrapper that throws an [ArgumentError] whenever the
/// wrapped [validator] rejects an element or attribute, instead of returning
/// false.
class _ThrowsNodeValidator implements NodeValidator {
  /// The validator whose decisions are enforced.
  final NodeValidator validator;

  _ThrowsNodeValidator(this.validator);

  /// Returns true if [validator] allows [element]; otherwise throws an
  /// [ArgumentError] carrying the element's tag name.
  bool allowsElement(Element element) {
    if (validator.allowsElement(element)) {
      return true;
    }
    throw ArgumentError(Element._safeTagName(element));
  }

  /// Returns true if [validator] allows the attribute; otherwise throws an
  /// [ArgumentError] describing the element, attribute name and value.
  bool allowsAttribute(Element element, String attributeName, String value) {
    if (validator.allowsAttribute(element, attributeName, value)) {
      return true;
    }
    throw ArgumentError(
        '${Element._safeTagName(element)}[$attributeName="$value"]');
  }
}
/**
* Standard tree sanitizer which validates a node tree against the provided
* validator and removes any nodes or attributes which are not allowed.
*
* [sanitizeTree] repeats its walk over the tree until a complete pass leaves
* [numTreeModifications] unchanged.
*/
class _ValidatingTreeSanitizer implements NodeTreeSanitizer {
/// The validator consulted for every element and attribute.
NodeValidator validator;
/// Number of tree modifications this instance has made.
///
/// Only [_removeNode] increments this counter; removing an attribute in
/// [_sanitizeElement] does not.
int numTreeModifications = 0;
_ValidatingTreeSanitizer(this.validator) {}
/// Sanitizes [node] and all of its descendants in place.
///
/// The tree is walked repeatedly until a pass removes no node. An
/// [ArgumentError] raised by the validator while sanitizing an element (see
/// [_sanitizeUntrustedElement]) propagates to the caller.
void sanitizeTree(Node node) {
// Sanitizes [node] itself, then visits its children from last to first,
// recursing into each child that is still considered usable.
void walk(Node node, Node? parent) {
sanitizeNode(node, parent);
var child = node.lastChild;
while (null != child) {
Node? nextChild;
try {
// Child may be removed during the walk, and we may not even be able
// to get its previousNode. But it's also possible that previousNode
// (i.e. previousSibling) is being spoofed, so double-check it.
nextChild = child.previousNode;
if (nextChild != null && nextChild.nextNode != child) {
// Thrown only to reach the catch block below, which removes the child.
throw StateError("Corrupt HTML");
}
} catch (e) {
// Child appears bad, remove it. We want to check the rest of the
// children of node, but we have no way of getting to the next
// child, so start again from the last child.
_removeNode(child, node);
child = null;
nextChild = node.lastChild;
}
// child is null here only when it was just removed in the catch block.
if (child != null) walk(child, node);
child = nextChild;
}
}
// Walk the tree until no new modifications are added to the tree.
var previousTreeModifications;
do {
previousTreeModifications = numTreeModifications;
walk(node, null);
} while (previousTreeModifications != numTreeModifications);
}
/// Aggressively try to remove node.
///
/// Always counts as one tree modification, whichever removal path is taken.
void _removeNode(Node node, Node? parent) {
// If we have the parent, it's presumably already passed more sanitization
// or is the fragment, so ask it to remove the child. If no parent was
// given, or the node does not report it as its parentNode, ask the node to
// remove itself instead.
numTreeModifications++;
if (parent == null || parent != node.parentNode) {
node.remove();
} else {
parent._removeChild(node);
}
}
/// Sanitize the element, assuming we can't trust anything about it.
///
/// Reads the element's attributes, its `is` attribute and the two
/// corrupted-attribute checks defensively, then hands the results to
/// [_sanitizeElement]. An [ArgumentError] from that call is rethrown; any
/// other exception causes [element] to be removed from [parent] with a
/// console warning.
void _sanitizeUntrustedElement(/* Element */ element, Node? parent) {
// If the _hasCorruptedAttributes does not successfully return false,
// then we consider it corrupted and remove.
// TODO(alanknight): This is a workaround because on Firefox
// embed/object
// tags typeof is "function", not "object". We don't recognize them, and
// can't call methods. This does mean that you can't explicitly allow an
// embed tag. The only thing that will let it through is a null
// sanitizer that doesn't traverse the tree at all. But sanitizing while
// allowing embeds seems quite unlikely. This is also the reason that we
// can't declare the type of element, as an embed won't pass any type
// check in dart2js.
// Starts out true, so an exception anywhere in the try block below leaves
// the element marked as corrupted.
var corrupted = true;
var attrs;
var isAttr;
try {
// If getting/indexing attributes throws, count that as corrupt.
attrs = element.attributes;
isAttr = attrs['is'];
var corruptedTest1 = Element._hasCorruptedAttributes(element);
// On IE, erratically, the hasCorruptedAttributes test can return false,
// even though it clearly is corrupted. A separate copy of the test
// inlining just the basic check seems to help.
corrupted = corruptedTest1
? true
: Element._hasCorruptedAttributesAdditionalCheck(element);
} catch (e) {}
// Description used in warnings; the placeholder is kept if toString throws.
var elementText = 'element unprintable';
try {
elementText = element.toString();
} catch (e) {}
try {
var elementTagName = Element._safeTagName(element);
_sanitizeElement(element, parent, corrupted, elementText, elementTagName,
attrs, isAttr);
} on ArgumentError {
// Thrown by _ThrowsNodeValidator
rethrow;
} catch (e) {
// Unexpected exception sanitizing -> remove
_removeNode(element, parent);
window.console.warn('Removing corrupted element $elementText');
}
}
/// Having done basic sanity checking on the element, and computed the
/// important attributes we want to check, remove it if it's not valid
/// or not allowed, either as a whole or particular attributes.
///
/// [corrupted] is the result of the corrupted-attribute checks, [text] and
/// [tag] are printable descriptions used in console warnings, [attrs] is the
/// element's attribute map and [isAttr] is the value of its `is` attribute,
/// if any.
///
/// The checks run in this order and each failed check removes the whole
/// element and returns: corrupted attributes, disallowed element, disallowed
/// `is` type extension. After that, individual disallowed attributes are
/// removed, and the content of a [TemplateElement] is sanitized as well.
void _sanitizeElement(Element element, Node? parent, bool corrupted,
String text, String tag, Map attrs, String? isAttr) {
// NOTE(review): written as `false != corrupted` rather than `corrupted`,
// presumably so that only a genuine `false` lets the element through;
// confirm before simplifying.
if (false != corrupted) {
_removeNode(element, parent);
window.console
.warn('Removing element due to corrupted attributes on <$text>');
return;
}
if (!validator.allowsElement(element)) {
_removeNode(element, parent);
window.console.warn('Removing disallowed element <$tag> from $parent');
return;
}
if (isAttr != null) {
if (!validator.allowsAttribute(element, 'is', isAttr)) {
_removeNode(element, parent);
window.console.warn('Removing disallowed type extension '
'<$tag is="$isAttr">');
return;
}
}
// TODO(blois): Need to be able to get all attributes, irrespective of
// XMLNS.
// Iterate over a snapshot of the keys, from last to first, because entries
// are removed from attrs inside the loop.
var keys = attrs.keys.toList();
for (var i = attrs.length - 1; i >= 0; --i) {
var name = keys[i];
// The validator receives the attribute name in lowercase.
if (!validator.allowsAttribute(
element, name.toLowerCase(), attrs[name])) {
window.console.warn('Removing disallowed attribute '
'<$tag $name="${attrs[name]}">');
attrs.remove(name);
}
}
// A template's content is sanitized with its own call to sanitizeTree.
if (element is TemplateElement) {
TemplateElement template = element;
sanitizeTree(template.content!);
}
}
/// Sanitize a single node according to its node type.
///
/// Element nodes go through [_sanitizeUntrustedElement]. Comment, document
/// fragment, text and CDATA section nodes are left as they are. Nodes of any
/// other type are removed. Children are not visited here; that is done by
/// the walk in [sanitizeTree].
void sanitizeNode(Node node, Node? parent) {
switch (node.nodeType) {
case Node.ELEMENT_NODE:
_sanitizeUntrustedElement(node, parent);
break;
case Node.COMMENT_NODE:
case Node.DOCUMENT_FRAGMENT_NODE:
case Node.TEXT_NODE:
case Node.CDATA_SECTION_NODE:
break;
default:
_removeNode(node, parent);
}
}
}