blob: e64df2e93fd8979b136a1eb3b90007e250aa3a22 [file] [log] [blame]
// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
/// A lightweight HTML parser.
library;
// ignore: implementation_imports
import 'package:analyzer/src/manifest/manifest_validator.dart';
import 'html_dom.dart';
/// Parses [htmlContents] into a [Document] holding a single root element.
///
/// The markup is read with a [ManifestParser], to which [uri] is passed
/// through. Tags are consumed until the first result that carries an element;
/// that element is converted, together with its attributes, its descendant
/// elements and the non-empty text found between them, and becomes the only
/// node appended to the returned document. HTML comments are stripped from
/// the text content.
///
/// Throws the string `'parse error - element not found'` if the end of the
/// input is reached before any element is found.
Document parse(String htmlContents, Uri uri) {
  // Matches one whole HTML comment. `dotAll` lets `.` span line breaks and the
  // lazy quantifier stops at the first `-->`, so empty comments and comments
  // that contain `>` are matched too.
  var commentRegex = RegExp(r'<!--.*?-->', dotAll: true);

  // Converts `xmlElement` and, recursively, its children into an `Element`.
  Element createElement(XmlElement xmlElement) {
    var element = Element.tag(xmlElement.name);

    // Copy the attributes over by name.
    for (var MapEntry(:key, value: attribute)
        in xmlElement.attributes.entries) {
      element.attributes[key] = attribute.value;
    }

    var span = xmlElement.sourceSpan;
    if (span == null) {
      // Without a source span there is no text to slice, so only the child
      // elements can be carried over.
      for (var child in xmlElement.children) {
        element.append(createElement(child));
      }
      return element;
    }

    var text = span.text;
    // A tag written as `<tag ... />` has no content to convert.
    if (text.endsWith('/>')) return element;

    // Child spans use absolute offsets; subtracting the start offset of this
    // element's span turns them into indices into `text`.
    var offset = span.start.offset;

    // Appends the slice of `text` from `start` to `end`, minus any comments.
    void appendText(int start, int end) {
      element.append(
        Text(text.substring(start, end).replaceAll(commentRegex, '')),
      );
    }

    // Text lives in the gaps between the immediate children: the first gap
    // starts right after the first `>` of this element's text ...
    var cursor = text.indexOf('>') + 1;
    for (var child in xmlElement.children) {
      // A child without a span cannot be located inside `text`; as before,
      // that is treated as an error by the null assertion.
      var childSpan = child.sourceSpan!;
      appendText(cursor, childSpan.start.offset - offset);
      element.append(createElement(child));
      cursor = childSpan.end.offset - offset;
    }
    // ... and the last gap ends at the final `<` of this element's text.
    appendText(cursor, text.lastIndexOf('<'));

    // Only non-empty text is reported.
    element.nodes.removeWhere((node) => node is Text && node.text.isEmpty);
    return element;
  }

  var parser = ManifestParser.general(htmlContents, uri: uri);
  var result = parser.parseXmlTag();
  // Keep reading tags, skipping results that carry no element, until either an
  // element shows up or the parser reports end of input.
  while (result.parseResult != ParseTagResult.eof.parseResult) {
    if (result.element case var element?) {
      return Document()..append(createElement(element));
    }
    result = parser.parseXmlTag();
  }
  // Kept as a thrown string so the error reaching existing callers is
  // unchanged.
  throw 'parse error - element not found';
}