blob: 10b0e1fd9050f1d50abe4236b1d69cfecd08aa74 [file] [log] [blame]
// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'package:analyzer/analysis_rule/rule_context.dart';
import 'package:analyzer/analysis_rule/rule_visitor_registry.dart';
import 'package:analyzer/dart/ast/ast.dart';
import 'package:analyzer/dart/ast/visitor.dart';
import 'package:analyzer/error/error.dart';
import 'package:collection/collection.dart';
import '../analyzer.dart';
/// Short description of the lint, passed to the rule's constructor.
const _desc =
    'Use of angle brackets in a doc comment is treated as HTML by Markdown.';
/// Valid HTML tags that should not be linted.
///
/// The names are joined with `|` and interpolated into the "valid HTML tag"
/// alternative of `_Visitor._markdownTokenPattern`. That pattern is compiled
/// with `caseSensitive: false`, so only the lower-case spelling of each tag is
/// listed here.
///
/// These tags are from
/// [CommonMark 0.30](https://spec.commonmark.org/0.30/#raw-html).
const _validHtmlTags = [
'a',
'abbr',
'address',
'area',
'article',
'aside',
'audio',
'b',
'bdi',
'bdo',
'blockquote',
'br',
'button',
'canvas',
'caption',
'cite',
'code',
'col',
'colgroup',
'data',
'datalist',
'dd',
'del',
'dfn',
'div',
'dl',
'dt',
'em',
'fieldset',
'figcaption',
'figure',
'footer',
'form',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'header',
'hr',
'i',
'iframe',
'img',
'input',
'ins',
'kbd',
'keygen',
'label',
'legend',
'li',
'link',
'main',
'map',
'mark',
'meta',
'meter',
'nav',
'noscript',
'object',
'ol',
'optgroup',
'option',
'output',
'p',
'param',
'pre',
'progress',
'q',
's',
'samp',
'script',
'section',
'select',
'small',
'source',
'span',
'strong',
'style',
'sub',
'sup',
'table',
'tbody',
'td',
'template',
'textarea',
'tfoot',
'th',
'thead',
'time',
'title',
'tr',
'track',
'u',
'ul',
'var',
'video',
'wbr',
];
/// Lint rule that reports angle-bracket sequences in doc comments which look
/// like HTML tags but are not recognized as valid ones.
///
/// The detection itself is done by [_Visitor], which is registered for every
/// comment node.
class UnintendedHtmlInDocComment extends LintRule {
  UnintendedHtmlInDocComment()
    : super(
        name: LintNames.unintended_html_in_doc_comment,
        description: _desc,
      );

  /// The single diagnostic this rule produces.
  @override
  DiagnosticCode get diagnosticCode =>
      LinterLintCode.unintendedHtmlInDocComment;

  /// Hooks a fresh [_Visitor] up to comment nodes in [registry].
  @override
  void registerNodeProcessors(
    RuleVisitorRegistry registry,
    RuleContext context,
  ) {
    registry.addComment(this, _Visitor(this));
  }
}
/// The location of an unintended HTML tag found in a doc comment.
///
/// Both values are expressed in characters of the text that was scanned.
class _UnintendedTag {
  /// Index of the tag's first character, relative to the start of the scanned
  /// text.
  final int offset;

  /// Number of characters covered by the tag.
  final int length;

  _UnintendedTag(this.offset, this.length);
}
/// Visits doc comments and reports every HTML-looking tag that is neither a
/// known HTML tag nor part of a region that Markdown does not treat as HTML.
class _Visitor extends SimpleAstVisitor<void> {
  /// Pattern for HTML-tags and non-HTML regions.
  ///
  /// Pattern which matches sequences of characters with content that is known
  /// to *not* be interpreted as Markdown or HTML tags, and anything else that
  /// looks like an HTML tag.
  /// Because [RegExp.allMatches] matches do not overlap, including the
  /// non-HTML sections in the same RegExp ensures that the HTML tag match
  /// will not be matched against the non-HTML content.
  ///
  /// A match is an *unintended* tag exactly when its `nh` named group
  /// participates in the match.
  static final _markdownTokenPattern = RegExp(
    // Escaped Markdown character, including `\<` and `\\`.
    r'\\.'
    // Or a Markdown code span, from "`"*N to "`"*N.
    // Also matches an unterminated start tag to avoid "```a``"
    // being matched as "``a``".
    // The ```-sequence is atomic.
    r'|(?<cq>`+)(?:[^]+?\k<cq>)?'
    // Or autolink, starting with scheme + `:`, followed by non-whitespace/
    // control characters until a closing `>`.
    r'|<[a-z][a-z\d\-+.]+:[^\x00-\x20\x7f<>]*>'
    // Or HTML comments.
    r'|<!--(?:-?>|[^]*?-->)'
    // Or HTML declarations, like `<!DOCTYPE ...>`: `<!`, an ASCII letter,
    // then any characters other than `>`, then the closing `>`.
    r'|<![a-z][^>]*>'
    // Or HTML processing instructions.
    r'|<\?[^]*?\?>'
    // Or HTML CDATA sections: `<![CDATA[`, then the shortest run of
    // characters up to the first `]]>`.
    r'|<!\[CDATA\[[^]*?\]\]>'
    // Or plain `[...]` which DartDoc interprets as Dart source links,
    // and which can contain type parameters like `... [List<int>] ...`.
    // Here recognized as `[...]` with no `]` inside, not preceded by `]`
    // or followed by `(` or `[`.
    r'|(?<!\])\[[^\]]*\](?![(\[])'
    // Or valid HTML tag.
    // Matches `<validTag>`, `<validTag ...>`, `<validTag/>`, `</validTag>`
    // and `</validTag ...>`.
    // The `(?=\k<et>)` lookahead makes the self-closing form `/>` match only
    // when "et" is empty, i.e. when the tag is not an end tag.
    r'|<(?<et>/?)(?:'
    '${_validHtmlTags.join('|')}'
    r')'
    r'(?:/(?=\k<et>)>|>|[\x20\r\n\t][^]*?>)'
    // Or any of the following matches which are considered invalid tags.
    // If the "nh" capture group is participating, one of these matched.
    r'|(?<nh>)(?:'
    // Any other `</?tag ...>` sequence.
    r'</?[a-z][^]*?>'
    r')',
    caseSensitive: false,
  );

  /// The rule on whose behalf diagnostics are reported.
  final LintRule rule;

  _Visitor(this.rule);

  /// Scans each token of [node] that is not part of a code block and reports
  /// every unintended HTML tag found in it.
  @override
  void visitComment(Comment node) {
    // Materialized once: the lazy `flattened` view would otherwise re-walk
    // every code block for each comment token checked below.
    var codeBlockLines = node.codeBlocks
        .map((codeBlock) => codeBlock.lines)
        .flattened
        .toList();

    for (var token in node.tokens) {
      // Make sure that the current doc comment line isn't contained in a code
      // block.
      // NOTE(review): the `+ 3` presumably skips a leading `///` — confirm
      // this is also appropriate for block doc comments.
      var offsetAfterSlash = token.offset + 3;
      var inCodeBlock = codeBlockLines.any(
        (codeBlockLine) =>
            codeBlockLine.offset <= offsetAfterSlash &&
            offsetAfterSlash <= codeBlockLine.offset + codeBlockLine.length,
      );
      if (inCodeBlock) continue;

      // Tag offsets are relative to the token's lexeme, so shift them by the
      // token's own offset before reporting.
      for (var tag in _findUnintendedHtmlTags(token.lexeme)) {
        rule.reportAtOffset(token.offset + tag.offset, tag.length);
      }
    }
  }

  /// Finds tags that are not valid HTML tags, not contained in a code span, and
  /// are not autolinks.
  ///
  /// Returns one [_UnintendedTag] per offending match in [text], in the order
  /// the matches occur; offsets are relative to the start of [text].
  List<_UnintendedTag> _findUnintendedHtmlTags(String text) => [
    for (var match in _markdownTokenPattern.allMatches(text))
      // Only the "invalid tag" alternative makes the `nh` group participate.
      if (match.namedGroup('nh') != null)
        _UnintendedTag(match.start, match.end - match.start),
  ];
}