pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart - sdk - Git at Google

 // Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.

 import 'package:analyzer/analysis_rule/rule_context.dart';
 import 'package:analyzer/analysis_rule/rule_visitor_registry.dart';
 import 'package:analyzer/dart/ast/ast.dart';
 import 'package:analyzer/dart/ast/visitor.dart';
 import 'package:analyzer/error/error.dart';
 import 'package:collection/collection.dart';

 import '../analyzer.dart';

 const _desc =
     r'Use of angle brackets in a doc comment is treated as HTML by '
     'Markdown.';

 /// Valid HTML tags that should not be linted.
 ///
 /// These tags are from
 /// [CommonMark 0.30](https://spec.commonmark.org/0.30/#raw-html).
 const _validHtmlTags = [
   'a',
   'abbr',
   'address',
   'area',
   'article',
   'aside',
   'audio',
   'b',
   'bdi',
   'bdo',
   'blockquote',
   'br',
   'button',
   'canvas',
   'caption',
   'cite',
   'code',
   'col',
   'colgroup',
   'data',
   'datalist',
   'dd',
   'del',
   'dfn',
   'div',
   'dl',
   'dt',
   'em',
   'fieldset',
   'figcaption',
   'figure',
   'footer',
   'form',
   'h1',
   'h2',
   'h3',
   'h4',
   'h5',
   'h6',
   'header',
   'hr',
   'i',
   'iframe',
   'img',
   'input',
   'ins',
   'kbd',
   'keygen',
   'label',
   'legend',
   'li',
   'link',
   'main',
   'map',
   'mark',
   'meta',
   'meter',
   'nav',
   'noscript',
   'object',
   'ol',
   'optgroup',
   'option',
   'output',
   'p',
   'param',
   'pre',
   'progress',
   'q',
   's',
   'samp',
   'script',
   'section',
   'select',
   'small',
   'source',
   'span',
   'strong',
   'style',
   'sub',
   'sup',
   'table',
   'tbody',
   'td',
   'template',
   'textarea',
   'tfoot',
   'th',
   'thead',
   'time',
   'title',
   'tr',
   'track',
   'u',
   'ul',
   'var',
   'video',
   'wbr',
 ];

 class UnintendedHtmlInDocComment extends LintRule {
   UnintendedHtmlInDocComment()
     : super(name: LintNames.unintended_html_in_doc_comment, description: _desc);

   @override
   DiagnosticCode get diagnosticCode =>
       LinterLintCode.unintendedHtmlInDocComment;

   @override
   void registerNodeProcessors(
     RuleVisitorRegistry registry,
     RuleContext context,
   ) {
     var visitor = _Visitor(this);
     registry.addComment(this, visitor);
   }
 }

 /// Represents the [offset] and [length] of an unintended HTML tag in a doc
 /// comment.
 class _UnintendedTag {
   final int offset;
   final int length;
   _UnintendedTag(this.offset, this.length);
 }

 class _Visitor extends SimpleAstVisitor<void> {
   /// Pattern for HTML-tags and non-HTML regions.
   ///
   /// Pattern which matches sequences of characters with content that is known
   /// to *not* be interpreted as Markdown or HTML tags,  and anything else that
   /// looks like an HTML tag.
   /// Because [RegExp.allMatches] matches do not overlap, including the
   /// non-HTML sections in the same RegExp ensures that the HTML tag match
   /// will not be matched against the non-HTML content.
   static final _markdownTokenPattern = RegExp(
     // Escaped Markdown character, including `\<` and `\\`.
     r'\\.'
     // Or a Markdown code span, from "`"*N to "`"*N.
     // Also matches an unterminated start tag to avoid "```a``"
     // being matched as "``a``".
     // The ```-sequence is atomic.
     r'|(?<cq>`+)(?:[^]+?\k<cq>)?'
     // Or autolink, starting with scheme + `:`, followed by non-whitespace/
     // control characters until a closing `>`.
     r'|<[a-z][a-z\d\-+.]+:[^\x00-\x20\x7f<>]*>'
     // Or HTML comments.
     r'|<!--(?:-?>|[^]*?-->)'
     // Or HTML declarations, like `<!DOCTYPE ...>`.
     r'|<![a-z][^]*?!>'
     // Or HTML processing instructions.
     r'|<\?[^]*?\?>'
     // Or HTML CDATA sections sections.
     r'|<\[CDATA[^]*\]>'
     // Or plain `[...]` which DartDoc interprets as Dart source links,
     // and which can contain type parameters like `... [List<int>] ...`.
     // Here recognized as `[...]` with no `]` inside, not preceded by `]`
     // or followed by `(` or `[`.
     r'|(?<!\])\[[^\]]*\](?![(\[])'
     // Or valid HTML tag.
     // Matches `<validTag>`, `<validTag ...>`, `<validTag/>`, `</validTag>`
     // and `</validTag ...>.
     r'|<(?<et>/?)(?:'
     '${_validHtmlTags.join('|')}'
     r')'
     r'(?:/(?=\k<et>)>|>|[\x20\r\n\t][^]*?>)'
     // Or any of the following matches which are considered invalid tags.
     // If the "nh" capture group is participating, one of these matched.
     r'|(?<nh>)(?:'
     // Any other `</?tag ...>` sequence.
     r'</?[a-z][^]*?>'
     r')',
     caseSensitive: false,
   );

   final LintRule rule;

   _Visitor(this.rule);

   @override
   void visitComment(Comment node) {
     var codeBlockLines = node.codeBlocks
         .map((codeBlock) => codeBlock.lines)
         .flattened;

     for (var token in node.tokens) {
       // Make sure that the current doc comment line isn't contained in a code
       // block.
       var offsetAfterSlash = token.offset + 3;
       var inCodeBlock = codeBlockLines.any(
         (codeBlockLine) =>
             codeBlockLine.offset <= offsetAfterSlash &&
             offsetAfterSlash <= codeBlockLine.offset + codeBlockLine.length,
       );
       if (inCodeBlock) continue;

       var tags = _findUnintendedHtmlTags(token.lexeme);
       for (var tag in tags) {
         rule.reportAtOffset(token.offset + tag.offset, tag.length);
       }
     }
   }

   /// Finds tags that are not valid HTML tags, not contained in a code span, and
   /// are not autolinks.
   List<_UnintendedTag> _findUnintendedHtmlTags(String text) {
     var matches = <_UnintendedTag>[];
     for (var match in _markdownTokenPattern.allMatches(text)) {
       if (match.namedGroup('nh') != null) {
         matches.add(_UnintendedTag(match.start, match.end - match.start));
       }
     }
     return matches;
   }
 }
	// Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file
	// for details. All rights reserved. Use of this source code is governed by a
	// BSD-style license that can be found in the LICENSE file.

	import 'package:analyzer/analysis_rule/rule_context.dart';
	import 'package:analyzer/analysis_rule/rule_visitor_registry.dart';
	import 'package:analyzer/dart/ast/ast.dart';
	import 'package:analyzer/dart/ast/visitor.dart';
	import 'package:analyzer/error/error.dart';
	import 'package:collection/collection.dart';

	import '../analyzer.dart';

	const _desc =
	r'Use of angle brackets in a doc comment is treated as HTML by '
	'Markdown.';

	/// Valid HTML tags that should not be linted.
	///
	/// These tags are from
	/// [CommonMark 0.30](https://spec.commonmark.org/0.30/#raw-html).
	const _validHtmlTags = [
	'a',
	'abbr',
	'address',
	'area',
	'article',
	'aside',
	'audio',
	'b',
	'bdi',
	'bdo',
	'blockquote',
	'br',
	'button',
	'canvas',
	'caption',
	'cite',
	'code',
	'col',
	'colgroup',
	'data',
	'datalist',
	'dd',
	'del',
	'dfn',
	'div',
	'dl',
	'dt',
	'em',
	'fieldset',
	'figcaption',
	'figure',
	'footer',
	'form',
	'h1',
	'h2',
	'h3',
	'h4',
	'h5',
	'h6',
	'header',
	'hr',
	'i',
	'iframe',
	'img',
	'input',
	'ins',
	'kbd',
	'keygen',
	'label',
	'legend',
	'li',
	'link',
	'main',
	'map',
	'mark',
	'meta',
	'meter',
	'nav',
	'noscript',
	'object',
	'ol',
	'optgroup',
	'option',
	'output',
	'p',
	'param',
	'pre',
	'progress',
	'q',
	's',
	'samp',
	'script',
	'section',
	'select',
	'small',
	'source',
	'span',
	'strong',
	'style',
	'sub',
	'sup',
	'table',
	'tbody',
	'td',
	'template',
	'textarea',
	'tfoot',
	'th',
	'thead',
	'time',
	'title',
	'tr',
	'track',
	'u',
	'ul',
	'var',
	'video',
	'wbr',
	];

	class UnintendedHtmlInDocComment extends LintRule {
	UnintendedHtmlInDocComment()
	: super(name: LintNames.unintended_html_in_doc_comment, description: _desc);

	@override
	DiagnosticCode get diagnosticCode =>
	LinterLintCode.unintendedHtmlInDocComment;

	@override
	void registerNodeProcessors(
	RuleVisitorRegistry registry,
	RuleContext context,
	) {
	var visitor = _Visitor(this);
	registry.addComment(this, visitor);
	}
	}

	/// Represents the [offset] and [length] of an unintended HTML tag in a doc
	/// comment.
	class _UnintendedTag {
	final int offset;
	final int length;
	_UnintendedTag(this.offset, this.length);
	}

	class _Visitor extends SimpleAstVisitor<void> {
	/// Pattern for HTML-tags and non-HTML regions.
	///
	/// Pattern which matches sequences of characters with content that is known
	/// to not be interpreted as Markdown or HTML tags, and anything else that
	/// looks like an HTML tag.
	/// Because [RegExp.allMatches] matches do not overlap, including the
	/// non-HTML sections in the same RegExp ensures that the HTML tag match
	/// will not be matched against the non-HTML content.
	static final _markdownTokenPattern = RegExp(
	// Escaped Markdown character, including `\<` and `\\`.
	r'\\.'
	// Or a Markdown code span, from "`"N to "`"N.
	// Also matches an unterminated start tag to avoid "```a``"
	// being matched as "``a``".
	// The ```-sequence is atomic.
	r'\|(?<cq>`+)(?:[^]+?\k<cq>)?'
	// Or autolink, starting with scheme + `:`, followed by non-whitespace/
	// control characters until a closing `>`.
	r'\|<[a-z][a-z\d\-+.]+:[^\x00-\x20\x7f<>]*>'
	// Or HTML comments.
	r'\|<!--(?:-?>\|[^]*?-->)'
	// Or HTML declarations, like `<!DOCTYPE ...>`.
	r'\|<![a-z][^]*?!>'
	// Or HTML processing instructions.
	r'\|<\?[^]*?\?>'
	// Or HTML CDATA sections sections.
	r'\|<\[CDATA[^]*\]>'
	// Or plain `[...]` which DartDoc interprets as Dart source links,
	// and which can contain type parameters like `... [List<int>] ...`.
	// Here recognized as `[...]` with no `]` inside, not preceded by `]`
	// or followed by `(` or `[`.
	r'\|(?<!\])\[[^\]]*\](?![(\[])'
	// Or valid HTML tag.
	// Matches `<validTag>`, `<validTag ...>`, `<validTag/>`, `</validTag>`
	// and `</validTag ...>.
	r'\|<(?<et>/?)(?:'
	'${_validHtmlTags.join('\|')}'
	r')'
	r'(?:/(?=\k<et>)>\|>\|[\x20\r\n\t][^]*?>)'
	// Or any of the following matches which are considered invalid tags.
	// If the "nh" capture group is participating, one of these matched.
	r'\|(?<nh>)(?:'
	// Any other `</?tag ...>` sequence.
	r'</?[a-z][^]*?>'
	r')',
	caseSensitive: false,
	);

	final LintRule rule;

	_Visitor(this.rule);

	@override
	void visitComment(Comment node) {
	var codeBlockLines = node.codeBlocks
	.map((codeBlock) => codeBlock.lines)
	.flattened;

	for (var token in node.tokens) {
	// Make sure that the current doc comment line isn't contained in a code
	// block.
	var offsetAfterSlash = token.offset + 3;
	var inCodeBlock = codeBlockLines.any(
	(codeBlockLine) =>
	codeBlockLine.offset <= offsetAfterSlash &&
	offsetAfterSlash <= codeBlockLine.offset + codeBlockLine.length,
	);
	if (inCodeBlock) continue;

	var tags = _findUnintendedHtmlTags(token.lexeme);
	for (var tag in tags) {
	rule.reportAtOffset(token.offset + tag.offset, tag.length);
	}
	}
	}

	/// Finds tags that are not valid HTML tags, not contained in a code span, and
	/// are not autolinks.
	List<_UnintendedTag> _findUnintendedHtmlTags(String text) {
	var matches = <_UnintendedTag>[];
	for (var match in _markdownTokenPattern.allMatches(text)) {
	if (match.namedGroup('nh') != null) {
	matches.add(_UnintendedTag(match.start, match.end - match.start));
	}
	}
	return matches;
	}
	}