[linter] Add tests and make regex static for unintended_html lint. Follow up to https://dart-review.googlesource.com/c/sdk/+/379519 Adding a few more tests and making sure the regex isn't instantiated every time we check. Added some tests from https://github.com/dart-lang/sdk/issues/56450 as well. Change-Id: Ief895cdfb5fa1049ce4599e2241577e91bc5701a Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/380320 Reviewed-by: Brian Wilkerson <brianwilkerson@google.com> Commit-Queue: Kallen Tu <kallentu@google.com>

commit: 64969c74b0f65aec3717d3eeba07a05c5f0748e1 [log] [tgz]
author: Kallen Tu <kallentu@google.com> Tue Aug 13 17:32:58 2024 +0000
committer: Commit Queue <dart-scoped@luci-project-accounts.iam.gserviceaccount.com> Tue Aug 13 17:32:58 2024 +0000
tree: 8205cfff76fdda7ca0ec62265df5d3a914d12ce4
parent: baa9a043d2478b131d6016f8122b60ae3a857bd1 [diff]
diff --git a/pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart b/pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart
index 768f844..7b5fda3 100644
--- a/pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart
+++ b/pkg/linter/lib/src/rules/unintended_html_in_doc_comment.dart

@@ -208,6 +208,46 @@
 }
 
 class _Visitor extends SimpleAstVisitor<void> {
+  static final _markdownTokenPattern = RegExp(
+      // Escaped Markdown character.
+      r'\\.'
+
+      // Or code span, from "`"*N to "`"*N or just the start if it's
+      // unterminated, to avoid "```a``" matching the "``a``".
+      // The ```-sequence is atomic.
+      r'|(?<cq>`+)(?:[^]*?\k<cq>)?'
+
+      // Or autolink, start with scheme + `:`.
+      r'|<[a-z][a-z\d\-+.]+:[^\x00-\x20\x7f<>]*>'
+
+      // Or HTML comments.
+      r'|<!--(?:-?>|[^]*?-->)'
+
+      // Or HTML declarations.
+      r'|<![a-z][^]*?!>'
+
+      // Or HTML processing instructions.
+      r'|<\?[^]*?\?>'
+
+      // Or HTML CDATA sections.
+      r'|<\[CDATA[^]*\]>'
+
+      // Or valid HTML tag.
+      // Matches `<validTag>`, `<validTag ...>`, `<validTag/>`, `</validTag>`
+      // and `</validTag ...>`.
+      r'|<(?<et>/?)(?:'
+      '${_validHtmlTags.join('|')}'
+      r')'
+      r'(?:/(?=\k<et>)>|>|[\x20\r\n\t][^]*?>)'
+
+      // Or any of the following matches which are considered invalid tags.
+      // If the "nh" capture group is participating, one of these matched.
+      r'|(?<nh>)(?:'
+
+      // Any other `</?tag ...>` sequence.
+      r'</?[a-z][^]*?>'
+      r')', caseSensitive: false);
+
   final LintRule rule;
 
   _Visitor(this.rule);
@@ -236,48 +276,8 @@
   /// Finds tags that are not valid HTML tags, not contained in a code span, and
   /// are not autolinks.
   List<_UnintendedTag> _findUnintendedHtmlTags(String text) {
-    var markdownTokenPattern = RegExp(
-        // Escaped Markdown character.
-        r'\\.'
-
-        // Or code span, from "`"*N to "`"*N or just the start if it's
-        // unterminated, to avoid "```a``" matching the "``a``".
-        // The ```-sequence is atomic.
-        r'|(?<cq>`+)(?:[^]*?\k<cq>)?'
-
-        // Or autolink, start with scheme + `:`.
-        r'|<[a-z][a-z\d\-+.]+:[^\x00-\x20\x7f<>]*>'
-
-        // Or HTML comments.
-        r'|<!--(?:-?>|[^]*?-->)'
-
-        // Or HTML declarations.
-        r'|<![a-z][^]*?!>'
-
-        // Or HTML processing instructions.
-        r'|<\?[^]*?\?>'
-
-        // Or HTML CDATA sections sections.
-        r'|<\[CDATA[^]*\]>'
-
-        // Or valid HTML tag.
-        // Matches `<validTag>`, `<validTag ...>`, `<validTag/>`, `</validTag>`
-        // and `</validTag ...>.
-        r'|<(?<et>/?)(?:'
-        '${_validHtmlTags.join('|')}'
-        r')'
-        r'(?:/(?=\k<et>)>|>|[\x20\r\n\t][^]*?>)'
-
-        // Or any of the following matches which are considered invalid tags.
-        // If the "nh" capture group is participating, one of these matched.
-        r'|(?<nh>)(?:'
-
-        // Any other `</?tag ...>` sequence.
-        r'</?[a-z][^]*?>'
-        r')', caseSensitive: false);
-
     var matches = <_UnintendedTag>[];
-    for (var match in markdownTokenPattern.allMatches(text)) {
+    for (var match in _markdownTokenPattern.allMatches(text)) {
       if (match.namedGroup('nh') != null) {
         matches.add(_UnintendedTag(match.start, match.end - match.start));
       }

diff --git a/pkg/linter/test/rules/unintended_html_in_doc_comment_test.dart b/pkg/linter/test/rules/unintended_html_in_doc_comment_test.dart
index 09bb8c9..3c7eae1 100644
--- a/pkg/linter/test/rules/unintended_html_in_doc_comment_test.dart
+++ b/pkg/linter/test/rules/unintended_html_in_doc_comment_test.dart

@@ -90,6 +90,13 @@
 ''');
   }
 
+  test_html_cData_nested() async {
+    await assertNoDiagnostics(r'''
+/// <[CDATA[<bad>]]>
+class C {}
+''');
+  }
+
   test_html_comment() async {
     await assertNoDiagnostics(r'''
 /// <!--comment-->
@@ -97,6 +104,13 @@
 ''');
   }
 
+  test_html_comment_nested() async {
+    await assertNoDiagnostics(r'''
+/// <!--<bad>-->
+class C {}
+''');
+  }
+
   test_html_declaration() async {
     await assertNoDiagnostics(r'''
 /// <!DOCTYPE html>
@@ -111,6 +125,23 @@
 ''');
   }
 
+  test_html_processingInstruction_nested() async {
+    await assertNoDiagnostics(r'''
+/// <?<bad>?>
+class C {}
+''');
+  }
+
+  test_html_tags_valid() async {
+    await assertNoDiagnostics(r'''
+/// <table class="properties">
+/// <th scope="row">
+/// <br />
+/// <h1> Test. </h1>
+class C {}
+''');
+  }
+
   test_notDocComment() async {
     await assertNoDiagnostics(r'''
 // List<int> <tag>
@@ -183,17 +214,6 @@
     ]);
   }
 
-  test_unintendedHtml_multipleTags() async {
-    await assertDiagnostics(r'''
-/// <assignment> -> <variable> = <expression>
-class C {}
-''', [
-      lint(4, 12), // <assignment>
-      lint(20, 10), // <variable>
-      lint(33, 12), // <expression>
-    ]);
-  }
-
   test_unintendedHtml_nested() async {
     await assertDiagnostics(r'''
 /// Text List<List<int>>.
@@ -223,10 +243,24 @@
     ]);
   }
 
-  test_validHtmlTag() async {
-    await assertNoDiagnostics(r'''
-/// <h1> Test. </h1>
+  test_unintendedHtml_tags_multiple() async {
+    await assertDiagnostics(r'''
+/// <assignment> -> <variable> = <expression>
 class C {}
-''');
+''', [
+      lint(4, 12), // <assignment>
+      lint(20, 10), // <variable>
+      lint(33, 12), // <expression>
+    ]);
+  }
+
+  test_unintendedHtml_tags_slash() async {
+    await assertDiagnostics(r'''
+/// </bad> <bad/>
+class C {}
+''', [
+      lint(4, 6), // </bad>
+      lint(11, 6), // <bad/>
+    ]);
   }
 }
commit	64969c74b0f65aec3717d3eeba07a05c5f0748e1	[log] [tgz]
author	Kallen Tu <kallentu@google.com>	Tue Aug 13 17:32:58 2024 +0000
committer	Commit Queue <dart-scoped@luci-project-accounts.iam.gserviceaccount.com>	Tue Aug 13 17:32:58 2024 +0000
tree	8205cfff76fdda7ca0ec62265df5d3a914d12ce4
parent	baa9a043d2478b131d6016f8122b60ae3a857bd1 [diff]