Reapply "Avoid computing the URI scanner tables at runtime." This reverts commit 855e1cd975feae07822187f226568130a0fcfbe9. The blocking issue in internal test code is assumed fixed. Change-Id: I74e0be130d149a45f77dc90c354916308b76b741 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/255248 Commit-Queue: Lasse Nielsen <lrn@google.com> Reviewed-by: Martin Kustermann <kustermann@google.com> Auto-Submit: Lasse Nielsen <lrn@google.com>

commit: 4c7110332e2c81366b6e85cb231a59840e370cd6 [log] [tgz]
author: Lasse R.H. Nielsen <lrn@google.com> Tue Aug 16 14:53:07 2022 +0000
committer: Commit Bot <commit-bot@chromium.org> Tue Aug 16 14:53:07 2022 +0000
tree: d85e3ed203ba2114b88c99baed65290f7f427fd7
parent: 91c88ab06710b4e6ec6bec4b658a46e9c37b9b61 [diff]
diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart
index bb84862..b465edf 100644
--- a/sdk/lib/core/uri.dart
+++ b/sdk/lib/core/uri.dart

@@ -3977,6 +3977,10 @@
   static const _uricTable = _Uri._queryCharTable;
 }
 
+// --- URI PARSER TABLE --- start --- generated code, do not edit ---
+// Use tools/generate_uri_parser_tables.dart to generate this code
+// if necessary.
+
 // --------------------------------------------------------------------
 // Constants used to read the scanner result.
 // The indices points into the table filled by [_scan] which contains
@@ -4005,7 +4009,7 @@
 const int _notSimpleIndex = 7;
 
 // Initial state for scanner.
-const int _uriStart = 00;
+const int _uriStart = 0;
 
 // If scanning of a URI terminates in this state or above,
 // consider the URI non-simple
@@ -4014,14 +4018,15 @@
 // Initial state for scheme validation.
 const int _schemeStart = 20;
 
+// --------------------------------------------------------------------
 /// Transition tables are used to scan a URI to determine its structure.
 ///
 /// The tables represent a state machine with output.
 ///
 /// To scan the URI, start in the [_uriStart] state, then read each character
 /// of the URI in order, from start to end, and for each character perform a
-/// transition to a new state while writing the current position into the output
-/// buffer at a designated index.
+/// transition to a new state while writing the current position
+/// into the output buffer at a designated index.
 ///
 /// Each state, represented by an integer which is an index into
 /// [_scannerTables], has a set of transitions, one for each character.
@@ -4030,320 +4035,133 @@
 ///
 /// For URI scanning, only characters in the range U+0020 through U+007E are
 /// interesting; all characters outside that range are treated the same.
-/// The tables only contain 96 entries, representing the characters in the
-/// interesting range, plus one more to represent all values outside the range.
-/// The character entries are stored in one `Uint8List` per state, with the
-/// transition for a character at position `character ^ 0x60`,
+/// The tables only contain 96 entries, representing the 95 characters in the
+/// interesting range, and one entry for all values outside the range.
+/// The character entries are stored in one `String` of 96 characters per state,
+/// with the transition for a character at position `character ^ 0x60`,
 /// which maps the range U+0020 .. U+007F into positions 0 .. 95.
 /// All remaining characters are mapped to position 31 (`0x7f ^ 0x60`), which
 /// represents the transition for all remaining characters.
-final List<Uint8List> _scannerTables = _createTables();
-
-// ----------------------------------------------------------------------
-// Code to create the URI scanner table.
-
-/// Creates the tables for [_scannerTables] used by [Uri.parse].
-///
-/// See [_scannerTables] for the generated format.
-///
-/// The concrete tables are chosen as a trade-off between the number of states
-/// needed and the precision of the result.
-/// This allows definitely recognizing the general structure of the URI
-/// (presence and location of scheme, user-info, host, port, path, query and
-/// fragment) while at the same time detecting that some components are not
-/// in canonical form (anything containing a `%`, a host-name containing a
-/// capital letter). Since the scanner doesn't know whether something is a
-/// scheme or a path until it sees `:`, or user-info or host until it sees
-/// a `@`, a second pass is needed to validate the scheme and any user-info
-/// is considered non-canonical by default.
-///
-/// The states (starting from [_uriStart]) write positions while scanning
-/// a string from `start` to `end` as follows:
-///
-/// - [_schemeEndIndex]: Should be initialized to `start-1`.
-///   If the URI has a scheme, it is set to the position of the `:` after
-///   the scheme.
-/// - [_hostStartIndex]: Should be initialized to `start - 1`.
-///   If the URI has an authority, it is set to the character before the
-///   host name - either the second `/` in the `//` leading the authority,
-///   or the `@` after a user-info. Comparing this value to the scheme end
-///   position can be used to detect that there is a user-info component.
-/// - [_portStartIndex]: Should be initialized to `start`.
-///   Set to the position of the last `:` in an authority, and unchanged
-///   if there is no authority or no `:` in an authority.
-///   If this position is after the host start, there is a port, otherwise it
-///   is just marking a colon in the user-info component.
-/// - [_pathStartIndex]: Should be initialized to `start`.
-///   Is set to the first path character unless the path is empty.
-///   If the path is empty, the position is either unchanged (`start`) or
-///   the first slash of an authority. So, if the path start is before a
-///   host start or scheme end, the path is empty.
-/// - [_queryStartIndex]: Should be initialized to `end`.
-///   The position of the `?` leading a query if the URI contains a query.
-/// - [_fragmentStartIndex]: Should be initialized to `end`.
-///   The position of the `#` leading a fragment if the URI contains a fragment.
-/// - [_notSimpleIndex]: Should be initialized to `start - 1`.
-///   Set to another value if the URI is considered "not simple".
-///   This is elaborated below.
-///
-/// # Simple URIs
-/// A URI is considered "simple" if it is in a normalized form containing no
-/// escapes. This allows us to skip normalization and checking whether escapes
-/// are valid, and to extract components without worrying about unescaping.
-///
-/// The scanner computes a conservative approximation of being "simple".
-/// It rejects any URI with an escape, with a user-info component (mainly
-/// because they are rare and would increase the number of states in the
-/// scanner significantly), with an IPV6 host or with a capital letter in
-/// the scheme or host name (the scheme is handled in a second scan using
-/// a separate two-state table).
-/// Further, paths containing `..` or `.` path segments are considered
-/// non-simple except for pure relative paths (no scheme or authority) starting
-/// with a sequence of "../" segments.
-///
-/// The transition tables cannot detect a trailing ".." in the path,
-/// followed by a query or fragment, because the segment is not known to be
-/// complete until we are past it, and we then need to store the query/fragment
-/// start instead. This cast is checked manually post-scanning (such a path
-/// needs to be normalized to end in "../", so the URI shouldn't be considered
-/// simple).
-List<Uint8List> _createTables() {
-  // TODO(lrn): Use a precomputed table.
-
-  // Total number of states for the scanner.
-  const int stateCount = 22;
-
-  // States used to scan a URI from scratch.
-  const int schemeOrPath = 01;
-  const int authOrPath = 02;
-  const int authOrPathSlash = 03;
-  const int uinfoOrHost0 = 04;
-  const int uinfoOrHost = 05;
-  const int uinfoOrPort0 = 06;
-  const int uinfoOrPort = 07;
-  const int ipv6Host = 08;
-  const int relPathSeg = 09;
-  const int pathSeg = 10;
-  const int path = 11;
-  const int query = 12;
-  const int fragment = 13;
-  const int schemeOrPathDot = 14;
-  const int schemeOrPathDot2 = 15;
-  const int relPathSegDot = 16;
-  const int relPathSegDot2 = 17;
-  const int pathSegDot = 18;
-  const int pathSegDot2 = 19;
-
-  // States used to validate a scheme after its end position has been found.
-  const int scheme0 = _schemeStart;
-  const int scheme = 21;
-
-  // Constants encoding the write-index for the state transition into the top 5
-  // bits of a byte.
-  const int schemeEnd = _schemeEndIndex << 5;
-  const int hostStart = _hostStartIndex << 5;
-  const int portStart = _portStartIndex << 5;
-  const int pathStart = _pathStartIndex << 5;
-  const int queryStart = _queryStartIndex << 5;
-  const int fragmentStart = _fragmentStartIndex << 5;
-  const int notSimple = _notSimpleIndex << 5;
-
-  /// The `unreserved` characters of RFC 3986.
-  const unreserved =
-      "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-._~";
-
-  /// The `sub-delim` characters of RFC 3986.
-  const subDelims = r"!$&'()*+,;=";
-  // The `pchar` characters of RFC 3986: characters that may occur in a path,
-  // excluding escapes.
-  const pchar = "$unreserved$subDelims";
-
-  var tables = List<Uint8List>.generate(stateCount, (_) => Uint8List(96));
-
-  // Helper function which initialize the table for [state] with a default
-  // transition and returns the table.
-  Uint8List build(state, defaultTransition) =>
-      tables[state]..fillRange(0, 96, defaultTransition);
-
-  // Helper function which sets the transition for each character in [chars]
-  // to [transition] in the [target] table.
-  // The [chars] string must contain only characters in the U+0020 .. U+007E
-  // range.
-  void setChars(Uint8List target, String chars, int transition) {
-    for (int i = 0; i < chars.length; i++) {
-      var char = chars.codeUnitAt(i);
-      target[char ^ 0x60] = transition;
-    }
-  }
-
-  /// Helper function which sets the transition for all characters in the
-  /// range from `range[0]` to `range[1]` to [transition] in the [target] table.
-  ///
-  /// The [range] must be a two-character string where both characters are in
-  /// the U+0020 .. U+007E range and the former character must have a lower
-  /// code point than the latter.
-  void setRange(Uint8List target, String range, int transition) {
-    for (int i = range.codeUnitAt(0), n = range.codeUnitAt(1); i <= n; i++) {
-      target[i ^ 0x60] = transition;
-    }
-  }
-
-  // Create the transitions for each state.
-  var b;
-
-  // Validate as path, if it is a scheme, we handle it later.
-  b = build(_uriStart, schemeOrPath | notSimple);
-  setChars(b, pchar, schemeOrPath);
-  setChars(b, ".", schemeOrPathDot);
-  setChars(b, ":", authOrPath | schemeEnd); // Handle later.
-  setChars(b, "/", authOrPathSlash);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(schemeOrPathDot, schemeOrPath | notSimple);
-  setChars(b, pchar, schemeOrPath);
-  setChars(b, ".", schemeOrPathDot2);
-  setChars(b, ':', authOrPath | schemeEnd);
-  setChars(b, "/", pathSeg | notSimple);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(schemeOrPathDot2, schemeOrPath | notSimple);
-  setChars(b, pchar, schemeOrPath);
-  setChars(b, "%", schemeOrPath | notSimple);
-  setChars(b, ':', authOrPath | schemeEnd);
-  setChars(b, "/", relPathSeg);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(schemeOrPath, schemeOrPath | notSimple);
-  setChars(b, pchar, schemeOrPath);
-  setChars(b, ':', authOrPath | schemeEnd);
-  setChars(b, "/", pathSeg);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(authOrPath, path | notSimple);
-  setChars(b, pchar, path | pathStart);
-  setChars(b, "/", authOrPathSlash | pathStart);
-  setChars(b, ".", pathSegDot | pathStart);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(authOrPathSlash, path | notSimple);
-  setChars(b, pchar, path);
-  setChars(b, "/", uinfoOrHost0 | hostStart);
-  setChars(b, ".", pathSegDot);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(uinfoOrHost0, uinfoOrHost | notSimple);
-  setChars(b, pchar, uinfoOrHost);
-  setRange(b, "AZ", uinfoOrHost | notSimple);
-  setChars(b, ":", uinfoOrPort0 | portStart);
-  setChars(b, "@", uinfoOrHost0 | hostStart);
-  setChars(b, "[", ipv6Host | notSimple);
-  setChars(b, "/", pathSeg | pathStart);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(uinfoOrHost, uinfoOrHost | notSimple);
-  setChars(b, pchar, uinfoOrHost);
-  setRange(b, "AZ", uinfoOrHost | notSimple);
-  setChars(b, ":", uinfoOrPort0 | portStart);
-  setChars(b, "@", uinfoOrHost0 | hostStart);
-  setChars(b, "/", pathSeg | pathStart);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(uinfoOrPort0, uinfoOrPort | notSimple);
-  setRange(b, "19", uinfoOrPort);
-  setChars(b, "@", uinfoOrHost0 | hostStart);
-  setChars(b, "/", pathSeg | pathStart);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(uinfoOrPort, uinfoOrPort | notSimple);
-  setRange(b, "09", uinfoOrPort);
-  setChars(b, "@", uinfoOrHost0 | hostStart);
-  setChars(b, "/", pathSeg | pathStart);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(ipv6Host, ipv6Host);
-  setChars(b, "]", uinfoOrHost);
-
-  b = build(relPathSeg, path | notSimple);
-  setChars(b, pchar, path);
-  setChars(b, ".", relPathSegDot);
-  setChars(b, "/", pathSeg | notSimple);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(relPathSegDot, path | notSimple);
-  setChars(b, pchar, path);
-  setChars(b, ".", relPathSegDot2);
-  setChars(b, "/", pathSeg | notSimple);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(relPathSegDot2, path | notSimple);
-  setChars(b, pchar, path);
-  setChars(b, "/", relPathSeg);
-  setChars(b, "?", query | queryStart); // This should be non-simple.
-  setChars(b, "#", fragment | fragmentStart); // This should be non-simple.
-
-  b = build(pathSeg, path | notSimple);
-  setChars(b, pchar, path);
-  setChars(b, ".", pathSegDot);
-  setChars(b, "/", pathSeg | notSimple);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(pathSegDot, path | notSimple);
-  setChars(b, pchar, path);
-  setChars(b, ".", pathSegDot2);
-  setChars(b, "/", pathSeg | notSimple);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(pathSegDot2, path | notSimple);
-  setChars(b, pchar, path);
-  setChars(b, "/", pathSeg | notSimple);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(path, path | notSimple);
-  setChars(b, pchar, path);
-  setChars(b, "/", pathSeg);
-  setChars(b, "?", query | queryStart);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(query, query | notSimple);
-  setChars(b, pchar, query);
-  setChars(b, "?", query);
-  setChars(b, "#", fragment | fragmentStart);
-
-  b = build(fragment, fragment | notSimple);
-  setChars(b, pchar, fragment);
-  setChars(b, "?", fragment);
-
-  // A separate two-state validator for lower-case scheme names.
-  // Any non-scheme character or upper-case letter is marked as non-simple.
-  b = build(scheme0, scheme | notSimple);
-  setRange(b, "az", scheme);
-
-  b = build(scheme, scheme | notSimple);
-  setRange(b, "az", scheme);
-  setRange(b, "09", scheme);
-  setChars(b, "+-.", scheme);
-
-  return tables;
-}
+const String _scannerTables = "\xE1\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
+    "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xE1\xE1\xE1"
+    "\x01\xE1\xE1\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
+    "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xE1\xE1\xE1\xE1\x01\xE1\x01"
+    "\xE1\xCD\x01\xE1\x01\x01\x01\x01\x01\x01\x01\x01\x0E\x03\x01\x01\x01\x01"
+    "\x01\x01\x01\x01\x01\x01\x22\x01\xE1\x01\xE1\xAC\xE1\x01\x01\x01\x01\x01"
+    "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
+    "\x01\x01\x01\xE1\xE1\xE1\x01\xE1\xE1\x01\x01\x01\x01\x01\x01\x01\x01\x01"
+    "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xE1"
+    "\xE1\xE1\xE1\x01\xE1\x01\xE1\xCD\x01\xE1\x01\x01\x01\x01\x01\x01\x01\x01"
+    "\x01\x0A\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x22\x01\xE1\x01\xE1\xAC"
+    "\xEB\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B"
+    "\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\xEB\xEB\xEB\x8B\xEB\xEB\x8B\x8B\x8B"
+    "\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B"
+    "\x8B\x8B\x8B\x8B\x8B\xEB\xEB\xEB\xEB\x8B\xEB\x8B\xEB\xCD\x8B\xEB\x8B\x8B"
+    "\x8B\x8B\x8B\x8B\x8B\x8B\x92\x83\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B\x8B"
+    "\xEB\x8B\xEB\x8B\xEB\xAC\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB\xEB\xEB"
+    "\x0B\xEB\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB\xEB\xEB\xEB\x0B\xEB\x0B"
+    "\xEB\xCD\x0B\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x12\x44\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\xEB\x0B\xEB\x0B\xEB\xAC\xE5\x05\x05\x05\x05\x05"
+    "\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05"
+    "\x05\x05\x05\xE5\xE5\xE5\x05\xE5\x44\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5"
+    "\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE8"
+    "\xE5\xE5\xE5\x05\xE5\x05\xE5\xCD\x05\xE5\x05\x05\x05\x05\x05\x05\x05\x05"
+    "\x05\x8A\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x66\x05\xE5\x05\xE5\xAC"
+    "\xE5\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05"
+    "\x05\x05\x05\x05\x05\x05\x05\x05\x05\xE5\xE5\xE5\x05\xE5\x44\xE5\xE5\xE5"
+    "\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5"
+    "\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\xE5\x05\xE5\x05\xE5\xCD\x05\xE5\x05\x05"
+    "\x05\x05\x05\x05\x05\x05\x05\x8A\x05\x05\x05\x05\x05\x05\x05\x05\x05\x05"
+    "\x66\x05\xE5\x05\xE5\xAC\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7"
+    "\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7"
+    "\xE7\xE7\x44\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7"
+    "\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7"
+    "\xE7\xCD\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\x8A\xE7\x07\x07\x07"
+    "\x07\x07\x07\x07\x07\x07\xE7\xE7\xE7\xE7\xE7\xAC\xE7\xE7\xE7\xE7\xE7\xE7"
+    "\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7"
+    "\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\x44\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7"
+    "\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7"
+    "\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xCD\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7\xE7"
+    "\xE7\x8A\x07\x07\x07\x07\x07\x07\x07\x07\x07\x07\xE7\xE7\xE7\xE7\xE7\xAC"
+    "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
+    "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
+    "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
+    "\x08\x08\x08\x08\x08\x08\x08\x05\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
+    "\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08\x08"
+    "\x08\x08\x08\x08\x08\x08\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB\xEB\xEB"
+    "\x0B\xEB\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB\xEB\xEB\xEB\x0B\xEB\x0B"
+    "\xEB\xCD\x0B\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x10\xEA\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\xEB\x0B\xEB\x0B\xEB\xAC\xEB\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\xEB\xEB\xEB\x0B\xEB\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB"
+    "\xEB\xEB\xEB\x0B\xEB\x0B\xEB\xCD\x0B\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x12\xEA\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB\x0B\xEB\x0B\xEB\xAC"
+    "\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB\xEB\xEB\x0B\xEB\xEB\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\xEB\xEB\xEB\xEB\x0B\xEB\x0B\xEB\xCD\x0B\xEB\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0A\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\xEB\x0B\xEB\x0B\xEB\xAC\xEC\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C"
+    "\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\xEC\xEC\xEC"
+    "\x0C\xEC\xEC\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C"
+    "\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\xEC\xEC\xEC\xEC\x0C\xEC\x0C"
+    "\xEC\xCD\x0C\xEC\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\x0C\xEC\x0C\x0C\x0C\x0C"
+    "\x0C\x0C\x0C\x0C\x0C\x0C\xEC\x0C\xEC\x0C\xEC\x0C\xED\x0D\x0D\x0D\x0D\x0D"
+    "\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D"
+    "\x0D\x0D\x0D\xED\xED\xED\x0D\xED\xED\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D"
+    "\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\xED"
+    "\xED\xED\xED\x0D\xED\x0D\xED\xED\x0D\xED\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D"
+    "\x0D\xED\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\x0D\xED\x0D\xED\x0D\xED\x0D"
+    "\xE1\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
+    "\x01\x01\x01\x01\x01\x01\x01\x01\x01\xE1\xE1\xE1\x01\xE1\xE1\x01\x01\x01"
+    "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
+    "\x01\x01\x01\x01\x01\xE1\xE1\xE1\xE1\x01\xE1\x01\xE1\xCD\x01\xE1\x01\x01"
+    "\x01\x01\x01\x01\x01\x01\x0F\xEA\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
+    "\x22\x01\xE1\x01\xE1\xAC\xE1\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
+    "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xE1\xE1\xE1"
+    "\x01\xE1\xE1\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
+    "\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xE1\xE1\xE1\xE1\x01\xE1\x01"
+    "\xE1\xCD\x01\xE1\x01\x01\x01\x01\x01\x01\x01\x01\x01\x09\x01\x01\x01\x01"
+    "\x01\x01\x01\x01\x01\x01\x22\x01\xE1\x01\xE1\xAC\xEB\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\xEB\xEB\xEB\x0B\xEB\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB"
+    "\xEB\xEB\xEB\x0B\xEB\x0B\xEB\xCD\x0B\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x11\xEA\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB\x0B\xEB\x0B\xEB\xAC"
+    "\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB\xEB\xEB\x0B\xEB\xEB\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\xEB\xEB\xEB\xEB\x0B\xEB\x0B\xEB\xCD\x0B\xEB\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x09\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\xEB\x0B\xEB\x0B\xEB\xAC\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB\xEB\xEB"
+    "\x0B\xEB\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB\xEB\xEB\xEB\x0B\xEB\x0B"
+    "\xEB\xCD\x0B\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x13\xEA\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\xEB\x0B\xEB\x0B\xEB\xAC\xEB\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\xEB\xEB\xEB\x0B\xEB\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB"
+    "\xEB\xEB\xEB\x0B\xEB\x0B\xEB\xCD\x0B\xEB\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B"
+    "\x0B\xEA\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\x0B\xEB\x0B\xEB\x0B\xEB\xAC"
+    "\xF5\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15"
+    "\x15\x15\x15\x15\x15\x15\x15\x15\x15\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5"
+    "\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5"
+    "\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5"
+    "\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5"
+    "\xF5\xF5\xF5\xF5\xF5\xF5\xF5\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15"
+    "\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\x15\xF5\xF5\xF5"
+    "\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5"
+    "\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5"
+    "\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\xF5\x15\xF5\x15\x15\xF5\x15\x15\x15\x15"
+    "\x15\x15\x15\x15\x15\x15\xF5\xF5\xF5\xF5\xF5\xF5";
 
 // --------------------------------------------------------------------
-// Code that uses the URI scanner table.
-
 /// Scan a string using the [_scannerTables] state machine.
 ///
 /// Scans [uri] from [start] to [end], starting in state [state] and
@@ -4351,20 +4169,20 @@
 ///
 /// Returns the final state.
 int _scan(String uri, int start, int end, int state, List<int> indices) {
-  var tables = _scannerTables;
+  const int stateTableSize = 96;
   assert(end <= uri.length);
   for (int i = start; i < end; i++) {
-    var table = tables[state];
     // Xor with 0x60 to move range 0x20-0x7f into 0x00-0x5f
     int char = uri.codeUnitAt(i) ^ 0x60;
     // Use 0x1f (nee 0x7f) to represent all unhandled characters.
     if (char > 0x5f) char = 0x1f;
-    int transition = table[char];
+    int transition = _scannerTables.codeUnitAt(state * stateTableSize + char);
     state = transition & 0x1f;
     indices[transition >> 5] = i;
   }
   return state;
 }
+// --- URI PARSER TABLE --- end --- generated code, do not edit ---
 
 class _SimpleUri implements Uri {
   final String _uri;

diff --git a/tools/generate_uri_parser_tables.dart b/tools/generate_uri_parser_tables.dart
new file mode 100644
index 0000000..419d955
--- /dev/null
+++ b/tools/generate_uri_parser_tables.dart

@@ -0,0 +1,568 @@
+// Copyright (c) 2022, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+// ----------------------------------------------------------------------
+// Code to create the URI scanner table used by `uri.dart`.
+//
+// This file exists in case someone, some day, will want to change the
+// representation of the tables, maybe if Dart gets `Uint8List` literals.
+// It should not otherwise be necessary to re-generate the tables.
+//
+// The table is stored in the `uri.dart` file as a 1-byte string literal.
+// This script generates the string literal and prints it on stdout.
+// If passed the `-u filename` flag, it instead updates the file directly.
+// The file should be the `sdk/lib/core/uri.dart` file, which contains markers
+// showing where to insert the generated code.
+
+import "dart:io";
+import "dart:typed_data";
+
+/// Index of the position of that `:` after a scheme.
+const int _schemeEndIndex = 1;
+
+/// Index of the position of the character just before the host name.
+const int _hostStartIndex = 2;
+
+/// Index of the position of the `:` before a port value.
+const int _portStartIndex = 3;
+
+/// Index of the position of the first character of a path.
+const int _pathStartIndex = 4;
+
+/// Index of the position of the `?` before a query.
+const int _queryStartIndex = 5;
+
+/// Index of the position of the `#` before a fragment.
+const int _fragmentStartIndex = 6;
+
+/// Index of a position where the URI was determined to be "non-simple".
+const int _notSimpleIndex = 7;
+
+// Initial state for scanner.
+const int _uriStart = 0;
+
+// If scanning of a URI terminates in this state or above,
+// consider the URI non-simple
+const int _nonSimpleEndStates = 14;
+
+// Initial state for scheme validation.
+const int _schemeStart = 20;
+
+void main(List<String> args) {
+  var tables = _createTables();
+  var literalBuilder = StringLiteralBuilder("_scannerTables");
+  for (var table in tables) {
+    literalBuilder.writeBytes(table, hexAll: true);
+  }
+  var tableString = literalBuilder.close();
+
+  var result = """
+// Use tools/generate_uri_parser_tables.dart to generate this code
+// if necessary.
+
+// --------------------------------------------------------------------
+// Constants used to read the scanner result.
+// The indices points into the table filled by [_scan] which contains
+// recognized positions in the scanned URI.
+// The `0` index is only used internally.
+
+/// Index of the position of that `:` after a scheme.
+const int _schemeEndIndex = $_schemeEndIndex;
+
+/// Index of the position of the character just before the host name.
+const int _hostStartIndex = $_hostStartIndex;
+
+/// Index of the position of the `:` before a port value.
+const int _portStartIndex = $_portStartIndex;
+
+/// Index of the position of the first character of a path.
+const int _pathStartIndex = $_pathStartIndex;
+
+/// Index of the position of the `?` before a query.
+const int _queryStartIndex = $_queryStartIndex;
+
+/// Index of the position of the `#` before a fragment.
+const int _fragmentStartIndex = $_fragmentStartIndex;
+
+/// Index of a position where the URI was determined to be "non-simple".
+const int _notSimpleIndex = $_notSimpleIndex;
+
+// Initial state for scanner.
+const int _uriStart = $_uriStart;
+
+// If scanning of a URI terminates in this state or above,
+// consider the URI non-simple
+const int _nonSimpleEndStates = $_nonSimpleEndStates;
+
+// Initial state for scheme validation.
+const int _schemeStart = $_schemeStart;
+
+// --------------------------------------------------------------------
+/// Transition tables are used to scan a URI to determine its structure.
+///
+/// The tables represent a state machine with output.
+///
+/// To scan the URI, start in the [_uriStart] state, then read each character
+/// of the URI in order, from start to end, and for each character perform a
+/// transition to a new state while writing the current position
+/// into the output buffer at a designated index.
+///
+/// Each state, represented by an integer which is an index into
+/// [_scannerTables], has a set of transitions, one for each character.
+/// The transitions are encoded as a 5-bit integer representing the next state
+/// and a 3-bit index into the output table.
+///
+/// For URI scanning, only characters in the range U+0020 through U+007E are
+/// interesting; all characters outside that range are treated the same.
+/// The tables only contain 96 entries, representing the 95 characters in the
+/// interesting range, and one entry for all values outside the range.
+/// The character entries are stored in one `String` of 96 characters per state,
+/// with the transition for a character at position `character ^ 0x60`,
+/// which maps the range U+0020 .. U+007F into positions 0 .. 95.
+/// All remaining characters are mapped to position 31 (`0x7f ^ 0x60`), which
+/// represents the transition for all remaining characters.
+$tableString
+// --------------------------------------------------------------------
+/// Scan a string using the [_scannerTables] state machine.
+///
+/// Scans [uri] from [start] to [end], starting in state [state] and
+/// writing output into [indices].
+///
+/// Returns the final state.
+int _scan(String uri, int start, int end, int state, List<int> indices) {
+  const int stateTableSize = 96;
+  assert(end <= uri.length);
+  for (int i = start; i < end; i++) {
+    // Xor with 0x60 to move range 0x20-0x7f into 0x00-0x5f
+    int char = uri.codeUnitAt(i) ^ 0x60;
+    // Use 0x1f (nee 0x7f) to represent all unhandled characters.
+    if (char > 0x5f) char = 0x1f;
+    int transition = _scannerTables.codeUnitAt(state * stateTableSize + char);
+    state = transition & 0x1f;
+    indices[transition >> 5] = i;
+  }
+  return state;
+}
+""";
+  if (args.isEmpty || !args.first.startsWith("-u")) {
+    print(result);
+    return;
+  }
+  var arg = args.first;
+  var filePath = "sdk/lib/core/uri.dart";
+  // Default file location, if run from root of SDK.
+  if (arg.length > 2) {
+    filePath = arg.substring(2);
+  } else if (args.length > 1) {
+    filePath = args[1];
+  }
+  var file = File(filePath);
+  if (!file.existsSync()) {
+    stderr.writeln("Cannot find file: $filePath");
+    exit(1);
+  }
+  var contents = file.readAsStringSync();
+  var pattern = RegExp(r"^// --- URI PARSER TABLE --- (start|end) --- [^]*?^",
+      multiLine: true);
+  var matches = pattern.allMatches(contents).toList();
+  if (matches.length != 2) {
+    stderr.writeln("Cannot find marked section in file $filePath");
+    exit(1);
+  }
+  var start = matches.first.end;
+  var end = matches.last.start;
+  var newContents = contents.replaceRange(start, end, result);
+  if (newContents != contents) {
+    file.writeAsStringSync(newContents);
+    print("$filePath updated.");
+  } else {
+    stderr.writeln("No update needed.");
+    return;
+  }
+}
+
+/// Creates a literal of the form
+/// ```dart
+/// const String someName = "ab\x82azx......"
+///     "more bytes and escapes \xff        "
+///     "....";
+/// ```
+/// while escaping non-printable charactes, `"`, `$` and `\`,
+/// and trying to fit as many characters on each line as possible.
+class StringLiteralBuilder {
+  final buffer = StringBuffer();
+  String indent;
+  var lineLength = 0;
+  StringLiteralBuilder(String name, {int indent = 0})
+      : indent = " " * (indent + 4) {
+    if (indent > 0) buffer.write(" " * indent);
+    buffer
+      ..write("const String ")
+      ..write(name)
+      ..write(" = \"");
+    lineLength = buffer.length;
+  }
+
+  void writeBytes(Uint8List bytes, {bool hexAll = false}) {
+    for (var byte in bytes) {
+      var string = hexAll ? hex(byte) : charString(byte);
+      lineLength += string.length;
+      if (lineLength > 79) {
+        buffer
+          ..write('"\n')
+          ..write(indent)
+          ..write('"');
+        lineLength = indent.length + 1 + string.length;
+      }
+      buffer.write(string);
+    }
+  }
+
+  /// Terminates the string literal.
+  ///
+  /// Do not call use builder after calling close.
+  String close() {
+    if (lineLength < 78) {
+      buffer.write("\";\n");
+    } else {
+      buffer
+        ..write("\"\n")
+        ..write(indent)
+        ..write(";\n");
+    }
+    return buffer.toString();
+  }
+
+  static String charString(int byte) {
+    // Recognized characters that need escaping, or has a short escape.
+    switch (byte) {
+      case 0x08:
+        return r"\b";
+      case 0x09:
+        return r"\t";
+      case 0x0a:
+        return r"\n";
+      case 0x0b:
+        return r"\v";
+      case 0x0c:
+        return r"\f";
+      case 0x0d:
+        return r"\r";
+      case 0x22:
+        return r'\"';
+      case 0x5c:
+        return r"\\";
+      case 0x24:
+        return r"\$";
+    }
+    // All control characters.
+    if (byte & 0x60 == 0 || byte == 0x7F) {
+      // 0x00 - 0x1F, 0x80 - 0xBF, 0x7F
+      return hex(byte);
+    }
+    return String.fromCharCode(byte);
+  }
+
+  static String hex(int byte) {
+    const digits = "0123456789ABCDEF";
+    return "\\x${digits[byte >> 4]}${digits[byte & 0xf]}";
+  }
+}
+
+/// Creates the tables for [_scannerTables] used by [Uri.parse].
+///
+/// See [_scannerTables] for the generated format.
+///
+/// The concrete tables are chosen as a trade-off between the number of states
+/// needed and the precision of the result.
+/// This allows definitely recognizing the general structure of the URI
+/// (presence and location of scheme, user-info, host, port, path, query and
+/// fragment) while at the same time detecting that some components are not
+/// in canonical form (anything containing a `%`, a host-name containing a
+/// capital letter). Since the scanner doesn't know whether something is a
+/// scheme or a path until it sees `:`, or user-info or host until it sees
+/// a `@`, a second pass is needed to validate the scheme and any user-info
+/// is considered non-canonical by default.
+///
+/// The states (starting from [_uriStart]) write positions while scanning
+/// a string from `start` to `end` as follows:
+///
+/// - [_schemeEndIndex]: Should be initialized to `start-1`.
+///   If the URI has a scheme, it is set to the position of the `:` after
+///   the scheme.
+/// - [_hostStartIndex]: Should be initialized to `start - 1`.
+///   If the URI has an authority, it is set to the character before the
+///   host name - either the second `/` in the `//` leading the authority,
+///   or the `@` after a user-info. Comparing this value to the scheme end
+///   position can be used to detect that there is a user-info component.
+/// - [_portStartIndex]: Should be initialized to `start`.
+///   Set to the position of the last `:` in an authority, and unchanged
+///   if there is no authority or no `:` in an authority.
+///   If this position is after the host start, there is a port, otherwise it
+///   is just marking a colon in the user-info component.
+/// - [_pathStartIndex]: Should be initialized to `start`.
+///   Is set to the first path character unless the path is empty.
+///   If the path is empty, the position is either unchanged (`start`) or
+///   the first slash of an authority. So, if the path start is before a
+///   host start or scheme end, the path is empty.
+/// - [_queryStartIndex]: Should be initialized to `end`.
+///   The position of the `?` leading a query if the URI contains a query.
+/// - [_fragmentStartIndex]: Should be initialized to `end`.
+///   The position of the `#` leading a fragment if the URI contains a fragment.
+/// - [_notSimpleIndex]: Should be initialized to `start - 1`.
+///   Set to another value if the URI is considered "not simple".
+///   This is elaborated below.
+///
+/// # Simple URIs
+/// A URI is considered "simple" if it is in a normalized form containing no
+/// escapes. This allows us to skip normalization and checking whether escapes
+/// are valid, and to extract components without worrying about unescaping.
+///
+/// The scanner computes a conservative approximation of being "simple".
+/// It rejects any URI with an escape, with a user-info component (mainly
+/// because they are rare and would increase the number of states in the
+/// scanner significantly), with an IPV6 host or with a capital letter in
+/// the scheme or host name (the scheme is handled in a second scan using
+/// a separate two-state table).
+/// Further, paths containing `..` or `.` path segments are considered
+/// non-simple except for pure relative paths (no scheme or authority) starting
+/// with a sequence of "../" segments.
+///
+/// The transition tables cannot detect a trailing ".." in the path,
+/// followed by a query or fragment, because the segment is not known to be
+/// complete until we are past it, and we then need to store the query/fragment
+/// start instead. This cast is checked manually post-scanning (such a path
+/// needs to be normalized to end in "../", so the URI shouldn't be considered
+/// simple).
+List<Uint8List> _createTables() {
+  // Total number of states for the scanner.
+  const int stateCount = 22;
+
+  // States used to scan a URI from scratch.
+  const int schemeOrPath = 01;
+  const int authOrPath = 02;
+  const int authOrPathSlash = 03;
+  const int uinfoOrHost0 = 04;
+  const int uinfoOrHost = 05;
+  const int uinfoOrPort0 = 06;
+  const int uinfoOrPort = 07;
+  const int ipv6Host = 08;
+  const int relPathSeg = 09;
+  const int pathSeg = 10;
+  const int path = 11;
+  const int query = 12;
+  const int fragment = 13;
+  const int schemeOrPathDot = 14;
+  const int schemeOrPathDot2 = 15;
+  const int relPathSegDot = 16;
+  const int relPathSegDot2 = 17;
+  const int pathSegDot = 18;
+  const int pathSegDot2 = 19;
+
+  // States used to validate a scheme after its end position has been found.
+  const int scheme0 = _schemeStart;
+  const int scheme = 21;
+
+  // Constants encoding the write-index for the state transition into the top 5
+  // bits of a byte.
+  const int schemeEnd = _schemeEndIndex << 5;
+  const int hostStart = _hostStartIndex << 5;
+  const int portStart = _portStartIndex << 5;
+  const int pathStart = _pathStartIndex << 5;
+  const int queryStart = _queryStartIndex << 5;
+  const int fragmentStart = _fragmentStartIndex << 5;
+  const int notSimple = _notSimpleIndex << 5;
+
+  /// The `unreserved` characters of RFC 3986.
+  const unreserved =
+      "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-._~";
+
+  /// The `sub-delim` characters of RFC 3986.
+  const subDelims = r"!$&'()*+,;=";
+  // The `pchar` characters of RFC 3986: characters that may occur in a path,
+  // excluding escapes.
+  const pchar = "$unreserved$subDelims";
+
+  var tables = List<Uint8List>.generate(stateCount, (_) => Uint8List(96));
+
+  // Helper function which initialize the table for [state] with a default
+  // transition and returns the table.
+  Uint8List build(state, defaultTransition) =>
+      tables[state]..fillRange(0, 96, defaultTransition);
+
+  // Helper function which sets the transition for each character in [chars]
+  // to [transition] in the [target] table.
+  // The [chars] string must contain only characters in the U+0020 .. U+007E
+  // range.
+  void setChars(Uint8List target, String chars, int transition) {
+    for (int i = 0; i < chars.length; i++) {
+      var char = chars.codeUnitAt(i);
+      target[char ^ 0x60] = transition;
+    }
+  }
+
+  /// Helper function which sets the transition for all characters in the
+  /// range from `range[0]` to `range[1]` to [transition] in the [target] table.
+  ///
+  /// The [range] must be a two-character string where both characters are in
+  /// the U+0020 .. U+007E range and the former character must have a lower
+  /// code point than the latter.
+  void setRange(Uint8List target, String range, int transition) {
+    for (int i = range.codeUnitAt(0), n = range.codeUnitAt(1); i <= n; i++) {
+      target[i ^ 0x60] = transition;
+    }
+  }
+
+  // Create the transitions for each state.
+  Uint8List b;
+
+  // Validate as path, if it is a scheme, we handle it later.
+  b = build(_uriStart, schemeOrPath | notSimple);
+  setChars(b, pchar, schemeOrPath);
+  setChars(b, ".", schemeOrPathDot);
+  setChars(b, ":", authOrPath | schemeEnd); // Handle later.
+  setChars(b, "/", authOrPathSlash);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(schemeOrPathDot, schemeOrPath | notSimple);
+  setChars(b, pchar, schemeOrPath);
+  setChars(b, ".", schemeOrPathDot2);
+  setChars(b, ':', authOrPath | schemeEnd);
+  setChars(b, "/", pathSeg | notSimple);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(schemeOrPathDot2, schemeOrPath | notSimple);
+  setChars(b, pchar, schemeOrPath);
+  setChars(b, "%", schemeOrPath | notSimple);
+  setChars(b, ':', authOrPath | schemeEnd);
+  setChars(b, "/", relPathSeg);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(schemeOrPath, schemeOrPath | notSimple);
+  setChars(b, pchar, schemeOrPath);
+  setChars(b, ':', authOrPath | schemeEnd);
+  setChars(b, "/", pathSeg);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(authOrPath, path | notSimple);
+  setChars(b, pchar, path | pathStart);
+  setChars(b, "/", authOrPathSlash | pathStart);
+  setChars(b, ".", pathSegDot | pathStart);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(authOrPathSlash, path | notSimple);
+  setChars(b, pchar, path);
+  setChars(b, "/", uinfoOrHost0 | hostStart);
+  setChars(b, ".", pathSegDot);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(uinfoOrHost0, uinfoOrHost | notSimple);
+  setChars(b, pchar, uinfoOrHost);
+  setRange(b, "AZ", uinfoOrHost | notSimple);
+  setChars(b, ":", uinfoOrPort0 | portStart);
+  setChars(b, "@", uinfoOrHost0 | hostStart);
+  setChars(b, "[", ipv6Host | notSimple);
+  setChars(b, "/", pathSeg | pathStart);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(uinfoOrHost, uinfoOrHost | notSimple);
+  setChars(b, pchar, uinfoOrHost);
+  setRange(b, "AZ", uinfoOrHost | notSimple);
+  setChars(b, ":", uinfoOrPort0 | portStart);
+  setChars(b, "@", uinfoOrHost0 | hostStart);
+  setChars(b, "/", pathSeg | pathStart);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(uinfoOrPort0, uinfoOrPort | notSimple);
+  setRange(b, "19", uinfoOrPort);
+  setChars(b, "@", uinfoOrHost0 | hostStart);
+  setChars(b, "/", pathSeg | pathStart);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(uinfoOrPort, uinfoOrPort | notSimple);
+  setRange(b, "09", uinfoOrPort);
+  setChars(b, "@", uinfoOrHost0 | hostStart);
+  setChars(b, "/", pathSeg | pathStart);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(ipv6Host, ipv6Host);
+  setChars(b, "]", uinfoOrHost);
+
+  b = build(relPathSeg, path | notSimple);
+  setChars(b, pchar, path);
+  setChars(b, ".", relPathSegDot);
+  setChars(b, "/", pathSeg | notSimple);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(relPathSegDot, path | notSimple);
+  setChars(b, pchar, path);
+  setChars(b, ".", relPathSegDot2);
+  setChars(b, "/", pathSeg | notSimple);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(relPathSegDot2, path | notSimple);
+  setChars(b, pchar, path);
+  setChars(b, "/", relPathSeg);
+  setChars(b, "?", query | queryStart); // This should be non-simple.
+  setChars(b, "#", fragment | fragmentStart); // This should be non-simple.
+
+  b = build(pathSeg, path | notSimple);
+  setChars(b, pchar, path);
+  setChars(b, ".", pathSegDot);
+  setChars(b, "/", pathSeg | notSimple);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(pathSegDot, path | notSimple);
+  setChars(b, pchar, path);
+  setChars(b, ".", pathSegDot2);
+  setChars(b, "/", pathSeg | notSimple);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(pathSegDot2, path | notSimple);
+  setChars(b, pchar, path);
+  setChars(b, "/", pathSeg | notSimple);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(path, path | notSimple);
+  setChars(b, pchar, path);
+  setChars(b, "/", pathSeg);
+  setChars(b, "?", query | queryStart);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(query, query | notSimple);
+  setChars(b, pchar, query);
+  setChars(b, "?", query);
+  setChars(b, "#", fragment | fragmentStart);
+
+  b = build(fragment, fragment | notSimple);
+  setChars(b, pchar, fragment);
+  setChars(b, "?", fragment);
+
+  // A separate two-state validator for lower-case scheme names.
+  // Any non-scheme character or upper-case letter is marked as non-simple.
+  b = build(scheme0, scheme | notSimple);
+  setRange(b, "az", scheme);
+
+  b = build(scheme, scheme | notSimple);
+  setRange(b, "az", scheme);
+  setRange(b, "09", scheme);
+  setChars(b, "+-.", scheme);
+
+  return tables;
+}
commit	4c7110332e2c81366b6e85cb231a59840e370cd6	[log] [tgz]
author	Lasse R.H. Nielsen <lrn@google.com>	Tue Aug 16 14:53:07 2022 +0000
committer	Commit Bot <commit-bot@chromium.org>	Tue Aug 16 14:53:07 2022 +0000
tree	d85e3ed203ba2114b88c99baed65290f7f427fd7
parent	91c88ab06710b4e6ec6bec4b658a46e9c37b9b61 [diff]