[vm] RFC6874 support zoneID in uri parser

An implementation of https://tools.ietf.org/html/rfc6874.

IP-literal = "[" ( IPv6address / IPvFuture  ) "]"

will be updated to:

IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture  ) "]"
ZoneID = 1*( unreserved / pct-encoded )
IPv6addrz = IPv6address "%25" ZoneID

Bug: https://github.com/dart-lang/sdk/issues/29456
Change-Id: Ieac7b00e97d3ceff794f3b56ed4b6e4d9d6bbb47
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/103544
Commit-Queue: Zichang Guo <zichangguo@google.com>
Reviewed-by: Lasse R.H. Nielsen <lrn@google.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 299563f..acde42a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -77,6 +77,16 @@
 
 [1]: https://github.com/dart-lang/sdk/blob/master/CHANGELOG.md#200---2018-08-07
 
+#### `dart:core`
+
+* Update `Uri` class to support RFC6874: https://tools.ietf.org/html/rfc6874
+  "%25" or "%" can be appended to the end of a valid IPv6 address, followed
+  by a zone identifier. A valid zone ID consists of unreserved characters or
+  percent-encoded octets, as defined in RFC3986.
+  IPv6addrz = IPv6address "%25" ZoneID
+
+  [29456]: https://github.com/dart-lang/sdk/issues/29456
+
 ### Dart VM
 
 ### Tools
diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart
index 2c7d918..044c863 100644
--- a/sdk/lib/core/uri.dart
+++ b/sdk/lib/core/uri.dart
@@ -761,6 +761,9 @@
     // authority     = [ userinfo "@" ] host [ ":" port ]
     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
     // host          = IP-literal / IPv4address / reg-name
+    // IP-literal    = "[" ( IPv6address / IPv6addrz / IPvFuture ) "]"
+    // IPv6addrz     = IPv6address "%25" ZoneID
+    // ZoneID        = 1*( unreserved / pct-encoded )
     // port          = *DIGIT
     // reg-name      = *( unreserved / pct-encoded / sub-delims )
     //
@@ -1643,14 +1646,24 @@
       if (hostStart < authority.length &&
           authority.codeUnitAt(hostStart) == _LEFT_BRACKET) {
         // IPv6 host.
+        int escapeForZoneID = -1;
         for (; hostEnd < authority.length; hostEnd++) {
-          if (authority.codeUnitAt(hostEnd) == _RIGHT_BRACKET) break;
+          int char = authority.codeUnitAt(hostEnd);
+          if (char == _PERCENT && escapeForZoneID < 0) {
+            escapeForZoneID = hostEnd;
+            if (authority.startsWith("25", hostEnd + 1)) {
+              hostEnd += 2; // Might as well skip the already checked escape.
+            }
+          } else if (char == _RIGHT_BRACKET) {
+            break;
+          }
         }
         if (hostEnd == authority.length) {
           throw FormatException(
               "Invalid IPv6 host entry.", authority, hostStart);
         }
-        Uri.parseIPv6Address(authority, hostStart + 1, hostEnd);
+        Uri.parseIPv6Address(authority, hostStart + 1,
+            (escapeForZoneID < 0) ? hostEnd : escapeForZoneID);
         hostEnd++; // Skip the closing bracket.
         if (hostEnd != authority.length &&
             authority.codeUnitAt(hostEnd) != _COLON) {
@@ -1959,22 +1972,124 @@
       if (host.codeUnitAt(end - 1) != _RIGHT_BRACKET) {
         _fail(host, start, 'Missing end `]` to match `[` in host');
       }
-      Uri.parseIPv6Address(host, start + 1, end - 1);
+      String zoneID = "";
+      int index = _checkZoneID(host, start + 1, end - 1);
+      if (index < end - 1) {
+        int zoneIDstart =
+            (host.startsWith("25", index + 1)) ? index + 3 : index + 1;
+        zoneID = _normalizeZoneID(host, zoneIDstart, end - 1, "%25");
+      }
+      Uri.parseIPv6Address(host, start + 1, index);
       // RFC 5952 requires hex digits to be lower case.
-      return host.substring(start, end).toLowerCase();
+      return host.substring(start, index).toLowerCase() + zoneID + ']';
     }
     if (!strictIPv6) {
       // TODO(lrn): skip if too short to be a valid IPv6 address?
       for (int i = start; i < end; i++) {
         if (host.codeUnitAt(i) == _COLON) {
-          Uri.parseIPv6Address(host, start, end);
-          return '[$host]';
+          String zoneID = "";
+          int index = _checkZoneID(host, start, end);
+          if (index < end) {
+            int zoneIDstart =
+                (host.startsWith("25", index + 1)) ? index + 3 : index + 1;
+            zoneID = _normalizeZoneID(host, zoneIDstart, end, "%25");
+          }
+          Uri.parseIPv6Address(host, start, index);
+          return '[${host.substring(start, index)}' + zoneID + ']';
         }
       }
     }
     return _normalizeRegName(host, start, end);
   }
 
+  // RFC 6874: locates the `%` that introduces a ZoneID in an IPv6 host.
+  // Returns the index of the first `%` in [start, end), or `end` if none.
+  static int _checkZoneID(String host, int start, int end) {
+    int index = host.indexOf('%', start);
+    index = (index >= start && index < end) ? index : end;
+    return index;
+  }
+
+  static bool _isZoneIDChar(int char) { // Bitmap lookup in [_zoneIDTable].
+    return char < 127 && (_zoneIDTable[char >> 4] & (1 << (char & 0xf))) != 0;
+  }
+
+  /**
+   * Normalizes the RFC6874 ZoneID in `host` from [start] to [end].
+   *
+   * Like [_normalizeOrSubstring], but literal characters keep their case.
+   * Characters that are neither ZoneID characters nor `%` are written using
+   * [_escapeChar], and the result always begins with [prefix].
+   * ZoneID = 1*(unreserved / pct-encoded)
+   */
+  static String _normalizeZoneID(String host, int start, int end,
+      [String prefix = '']) {
+    StringBuffer buffer;
+    if (prefix != '') {
+      buffer = StringBuffer(prefix);
+    }
+    int sectionStart = start;
+    int index = start;
+    // Whether all characters between sectionStart and index are normalized.
+    bool isNormalized = true;
+
+    while (index < end) {
+      int char = host.codeUnitAt(index);
+      if (char == _PERCENT) {
+        String replacement = _normalizeEscape(host, index, true);
+        if (replacement == null && isNormalized) {
+          index += 3;
+          continue;
+        }
+        buffer ??= StringBuffer();
+        String slice = host.substring(sectionStart, index);
+        buffer.write(slice);
+        int sourceLength = 3;
+        if (replacement == null) {
+          replacement = host.substring(index, index + 3);
+        } else if (replacement == "%") {
+          _fail(host, index, "ZoneID should not contain % anymore");
+        }
+        buffer.write(replacement);
+        index += sourceLength;
+        sectionStart = index;
+        isNormalized = true;
+      } else if (_isZoneIDChar(char)) {
+        if (isNormalized && _UPPER_CASE_A <= char && _UPPER_CASE_Z >= char) {
+          // Flush the slice seen so far and mark the section non-normalized.
+          buffer ??= StringBuffer();
+          if (sectionStart < index) {
+            buffer.write(host.substring(sectionStart, index));
+            sectionStart = index;
+          }
+          isNormalized = false;
+        }
+        index++;
+      } else {
+        int sourceLength = 1;
+        if ((char & 0xFC00) == 0xD800 && (index + 1) < end) {
+          int tail = host.codeUnitAt(index + 1);
+          if ((tail & 0xFC00) == 0xDC00) {
+            char = 0x10000 | ((char & 0x3ff) << 10) | (tail & 0x3ff);
+            sourceLength = 2;
+          }
+        }
+        buffer ??= StringBuffer();
+        String slice = host.substring(sectionStart, index);
+        buffer.write(slice);
+        buffer.write(_escapeChar(char));
+        index += sourceLength;
+        sectionStart = index;
+      }
+    }
+    if (buffer == null) return host.substring(start, end);
+    if (sectionStart < end) {
+      String slice = host.substring(sectionStart, end);
+      buffer.write(slice);
+    }
+    return buffer.toString();
+  }
+
   static bool _isRegNameChar(int char) {
     return char < 127 && (_regNameTable[char >> 4] & (1 << (char & 0xf))) != 0;
   }
@@ -3120,6 +3235,27 @@
     //                      pqrstuvwxyz   ~
     0x47ff, // 0x70 - 0x7f  1111111111100010
   ];
+
+  // Characters allowed in the ZoneID as of RFC 6874.
+  // ZoneID = 1*( unreserved / pct-encoded )
+  static const _zoneIDTable = <int>[
+    //                     LSB            MSB
+    //                      |              |
+    0x0000, // 0x00 - 0x0f  0000000000000000
+    0x0000, // 0x10 - 0x1f  0000000000000000
+    //                                   -.
+    0x6000, // 0x20 - 0x2f  0000000000000110
+    //                      0123456789
+    0x03ff, // 0x30 - 0x3f  1111111111000000
+    //                       ABCDEFGHIJKLMNO
+    0xfffe, // 0x40 - 0x4f  0111111111111111
+    //                      PQRSTUVWXYZ    _
+    0x87ff, // 0x50 - 0x5f  1111111111100001
+    //                       abcdefghijklmno
+    0xfffe, // 0x60 - 0x6f  0111111111111111
+    //                      pqrstuvwxyz   ~
+    0x47ff, // 0x70 - 0x7f  1111111111100010
+  ];
 }
 
 // --------------------------------------------------------------------
diff --git a/tests/corelib_2/uri_http_test.dart b/tests/corelib_2/uri_http_test.dart
index 60d9732..399c403 100644
--- a/tests/corelib_2/uri_http_test.dart
+++ b/tests/corelib_2/uri_http_test.dart
@@ -33,6 +33,11 @@
       new Uri.http("host", "/a/b", {"c=": "&d"}), "http://host/a/b?c%3D=%26d");
   check(new Uri.http("[::]", "a"), "http://[::]/a");
   check(new Uri.http("[::127.0.0.1]", "a"), "http://[::127.0.0.1]/a");
+  check(new Uri.http('[fe80::8eae:4c4d:fee9:8434%rename3]', ''),
+      'http://[fe80::8eae:4c4d:fee9:8434%25rename3]');
+  check(new Uri.http('[ff02::1%1%41]', ''), 'http://[ff02::1%251a]');
+  check(new Uri.http('[ff02::1%321]', ''), 'http://[ff02::1%25321]');
+  check(new Uri.http('[ff02::1%%321]', ''), 'http://[ff02::1%2521]');
 }
 
 testHttpsUri() {
diff --git a/tests/corelib_2/uri_ipv6_test.dart b/tests/corelib_2/uri_ipv6_test.dart
index e6a7a4d..e5dbb9df 100644
--- a/tests/corelib_2/uri_ipv6_test.dart
+++ b/tests/corelib_2/uri_ipv6_test.dart
@@ -104,6 +104,76 @@
   Expect.equals(80, uri.port);
   Expect.equals('', uri.path);
   Expect.equals(path.toLowerCase(), uri.toString());
+
+  // Checks for ZoneID in RFC 6874
+  path = 'https://[fe80::a%en1]:443/index.html';
+  uri = Uri.parse(path);
+  Expect.equals('https', uri.scheme);
+  Expect.equals('fe80::a%25en1', uri.host);
+  Expect.equals(443, uri.port);
+  Expect.equals('/index.html', uri.path);
+  Expect.equals('https://[fe80::a%25en1]/index.html', uri.toString());
+
+  path = 'https://[fe80::a%25eE1]:443/index.html';
+  uri = Uri.parse(path);
+  Expect.equals('https', uri.scheme);
+  Expect.equals('fe80::a%25eE1', uri.host);
+  Expect.equals(443, uri.port);
+  Expect.equals('/index.html', uri.path);
+  Expect.equals('https://[fe80::a%25eE1]/index.html', uri.toString());
+
+  // Recognize bare '%' and transform into '%25'
+  path = 'https://[fe80::a%1]:443/index.html';
+  uri = Uri.parse(path);
+  Expect.equals('https', uri.scheme);
+  Expect.equals('fe80::a%251', uri.host);
+  Expect.equals(443, uri.port);
+  Expect.equals('/index.html', uri.path);
+  Expect.equals('https://[fe80::a%251]/index.html', uri.toString());
+
+  path = 'https://[ff02::5678%pvc1.3]/index.html';
+  uri = Uri.parse(path);
+  Expect.equals('https', uri.scheme);
+  Expect.equals('ff02::5678%25pvc1.3', uri.host);
+  Expect.equals('/index.html', uri.path);
+  Expect.equals('https://[ff02::5678%25pvc1.3]/index.html', uri.toString());
+
+  // ZoneID contains a percent-encoded octet
+  path = 'https://[ff02::1%%321]/index.html';
+  uri = Uri.parse(path);
+  Expect.equals('https', uri.scheme);
+  Expect.equals('ff02::1%2521', uri.host);
+  Expect.equals('/index.html', uri.path);
+  Expect.equals('https://[ff02::1%2521]/index.html', uri.toString());
+
+  path = 'https://[ff02::1%321]/index.html';
+  uri = Uri.parse(path);
+  Expect.equals('https', uri.scheme);
+  Expect.equals('ff02::1%25321', uri.host);
+  Expect.equals('/index.html', uri.path);
+  Expect.equals('https://[ff02::1%25321]/index.html', uri.toString());
+
+  // Percent-encoded letters are decoded and lower-cased
+  path = 'https://[ff02::1%1%41]/index.html';
+  uri = Uri.parse(path);
+  Expect.equals('https', uri.scheme);
+  Expect.equals('ff02::1%251a', uri.host);
+  Expect.equals('/index.html', uri.path);
+  Expect.equals('https://[ff02::1%251a]/index.html', uri.toString());
+
+  path = 'https://[fe80::8eae:4c4d:fee9:8434%rename3]/index.html';
+  uri = Uri.parse(path);
+  Expect.equals('https', uri.scheme);
+  Expect.equals('fe80::8eae:4c4d:fee9:8434%25rename3', uri.host);
+  Expect.equals('/index.html', uri.path);
+  Expect.equals('https://[fe80::8eae:4c4d:fee9:8434%25rename3]/index.html',
+      uri.toString());
+
+  // Test constructors with host name
+  uri = Uri(scheme: 'https', host: '[ff02::5678%pvc1.3]');
+  uri = Uri(scheme: 'https', host: '[fe80::a%1]');
+  uri = Uri(scheme: 'https', host: '[fe80::a%25eE1]');
+  uri = Uri(scheme: 'https', host: '[fe80::a%en1]');
 }
 
 void testParseIPv6Address() {