| // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| part of dart.uri; |
| |
| /** |
| * JavaScript-like URI encode/decode functions. |
| * The documentation here borrows heavily from the original JavaScript |
| * documentation on MDN at: |
| * https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects |
| */ |
| |
| /** |
|  * A JavaScript-like encoder for a complete URI. |
|  * |
|  * Characters of [uri] whose bit is set in the table below are copied to the |
|  * result unchanged: the ASCII letters and digits, the marks - _ . ! ~ * ' ( ) |
|  * and the URI delimiters # $ & , / : ; = ? @ |
|  * Everything else is handed to the escaping path of [_uriEncode]: a space |
|  * becomes '+', and any other character (including '+' and '%' themselves) |
|  * is replaced by one %XX escape sequence per byte of its UTF-8 encoding. |
|  * |
|  * Returns the escaped URI. |
|  */ |
| String encodeUri(String uri) { |
|   // 128-bit pass-through mask, one 16-bit word per row of the ASCII chart. |
|   // Bit n of a word (bit 0 is the least significant) belongs to the n-th |
|   // character of that row; a set bit means "copy as-is", a clear bit |
|   // means "escape". |
|   const unescaped = const [ |
|     0x0000, // 0x00-0x0f: control characters, all escaped |
|     0x0000, // 0x10-0x1f: control characters, all escaped |
|     0xf7da, // 0x20-0x2f: ! # $ & ' ( ) * , - . / |
|     0xafff, // 0x30-0x3f: 0-9 : ; = ? |
|     0xffff, // 0x40-0x4f: @ A-O |
|     0x87ff, // 0x50-0x5f: P-Z _ |
|     0xfffe, // 0x60-0x6f: a-o |
|     0x47ff]; // 0x70-0x7f: p-z ~ |
|   return _uriEncode(unescaped, uri); |
| } |
| |
| /** |
|  * A JavaScript-like decodeURI function. |
|  * |
|  * Decodes a Uniform Resource Identifier [uri] previously created by |
|  * [encodeUri] or by a similar routine. The work is delegated to |
|  * [_uriDecode], which turns every '+' into a space and replaces every run |
|  * of %XX escape sequences by the UTF-8 decoding of the bytes they spell. |
|  * Note that no escape sequence is left untouched: sequences standing for |
|  * reserved characters are decoded as well. |
|  * |
|  * Returns the unescaped URI. |
|  */ |
| String decodeUri(String uri) => _uriDecode(uri); |
| |
| /** |
|  * A JavaScript-like encoder for a single URI component. |
|  * |
|  * Only the ASCII letters and digits and the marks - _ . ! ~ * ' ( ) are |
|  * copied from [component] unchanged. Everything else is handed to the |
|  * escaping path of [_uriEncode]: a space becomes '+', and any other |
|  * character is replaced by one %XX escape sequence per byte of its UTF-8 |
|  * encoding. |
|  * |
|  * To avoid unexpected requests to the server, call this on any user-entered |
|  * value that will be passed as part of a URI. For example, a user could type |
|  * "Thyme &time=again" for a variable comment. Placed into a query string |
|  * without component encoding, the ampersand and the equal sign would start |
|  * a new key and value pair: instead of one comment key equal to |
|  * "Thyme &time=again" there would be two keys, one equal to "Thyme " and |
|  * another (time) equal to again. |
|  * |
|  * Returns the escaped string. |
|  */ |
| String encodeUriComponent(String component) { |
|   // 128-bit pass-through mask, one 16-bit word per row of the ASCII chart. |
|   // Bit n of a word (bit 0 is the least significant) belongs to the n-th |
|   // character of that row; a set bit means "copy as-is", a clear bit |
|   // means "escape". |
|   const unescaped = const [ |
|     0x0000, // 0x00-0x0f: control characters, all escaped |
|     0x0000, // 0x10-0x1f: control characters, all escaped |
|     0x6782, // 0x20-0x2f: ! ' ( ) * - . |
|     0x03ff, // 0x30-0x3f: 0-9 |
|     0xfffe, // 0x40-0x4f: A-O (@ is escaped) |
|     0x87ff, // 0x50-0x5f: P-Z _ |
|     0xfffe, // 0x60-0x6f: a-o |
|     0x47ff]; // 0x70-0x7f: p-z ~ |
|   return _uriEncode(unescaped, component); |
| } |
| |
| /** |
|  * A JavaScript-like decodeURIComponent function. |
|  * |
|  * Decodes [encodedComponent], a URI component previously created by |
|  * [encodeUriComponent] or by a similar routine. The work is delegated to |
|  * [_uriDecode]: every '+' becomes a space and every run of %XX escape |
|  * sequences is replaced by the UTF-8 decoding of the bytes they spell. |
|  * |
|  * Returns the unescaped string. |
|  */ |
| String decodeUriComponent(String encodedComponent) => |
|     _uriDecode(encodedComponent); |
| |
| /** |
|  * The shared implementation behind [encodeUri] and [encodeUriComponent]. |
|  * |
|  * [canonicalTable] is a 128-bit mask stored as eight 16-bit words: bit |
|  * (code & 0x0f) of word (code >> 4) is set when the ASCII character with |
|  * that code may be copied to the result unchanged. |
|  * |
|  * Each UTF-16 code unit of [text] is handled as follows: |
|  * |
|  * * an ASCII character whose bit is set is copied as-is; |
|  * * a space is written as '+'; |
|  * * anything else is converted to bytes with codepointsToUtf8 and written |
|  *   as one upper-case %XX escape sequence per byte. A surrogate pair is |
|  *   first combined into the single code point it represents. |
|  * |
|  * Returns the escaped string. Throws an [ArgumentError] if [text] contains |
|  * an unpaired surrogate, whether it is a high surrogate that is not followed |
|  * by a low one or a low surrogate that is not preceded by a high one. |
|  */ |
| String _uriEncode(List<int> canonicalTable, String text) { |
|   final String hex = '0123456789ABCDEF'; |
|   var byteToHex = (int v) => '%${hex[v >> 4]}${hex[v & 0x0f]}'; |
|   StringBuffer result = new StringBuffer(); |
|   for (int i = 0; i < text.length; i++) { |
|     int ch = text.charCodeAt(i); |
|     if (ch < 128 && ((canonicalTable[ch >> 4] & (1 << (ch & 0x0f))) != 0)) { |
|       result.add(text[i]); |
|     } else if (text[i] == " ") { |
|       result.add("+"); |
|     } else { |
|       if (ch >= 0xD800 && ch < 0xDC00) { |
|         // High (lead) surrogate: the next code unit must be a low (trail) |
|         // surrogate. Running off the end of the string counts as a miss. |
|         ++i; |
|         int nextCh = text.length == i ? 0 : text.charCodeAt(i); |
|         if (nextCh >= 0xDC00 && nextCh < 0xE000) { |
|           // Combine the pair into the code point at or above U+10000. |
|           ch = 0x10000 + ((ch - 0xD800) << 10) + (nextCh - 0xDC00); |
|         } else { |
|           throw new ArgumentError('Malformed URI'); |
|         } |
|       } else if (ch >= 0xDC00 && ch < 0xE000) { |
|         // Low (trail) surrogate that was not consumed as part of a pair. |
|         // It has no valid UTF-8 encoding, so reject it the same way as an |
|         // unpaired high surrogate. |
|         throw new ArgumentError('Malformed URI'); |
|       } |
|       for (int byte in codepointsToUtf8([ch])) { |
|         result.add(byteToHex(byte)); |
|       } |
|     } |
|   } |
|   return result.toString(); |
| } |
| |
| /** |
|  * Reads the two hexadecimal digits of [s] found at positions [pos] and |
|  * [pos] + 1 and returns the byte value (0 to 255) they spell. Digits may be |
|  * 0-9, A-F or a-f. |
|  * |
|  * Throws an [ArgumentError] as soon as a character that is not a |
|  * hexadecimal digit is met. The caller is expected to have checked that |
|  * both positions lie inside [s]. |
|  */ |
| int _hexCharPairToByte(String s, int pos) { |
|   int value = 0; |
|   for (int offset = pos; offset < pos + 2; offset++) { |
|     int code = s.charCodeAt(offset); |
|     int digit; |
|     if (code >= 0x30 && code <= 0x39) { |
|       // '0'..'9' |
|       digit = code - 0x30; |
|     } else { |
|       // Setting bit 0x20 folds 'A'..'F' (0x41-0x46) onto 'a'..'f' |
|       // (0x61-0x66), so one range test covers both cases. |
|       int lowered = code | 0x20; |
|       if (lowered < 0x61 || lowered > 0x66) { |
|         throw new ArgumentError("Invalid URL encoding"); |
|       } |
|       // 0x57 is 'a' - 10, so 'a' maps to 10 and 'f' to 15. |
|       digit = lowered - 0x57; |
|     } |
|     value = (value << 4) | digit; |
|   } |
|   return value; |
| } |
| |
| /** |
|  * The shared implementation behind [decodeUri] and [decodeUriComponent]. |
|  * |
|  * Walks [text] from left to right. A '+' is written as a space and any |
|  * other character that is not '%' is copied unchanged. A maximal run of |
|  * consecutive %XX escape sequences is collected into a list of bytes, which |
|  * is then passed to decodeUtf8 as a whole so that multi-byte characters are |
|  * reassembled. |
|  * |
|  * Returns the unescaped string. Throws an [ArgumentError] if a '%' is not |
|  * followed by two more characters, or (from [_hexCharPairToByte]) if those |
|  * characters are not hexadecimal digits. |
|  */ |
| String _uriDecode(String text) { |
|   StringBuffer out = new StringBuffer(); |
|   List<int> bytes = new List<int>(); |
|   int i = 0; |
|   while (i < text.length) { |
|     String ch = text[i]; |
|     if (ch == '%') { |
|       bytes.clear(); |
|       do { |
|         // Step over the '%'; two hex digits must remain from here on. |
|         ++i; |
|         if (i > text.length - 2) { |
|           throw new ArgumentError('Truncated URI'); |
|         } |
|         bytes.add(_hexCharPairToByte(text, i)); |
|         i += 2; |
|       } while (i < text.length && text[i] == '%'); |
|       out.add(decodeUtf8(bytes)); |
|     } else { |
|       out.add(ch == '+' ? " " : ch); |
|       i++; |
|     } |
|   } |
|   return out.toString(); |
| } |
| |