blob: 124c803a38d30318f4c334054a355da6556f7d34 [file] [log] [blame]
// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
part of dart.uri;
/**
* Javascript-like URI encode/decode functions.
* The documentation here borrows heavily from the original Javascript
 * documentation on MDN at:
* https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects
*/
/**
 * A JavaScript-like encoder for a complete URI.
 *
 * Every character of [uri] that is not listed below is replaced by one,
 * two, three, or four `%XX` escape sequences holding the UTF-8 encoding of
 * the character (four only for characters made of a surrogate pair).
 *
 * Because [uri] is assumed to be a complete URI, the following ASCII
 * characters are copied to the result unchanged:
 *
 *     A-Z a-z 0-9 ! # $ & ' ( ) * , - . / : ; = ? @ _ ~
 *
 * A space is written as `+`; consequently a literal `+` is escaped
 * (as `%2B`) rather than passed through.
 *
 * Returns the escaped URI. Throws an [ArgumentError] when [uri] contains a
 * lead surrogate that is not followed by a trail surrogate.
 */
String encodeUri(String uri) {
  // 128-bit map stored as eight 16-bit words, least significant bit first.
  // A set bit means the ASCII character with that code is emitted as is;
  // a clear bit means it is escaped.
  const passThrough = const [
    0x0000, // 0x00 - 0x0f: (none)
    0x0000, // 0x10 - 0x1f: (none)
    0xF7DA, // 0x20 - 0x2f: ! # $ & ' ( ) * , - . /
    0xAFFF, // 0x30 - 0x3f: 0-9 : ; = ?
    0xFFFF, // 0x40 - 0x4f: @ A-O
    0x87FF, // 0x50 - 0x5f: P-Z _
    0xFFFE, // 0x60 - 0x6f: a-o
    0x47FF  // 0x70 - 0x7f: p-z ~
  ];
  return _uriEncode(passThrough, uri);
}
/**
 * A JavaScript-like counterpart to decodeURI.
 *
 * Decodes a Uniform Resource Identifier [uri] previously produced by
 * [encodeUri] or a similar routine: each run of `%XX` escape sequences is
 * replaced by the text those bytes represent in UTF-8, and each `+` is
 * replaced by a space.
 *
 * Note that, unlike JavaScript's decodeURI, every escape sequence is
 * decoded, including those for reserved characters; this function uses the
 * same decoder as [decodeUriComponent].
 *
 * Returns the unescaped URI. Throws an [ArgumentError] when an escape
 * sequence is truncated or contains a non-hexadecimal digit.
 */
String decodeUri(String uri) => _uriDecode(uri);
/**
 * A JavaScript-like encoder for a single URI component.
 *
 * Every character of [component] that is not listed below is replaced by
 * one, two, three, or four `%XX` escape sequences holding the UTF-8
 * encoding of the character (four only for characters made of a surrogate
 * pair). The only ASCII characters copied unchanged are:
 *
 *     A-Z a-z 0-9 ! ' ( ) * - . _ ~
 *
 * A space is written as `+`.
 *
 * To avoid unexpected requests to the server, call this function on any
 * user-entered value that becomes part of a URI. For example, a user could
 * type "Thyme &time=again" for a variable `comment`. Without component
 * encoding the request would contain `comment=Thyme%20&time=again`, where
 * the ampersand and the equal sign start a new key/value pair: instead of
 * one POST key `comment` equal to "Thyme &time=again" there would be two
 * keys, one equal to "Thyme " and another (`time`) equal to "again".
 *
 * Returns the escaped string. Throws an [ArgumentError] when [component]
 * contains a lead surrogate that is not followed by a trail surrogate.
 */
String encodeUriComponent(String component) {
  // 128-bit map stored as eight 16-bit words, least significant bit first.
  // A set bit means the ASCII character with that code is emitted as is;
  // a clear bit means it is escaped.
  const passThrough = const [
    0x0000, // 0x00 - 0x0f: (none)
    0x0000, // 0x10 - 0x1f: (none)
    0x6782, // 0x20 - 0x2f: ! ' ( ) * - .
    0x03FF, // 0x30 - 0x3f: 0-9
    0xFFFE, // 0x40 - 0x4f: A-O
    0x87FF, // 0x50 - 0x5f: P-Z _
    0xFFFE, // 0x60 - 0x6f: a-o
    0x47FF  // 0x70 - 0x7f: p-z ~
  ];
  return _uriEncode(passThrough, component);
}
/**
 * A JavaScript-like counterpart to decodeURIComponent.
 *
 * Decodes a URI component [encodedComponent] previously produced by
 * [encodeUriComponent] or a similar routine: each run of `%XX` escape
 * sequences is replaced by the text those bytes represent in UTF-8, and
 * each `+` is replaced by a space.
 *
 * Returns the unescaped string. Throws an [ArgumentError] when an escape
 * sequence is truncated or contains a non-hexadecimal digit.
 */
String decodeUriComponent(String encodedComponent) =>
    _uriDecode(encodedComponent);
/**
 * Shared implementation behind [encodeUri] and [encodeUriComponent].
 *
 * [canonicalTable] is a 128-bit map held as eight 16-bit integers: bit
 * `ch & 0x0f` of entry `ch >> 4` is set when the ASCII character with code
 * `ch` must be copied to the output unchanged.
 *
 * For every UTF-16 code unit of [text]:
 *
 *  * an ASCII character whose bit is set is copied as is;
 *  * otherwise a space becomes `+`;
 *  * otherwise the character (a surrogate pair is first combined into one
 *    code point) is UTF-8 encoded and each byte is written as `%XX` with
 *    upper-case hex digits.
 *
 * Returns the escaped string. Throws an [ArgumentError] when [text]
 * contains an unpaired surrogate, i.e. a lead surrogate that is not
 * followed by a trail surrogate, or a trail surrogate on its own.
 */
String _uriEncode(List<int> canonicalTable, String text) {
  final String hex = '0123456789ABCDEF';
  var byteToHex = (int v) => '%${hex[v >> 4]}${hex[v & 0x0f]}';
  StringBuffer result = new StringBuffer();
  for (int i = 0; i < text.length; i++) {
    int ch = text.codeUnitAt(i);
    if (ch < 128 && ((canonicalTable[ch >> 4] & (1 << (ch & 0x0f))) != 0)) {
      result.add(text[i]);
    } else if (text[i] == " ") {
      result.add("+");
    } else {
      if (ch >= 0xD800 && ch < 0xDC00) {
        // Lead (high) surrogate: it must be followed by a trail (low)
        // surrogate. Running off the end of the string is treated like a
        // non-surrogate follower (0) and rejected below.
        ++i;
        int nextCh = text.length == i ? 0 : text.codeUnitAt(i);
        if (nextCh >= 0xDC00 && nextCh < 0xE000) {
          // Combine the pair into one supplementary code point (>= U+10000).
          ch = 0x10000 + ((ch - 0xD800) << 10) + (nextCh - 0xDC00);
        } else {
          throw new ArgumentError('Malformed URI');
        }
      } else if (ch >= 0xDC00 && ch < 0xE000) {
        // Trail (low) surrogate with no preceding lead surrogate. A lone
        // surrogate has no well-formed UTF-8 encoding, so reject it the
        // same way as an unpaired lead surrogate.
        throw new ArgumentError('Malformed URI');
      }
      for (int byte in codepointsToUtf8([ch])) {
        result.add(byteToHex(byte));
      }
    }
  }
  return result.toString();
}
/**
 * Reads the two hexadecimal digits of [s] found at positions [pos] and
 * [pos] + 1 and returns the byte value (0-255) they spell.
 *
 * Both upper-case and lower-case digits `A`-`F` are accepted. Throws an
 * [ArgumentError] when either character is not a hexadecimal digit.
 */
int _hexCharPairToByte(String s, int pos) {
  int value = 0;
  int end = pos + 2;
  for (int index = pos; index < end; index++) {
    int unit = s.codeUnitAt(index);
    int digit;
    if (unit >= 0x30 && unit <= 0x39) {
      // '0'..'9'
      digit = unit - 0x30;
    } else {
      // Setting bit 5 maps 'A'..'F' (0x41-0x46) onto 'a'..'f' (0x61-0x66),
      // so a single range test covers both cases.
      int lowered = unit | 0x20;
      if (lowered < 0x61 || lowered > 0x66) {
        throw new ArgumentError("Invalid URL encoding");
      }
      // 'a' (0x61) - 0x57 == 10.
      digit = lowered - 0x57;
    }
    value = (value << 4) | digit;
  }
  return value;
}
/**
 * Shared implementation behind [decodeUri] and [decodeUriComponent].
 *
 * Walks [text] from left to right. A `+` is turned into a space, any other
 * character that is not `%` is copied unchanged, and each maximal run of
 * consecutive `%XX` escape sequences is collected into a byte list that is
 * then decoded as UTF-8 in one go.
 *
 * Returns the unescaped string. Throws an [ArgumentError] when a `%` is
 * not followed by two characters ('Truncated URI') or when those
 * characters are not hexadecimal digits.
 */
String _uriDecode(String text) {
  StringBuffer decoded = new StringBuffer();
  List<int> utf8Bytes = new List<int>();
  int pos = 0;
  while (pos < text.length) {
    String current = text[pos];
    if (current == '%') {
      utf8Bytes.clear();
      // Gather the whole run of escapes so multi-byte UTF-8 sequences are
      // decoded together rather than byte by byte.
      do {
        pos++;
        // Two hex digits must remain after the '%'.
        if (pos > text.length - 2) {
          throw new ArgumentError('Truncated URI');
        }
        utf8Bytes.add(_hexCharPairToByte(text, pos));
        pos += 2;
        if (pos == text.length) {
          break;
        }
        current = text[pos];
      } while (current == '%');
      decoded.add(decodeUtf8(utf8Bytes));
    } else {
      decoded.add(current == '+' ? " " : current);
      pos++;
    }
  }
  return decoded.toString();
}