sdk/lib/uri/encode_decode.dart - sdk.git - Git at Google

 // Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.

 part of dart.uri;

 /**
  * Javascript-like URI encode/decode functions.
  * The documentation here borrows heavily from the original Javascript
  * documentation on MDN at:
  * https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects
  */

 /**
  * Percent-encodes a complete URI, in the manner of JavaScript's encodeURI.
  *
  * Characters that have to be escaped are replaced by the "%XX" escapes of
  * their UTF-8 bytes. Because [uri] is taken to be a whole URI, the
  * delimiters `# $ & , / : ; = ? @` are left untouched, as are ASCII
  * letters, digits and `! ' ( ) * - . _ ~`.
  *
  * Unlike encodeURI, a space is written as "+", and a literal "+" is
  * therefore escaped (as "%2B") rather than passed through.
  *
  * Returns the escaped URI.
  */
 String encodeUri(String uri) {
   // 128-bit set stored as eight 16-bit words. Bit (code & 0x0f) of word
   // (code >> 4) is SET when the ASCII character is copied through as-is and
   // CLEAR when it has to be escaped.
   const passThrough = const [
       0x0000,   // 0x00 - 0x0f  control characters, all escaped
       0x0000,   // 0x10 - 0x1f  control characters, all escaped
       0xf7da,   // 0x20 - 0x2f  kept: ! # $ & ' ( ) * , - . /
       0xafff,   // 0x30 - 0x3f  kept: 0-9 : ; = ?
       0xffff,   // 0x40 - 0x4f  kept: @ A-O
       0x87ff,   // 0x50 - 0x5f  kept: P-Z _
       0xfffe,   // 0x60 - 0x6f  kept: a-o
       0x47ff];  // 0x70 - 0x7f  kept: p-z ~
   return _uriEncode(passThrough, uri);
 }

 /**
  * Decodes a URI produced by [encodeUri] or a similar routine, in the manner
  * of JavaScript's decodeURI.
  *
  * Every "%XX" escape sequence in [uri] is replaced by the character it
  * represents and every "+" becomes a space. Note that escapes of reserved
  * characters (for instance "%2F") are decoded as well: this function uses
  * the same decoder as [decodeUriComponent].
  *
  * Returns the unescaped URI. Throws an [ArgumentError] if an escape
  * sequence is truncated or contains a non-hexadecimal digit.
  */
 String decodeUri(String uri) => _uriDecode(uri);

 /**
  * Percent-encodes a single URI [component], in the manner of JavaScript's
  * encodeURIComponent.
  *
  * Only ASCII letters, digits and `! ' ( ) * - . _ ~` are left untouched. A
  * space is written as "+" and every other character is replaced by the
  * "%XX" escapes of its UTF-8 bytes. In particular the URI delimiters
  * (such as `&`, `=`, `?`, `/` and `#`) are escaped.
  *
  * Use it on any user-entered value that becomes part of a URI. For example,
  * a user could type "Thyme &time=again" as the value of a `comment`
  * parameter. Encoded with [encodeUri] that gives
  * `comment=Thyme+&time=again`, where the ampersand and the equals sign
  * start a second key/value pair: instead of one `comment` key equal to
  * "Thyme &time=again" there are two keys, `comment` and `time`. Encoded
  * with this function the value becomes `Thyme+%26time%3Dagain` and stays a
  * single parameter.
  *
  * Returns the escaped string.
  */
 String encodeUriComponent(String component) {
   // 128-bit set stored as eight 16-bit words. Bit (code & 0x0f) of word
   // (code >> 4) is SET when the ASCII character is copied through as-is and
   // CLEAR when it has to be escaped.
   const passThrough = const [
       0x0000,   // 0x00 - 0x0f  control characters, all escaped
       0x0000,   // 0x10 - 0x1f  control characters, all escaped
       0x6782,   // 0x20 - 0x2f  kept: ! ' ( ) * - .
       0x03ff,   // 0x30 - 0x3f  kept: 0-9
       0xfffe,   // 0x40 - 0x4f  kept: A-O (@ is escaped)
       0x87ff,   // 0x50 - 0x5f  kept: P-Z _
       0xfffe,   // 0x60 - 0x6f  kept: a-o
       0x47ff];  // 0x70 - 0x7f  kept: p-z ~
   return _uriEncode(passThrough, component);
 }

 /**
  * Decodes a URI component produced by [encodeUriComponent] or a similar
  * routine, in the manner of JavaScript's decodeURIComponent.
  *
  * Every "%XX" escape sequence in [encodedComponent] is replaced by the
  * character it represents and every "+" becomes a space.
  *
  * Returns the unescaped string. Throws an [ArgumentError] if an escape
  * sequence is truncated or contains a non-hexadecimal digit.
  */
 String decodeUriComponent(String encodedComponent) =>
     _uriDecode(encodedComponent);

 /**
  * Shared percent-encoder behind [encodeUri] and [encodeUriComponent].
  *
  * Walks [text] one UTF-16 code unit at a time:
  *
  *  * An ASCII character whose bit is set in [canonicalTable] is copied
  *    through unchanged. The bit for character code `c` is bit `c & 0x0f`
  *    of entry `c >> 4`, so the table needs eight entries.
  *  * A space that is not marked in the table is written as "+".
  *  * Anything else is converted to UTF-8 and each resulting byte is
  *    written as an upper-case "%XX" escape. A surrogate pair is first
  *    combined into a single code point so that it is encoded as one
  *    character.
  *
  * Returns the escaped string. Throws an [ArgumentError] if [text] contains
  * an unpaired surrogate, whether it is a lead surrogate without a trail
  * surrogate after it or a trail surrogate on its own.
  */
 String _uriEncode(List<int> canonicalTable, String text) {
   final String hex = '0123456789ABCDEF';
   // Formats one byte value (0-255) as a "%XX" escape.
   String byteToHex(int v) => '%${hex[v >> 4]}${hex[v & 0x0f]}';
   StringBuffer result = new StringBuffer();
   for (int i = 0; i < text.length; i++) {
     int ch = text.charCodeAt(i);
     if (ch < 128 && ((canonicalTable[ch >> 4] & (1 << (ch & 0x0f))) != 0)) {
       result.add(text[i]);
     } else if (text[i] == " ") {
       result.add("+");
     } else {
       if (ch >= 0xD800 && ch < 0xDC00) {
         // High (lead) surrogate: a low (trail) surrogate has to follow.
         ++i;
         // 0 stands in for "end of string" and fails the range check below.
         int nextCh = text.length == i ? 0 : text.charCodeAt(i);
         if (nextCh >= 0xDC00 && nextCh < 0xE000) {
           // Combine the pair into one code point at or above U+10000.
           ch = 0x10000 + ((ch - 0xD800) << 10) + (nextCh - 0xDC00);
         } else {
           throw new ArgumentError('Malformed URI');
         }
       } else if (ch >= 0xDC00 && ch < 0xE000) {
         // Low (trail) surrogate that was not consumed by a preceding high
         // surrogate: just as malformed as the unpaired case above.
         throw new ArgumentError('Malformed URI');
       }
       for (int byte in codepointsToUtf8([ch])) {
         result.add(byteToHex(byte));
       }
     }
   }
   return result.toString();
 }

 /**
  * Reads the two hexadecimal digits of [s] that start at index [pos] and
  * returns the byte value (0-255) they spell, most significant digit first.
  *
  * Digits may be upper or lower case. Throws an [ArgumentError] as soon as
  * a character that is not a hexadecimal digit is met.
  */
 int _hexCharPairToByte(String s, int pos) {
   int value = 0;
   for (int offset = pos; offset < pos + 2; offset++) {
     int unit = s.charCodeAt(offset);
     int digit;
     if (unit >= 0x30 && unit <= 0x39) {
       // '0'-'9'.
       digit = unit - 0x30;
     } else {
       // Setting bit 0x20 maps 'A'-'F' onto 'a'-'f', so one range check
       // covers both cases.
       int lowered = unit | 0x20;
       if (lowered < 0x61 || lowered > 0x66) {
         throw new ArgumentError("Invalid URL encoding");
       }
       // 0x57 is 'a' - 10, which turns 'a'-'f' into 10-15.
       digit = lowered - 0x57;
     }
     value = value * 16 + digit;
   }
   return value;
 }

 /**
  * Shared decoder behind [decodeUri] and [decodeUriComponent].
  *
  * Copies [text] to the result, turning every "+" into a space. A run of
  * consecutive "%XX" escapes is collected into a list of byte values and
  * decoded in one go as UTF-8, so a character whose bytes are spread over
  * several escapes is reassembled.
  *
  * Returns the unescaped string. Throws an [ArgumentError] when fewer than
  * two characters follow a "%" or when those characters are not hexadecimal
  * digits.
  */
 String _uriDecode(String text) {
   StringBuffer decoded = new StringBuffer();
   List<int> utf8Bytes = new List<int>();
   int index = 0;
   while (index < text.length) {
     String current = text[index];
     if (current == '%') {
       // Gather the whole run of adjacent escapes before decoding it.
       utf8Bytes.clear();
       do {
         // Step over the '%' onto the first hex digit; both digits must fit.
         index++;
         if (index > text.length - 2) {
           throw new ArgumentError('Truncated URI');
         }
         utf8Bytes.add(_hexCharPairToByte(text, index));
         index += 2;
       } while (index != text.length && text[index] == '%');
       decoded.add(decodeUtf8(utf8Bytes));
     } else {
       decoded.add(current == '+' ? " " : current);
       index++;
     }
   }
   return decoded.toString();
 }
	// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
	// for details. All rights reserved. Use of this source code is governed by a
	// BSD-style license that can be found in the LICENSE file.

	part of dart.uri;

	/**
	* Javascript-like URI encode/decode functions.
	* The documentation here borrows heavily from the original Javascript
	* documentation on MDN at:
	* https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects
	*/

	/**
	 * Percent-encodes a complete URI, in the manner of JavaScript's encodeURI.
	 *
	 * Characters that have to be escaped are replaced by the "%XX" escapes of
	 * their UTF-8 bytes. Because [uri] is taken to be a whole URI, the
	 * delimiters `# $ & , / : ; = ? @` are left untouched, as are ASCII
	 * letters, digits and `! ' ( ) * - . _ ~`.
	 *
	 * Unlike encodeURI, a space is written as "+", and a literal "+" is
	 * therefore escaped (as "%2B") rather than passed through.
	 *
	 * Returns the escaped URI.
	 */
	String encodeUri(String uri) {
	  // 128-bit set stored as eight 16-bit words. Bit (code & 0x0f) of word
	  // (code >> 4) is SET when the ASCII character is copied through as-is and
	  // CLEAR when it has to be escaped.
	  const passThrough = const [
	      0x0000,   // 0x00 - 0x0f  control characters, all escaped
	      0x0000,   // 0x10 - 0x1f  control characters, all escaped
	      0xf7da,   // 0x20 - 0x2f  kept: ! # $ & ' ( ) * , - . /
	      0xafff,   // 0x30 - 0x3f  kept: 0-9 : ; = ?
	      0xffff,   // 0x40 - 0x4f  kept: @ A-O
	      0x87ff,   // 0x50 - 0x5f  kept: P-Z _
	      0xfffe,   // 0x60 - 0x6f  kept: a-o
	      0x47ff];  // 0x70 - 0x7f  kept: p-z ~
	  return _uriEncode(passThrough, uri);
	}

	/**
	 * Decodes a URI produced by [encodeUri] or a similar routine, in the manner
	 * of JavaScript's decodeURI.
	 *
	 * Every "%XX" escape sequence in [uri] is replaced by the character it
	 * represents and every "+" becomes a space. Note that escapes of reserved
	 * characters (for instance "%2F") are decoded as well: this function uses
	 * the same decoder as [decodeUriComponent].
	 *
	 * Returns the unescaped URI. Throws an [ArgumentError] if an escape
	 * sequence is truncated or contains a non-hexadecimal digit.
	 */
	String decodeUri(String uri) => _uriDecode(uri);

	/**
	 * Percent-encodes a single URI [component], in the manner of JavaScript's
	 * encodeURIComponent.
	 *
	 * Only ASCII letters, digits and `! ' ( ) * - . _ ~` are left untouched. A
	 * space is written as "+" and every other character is replaced by the
	 * "%XX" escapes of its UTF-8 bytes. In particular the URI delimiters
	 * (such as `&`, `=`, `?`, `/` and `#`) are escaped.
	 *
	 * Use it on any user-entered value that becomes part of a URI. For example,
	 * a user could type "Thyme &time=again" as the value of a `comment`
	 * parameter. Encoded with [encodeUri] that gives
	 * `comment=Thyme+&time=again`, where the ampersand and the equals sign
	 * start a second key/value pair: instead of one `comment` key equal to
	 * "Thyme &time=again" there are two keys, `comment` and `time`. Encoded
	 * with this function the value becomes `Thyme+%26time%3Dagain` and stays a
	 * single parameter.
	 *
	 * Returns the escaped string.
	 */
	String encodeUriComponent(String component) {
	  // 128-bit set stored as eight 16-bit words. Bit (code & 0x0f) of word
	  // (code >> 4) is SET when the ASCII character is copied through as-is and
	  // CLEAR when it has to be escaped.
	  const passThrough = const [
	      0x0000,   // 0x00 - 0x0f  control characters, all escaped
	      0x0000,   // 0x10 - 0x1f  control characters, all escaped
	      0x6782,   // 0x20 - 0x2f  kept: ! ' ( ) * - .
	      0x03ff,   // 0x30 - 0x3f  kept: 0-9
	      0xfffe,   // 0x40 - 0x4f  kept: A-O (@ is escaped)
	      0x87ff,   // 0x50 - 0x5f  kept: P-Z _
	      0xfffe,   // 0x60 - 0x6f  kept: a-o
	      0x47ff];  // 0x70 - 0x7f  kept: p-z ~
	  return _uriEncode(passThrough, component);
	}

	/**
	 * Decodes a URI component produced by [encodeUriComponent] or a similar
	 * routine, in the manner of JavaScript's decodeURIComponent.
	 *
	 * Every "%XX" escape sequence in [encodedComponent] is replaced by the
	 * character it represents and every "+" becomes a space.
	 *
	 * Returns the unescaped string. Throws an [ArgumentError] if an escape
	 * sequence is truncated or contains a non-hexadecimal digit.
	 */
	String decodeUriComponent(String encodedComponent) =>
	    _uriDecode(encodedComponent);

	/**
	 * Shared percent-encoder behind [encodeUri] and [encodeUriComponent].
	 *
	 * Walks [text] one UTF-16 code unit at a time:
	 *
	 *  * An ASCII character whose bit is set in [canonicalTable] is copied
	 *    through unchanged. The bit for character code `c` is bit `c & 0x0f`
	 *    of entry `c >> 4`, so the table needs eight entries.
	 *  * A space that is not marked in the table is written as "+".
	 *  * Anything else is converted to UTF-8 and each resulting byte is
	 *    written as an upper-case "%XX" escape. A surrogate pair is first
	 *    combined into a single code point so that it is encoded as one
	 *    character.
	 *
	 * Returns the escaped string. Throws an [ArgumentError] if [text] contains
	 * an unpaired surrogate, whether it is a lead surrogate without a trail
	 * surrogate after it or a trail surrogate on its own.
	 */
	String _uriEncode(List<int> canonicalTable, String text) {
	  final String hex = '0123456789ABCDEF';
	  // Formats one byte value (0-255) as a "%XX" escape.
	  String byteToHex(int v) => '%${hex[v >> 4]}${hex[v & 0x0f]}';
	  StringBuffer result = new StringBuffer();
	  for (int i = 0; i < text.length; i++) {
	    int ch = text.charCodeAt(i);
	    if (ch < 128 && ((canonicalTable[ch >> 4] & (1 << (ch & 0x0f))) != 0)) {
	      result.add(text[i]);
	    } else if (text[i] == " ") {
	      result.add("+");
	    } else {
	      if (ch >= 0xD800 && ch < 0xDC00) {
	        // High (lead) surrogate: a low (trail) surrogate has to follow.
	        ++i;
	        // 0 stands in for "end of string" and fails the range check below.
	        int nextCh = text.length == i ? 0 : text.charCodeAt(i);
	        if (nextCh >= 0xDC00 && nextCh < 0xE000) {
	          // Combine the pair into one code point at or above U+10000.
	          ch = 0x10000 + ((ch - 0xD800) << 10) + (nextCh - 0xDC00);
	        } else {
	          throw new ArgumentError('Malformed URI');
	        }
	      } else if (ch >= 0xDC00 && ch < 0xE000) {
	        // Low (trail) surrogate that was not consumed by a preceding high
	        // surrogate: just as malformed as the unpaired case above.
	        throw new ArgumentError('Malformed URI');
	      }
	      for (int byte in codepointsToUtf8([ch])) {
	        result.add(byteToHex(byte));
	      }
	    }
	  }
	  return result.toString();
	}

	/**
	 * Reads the two hexadecimal digits of [s] that start at index [pos] and
	 * returns the byte value (0-255) they spell, most significant digit first.
	 *
	 * Digits may be upper or lower case. Throws an [ArgumentError] as soon as
	 * a character that is not a hexadecimal digit is met.
	 */
	int _hexCharPairToByte(String s, int pos) {
	  int byte = 0;
	  for (int i = 0; i < 2; i++) {
	    var charCode = s.charCodeAt(pos + i);
	    if (0x30 <= charCode && charCode <= 0x39) {
	      // '0'-'9'.
	      byte = byte * 16 + charCode - 0x30;
	    } else {
	      // Check ranges A-F (0x41-0x46) and a-f (0x61-0x66): setting bit 0x20
	      // folds the upper-case range onto the lower-case one.
	      charCode |= 0x20;
	      if (0x61 <= charCode && charCode <= 0x66) {
	        // 0x57 is 'a' - 10, which turns 'a'-'f' into 10-15.
	        byte = byte * 16 + charCode - 0x57;
	      } else {
	        throw new ArgumentError("Invalid URL encoding");
	      }
	    }
	  }
	  return byte;
	}

	/**
	 * Shared decoder behind [decodeUri] and [decodeUriComponent].
	 *
	 * Copies [text] to the result, turning every "+" into a space. A run of
	 * consecutive "%XX" escapes is collected into a list of byte values and
	 * decoded in one go as UTF-8, so a character whose bytes are spread over
	 * several escapes is reassembled.
	 *
	 * Returns the unescaped string. Throws an [ArgumentError] when fewer than
	 * two characters follow a "%" or when those characters are not hexadecimal
	 * digits.
	 */
	String _uriDecode(String text) {
	  StringBuffer decoded = new StringBuffer();
	  List<int> utf8Bytes = new List<int>();
	  int index = 0;
	  while (index < text.length) {
	    String current = text[index];
	    if (current == '%') {
	      // Gather the whole run of adjacent escapes before decoding it.
	      utf8Bytes.clear();
	      do {
	        // Step over the '%' onto the first hex digit; both digits must fit.
	        index++;
	        if (index > text.length - 2) {
	          throw new ArgumentError('Truncated URI');
	        }
	        utf8Bytes.add(_hexCharPairToByte(text, index));
	        index += 2;
	      } while (index != text.length && text[index] == '%');
	      decoded.add(decodeUtf8(utf8Bytes));
	    } else {
	      decoded.add(current == '+' ? " " : current);
	      index++;
	    }
	  }
	  return decoded.toString();
	}