|  | // Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file | 
|  | // for details. All rights reserved. Use of this source code is governed by a | 
|  | // BSD-style license that can be found in the LICENSE file. | 
|  |  | 
|  | part of utf; | 
|  |  | 
|  | // TODO(floitsch): make this transformer reusable. | 
|  | abstract class _StringDecoder | 
|  | implements StreamTransformer<List<int>, String>, EventSink<List<int>> { | 
|  | List<int> _carry; | 
|  | List<int> _buffer; | 
|  | int _replacementChar; | 
|  |  | 
|  | EventSink<String> _outSink; | 
|  |  | 
|  | _StringDecoder(int this._replacementChar); | 
|  |  | 
|  | Stream<String> bind(Stream<List<int>> stream) { | 
|  | return new Stream.eventTransformed( | 
|  | stream, | 
|  | (EventSink<String> sink) { | 
|  | if (_outSink != null) { | 
|  | throw new StateError("String decoder already used"); | 
|  | } | 
|  | _outSink = sink; | 
|  | return this; | 
|  | }); | 
|  | } | 
|  |  | 
|  | void add(List<int> bytes) { | 
|  | try { | 
|  | _buffer = <int>[]; | 
|  | List<int> carry = _carry; | 
|  | _carry = null; | 
|  | int pos = 0; | 
|  | int available = bytes.length; | 
|  | // If we have carry-over data, start from negative index, indicating carry | 
|  | // index. | 
|  | int goodChars = 0; | 
|  | if (carry != null) pos = -carry.length; | 
|  | while (pos < available) { | 
|  | int currentPos = pos; | 
|  | int getNext() { | 
|  | if (pos < 0) { | 
|  | return carry[pos++ + carry.length]; | 
|  | } else if (pos < available) { | 
|  | return bytes[pos++]; | 
|  | } | 
|  | return null; | 
|  | } | 
|  | int consumed = _processBytes(getNext); | 
|  | if (consumed > 0) { | 
|  | goodChars = _buffer.length; | 
|  | } else if (consumed == 0) { | 
|  | _buffer.length = goodChars; | 
|  | if (currentPos < 0) { | 
|  | _carry = []; | 
|  | _carry.addAll(carry); | 
|  | _carry.addAll(bytes); | 
|  | } else { | 
|  | _carry = bytes.sublist(currentPos); | 
|  | } | 
|  | break; | 
|  | } else { | 
|  | // Invalid byte at position pos - 1 | 
|  | _buffer.length = goodChars; | 
|  | _addChar(-1); | 
|  | goodChars = _buffer.length; | 
|  | } | 
|  | } | 
|  | if (_buffer.length > 0) { | 
|  | // Limit to 'goodChars', if lower than actual charCodes in the buffer. | 
|  | _outSink.add(new String.fromCharCodes(_buffer)); | 
|  | } | 
|  | _buffer = null; | 
|  | } catch (e, stackTrace) { | 
|  | _outSink.addError(e, stackTrace); | 
|  | } | 
|  | } | 
|  |  | 
|  | void addError(Object error, [StackTrace stackTrace]) { | 
|  | _outSink.addError(error, stackTrace); | 
|  | } | 
|  |  | 
|  | void close() { | 
|  | if (_carry != null) { | 
|  | if (_replacementChar != null) { | 
|  | _outSink.add(new String.fromCharCodes( | 
|  | new List.filled(_carry.length, _replacementChar))); | 
|  | } else { | 
|  | throw new ArgumentError('Invalid codepoint'); | 
|  | } | 
|  | } | 
|  | _outSink.close(); | 
|  | } | 
|  |  | 
|  | int _processBytes(int getNext()); | 
|  |  | 
|  | void _addChar(int char) { | 
|  | void error() { | 
|  | if (_replacementChar != null) { | 
|  | char = _replacementChar; | 
|  | } else { | 
|  | throw new ArgumentError('Invalid codepoint'); | 
|  | } | 
|  | } | 
|  | if (char < 0) error(); | 
|  | if (char >= 0xD800 && char <= 0xDFFF) error(); | 
|  | if (char > 0x10FFFF) error(); | 
|  | _buffer.add(char); | 
|  | } | 
|  | } | 
|  |  | 
|  | /** | 
|  | * StringTransformer that decodes a stream of UTF-8 encoded bytes. | 
|  | */ | 
|  | class Utf8DecoderTransformer extends _StringDecoder { | 
|  | Utf8DecoderTransformer( | 
|  | [int replacementChar = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) | 
|  | : super(replacementChar); | 
|  |  | 
|  | int _processBytes(int getNext()) { | 
|  | int value = getNext(); | 
|  | if ((value & 0xFF) != value) return -1;  // Not a byte. | 
|  | if ((value & 0x80) == 0x80) { | 
|  | int additionalBytes; | 
|  | int min; | 
|  | if ((value & 0xe0) == 0xc0) {  // 110xxxxx | 
|  | value = value & 0x1F; | 
|  | additionalBytes = 1; | 
|  | min = 0x80; | 
|  | } else if ((value & 0xf0) == 0xe0) {  // 1110xxxx | 
|  | value = value & 0x0F; | 
|  | additionalBytes = 2; | 
|  | min = 0x800; | 
|  | } else if ((value & 0xf8) == 0xf0) {  // 11110xxx | 
|  | value = value & 0x07; | 
|  | additionalBytes = 3; | 
|  | min = 0x10000; | 
|  | } else if ((value & 0xfc) == 0xf8) {  // 111110xx | 
|  | value = value & 0x03; | 
|  | additionalBytes = 4; | 
|  | min = 0x200000; | 
|  | } else if ((value & 0xfe) == 0xfc) {  // 1111110x | 
|  | value = value & 0x01; | 
|  | additionalBytes = 5; | 
|  | min = 0x4000000; | 
|  | } else { | 
|  | return -1; | 
|  | } | 
|  | for (int i = 0; i < additionalBytes; i++) { | 
|  | int next = getNext(); | 
|  | if (next == null) return 0;  // Not enough chars, reset. | 
|  | if ((next & 0xc0) != 0x80 || (next & 0xff) != next) return -1; | 
|  | value = value << 6 | (next & 0x3f); | 
|  | if (additionalBytes >= 3 && i == 0 && value << 12 > 0x10FFFF) { | 
|  | _addChar(-1); | 
|  | } | 
|  | } | 
|  | // Invalid charCode if less then minimum expected. | 
|  | if (value < min) value = -1; | 
|  | _addChar(value); | 
|  | return 1 + additionalBytes; | 
|  | } | 
|  | _addChar(value); | 
|  | return 1; | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | abstract class _StringEncoder | 
|  | implements StreamTransformer<String, List<int>>, EventSink<String> { | 
|  |  | 
|  | EventSink<List<int>> _outSink; | 
|  |  | 
|  | Stream<List<int>> bind(Stream<String> stream) { | 
|  | return new Stream.eventTransformed( | 
|  | stream, | 
|  | (EventSink<List<int>> sink) { | 
|  | if (_outSink != null) { | 
|  | throw new StateError("String encoder already used"); | 
|  | } | 
|  | _outSink = sink; | 
|  | return this; | 
|  | }); | 
|  | } | 
|  |  | 
|  | void add(String data) { | 
|  | _outSink.add(_processString(data)); | 
|  | } | 
|  |  | 
|  | void addError(Object error, [StackTrace stackTrace]) { | 
|  | _outSink.addError(error, stackTrace); | 
|  | } | 
|  |  | 
|  | void close() { _outSink.close(); } | 
|  |  | 
|  | List<int> _processString(String string); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * StringTransformer that UTF-8 encodes a stream of strings. | 
|  | */ | 
|  | class Utf8EncoderTransformer extends _StringEncoder { | 
|  | List<int> _processString(String string) { | 
|  | var bytes = []; | 
|  | int pos = 0; | 
|  | List<int> codepoints = _utf16CodeUnitsToCodepoints(string.codeUnits); | 
|  | int length = codepoints.length; | 
|  | for (int i = 0; i < length; i++) { | 
|  | int additionalBytes; | 
|  | int charCode = codepoints[i]; | 
|  | if (charCode <= 0x007F) { | 
|  | additionalBytes = 0; | 
|  | bytes.add(charCode); | 
|  | } else if (charCode <= 0x07FF) { | 
|  | // 110xxxxx (xxxxx is top 5 bits). | 
|  | bytes.add(((charCode >> 6) & 0x1F) | 0xC0); | 
|  | additionalBytes = 1; | 
|  | } else if (charCode <= 0xFFFF) { | 
|  | // 1110xxxx (xxxx is top 4 bits) | 
|  | bytes.add(((charCode >> 12) & 0x0F)| 0xE0); | 
|  | additionalBytes = 2; | 
|  | } else { | 
|  | // 11110xxx (xxx is top 3 bits) | 
|  | bytes.add(((charCode >> 18) & 0x07) | 0xF0); | 
|  | additionalBytes = 3; | 
|  | } | 
|  | for (int i = additionalBytes; i > 0; i--) { | 
|  | // 10xxxxxx (xxxxxx is next 6 bits from the top). | 
|  | bytes.add(((charCode >> (6 * (i - 1))) & 0x3F) | 0x80); | 
|  | } | 
|  | pos += additionalBytes + 1; | 
|  | } | 
|  | return bytes; | 
|  | } | 
|  | } |