Remove support for deprecated encodings and parser_console.dart library (#93)

commit: 5abd53c071dda513e34228066109ffa5c4d17794 [log] [tgz]
author: Kevin Moore <kevmoo@users.noreply.github.com> Mon Apr 08 13:15:55 2019 -0700
committer: GitHub <noreply@github.com> Mon Apr 08 13:15:55 2019 -0700
tree: 373e06a7939b9715a696164c1eb5eeb7a90b822f
parent: 30367f553a208504da9d3cc0f224dced9bca8ff7 [diff]
diff --git a/.travis.yml b/.travis.yml
index 714aa86..337ceca 100644
--- a/.travis.yml
+++ b/.travis.yml

@@ -1,9 +1,12 @@
 language: dart
+
 dart:
   - dev
+  - 2.0.0
+
 dart_task:
   - test: -p vm
-  - test: -p chrome,firefox
+  - test: -p chrome
   - dartanalyzer: --fatal-warnings --fatal-infos .
 
 matrix:

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 14eb41c..2ddbae7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md

@@ -1,3 +1,10 @@
+## 0.14.0
+
+*BREAKING CHANGES*
+
+- Drop support for encodings other than UTF-8 and ASCII.
+- Removed `parser_console.dart` library.
+
 ## 0.13.4+1
 
 * Fixes to readme and pubspec.

diff --git a/lib/parser_console.dart b/lib/parser_console.dart
deleted file mode 100644
index 28dee14..0000000
--- a/lib/parser_console.dart
+++ /dev/null

@@ -1,42 +0,0 @@
-/// This library adds `dart:io` support to the HTML5 parser. Call
-/// [initDartIOSupport] before calling the [parse] methods and they will accept
-/// a [RandomAccessFile] as input, in addition to the other input types.
-library parser_console;
-
-import 'dart:io';
-import 'parser.dart';
-import 'src/inputstream.dart' as inputstream;
-
-/// Adds support to the [HtmlParser] for running on a console VM. In particular
-/// this means it will be able to handle `dart:io` and [RandomAccessFile]s as
-/// input to the various [parse] methods.
-void useConsole() {
-  inputstream.consoleSupport = _ConsoleSupport();
-}
-
-class _ConsoleSupport extends inputstream.ConsoleSupport {
-  List<int> bytesFromFile(source) {
-    if (source is! RandomAccessFile) return null;
-    return readAllBytesFromFile(source);
-  }
-}
-
-// TODO(jmesserly): this should be `RandomAccessFile.readAllBytes`.
-/// Synchronously reads all bytes from the [file].
-List<int> readAllBytesFromFile(RandomAccessFile file) {
-  int length = file.lengthSync();
-  var bytes = List<int>(length);
-
-  int bytesRead = 0;
-  while (bytesRead < length) {
-    int read = file.readIntoSync(bytes, bytesRead, length - bytesRead);
-    if (read <= 0) {
-      // This could happen if, for example, the file was resized while
-      // we're reading. Just shrink the bytes array and move on.
-      bytes = bytes.sublist(0, bytesRead);
-      break;
-    }
-    bytesRead += read;
-  }
-  return bytes;
-}

diff --git a/lib/src/char_encodings.dart b/lib/src/char_encodings.dart
index ba10a4a..6120056 100644
--- a/lib/src/char_encodings.dart
+++ b/lib/src/char_encodings.dart

@@ -1,8 +1,4 @@
-/// Decodes bytes using the correct name. See [decodeBytes].
-library char_encodings;
-
-import 'dart:collection';
-import 'package:utf/utf.dart';
+import 'utf.dart';
 
 // TODO(jmesserly): this function is conspicuously absent from dart:utf.
 /// Returns true if the [bytes] starts with a UTF-8 byte order mark.
@@ -21,15 +17,9 @@
 /// Decodes the [bytes] with the provided [encoding] and returns an iterable for
 /// the codepoints. Supports the major unicode encodings as well as ascii and
 /// and windows-1252 encodings.
-Iterable<int> decodeBytes(String encoding, List<int> bytes,
-    [int offset = 0,
-    int length,
-    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
-  if (length == null) length = bytes.length;
-  final replace = replacementCodepoint;
+Iterable<int> decodeBytes(String encoding, List<int> bytes) {
   switch (encoding) {
     case 'ascii':
-      bytes = bytes.sublist(offset, offset + length);
       // TODO(jmesserly): this was taken from runtime/bin/string_stream.dart
       for (int byte in bytes) {
         if (byte > 127) {
@@ -41,32 +31,18 @@
       }
       return bytes;
 
-    case 'windows-1252':
-    case 'cp1252':
-      return decodeWindows1252AsIterable(bytes, offset, length, replace);
-
     case 'utf-8':
       // NOTE: to match the behavior of the other decode functions, we eat the
       // utf-8 BOM here.
-      if (hasUtf8Bom(bytes, offset, length)) {
+
+      var offset = 0;
+      var length = bytes.length;
+
+      if (hasUtf8Bom(bytes)) {
         offset += 3;
         length -= 3;
       }
-      return decodeUtf8AsIterable(bytes, offset, length, replace);
-
-    case 'utf-16':
-      return decodeUtf16AsIterable(bytes, offset, length, replace);
-    case 'utf-16-be':
-      return decodeUtf16beAsIterable(bytes, offset, length, true, replace);
-    case 'utf-16-le':
-      return decodeUtf16leAsIterable(bytes, offset, length, true, replace);
-
-    case 'utf-32':
-      return decodeUtf32AsIterable(bytes, offset, length, replace);
-    case 'utf-32-be':
-      return decodeUtf32beAsIterable(bytes, offset, length, true, replace);
-    case 'utf-32-le':
-      return decodeUtf32leAsIterable(bytes, offset, length, true, replace);
+      return decodeUtf8AsIterable(bytes, offset, length);
 
     default:
       throw ArgumentError('Encoding $encoding not supported');
@@ -94,135 +70,3 @@
   }
   return newCodes;
 }
-
-/// Decodes [windows-1252](http://en.wikipedia.org/wiki/Windows-1252) bytes as
-/// an iterable. Thus, the consumer can only convert as much of the input as
-/// needed. Set the [replacementCharacter] to null to throw an [ArgumentError]
-/// rather than replace the bad value.
-IterableWindows1252Decoder decodeWindows1252AsIterable(List<int> bytes,
-    [int offset = 0,
-    int length,
-    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
-  return IterableWindows1252Decoder(
-      bytes, offset, length, replacementCodepoint);
-}
-
-/// Return type of [decodeWindows1252AsIterable] and variants. The Iterable type
-/// provides an iterator on demand and the iterator will only translate bytes
-/// as requested by the user of the iterator. (Note: results are not cached.)
-class IterableWindows1252Decoder extends IterableBase<int> {
-  final List<int> bytes;
-  final int offset;
-  final int length;
-  final int replacementCodepoint;
-
-  IterableWindows1252Decoder(this.bytes,
-      [this.offset = 0,
-      this.length,
-      this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
-
-  Windows1252Decoder get iterator =>
-      Windows1252Decoder(bytes, offset, length, replacementCodepoint);
-}
-
-/// Provides an iterator of Unicode codepoints from windows-1252 encoded bytes.
-/// The parameters can set an offset into a list of bytes (as int), limit the
-/// length of the values to be decoded, and override the default Unicode
-/// replacement character. Set the replacementCharacter to null to throw an
-/// ArgumentError rather than replace the bad value. The return value
-/// from this method can be used as an Iterable (e.g. in a for-loop).
-class Windows1252Decoder implements Iterator<int> {
-  final int replacementCodepoint;
-  final List<int> _bytes;
-  int _offset;
-  final int _length;
-
-  Windows1252Decoder(List<int> bytes,
-      [int offset = 0,
-      int length,
-      this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
-      : _bytes = bytes,
-        _offset = offset - 1,
-        _length = length == null ? bytes.length : length;
-
-  bool get _inRange => _offset >= 0 && _offset < _length;
-  int get current => _inRange ? _mapChar(_bytes[_offset]) : null;
-
-  bool moveNext() {
-    _offset++;
-    return _inRange;
-  }
-
-  int _mapChar(int char) {
-    // TODO(jmesserly): this is duplicating entitiesWindows1252 and
-    // replacementCharacters from constants.dart
-    switch (char) {
-      case 0x80:
-        return 0x20AC; // EURO SIGN
-      case 0x82:
-        return 0x201A; // SINGLE LOW-9 QUOTATION MARK
-      case 0x83:
-        return 0x0192; // LATIN SMALL LETTER F WITH HOOK
-      case 0x84:
-        return 0x201E; // DOUBLE LOW-9 QUOTATION MARK
-      case 0x85:
-        return 0x2026; // HORIZONTAL ELLIPSIS
-      case 0x86:
-        return 0x2020; // DAGGER
-      case 0x87:
-        return 0x2021; // DOUBLE DAGGER
-      case 0x88:
-        return 0x02C6; // MODIFIER LETTER CIRCUMFLEX ACCENT
-      case 0x89:
-        return 0x2030; // PER MILLE SIGN
-      case 0x8A:
-        return 0x0160; // LATIN CAPITAL LETTER S WITH CARON
-      case 0x8B:
-        return 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-      case 0x8C:
-        return 0x0152; // LATIN CAPITAL LIGATURE OE
-      case 0x8E:
-        return 0x017D; // LATIN CAPITAL LETTER Z WITH CARON
-      case 0x91:
-        return 0x2018; // LEFT SINGLE QUOTATION MARK
-      case 0x92:
-        return 0x2019; // RIGHT SINGLE QUOTATION MARK
-      case 0x93:
-        return 0x201C; // LEFT DOUBLE QUOTATION MARK
-      case 0x94:
-        return 0x201D; // RIGHT DOUBLE QUOTATION MARK
-      case 0x95:
-        return 0x2022; // BULLET
-      case 0x96:
-        return 0x2013; // EN DASH
-      case 0x97:
-        return 0x2014; // EM DASH
-      case 0x98:
-        return 0x02DC; // SMALL TILDE
-      case 0x99:
-        return 0x2122; // TRADE MARK SIGN
-      case 0x9A:
-        return 0x0161; // LATIN SMALL LETTER S WITH CARON
-      case 0x9B:
-        return 0x203A; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-      case 0x9C:
-        return 0x0153; // LATIN SMALL LIGATURE OE
-      case 0x9E:
-        return 0x017E; // LATIN SMALL LETTER Z WITH CARON
-      case 0x9F:
-        return 0x0178; // LATIN CAPITAL LETTER Y WITH DIAERESIS
-
-      case 0x81:
-      case 0x8D:
-      case 0x8F:
-      case 0x90:
-      case 0x9D:
-        if (replacementCodepoint == null) {
-          throw ArgumentError(
-              "Invalid windows-1252 code point $char at $_offset");
-        }
-        return replacementCodepoint;
-    }
-    return char;
-  }
-}

diff --git a/lib/src/constants.dart b/lib/src/constants.dart
index fc0c6a1..34addfc 100644
--- a/lib/src/constants.dart
+++ b/lib/src/constants.dart

@@ -29,10 +29,10 @@
       "Entity used with illegal number (windows-1252 reference).",
   "cant-convert-numeric-entity":
       "Numeric entity couldn't be converted to character "
-      "(codepoint U+%(charAsInt)08x).",
+          "(codepoint U+%(charAsInt)08x).",
   "illegal-codepoint-for-numeric-entity":
       "Numeric entity represents an illegal codepoint: "
-      "U+%(charAsInt)08x.",
+          "U+%(charAsInt)08x.",
   "numeric-entity-without-semicolon": "Numeric entity didn't end with ';'.",
   "expected-numeric-entity-but-got-eof":
       "Numeric entity expected. Got end of file instead.",
@@ -46,7 +46,7 @@
       "Expected tag name. Got '>' instead.",
   "expected-tag-name-but-got-question-mark":
       "Expected tag name. Got '?' instead. (HTML doesn't "
-      "support processing instructions.)",
+          "support processing instructions.)",
   "expected-tag-name": "Expected tag name. Got something else instead",
   "expected-closing-tag-but-got-right-bracket":
       "Expected closing tag. Got '>' instead. Ignoring '</>'.",
@@ -133,7 +133,7 @@
   "missing-end-tags": "Missing end tags (%(name)s).",
   "unexpected-start-tag-implies-end-tag":
       "Unexpected start tag (%(startName)s) "
-      "implies end tag (%(endName)s).",
+          "implies end tag (%(endName)s).",
   "unexpected-start-tag-treated-as":
       "Unexpected start tag (%(originalName)s). Treated as %(newName)s.",
   "deprecated-tag": "Unexpected start tag %(name)s. Don't use it!",
@@ -165,7 +165,7 @@
   "unexpected-form-in-table": "Unexpected form in table context.",
   "unexpected-start-tag-implies-table-voodoo":
       "Unexpected start tag (%(name)s) in "
-      "table context caused voodoo mode.",
+          "table context caused voodoo mode.",
   "unexpected-end-tag-implies-table-voodoo": "Unexpected end tag (%(name)s) in "
       "table context caused voodoo mode.",
   "unexpected-cell-in-table-body": "Unexpected table cell start tag (%(name)s) "
@@ -180,12 +180,12 @@
       "Unexpected end tag (%(name)s) in the table row phase. Ignored.",
   "unexpected-select-in-select":
       "Unexpected select start tag in the select phase "
-      "treated as select end tag.",
+          "treated as select end tag.",
   "unexpected-input-in-select":
       "Unexpected input start tag in the select phase.",
   "unexpected-start-tag-in-select":
       "Unexpected start tag token (%(name)s in the select phase. "
-      "Ignored.",
+          "Ignored.",
   "unexpected-end-tag-in-select":
       "Unexpected end tag (%(name)s) in the select phase. Ignored.",
   "unexpected-table-element-start-tag-in-select-in-table":
@@ -204,7 +204,7 @@
       " in the frameset phase. Ignored.",
   "unexpected-frameset-in-frameset-innerhtml":
       "Unexpected end tag token (frameset) "
-      "in the frameset phase (innerHTML).",
+          "in the frameset phase (innerHTML).",
   "unexpected-end-tag-in-frameset": "Unexpected end tag token (%(name)s)"
       " in the frameset phase. Ignored.",
   "unexpected-char-after-frameset": "Unexpected non-space characters in the "

diff --git a/lib/src/encoding_parser.dart b/lib/src/encoding_parser.dart
index d61e76a..d0f40d6 100644
--- a/lib/src/encoding_parser.dart
+++ b/lib/src/encoding_parser.dart

@@ -1,7 +1,5 @@
-library encoding_parser;
-
 import 'constants.dart';
-import 'inputstream.dart';
+import 'html_input_stream.dart';
 
 // TODO(jmesserly): I converted StopIteration to StateError("No more elements").
 // Seems strange to throw this from outside of an iterator though.
@@ -10,15 +8,15 @@
 /// raised.
 class EncodingBytes {
   final String _bytes;
-  int _position = -1;
+  int __position = -1;
 
   EncodingBytes(this._bytes);
 
-  int get length => _bytes.length;
+  int get _length => _bytes.length;
 
-  String next() {
-    var p = _position = _position + 1;
-    if (p >= length) {
+  String _next() {
+    var p = __position = __position + 1;
+    if (p >= _length) {
       throw StateError("No more elements");
     } else if (p < 0) {
       throw RangeError(p);
@@ -26,59 +24,59 @@
     return _bytes[p];
   }
 
-  String previous() {
-    var p = _position;
-    if (p >= length) {
+  String _previous() {
+    var p = __position;
+    if (p >= _length) {
       throw StateError("No more elements");
     } else if (p < 0) {
       throw RangeError(p);
     }
-    _position = p = p - 1;
+    __position = p = p - 1;
     return _bytes[p];
   }
 
-  set position(int value) {
-    if (_position >= length) {
+  set _position(int value) {
+    if (__position >= _length) {
       throw StateError("No more elements");
     }
-    _position = value;
+    __position = value;
   }
 
-  int get position {
-    if (_position >= length) {
+  int get _position {
+    if (__position >= _length) {
       throw StateError("No more elements");
     }
-    if (_position >= 0) {
-      return _position;
+    if (__position >= 0) {
+      return __position;
     } else {
       return 0;
     }
   }
 
-  String get currentByte => _bytes[position];
+  String get _currentByte => _bytes[_position];
 
   /// Skip past a list of characters. Defaults to skipping [isWhitespace].
-  String skipChars([CharPreciate skipChars]) {
+  String _skipChars([_CharPredicate skipChars]) {
     if (skipChars == null) skipChars = isWhitespace;
-    var p = position; // use property for the error-checking
-    while (p < length) {
+    var p = _position; // use property for the error-checking
+    while (p < _length) {
       var c = _bytes[p];
       if (!skipChars(c)) {
-        _position = p;
+        __position = p;
         return c;
       }
       p += 1;
     }
-    _position = p;
+    __position = p;
     return null;
   }
 
-  String skipUntil(CharPreciate untilChars) {
-    var p = position;
-    while (p < length) {
+  String _skipUntil(_CharPredicate untilChars) {
+    var p = _position;
+    while (p < _length) {
       var c = _bytes[p];
       if (untilChars(c)) {
-        _position = p;
+        __position = p;
         return c;
       }
       p += 1;
@@ -89,14 +87,14 @@
   /// Look for a sequence of bytes at the start of a string. If the bytes
   /// are found return true and advance the position to the byte after the
   /// match. Otherwise return false and leave the position alone.
-  bool matchBytes(String bytes) {
-    var p = position;
+  bool _matchBytes(String bytes) {
+    var p = _position;
     if (_bytes.length < p + bytes.length) {
       return false;
     }
     var data = _bytes.substring(p, p + bytes.length);
     if (data == bytes) {
-      position += bytes.length;
+      _position += bytes.length;
       return true;
     }
     return false;
@@ -104,19 +102,19 @@
 
   /// Look for the next sequence of bytes matching a given sequence. If
   /// a match is found advance the position to the last byte of the match
-  bool jumpTo(String bytes) {
-    var newPosition = _bytes.indexOf(bytes, position);
+  bool _jumpTo(String bytes) {
+    var newPosition = _bytes.indexOf(bytes, _position);
     if (newPosition >= 0) {
-      _position = newPosition + bytes.length - 1;
+      __position = newPosition + bytes.length - 1;
       return true;
     } else {
       throw StateError("No more elements");
     }
   }
 
-  String slice(int start, [int end]) {
-    if (end == null) end = length;
-    if (end < 0) end += length;
+  String _slice(int start, [int end]) {
+    if (end == null) end = _length;
+    if (end < 0) end += _length;
     return _bytes.substring(start, end);
   }
 }
@@ -126,68 +124,69 @@
 class _DispatchEntry {
   final String pattern;
   final _MethodHandler handler;
+
   _DispatchEntry(this.pattern, this.handler);
 }
 
 /// Mini parser for detecting character encoding from meta elements.
 class EncodingParser {
-  final EncodingBytes data;
-  String encoding;
+  final EncodingBytes _data;
+  String _encoding;
 
   /// [bytes] - the data to work on for encoding detection.
   EncodingParser(List<int> bytes)
       // Note: this is intentionally interpreting bytes as codepoints.
-      : data = EncodingBytes(String.fromCharCodes(bytes).toLowerCase());
+      : _data = EncodingBytes(String.fromCharCodes(bytes).toLowerCase());
 
   String getEncoding() {
     final methodDispatch = [
-      _DispatchEntry("<!--", handleComment),
-      _DispatchEntry("<meta", handleMeta),
-      _DispatchEntry("</", handlePossibleEndTag),
-      _DispatchEntry("<!", handleOther),
-      _DispatchEntry("<?", handleOther),
-      _DispatchEntry("<", handlePossibleStartTag),
+      _DispatchEntry("<!--", _handleComment),
+      _DispatchEntry("<meta", _handleMeta),
+      _DispatchEntry("</", _handlePossibleEndTag),
+      _DispatchEntry("<!", _handleOther),
+      _DispatchEntry("<?", _handleOther),
+      _DispatchEntry("<", _handlePossibleStartTag),
     ];
 
     try {
       for (;;) {
         for (var dispatch in methodDispatch) {
-          if (data.matchBytes(dispatch.pattern)) {
+          if (_data._matchBytes(dispatch.pattern)) {
             var keepParsing = dispatch.handler();
             if (keepParsing) break;
 
             // We found an encoding. Stop.
-            return encoding;
+            return _encoding;
           }
         }
-        data.position += 1;
+        _data._position += 1;
       }
     } on StateError catch (_) {
       // Catch this here to match behavior of Python's StopIteration
       // TODO(jmesserly): refactor to not use exceptions
     }
-    return encoding;
+    return _encoding;
   }
 
   /// Skip over comments.
-  bool handleComment() => data.jumpTo("-->");
+  bool _handleComment() => _data._jumpTo("-->");
 
-  bool handleMeta() {
-    if (!isWhitespace(data.currentByte)) {
+  bool _handleMeta() {
+    if (!isWhitespace(_data._currentByte)) {
       // if we have <meta not followed by a space so just keep going
       return true;
     }
     // We have a valid meta element we want to search for attributes
     while (true) {
       // Try to find the next attribute after the current position
-      var attr = getAttribute();
+      var attr = _getAttribute();
       if (attr == null) return true;
 
       if (attr[0] == "charset") {
         var tentativeEncoding = attr[1];
         var codec = codecName(tentativeEncoding);
         if (codec != null) {
-          encoding = codec;
+          _encoding = codec;
           return false;
         }
       } else if (attr[0] == "content") {
@@ -195,54 +194,54 @@
         var tentativeEncoding = contentParser.parse();
         var codec = codecName(tentativeEncoding);
         if (codec != null) {
-          encoding = codec;
+          _encoding = codec;
           return false;
         }
       }
     }
   }
 
-  bool handlePossibleStartTag() => handlePossibleTag(false);
+  bool _handlePossibleStartTag() => _handlePossibleTag(false);
 
-  bool handlePossibleEndTag() {
-    data.next();
-    return handlePossibleTag(true);
+  bool _handlePossibleEndTag() {
+    _data._next();
+    return _handlePossibleTag(true);
   }
 
-  bool handlePossibleTag(bool endTag) {
-    if (!isLetter(data.currentByte)) {
+  bool _handlePossibleTag(bool endTag) {
+    if (!isLetter(_data._currentByte)) {
       //If the next byte is not an ascii letter either ignore this
       //fragment (possible start tag case) or treat it according to
       //handleOther
       if (endTag) {
-        data.previous();
-        handleOther();
+        _data._previous();
+        _handleOther();
       }
       return true;
     }
 
-    var c = data.skipUntil(isSpaceOrAngleBracket);
+    var c = _data._skipUntil(_isSpaceOrAngleBracket);
     if (c == "<") {
       // return to the first step in the overall "two step" algorithm
       // reprocessing the < byte
-      data.previous();
+      _data._previous();
     } else {
       //Read all attributes
-      var attr = getAttribute();
+      var attr = _getAttribute();
       while (attr != null) {
-        attr = getAttribute();
+        attr = _getAttribute();
       }
     }
     return true;
   }
 
-  bool handleOther() => data.jumpTo(">");
+  bool _handleOther() => _data._jumpTo(">");
 
   /// Return a name,value pair for the next attribute in the stream,
   /// if one is found, or null
-  List<String> getAttribute() {
+  List<String> _getAttribute() {
     // Step 1 (skip chars)
-    var c = data.skipChars((x) => x == "/" || isWhitespace(x));
+    var c = _data._skipChars((x) => x == "/" || isWhitespace(x));
     // Step 2
     if (c == ">" || c == null) {
       return null;
@@ -258,8 +257,8 @@
         break;
       } else if (isWhitespace(c)) {
         // Step 6!
-        c = data.skipChars();
-        c = data.next();
+        c = _data._skipChars();
+        c = _data._next();
         break;
       } else if (c == "/" || c == ">") {
         return [attrName.join(), ""];
@@ -269,27 +268,27 @@
         attrName.add(c);
       }
       // Step 5
-      c = data.next();
+      c = _data._next();
     }
     // Step 7
     if (c != "=") {
-      data.previous();
+      _data._previous();
       return [attrName.join(), ""];
     }
     // Step 8
-    data.next();
+    _data._next();
     // Step 9
-    c = data.skipChars();
+    c = _data._skipChars();
     // Step 10
     if (c == "'" || c == '"') {
       // 10.1
       var quoteChar = c;
       while (true) {
         // 10.2
-        c = data.next();
+        c = _data._next();
         if (c == quoteChar) {
           // 10.3
-          data.next();
+          _data._next();
           return [attrName.join(), attrValue.join()];
         } else if (isLetter(c)) {
           // 10.4
@@ -310,8 +309,8 @@
     }
     // Step 11
     while (true) {
-      c = data.next();
-      if (isSpaceOrAngleBracket(c)) {
+      c = _data._next();
+      if (_isSpaceOrAngleBracket(c)) {
         return [attrName.join(), attrValue.join()];
       } else if (c == null) {
         return null;
@@ -333,34 +332,34 @@
     try {
       // Check if the attr name is charset
       // otherwise return
-      data.jumpTo("charset");
-      data.position += 1;
-      data.skipChars();
-      if (data.currentByte != "=") {
+      data._jumpTo("charset");
+      data._position += 1;
+      data._skipChars();
+      if (data._currentByte != "=") {
         // If there is no = sign keep looking for attrs
         return null;
       }
-      data.position += 1;
-      data.skipChars();
+      data._position += 1;
+      data._skipChars();
       // Look for an encoding between matching quote marks
-      if (data.currentByte == '"' || data.currentByte == "'") {
-        var quoteMark = data.currentByte;
-        data.position += 1;
-        var oldPosition = data.position;
-        if (data.jumpTo(quoteMark)) {
-          return data.slice(oldPosition, data.position);
+      if (data._currentByte == '"' || data._currentByte == "'") {
+        var quoteMark = data._currentByte;
+        data._position += 1;
+        var oldPosition = data._position;
+        if (data._jumpTo(quoteMark)) {
+          return data._slice(oldPosition, data._position);
         } else {
           return null;
         }
       } else {
         // Unquoted value
-        var oldPosition = data.position;
+        var oldPosition = data._position;
         try {
-          data.skipUntil(isWhitespace);
-          return data.slice(oldPosition, data.position);
+          data._skipUntil(isWhitespace);
+          return data._slice(oldPosition, data._position);
         } on StateError catch (_) {
           //Return the whole remaining value
-          return data.slice(oldPosition);
+          return data._slice(oldPosition);
         }
       }
     } on StateError catch (_) {
@@ -369,8 +368,8 @@
   }
 }
 
-bool isSpaceOrAngleBracket(String char) {
+bool _isSpaceOrAngleBracket(String char) {
   return char == ">" || char == "<" || isWhitespace(char);
 }
 
-typedef CharPreciate = bool Function(String char);
+typedef _CharPredicate = bool Function(String char);

diff --git a/lib/src/inputstream.dart b/lib/src/html_input_stream.dart
similarity index 85%
rename from lib/src/inputstream.dart
rename to lib/src/html_input_stream.dart
index dbcf98b..84d2f13 100644
--- a/lib/src/inputstream.dart
+++ b/lib/src/html_input_stream.dart

@@ -1,21 +1,12 @@
-library inputstream;
-
 import 'dart:collection';
-import 'package:utf/utf.dart';
+
 import 'package:source_span/source_span.dart';
+
 import 'char_encodings.dart';
 import 'constants.dart';
 import 'encoding_parser.dart';
 import 'utils.dart';
 
-/// Hooks to call into dart:io without directly referencing it.
-class ConsoleSupport {
-  List<int> bytesFromFile(source) => null;
-}
-
-// TODO(jmesserly): use lazy init here when supported.
-ConsoleSupport consoleSupport = ConsoleSupport();
-
 /// Provides a unicode stream of characters to the HtmlTokenizer.
 ///
 /// This class takes care of character encoding and removing or replacing
@@ -26,7 +17,7 @@
   static const int numBytesMeta = 512;
 
   /// Encoding to use if no other information can be found.
-  static const String defaultEncoding = 'windows-1252';
+  static const String defaultEncoding = 'utf-8';
 
   /// The name of the character encoding.
   String charEncodingName;
@@ -81,18 +72,8 @@
     } else if (source is List<int>) {
       _rawBytes = source;
     } else {
-      // TODO(jmesserly): it's unfortunate we need to read all bytes in advance,
-      // but it's necessary because of how the UTF decoders work.
-      _rawBytes = consoleSupport.bytesFromFile(source);
-
-      if (_rawBytes == null) {
-        // TODO(jmesserly): we should accept some kind of stream API too.
-        // Unfortunately dart:io InputStream is async only, which won't work.
-        throw ArgumentError("'source' must be a String or "
-            "List<int> (of bytes). You can also pass a RandomAccessFile if you"
-            "`import 'package:html/parser_console.dart'` and call "
-            "`useConsole()`.");
-      }
+      throw ArgumentError.value(
+          source, 'source', 'Must be a String or List<int>.');
     }
 
     // Detect encoding iff no explicit "transport level" encoding is supplied
@@ -121,7 +102,7 @@
         if (c == NEWLINE) continue;
       }
 
-      if (invalidUnicode(c)) errors.add('invalid-codepoint');
+      if (_invalidUnicode(c)) errors.add('invalid-codepoint');
 
       if (0xD800 <= c && c <= 0xDFFF) {
         c = 0xFFFD;
@@ -199,14 +180,6 @@
     if (hasUtf8Bom(_rawBytes)) {
       return 'utf-8';
     }
-    // Note: we don't need to remember whether it was big or little endian
-    // because the decoder will do that later. It will also eat the BOM for us.
-    if (hasUtf16Bom(_rawBytes)) {
-      return 'utf-16';
-    }
-    if (hasUtf32Bom(_rawBytes)) {
-      return 'utf-32';
-    }
     return null;
   }
 
@@ -262,7 +235,7 @@
 
 // TODO(jmesserly): the Python code used a regex to check for this. But
 // Dart doesn't let you create a regexp with invalid characters.
-bool invalidUnicode(int c) {
+bool _invalidUnicode(int c) {
   if (0x0001 <= c && c <= 0x0008) return true;
   if (0x000E <= c && c <= 0x001F) return true;
   if (0x007F <= c && c <= 0x009F) return true;

diff --git a/lib/src/tokenizer.dart b/lib/src/tokenizer.dart
index 48d6365..638663e 100644
--- a/lib/src/tokenizer.dart
+++ b/lib/src/tokenizer.dart

@@ -3,7 +3,7 @@
 import 'dart:collection';
 import 'package:html/parser.dart' show HtmlParser;
 import 'constants.dart';
-import 'inputstream.dart';
+import 'html_input_stream.dart';
 import 'token.dart';
 import 'utils.dart';
 

diff --git a/lib/src/utf.dart b/lib/src/utf.dart
new file mode 100644
index 0000000..a635db8
--- /dev/null
+++ b/lib/src/utf.dart

@@ -0,0 +1,237 @@
+// Large portions of this code were taken from https://github.com/dart-lang/utf
+
+import "dart:collection";
+
+const int _replacementCodepoint = 0xfffd;
+
+const int _UNICODE_VALID_RANGE_MAX = 0x10ffff;
+const int _UNICODE_UTF16_RESERVED_LO = 0xd800;
+const int _UNICODE_UTF16_RESERVED_HI = 0xdfff;
+
+const int _UTF8_ONE_BYTE_MAX = 0x7f;
+const int _UTF8_TWO_BYTE_MAX = 0x7ff;
+const int _UTF8_THREE_BYTE_MAX = 0xffff;
+
+const int _UTF8_LO_SIX_BIT_MASK = 0x3f;
+
+const int _UTF8_FIRST_BYTE_OF_TWO_BASE = 0xc0;
+const int _UTF8_FIRST_BYTE_OF_THREE_BASE = 0xe0;
+const int _UTF8_FIRST_BYTE_OF_FOUR_BASE = 0xf0;
+const int _UTF8_FIRST_BYTE_OF_FIVE_BASE = 0xf8;
+const int _UTF8_FIRST_BYTE_OF_SIX_BASE = 0xfc;
+
+const int _UTF8_FIRST_BYTE_BOUND_EXCL = 0xfe;
+
+/// Decodes the UTF-8 bytes as an iterable. Thus, the consumer can only convert
+/// as much of the input as needed. Invalid byte sequences are replaced with
+/// U+FFFD (the Unicode replacement character) rather than causing an error.
+Iterable<int> decodeUtf8AsIterable(List<int> bytes, int offset, int length) =>
+    _IterableUtf8Decoder(bytes, offset, length);
+
+/// Return type of [decodeUtf8AsIterable] and variants. The Iterable type
+/// provides an iterator on demand and the iterator will only translate bytes
+/// as requested by the user of the iterator. (Note: results are not cached.)
+// TODO(floitsch): Consider removing the extend and switch to implements since
+// that's cheaper to allocate.
+class _IterableUtf8Decoder extends IterableBase<int> {
+  final List<int> bytes;
+  final int offset;
+  final int length;
+
+  _IterableUtf8Decoder(this.bytes, this.offset, this.length);
+
+  _Utf8Decoder get iterator => _Utf8Decoder(bytes, offset, length);
+}
+
+/// Provides an iterator of Unicode codepoints from UTF-8 encoded bytes. The
+/// parameters can set an offset into a list of bytes (as int) and limit the
+/// length of the values to be decoded. Invalid, overlong, surrogate, or
+/// out-of-range sequences are replaced with U+FFFD (the Unicode replacement
+/// character); the replacement codepoint is a fixed constant, so malformed
+/// input never results in an ArgumentError.
+class _Utf8Decoder implements Iterator<int> {
+  final _ListRangeIterator utf8EncodedBytesIterator;
+  int _current;
+
+  _Utf8Decoder(List<int> utf8EncodedBytes, int offset, int length)
+      : utf8EncodedBytesIterator =
+            (_ListRange(utf8EncodedBytes, offset, length)).iterator;
+
+  _Utf8Decoder._fromListRangeIterator(_ListRange source)
+      : utf8EncodedBytesIterator = source.iterator;
+
+  /// Decode the remainder of the characters in this decoder
+  /// into a [List<int>].
+  List<int> decodeRest() {
+    List<int> codepoints = List<int>(utf8EncodedBytesIterator.remaining);
+    int i = 0;
+    while (moveNext()) {
+      codepoints[i++] = current;
+    }
+    if (i == codepoints.length) {
+      return codepoints;
+    } else {
+      List<int> truncCodepoints = List<int>(i);
+      truncCodepoints.setRange(0, i, codepoints);
+      return truncCodepoints;
+    }
+  }
+
+  int get current => _current;
+
+  bool moveNext() {
+    _current = null;
+
+    if (!utf8EncodedBytesIterator.moveNext()) return false;
+
+    int value = utf8EncodedBytesIterator.current;
+    int additionalBytes = 0;
+
+    if (value < 0) {
+      if (_replacementCodepoint != null) {
+        _current = _replacementCodepoint;
+        return true;
+      } else {
+        throw ArgumentError(
+            "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");
+      }
+    } else if (value <= _UTF8_ONE_BYTE_MAX) {
+      _current = value;
+      return true;
+    } else if (value < _UTF8_FIRST_BYTE_OF_TWO_BASE) {
+      if (_replacementCodepoint != null) {
+        _current = _replacementCodepoint;
+        return true;
+      } else {
+        throw ArgumentError(
+            "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");
+      }
+    } else if (value < _UTF8_FIRST_BYTE_OF_THREE_BASE) {
+      value -= _UTF8_FIRST_BYTE_OF_TWO_BASE;
+      additionalBytes = 1;
+    } else if (value < _UTF8_FIRST_BYTE_OF_FOUR_BASE) {
+      value -= _UTF8_FIRST_BYTE_OF_THREE_BASE;
+      additionalBytes = 2;
+    } else if (value < _UTF8_FIRST_BYTE_OF_FIVE_BASE) {
+      value -= _UTF8_FIRST_BYTE_OF_FOUR_BASE;
+      additionalBytes = 3;
+    } else if (value < _UTF8_FIRST_BYTE_OF_SIX_BASE) {
+      value -= _UTF8_FIRST_BYTE_OF_FIVE_BASE;
+      additionalBytes = 4;
+    } else if (value < _UTF8_FIRST_BYTE_BOUND_EXCL) {
+      value -= _UTF8_FIRST_BYTE_OF_SIX_BASE;
+      additionalBytes = 5;
+    } else if (_replacementCodepoint != null) {
+      _current = _replacementCodepoint;
+      return true;
+    } else {
+      throw ArgumentError(
+          "Invalid UTF8 at ${utf8EncodedBytesIterator.position}");
+    }
+    int j = 0;
+    while (j < additionalBytes && utf8EncodedBytesIterator.moveNext()) {
+      int nextValue = utf8EncodedBytesIterator.current;
+      if (nextValue > _UTF8_ONE_BYTE_MAX &&
+          nextValue < _UTF8_FIRST_BYTE_OF_TWO_BASE) {
+        value = ((value << 6) | (nextValue & _UTF8_LO_SIX_BIT_MASK));
+      } else {
+        // if sequence-starting code unit, reposition cursor to start here
+        if (nextValue >= _UTF8_FIRST_BYTE_OF_TWO_BASE) {
+          utf8EncodedBytesIterator.backup();
+        }
+        break;
+      }
+      j++;
+    }
+    bool validSequence = (j == additionalBytes &&
+        (value < _UNICODE_UTF16_RESERVED_LO ||
+            value > _UNICODE_UTF16_RESERVED_HI));
+    bool nonOverlong = (additionalBytes == 1 && value > _UTF8_ONE_BYTE_MAX) ||
+        (additionalBytes == 2 && value > _UTF8_TWO_BYTE_MAX) ||
+        (additionalBytes == 3 && value > _UTF8_THREE_BYTE_MAX);
+    bool inRange = value <= _UNICODE_VALID_RANGE_MAX;
+    if (validSequence && nonOverlong && inRange) {
+      _current = value;
+      return true;
+    } else if (_replacementCodepoint != null) {
+      _current = _replacementCodepoint;
+      return true;
+    } else {
+      throw ArgumentError(
+          "Invalid UTF8 at ${utf8EncodedBytesIterator.position - j}");
+    }
+  }
+}
+
+/// _ListRange is an internal type used to create a lightweight Iterable on a
+/// range within a source list. DO NOT MODIFY the underlying list while
+/// iterating over it. The results of doing so are undefined.
+// TODO(floitsch): Consider removing the extend and switch to implements since
+// that's cheaper to allocate.
+class _ListRange extends IterableBase<int> {
+  final List<int> _source;
+  final int _offset;
+  final int _length;
+
+  _ListRange(List<int> source, [int offset = 0, int length])
+      : _source = source,
+        _offset = offset,
+        _length = (length == null ? source.length - offset : length) {
+    if (_offset < 0 || _offset > _source.length) {
+      throw RangeError.value(_offset);
+    }
+    if (_length != null && (_length < 0)) {
+      throw RangeError.value(_length);
+    }
+    if (_length + _offset > _source.length) {
+      throw RangeError.value(_length + _offset);
+    }
+  }
+
+  _ListRangeIterator get iterator =>
+      _ListRangeIteratorImpl(_source, _offset, _offset + _length);
+
+  int get length => _length;
+}
+
+/// The ListRangeIterator provides more capabilities than a standard iterator,
+/// including the ability to get the current position, count remaining items,
+/// and move forward/backward within the iterator.
+abstract class _ListRangeIterator implements Iterator<int> {
+  bool moveNext();
+
+  int get current;
+
+  int get position;
+
+  void backup([int by]);
+
+  int get remaining;
+
+  void skip([int count]);
+}
+
+class _ListRangeIteratorImpl implements _ListRangeIterator {
+  final List<int> _source;
+  int _offset;
+  final int _end;
+
+  _ListRangeIteratorImpl(this._source, int offset, this._end)
+      : _offset = offset - 1;
+
+  int get current => _source[_offset];
+
+  bool moveNext() => ++_offset < _end;
+
+  int get position => _offset;
+
+  void backup([int by = 1]) {
+    _offset -= by;
+  }
+
+  int get remaining => _end - _offset - 1;
+
+  void skip([int count = 1]) {
+    _offset += count;
+  }
+}

diff --git a/pubspec.yaml b/pubspec.yaml
index b3cf488..92e502e 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml

@@ -1,5 +1,5 @@
 name: html
-version: 0.13.4+1
+version: 0.14.0-dev
 
 description: APIs for parsing and manipulating HTML content outside the browser.
 author: Dart Team <misc@dartlang.org>
@@ -11,9 +11,9 @@
 dependencies:
   csslib: '>=0.13.2 <0.15.0'
   source_span: '>=1.0.0 <2.0.0'
-  utf: '>=0.9.0 <0.10.0'
 
 dev_dependencies:
   path: ^1.6.2
   pedantic: ^1.3.0
   test: ^1.3.0
+  utf: '>=0.9.0 <0.10.0'

diff --git a/test/data/parser_feature/raw_file.html b/test/data/parser_feature/raw_file.html
deleted file mode 100644
index bcdbf76..0000000
--- a/test/data/parser_feature/raw_file.html
+++ /dev/null

@@ -1,6 +0,0 @@
-<!doctype html>
-<html>
-<body>
-Hello world!
-</body>
-</html>

diff --git a/test/parser_feature_test.dart b/test/parser_feature_test.dart
index 2591a2d..0889f44 100644
--- a/test/parser_feature_test.dart
+++ b/test/parser_feature_test.dart

@@ -1,13 +1,13 @@
 /// Additional feature tests that aren't based on test data.
 library parser_feature_test;
 
-import 'package:test/test.dart';
 import 'package:html/dom.dart';
 import 'package:html/parser.dart';
 import 'package:html/src/constants.dart';
 import 'package:html/src/encoding_parser.dart';
 import 'package:html/src/treebuilder.dart';
 import 'package:source_span/source_span.dart';
+import 'package:test/test.dart';
 
 main() {
   _testElementSpans();

diff --git a/test/parser_test.dart b/test/parser_test.dart
index 1289f61..1db1586 100644
--- a/test/parser_test.dart
+++ b/test/parser_test.dart

@@ -2,13 +2,12 @@
 library parser_test;
 
 import 'dart:convert';
-import 'dart:io';
-import 'package:path/path.dart' as pathos;
-import 'package:test/test.dart';
+
 import 'package:html/dom.dart';
 import 'package:html/parser.dart';
-import 'package:html/parser_console.dart' as parser_console;
-import 'package:html/src/inputstream.dart' as inputstream;
+import 'package:path/path.dart' as pathos;
+import 'package:test/test.dart';
+
 import 'support.dart';
 
 // Run the parse error checks
@@ -71,16 +70,6 @@
 }
 
 void main() {
-  test('dart:io', () {
-    // ensure IO support is unregistered
-    expect(inputstream.consoleSupport,
-        const TypeMatcher<inputstream.ConsoleSupport>());
-    var file = File('$testDataDir/parser_feature/raw_file.html').openSync();
-    expect(() => parse(file), throwsA(const TypeMatcher<ArgumentError>()));
-    parser_console.useConsole();
-    expect(parse(file).body.innerHtml.trim(), 'Hello world!');
-  });
-
   for (var path in getDataFiles('tree-construction')) {
     if (!path.endsWith('.dat')) continue;
commit	5abd53c071dda513e34228066109ffa5c4d17794	[log] [tgz]
author	Kevin Moore <kevmoo@users.noreply.github.com>	Mon Apr 08 13:15:55 2019 -0700
committer	GitHub <noreply@github.com>	Mon Apr 08 13:15:55 2019 -0700
tree	373e06a7939b9715a696164c1eb5eeb7a90b822f
parent	30367f553a208504da9d3cc0f224dced9bca8ff7 [diff]