blob: bb44fee592ffb19bc46a86154f8f6fd2c18a429e [file] [log] [blame]
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// Generated by scripts/tokenizer_gen.py.
part of '../parser.dart';
/// Tokenizer state to support look ahead for Less' nested selectors.
///
/// An immutable snapshot of a [TokenizerBase]'s scanning position and
/// selector-mode flags, captured by [TokenizerBase.mark] and replayed by
/// [TokenizerBase.restore].
class TokenizerState {
  final int index;
  final int startIndex;
  final bool inSelectorExpression;
  final bool inSelector;

  /// Captures the current scanning state of [tokenizer].
  TokenizerState(TokenizerBase tokenizer)
      : index = tokenizer._index,
        startIndex = tokenizer._startIndex,
        inSelectorExpression = tokenizer.inSelectorExpression,
        inSelector = tokenizer.inSelector;
}
/// The base class for our tokenizer. The hand coded parts are in this file,
/// with the generated parts in the subclass Tokenizer.
abstract class TokenizerBase {
  /// Source file being tokenized; used to attach spans to every [Token]
  /// via [SourceFile.span].
  final SourceFile _file;

  /// The raw text being scanned.
  final String _text;

  /// Whether we are tokenizing inside a string. When true, whitespace and
  /// comments resume scanning with [next] instead of producing their own
  /// tokens (see [finishWhitespace] and [finishMultiLineComment]).
  // NOTE(review): never reassigned in the visible code, so it looks like it
  // could be `final` — confirm the generated subclass doesn't write it before
  // removing the ignore below.
  // ignore: prefer_final_fields
  bool _inString;

  /// Changes tokenization when in a pseudo function expression. If true then
  /// minus signs are handled as operators instead of identifiers.
  bool inSelectorExpression = false;

  /// Changes tokenization when in selectors. If true, it prevents identifiers
  /// from being treated as units. This would break things like ":lang(fr)" or
  /// the HTML (unknown) tag name "px", which is legal to use in a selector.
  // TODO(jmesserly): is this a problem elsewhere? "fr" for example will be
  // processed as a "fraction" unit token, preventing it from working in
  // places where an identifier is expected. This was breaking selectors like:
  //     :lang(fr)
  // The assumption that "fr" always means fraction (and similar issue with
  // other units) doesn't seem valid. We probably should defer this
  // analysis until we reach places in the parser where units are expected.
  // I'm not sure this is tokenizing as described in the specs:
  //     http://dev.w3.org/csswg/css-syntax/
  //     http://dev.w3.org/csswg/selectors4/
  bool inSelector = false;

  /// Index of the next code unit to read.
  int _index = 0;

  /// Start index of the token currently being scanned.
  int _startIndex = 0;

  TokenizerBase(this._file, this._text, this._inString, [this._index = 0]);

  /// Returns the next token in the input.
  Token next();

  /// Token kind of the identifier just scanned; implemented by the generated
  /// subclass.
  int getIdentifierKind();

  /// Snapshot of Tokenizer scanning state.
  TokenizerState get mark => TokenizerState(this);

  /// Restore Tokenizer scanning state.
  void restore(TokenizerState markedData) {
    _index = markedData.index;
    _startIndex = markedData.startIndex;
    inSelectorExpression = markedData.inSelectorExpression;
    inSelector = markedData.inSelector;
  }

  /// Consumes and returns the next code unit, or 0 at end of input.
  int _nextChar() {
    if (_index < _text.length) {
      return _text.codeUnitAt(_index++);
    } else {
      return 0;
    }
  }

  /// Returns the code unit [offset] positions ahead without consuming it,
  /// or 0 when that position is past the end of input.
  int _peekChar([int offset = 0]) {
    if (_index + offset < _text.length) {
      return _text.codeUnitAt(_index + offset);
    } else {
      return 0;
    }
  }

  /// Consumes the next code unit and returns true only if it equals [ch];
  /// otherwise leaves the position unchanged and returns false.
  bool _maybeEatChar(int ch) {
    if (_index < _text.length) {
      if (_text.codeUnitAt(_index) == ch) {
        _index++;
        return true;
      } else {
        return false;
      }
    } else {
      return false;
    }
  }

  /// Whether [first] (an already-consumed code unit) begins a numeric
  /// literal, looking ahead at the unconsumed input for digits after a
  /// leading '.', '+', or '-'.
  bool _nextCharsAreNumber(int first) {
    if (TokenizerHelpers.isDigit(first)) return true;
    var second = _peekChar();
    if (first == TokenChar.DOT) return TokenizerHelpers.isDigit(second);
    if (first == TokenChar.PLUS || first == TokenChar.MINUS) {
      return TokenizerHelpers.isDigit(second) ||
          (second == TokenChar.DOT && TokenizerHelpers.isDigit(_peekChar(1)));
    }
    return false;
  }

  /// Creates a token of [kind] spanning from [_startIndex] to the current
  /// position.
  Token _finishToken(int kind) {
    return Token(kind, _file.span(_startIndex, _index));
  }

  /// Creates an error token spanning the current token's text, with an
  /// optional [message].
  Token _errorToken([String? message]) {
    return ErrorToken(
        TokenKind.ERROR, _file.span(_startIndex, _index), message);
  }

  /// Consumes a run of whitespace and returns either a WHITESPACE token or,
  /// inside a string, the token that follows the whitespace.
  Token finishWhitespace() {
    // Back up to re-scan the character that triggered this call.
    _index--;
    while (_index < _text.length) {
      final ch = _text.codeUnitAt(_index++);
      if (ch == TokenChar.SPACE ||
          ch == TokenChar.TAB ||
          ch == TokenChar.RETURN) {
        // do nothing
      } else if (ch == TokenChar.NEWLINE) {
        if (!_inString) {
          return _finishToken(TokenKind.WHITESPACE); // note the newline?
        }
      } else {
        // Non-whitespace: push it back for the next scan.
        _index--;
        if (_inString) {
          return next();
        } else {
          return _finishToken(TokenKind.WHITESPACE);
        }
      }
    }
    return _finishToken(TokenKind.END_OF_FILE);
  }

  /// Consumes a (possibly nested) `/* ... */` comment, assuming the opening
  /// delimiter has already been consumed. Returns an error token on
  /// unterminated comments, a COMMENT token otherwise — or, inside a string,
  /// the token that follows the comment.
  Token finishMultiLineComment() {
    var nesting = 1;
    do {
      var ch = _nextChar();
      if (ch == 0) {
        // End of input before the comment closed.
        return _errorToken();
      } else if (ch == TokenChar.ASTERISK) {
        if (_maybeEatChar(TokenChar.SLASH)) {
          nesting--;
        }
      } else if (ch == TokenChar.SLASH) {
        if (_maybeEatChar(TokenChar.ASTERISK)) {
          nesting++;
        }
      }
    } while (nesting > 0);

    if (_inString) {
      return next();
    } else {
      return _finishToken(TokenKind.COMMENT);
    }
  }

  /// Consumes a run of decimal digits (possibly none).
  void eatDigits() {
    while (_index < _text.length) {
      if (TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
        _index++;
      } else {
        return;
      }
    }
  }

  /// Returns the value (0-15) of the hex digit code unit [c], or -1 if [c]
  /// is not a hex digit.
  static int _hexDigit(int c) {
    if (c >= 48 /*0*/ && c <= 57 /*9*/) {
      return c - 48;
    } else if (c >= 97 /*a*/ && c <= 102 /*f*/) {
      return c - 87;
    } else if (c >= 65 /*A*/ && c <= 70 /*F*/) {
      return c - 55;
    } else {
      return -1;
    }
  }

  /// Reads a hexadecimal number of exactly [hexLength] digits or, when
  /// [hexLength] is null, as many hex digits as are available.
  ///
  /// Returns the value read; returns -1 when a fixed-length read runs out of
  /// input or hits a non-hex digit.
  int readHex([int? hexLength]) {
    int maxIndex;
    if (hexLength == null) {
      // NOTE(review): this stops one code unit short of the end of input,
      // which looks like an off-by-one; preserved as-is — confirm intent.
      maxIndex = _text.length - 1;
    } else {
      // TODO(jimhug): What if this is too long?
      maxIndex = _index + hexLength;
      if (maxIndex >= _text.length) return -1;
    }
    var result = 0;
    while (_index < maxIndex) {
      final digit = _hexDigit(_text.codeUnitAt(_index));
      if (digit == -1) {
        // A non-hex digit ends a variable-length read successfully, but
        // fails a fixed-length read.
        if (hexLength == null) {
          return result;
        } else {
          return -1;
        }
      }
      // Multiply by 16 rather than shift by 4 since that will result in a
      // correct value for numbers that exceed the 32 bit precision of JS
      // 'integers'.
      // TODO: Figure out a better solution to integer truncation. Issue 638.
      result = (result * 16) + digit;
      _index++;
    }
    return result;
  }

  /// Finishes scanning a numeric literal whose first digit has already been
  /// consumed, producing an INTEGER or DOUBLE token.
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == TokenChar.DOT) {
      // Handle the case of 1.toString().
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return finishNumberExtra(TokenKind.DOUBLE);
      } else {
        // Not a fraction: push the '.' back.
        _index--;
      }
    }

    return finishNumberExtra(TokenKind.INTEGER);
  }

  /// Consumes an optional exponent (e.g. `e-5`) and validates the character
  /// following the number, then produces a token of [kind] (upgraded to
  /// DOUBLE when an exponent is present).
  Token finishNumberExtra(int kind) {
    if (_maybeEatChar(101 /*e*/) || _maybeEatChar(69 /*E*/)) {
      kind = TokenKind.DOUBLE;
      // Consume an optional exponent sign. Fixed from TokenKind.MINUS/PLUS:
      // _maybeEatChar compares code units, so the TokenChar character
      // constants (as used in _nextCharsAreNumber) are the correct ones.
      _maybeEatChar(TokenChar.MINUS);
      _maybeEatChar(TokenChar.PLUS);
      eatDigits();
    }
    if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
      _nextChar();
      return _errorToken('illegal character in number');
    }

    return _finishToken(kind);
  }

  /// Builds a STRING (or STRING_PART when [isPart]) token whose value is the
  /// code units accumulated in [buf].
  Token _makeStringToken(List<int> buf, bool isPart) {
    final s = String.fromCharCodes(buf);
    final kind = isPart ? TokenKind.STRING_PART : TokenKind.STRING;
    return LiteralToken(kind, _file.span(_startIndex, _index), s);
  }

  /// Builds a STRING token for an IE filter expression covering
  /// [start]..[end] of the input.
  Token makeIEFilter(int start, int end) {
    var filter = _text.substring(start, end);
    return LiteralToken(TokenKind.STRING, _file.span(start, end), filter);
  }

  /// Builds a STRING token for a raw string, stripping the `r` prefix and
  /// quote delimiters from the token text.
  Token _makeRawStringToken(bool isMultiline) {
    String s;
    if (isMultiline) {
      // Skip initial newline in multiline strings
      var start = _startIndex + 4;
      if (_text[start] == '\n') start++;
      s = _text.substring(start, _index - 3);
    } else {
      s = _text.substring(_startIndex + 2, _index - 1);
    }
    return LiteralToken(TokenKind.STRING, _file.span(_startIndex, _index), s);
  }

  /// Consumes the body of a triple-quoted string (opening quotes already
  /// consumed), processing escape sequences.
  Token finishMultilineString(int quote) {
    var buf = <int>[];
    while (true) {
      var ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote) {
        if (_maybeEatChar(quote)) {
          if (_maybeEatChar(quote)) {
            // Three quotes in a row: the string is closed.
            return _makeStringToken(buf, false);
          }
          // Only two quotes: both are literal content.
          buf.add(quote);
        }
        buf.add(quote);
      } else if (ch == TokenChar.BACKSLASH) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken('invalid hex escape sequence');
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  /// Consumes a string opened by [quote] (one quote already consumed),
  /// dispatching to multiline handling for triple quotes.
  Token finishString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        // skip an initial newline
        _maybeEatChar(TokenChar.NEWLINE);
        return finishMultilineString(quote);
      } else {
        // Two quotes total: the empty string.
        return _makeStringToken(<int>[], false);
      }
    }
    return finishStringBody(quote);
  }

  /// Consumes a raw string opened by [quote] (the `r` and one quote already
  /// consumed); no escape processing is done.
  Token finishRawString(int quote) {
    if (_maybeEatChar(quote)) {
      if (_maybeEatChar(quote)) {
        return finishMultilineRawString(quote);
      } else {
        // Two quotes total: the empty string.
        return _makeStringToken(<int>[], false);
      }
    }
    while (true) {
      var ch = _nextChar();
      if (ch == quote) {
        return _makeRawStringToken(false);
      } else if (ch == 0) {
        return _errorToken();
      }
    }
  }

  /// Consumes a triple-quoted raw string body (opening quotes already
  /// consumed); no escape processing is done.
  Token finishMultilineRawString(int quote) {
    while (true) {
      var ch = _nextChar();
      if (ch == 0) {
        return _errorToken();
      } else if (ch == quote && _maybeEatChar(quote) && _maybeEatChar(quote)) {
        return _makeRawStringToken(true);
      }
    }
  }

  /// Consumes a single-quoted string body up to the closing [quote],
  /// processing escape sequences.
  Token finishStringBody(int quote) {
    var buf = <int>[];
    while (true) {
      var ch = _nextChar();
      if (ch == quote) {
        return _makeStringToken(buf, false);
      } else if (ch == 0) {
        return _errorToken();
      } else if (ch == TokenChar.BACKSLASH) {
        var escapeVal = readEscapeSequence();
        if (escapeVal == -1) {
          return _errorToken('invalid hex escape sequence');
        } else {
          buf.add(escapeVal);
        }
      } else {
        buf.add(ch);
      }
    }
  }

  /// Reads the escape sequence following a backslash and returns the code
  /// unit it denotes, or -1 for an invalid hex/unicode escape.
  int readEscapeSequence() {
    final ch = _nextChar();
    int hexValue;
    switch (ch) {
      case 110 /*n*/ :
        return TokenChar.NEWLINE;
      case 114 /*r*/ :
        return TokenChar.RETURN;
      case 102 /*f*/ :
        return TokenChar.FF;
      case 98 /*b*/ :
        return TokenChar.BACKSPACE;
      case 116 /*t*/ :
        return TokenChar.TAB;
      case 118 /*v*/ :
        // NOTE(review): '\v' conventionally denotes vertical tab (0x0B), but
        // this returns the same value as '\f'; preserved as-is — confirm
        // whether this is intentional.
        return TokenChar.FF;
      case 120 /*x*/ :
        hexValue = readHex(2);
        break;
      case 117 /*u*/ :
        if (_maybeEatChar(TokenChar.LBRACE)) {
          // \u{...}: variable-length hex value.
          hexValue = readHex();
          if (!_maybeEatChar(TokenChar.RBRACE)) {
            return -1;
          }
        } else {
          // \uXXXX: exactly four hex digits.
          hexValue = readHex(4);
        }
        break;
      default:
        // Any other escaped character stands for itself.
        return ch;
    }

    if (hexValue == -1) return -1;

    // According to the Unicode standard the high and low surrogate halves
    // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
    // are not legal Unicode values.
    // (Parentheses added for clarity only; '&&' already binds tighter
    // than '||'.)
    if (hexValue < 0xD800 || (hexValue > 0xDFFF && hexValue <= 0xFFFF)) {
      return hexValue;
    } else if (hexValue <= 0x10FFFF) {
      messages.error('unicode values greater than 2 bytes not implemented yet',
          _file.span(_startIndex, _startIndex + 1));
      return -1;
    } else {
      // Surrogate halves and values above U+10FFFF are invalid.
      return -1;
    }
  }

  /// Scans the token starting at a '.' (already consumed): either the
  /// fractional part of a DOUBLE or a plain DOT token.
  Token finishDot() {
    if (TokenizerHelpers.isDigit(_peekChar())) {
      eatDigits();
      return finishNumberExtra(TokenKind.DOUBLE);
    } else {
      return _finishToken(TokenKind.DOT);
    }
  }
}