// blob: cb1b516eaac93f0beb4b0d99be4801e9327d056b [file] [log] [blame]
// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
/**
 * Tokenizer for template source text, layered on [TokenizerBase].
 *
 * [next] reads one character and dispatches on it: single-character
 * punctuation is looked up through [tmplTokens], while identifiers, numbers,
 * attribute values and comments are completed by the `finish*` methods below.
 * Scanning state (`_index`, `_startIndex`, `_text`, `_interpStack`, ...) and
 * the low-level helpers (`_nextChar`, `_peekChar`, `_maybeEatChar`,
 * `_finishToken`, `finishWhitespace`, `eatDigits`) are not declared here;
 * presumably they come from [TokenizerBase].
 */
class Tokenizer extends TokenizerBase {
  /**
   * Token table. `tmplTokens.tokens[kind]` is the value compared against the
   * characters read from the input to recognise punctuation tokens.
   */
  TokenKind tmplTokens;

  // Set to false by the constructor; nothing in this class reads it.
  bool _selectorParsing;

  /**
   * Creates a tokenizer over [source]. The arguments [source],
   * [skipWhitespace] and [index] are forwarded unchanged to the
   * [TokenizerBase] constructor.
   */
  Tokenizer(SourceFile source, bool skipWhitespace, [int index = 0])
      : tmplTokens = new TokenKind(),
        _selectorParsing = false,
        // The super-constructor call must come last in an initializer list.
        super(source, skipWhitespace, index);

  /**
   * The value of `_startIndex`, which [next] sets to the scan position just
   * before it reads each token.
   */
  int get startIndex => _startIndex;

  /** Moves the scan position to [idx]. */
  void set index(int idx) {
    _index = idx;
  }

  /**
   * Scans and returns the next token.
   *
   * When [inTag] is true, whitespace is handed to `finishWhitespace()` and an
   * `=` may be followed by an attribute value, which is then returned as a
   * single token. When [inTag] is false, one whitespace character produces a
   * WHITESPACE token and `=` is always returned as EQUAL.
   *
   * A character that starts no known token yields an error token.
   */
  Token next([bool inTag = true]) {
    // Keep track of our starting position.
    _startIndex = _index;

    if (_interpStack != null && _interpStack.depth == 0) {
      // TODO(terry): Enable for variable and string interpolation. When that
      //              happens, remember the current stack entry before popping
      //              it and finish the (multiline) string body with its quote.
      _interpStack = _interpStack.pop();
    }

    final int ch = _nextChar();
    if (ch == 0) {
      return _finishToken(TokenKind.END_OF_FILE);
    }

    // The comparisons below were `case` labels in a switch, but case
    // expressions must be compile-time constants and table lookups are not.
    // The if-chain tests the same values in the same order.
    final chars = tmplTokens.tokens;

    if (ch == chars[TokenKind.SPACE] ||
        ch == chars[TokenKind.TAB] ||
        ch == chars[TokenKind.NEWLINE] ||
        ch == chars[TokenKind.RETURN]) {
      if (inTag) {
        return finishWhitespace();
      }
      return _finishToken(TokenKind.WHITESPACE);
    }
    if (ch == chars[TokenKind.END_OF_FILE]) {
      return _finishToken(TokenKind.END_OF_FILE);
    }
    if (ch == chars[TokenKind.LPAREN]) {
      return _finishToken(TokenKind.LPAREN);
    }
    if (ch == chars[TokenKind.RPAREN]) {
      return _finishToken(TokenKind.RPAREN);
    }
    if (ch == chars[TokenKind.COMMA]) {
      return _finishToken(TokenKind.COMMA);
    }
    if (ch == chars[TokenKind.LBRACE]) {
      return _finishToken(TokenKind.LBRACE);
    }
    if (ch == chars[TokenKind.RBRACE]) {
      return _finishToken(TokenKind.RBRACE);
    }
    if (ch == chars[TokenKind.LESS_THAN]) {
      return _finishToken(TokenKind.LESS_THAN);
    }
    if (ch == chars[TokenKind.GREATER_THAN]) {
      return _finishToken(TokenKind.GREATER_THAN);
    }

    if (ch == chars[TokenKind.EQUAL]) {
      if (inTag) {
        // Inside a tag, `=` introduces an attribute value: quoted with ' or ",
        // or a bare run of characters.
        if (_maybeEatChar(chars[TokenKind.SINGLE_QUOTE])) {
          return finishQuotedAttrValue(chars[TokenKind.SINGLE_QUOTE]);
        } else if (_maybeEatChar(chars[TokenKind.DOUBLE_QUOTE])) {
          return finishQuotedAttrValue(chars[TokenKind.DOUBLE_QUOTE]);
        } else if (TokenizerHelpers.isAttributeValueStart(_peekChar())) {
          return finishAttrValue();
        }
      }
      return _finishToken(TokenKind.EQUAL);
    }

    if (ch == chars[TokenKind.SLASH]) {
      if (_maybeEatChar(chars[TokenKind.GREATER_THAN])) {
        return _finishToken(TokenKind.END_NO_SCOPE_TAG); // />
      } else if (_maybeEatChar(chars[TokenKind.ASTERISK])) {
        return finishMultiLineComment(); // /*
      }
      return _finishToken(TokenKind.SLASH);
    }

    if (ch == chars[TokenKind.DOLLAR]) {
      if (_maybeEatChar(chars[TokenKind.LBRACE])) {
        if (_maybeEatChar(chars[TokenKind.HASH])) {
          return _finishToken(TokenKind.START_COMMAND); // ${#
        } else if (_maybeEatChar(chars[TokenKind.SLASH])) {
          return _finishToken(TokenKind.END_COMMAND); // ${/
        }
        return _finishToken(TokenKind.START_EXPRESSION); // ${
      }
      return _finishToken(TokenKind.DOLLAR);
    }

    if (TokenizerHelpers.isIdentifierStart(ch)) {
      return finishIdentifier();
    } else if (TokenizerHelpers.isDigit(ch)) {
      return finishNumber();
    }
    return _errorToken();
  }

  // TODO(jmesserly): we need a way to emit human readable error messages from
  // the tokenizer.
  /**
   * Finishes the current token as an ERROR token.
   *
   * [message] is accepted but currently ignored.
   */
  Token _errorToken([String message = null]) {
    return _finishToken(TokenKind.ERROR);
  }

  /**
   * Classifies the text between `_startIndex` and `_index`.
   *
   * Element names are tried first, then keywords. If neither matcher returns
   * a non-negative id, the result is [TokenKind.IDENTIFIER].
   */
  int getIdentifierKind() {
    final int length = _index - _startIndex;

    // Is the identifier an element?
    int tokId = TokenKind.matchElements(_text, _startIndex, length);

    // TODO: attribute-name matching (TokenKind.matchAttributes) is not enabled.

    if (tokId == -1) {
      tokId = TokenKind.matchKeywords(_text, _startIndex, length);
    }
    return tokId >= 0 ? tokId : TokenKind.IDENTIFIER;
  }

  /**
   * Consumes the rest of an identifier and returns its token.
   *
   * Per the original note, this replaces the base implementation so that
   * [TokenizerHelpers.isIdentifierPart] decides which characters belong to
   * the identifier. The token kind comes from [getIdentifierKind].
   */
  Token finishIdentifier() {
    // Stop at, without consuming, the first character that is not part of an
    // identifier.
    while (_index < _text.length &&
        TokenizerHelpers.isIdentifierPart(_text.codeUnitAt(_index))) {
      _index += 1;
    }

    if (_interpStack != null && _interpStack.depth == -1) {
      _interpStack.depth = 0;
    }

    return _finishToken(getIdentifierKind());
  }

  /**
   * Builds an ATTR_VALUE [LiteralToken] spanning `_startIndex`..`_index`
   * whose literal value is the string made from the char codes in [buf].
   */
  Token _makeAttributeValueToken(List<int> buf) {
    final s = new String.fromCharCodes(buf);
    return new LiteralToken(
        TokenKind.ATTR_VALUE, _source, _startIndex, _index, s);
  }

  /**
   * Reads a quoted attribute value whose opening quote was already consumed.
   *
   * Characters are collected until one equals [quote]; that closing character
   * is consumed but not included in the value. Reading a 0 character first
   * (treated as end of input) yields an error token.
   *
   * NOTE(review): the default [quote] of -1 is not special-cased here, so it
   * presumably never matches and the scan would run to end of input. Unquoted
   * values are handled by [finishAttrValue] instead.
   */
  Token finishQuotedAttrValue([int quote = -1]) {
    final buf = <int>[];
    while (true) {
      int ch = _nextChar();
      if (ch == quote) {
        return _makeAttributeValueToken(buf);
      } else if (ch == 0) {
        return _errorToken();
      } else {
        buf.add(ch);
      }
    }
  }

  /**
   * Reads an unquoted attribute value.
   *
   * Characters are collected until the next one is whitespace, `/` or `>`;
   * that terminator is left unconsumed. A 0 character (treated as end of
   * input) before any terminator yields an error token.
   */
  Token finishAttrValue() {
    final buf = <int>[];
    while (true) {
      int ch = _peekChar();
      if (TokenizerHelpers.isWhitespace(ch) ||
          TokenizerHelpers.isSlash(ch) ||
          TokenizerHelpers.isCloseTag(ch)) {
        return _makeAttributeValueToken(buf);
      } else if (ch == 0) {
        return _errorToken();
      } else {
        buf.add(_nextChar());
      }
    }
  }

  /**
   * Consumes the rest of a number and returns an INTEGER or DOUBLE token.
   *
   * A `.` only becomes part of the number when a digit follows it; otherwise
   * it is pushed back and the token is an INTEGER.
   */
  Token finishNumber() {
    eatDigits();

    if (_peekChar() == 46 /*.*/) {
      // Handle the case of 1.toString().
      _nextChar();
      if (TokenizerHelpers.isDigit(_peekChar())) {
        eatDigits();
        return _finishToken(TokenKind.DOUBLE);
      } else {
        // Not a fraction: un-consume the '.'.
        _index -= 1;
      }
    }

    return _finishToken(TokenKind.INTEGER);
  }

  /** Consumes one decimal digit if present; returns whether it did. */
  bool maybeEatDigit() {
    if (_index < _text.length &&
        TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  /** Consumes a (possibly empty) run of hexadecimal digits. */
  void eatHexDigits() {
    while (_index < _text.length &&
        TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
      _index += 1;
    }
  }

  /** Consumes one hexadecimal digit if present; returns whether it did. */
  bool maybeEatHexDigit() {
    if (_index < _text.length &&
        TokenizerHelpers.isHexDigit(_text.codeUnitAt(_index))) {
      _index += 1;
      return true;
    }
    return false;
  }

  /**
   * Consumes the remainder of a comment whose opener was already read.
   *
   * The comment ends at `*` followed by `/` (COMMENT) or at `-->`
   * (HTML_COMMENT). Reading a 0 character first yields INCOMPLETE_COMMENT.
   * When `_skipWhitespace` is set the comment is dropped and the token after
   * it is returned instead.
   *
   * NOTE(review): that follow-up call is `next()` with its default argument,
   * so the caller's `inTag` value is not carried over — confirm this is
   * intended.
   */
  Token finishMultiLineComment() {
    // Every path out of this loop is a return, so nothing follows it.
    while (true) {
      int ch = _nextChar();
      if (ch == 0) {
        return _finishToken(TokenKind.INCOMPLETE_COMMENT);
      } else if (ch == 42 /*'*'*/) {
        if (_maybeEatChar(47 /*'/'*/)) {
          if (_skipWhitespace) {
            return next();
          } else {
            return _finishToken(TokenKind.COMMENT);
          }
        }
      } else if (ch == tmplTokens.tokens[TokenKind.MINUS]) {
        /* Check if close part of Comment Definition --> (CDC). */
        if (_maybeEatChar(tmplTokens.tokens[TokenKind.MINUS])) {
          if (_maybeEatChar(tmplTokens.tokens[TokenKind.GREATER_THAN])) {
            if (_skipWhitespace) {
              return next();
            } else {
              return _finishToken(TokenKind.HTML_COMMENT);
            }
          }
        }
      }
    }
  }
}
/**
 * Static character-classification helpers.
 *
 * Each predicate takes one character code and tests it against fixed ASCII
 * code points.
 */
class TokenizerHelpers {
  /** Whether [code] lies between [low] and [high], both inclusive. */
  static bool _inRange(int code, int low, int high) =>
      code >= low && code <= high;

  /** Whether [c] is `a`-`z`, `A`-`Z` or `_`. */
  static bool isIdentifierStart(int c) =>
      _inRange(c, 97 /*a*/, 122 /*z*/) ||
      _inRange(c, 65 /*A*/, 90 /*Z*/) ||
      c == 95 /*_*/;

  /** Whether [c] is a decimal digit `0`-`9`. */
  static bool isDigit(int c) => _inRange(c, 48 /*0*/, 57 /*9*/);

  /** Whether [c] is a decimal digit or a letter `a`-`f` / `A`-`F`. */
  static bool isHexDigit(int c) =>
      isDigit(c) ||
      _inRange(c, 97 /*a*/, 102 /*f*/) ||
      _inRange(c, 65 /*A*/, 70 /*F*/);

  /** Whether [c] is a space, tab, line feed or carriage return. */
  static bool isWhitespace(int c) {
    switch (c) {
      case 32: // ' '
      case 9: // '\t'
      case 10: // '\n'
      case 13: // '\r'
        return true;
      default:
        return false;
    }
  }

  /**
   * Whether [c] may continue an identifier: anything accepted by
   * [isInterpIdentifierPart], plus `-`, `:` and `.`.
   */
  static bool isIdentifierPart(int c) {
    if (isInterpIdentifierPart(c)) {
      return true;
    }
    return c == 45 /*-*/ || c == 58 /*:*/ || c == 46 /*.*/;
  }

  /** Whether [c] is an identifier-start character or a decimal digit. */
  static bool isInterpIdentifierPart(int c) =>
      isIdentifierStart(c) || isDigit(c);

  /** Whether [c] is anything other than whitespace, `/` or `>`. */
  static bool isAttributeValueStart(int c) =>
      !(isWhitespace(c) || isSlash(c) || isCloseTag(c));

  /** Whether [c] is `/`. */
  static bool isSlash(int c) => c == 47 /* / */;

  /** Whether [c] is `>`. */
  static bool isCloseTag(int c) => c == 62 /* > */;
}