| // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| library polymer_expressions.tokenizer; |
| |
// Code points the tokenizer compares runes against, listed in ascending
// order. Values are written in hexadecimal; the trailing comment shows the
// character each one stands for.

// Control characters.
const int _TAB = 0x09;
const int _LF = 0x0A;
const int _VTAB = 0x0B;
const int _FF = 0x0C;
const int _CR = 0x0D;

// Punctuation up to the digits.
const int _SPACE = 0x20;
const int _BANG = 0x21; // !
const int _DQ = 0x22; // "
const int _$ = 0x24; // $
const int _PERCENT = 0x25; // %
const int _AMPERSAND = 0x26; // &
const int _SQ = 0x27; // '
const int _OPEN_PAREN = 0x28; // (
const int _CLOSE_PAREN = 0x29; // )
const int _STAR = 0x2A; // *
const int _PLUS = 0x2B; // +
const int _COMMA = 0x2C; // ,
const int _MINUS = 0x2D; // -
const int _PERIOD = 0x2E; // .
const int _SLASH = 0x2F; // /

// Digit range bounds.
const int _0 = 0x30;
const int _9 = 0x39;

// Punctuation between the digits and the upper-case letters.
const int _COLON = 0x3A; // :
const int _LT = 0x3C; // <
const int _EQ = 0x3D; // =
const int _GT = 0x3E; // >
const int _QUESTION = 0x3F; // ?

// Upper-case letter range bounds.
const int _A = 0x41;
const int _Z = 0x5A;

// Punctuation between the upper-case and lower-case letters.
const int _OPEN_SQUARE_BRACKET = 0x5B; // [
const int _BACKSLASH = 0x5C; // backslash
const int _CLOSE_SQUARE_BRACKET = 0x5D; // ]
const int _CARET = 0x5E; // ^
const int _US = 0x5F; // _

// Lower-case letters: the range bounds plus the letters used by escape().
const int _a = 0x61;
const int _f = 0x66;
const int _n = 0x6E;
const int _r = 0x72;
const int _t = 0x74;
const int _v = 0x76;
const int _z = 0x7A;

// Trailing punctuation.
const int _OPEN_CURLY_BRACKET = 0x7B; // {
const int _BAR = 0x7C; // |
const int _CLOSE_CURLY_BRACKET = 0x7D; // }

// Non-breaking space.
const int _NBSP = 0xA0;
| |
// Code points that can start an operator token.
const _OPERATORS = const [
  _PLUS,
  _MINUS,
  _STAR,
  _SLASH,
  _BANG,
  _AMPERSAND,
  _PERCENT,
  _LT,
  _EQ,
  _GT,
  _QUESTION,
  _CARET,
  _BAR
];

// Code points that are emitted as single-character grouper tokens.
const _GROUPERS = const [
  _OPEN_PAREN,
  _CLOSE_PAREN,
  _OPEN_SQUARE_BRACKET,
  _CLOSE_SQUARE_BRACKET,
  _OPEN_CURLY_BRACKET,
  _CLOSE_CURLY_BRACKET
];

// The only two-character sequences recognised as a single operator.
const _TWO_CHAR_OPS = const [
  '==',
  '!=',
  '<=',
  '>=',
  '||',
  '&&'
];

// Identifier spellings that are emitted as keyword tokens instead.
const _KEYWORDS = const [
  'in',
  'this'
];
| |
/// Precedence level shared by the postfix tokens (call, index and member
/// access).
const POSTFIX_PRECEDENCE = 11;

/// Precedence of each operator and grouper, keyed by its source text.
///
/// The levels ascend from 0 through [POSTFIX_PRECEDENCE].
const _PRECEDENCE = const {
  // Level 0: unary not, separators and closing groupers.
  '!': 0,
  ':': 0,
  ',': 0,
  ')': 0,
  ']': 0,
  '}': 0, // NOTE(review): the original author marked this level as uncertain.

  // ternary
  '?': 1,

  // logical
  '||': 2,
  '&&': 3,

  // single-character '|', '^' and '&'
  '|': 4,
  '^': 5,
  '&': 6,

  // equality
  '!=': 7,
  '==': 7,

  // relational
  '>=': 8,
  '>': 8,
  '<=': 8,
  '<': 8,

  // additive
  '+': 9,
  '-': 9,

  // multiplicative
  '%': 10,
  '/': 10,
  '*': 10,

  // postfix
  '(': POSTFIX_PRECEDENCE,
  '[': POSTFIX_PRECEDENCE,
  '.': POSTFIX_PRECEDENCE,
  // NOTE(review): the original author was not sure this level is correct.
  '{': POSTFIX_PRECEDENCE,
};
| |
// Values stored in Token.kind to identify what a token represents.

/// A quoted string literal; the token value holds the unescaped contents.
const int STRING_TOKEN = 1;

/// An identifier that is not a keyword.
const int IDENTIFIER_TOKEN = 2;

/// A '.' that is not part of a decimal literal.
const int DOT_TOKEN = 3;

/// A ','.
const int COMMA_TOKEN = 4;

/// A ':'.
const int COLON_TOKEN = 5;

/// A run of digits with no fractional part.
const int INTEGER_TOKEN = 6;

/// A number containing a '.' followed by digits.
const int DECIMAL_TOKEN = 7;

/// A one- or two-character operator.
const int OPERATOR_TOKEN = 8;

/// A parenthesis, square bracket or curly bracket.
const int GROUPER_TOKEN = 9;

/// A reserved word.
const int KEYWORD_TOKEN = 10;
| |
/// Returns true when [next] is a space, a tab or a non-breaking space.
///
/// Line terminators are not treated as whitespace by this predicate.
bool isWhitespace(int next) {
  switch (next) {
    case _SPACE:
    case _TAB:
    case _NBSP:
      return true;
    default:
      return false;
  }
}
| |
/// Returns true when [next] may begin an identifier or keyword: an ASCII
/// letter, '_', '$', or any code point above 127.
bool isIdentifierOrKeywordStart(int next) {
  if (_a <= next && next <= _z) return true;
  if (_A <= next && next <= _Z) return true;
  return next == _US || next == _$ || next > 127;
}
| |
/// Returns true when [next] may appear inside an identifier: an ASCII letter
/// or digit, '_', '$', or any code point above 127.
bool isIdentifier(int next) {
  if (_a <= next && next <= _z) return true;
  if (_A <= next && next <= _Z) return true;
  if (_0 <= next && next <= _9) return true;
  return next == _US || next == _$ || next > 127;
}
| |
/// Returns true when [next] is a double or single quote character.
bool isQuote(int next) {
  final bool isDouble = next == _DQ;
  final bool isSingle = next == _SQ;
  return isDouble || isSingle;
}
| |
/// Returns true when [next] is an ASCII decimal digit.
///
/// A null [next] (the tokenizer's end-of-input marker) yields false rather
/// than throwing on the range comparison, matching how [isWhitespace],
/// [isQuote], [isOperator] and [isGrouper] treat null.
bool isNumber(int next) => next != null && _0 <= next && next <= _9;
| |
/// Returns true when [next] is one of the code points listed in `_OPERATORS`.
bool isOperator(int next) {
  return _OPERATORS.indexOf(next) != -1;
}
| |
/// Returns true when [next] is one of the code points listed in `_GROUPERS`.
bool isGrouper(int next) {
  return _GROUPERS.any((code) => code == next);
}
| |
/// Maps the character that follows a backslash to the code point it denotes.
///
/// 'f', 'n', 'r', 't' and 'v' become form feed, line feed, carriage return,
/// tab and vertical tab; every other character is returned unchanged.
int escape(int c) {
  if (c == _f) return _FF;
  if (c == _n) return _LF;
  if (c == _r) return _CR;
  if (c == _t) return _TAB;
  if (c == _v) return _VTAB;
  return c;
}
| |
/// A single lexical unit produced by the tokenizer.
class Token {
  /// One of the `*_TOKEN` kind constants.
  final int kind;

  /// The text of the token.
  final String value;

  /// Binding strength of the token; 0 when none is supplied.
  final int precedence;

  Token(this.kind, this.value, [this.precedence = 0]);

  /// Renders the token as `(kind, 'value')`.
  String toString() {
    return '($kind, \'$value\')';
  }
}
| |
/// Splits an expression string into a flat list of [Token]s.
///
/// The input is read one rune at a time. [_next] always holds the rune under
/// examination and becomes null once the input is exhausted.
class Tokenizer {
  final List<Token> _tokens = <Token>[];

  // Scratch buffer for the token being assembled; cleared after each token.
  final StringBuffer _sb = new StringBuffer();
  final RuneIterator _iterator;

  // Current rune, or null at end of input.
  int _next;

  Tokenizer(String input) : _iterator = new RuneIterator(input);

  // Moves to the following rune, setting [_next] to null at end of input.
  _advance() {
    _next = _iterator.moveNext() ? _iterator.current : null;
  }

  /// Consumes the whole input and returns the tokens found.
  ///
  /// Whitespace is skipped, and so is any character that no other rule
  /// recognises. Throws a [ParseException] on an unterminated string literal.
  List<Token> tokenize() {
    _advance();
    while (_next != null) {
      if (isWhitespace(_next)) {
        _advance();
      } else if (isQuote(_next)) {
        tokenizeString();
      } else if (isIdentifierOrKeywordStart(_next)) {
        tokenizeIdentifierOrKeyword();
      } else if (isNumber(_next)) {
        tokenizeNumber();
      } else if (_next == _PERIOD) {
        tokenizeDot();
      } else if (_next == _COMMA) {
        tokenizeComma();
      } else if (_next == _COLON) {
        tokenizeColon();
      } else if (isOperator(_next)) {
        tokenizeOperator();
      } else if (isGrouper(_next)) {
        tokenizeGrouper();
      } else {
        // Unrecognised character: drop it and keep going.
        _advance();
      }
    }
    return _tokens;
  }

  /// Reads a quoted string whose opening quote is the current rune.
  ///
  /// The string ends at the next unescaped occurrence of the same quote
  /// character. A backslash causes the following rune to be passed through
  /// [escape]. Throws a [ParseException] if the input ends first.
  tokenizeString() {
    int quoteChar = _next;
    _advance();
    while (_next != quoteChar) {
      if (_next == null) throw new ParseException("unterminated string");
      if (_next == _BACKSLASH) {
        _advance();
        if (_next == null) throw new ParseException("unterminated string");
        _sb.writeCharCode(escape(_next));
      } else {
        _sb.writeCharCode(_next);
      }
      _advance();
    }
    _tokens.add(new Token(STRING_TOKEN, _sb.toString()));
    _sb.clear();
    _advance(); // step past the closing quote
  }

  /// Reads a run of identifier characters and emits it as a keyword token
  /// when it is listed in `_KEYWORDS`, otherwise as an identifier token.
  tokenizeIdentifierOrKeyword() {
    while (_next != null && isIdentifier(_next)) {
      _sb.writeCharCode(_next);
      _advance();
    }
    var value = _sb.toString();
    if (_KEYWORDS.contains(value)) {
      _tokens.add(new Token(KEYWORD_TOKEN, value));
    } else {
      _tokens.add(new Token(IDENTIFIER_TOKEN, value));
    }
    _sb.clear();
  }

  /// Reads an integer or decimal literal starting at the current digit.
  ///
  /// A period directly followed by a digit continues the literal as a
  /// decimal. A period followed by anything else (or by end of input) ends
  /// the integer, and the period is emitted as a separate dot token, so
  /// `1.foo` yields INTEGER `1`, DOT, IDENTIFIER `foo`.
  tokenizeNumber() {
    while (_next != null && isNumber(_next)) {
      _sb.writeCharCode(_next);
      _advance();
    }
    if (_next != _PERIOD) {
      _addInteger();
      return;
    }
    _advance(); // consume the '.'
    if (_next != null && isNumber(_next)) {
      // The buffered integer part becomes the prefix of the decimal.
      tokenizeFraction();
    } else {
      // Flush the integer first so its digits cannot leak into a later token.
      _addInteger();
      _tokens.add(new Token(DOT_TOKEN, '.', POSTFIX_PRECEDENCE));
    }
  }

  // Emits the buffered digits as an integer token and resets the buffer.
  _addInteger() {
    _tokens.add(new Token(INTEGER_TOKEN, _sb.toString()));
    _sb.clear();
  }

  /// Handles a period that does not follow a digit run: either the start of
  /// a decimal such as `.5`, or a standalone dot token.
  tokenizeDot() {
    _advance();
    // Check for null first: the input may end right after the period.
    if (_next != null && isNumber(_next)) {
      tokenizeFraction();
    } else {
      _tokens.add(new Token(DOT_TOKEN, '.', POSTFIX_PRECEDENCE));
    }
  }

  /// Consumes the current rune and emits a comma token.
  tokenizeComma() {
    _advance();
    _tokens.add(new Token(COMMA_TOKEN, ','));
  }

  /// Consumes the current rune and emits a colon token.
  tokenizeColon() {
    _advance();
    _tokens.add(new Token(COLON_TOKEN, ':'));
  }

  /// Appends a period and the following digits to whatever is already
  /// buffered, then emits the result as a decimal token.
  ///
  /// The caller must already have consumed the period itself.
  tokenizeFraction() {
    _sb.writeCharCode(_PERIOD);
    while (_next != null && isNumber(_next)) {
      _sb.writeCharCode(_next);
      _advance();
    }
    _tokens.add(new Token(DECIMAL_TOKEN, _sb.toString()));
    _sb.clear();
  }

  /// Reads a one- or two-character operator starting at the current rune.
  ///
  /// The second rune is consumed only when the pair is listed in
  /// `_TWO_CHAR_OPS`; otherwise it is left for the next token.
  tokenizeOperator() {
    int startChar = _next;
    _advance();
    var op = new String.fromCharCode(startChar);
    // check for 2 character operators
    if (isOperator(_next)) {
      var op2 = new String.fromCharCodes([startChar, _next]);
      if (_TWO_CHAR_OPS.contains(op2)) {
        op = op2;
        _advance();
      }
    }
    // NOTE(review): a lone '=' has no entry in _PRECEDENCE, so its token is
    // created with a null precedence. Confirm the parser copes with that.
    _tokens.add(new Token(OPERATOR_TOKEN, op, _PRECEDENCE[op]));
  }

  /// Emits the current rune as a grouper token and consumes it.
  tokenizeGrouper() {
    var value = new String.fromCharCode(_next);
    _tokens.add(new Token(GROUPER_TOKEN, value, _PRECEDENCE[value]));
    _advance();
  }
}
| |
/// Thrown when the input cannot be tokenized, for example when a string
/// literal is never closed.
class ParseException implements Exception {
  /// Human-readable description of what went wrong.
  final String message;

  ParseException(this.message);

  /// Renders the exception as `ParseException: ` followed by [message].
  String toString() {
    return 'ParseException: $message';
  }
}