// Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

library polymer_expressions.tokenizer;

// Character codes (Unicode code points) referenced by the tokenizer. Each
// name spells out the character it stands for.

// Control and blank characters.
const int _TAB = 9;
const int _LF = 10;
const int _VTAB = 11;
const int _FF = 12;
const int _CR = 13;
const int _SPACE = 32;

// Punctuation, in code-point order.
const int _BANG = 33;
const int _DQ = 34;
const int _$ = 36;
const int _PERCENT = 37;
const int _AMPERSAND = 38;
const int _SQ = 39;
const int _OPEN_PAREN = 40;
const int _CLOSE_PAREN = 41;
const int _STAR = 42;
const int _PLUS = 43;
const int _COMMA = 44;
const int _MINUS = 45;
const int _PERIOD = 46;
const int _SLASH = 47;

// Bounds of the decimal digit range.
const int _0 = 48;
const int _9 = 57;

const int _COLON = 58;
const int _LT = 60;
const int _EQ = 61;
const int _GT = 62;
const int _QUESTION = 63;

// Bounds of the upper-case ASCII letter range.
const int _A = 65;
const int _Z = 90;

const int _OPEN_SQUARE_BRACKET = 91;
const int _BACKSLASH = 92;
const int _CLOSE_SQUARE_BRACKET = 93;
const int _CARET = 94;
const int _US = 95;

// Lower-case letters: `_a` and `_z` bound the range; the others are the
// letters recognised after a backslash by `escape`.
const int _a = 97;
const int _f = 102;
const int _n = 110;
const int _r = 114;
const int _t = 116;
const int _v = 118;
const int _z = 122;

const int _OPEN_CURLY_BRACKET = 123;
const int _BAR = 124;
const int _CLOSE_CURLY_BRACKET = 125;
const int _NBSP = 160;

// Characters that start (or continue) an operator token.
//
// `%` is included because `_PRECEDENCE` defines it at the multiplicative
// level; without it here the character was silently skipped by the tokenizer.
// The comma is deliberately absent: it is emitted as its own COMMA_TOKEN.
const _OPERATORS = const [_PLUS, _MINUS, _STAR, _SLASH, _PERCENT, _BANG,
                          _AMPERSAND, _LT, _EQ, _GT, _QUESTION, _CARET, _BAR];

// Character codes of the bracketing characters `(`, `)`, `[`, `]`, `{`, `}`.
// `isGrouper` tests membership in this list, and each match is emitted as a
// single-character GROUPER_TOKEN.
const _GROUPERS = const [_OPEN_PAREN, _CLOSE_PAREN,
                         _OPEN_SQUARE_BRACKET, _CLOSE_SQUARE_BRACKET,
                         _OPEN_CURLY_BRACKET, _CLOSE_CURLY_BRACKET];

// The only two-character operators. Two adjacent operator characters are
// merged into one token only when their concatenation appears in this list.
const _TWO_CHAR_OPS = const ['==', '!=', '<=', '>=', '||', '&&'];

// Identifier-shaped words that are emitted as KEYWORD_TOKEN rather than
// IDENTIFIER_TOKEN.
const _KEYWORDS = const ['in', 'this'];

// Precedence attached to operator and grouper tokens, keyed by the token's
// text. Text with no entry here (for example a lone `=`) yields a null
// precedence.
//
// NOTE(review): presumably a larger value binds more tightly (the values rise
// from `?` through multiplicative to postfix); the code that consumes these
// values is not in this file -- confirm against the parser.
const _PRECEDENCE = const {
  // Lowest value: unary `!`, separators and closing brackets.
  '!':  0,
  ':':  0,
  ',':  0,
  ')':  0,
  ']':  0,
  '}':  0, // NOTE(review): the original author marked this value as uncertain.
  '?':  1,
  '||': 2,
  '&&': 3,
  '|':  4,
  '^':  5,
  '&':  6,

  // Equality operators.
  '!=': 7,
  '==': 7,

  // Relational operators.
  '>=': 8,
  '>':  8,
  '<=': 8,
  '<':  8,

  // Additive operators.
  '+':  9,
  '-':  9,

  // Multiplicative operators.
  '%':  10,
  '/':  10,
  '*':  10,

  // Postfix: call, index and member access. Same value as POSTFIX_PRECEDENCE.
  '(':  11,
  '[':  11,
  '.':  11,
  '{': 11, // NOTE(review): the original author was unsure this value is correct.
};

/// Precedence given to `.` tokens by the tokenizer.
///
/// Equal to the value the precedence table assigns to `(`, `[`, `.` and `{`.
const POSTFIX_PRECEDENCE = 11;

// Token kinds: the values the tokenizer stores in `Token.kind`.

/// A quoted string literal; the token value is the unescaped content.
const int STRING_TOKEN = 1;

/// An identifier that is not a keyword.
const int IDENTIFIER_TOKEN = 2;

/// A `.` that is not part of a decimal number.
const int DOT_TOKEN = 3;

/// A `,`.
const int COMMA_TOKEN = 4;

/// A `:`.
const int COLON_TOKEN = 5;

/// A run of decimal digits with no fractional part.
const int INTEGER_TOKEN = 6;

/// A number containing a `.` followed by at least one digit.
const int DECIMAL_TOKEN = 7;

/// A one- or two-character operator.
const int OPERATOR_TOKEN = 8;

/// A single bracketing character.
const int GROUPER_TOKEN = 9;

/// An identifier-shaped word from the keyword list.
const int KEYWORD_TOKEN = 10;

/// Whether [next] is a space, a tab or a non-breaking space.
///
/// Line terminators are not counted as whitespace by this predicate.
bool isWhitespace(int next) {
  if (next == _SPACE) return true;
  if (next == _TAB) return true;
  return next == _NBSP;
}

/// Whether [next] may begin an identifier or keyword.
///
/// Accepts ASCII letters, `_`, `$` and every code point above 127.
bool isIdentifierOrKeywordStart(int next) {
  if (_a <= next && next <= _z) return true;
  if (_A <= next && next <= _Z) return true;
  return next == _US || next == _$ || next > 127;
}

/// Whether [next] may appear inside an identifier or keyword.
///
/// Accepts ASCII letters, decimal digits, `_`, `$` and every code point
/// above 127.
bool isIdentifier(int next) {
  if (_a <= next && next <= _z) return true;
  if (_A <= next && next <= _Z) return true;
  if (_0 <= next && next <= _9) return true;
  return next == _US || next == _$ || next > 127;
}

/// Whether [next] is a single or double quote character.
bool isQuote(int next) {
  return next == _SQ || next == _DQ;
}

/// Whether [next] is one of the decimal digits `0` through `9`.
bool isNumber(int next) {
  if (_0 <= next) return next <= _9;
  return false;
}

/// Whether [next] is listed among the operator characters.
bool isOperator(int next) {
  return _OPERATORS.indexOf(next) >= 0;
}

/// Whether [next] is listed among the bracketing characters.
bool isGrouper(int next) {
  return _GROUPERS.indexOf(next) >= 0;
}

/// Returns the character that [c] denotes when it follows a backslash.
///
/// The letters `f`, `n`, `r`, `t` and `v` map to form feed, line feed,
/// carriage return, tab and vertical tab; every other value is returned
/// unchanged.
int escape(int c) {
  if (c == _f) return _FF;
  if (c == _n) return _LF;
  if (c == _r) return _CR;
  if (c == _t) return _TAB;
  if (c == _v) return _VTAB;
  return c;
}

/// A single lexical unit: a kind code, its text and a precedence.
class Token {
  /// The category of this token.
  final int kind;

  /// The text of this token.
  final String value;

  /// The precedence attached to this token; 0 when none was supplied.
  final int precedence;

  Token(int kind, String value, [int precedence = 0])
      : kind = kind,
        value = value,
        precedence = precedence;

  /// Renders the token as its kind followed by its quoted value.
  String toString() {
    return "($kind, '$value')";
  }
}

/// Converts an expression string into a list of [Token]s.
///
/// The input is consumed one rune at a time. Whitespace, and any character
/// that does not start a recognised token, is skipped without error.
class Tokenizer {
  /// Tokens produced so far; this same list is returned by [tokenize].
  final List<Token> _tokens = <Token>[];

  /// Scratch buffer holding the text of the token currently being read.
  final StringBuffer _sb = new StringBuffer();

  final RuneIterator _iterator;

  /// The rune under the cursor, or `null` once the input is exhausted.
  int _next;

  Tokenizer(String input) : _iterator = new RuneIterator(input);

  /// Moves the cursor to the following rune, setting [_next] to `null` at the
  /// end of the input.
  _advance() {
    _next = _iterator.moveNext() ? _iterator.current : null;
  }

  /// Reads the whole input and returns the resulting tokens.
  ///
  /// Throws a [ParseException] if a string literal is not terminated.
  List<Token> tokenize() {
    _advance();
    while (_next != null) {
      if (isWhitespace(_next)) {
        _advance();
      } else if (isQuote(_next)) {
        tokenizeString();
      } else if (isIdentifierOrKeywordStart(_next)) {
        tokenizeIdentifierOrKeyword();
      } else if (isNumber(_next)) {
        tokenizeNumber();
      } else if (_next == _PERIOD) {
        tokenizeDot();
      } else if (_next == _COMMA) {
        tokenizeComma();
      } else if (_next == _COLON) {
        tokenizeColon();
      } else if (isOperator(_next)) {
        tokenizeOperator();
      } else if (isGrouper(_next)) {
        tokenizeGrouper();
      } else {
        // Unrecognised character: skip it.
        _advance();
      }
    }
    return _tokens;
  }

  /// Reads a quoted string starting at the opening quote and emits a
  /// STRING_TOKEN holding its content.
  ///
  /// The string ends at the next unescaped occurrence of the same quote
  /// character. A backslash escapes the character after it, which is
  /// translated by [escape]. Throws a [ParseException] if the input ends
  /// before the closing quote.
  tokenizeString() {
    int quoteChar = _next;
    _advance();
    while (_next != quoteChar) {
      if (_next == null) throw new ParseException("unterminated string");
      if (_next == _BACKSLASH) {
        _advance();
        if (_next == null) throw new ParseException("unterminated string");
        _sb.writeCharCode(escape(_next));
      } else {
        _sb.writeCharCode(_next);
      }
      _advance();
    }
    _tokens.add(new Token(STRING_TOKEN, _sb.toString()));
    _sb.clear();
    // Step past the closing quote.
    _advance();
  }

  /// Reads a run of identifier characters and emits a KEYWORD_TOKEN when the
  /// text is a keyword, or an IDENTIFIER_TOKEN otherwise.
  tokenizeIdentifierOrKeyword() {
    while (_next != null && isIdentifier(_next)) {
      _sb.writeCharCode(_next);
      _advance();
    }
    var value = _sb.toString();
    var kind = _KEYWORDS.contains(value) ? KEYWORD_TOKEN : IDENTIFIER_TOKEN;
    _tokens.add(new Token(kind, value));
    _sb.clear();
  }

  /// Reads a run of digits.
  ///
  /// When a `.` follows, the digits stay buffered and [tokenizeDot] decides
  /// whether they become part of a decimal or a separate integer; otherwise
  /// an INTEGER_TOKEN is emitted here.
  tokenizeNumber() {
    while (_next != null && isNumber(_next)) {
      _sb.writeCharCode(_next);
      _advance();
    }
    if (_next == _PERIOD) {
      tokenizeDot();
    } else {
      _tokens.add(new Token(INTEGER_TOKEN, _sb.toString()));
      _sb.clear();
    }
  }

  /// Consumes a `.` and emits either a decimal number or a DOT_TOKEN.
  ///
  /// When a digit follows the dot, the dot starts a fraction (joined to any
  /// integer digits already buffered by [tokenizeNumber]). Otherwise the dot
  /// is a token of its own.
  tokenizeDot() {
    _advance();
    // The null check covers input that ends right after the dot.
    if (_next != null && isNumber(_next)) {
      tokenizeFraction();
      return;
    }
    // Digits buffered by tokenizeNumber (as in `1.foo`) form a complete
    // integer; emit it before the dot so it is neither lost nor merged into
    // the text of the next token.
    if (_sb.isNotEmpty) {
      _tokens.add(new Token(INTEGER_TOKEN, _sb.toString()));
      _sb.clear();
    }
    _tokens.add(new Token(DOT_TOKEN, '.', POSTFIX_PRECEDENCE));
  }

  /// Consumes a `,` and emits a COMMA_TOKEN.
  tokenizeComma() {
    _advance();
    _tokens.add(new Token(COMMA_TOKEN, ','));
  }

  /// Consumes a `:` and emits a COLON_TOKEN.
  tokenizeColon() {
    _advance();
    _tokens.add(new Token(COLON_TOKEN, ':'));
  }

  /// Reads the digits after a decimal point and emits a DECIMAL_TOKEN.
  ///
  /// The dot itself has already been consumed by [tokenizeDot]; it is
  /// appended after whatever integer digits are in the buffer.
  tokenizeFraction() {
    _sb.writeCharCode(_PERIOD);
    while (_next != null && isNumber(_next)) {
      _sb.writeCharCode(_next);
      _advance();
    }
    _tokens.add(new Token(DECIMAL_TOKEN, _sb.toString()));
    _sb.clear();
  }

  /// Reads a one- or two-character operator and emits an OPERATOR_TOKEN.
  ///
  /// Two operator characters are combined only when together they form one of
  /// the known two-character operators. The precedence comes from the
  /// precedence table and is null for text with no entry there, such as a
  /// lone `=`.
  tokenizeOperator() {
    int startChar = _next;
    _advance();
    var op = new String.fromCharCode(startChar);
    // Check for 2 character operators.
    if (isOperator(_next)) {
      var op2 = new String.fromCharCodes([startChar, _next]);
      if (_TWO_CHAR_OPS.contains(op2)) {
        op = op2;
        _advance();
      }
    }
    _tokens.add(new Token(OPERATOR_TOKEN, op, _PRECEDENCE[op]));
  }

  /// Consumes a single bracketing character and emits a GROUPER_TOKEN with
  /// its precedence from the precedence table.
  tokenizeGrouper() {
    var value = new String.fromCharCode(_next);
    _tokens.add(new Token(GROUPER_TOKEN, value, _PRECEDENCE[value]));
    _advance();
  }
}

/// Signals that the input could not be tokenized.
class ParseException implements Exception {
  /// Human-readable description of the problem.
  final String message;

  ParseException(String message) : message = message;

  /// Renders the exception as its type name followed by [message].
  String toString() {
    return "ParseException: $message";
  }
}
