part of mustache;

// Scans [source] into tokens and then applies standalone-line trimming, so
// the returned list no longer contains whitespace or line-end tokens.
// The lenient flag is accepted but is not consulted here.
List<_Token> _scan(String source, bool lenient) {
	var scanner = new _Scanner(source);
	var rawTokens = scanner.scan();
	return _trim(rawTokens);
}

// Token type codes stored in _Token.type.
// The numeric values double as indexes into the name table used by
// _tokenTypeString, so the two must be kept in step.
//FIXME use enums
const int _TEXT = 1;
const int _VARIABLE = 2;
const int _PARTIAL = 3;
const int _OPEN_SECTION = 4;
const int _OPEN_INV_SECTION = 5;
const int _CLOSE_SECTION = 6;
const int _COMMENT = 7;
const int _UNESC_VARIABLE = 8;
// The two types below are produced by the scanner only; _trim either drops
// them or converts them to _TEXT, so they never appear in the result of _scan.
const int _WHITESPACE = 9; // Run of spaces and/or tabs.
const int _LINE_END = 10; // "\n" or "\r\n".

// Returns a short human-readable name for a token type code, for use in
// debug output such as _Token.toString().
//
// The table is indexed by the token type constants (_TEXT = 1 through
// _LINE_END = 10). Index 0 holds the placeholder '?', which is also returned
// for null or out-of-range codes so that building a debug string never throws.
String _tokenTypeString(int type) {
	const names = const [
		'?',
		'Text',
		'Var',
		'Par',
		'Open',
		'OpenInv',
		'Close',
		'Comment',
		'UnescVar',
		'Whitespace',
		'LineEnd'];

	if (type == null || type < 0 || type >= names.length) return names[0];
	return names[type];
}

// Character codes used by the scanner, grouped by role and listed in
// ascending order within each group.

// Value the scanner compares read/peek results against to detect end of input.
const int _EOF = -1;

// Whitespace and line terminators.
const int _TAB = 9; // '\t'
const int _NEWLINE = 10; // '\n'
const int _RETURN = 13; // '\r'
const int _SPACE = 32; // ' '

// Punctuation.
const int _EXCLAIM = 33; // '!'
const int _QUOTE = 34; // '"'
const int _HASH = 35; // '#'
const int _AMP = 38; // '&'
const int _APOS = 39; // '\''
const int _PERIOD = 46; // '.'
const int _FORWARD_SLASH = 47; // '/'
const int _LT = 60; // '<'
const int _GT = 62; // '>'
const int _CARET = 94; // '^'

// Tag delimiters.
const int _OPEN_MUSTACHE = 123; // '{'
const int _CLOSE_MUSTACHE = 125; // '}'

// Takes the raw token list produced by the scanner and removes every
// _LINE_END and _WHITESPACE token, implementing mustache standalone lines.
//
// A line is "standalone" when it holds nothing but a single TAG with optional
// whitespace on either side, where TAG is one of: OPEN_SECTION,
// OPEN_INV_SECTION, CLOSE_SECTION, COMMENT or PARTIAL. For such a line the
// surrounding whitespace and the terminating line end are dropped:
//   LINE_END, [WHITESPACE], TAG, [WHITESPACE], LINE_END => LINE_END, TAG
// The end of input is accepted in place of the terminating LINE_END when the
// tag is followed by whitespace.
//
// All other whitespace and line-end tokens are kept as text:
//   WHITESPACE => TEXT
//   LINE_END => TEXT
//
// Returns a new list; the input list is not modified.
//TODO Consecutive text tokens will also be merged into a single token. (Do in a separate merge func).
List<_Token> _trim(List<_Token> tokens) {
	int i = 0;

	// Cursor helpers. Both return null once the end of the list is reached.
	_Token read() => i < tokens.length ? tokens[i++] : null;
	_Token peek([int n = 0]) => i + n < tokens.length ? tokens[i + n] : null;

	bool isTag(token) => token != null
	    && const [_OPEN_SECTION, _OPEN_INV_SECTION, _CLOSE_SECTION, _COMMENT, _PARTIAL].contains(token.type);

	bool isWhitespace(token) => token != null && token.type == _WHITESPACE;
	bool isLineEnd(token) => token != null && token.type == _LINE_END;
	bool isLineEndOrEof(token) => token == null || isLineEnd(token); // null == EOF

	var result = <_Token>[];

	// Called with the cursor at the start of a line. Consumes as many
	// consecutive standalone tag lines as follow, emitting only their tags.
	void standaloneLineCheck() {
		while (true) {
			// Swallow leading whitespace when the rest of the line is just a tag.
			// Note, the scanner will only ever create a single whitespace token.
			// There is no need to handle multiple whitespace tokens.
			if (isWhitespace(peek())
			    && isTag(peek(1))
			    && (isLineEndOrEof(peek(2))
			        || (isWhitespace(peek(2)) && isLineEndOrEof(peek(3))))) {
				read();
			}

			bool standalone = isTag(peek())
			    && (isLineEnd(peek(1))
			        || (isWhitespace(peek(1)) && isLineEndOrEof(peek(2))));
			if (!standalone) return;

			// Keep the tag itself.
			result.add(read());

			// Swallow trailing whitespace.
			if (isWhitespace(peek())) read();

			// Swallow the line end. The line may instead be terminated by the end
			// of input, in which case read() is a no-op and the next pass exits.
			assert(isLineEndOrEof(peek()));
			read();
		}
	}

	// Handle case where first line is a standalone tag.
	standaloneLineCheck();

	for (var t = read(); t != null; t = read()) {
		if (t.type == _LINE_END) {
			// Convert line end to text token, then examine the line that follows.
			result.add(new _Token(_TEXT, t.value, t.line, t.column));
			standaloneLineCheck();
		} else if (t.type == _WHITESPACE) {
			// Convert whitespace to text token.
			result.add(new _Token(_TEXT, t.value, t.line, t.column));
		} else {
			// Preserve token.
			result.add(t);
		}
	}

	return result;
}

// A single lexical token produced by the scanner.
class _Token {
	// One of the token type constants (_TEXT through _LINE_END).
	final int type;

	// Token text: the literal characters for text-like tokens, or the tag
	// content for tag tokens.
	final String value;

	// Line and column reported by the reader when the token was created.
	final int line;
	final int column;

	// Whitespace that preceded a partial tag on its line; the scanner passes
	// this only when it creates _PARTIAL tokens.
	final String indent;

	// Store offsets to extract text from source for lambdas.
	// Only used for section, inverse section and close section tags.
	int offset;

	_Token(this.type, this.value, this.line, this.column, {this.indent});

	// Debug representation: type name, quoted value with newlines shown as
	// \n, then line:column.
	toString() {
		var escaped = value.replaceAll('\n', '\\n');
		return "${_tokenTypeString(type)}: \"$escaped\" $line:$column";
	}
}

// Splits mustache template source into a flat list of tokens.
//
// Text outside of tags is emitted as _TEXT, _WHITESPACE and _LINE_END tokens;
// each {{ ... }} tag becomes a single token whose type reflects the character
// that follows the opening braces. The list returned by scan() still contains
// the _WHITESPACE and _LINE_END tokens.
class _Scanner {
	_Scanner(String source, [this._templateName])
	 : _r = new _CharReader(source);

	// Optional name passed to TemplateException when a scan error is reported.
	final String _templateName;
	_CharReader _r;
	List<_Token> _tokens = <_Token>[];

	int _read() => _r.read();
	int _peek() => _r.peek();

	// Reads a run of characters and appends it as a token of the given type.
	// _TEXT tokens stop at the end of the line; all other types read up to the
	// next brace. The value is trimmed except for _TEXT and _COMMENT tokens.
	// The recorded position is the one reported before the run is consumed.
	_addStringToken(int type) {
		int l = _r.line, c = _r.column;
		var value = type == _TEXT ? _readLine() : _readString();
		if (type != _TEXT && type != _COMMENT) value = value.trim();
		_tokens.add(new _Token(type, value, l, c));
	}

	// Appends a single-character token. The character itself is consumed by
	// the caller; this only records it.
	_addCharToken(int type, int charCode) {
		int l = _r.line, c = _r.column;
		var value = new String.fromCharCode(charCode);
		_tokens.add(new _Token(type, value, l, c));
	}

	// Reads the name of a partial tag and appends a _PARTIAL token.
	_addPartialToken() {
		// Capture whitespace preceding a partial tag so it can be used for
		// indentation during rendering. The whitespace only counts when it
		// starts its line, i.e. it is the very first token or directly follows
		// a line-end token.
		var indent = '';
		if (_tokens.isNotEmpty && _tokens.last.type == _WHITESPACE) {
			int n = _tokens.length;
			// Compare against the token type _LINE_END, not a character code.
			if (n == 1 || _tokens[n - 2].type == _LINE_END) {
				indent = _tokens.last.value;
			}
		}

		int l = _r.line, c = _r.column;
		var value = _readString().trim();
		_tokens.add(new _Token(_PARTIAL, value, l, c, indent: indent));
	}

	// Consumes one character and throws a TemplateException unless it is
	// expectedCharCode. Reaching the end of input is reported separately.
	_expect(int expectedCharCode) {
		int c = _read();

		if (c == _EOF) {
			throw new TemplateException('Unexpected end of input',
			    _templateName, _r.line, _r.column);

		} else if (c != expectedCharCode) {
			throw new TemplateException('Unexpected character, '
				'expected: ${new String.fromCharCode(expectedCharCode)} ($expectedCharCode), '
				'was: ${new String.fromCharCode(c)} ($c)',
				_templateName, _r.line, _r.column);
		}
	}

	// Reads up to, but not including, the next brace or the end of input.
	String _readString() => _r.readWhile(
		(c) => c != _OPEN_MUSTACHE
		    && c != _CLOSE_MUSTACHE
		    && c != _EOF);

	// Like _readString, but also stops before a newline.
	String _readLine() => _r.readWhile(
		(c) => c != _OPEN_MUSTACHE
		    && c != _CLOSE_MUSTACHE
		    && c != _EOF
		    && c != _NEWLINE);

	// Scans the whole source and returns the accumulated token list.
	// Throws TemplateException on a malformed tag.
	List<_Token> scan() {
		while(true) {
			switch(_peek()) {
				case _EOF:
					return _tokens;
				case _OPEN_MUSTACHE:
					_scanMustacheTag();
					break;
				default:
					_scanText();
			}
		}
	}

	// Scans literal text up to the next '{' or the end of input.
	_scanText() {
		while(true) {
			switch(_peek()) {
				case _EOF:
					return;
				case _OPEN_MUSTACHE:
					return;
				case _CLOSE_MUSTACHE:
					// A stray '}' outside of a tag is plain text.
					_read();
					_addCharToken(_TEXT, _CLOSE_MUSTACHE);
					break;
				case _RETURN:
					// "\r\n" is a single line end; a lone '\r' is plain text.
					_read();
					if (_peek() == _NEWLINE) {
						_read();
						_tokens.add(new _Token(_LINE_END, '\r\n', _r.line, _r.column));
					} else {
						_addCharToken(_TEXT, _RETURN);
					}
					break;
				case _NEWLINE:
					// A "\r\n" pair is recognised by the _RETURN case above.
					_read();
					_addCharToken(_LINE_END, _NEWLINE);
					break;
				case _SPACE:
				case _TAB:
					// NOTE(review): unlike _addStringToken, the position recorded
					// here is read after the run has been consumed - confirm
					// whether the start position was intended.
					var value = _r.readWhile((c) => c == _SPACE || c == _TAB);
					_tokens.add(new _Token(_WHITESPACE, value, _r.line, _r.column));
					break;
				default:
					_addStringToken(_TEXT);
			}
		}
	}

	// Scans one tag, starting at a '{'. A single '{' that is not followed by
	// another one is emitted as text.
	_scanMustacheTag() {
		int startOffset = _r.offset;

		_expect(_OPEN_MUSTACHE);

		// If just a single mustache, return this as a text token. The brace has
		// already been consumed by _expect above, so no further read is needed.
		if (_peek() != _OPEN_MUSTACHE) {
			_addCharToken(_TEXT, _OPEN_MUSTACHE);
			return;
		}

		_expect(_OPEN_MUSTACHE);

		// Unescaped variable {{{ ... }}}
		if (_peek() == _OPEN_MUSTACHE) {
			_read();
			_addStringToken(_UNESC_VARIABLE);
			_expect(_CLOSE_MUSTACHE);
			_expect(_CLOSE_MUSTACHE);
			_expect(_CLOSE_MUSTACHE);
			return;
		}

		// Skip whitespace at start of tag. i.e. {{ # foo }}  {{ / foo }}
		_r.readWhile((c) => const [_SPACE, _TAB , _NEWLINE, _RETURN].contains(c));

		switch(_peek()) {
			case _EOF:
				throw new TemplateException('Unexpected end of input',
				    _templateName, _r.line, _r.column);

			// Unescaped variable {{& ... }}
			case _AMP:
				_read();
				_addStringToken(_UNESC_VARIABLE);
				break;

			// Comment {{! ... }}
			case _EXCLAIM:
				_read();
				_addStringToken(_COMMENT);
				break;

			// Partial {{> ... }}
			case _GT:
				_read();
				_addPartialToken();
				break;

			// Open section {{# ... }}
			case _HASH:
				_read();
				_addStringToken(_OPEN_SECTION);
				break;

			// Open inverted section {{^ ... }}
			case _CARET:
				_read();
				_addStringToken(_OPEN_INV_SECTION);
				break;

			// Close section {{/ ... }}
			case _FORWARD_SLASH:
				_read();
				_addStringToken(_CLOSE_SECTION);
				// Store the offset at which the closing tag starts, so source
				// substrings can be extracted for lambdas.
				_tokens.last.offset = startOffset;
				break;

			// Variable {{ ... }}
			default:
				_addStringToken(_VARIABLE);
		}

		_expect(_CLOSE_MUSTACHE);
		_expect(_CLOSE_MUSTACHE);

		// For opening tags, store the offset just past the closing braces, so
		// source substrings can be extracted for lambdas.
		if (_tokens.isNotEmpty) {
			var t = _tokens.last;
			if (t.type == _OPEN_SECTION || t.type == _OPEN_INV_SECTION) {
				t.offset = _r.offset;
			}
		}
	}
}

