// blob: 83be433b2fa81b9667cef020dbf7e04bed17f7a6 [file] [log] [blame]
part of mustache;
// Tokenizes [source] and post-processes the result with [_trim], so the
// returned list contains no _WHITESPACE or _LINE_END tokens.
//
// [lenient] is accepted but not used here. No template name is handed to the
// scanner, so exceptions it throws carry a null template name.
List<_Token> _scan(String source, bool lenient) {
  var scanner = new _Scanner(source);
  var rawTokens = scanner.scan();
  return _trim(rawTokens);
}
// Token type codes stored in _Token.type.
// FIXME: use enums.

// Literal template text.
const int _TEXT = 1;

// {{ name }}
const int _VARIABLE = 2;

// {{> name }}
const int _PARTIAL = 3;

// {{# name }}
const int _OPEN_SECTION = 4;

// {{^ name }}
const int _OPEN_INV_SECTION = 5;

// {{/ name }}
const int _CLOSE_SECTION = 6;

// {{! text }}
const int _COMMENT = 7;

// {{{ name }}} or {{& name }}
const int _UNESC_VARIABLE = 8;

// The two types below are intermediate: they should be filtered out before
// the token list is returned by scan.
const int _WHITESPACE = 9;
const int _LINE_END = 10;
// Returns a short human-readable name for a token [type] code, for use in
// debug output such as _Token.toString.
//
// The table is indexed by the type code itself (1 == Text ... 10 == LineEnd).
// Index 0 and any code outside the table yield '?' instead of throwing a
// RangeError, so printing a token with an unexpected type never fails.
String _tokenTypeString(int type) {
  const names = const <String>[
    '?',
    'Text',
    'Var',
    'Par',
    'Open',
    'OpenInv',
    'Close',
    'Comment',
    'UnescVar',
    'Whitespace',
    'LineEnd'
  ];
  return (type >= 0 && type < names.length) ? names[type] : '?';
}
// Character codes used by the scanner, in ascending code-point order.

// Sentinel the scanner compares read/peek results against to detect the end
// of the input; not a real character code.
const int _EOF = -1;

const int _TAB = 0x09; // '\t'
const int _NEWLINE = 0x0A; // '\n'
const int _RETURN = 0x0D; // '\r'
const int _SPACE = 0x20; // ' '
const int _EXCLAIM = 0x21; // '!'
const int _QUOTE = 0x22; // '"'
const int _HASH = 0x23; // '#'
const int _AMP = 0x26; // '&'
const int _APOS = 0x27; // '\''
const int _PERIOD = 0x2E; // '.'
const int _FORWARD_SLASH = 0x2F; // '/'
const int _LT = 0x3C; // '<'
const int _GT = 0x3E; // '>'
const int _CARET = 0x5E; // '^'
const int _OPEN_MUSTACHE = 0x7B; // '{'
const int _CLOSE_MUSTACHE = 0x7D; // '}'
// Takes a list of tokens and removes every _LINE_END and _WHITESPACE token,
// either by dropping it or by converting it to a _TEXT token.
// This is used to implement mustache standalone lines.
//
// Where TAG is one of: OPEN_SECTION, OPEN_INV_SECTION, CLOSE_SECTION, COMMENT
//   LINE_END, [WHITESPACE], TAG, [WHITESPACE], LINE_END => LINE_END, TAG
//   WHITESPACE => TEXT
//   LINE_END => TEXT
// The standalone rule also applies when the tag line is the first line of
// the input, and when it is terminated by the end of input rather than by a
// LINE_END.
//
//TODO Consecutive text tokens will also be merged into a single token. (Do in a separate merge func).
List<_Token> _trim(List<_Token> tokens) {
  int i = 0;

  // Consumes and returns the next token, or null once the input is exhausted.
  _Token read() => i < tokens.length ? tokens[i++] : null;

  // Returns the token [n] positions ahead without consuming it, or null when
  // that position is past the end of the input.
  _Token peek([int n = 0]) => i + n < tokens.length ? tokens[i + n] : null;

  // Tags that may occupy a line on their own.
  bool isTag(token) =>
      token != null &&
      (token.type == _OPEN_SECTION ||
          token.type == _OPEN_INV_SECTION ||
          token.type == _CLOSE_SECTION ||
          token.type == _COMMENT);

  bool isWhitespace(token) => token != null && token.type == _WHITESPACE;

  // A line is terminated by a LINE_END token or by end of input (null).
  bool endsLine(token) => token == null || token.type == _LINE_END;

  // True when the tokens starting [offset] ahead are
  // TAG, [WHITESPACE], (LINE_END | end of input).
  // The scanner only ever creates a single whitespace token in a row, so
  // there is no need to handle multiple whitespace tokens.
  bool tagAloneOnLine(int offset) {
    if (!isTag(peek(offset))) return false;
    var terminator = isWhitespace(peek(offset + 1)) ? offset + 2 : offset + 1;
    return endsLine(peek(terminator));
  }

  var result = <_Token>[];

  // Must be called at the start of a line. Consumes as many consecutive
  // standalone tag lines as there are, emitting only their tags.
  void consumeStandaloneLines() {
    while (true) {
      // Swallow leading whitespace, but only if the line really is standalone.
      if (isWhitespace(peek()) && tagAloneOnLine(1)) read();

      if (!tagAloneOnLine(0)) return;

      // Add tag.
      result.add(read());

      // Swallow trailing whitespace.
      if (isWhitespace(peek())) read();

      // Swallow the line end. At end of input there is nothing left to
      // swallow: peek() is null and read() is a no-op returning null.
      assert(endsLine(peek()));
      read();
    }
  }

  // Handle case where first line is a standalone tag.
  consumeStandaloneLines();

  for (var t = read(); t != null; t = read()) {
    if (t.type == _LINE_END) {
      // Convert line end to text token, then look at the line that follows.
      result.add(new _Token(_TEXT, t.value, t.line, t.column));
      consumeStandaloneLines();
    } else if (t.type == _WHITESPACE) {
      // Convert whitespace to text token.
      result.add(new _Token(_TEXT, t.value, t.line, t.column));
    } else {
      // Preserve token.
      result.add(t);
    }
  }

  return result;
}
// A single token produced by the scanner.
class _Token {
  // One of the token type codes (_TEXT, _VARIABLE, ...).
  final int type;

  // The text carried by the token.
  final String value;

  // Position the scanner recorded for this token.
  final int line;
  final int column;

  _Token(this.type, this.value, this.line, this.column);

  // Debug representation: type name, quoted value with newlines shown as
  // the two characters \n, then line:column.
  @override
  String toString() {
    var typeName = _tokenTypeString(type);
    var printable = value.replaceAll('\n', '\\n');
    return "$typeName: \"$printable\" $line:$column";
  }
}
// Splits mustache template source into a flat list of tokens.
//
// The token list returned by scan() still contains _WHITESPACE and _LINE_END
// tokens. Every token carries the line/column reported by the character
// reader at the point where the token's recorded text starts.
class _Scanner {
  _Scanner(String source, [this._templateName])
      : _r = new _CharReader(source);

  // Passed to every TemplateException thrown by the scanner; null when the
  // caller did not supply a name.
  final String _templateName;

  _CharReader _r;

  // Tokens produced so far, in source order.
  List<_Token> _tokens = <_Token>[];

  int _read() => _r.read();
  int _peek() => _r.peek();

  // Reads a run of characters and adds it as a token of [type], positioned
  // at the start of the run. _TEXT stops at the end of the line; every other
  // type reads up to the next mustache brace. The value is trimmed unless it
  // is text or a comment.
  _addStringToken(int type) {
    int l = _r.line, c = _r.column;
    var value = type == _TEXT ? _readLine() : _readString();
    if (type != _TEXT && type != _COMMENT) value = value.trim();
    _tokens.add(new _Token(type, value, l, c));
  }

  // Adds a one-character token of [type] at the reader's current position.
  // Does not consume anything: call it while the character is still
  // unread, so the recorded position is that of the character itself.
  _addCharToken(int type, int charCode) {
    int l = _r.line, c = _r.column;
    var value = new String.fromCharCode(charCode);
    _tokens.add(new _Token(type, value, l, c));
  }

  // Consumes one character and throws a TemplateException if it is the end
  // of input or anything other than [expectedCharCode].
  _expect(int expectedCharCode) {
    int c = _read();

    if (c == _EOF) {
      throw new TemplateException('Unexpected end of input',
          _templateName, _r.line, _r.column);
    } else if (c != expectedCharCode) {
      throw new TemplateException('Unexpected character, '
          'expected: ${new String.fromCharCode(expectedCharCode)} ($expectedCharCode), '
          'was: ${new String.fromCharCode(c)} ($c)',
          _templateName, _r.line, _r.column);
    }
  }

  // Reads up to (not including) the next mustache brace or end of input.
  String _readString() => _r.readWhile(
      (c) => c != _OPEN_MUSTACHE
          && c != _CLOSE_MUSTACHE
          && c != _EOF);

  // Like _readString, but also stops before a newline.
  String _readLine() => _r.readWhile(
      (c) => c != _OPEN_MUSTACHE
          && c != _CLOSE_MUSTACHE
          && c != _EOF
          && c != _NEWLINE);

  // Scans the whole input and returns the accumulated token list.
  List<_Token> scan() {
    while (true) {
      switch (_peek()) {
        case _EOF:
          return _tokens;
        case _OPEN_MUSTACHE:
          _scanMustacheTag();
          break;
        default:
          _scanText();
      }
    }
  }

  // Scans text, whitespace and line ends until an opening brace or the end
  // of input is next.
  _scanText() {
    while (true) {
      switch (_peek()) {
        case _EOF:
        case _OPEN_MUSTACHE:
          return;
        case _CLOSE_MUSTACHE:
          // A stray closing brace is plain text. Record it before consuming
          // it so the token points at the brace.
          _addCharToken(_TEXT, _CLOSE_MUSTACHE);
          _read();
          break;
        case _RETURN:
          _scanReturn();
          break;
        case _NEWLINE:
          _addCharToken(_LINE_END, _NEWLINE);
          _read();
          break;
        case _SPACE:
        case _TAB:
          _scanWhitespace();
          break;
        default:
          _addStringToken(_TEXT);
      }
    }
  }

  // Handles a carriage return: "\r\n" becomes one _LINE_END token, a lone
  // "\r" becomes text. Either token is positioned at the "\r".
  _scanReturn() {
    int l = _r.line, c = _r.column;
    _read();
    if (_peek() == _NEWLINE) {
      _read();
      _tokens.add(new _Token(_LINE_END, '\r\n', l, c));
    } else {
      _tokens.add(new _Token(_TEXT, new String.fromCharCode(_RETURN), l, c));
    }
  }

  // Reads a run of spaces and tabs into a single _WHITESPACE token,
  // positioned at the first character of the run.
  _scanWhitespace() {
    int l = _r.line, c = _r.column;
    var value = _r.readWhile((ch) => ch == _SPACE || ch == _TAB);
    _tokens.add(new _Token(_WHITESPACE, value, l, c));
  }

  // Scans one tag, or a single opening brace that turns out to be text.
  // Throws a TemplateException if the tag is not closed as expected.
  _scanMustacheTag() {
    // Remember where the brace is in case it turns out to be plain text.
    int l = _r.line, c = _r.column;
    _expect(_OPEN_MUSTACHE);

    // If just a single mustache, return this as a text token.
    if (_peek() != _OPEN_MUSTACHE) {
      _tokens.add(
          new _Token(_TEXT, new String.fromCharCode(_OPEN_MUSTACHE), l, c));
      return;
    }

    _expect(_OPEN_MUSTACHE);

    // Unescaped variable {{{ ... }}}
    if (_peek() == _OPEN_MUSTACHE) {
      _read();
      _addStringToken(_UNESC_VARIABLE);
      _expect(_CLOSE_MUSTACHE);
      _expect(_CLOSE_MUSTACHE);
      _expect(_CLOSE_MUSTACHE);
      return;
    }

    // Skip whitespace at start of tag. i.e. {{ # foo }} {{ / foo }}
    _r.readWhile((c) => const [_SPACE, _TAB , _NEWLINE, _RETURN].contains(c));

    switch (_peek()) {
      case _EOF:
        throw new TemplateException('Unexpected end of input',
            _templateName, _r.line, _r.column);

      // Unescaped variable {{& ... }}
      case _AMP:
        _read();
        _addStringToken(_UNESC_VARIABLE);
        break;

      // Comment {{! ... }}
      case _EXCLAIM:
        _read();
        _addStringToken(_COMMENT);
        break;

      // Partial {{> ... }}
      case _GT:
        _read();
        _addStringToken(_PARTIAL);
        break;

      // Open section {{# ... }}
      case _HASH:
        _read();
        _addStringToken(_OPEN_SECTION);
        break;

      // Open inverted section {{^ ... }}
      case _CARET:
        _read();
        _addStringToken(_OPEN_INV_SECTION);
        break;

      // Close section {{/ ... }}
      case _FORWARD_SLASH:
        _read();
        _addStringToken(_CLOSE_SECTION);
        break;

      // Variable {{ ... }}
      default:
        _addStringToken(_VARIABLE);
    }

    _expect(_CLOSE_MUSTACHE);
    _expect(_CLOSE_MUSTACHE);
  }
}