part of mustache.impl; | |
class Scanner { | |
Scanner(String source, this._templateName, String delimiters, {bool lenient: true}) | |
: _source = source, | |
_lenient = lenient, | |
_itr = source.runes.iterator { | |
var delims = _parseDelimiterString(delimiters); | |
_openDelimiter = delims[0]; | |
_openDelimiterInner = delims[1]; | |
_closeDelimiterInner = delims[2]; | |
_closeDelimiter = delims[3]; | |
if (source == '') { | |
_c = _EOF; | |
} else { | |
_itr.moveNext(); | |
_c = _itr.current; | |
} | |
} | |
final String _templateName; | |
final String _source; | |
final bool _lenient; | |
final Iterator<int> _itr; | |
int _offset = 0; | |
int _c = 0; | |
final List<Token> _tokens = new List<Token>(); | |
// These can be changed by the change delimiter tag. | |
int _openDelimiter; | |
int _openDelimiterInner; | |
int _closeDelimiterInner; | |
int _closeDelimiter; | |
List<Token> scan() { | |
while(true) { | |
int c = _peek(); | |
if (c == _EOF) break; | |
else if (c == _openDelimiter) _scanMustacheTag(); | |
else _scanText(); | |
} | |
return _tokens; | |
} | |
int _peek() => _c; | |
int _read() { | |
var c = _c; | |
if (_itr.moveNext()) { | |
_offset++; | |
_c = _itr.current; | |
} else { | |
_c = _EOF; | |
} | |
return c; | |
} | |
String _readWhile(bool test(int charCode), [Function endOfFile]) { | |
int start = _offset; | |
while (_peek() != _EOF && test(_peek())) { | |
_read(); | |
} | |
if (_peek() == _EOF && endOfFile != null) endOfFile(); | |
int end = _peek() == _EOF ? _source.length : _offset; | |
return _source.substring(start, end); | |
} | |
_expect(int expectedCharCode) { | |
int c = _read(); | |
if (c == _EOF) { | |
throw new TemplateException('Unexpected end of input', | |
_templateName, _source, _offset); | |
} else if (c != expectedCharCode) { | |
throw new TemplateException('Unexpected character, ' | |
'expected: ${new String.fromCharCode(expectedCharCode)} ($expectedCharCode), ' | |
'was: ${new String.fromCharCode(c)} ($c)', | |
_templateName, _source, _offset); | |
} | |
} | |
bool _isWhitespace(int c) | |
=> const [_SPACE, _TAB , _NEWLINE, _RETURN].contains(c); | |
// A sigil is the word commonly used to describe the special character at the | |
// start of mustache tag i.e. #, ^ or /. | |
bool _isSigil(int c) | |
=> const [_HASH, _CARET, _FORWARD_SLASH, _GT, _AMP, _EXCLAIM, _EQUAL] | |
.contains(c); | |
bool _isAlphanum(int c) | |
=> (c >= _a && c <= _z) | |
|| (c >= _A && c <= _Z) | |
|| (c >= _0 && c <= _9) | |
|| c == _MINUS | |
|| c == _UNDERSCORE | |
|| c == _PERIOD; | |
_scanText() { | |
while(true) { | |
int c = _peek(); | |
int start = _offset; | |
if (c == _EOF) { | |
return; | |
} else if (c == _openDelimiter) { | |
return; | |
// Newlines and whitespace have separate tokens so the standalone lines | |
// logic can be implemented. | |
} else if (c == _NEWLINE) { | |
_read(); | |
var value = new String.fromCharCode(c); | |
_tokens.add(new Token(_LINE_END, value, start, _offset)); | |
} else if (c == _RETURN) { | |
_read(); | |
if (_peek() == _NEWLINE) { | |
_read(); | |
_tokens.add(new Token(_LINE_END, '\r\n', start, _offset)); | |
} else { | |
_tokens.add(new Token(_TEXT, '\r', start, _offset)); | |
} | |
} else if (c == _SPACE || c == _TAB) { | |
var value = _readWhile((c) => c == _SPACE || c == _TAB); | |
_tokens.add(new Token(_WHITESPACE, value, start, _offset)); | |
} else { | |
var value = _readWhile((c) => c != _openDelimiter && c != _NEWLINE); | |
_tokens.add(new Token(_TEXT, value, start, _offset)); | |
} | |
} | |
} | |
void _scanMustacheTag() { | |
int start = _offset; | |
int sigil = 0; | |
_expect(_openDelimiter); | |
// If just a single delimeter character then this is a text token. | |
if (_openDelimiterInner != null && _peek() != _openDelimiterInner) { | |
var value = new String.fromCharCode(_openDelimiter); | |
_tokens.add(new Token(_TEXT, value, start, _offset)); | |
return; | |
} | |
if (_openDelimiterInner != null) _expect(_openDelimiterInner); | |
if (_peek() == _OPEN_MUSTACHE) { | |
_scanTripleMustacheTag(start); | |
return; | |
} | |
_scanTagWhitespace(); | |
if (_isSigil(_peek())) sigil = _read(); | |
if (sigil == _EQUAL) { | |
_scanChangeDelimiterTag(start); | |
return; | |
} else if (sigil == _EXCLAIM) { | |
_scanCommentTag(start); | |
return; | |
} | |
_scanTagWhitespace(); | |
var identifier = _scanTagIdentifier(); | |
if (identifier.isEmpty) throw _error('Expected tag identifier.'); | |
_scanTagWhitespace(); | |
if (_closeDelimiterInner != null) _expect(_closeDelimiterInner); | |
_expect(_closeDelimiter); | |
const sigils = const <int, int> { | |
0: _VARIABLE, | |
_HASH: _OPEN_SECTION, | |
_FORWARD_SLASH: _CLOSE_SECTION, | |
_CARET: _OPEN_INV_SECTION, | |
_GT: _PARTIAL, | |
_AMP: _UNESC_VARIABLE | |
}; | |
var type = sigils[sigil]; | |
var indent = type == _PARTIAL ? _getPrecedingWhitespace() : ''; | |
_tokens.add(new Token(type, identifier, start, _offset, indent: indent)); | |
} | |
_errorEofInTag() => throw _error('Tag not closed before the end of the template.'); | |
_scanTagWhitespace() { | |
if (_lenient) { | |
_readWhile(_isWhitespace, _errorEofInTag); | |
} else { | |
_readWhile((c) => c == _SPACE, _errorEofInTag); | |
if (_isWhitespace(_peek())) | |
throw _error('Tags may not contain newlines or tabs.'); | |
} | |
} | |
String _scanTagIdentifier({bool tripleMo: false}) { | |
var delim = _closeDelimiterInner != null | |
? _closeDelimiterInner | |
: _closeDelimiter; | |
if (_lenient) { | |
return _readWhile( | |
(c) => c != delim | |
|| tripleMo && c != _CLOSE_MUSTACHE, | |
_errorEofInTag).trim(); | |
} else { | |
var id = _readWhile(_isAlphanum, _errorEofInTag); | |
_scanTagWhitespace(); | |
if (_peek() != delim) throw _error('Unless in lenient mode tags may only ' | |
'contain the characters a-z, A-Z, minus, underscore and period.'); | |
return id; | |
} | |
} | |
// Capture whitespace preceding a partial tag so it can used for indentation | |
// during rendering. | |
String _getPrecedingWhitespace() { | |
var indent = ''; | |
if (_tokens.isNotEmpty) { | |
if (_tokens.length == 1 && _tokens.last.type == _WHITESPACE) { | |
indent = _tokens.last.value; | |
} else if (_tokens.length > 1) { | |
if (_tokens.last.type == _WHITESPACE | |
&& _tokens[_tokens.length - 2].type == _NEWLINE) { | |
indent = _tokens.last.value; | |
} | |
} | |
} | |
return indent; | |
} | |
void _scanTripleMustacheTag(int start) { | |
_expect(_OPEN_MUSTACHE); | |
var value = _scanTagIdentifier(); | |
_expect(_CLOSE_MUSTACHE); | |
if (_closeDelimiterInner != null) _expect(_closeDelimiterInner); | |
_expect(_closeDelimiter); | |
_tokens.add(new Token(_UNESC_VARIABLE, value, start, _offset)); | |
} | |
void _scanCommentTag(int start) { | |
var value = _closeDelimiterInner != null | |
? _readWhile((c) => c != _closeDelimiterInner, _errorEofInTag).trim() | |
: _readWhile((c) => c != _closeDelimiter, _errorEofInTag).trim(); | |
if (_closeDelimiterInner != null) _expect(_closeDelimiterInner); | |
_expect(_closeDelimiter); | |
_tokens.add(new Token(_COMMENT, value, start, _offset)); | |
} | |
//TODO consider changing the parsing here to use a regexp. It will probably | |
// be simpler to read. | |
void _scanChangeDelimiterTag(int start) { | |
// Open delimiter characters and = have already been read. | |
var delimiterInner = _closeDelimiterInner; | |
var delimiter = _closeDelimiter; | |
_scanTagWhitespace(); | |
int c; | |
c = _read(); | |
if (c == _EQUAL) throw _error('Incorrect change delimiter tag.'); | |
_openDelimiter = c; | |
c = _read(); | |
if (_isWhitespace(c)) { | |
_openDelimiterInner = null; | |
} else { | |
_openDelimiterInner = c; | |
} | |
_scanTagWhitespace(); | |
c = _read(); | |
if (_isWhitespace(c) || c == _EQUAL) | |
throw _error('Incorrect change delimiter tag.'); | |
if (_isWhitespace(_peek()) || _peek() == _EQUAL) { | |
_closeDelimiterInner = null; | |
_closeDelimiter = c; | |
} else { | |
_closeDelimiterInner = c; | |
_closeDelimiter = _read(); | |
} | |
_scanTagWhitespace(); | |
_expect(_EQUAL); | |
_scanTagWhitespace(); | |
_expect(delimiterInner); | |
_expect(delimiter); | |
var value = _delimiterString( | |
_openDelimiter, | |
_openDelimiterInner, | |
_closeDelimiterInner, | |
_closeDelimiter); | |
_tokens.add(new Token(_CHANGE_DELIMITER, value, start, _offset)); | |
} | |
m.TemplateException _error(String message) { | |
return new TemplateException(message, _templateName, _source, _offset); | |
} | |
} | |
_delimiterString(int open, int openInner, int closeInner, int close) { | |
var buffer = new StringBuffer(); | |
buffer.writeCharCode(open); | |
if (openInner != null) buffer.writeCharCode(openInner); | |
buffer.write(' '); | |
if (closeInner != null) buffer.writeCharCode(closeInner); | |
buffer.writeCharCode(close); | |
return buffer.toString(); | |
} | |
List<int> _parseDelimiterString(String s) { | |
if (s == null) return [_OPEN_MUSTACHE, _OPEN_MUSTACHE, | |
_CLOSE_MUSTACHE, _CLOSE_MUSTACHE]; | |
if (s.length == 3) { | |
return [s.codeUnits[0], null, null, s.codeUnits[2]]; | |
} else if (s.length == 5) { | |
return [s.codeUnits[0], | |
s.codeUnits[1], | |
s.codeUnits[3], | |
s.codeUnits[4]]; | |
} else { | |
throw new TemplateException( | |
'Invalid delimiter string $s', null, null, null); | |
} | |
} | |
const int _EOF = -1; | |
const int _TAB = 9; | |
const int _NEWLINE = 10; | |
const int _RETURN = 13; | |
const int _SPACE = 32; | |
const int _EXCLAIM = 33; | |
const int _QUOTE = 34; | |
const int _APOS = 39; | |
const int _HASH = 35; | |
const int _AMP = 38; | |
const int _PERIOD = 46; | |
const int _FORWARD_SLASH = 47; | |
const int _LT = 60; | |
const int _EQUAL = 61; | |
const int _GT = 62; | |
const int _CARET = 94; | |
const int _OPEN_MUSTACHE = 123; | |
const int _CLOSE_MUSTACHE = 125; | |
const int _A = 65; | |
const int _Z = 90; | |
const int _a = 97; | |
const int _z = 122; | |
const int _0 = 48; | |
const int _9 = 57; | |
const int _UNDERSCORE = 95; | |
const int _MINUS = 45; |