Move standalone whitespace removal into parse, merge text nodes during parse
diff --git a/lib/src/parse.dart b/lib/src/parse.dart
index d4dcccf..6a92296 100644
--- a/lib/src/parse.dart
+++ b/lib/src/parse.dart
@@ -5,13 +5,14 @@
_Node _parse(String source, bool lenient, String templateName,
Delimiters delimiters) {
+
if (source == null) throw new ArgumentError.notNull('Template source');
- var tokens = _scan(source, lenient, delimiters);
- var ast = _parseTokens(tokens, lenient, templateName);
- return ast;
-}
-
-_Node _parseTokens(List<_Token> tokens, bool lenient, String templateName) {
+
+ var tokens =
+ new _Scanner(source, templateName, delimiters, lenient: lenient).scan();
+
+ tokens = _removeStandaloneWhitespace(tokens);
+ tokens = _mergeAdjacentText(tokens);
var stack = new List<_Node>()..add(new _Node(_OPEN_SECTION, 'root', 0, 0));
@@ -64,3 +65,109 @@
templateName, t.line, t.column);
}
}
+
+// Takes a list of tokens, and removes _NEWLINE, and _WHITESPACE tokens.
+// This is used to implement mustache standalone lines.
+// Where TAG is one of: OPEN_SECTION, INV_SECTION, CLOSE_SECTION
+// LINE_END, [WHITESPACE], TAG, [WHITESPACE], LINE_END => LINE_END, TAG
+// WHITESPACE => TEXT
+// LINE_END => TEXT
+// TODO could rewrite this to use a generator, rather than creating an inter-
+// mediate list.
+List<_Token> _removeStandaloneWhitespace(List<_Token> tokens) {
+ int i = 0;
+ _Token read() { var ret = i < tokens.length ? tokens[i++] : null; /* print('Read: $ret'); */ return ret; }
+ _Token peek([int n = 0]) => i + n < tokens.length ? tokens[i + n] : null;
+
+ bool isTag(token) => token != null
+ && const [_OPEN_SECTION, _OPEN_INV_SECTION, _CLOSE_SECTION, _COMMENT,
+ _PARTIAL, _CHANGE_DELIMITER].contains(token.type);
+
+ bool isWhitespace(token) => token != null && token.type == _WHITESPACE;
+ bool isLineEnd(token) => token != null && token.type == _LINE_END;
+
+ var result = new List<_Token>();
+ add(token) => result.add(token);
+
+ standaloneLineCheck() {
+ // Swallow leading whitespace
+ // Note, the scanner will only ever create a single whitespace token. There
+ // is no need to handle multiple whitespace tokens.
+ if (isWhitespace(peek())
+ && isTag(peek(1))
+ && (isLineEnd(peek(2)) || peek(2) == null)) { // null == EOF
+ read();
+ } else if (isWhitespace(peek())
+ && isTag(peek(1))
+ && isWhitespace(peek(2))
+ && (isLineEnd(peek(3)) || peek(3) == null)) {
+ read();
+ }
+
+ if ((isTag(peek()) && isLineEnd(peek(1)))
+ || (isTag(peek())
+ && isWhitespace(peek(1))
+ && (isLineEnd(peek(2)) || peek(2) == null))) {
+
+ // Add tag
+ add(read());
+
+ // Swallow trailing whitespace.
+ if (isWhitespace(peek()))
+ read();
+
+ // Swallow line end.
+ assert(isLineEnd(peek()));
+ read();
+
+ standaloneLineCheck(); //FIXME don't use recursion.
+ }
+ }
+
+ // Handle case where first line is a standalone tag.
+ standaloneLineCheck();
+
+ var t;
+ while ((t = read()) != null) {
+ if (t.type == _LINE_END) {
+ // Convert line end to text token
+ add(new _Token(_TEXT, t.value, t.line, t.column));
+ standaloneLineCheck();
+ } else if (t.type == _WHITESPACE) {
+ // Convert whitespace to text token
+ add(new _Token(_TEXT, t.value, t.line, t.column));
+ } else {
+ // Preserve token
+ add(t);
+ }
+ }
+
+ return result;
+}
+
+// Merging adjacent text nodes will improve the render speed, but slow down
+// parsing. It will be beneficial where templates are parsed once and rendered
+// a number of times.
+List<_Token> _mergeAdjacentText(List<_Token> tokens) {
+ if (tokens.isEmpty) return <_Token>[];
+
+ var result = new List<_Token>();
+ int i = 0;
+ while(i < tokens.length) {
+ var t = tokens[i];
+
+ if (t.type != _TEXT
+ || (i < tokens.length - 1 && tokens[i + 1].type != _TEXT)) {
+ result.add(tokens[i]);
+ i++;
+ } else {
+ var buffer = new StringBuffer();
+ while(i < tokens.length && tokens[i].type == _TEXT) {
+ buffer.write(tokens[i].value);
+ i++;
+ }
+ result.add(new _Token(_TEXT, buffer.toString(), t.line, t.column));
+ }
+ }
+ return result;
+}
diff --git a/lib/src/scanner.dart b/lib/src/scanner.dart
index ba7670a..61218b4 100644
--- a/lib/src/scanner.dart
+++ b/lib/src/scanner.dart
@@ -1,8 +1,5 @@
part of mustache;
-List<_Token> _scan(String source, bool lenient, Delimiters delimiters)
- => _trim(new _Scanner(source, null, delimiters).scan());
-
const int _EOF = -1;
const int _TAB = 9;
const int _NEWLINE = 10;
@@ -23,96 +20,23 @@
const int _OPEN_MUSTACHE = 123;
const int _CLOSE_MUSTACHE = 125;
-// Takes a list of tokens, and removes _NEWLINE, and _WHITESPACE tokens.
-// This is used to implement mustache standalone lines.
-// Where TAG is one of: OPEN_SECTION, INV_SECTION, CLOSE_SECTION
-// LINE_END, [WHITESPACE], TAG, [WHITESPACE], LINE_END => LINE_END, TAG
-// WHITESPACE => TEXT
-// LINE_END => TEXT
-//TODO Consecutive text tokens will also be merged into a single token. (Do in a separate merge func).
-List<_Token> _trim(List<_Token> tokens) {
- int i = 0;
- _Token read() { var ret = i < tokens.length ? tokens[i++] : null; /* print('Read: $ret'); */ return ret; }
- _Token peek([int n = 0]) => i + n < tokens.length ? tokens[i + n] : null;
-
- bool isTag(token) => token != null
- && const [_OPEN_SECTION, _OPEN_INV_SECTION, _CLOSE_SECTION, _COMMENT,
- _PARTIAL, _CHANGE_DELIMITER].contains(token.type);
-
- bool isWhitespace(token) => token != null && token.type == _WHITESPACE;
- bool isLineEnd(token) => token != null && token.type == _LINE_END;
-
- var result = new List<_Token>();
- add(token) => result.add(token);
-
- standaloneLineCheck() {
- // Swallow leading whitespace
- // Note, the scanner will only ever create a single whitespace token. There
- // is no need to handle multiple whitespace tokens.
- if (isWhitespace(peek())
- && isTag(peek(1))
- && (isLineEnd(peek(2)) || peek(2) == null)) { // null == EOF
- read();
- } else if (isWhitespace(peek())
- && isTag(peek(1))
- && isWhitespace(peek(2))
- && (isLineEnd(peek(3)) || peek(3) == null)) {
- read();
- }
-
- if ((isTag(peek()) && isLineEnd(peek(1)))
- || (isTag(peek())
- && isWhitespace(peek(1))
- && (isLineEnd(peek(2)) || peek(2) == null))) {
-
- // Add tag
- add(read());
-
- // Swallow trailing whitespace.
- if (isWhitespace(peek()))
- read();
-
- // Swallow line end.
- assert(isLineEnd(peek()));
- read();
-
- standaloneLineCheck(); //FIXME don't use recursion.
- }
- }
-
- // Handle case where first line is a standalone tag.
- standaloneLineCheck();
-
- var t;
- while ((t = read()) != null) {
- if (t.type == _LINE_END) {
- // Convert line end to text token
- add(new _Token(_TEXT, t.value, t.line, t.column));
- standaloneLineCheck();
- } else if (t.type == _WHITESPACE) {
- // Convert whitespace to text token
- add(new _Token(_TEXT, t.value, t.line, t.column));
- } else {
- // Preserve token
- add(t);
- }
- }
-
- return result;
-}
-
class _Scanner {
- _Scanner(String source, [this._templateName, Delimiters initial])
+ _Scanner(String source, this._templateName, Delimiters initial, {bool lenient: true})
: _r = new _CharReader(source),
+ _lenient = lenient,
_openDelimiter = (initial == null) ? _OPEN_MUSTACHE : initial.open,
_openDelimiterInner =
(initial == null) ? _OPEN_MUSTACHE : initial.openInner,
_closeDelimiterInner =
(initial == null) ? _CLOSE_MUSTACHE : initial.closeInner,
- _closeDelimiter = (initial == null) ? _CLOSE_MUSTACHE : initial.close;
+ _closeDelimiter = (initial == null) ? _CLOSE_MUSTACHE : initial.close;
final String _templateName;
+
+ //FIXME not used yet.
+ final bool _lenient;
+
_CharReader _r;
List<_Token> _tokens = new List<_Token>();
@@ -125,10 +49,11 @@
List<_Token> scan() {
while(true) {
int c = _peek();
- if (c == _EOF) return _tokens;
+ if (c == _EOF) break;
else if (c == _openDelimiter) _scanMustacheTag();
else _scanText();
}
+ return _tokens;
}
int _read() => _r.read();