Improve parser
diff --git a/lib/src/node.dart b/lib/src/node.dart
index 7d74dc3..5228486 100644
--- a/lib/src/node.dart
+++ b/lib/src/node.dart
@@ -44,7 +44,7 @@
class TextNode extends Node {
TextNode(this.text, int start, int end) : super(start, end);
-
+
final String text;
String toString() => '(TextNode "$_debugText" $start $end)';
@@ -54,6 +54,7 @@
return t.length < 50 ? t : t.substring(0, 48) + '...';
}
+ // Remove me.
// Only used for testing.
bool operator ==(o) => o is TextNode
&& text == o.text
diff --git a/lib/src/parser.dart b/lib/src/parser.dart
index 930fc63..cd55bce 100644
--- a/lib/src/parser.dart
+++ b/lib/src/parser.dart
@@ -14,6 +14,16 @@
return parser.parse();
}
+class Tag {
+ Tag(this.sigil, this.name, this.start, this.end);
+ final String sigil;
+ final String name;
+ final int start;
+ final int end;
+ //TODO parse the tag contents.
+ //final List<List<String>> arguments;
+}
+
class Parser {
Parser(this._source, this._templateName, this._delimiters, {lenient: false})
@@ -56,12 +66,49 @@
return list;
}
+ // Add a text node to top most section on the stack and merge consecutive
+ // text nodes together.
+ void _appendTextToken(Token token) {
+ assert(const [TokenType.text, TokenType.lineEnd, TokenType.whitespace]
+ .contains(token.type));
+ var children = _stack.last.children;
+ if (children.isEmpty || children.last is! TextNode) {
+ children.add(new TextNode(token.value, token.start, token.end));
+ } else {
+ var last = children.removeLast();
+ var node = new TextNode(last.text + token.value, last.start, token.end);
+ children.add(node);
+ }
+ }
+
+ // Add the node to top most section on the stack. If a section node then
+ // push it onto the stack, if a close section tag, then pop the stack.
+ void _appendTag(Tag tag, Node node) {
+ switch (tag.sigil) {
+
+ // Section and inverse section.
+ case '#':
+ case '^':
+ _stack.last.children.add(node);
+ _stack.add(node);
+ break;
+
+ // Close section tag
+ case '/':
+ if (tag.name != _stack.last.name) throw 'boom!'; //TODO error message.
+ _stack.removeLast();
+ break;
+
+ default:
+ if (node != null) _stack.last.children.add(node);
+ }
+ }
+
List<Node> parse() {
_scanner = new Scanner(_source, _templateName, _delimiters,
lenient: _lenient);
_tokens = _scanner.scan();
- _tokens = _removeStandaloneWhitespace(_tokens);
_currentDelimiters = _delimiters;
@@ -69,27 +116,17 @@
for (var token = _peek(); token != null; token = _peek()) {
switch(token.type) {
+
case TokenType.text:
- case TokenType.whitespace:
- case TokenType.lineEnd:
- // Merge adjacent text nodes. This will improve the
- // rendering performance.
- bool isMergeable(Token t)
- => const [TokenType.text,
- TokenType.whitespace,
- TokenType.lineEnd].contains(t.type);
- var tokens = _readWhile(isMergeable);
- var str = tokens.map((t) => t.value).join();
- _stack.last.children.add(
- new TextNode(str, token.start, tokens.last.end));
+ case TokenType.whitespace:
+ _read();
+ _appendTextToken(token);
break;
case TokenType.openDelimiter:
- if (token.value == '{{{') {
- _parseTripleMustacheTag();
- } else {
- _parseTag();
- }
+ var tag = _readTag();
+ var node = _createNodeFromTag(tag);
+ if (tag != null) _appendTag(tag, node);
break;
case TokenType.changeDelimiter:
@@ -97,6 +134,14 @@
_currentDelimiters = token.value;
break;
+ //TODO think about this. It looks like this loop will usually just call
+ // into parseLine(). May be able to simplify the logic.
+ case TokenType.lineEnd:
+ //TODO the first line can be a standalone line too, and there is
+ // no lineEnd. Perhaps _parseLine(firstLine: true)?
+ _parseLine();
+ break;
+
default:
throw 'boom!'; //TODO error message.
}
@@ -108,22 +153,115 @@
return _stack.last.children;
}
- void _parseTripleMustacheTag() {
- var open = _read();
- var name = _parseIdentifier();
- var close = _read();
- _stack.last.children.add(
- new VariableNode(name, open.start, open.end, escape: false));
+ // Handle standalone tags and indented partials.
+ //
+ // A "standalone tag" in the spec is a tag one a line where the line only
+ // contains whitespace. During rendering the whitespace is ommitted.
+ // Standalone partials also indent their content to match the tag during
+ // rendering.
+
+ // match:
+ // newline whitespace openDelimiter any* closeDelimiter whitespace newline
+ //
+ // Where newline can also mean start/end of the source.
+ void _parseLine() {
+ //TODO handle EOFs. i.e. check for null return from peek.
+ //TODO make this EOF handling clearer.
+
+ assert(_peek().type == TokenType.lineEnd); //TODO expect.
+ var precedingLineEnd = _read();
+
+ // The scanner guarantees that there will only be a single whitespace token,
+ // there are never consecutive whitespace tokens.
+ var precedingWhitespace =
+ _peek() != null && _peek().type == TokenType.whitespace ? _read() : null;
+
+ Tag tag;
+ Node tagNode;
+ if (_peek() != null && _peek().type == TokenType.openDelimiter) {
+ tag = _readTag();
+ tagNode = _createNodeFromTag(tag,
+ partialIndent: precedingWhitespace == null
+ ? ''
+ : precedingWhitespace.value);
+ }
+
+ var followingWhitespace =
+ _peek() != null && _peek().type == TokenType.whitespace ? _read() : null;
+
+ if (precedingLineEnd != null) _appendTextToken(precedingLineEnd);
+
+ if (tag != null &&
+ (_peek() == null || _peek().type == TokenType.lineEnd) &&
+ const ['#', '/', '^', '>'].contains(tag.sigil)) {
+
+ // This is a standalone line, so do not create text nodes for whitespace,
+ // or the following newline.
+
+ _appendTag(tag, tagNode);
+
+ } else {
+
+ // This is not a standalone line so add the whitespace to the ast.
+ if (precedingWhitespace != null) _appendTextToken(precedingWhitespace);
+
+ // Can be null for comment tags, or close section tags, or if this isn't
+ // a standalone line.
+ if (tag != null) _appendTag(tag, tagNode);
+
+ if (followingWhitespace != null) _appendTextToken(followingWhitespace);
+ }
}
- void _parseTag() {
+ Node _createNodeFromTag(Tag tag, {String partialIndent: ''}) {
+ Node node = null;
+ switch (tag.sigil) {
+
+ // Section and inverse section.
+ case '#':
+ case '^':
+ bool inverse = tag.sigil == '^';
+ node = new SectionNode(tag.name, tag.start, tag.end,
+ _currentDelimiters, inverse: inverse);
+ break;
+
+ // Variable tag or unescaped variable tag.
+ case '&':
+ case '':
+ bool escape = tag.sigil == '';
+ node = new VariableNode(tag.name, tag.start, tag.end, escape: escape);
+ break;
+
+ // Partial tag.
+ case '>':
+ node = new PartialNode(tag.name, tag.start, tag.end, partialIndent);
+ break;
+
+ default:
+ node = null;
+ }
+ return node;
+ }
+
+ // Note the caller is responsible for pushing the returned node onto the
+ // stack. Note this can return null, i.e. for a comment tag.
+ Tag _readTag() {
+
var open = _read();
+ if (open.value == '{{{') {
+ var open = _read();
+ var name = _parseIdentifier();
+ var close = _read();
+ return new Tag('{', name, open.start, open.end);
+ }
+
if (_peek().type == TokenType.whitespace) _read();
- // sigil character, or null. A sigil is the character which identifies which
- // sort of tag it is, i.e. '#', '/', or '>'.
- var sigil = _peek().type == TokenType.sigil ? _read().value : null;
+ // sigil character, or empty string if a variable tag. A sigil is the
+ // character which identifies which sort of tag it is,
+ // i.e. '#', '/', or '>'.
+ var sigil = _peek().type == TokenType.sigil ? _read().value : '';
if (_peek().type == TokenType.whitespace) _read();
@@ -133,38 +271,7 @@
var close = _read();
- if (sigil == '#' || sigil == '^') {
- // Section and inverser section.
- bool inverse = sigil == '^';
- var node = new SectionNode(name, open.start, close.end,
- _currentDelimiters, inverse: inverse);
- _stack.last.children.add(node);
- _stack.add(node);
-
- } else if (sigil == '/') {
- // Close section tag
- if (name != _stack.last.name) throw 'boom!';
- _stack.removeLast();
-
- } else if (sigil == '&' || sigil == null) {
- // Variable and unescaped variable tag
- bool escape = sigil == null;
- _stack.last.children.add(
- new VariableNode(name, open.start, close.end, escape: escape));
-
- } else if (sigil == '>') {
- // Partial tag
- //TODO find precending whitespace.
- var indent = '';
- _stack.last.children.add(
- new PartialNode(name, open.start, close.end, indent));
-
- } else if (sigil == '!') {
- // Ignore comments
-
- } else {
- assert(false); //TODO
- }
+ return new Tag(sigil, name, open.start, close.end);
}
//TODO shouldn't just return a string.
@@ -177,84 +284,6 @@
.trim();
return name;
- }
-
- // Takes a list of tokens, and removes _NEWLINE, and _WHITESPACE tokens.
- // This is used to implement mustache standalone lines.
- // Where TAG is one of: OPEN_SECTION, INV_SECTION, CLOSE_SECTION
- // LINE_END, [WHITESPACE], TAG, [WHITESPACE], LINE_END => LINE_END, TAG
- // WHITESPACE => TEXT
- // LINE_END => TEXT
- // TODO could rewrite this to use a generator, rather than creating an inter-
- // mediate list.
- List<Token> _removeStandaloneWhitespace(List<Token> tokens) {
- int i = 0;
- Token read() { var ret = i < tokens.length ? tokens[i++] : null; return ret; }
- Token peek([int n = 0]) => i + n < tokens.length ? tokens[i + n] : null;
-
- bool isTag(token) => token != null
- && const [TokenType.openDelimiter, TokenType.changeDelimiter].contains(token.type);
-
- bool isWhitespace(token) => token != null && token.type == TokenType.whitespace;
- bool isLineEnd(token) => token != null && token.type == TokenType.lineEnd;
-
- var result = new List<Token>();
- add(token) => result.add(token);
-
- standaloneLineCheck() {
- // Swallow leading whitespace
- // Note, the scanner will only ever create a single whitespace token. There
- // is no need to handle multiple whitespace tokens.
- if (isWhitespace(peek())
- && isTag(peek(1))
- && (isLineEnd(peek(2)) || peek(2) == null)) { // null == EOF
- read();
- } else if (isWhitespace(peek())
- && isTag(peek(1))
- && isWhitespace(peek(2))
- && (isLineEnd(peek(3)) || peek(3) == null)) {
- read();
- }
-
- if ((isTag(peek()) && isLineEnd(peek(1)))
- || (isTag(peek())
- && isWhitespace(peek(1))
- && (isLineEnd(peek(2)) || peek(2) == null))) {
-
- // Add tag
- add(read());
-
- // Swallow trailing whitespace.
- if (isWhitespace(peek()))
- read();
-
- // Swallow line end.
- assert(isLineEnd(peek()));
- read();
-
- standaloneLineCheck(); //FIXME don't use recursion.
- }
- }
-
- // Handle case where first line is a standalone tag.
- standaloneLineCheck();
-
- var t;
- while ((t = read()) != null) {
- if (t.type == TokenType.lineEnd) {
- // Convert line end to text token
- add(new Token(TokenType.text, t.value, t.start, t.end));
- standaloneLineCheck();
- } else if (t.type == TokenType.whitespace) {
- // Convert whitespace to text token
- add(new Token(TokenType.text, t.value, t.start, t.end));
- } else {
- // Preserve token
- add(t);
- }
- }
-
- return result;
}
}
diff --git a/test/parser_test.dart b/test/parser_test.dart
index 48e20cc..708e1dd 100644
--- a/test/parser_test.dart
+++ b/test/parser_test.dart
@@ -152,7 +152,7 @@
var nodes = parser.parse();
expect(nodes, orderedEquals([
new TextNode('abc\n', 0, 4),
- new SectionNode('foo', 3, 25, '{{ }}'),
+ new SectionNode('foo', 4, 25, '{{ }}'),
new TextNode('\nghi', 25, 29)
]));
//TODO figure out correct behaviour.
@@ -169,17 +169,15 @@
]));
});
-
- skip_test('parse partial', () {
+ test('parse partial', () {
var source = 'abc\n {{>foo}}def';
var parser = new Parser(source, 'foo', '{{ }}', lenient: false);
var nodes = parser.parse();
expect(nodes, orderedEquals([
new TextNode('abc\n ', 0, 7),
new PartialNode('foo', 7, 15, ' '),
- new TextNode('ghi', 15, 18)
+ new TextNode('def', 15, 18)
]));
- expect(nodes[1].children, orderedEquals([new TextNode('def', 11, 14)]));
});
});