Start updating scanner

commit: 0b24c372661d2678d9b215b67e70967651b1d8aa [log] [tgz]
author: Greg Lowe <greg@vis.net.nz> Wed Feb 11 20:46:25 2015 +1300
committer: Greg Lowe <greg@vis.net.nz> Wed Feb 11 20:46:25 2015 +1300
tree: 52ea416285f4c23d20a75fcff406786d66b23829
parent: b33eaa5bae50ead0b01c3ff714450a42bfd4bca0 [diff]
diff --git a/lib/src/char_reader.dart b/lib/src/char_reader.dart
index 66740f9..e474670 100644
--- a/lib/src/char_reader.dart
+++ b/lib/src/char_reader.dart

@@ -1,5 +1,6 @@
 part of mustache;
 
+// TODO remove line and column. Not required anymore.
 class _CharReader {
 
   String _source;

diff --git a/lib/src/node.dart b/lib/src/node.dart
index 47f468d..7b3531c 100644
--- a/lib/src/node.dart
+++ b/lib/src/node.dart

@@ -2,25 +2,21 @@
 
 class _Node {
   
-  _Node(this.type, this.value, this.line, this.column, {this.indent});
+  _Node(this.type, this.value, this.start, this.end, {this.indent});
   
-  _Node.fromToken(_Token token)
+  _Node.fromToken(_Token token, {int start})
     : type = token.type,
       value = token.value,
-      line = token.line,
-      column = token.column,
+      start = start == null ? token.start : start,
+      end = token.end,
       indent = token.indent;
-
+  
   final int type;
   final String value;
-  final int line;
-  final int column;
+  final int start;
+  int end;
   final String indent;
   final List<_Node> children = new List<_Node>();
   
-   //TODO ideally these could be made final.
-   int start;
-   int end;
-  
   String toString() => '_Node: ${_tokenTypeString(type)}';
 }

diff --git a/lib/src/parse.dart b/lib/src/parse.dart
index 40ecd0d..e523667 100644
--- a/lib/src/parse.dart
+++ b/lib/src/parse.dart

@@ -16,12 +16,13 @@
   tokens = _removeStandaloneWhitespace(tokens);
   tokens = _mergeAdjacentText(tokens);
 
+  //FIXME this should be handled by scanner now.
   checkTagChars(_Token t) {
       if (!lenient && !_validTag.hasMatch(t.value)) {
         throw new _TemplateException(
           'Tag contained invalid characters in name, '
           'allowed: 0-9, a-z, A-Z, underscore, and minus',
-          templateName, source, t.offset);
+          templateName, source, t.start);
       }
   }
 
@@ -42,8 +43,9 @@
       case _OPEN_SECTION:
       case _OPEN_INV_SECTION:
         checkTagChars(t);
-        var child = new _Node.fromToken(t);
-        child.start = t.offset;
+        // Store the start, end of the inner string content not
+        // including the tag.
+        var child = new _Node.fromToken(t, start: t.end);
         stack.last.children.add(child);
         stack.add(child);
         break;
@@ -54,10 +56,10 @@
         if (stack.last.value != t.value) {
           throw new _TemplateException(
             "Mismatched tag, expected: '${stack.last.value}', was: '${t.value}'",
-            templateName, source, t.offset);
+            templateName, source, t.start);
         }
   
-        stack.last.end = t.offset;
+        stack.last.end = t.start;
         
         stack.removeLast();
         break;
@@ -144,11 +146,11 @@
   while ((t = read()) != null) {
     if (t.type == _LINE_END) {
       // Convert line end to text token
-      add(new _Token(_TEXT, t.value, t.line, t.column));
+      add(new _Token(_TEXT, t.value, t.start, t.end));
       standaloneLineCheck();
     } else if (t.type == _WHITESPACE) {
       // Convert whitespace to text token
-      add(new _Token(_TEXT, t.value, t.line, t.column));
+      add(new _Token(_TEXT, t.value, t.start, t.end));
     } else {
       // Preserve token
       add(t);
@@ -179,7 +181,7 @@
         buffer.write(tokens[i].value);
         i++;
       }
-      result.add(new _Token(_TEXT, buffer.toString(), t.line, t.column));
+      result.add(new _Token(_TEXT, buffer.toString(), t.start, t.end));
     }
   }
   return result;

diff --git a/lib/src/scanner.dart b/lib/src/scanner.dart
index 1a1c01c..09430bf 100644
--- a/lib/src/scanner.dart
+++ b/lib/src/scanner.dart

@@ -1,25 +1,5 @@
 part of mustache;

 

-const int _EOF = -1;

-const int _TAB = 9;

-const int _NEWLINE = 10;

-const int _RETURN = 13;

-const int _SPACE = 32;

-const int _EXCLAIM = 33;

-const int _QUOTE = 34;

-const int _APOS = 39;

-const int _HASH = 35;

-const int _AMP = 38;

-const int _PERIOD = 46;

-const int _FORWARD_SLASH = 47;

-const int _LT = 60;

-const int _EQUAL = 61;

-const int _GT = 62;

-const int _CARET = 94;

-

-const int _OPEN_MUSTACHE = 123;

-const int _CLOSE_MUSTACHE = 125;

-

 class _Scanner {

   

 	_Scanner(String source, this._templateName, String delimiters, {bool lenient: true})

@@ -36,8 +16,6 @@
 

 	final String _templateName;

 	final String _source;

-	

-	//FIXME not used yet.

 	final bool _lenient;

 	

 	_CharReader _r;

@@ -61,40 +39,7 @@
 	

 	int _read() => _r.read();

 	int _peek() => _r.peek();

-

-	_addStringToken(int type) {

-		int l = _r.line, c = _r.column;

-		var value = type == _TEXT ? _readLine() : _readString();

-		if (type != _TEXT && type != _COMMENT) value = value.trim();		

-		_tokens.add(new _Token(type, value, l, c));

-	}

-

-	_addCharToken(int type, int charCode) {

-		int l = _r.line, c = _r.column;

-		var value = new String.fromCharCode(charCode);

-		_tokens.add(new _Token(type, value, l, c));

-	}

-

-	_addPartialToken() {

-    // Capture whitespace preceding a partial tag so it can used for indentation during rendering.

-	  var indent = '';

-	  if (_tokens.isNotEmpty) {

-	    if (_tokens.length == 1 && _tokens.last.type == _WHITESPACE) {

-	      indent = _tokens.last.value;

-	    

-	    } else if (_tokens.length > 1) {

-	      if (_tokens.last.type == _WHITESPACE

-	          && _tokens[_tokens.length - 2].type == _NEWLINE) {

-	        indent = _tokens.last.value;

-	      }

-	    }

-	  }

-	  

-	  int l = _r.line, c = _r.column;

-    var value = _readString().trim();

-    _tokens.add(new _Token(_PARTIAL, value, l, c, indent: indent));

-	}

-	

+		

 	_expect(int expectedCharCode) {

 		int c = _read();

 

@@ -110,83 +55,83 @@
 		}

 	}

 

-  // FIXME probably need to differentiate between searching for open, or close

-  // delimiter.

-	String _readString() => _r.readWhile(

-		(c) => c != _closeDelimiterInner

-		    //FIXME && (_closeDelimiterInner == null && c != _closeDelimiter)

-		    && c != _closeDelimiter

-		    && c != _openDelimiter

-		    && c != _openDelimiterInner

-		    && c != _EOF); //FIXME EOF should be error.

-

-	// FIXME probably need to differentiate between searching for open, or close

-	// delimiter.

-	String _readLine() => _r.readWhile(

-		(c) => c != _closeDelimiterInner

-        //FIXME && (_closeDelimiterInner == null && c != _closeDelimiter)

-        && c != _closeDelimiter

-        && c != _openDelimiter

-        && c != _openDelimiterInner

-		    && c != _EOF   //FIXME EOF should be error.

-		    && c != _NEWLINE);

-

   //FIXME unless in lenient mode only allow spaces.

 	String _readTagWhitespace() => _r.readWhile(_isWhitespace);

 	

 	bool _isWhitespace(int c)

 	  => const [_SPACE, _TAB , _NEWLINE, _RETURN].contains(c);

 	

+	// A sigil is the word commonly used to describe the special character at the

+	// start of mustache tag i.e. #, ^ or /.

+	bool _isSigil(int c)

+	 => const [_HASH, _CARET, _FORWARD_SLASH, _GT, _AMP, _EXCLAIM, _EQUAL]

+	   .contains(c);

+	

+	bool _isAlphanum(int c) 

+    => (c >= _a && c <= _z)

+        || (c >= _A && c <= _Z)

+        || (c >= _0 && c <= _9)

+        || c == _MINUS

+        || c == _UNDERSCORE

+        || c == _PERIOD;

+

 	_scanText() {

+	  // Emits tokens for literal template text. Returns when EOF or the open

+	  // delimiter is the next character. Each token is created with the reader

+	  // offset before it was read and the reader offset after it was read.

 		while(true) {

 		  int c = _peek();

-			

+		  int start = _r.offset;

+		  

 		  if (c == _EOF) {

 		    return; 

 		  

 		  } else if (c == _openDelimiter) { 

 			  return;

-			

-		 } else if (c == _RETURN) {

+			  

+      // Newlines and whitespace have separate tokens so the standalone lines

+			// logic can be implemented.

+		  } else if (c == _NEWLINE) {

+        _read();

+        var value = new String.fromCharCode(c);

+        _tokens.add(new _Token(_LINE_END, value, start, _r.offset));

+			  

+		  } else if (c == _RETURN) {

         _read();

         if (_peek() == _NEWLINE) {

           _read();

-          _tokens.add(new _Token(_LINE_END, '\r\n', _r.line, _r.column));

+          _tokens.add(new _Token(_LINE_END, '\r\n', start, _r.offset));

         } else {

-          _addCharToken(_TEXT, _RETURN);

+          // A carriage return not followed by a newline is plain text. Emit

+          // the '\r' itself so the source character is preserved.

+          var value = new String.fromCharCode(_RETURN);

+          _tokens.add(new _Token(_TEXT, value, start, _r.offset));

         }			  

-			} else if (c == _NEWLINE) {

-			  _read();

-			  _addCharToken(_LINE_END, _NEWLINE);

 			

 			} else if (c == _SPACE || c == _TAB) {

         var value = _r.readWhile((c) => c == _SPACE || c == _TAB);

-        _tokens.add(new _Token(_WHITESPACE, value, _r.line, _r.column));

+        _tokens.add(new _Token(_WHITESPACE, value, start, _r.offset));

 			

       //FIXME figure out why this is required

 			} else if (c == _closeDelimiter || c == _closeDelimiterInner) {

         _read();

-        _addCharToken(_TEXT, c);

-        

+        var value = new String.fromCharCode(c);

+        _tokens.add(new _Token(_TEXT, value, start, _r.offset));

+			 

 			} else {

-			  _addStringToken(_TEXT);

+        var value = _r.readWhile((c) => c != _openDelimiter

+                                        && c != _EOF

+                                        && c != _NEWLINE);

+        _tokens.add(new _Token(_TEXT, value, start, _r.offset));

 			}

 		}	

 	}

 	

 	//TODO consider changing the parsing here to use a regexp. It will probably

 	// be simpler to read.

-	_scanChangeDelimiterTag() {

-	  // Open delimiter characters have already been read.

-	  _expect(_EQUAL);

-	  

-	  int line = _r.line;

-	  int col = _r.column;

+	_scanChangeDelimiterTag(int start) {

+	  // Open delimiter characters and = have already been read.

 	  

     var delimiterInner = _closeDelimiterInner;

     var delimiter = _closeDelimiter;

     

-    _readTagWhitespace();

+    _scanTagWhitespace();

     

     int c;

     c = _r.read();

@@ -201,7 +146,7 @@
       _openDelimiterInner = c;

     }

     

-    _readTagWhitespace();

+    _scanTagWhitespace();

     

     c = _r.read();

     

@@ -215,9 +160,9 @@
       _closeDelimiter = _read();

     }

     

-    _readTagWhitespace();

+    _scanTagWhitespace();

     _expect(_EQUAL);

-    _readTagWhitespace();

+    _scanTagWhitespace();

      

      _expect(delimiterInner);

      _expect(delimiter);

@@ -228,101 +173,118 @@
          _closeDelimiterInner,

          _closeDelimiter);

           

-     _tokens.add(new _Token(_CHANGE_DELIMITER, value, line, col));

+     _tokens.add(new _Token(_CHANGE_DELIMITER, value, start, _r.offset));

+	}

+

+	_scanTagWhitespace() {

+	  // Skips whitespace inside a tag. In lenient mode every character accepted

+	  // by _isWhitespace is skipped. Otherwise only spaces are skipped, and any

+	  // other whitespace character that follows is reported as an error.

+	  if (_lenient) {

+	    _r.readWhile(_isWhitespace);	    

+	  } else {

+	    _r.readWhile((c) => c == _SPACE);

+	    if (_isWhitespace(_peek()))

+	      throw _error('Tags may not contain newlines or tabs.');

+	  }

 	}

 	

-	_scanMustacheTag() {

-	  int startOffset = _r.offset;

-	  

-		_expect(_openDelimiter);

-

-		// If just a single mustache, return this as a text token.

-		//FIXME is this missing a read call to advance ??

-		if (_openDelimiterInner != null && _peek() != _openDelimiterInner) {

-			_addCharToken(_TEXT, _openDelimiter);

-			return;

-		}

-

-		if (_openDelimiterInner != null) _expect(_openDelimiterInner);

-

-    // Escaped text {{{ ... }}}

-		if (_peek() == _OPEN_MUSTACHE) {

-		  _read();

-      _addStringToken(_UNESC_VARIABLE);

-      _expect(_CLOSE_MUSTACHE);

-      _expect(_closeDelimiterInner);

-      _expect(_closeDelimiter);

+	String _scanTagIdentifier() {

+	  if (_lenient) {

+	    return _closeDelimiterInner != null

+	        ? _r.readWhile((c) => c != _closeDelimiterInner) //FIXME reimplement readWhile to throw error on eof.

+	        : _r.readWhile((c) => c != _closeDelimiter);

+	  } else {

+	    return _r.readWhile(_isAlphanum);

+	  }

+	}

+	

+  _scanMustacheTag() {

+    // Scans one tag beginning at the open delimiter and adds a single token

+    // for it. Triple mustache, change delimiter and comment tags are handed

+    // off to their own scan methods. A lone open delimiter character becomes

+    // a text token.

+    int start = _r.offset;

+    int sigil = 0;

+     

+    _expect(_openDelimiter);

+    

+    //FIXME move this code into _scan(). Need a peek2()

+    // If just a single delimiter character then this is a text token.

+    // _expect() above has already consumed the delimiter, so nothing more is

+    // read here; the following character is left for the next scan step.

+    if (_openDelimiterInner != null && _peek() != _openDelimiterInner) {

+      var value = new String.fromCharCode(_openDelimiter);

+      _tokens.add(new _Token(_TEXT, value, start, _r.offset));

+      return;

+    }

+    

+    if (_openDelimiterInner != null) _expect(_openDelimiterInner);

+     

+    if (_peek() == _OPEN_MUSTACHE) {

+      _scanTripleMustacheTag(start);

+      return;

+    }

+ 

+    _scanTagWhitespace();

+ 

+    // A sigil of 0 means no sigil was present, i.e. a plain variable tag.

+    if (_isSigil(_peek())) sigil = _read();

+ 

+    if (sigil == _EQUAL) {

+      _scanChangeDelimiterTag(start);

+      return;

+    } else if (sigil == _EXCLAIM) {

+      _scanCommentTag(start);

+      return;

+    }

+ 

+    _scanTagWhitespace();

+ 

+    var identifier = _scanTagIdentifier();

 

+    var value = identifier.trim();

+    

+    if (value.isEmpty) throw _error('Expected tag identifier.');

+    

+    _scanTagWhitespace();

+ 

+    if (_closeDelimiterInner != null) _expect(_closeDelimiterInner);

+    _expect(_closeDelimiter);

+

+    // Maps the sigil character to the token type of the tag.

+    const sigils = const <int, int> {

+      0: _VARIABLE,

+      _HASH: _OPEN_SECTION,

+      _FORWARD_SLASH: _CLOSE_SECTION,

+      _CARET: _OPEN_INV_SECTION,

+      _GT: _PARTIAL,

+      _AMP: _UNESC_VARIABLE

+    };

+    

+    var type = sigils[sigil];

+    

+    if (type == _PARTIAL) {

+      //FIXME do magic to get indent text.

+      //Consider whether it makes sense to move this into parsing.

+      _tokens.add(new _Token(type, value, start, _r.offset, indent: ''));

+    } else {

+      _tokens.add(new _Token(type, value, start, _r.offset));

+    }

+  }

+	

+  _scanTripleMustacheTag(int start) {

+    _expect(_OPEN_MUSTACHE);

+    var value = _r.readWhile((c) => c != _CLOSE_MUSTACHE).trim();

+    //FIXME lenient/strict mode identifier parsing.

+    _expect(_CLOSE_MUSTACHE);

+    if (_closeDelimiterInner != null) _expect(_closeDelimiterInner);

+    _expect(_closeDelimiter);

+    _tokens.add(new _Token(_UNESC_VARIABLE, value, start, _r.offset));

+  }

+  

+  _scanCommentTag(int start) {

+    var value = _closeDelimiterInner != null

+        ? _r.readWhile((c) => c != _closeDelimiterInner).trim()

+        : _r.readWhile((c) => c != _closeDelimiter).trim();

+    if (_closeDelimiterInner != null) _expect(_closeDelimiterInner);

+    _expect(_closeDelimiter);

+    _tokens.add(new _Token(_COMMENT, value, start, _r.offset));

+  }

 		

-		switch(_peek()) {

-			case _EOF:

-				throw new _TemplateException('Unexpected end of input',

-				    _templateName, _source,  _r.offset);

-  			

-			// Escaped text {{& ... }}

-			case _AMP:

-				_read();

-				_addStringToken(_UNESC_VARIABLE);

-				break;

-

-			// Comment {{! ... }}

-			case _EXCLAIM:

-				_read();

-				_addStringToken(_COMMENT);

-				break;

-

-			// Partial {{> ... }}

-			case _GT:

-				_read();

-				_addPartialToken();

-				break;

-

-			// Open section {{# ... }}

-			case _HASH:

-				_read();

-				_addStringToken(_OPEN_SECTION);

-				break;

-

-			// Open inverted section {{^ ... }}

-			case _CARET:

-				_read();

-				_addStringToken(_OPEN_INV_SECTION);

-				break;

-

-			// Close section {{/ ... }}

-			case _FORWARD_SLASH:

-				_read();

-				_addStringToken(_CLOSE_SECTION);

-		    // Store source file offset, so source substrings can be extracted for

-        // lambdas.

-				_tokens.last.offset = startOffset;

-				break;

-				

-			// Change delimiter {{= ... =}}

-			case _EQUAL:

-			  _scanChangeDelimiterTag();

-        return;

-

-			// Variable {{ ... }}

-			default:

-				_addStringToken(_VARIABLE);

-		}

-

-		if (_closeDelimiterInner != null) _expect(_closeDelimiterInner);

-		_expect(_closeDelimiter);

-		

-		// Store source file offset, so source substrings can be extracted for

-		// lambdas.

-		if (_tokens.isNotEmpty) {

-		  var t = _tokens.last;

-		  if (t.type == _OPEN_SECTION || t.type == _OPEN_INV_SECTION) {

-		    t.offset = _r.offset;

-		  }

-		}

+	TemplateException _error(String message) {

+	  return new _TemplateException(message, _templateName, _source, _r.offset);

 	}

 }

 

@@ -350,4 +312,34 @@
   } else {

     throw 'Invalid delimiter string $s'; //FIXME

   }  

-}
\ No newline at end of file
+}

+

+const int _EOF = -1;

+const int _TAB = 9;

+const int _NEWLINE = 10;

+const int _RETURN = 13;

+const int _SPACE = 32;

+const int _EXCLAIM = 33;

+const int _QUOTE = 34;

+const int _APOS = 39;

+const int _HASH = 35;

+const int _AMP = 38;

+const int _PERIOD = 46;

+const int _FORWARD_SLASH = 47;

+const int _LT = 60;

+const int _EQUAL = 61;

+const int _GT = 62;

+const int _CARET = 94;

+

+const int _OPEN_MUSTACHE = 123;

+const int _CLOSE_MUSTACHE = 125;

+

+const int _A = 65;

+const int _Z = 90;

+const int _a = 97;

+const int _z = 122;

+const int _0 = 48;

+const int _9 = 57;

+

+const int _UNDERSCORE = 95;

+const int _MINUS = 45;


diff --git a/lib/src/token.dart b/lib/src/token.dart
index a435455..15d147a 100644
--- a/lib/src/token.dart
+++ b/lib/src/token.dart

@@ -1,20 +1,18 @@
 part of mustache;
 
 class _Token {
-  _Token(this.type, this.value, this.line, this.column, {this.indent});
+  
+  _Token(this.type, this.value, this.start, this.end, {this.indent});
   
   final int type;
-  final String value; 
-  final int line;
-  final int column;
+  final String value;
+  
+  final int start;
+  final int end;
   final String indent;
   
-  // Store offsets to extract text from source for lambdas.
-  // Only used for section, inverse section and close section tags.
-  int offset;
-  
   toString() => "${_tokenTypeString(type)}: "
-    "\"${value.replaceAll('\n', '\\n')}\" $line:$column";
+    "\"${value.replaceAll('\n', '\\n')}\"";
 }
 
 //FIXME use enums
commit	0b24c372661d2678d9b215b67e70967651b1d8aa	[log] [tgz]
author	Greg Lowe <greg@vis.net.nz>	Wed Feb 11 20:46:25 2015 +1300
committer	Greg Lowe <greg@vis.net.nz>	Wed Feb 11 20:46:25 2015 +1300
tree	52ea416285f4c23d20a75fcff406786d66b23829
parent	b33eaa5bae50ead0b01c3ff714450a42bfd4bca0 [diff]