pkg/front_end/lib/src/fasta/scanner/array_based_scanner.dart - sdk.git - Git at Google

 // Copyright (c) 2011, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.

 library fasta.scanner.array_based_scanner;

 import 'error_token.dart' show ErrorToken, UnmatchedToken;

 import '../../scanner/token.dart'
     show BeginToken, Keyword, KeywordToken, SyntheticToken, Token, TokenType;

 import '../../scanner/token.dart' as analyzer show StringToken;

 import 'token_constants.dart'
     show
         LT_TOKEN,
         OPEN_CURLY_BRACKET_TOKEN,
         OPEN_PAREN_TOKEN,
         STRING_INTERPOLATION_TOKEN;

 import 'characters.dart' show $LF, $STX;

 import 'abstract_scanner.dart' show AbstractScanner, closeBraceInfoFor;

 import '../util/link.dart' show Link;

 /// Scanner base that appends tokens to the stream and tracks open groups.
 ///
 /// Open '{', '(', '[', '<' and '${' tokens are kept on [groupingStack] until
 /// their closer is scanned or they are discarded during recovery. Subclasses
 /// provide [createSubstringToken] and [createSyntheticSubstringToken].
 abstract class ArrayBasedScanner extends AbstractScanner {
   /// Whether an error token has been appended via [appendErrorToken].
   bool hasErrors = false;

   ArrayBasedScanner(bool includeComments, bool scanGenericMethodComments,
       {int numberOfBytesHint})
       : super(includeComments, scanGenericMethodComments,
             numberOfBytesHint: numberOfBytesHint);

   /// The stack of open groups, e.g. `{ ... ( ..`.
   ///
   /// Each [BeginToken] has a pointer to the token where the group ends. That
   /// pointer is set when scanning the end group token.
   Link<BeginToken> groupingStack = const Link<BeginToken>();

   /// Appends a fixed token whose kind and content are determined by [type].
   ///
   /// This is used for operator tokens such as ':', '.', ';', '&&', '==',
   /// '--', '=>', etc.
   void appendPrecedenceToken(TokenType type) {
     appendToken(new Token(type, tokenStart, comments));
   }

   /// Advances one character and appends [yes] or [no] depending on it.
   ///
   /// If the character read is [choice], a fixed token determined by [yes] is
   /// appended and the result of a further [advance] is returned. Otherwise a
   /// fixed token determined by [no] is appended and the character that was
   /// read is returned.
   int select(int choice, TokenType yes, TokenType no) {
     int next = advance();
     if (!identical(next, choice)) {
       appendPrecedenceToken(no);
       return next;
     }
     appendPrecedenceToken(yes);
     return advance();
   }

   /// Appends a keyword token whose kind is determined by [keyword].
   void appendKeywordToken(Keyword keyword) {
     // Type parameters and arguments cannot contain 'this', so any '<' that
     // is still open is dropped from the grouping stack.
     if (identical(keyword.lexeme, 'this')) {
       discardOpenLt();
     }
     appendToken(new KeywordToken(keyword, tokenStart, comments));
   }

   /// Appends the EOF token, first reporting every group that is still open.
   ///
   /// Open '<' tokens on top of the stack are dropped silently before the
   /// remaining openers are passed to [unmatchedBeginGroup].
   void appendEofToken() {
     beginToken();
     discardOpenLt();
     while (!groupingStack.isEmpty) {
       unmatchedBeginGroup(groupingStack.head);
       groupingStack = groupingStack.tail;
     }
     appendToken(new Token.eof(tokenStart, comments));
   }

   /// Notifies scanning a whitespace character.
   ///
   /// Note that [appendWhiteSpace] is not always invoked for [$SPACE]
   /// characters. This method is used by the scanners to track line breaks
   /// and create the [lineStarts] map.
   void appendWhiteSpace(int next) {
     if (next == $LF) {
       lineStarts.add(stringOffset + 1); // +1, the line starts after the $LF.
     }
   }

   /// Notifies on [$LF] characters in multi-line comments or strings.
   ///
   /// This method is used by the scanners to track line breaks and create the
   /// [lineStarts] map.
   void lineFeedInMultiline() {
     lineStarts.add(stringOffset + 1); // +1, the line starts after the $LF.
   }

   /// Appends a token that begins a new group, represented by [type].
   ///
   /// Group begin tokens are '{', '(', '[', '<' and '${'.
   void appendBeginGroup(TokenType type) {
     BeginToken token = new BeginToken(type, tokenStart, comments);
     appendToken(token);

     // '{', '[' and '${' cannot appear inside type parameters / arguments.
     if (!identical(type.kind, LT_TOKEN) &&
         !identical(type.kind, OPEN_PAREN_TOKEN)) {
       discardOpenLt();
     }
     groupingStack = groupingStack.prepend(token);
   }

   /// Appends a token that ends a group, represented by [type].
   ///
   /// [openKind] is the kind of the opener that this closer matches. This
   /// handles the group end tokens '}', ')' and ']'. The tokens '>' and '>>'
   /// are handled separately by [appendGt] and [appendGtGt].
   ///
   /// Returns the result of [advance], or [$STX] when the token closes a
   /// string interpolation, to signal that the scanner is back in string
   /// scanning mode.
   int appendEndGroup(TokenType type, int openKind) {
     assert(!identical(openKind, LT_TOKEN)); // openKind is < for > and >>
     bool foundOpener = discardBeginGroupUntil(openKind);
     appendPrecedenceToken(type);
     if (!foundOpener) {
       // No begin group found. Just continue.
       return advance();
     }
     Token close = tail;
     BeginToken begin = groupingStack.head;
     // A kind mismatch is only expected when '}' closes a '${'.
     bool endsInterpolation = !identical(begin.kind, openKind);
     assert(!endsInterpolation ||
         (begin.kind == STRING_INTERPOLATION_TOKEN &&
             openKind == OPEN_CURLY_BRACKET_TOKEN));
     begin.endGroup = close;
     groupingStack = groupingStack.tail;
     if (endsInterpolation) {
       // We're ending an interpolated expression. Using "start-of-text" to
       // signal that we're back in string scanning mode.
       return $STX;
     }
     return advance();
   }

   /// Discards open groups until one matching [openKind] is on top.
   ///
   /// If a begin group token matches [openKind], discards begin group tokens
   /// up to that match and returns `true`; otherwise restores [groupingStack]
   /// to its state on entry and returns `false`. A '${' opener also matches
   /// an [openKind] of [OPEN_CURLY_BRACKET_TOKEN].
   ///
   /// This recovers nicely from situations like "{[}" and "{foo());}",
   /// but not "foo(() {bar());});".
   bool discardBeginGroupUntil(int openKind) {
     Link<BeginToken> originalStack = groupingStack;

     bool first = true;
     do {
       // Don't report unmatched errors for <; it is also the less-than
       // operator.
       discardOpenLt();
       if (groupingStack.isEmpty) break; // recover
       BeginToken begin = groupingStack.head;
       if (openKind == begin.kind ||
           (openKind == OPEN_CURLY_BRACKET_TOKEN &&
               begin.kind == STRING_INTERPOLATION_TOKEN)) {
         if (first) {
           // If the expected opener has been found on the first pass
           // then no recovery necessary.
           return true;
         }
         break; // recover
       }
       first = false;
       groupingStack = groupingStack.tail;
     } while (!groupingStack.isEmpty);

     // If the stack does not have any opener of the given type,
     // then return without discarding anything.
     // This recovers nicely from situations like "{foo());}".
     if (groupingStack.isEmpty) {
       groupingStack = originalStack;
       return false;
     }

     // Insert synthetic closers and report errors for any unbalanced openers.
     // This recovers nicely from situations like "{[}".
     while (!identical(originalStack, groupingStack)) {
       // Don't report unmatched errors for <; it is also the less-than
       // operator. The opener being discarded is inspected here, not the
       // matched one on top of [groupingStack], which is never '<' because
       // [discardOpenLt] has just run.
       if (!identical(originalStack.head.kind, LT_TOKEN)) {
         unmatchedBeginGroup(originalStack.head);
       }
       originalStack = originalStack.tail;
     }
     return true;
   }

   /// Appends a token for '>'.
   ///
   /// This method does not issue unmatched errors, because > is also the
   /// greater-than operator. It does not necessarily have to close a group.
   void appendGt(TokenType type) {
     appendPrecedenceToken(type);
     if (groupingStack.isEmpty) return;
     if (identical(groupingStack.head.kind, LT_TOKEN)) {
       groupingStack.head.endGroup = tail;
       groupingStack = groupingStack.tail;
     }
   }

   /// Appends a token for '>>'.
   ///
   /// This method does not issue unmatched errors, because >> is also the
   /// shift operator. It does not necessarily have to close a group.
   void appendGtGt(TokenType type) {
     appendPrecedenceToken(type);
     if (groupingStack.isEmpty) return;
     if (identical(groupingStack.head.kind, LT_TOKEN)) {
       // Don't assign endGroup: in "T<U<V>>", the '>>' token closes the outer
       // '<', the inner '<' is left without endGroup.
       groupingStack = groupingStack.tail;
     }
     if (groupingStack.isEmpty) return;
     if (identical(groupingStack.head.kind, LT_TOKEN)) {
       groupingStack.head.endGroup = tail;
       groupingStack = groupingStack.tail;
     }
   }

   /// Appends the error [token] and records that errors were found.
   void appendErrorToken(ErrorToken token) {
     hasErrors = true;
     appendToken(token);
   }

   /// Appends the token produced by [createSubstringToken].
   @override
   void appendSubstringToken(TokenType type, int start, bool asciiOnly,
       [int extraOffset = 0]) {
     appendToken(createSubstringToken(type, start, asciiOnly, extraOffset));
   }

   /// Appends the token produced by [createSyntheticSubstringToken].
   @override
   void appendSyntheticSubstringToken(
       TokenType type, int start, bool asciiOnly, String syntheticChars) {
     appendToken(
         createSyntheticSubstringToken(type, start, asciiOnly, syntheticChars));
   }

   /// Returns a new substring token from the scan offset [start] to the
   /// current [scanOffset] plus the [extraOffset].
   ///
   /// For example, if the current scanOffset is 10, then a [start] of 5 with
   /// an [extraOffset] of -1 covers the substring [5,9).
   ///
   /// Note that [extraOffset] can only be used if the covered character(s)
   /// are known to be ASCII.
   analyzer.StringToken createSubstringToken(
       TokenType type, int start, bool asciiOnly,
       [int extraOffset = 0]);

   /// Returns a new synthetic substring token from the scan offset [start]
   /// to the current [scanOffset] plus the [syntheticChars].
   ///
   /// The [syntheticChars] are appended to the unterminated string
   /// literal's lexeme but the returned token's length will *not* include
   /// those additional characters so as to be true to the original source.
   analyzer.StringToken createSyntheticSubstringToken(
       TokenType type, int start, bool asciiOnly, String syntheticChars);

   /// Discards '<' tokens from the top of the "grouping" stack.
   ///
   /// [PartialParser.skipExpression] relies on the fact that we do not
   /// create groups for stuff like:
   /// `a = b < c, d = e > f`.
   ///
   /// In other words, this method is called when the scanner recognizes
   /// something which cannot possibly be part of a type parameter/argument
   /// list, like the '=' in the above example.
   void discardOpenLt() {
     while (!groupingStack.isEmpty &&
         identical(groupingStack.head.kind, LT_TOKEN)) {
       groupingStack = groupingStack.tail;
     }
   }

   /// Discards openers from the "grouping" stack up to and including the
   /// nearest '${', reporting each of them via [unmatchedBeginGroup].
   ///
   /// This method is called when the scanner finds an unterminated
   /// interpolation expression.
   void discardInterpolation() {
     while (!groupingStack.isEmpty) {
       BeginToken beginToken = groupingStack.head;
       unmatchedBeginGroup(beginToken);
       groupingStack = groupingStack.tail;
       if (identical(beginToken.kind, STRING_INTERPOLATION_TOKEN)) break;
     }
   }

   /// Appends a synthetic token of the type given by [closeBraceInfoFor] for
   /// [begin], makes it the end group of [begin], and then appends an
   /// [UnmatchedToken] for [begin].
   void unmatchedBeginGroup(BeginToken begin) {
     // We want to ensure that unmatched BeginTokens are reported as
     // errors.  However, the diet parser assumes that groups are well-balanced
     // and will never look at the endGroup token.  This is a nice property that
     // allows us to skip quickly over correct code. By inserting an additional
     // synthetic token in the stream, we can keep ignoring endGroup tokens.
     //
     // [begin] --next--> [tail]
     // [begin] --endG--> [synthetic] --next--> [next] --next--> [tail]
     //
     // This allows the diet parser to skip from [begin] via endGroup to
     // [synthetic] and ignore the [synthetic] token (assuming it's correct),
     // then the error will be reported when parsing the [next] token.
     //
     // For example, tokenize("{[1};") produces:
     //
     // SymbolToken({) --endGroup------------------------+
     //      |                                           |
     //     next                                         |
     //      v                                           |
     // SymbolToken([) --endGroup--+                     |
     //      |                     |                     |
     //     next                   |                     |
     //      v                     |                     |
     // StringToken(1)             |                     |
     //      |                     |                     |
     //     next                   |                     |
     //      v                     |                     |
     // SymbolToken(])<------------+ <-- Synthetic token |
     //      |                                           |
     //     next                                         |
     //      v                                           |
     // UnmatchedToken([)                                |
     //      |                                           |
     //     next                                         |
     //      v                                           |
     // SymbolToken(})<----------------------------------+
     //      |
     //     next
     //      v
     // SymbolToken(;)
     //      |
     //     next
     //      v
     //     EOF
     TokenType type = closeBraceInfoFor(begin);
     appendToken(new SyntheticToken(type, tokenStart)..beforeSynthetic = tail);
     begin.endGroup = tail;
     appendErrorToken(new UnmatchedToken(begin));
   }
 }
	// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
	// for details. All rights reserved. Use of this source code is governed by a
	// BSD-style license that can be found in the LICENSE file.

	library fasta.scanner.array_based_scanner;

	import 'error_token.dart' show ErrorToken, UnmatchedToken;

	import '../../scanner/token.dart'
	show BeginToken, Keyword, KeywordToken, SyntheticToken, Token, TokenType;

	import '../../scanner/token.dart' as analyzer show StringToken;

	import 'token_constants.dart'
	show
	LT_TOKEN,
	OPEN_CURLY_BRACKET_TOKEN,
	OPEN_PAREN_TOKEN,
	STRING_INTERPOLATION_TOKEN;

	import 'characters.dart' show $LF, $STX;

	import 'abstract_scanner.dart' show AbstractScanner, closeBraceInfoFor;

	import '../util/link.dart' show Link;

	/// Scanner base that appends tokens to the stream and tracks open groups.
	///
	/// Open '{', '(', '[', '<' and '${' tokens are kept on [groupingStack] until
	/// their closer is scanned or they are discarded during recovery. Subclasses
	/// provide [createSubstringToken] and [createSyntheticSubstringToken].
	abstract class ArrayBasedScanner extends AbstractScanner {
	  /// Whether an error token has been appended via [appendErrorToken].
	  bool hasErrors = false;

	  ArrayBasedScanner(bool includeComments, bool scanGenericMethodComments,
	      {int numberOfBytesHint})
	      : super(includeComments, scanGenericMethodComments,
	            numberOfBytesHint: numberOfBytesHint);

	  /// The stack of open groups, e.g. `{ ... ( ..`.
	  ///
	  /// Each [BeginToken] has a pointer to the token where the group ends. That
	  /// pointer is set when scanning the end group token.
	  Link<BeginToken> groupingStack = const Link<BeginToken>();

	  /// Appends a fixed token whose kind and content are determined by [type].
	  ///
	  /// This is used for operator tokens such as ':', '.', ';', '&&', '==',
	  /// '--', '=>', etc.
	  void appendPrecedenceToken(TokenType type) {
	    appendToken(new Token(type, tokenStart, comments));
	  }

	  /// Advances one character and appends [yes] or [no] depending on it.
	  ///
	  /// If the character read is [choice], a fixed token determined by [yes] is
	  /// appended and the result of a further [advance] is returned. Otherwise a
	  /// fixed token determined by [no] is appended and the character that was
	  /// read is returned.
	  int select(int choice, TokenType yes, TokenType no) {
	    int next = advance();
	    if (!identical(next, choice)) {
	      appendPrecedenceToken(no);
	      return next;
	    }
	    appendPrecedenceToken(yes);
	    return advance();
	  }

	  /// Appends a keyword token whose kind is determined by [keyword].
	  void appendKeywordToken(Keyword keyword) {
	    // Type parameters and arguments cannot contain 'this', so any '<' that
	    // is still open is dropped from the grouping stack.
	    if (identical(keyword.lexeme, 'this')) {
	      discardOpenLt();
	    }
	    appendToken(new KeywordToken(keyword, tokenStart, comments));
	  }

	  /// Appends the EOF token, first reporting every group that is still open.
	  ///
	  /// Open '<' tokens on top of the stack are dropped silently before the
	  /// remaining openers are passed to [unmatchedBeginGroup].
	  void appendEofToken() {
	    beginToken();
	    discardOpenLt();
	    while (!groupingStack.isEmpty) {
	      unmatchedBeginGroup(groupingStack.head);
	      groupingStack = groupingStack.tail;
	    }
	    appendToken(new Token.eof(tokenStart, comments));
	  }

	  /// Notifies scanning a whitespace character.
	  ///
	  /// Note that [appendWhiteSpace] is not always invoked for [$SPACE]
	  /// characters. This method is used by the scanners to track line breaks
	  /// and create the [lineStarts] map.
	  void appendWhiteSpace(int next) {
	    if (next == $LF) {
	      lineStarts.add(stringOffset + 1); // +1, the line starts after the $LF.
	    }
	  }

	  /// Notifies on [$LF] characters in multi-line comments or strings.
	  ///
	  /// This method is used by the scanners to track line breaks and create the
	  /// [lineStarts] map.
	  void lineFeedInMultiline() {
	    lineStarts.add(stringOffset + 1); // +1, the line starts after the $LF.
	  }

	  /// Appends a token that begins a new group, represented by [type].
	  ///
	  /// Group begin tokens are '{', '(', '[', '<' and '${'.
	  void appendBeginGroup(TokenType type) {
	    BeginToken token = new BeginToken(type, tokenStart, comments);
	    appendToken(token);

	    // '{', '[' and '${' cannot appear inside type parameters / arguments.
	    if (!identical(type.kind, LT_TOKEN) &&
	        !identical(type.kind, OPEN_PAREN_TOKEN)) {
	      discardOpenLt();
	    }
	    groupingStack = groupingStack.prepend(token);
	  }

	  /// Appends a token that ends a group, represented by [type].
	  ///
	  /// [openKind] is the kind of the opener that this closer matches. This
	  /// handles the group end tokens '}', ')' and ']'. The tokens '>' and '>>'
	  /// are handled separately by [appendGt] and [appendGtGt].
	  ///
	  /// Returns the result of [advance], or [$STX] when the token closes a
	  /// string interpolation, to signal that the scanner is back in string
	  /// scanning mode.
	  int appendEndGroup(TokenType type, int openKind) {
	    assert(!identical(openKind, LT_TOKEN)); // openKind is < for > and >>
	    bool foundOpener = discardBeginGroupUntil(openKind);
	    appendPrecedenceToken(type);
	    if (!foundOpener) {
	      // No begin group found. Just continue.
	      return advance();
	    }
	    Token close = tail;
	    BeginToken begin = groupingStack.head;
	    // A kind mismatch is only expected when '}' closes a '${'.
	    bool endsInterpolation = !identical(begin.kind, openKind);
	    assert(!endsInterpolation ||
	        (begin.kind == STRING_INTERPOLATION_TOKEN &&
	            openKind == OPEN_CURLY_BRACKET_TOKEN));
	    begin.endGroup = close;
	    groupingStack = groupingStack.tail;
	    if (endsInterpolation) {
	      // We're ending an interpolated expression. Using "start-of-text" to
	      // signal that we're back in string scanning mode.
	      return $STX;
	    }
	    return advance();
	  }

	  /// Discards open groups until one matching [openKind] is on top.
	  ///
	  /// If a begin group token matches [openKind], discards begin group tokens
	  /// up to that match and returns `true`; otherwise restores [groupingStack]
	  /// to its state on entry and returns `false`. A '${' opener also matches
	  /// an [openKind] of [OPEN_CURLY_BRACKET_TOKEN].
	  ///
	  /// This recovers nicely from situations like "{[}" and "{foo());}",
	  /// but not "foo(() {bar());});".
	  bool discardBeginGroupUntil(int openKind) {
	    Link<BeginToken> originalStack = groupingStack;

	    bool first = true;
	    do {
	      // Don't report unmatched errors for <; it is also the less-than
	      // operator.
	      discardOpenLt();
	      if (groupingStack.isEmpty) break; // recover
	      BeginToken begin = groupingStack.head;
	      if (openKind == begin.kind ||
	          (openKind == OPEN_CURLY_BRACKET_TOKEN &&
	              begin.kind == STRING_INTERPOLATION_TOKEN)) {
	        if (first) {
	          // If the expected opener has been found on the first pass
	          // then no recovery necessary.
	          return true;
	        }
	        break; // recover
	      }
	      first = false;
	      groupingStack = groupingStack.tail;
	    } while (!groupingStack.isEmpty);

	    // If the stack does not have any opener of the given type,
	    // then return without discarding anything.
	    // This recovers nicely from situations like "{foo());}".
	    if (groupingStack.isEmpty) {
	      groupingStack = originalStack;
	      return false;
	    }

	    // Insert synthetic closers and report errors for any unbalanced openers.
	    // This recovers nicely from situations like "{[}".
	    while (!identical(originalStack, groupingStack)) {
	      // Don't report unmatched errors for <; it is also the less-than
	      // operator. The opener being discarded is inspected here, not the
	      // matched one on top of [groupingStack], which is never '<' because
	      // [discardOpenLt] has just run.
	      if (!identical(originalStack.head.kind, LT_TOKEN)) {
	        unmatchedBeginGroup(originalStack.head);
	      }
	      originalStack = originalStack.tail;
	    }
	    return true;
	  }

	  /// Appends a token for '>'.
	  ///
	  /// This method does not issue unmatched errors, because > is also the
	  /// greater-than operator. It does not necessarily have to close a group.
	  void appendGt(TokenType type) {
	    appendPrecedenceToken(type);
	    if (groupingStack.isEmpty) return;
	    if (identical(groupingStack.head.kind, LT_TOKEN)) {
	      groupingStack.head.endGroup = tail;
	      groupingStack = groupingStack.tail;
	    }
	  }

	  /// Appends a token for '>>'.
	  ///
	  /// This method does not issue unmatched errors, because >> is also the
	  /// shift operator. It does not necessarily have to close a group.
	  void appendGtGt(TokenType type) {
	    appendPrecedenceToken(type);
	    if (groupingStack.isEmpty) return;
	    if (identical(groupingStack.head.kind, LT_TOKEN)) {
	      // Don't assign endGroup: in "T<U<V>>", the '>>' token closes the outer
	      // '<', the inner '<' is left without endGroup.
	      groupingStack = groupingStack.tail;
	    }
	    if (groupingStack.isEmpty) return;
	    if (identical(groupingStack.head.kind, LT_TOKEN)) {
	      groupingStack.head.endGroup = tail;
	      groupingStack = groupingStack.tail;
	    }
	  }

	  /// Appends the error [token] and records that errors were found.
	  void appendErrorToken(ErrorToken token) {
	    hasErrors = true;
	    appendToken(token);
	  }

	  /// Appends the token produced by [createSubstringToken].
	  @override
	  void appendSubstringToken(TokenType type, int start, bool asciiOnly,
	      [int extraOffset = 0]) {
	    appendToken(createSubstringToken(type, start, asciiOnly, extraOffset));
	  }

	  /// Appends the token produced by [createSyntheticSubstringToken].
	  @override
	  void appendSyntheticSubstringToken(
	      TokenType type, int start, bool asciiOnly, String syntheticChars) {
	    appendToken(
	        createSyntheticSubstringToken(type, start, asciiOnly, syntheticChars));
	  }

	  /// Returns a new substring token from the scan offset [start] to the
	  /// current [scanOffset] plus the [extraOffset].
	  ///
	  /// For example, if the current scanOffset is 10, then a [start] of 5 with
	  /// an [extraOffset] of -1 covers the substring [5,9).
	  ///
	  /// Note that [extraOffset] can only be used if the covered character(s)
	  /// are known to be ASCII.
	  analyzer.StringToken createSubstringToken(
	      TokenType type, int start, bool asciiOnly,
	      [int extraOffset = 0]);

	  /// Returns a new synthetic substring token from the scan offset [start]
	  /// to the current [scanOffset] plus the [syntheticChars].
	  ///
	  /// The [syntheticChars] are appended to the unterminated string
	  /// literal's lexeme but the returned token's length will *not* include
	  /// those additional characters so as to be true to the original source.
	  analyzer.StringToken createSyntheticSubstringToken(
	      TokenType type, int start, bool asciiOnly, String syntheticChars);

	  /// Discards '<' tokens from the top of the "grouping" stack.
	  ///
	  /// [PartialParser.skipExpression] relies on the fact that we do not
	  /// create groups for stuff like:
	  /// `a = b < c, d = e > f`.
	  ///
	  /// In other words, this method is called when the scanner recognizes
	  /// something which cannot possibly be part of a type parameter/argument
	  /// list, like the '=' in the above example.
	  void discardOpenLt() {
	    while (!groupingStack.isEmpty &&
	        identical(groupingStack.head.kind, LT_TOKEN)) {
	      groupingStack = groupingStack.tail;
	    }
	  }

	  /// Discards openers from the "grouping" stack up to and including the
	  /// nearest '${', reporting each of them via [unmatchedBeginGroup].
	  ///
	  /// This method is called when the scanner finds an unterminated
	  /// interpolation expression.
	  void discardInterpolation() {
	    while (!groupingStack.isEmpty) {
	      BeginToken beginToken = groupingStack.head;
	      unmatchedBeginGroup(beginToken);
	      groupingStack = groupingStack.tail;
	      if (identical(beginToken.kind, STRING_INTERPOLATION_TOKEN)) break;
	    }
	  }

	  /// Appends a synthetic token of the type given by [closeBraceInfoFor] for
	  /// [begin], makes it the end group of [begin], and then appends an
	  /// [UnmatchedToken] for [begin].
	  void unmatchedBeginGroup(BeginToken begin) {
	    // We want to ensure that unmatched BeginTokens are reported as
	    // errors. However, the diet parser assumes that groups are well-balanced
	    // and will never look at the endGroup token. This is a nice property that
	    // allows us to skip quickly over correct code. By inserting an additional
	    // synthetic token in the stream, we can keep ignoring endGroup tokens.
	    //
	    // [begin] --next--> [tail]
	    // [begin] --endG--> [synthetic] --next--> [next] --next--> [tail]
	    //
	    // This allows the diet parser to skip from [begin] via endGroup to
	    // [synthetic] and ignore the [synthetic] token (assuming it's correct),
	    // then the error will be reported when parsing the [next] token.
	    //
	    // For example, tokenize("{[1};") produces:
	    //
	    // SymbolToken({) --endGroup------------------------+
	    //      |                                           |
	    //     next                                         |
	    //      v                                           |
	    // SymbolToken([) --endGroup--+                     |
	    //      |                     |                     |
	    //     next                   |                     |
	    //      v                     |                     |
	    // StringToken(1)             |                     |
	    //      |                     |                     |
	    //     next                   |                     |
	    //      v                     |                     |
	    // SymbolToken(])<------------+ <-- Synthetic token |
	    //      |                                           |
	    //     next                                         |
	    //      v                                           |
	    // UnmatchedToken([)                                |
	    //      |                                           |
	    //     next                                         |
	    //      v                                           |
	    // SymbolToken(})<----------------------------------+
	    //      |
	    //     next
	    //      v
	    // SymbolToken(;)
	    //      |
	    //     next
	    //      v
	    //     EOF
	    TokenType type = closeBraceInfoFor(begin);
	    appendToken(new SyntheticToken(type, tokenStart)..beforeSynthetic = tail);
	    begin.endGroup = tail;
	    appendErrorToken(new UnmatchedToken(begin));
	  }
	}