// compiler/java/com/google/dart/compiler/parser/DartScanner.java - sdk.git - Git at Google

 // Copyright (c) 2012, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.

 package com.google.dart.compiler.parser;

 import com.google.dart.compiler.DartCompilationError;
 import com.google.dart.compiler.DartCompilerListener;
 import com.google.dart.compiler.Source;
 import com.google.dart.compiler.common.SourceInfo;
 import com.google.dart.compiler.metrics.DartEventType;
 import com.google.dart.compiler.metrics.Tracer;
 import com.google.dart.compiler.metrics.Tracer.TraceEvent;
 import com.google.dart.compiler.parser.DartScanner.InternalState.Mode;

 import java.util.ArrayList;
 import java.util.List;
 import java.util.Stack;

 /**
  * The Dart scanner. Should normally be used only by {@link DartParser}.
  */
 public class DartScanner {

   /**
    * A span of characters in a source file, described by a begin offset and an end offset.
    */
   public static class Location {
     /** Marker for "no location"; note that this constant is {@code null}. */
     public static final Location NONE = null;

     private int begin;
     private int end;

     /**
      * Creates a span from two explicit offsets.
      *
      * @param begin offset stored as the start of the span
      * @param end offset stored as the end of the span
      */
     public Location(int begin, int end) {
       this.begin = begin;
       this.end = end;
     }

     /**
      * Creates a span whose begin and end are both the given offset.
      *
      * @param begin offset used for both ends of the span
      */
     public Location(int begin) {
       this(begin, begin);
     }

     /**
      * @return the offset stored as the start of the span
      */
     public int getBegin() {
       return this.begin;
     }

     /**
      * @return the offset stored as the end of the span
      */
     public int getEnd() {
       return this.end;
     }

     /**
      * @return the two offsets rendered as {@code begin::end}
      */
     @Override
     public String toString() {
       StringBuilder text = new StringBuilder();
       text.append(begin).append("::").append(end);
       return text.toString();
     }
   }

   /**
    * A snapshot of the scanner position, carrying the token offset it was created with.
    */
   public static class State {

     /**
      * Pairs an absolute token offset with the token that was replaced there.
      */
     static class RollbackToken {
       public final int absoluteOffset;
       final Token replacedToken;

       public RollbackToken(int tokenOffset, Token token) {
         this.absoluteOffset = tokenOffset;
         this.replacedToken = token;
       }
     }

     /** The token offset this snapshot was created with. */
     final int baseOffset;

     /* Stack of tokens present before setPeek() */
     Stack<RollbackToken> rollbackTokens = null;

     State(int baseOffset) {
       this.baseOffset = baseOffset;
     }

     /**
      * @return the base offset rendered as {@code ofs=<offset>}
      */
     @Override
     public String toString() {
       return "ofs=".concat(Integer.toString(baseOffset));
     }
   }

   /**
    * Stores the entire state for the scanner: the character look-ahead window, the list of
    * scanned tokens with the current offset into it, and the stack of string-scanning states.
    */
   protected static class InternalState {
     /**
      * Scanning modes. {@link #DEFAULT} is reported when the string state stack is empty; the
      * other modes are held by entries of that stack.
      */
     enum Mode {
       /** Not inside a string. */
       DEFAULT,

       /** Inside the literal text of a string. */
       IN_STRING,

       /**
        * Inside a string, scanning a string-interpolation expression.
        * Ex: "${foo}".
        */
       IN_STRING_EMBEDDED_EXPRESSION,

       /**
        * Inside a string, scanning a string-interpolation identifier.
        * <pre>
        * Ex: "$foo bc".
        *        ^
        * </pre>
        */
       IN_STRING_EMBEDDED_EXPRESSION_IDENTIFIER,

       /**
        * Inside a string, just after having scanned a string-interpolation identifier.
        * <pre>
        * Ex: "$foo bc".
        *          ^
        * </pre>
        */
       IN_STRING_EMBEDDED_EXPRESSION_END
     }

     /**
      * Maintains the state of scanning strings, including interpolated
      * expressions/identifiers, nested braces for terminating an interpolated
      * expression, the quote character used to start/end the string, and whether
      * it is a multiline string.
      */
     public static class StringState {
       private int bracesCount;
       private Mode mode;
       private final boolean multiLine;
       private final int quote;

       /**
        * Creates a new string state. If the mode is
        * {@link Mode#IN_STRING_EMBEDDED_EXPRESSION}, the brace count starts at one because the
        * opening brace of the expression has already been seen; otherwise it starts at zero.
        *
        * @param mode the scanning mode of this state
        * @param quote the codepoint of the quote character bounding the string
        * @param multiLine true if the string is a multi-line string
        */
       public StringState(Mode mode, int quote, boolean multiLine) {
         this.bracesCount = mode == Mode.IN_STRING_EMBEDDED_EXPRESSION ? 1 : 0;
         this.mode = mode;
         this.quote = quote;
         this.multiLine = multiLine;
       }

       /**
        * Mark that we have seen an opening brace. Only counted while in
        * {@link Mode#IN_STRING_EMBEDDED_EXPRESSION}.
        */
       public void openBrace() {
         if (mode == Mode.IN_STRING_EMBEDDED_EXPRESSION) {
           bracesCount++;
         }
       }

       /**
        * Mark that we have seen a closing brace. Only counted while in
        * {@link Mode#IN_STRING_EMBEDDED_EXPRESSION}.
        *
        * @return true if the current mode is now complete and should be popped
        * off the stack
        */
       public boolean closeBrace() {
         if (mode == Mode.IN_STRING_EMBEDDED_EXPRESSION) {
           return --bracesCount == 0;
         }
         return false;
       }

       /**
        * @return the string scanning mode.
        */
       public Mode getMode() {
         return mode;
       }

       /**
        * @return the codepoint of the quote character used to bound the current
        * string.
        */
       public int getQuote() {
         return quote;
       }

       /**
        * @return true if the current string is a multi-line string.
        */
       public boolean isMultiLine() {
         return multiLine;
       }

       /**
        * @param mode the string scanning mode.
        */
       public void setMode(Mode mode) {
         this.mode = mode;
       }

       /**
        * @return the mode and quote character, plus a "/multiline" suffix for multi-line strings
        */
       @Override
       public String toString() {
         StringBuilder buf = new StringBuilder();
         buf.append(mode).append("/quote=").appendCodePoint(quote);
         if (multiLine) {
           buf.append("/multiline");
         }
         return buf.toString();
       }
     }

     // Character look-ahead window and the source position of each entry.
     private int[] lookahead = new int[NUM_LOOKAHEAD];
     private int[] lookaheadPos = new int[NUM_LOOKAHEAD];
     // Source position of the next character to be read into the window.
     private int nextLookaheadPos;
     // All tokens scanned so far, and the entry currently being filled in.
     private ArrayList<TokenData> tokens;
     private TokenData lastToken;

     // Current offset in the token list
     int currentOffset;

     // The following fields store data used for parsing string interpolation.
     // The scanner splits the interpolated string in segments, alternating
     // strings and expressions so that the parser can construct the embedded
     // expressions as it goes. The following information is used to ensure that
     // the string is closed with matching quotes, and to deal with parsing
     // ambiguity of "}" (which closes both embedded expressions and braces
     // within embedded expressions).

     /** The string scanning state stack. */
     private List<StringState> stringStateStack = new ArrayList<StringState>();

     public InternalState() {
       currentOffset = 0;
     }

     /**
      * Renders the current offset, the current token (when the offset addresses one), the whole
      * token list and, when inside a string, the quote sequence that opened it.
      * <p>
      * This method does not throw when the token list has not been assigned yet or when the
      * current offset lies outside of it; the current-token section is simply omitted.
      */
     @Override
     public String toString() {
       StringBuilder ret = new StringBuilder();

       ret.append("currentOffset(");
       ret.append(currentOffset);
       ret.append(")");
       // A diagnostic method must not fail: tolerate a missing token list and an
       // offset that does not (yet) address an element of it.
       int tokenCount = (tokens == null) ? 0 : tokens.size();
       if (currentOffset > -1 && currentOffset < tokenCount) {
         ret.append(" = [");
         appendToken(ret, tokens.get(currentOffset));
         ret.append("], ");
       }

       ret.append("[");
       for (int i = 0; i < tokenCount; i++) {
         appendToken(ret, tokens.get(i));
         if (i < tokenCount - 1) {
           ret.append(", ");
         }
       }
       ret.append("]");
       if (getMode() != InternalState.Mode.DEFAULT) {
         ret.append("(within string starting with ");
         ret.appendCodePoint(getQuote());
         if (isMultiLine()) {
           ret.appendCodePoint(getQuote());
           ret.appendCodePoint(getQuote());
         }
         ret.append(')');
       }
       return ret.toString();
     }

     /**
      * Appends a token and, when present, its value in parentheses.
      */
     private static void appendToken(StringBuilder out, TokenData tok) {
       out.append(tok.token);
       if (tok.value != null) {
         out.append(" (").append(tok.value).append(")");
       }
     }

     /**
      * @return the current scanning mode; {@link Mode#DEFAULT} when the state stack is empty
      */
     protected Mode getMode() {
       return stringStateStack.isEmpty() ? Mode.DEFAULT : getCurrentState().getMode();
     }

     /**
      * Mark that we have seen an open brace. Does nothing when the state stack is empty.
      */
     protected void openBrace() {
       if (!stringStateStack.isEmpty()) {
         getCurrentState().openBrace();
       }
     }

     /**
      * Mark that we have seen a close brace.
      *
      * @return true if the current mode is now complete and should be popped; always false
      * when the state stack is empty
      */
     protected boolean closeBrace() {
       if (!stringStateStack.isEmpty()) {
         return getCurrentState().closeBrace();
       }
       return false;
     }

     /**
      * Pop the current mode. Does nothing when the state stack is empty.
      */
     protected void popMode() {
       if (!stringStateStack.isEmpty()) {
         stringStateStack.remove(stringStateStack.size() - 1);
       }
     }

     /**
      * Pushes a new string state.
      *
      * @param mode the mode to push
      * @param quote the codepoint of the quote character bounding the string
      * @param multiLine true if the string is a multi-line string
      */
     protected void pushMode(Mode mode, int quote, boolean multiLine) {
       stringStateStack.add(new StringState(mode, quote, multiLine));
     }

     /**
      * Changes the mode of the state on top of the stack without pushing or popping.
      * The state stack must not be empty.
      *
      * @param mode the mode that replaces the current one
      */
     protected void replaceMode(Mode mode) {
       getCurrentState().setMode(mode);
     }

     /**
      * Remove all modes, returning to the default state.
      */
     public void resetModes() {
       stringStateStack.clear();
     }

     /**
      * @return the quote of the state on top of the stack, which must not be empty
      */
     private int getQuote() {
       return getCurrentState().getQuote();
     }

     /**
      * @return the current string scanning state; the state stack must not be empty
      */
     private StringState getCurrentState() {
       assert !stringStateStack.isEmpty() : "called with empty state stack";
       return stringStateStack.get(stringStateStack.size() - 1);
     }

     /**
      * @return whether the state on top of the stack, which must not be empty, is multi-line
      */
     private boolean isMultiLine() {
       return getCurrentState().isMultiLine();
     }
   }

   /**
    * One entry of the token list: the token, its source location and, where the scanner
    * recorded one, its value.
    */
   private static class TokenData {
     Token token;
     Location location;
     String value;

     /**
      * @return the token text, followed by the value in parentheses when a value is present
      */
     @Override
     public String toString() {
       // scanFile() adds a dummy first entry whose token is never assigned, so
       // token.toString() would throw; String.valueOf renders that entry as "null".
       String str = String.valueOf(token);
       return (value != null) ? str + "(" + value + ")" : str;
     }
   }

   /** Number of code points held in the scanner's character look-ahead window. */
   private static final int NUM_LOOKAHEAD = 2;

   /**
    * @return true if {@code c} is one of the ASCII digits '0' through '9'
    */
   private static boolean isDecimalDigit(int c) {
     return '0' <= c && c <= '9';
   }

   /**
    * @return true if {@code c} is an ASCII decimal digit or a letter in 'a'..'f' / 'A'..'F'
    */
   private static boolean isHexDigit(int c) {
     if (isDecimalDigit(c)) {
       return true;
     }
     boolean lowerHex = 'a' <= c && c <= 'f';
     boolean upperHex = 'A' <= c && c <= 'F';
     return lowerHex || upperHex;
   }

   /**
    * @return true if {@code c} may start an identifier or is an ASCII decimal digit
    */
   private static boolean isIdentifierPart(int c) {
     if (isIdentifierStart(c)) {
       return true;
     }
     return isDecimalDigit(c);
   }

   /**
    * @return true if {@code c} is an ASCII letter, an underscore or a dollar sign
    */
   private static boolean isIdentifierStart(int c) {
     boolean lowerLetter = 'a' <= c && c <= 'z';
     boolean upperLetter = 'A' <= c && c <= 'Z';
     return lowerLetter || upperLetter || c == '_' || c == '$';
   }

   /**
    * @return true if {@code c} is a line feed or a carriage return
    */
   private static boolean isLineTerminator(int c) {
     return c == '\n' || c == '\r';
   }

   /**
    * @return true if {@code c} is a space or a horizontal tab
    */
   private static boolean isWhiteSpace(int c) {
     return c == '\t' || c == ' ';
   }

   // Running total of characters counted as comment text by commentLocation().
   private int commentCharCount;
   // Bounds of the comment most recently counted by commentLocation(); used there to
   // skip a comment that has already been counted.
   private int lastCommentStart;
   private int lastCommentStop;
   // The text being scanned.
   private String source;
   // Character look-ahead window, token list and string-mode stack.
   private InternalState internalState;
   // Stored by the constructor.
   // NOTE(review): presumably consumed by reportError() for error reporting - confirm.
   private Source sourceReference;
   private DartCompilerListener listener;

   /**
    * Creates a scanner over {@code source}, starting at offset 0, with no source reference
    * and no listener.
    *
    * @param source the text to scan
    */
   public DartScanner(String source) {
     this(source, 0, null, null);
   }

   /**
    * Creates a scanner over {@code source} that begins reading at offset {@code start}, with
    * no source reference and no listener.
    *
    * @param source the text to scan
    * @param start the offset in {@code source} at which scanning begins
    */
   public DartScanner(String source, int start) {
     // Forward the caller's start offset; it used to be dropped in favour of a literal 0.
     this(source, start, null, null);
   }

   /**
    * Creates a scanner and immediately scans all of {@code source} into the token list.
    * The work is wrapped in a {@link DartEventType#SCANNER} trace event when tracing is enabled.
    * <p>
    * Note that {@link #recordCommentLocation(int, int)} may be invoked while this constructor
    * is still running.
    *
    * @param source the text to scan
    * @param start the offset in {@code source} at which reading begins
    * @param sourceReference stored on the scanner
    * @param listener stored on the scanner
    */
   public DartScanner(String source, int start, Source sourceReference, DartCompilerListener listener) {
     final TraceEvent logEvent = Tracer.canTrace() ? Tracer.start(DartEventType.SCANNER) : null;
     try {
       this.source = source;
       this.sourceReference = sourceReference;
       this.listener = listener;

       final InternalState state = new InternalState();
       internalState = state;
       state.tokens = new ArrayList<TokenData>(source.length() / 2);

       // Every look-ahead position starts out at 'start'.
       // TODO Determine if line & column should be relative to 0 or 'start'
       state.nextLookaheadPos = start;
       int slot = 0;
       while (slot < state.lookaheadPos.length) {
         state.lookaheadPos[slot] = start;
         slot++;
       }

       // Prime the look-ahead window with the first characters of the source.
       int primed = 0;
       while (primed < NUM_LOOKAHEAD) {
         advance();
         primed++;
       }

       // Scan all the tokens up front
       scanFile();
     } finally {
       Tracer.end(logEvent);
     }
   }

   /**
    * Returns the number of characters of source code that were scanned, taken from the
    * position of the next character to be read into the look-ahead window.
    */
   public int getCharCount() {
     final int nextReadPosition = internalState.nextLookaheadPos;
     return nextReadPosition;
   }

   /**
    * Returns {@link #getCharCount()} minus the number of characters that were counted as
    * comment text.
    */
   public int getNonCommentCharCount() {
     final int scanned = getCharCount();
     return scanned - commentCharCount;
   }

   /**
    * Get the token value for one of the look-ahead tokens.
    *
    * @param n how far past the token following the current one to look; 0 is that next token.
    *     Asserted to be non-negative and to address an existing token.
    */
   public String getPeekTokenValue(int n) {
     final int index = internalState.currentOffset + n + 1;
     assert (0 <= n && index < internalState.tokens.size());
     return internalState.tokens.get(index).value;
   }

   /**
    * Captures the current token offset in a new {@link State}. This state can be passed to
    * {@link #restoreState(State)}.
    */
   public State getState() {
     final int offset = internalState.currentOffset;
     return new State(offset);
   }

   /**
    * Gets the current offset of the scanner, i.e. the index of the current token in the
    * token list.
    */
   public int getOffset() {
     final int offset = internalState.currentOffset;
     return offset;
   }

   /**
    * Gets the token stored at the current offset.
    */
   public Token getToken() {
     final TokenData current = internalState.tokens.get(internalState.currentOffset);
     return current.token;
   }

   /**
    * Gets the location stored for the token at the current offset.
    */
   public Location getTokenLocation() {
     final TokenData current = internalState.tokens.get(internalState.currentOffset);
     return current.location;
   }

   /**
    * Location look-ahead - past the current token.
    *
    * @param n how far past the token following the current one to look; 0 is that next token
    * @return the location of the requested token, or the location of the last token in the
    *     list (the EOS token) when the request reaches beyond the end of the token stream
    */
   public Location peekTokenLocation(int n) {
     final int lastIndex = internalState.tokens.size() - 1;
     final int wanted = internalState.currentOffset + n + 1;
     // It is not valid to read beyond the end of the token stream, so such
     // requests are answered with the last entry.
     final int index = (wanted <= lastIndex) ? wanted : lastIndex;
     return internalState.tokens.get(index).location;
   }

   /**
    * Gets the value stored for the token at the current offset, i.e. the token previously
    * returned by a call to next().
    */
   public String getTokenValue() {
     final TokenData current = internalState.tokens.get(internalState.currentOffset);
     return current.value;
   }

   /**
    * Value look-ahead - past the current token.
    *
    * @param n how far past the token following the current one to look; 0 is that next token
    * @return the value of the requested token, or {@code null} (the default value of an EOS
    *     token) when the request reaches beyond the end of the token stream
    */
   public String peekTokenValue(int n) {
     final int index = internalState.currentOffset + n + 1;
     if (index >= internalState.tokens.size()) {
       // It is not valid to read beyond the end of the token stream.
       return null;
     }
     return internalState.tokens.get(index).value;
   }

   /**
    * Moves to the next token, unless the scanner is already at the last token of the stream,
    * and returns the token at the resulting offset.
    */
   public Token next() {
     final int following = internalState.currentOffset + 1;
     // Never move the current offset beyond the end of the token stream.
     if (following < internalState.tokens.size()) {
       internalState.currentOffset = following;
     }
     return getToken();
   }

   /**
    * Token look-ahead - past the token returned by next().
    *
    * @param n how far past the token following the current one to look; 0 is that next token
    * @return the requested token, or {@link Token#EOS} when the request reaches beyond the end
    *     of the token stream
    */
   public Token peek(int n) {
     final int index = internalState.currentOffset + n + 1;
     // It is not valid to read beyond the end of the token stream.
     return (index < internalState.tokens.size())
         ? internalState.tokens.get(index).token
         : Token.EOS;
   }

   /**
    * Moves the scanner back to the token offset held by a state object returned from
    * {@link #getState()}.
    */
   public void restoreState(State oldState) {
     final int savedOffset = oldState.baseOffset;
     internalState.currentOffset = savedOffset;
   }

   /**
    * Overwrites the token of one of the look-ahead entries.
    *
    * @param n how far past the token following the current one to write; 0 is that next token.
    *     Asserted to be non-negative and to address an existing token.
    * @param token the token to store
    */
   public void setPeek(int n, Token token) {
     final int index = internalState.currentOffset + n + 1;
     assert (0 <= n && index < internalState.tokens.size());
     final TokenData slot = internalState.tokens.get(index);
     slot.token = token;
   }

   /**
    * Overwrites the token at an absolute index of the token list.
    *
    * @param n index into the token list, asserted to address an existing token
    * @param token the token to store
    */
   public void setAbsolutePeek(int n, Token token) {
     assert (0 <= n && n < internalState.tokens.size());
     final TokenData slot = internalState.tokens.get(n);
     slot.token = token;
   }

   /**
    * @return the description of the internal state, or the default object description when
    *     no internal state has been created yet
    */
   @Override
   public String toString() {
     return (internalState == null) ? super.toString() : internalState.toString();
   }

   /**
    * A hook into low-level scanning machinery. Use with care and only as directed.<p>
    * Record the location of a comment. Given a source string <code>source,</code>
    * the actual comment string is <code>source.substring(start - 1, stop)</code>
    * because the comment cannot be recognized until its second character is
    * scanned.<p>
    * Note: A single comment may be scanned multiple times. If the scanner has
    * to backtrack it will re-scan comments until it no longer has to backtrack.
    * Clients are responsible for filtering duplicate comment locations.<p>
    * Warning: This method may be called during initialization of the scanner in
    * the <code>DartScanner</code> constructor. Fields defined in the subclass
    * that implements this method may not have been initialized before the first
    * invocation.<p>
    * The implementation in this class does nothing.
    * @param start the character position of the second character in the comment
    * @param stop the character position of the final character in the comment
    */
   protected void recordCommentLocation(int start, int stop) {
     // Intentionally empty: subclasses override this hook to collect comment locations.
   }

   /**
    * Shifts the look-ahead window by one entry and reads the next code point of the source
    * into its last slot. Once the source is exhausted the last slot is filled with -1.
    */
   private void advance() {
     final int last = NUM_LOOKAHEAD - 1;

     // Drop the oldest entry by moving the remaining ones one slot towards the front.
     System.arraycopy(internalState.lookahead, 1, internalState.lookahead, 0, last);
     System.arraycopy(internalState.lookaheadPos, 1, internalState.lookaheadPos, 0, last);

     final int sourceLength = source.length();
     final int readPos = internalState.nextLookaheadPos;
     if (readPos >= sourceLength) {
       // Let the last look-ahead position be past the source. This makes
       // the position information for the last token correct.
       internalState.lookahead[last] = -1;
       internalState.lookaheadPos[last] = sourceLength;

       // The next read position stays pinned at the end of the source.
       internalState.nextLookaheadPos = sourceLength;
       return;
     }

     internalState.lookahead[last] = source.codePointAt(readPos);
     internalState.lookaheadPos[last] = readPos;
     internalState.nextLookaheadPos = source.offsetByCodePoints(readPos, 1);
   }

   /**
    * Called when comments are identified to aggregate the total number of comment characters
    * and then delegate to {@link #recordCommentLocation(int, int)}.  This provides a light
    * weight way to track how much of the code is made up of comments without having to keep
    * all comments. A comment whose start and stop are both at or before those of the most
    * recently counted comment is ignored.
    *
    * @param start the character position of the second character in the comment
    * @param stop the character position of the final character in the comment
    */
   private void commentLocation(int start, int stop) {
     final boolean alreadyCounted = start <= lastCommentStart && stop <= lastCommentStop;
     if (!alreadyCounted) {
       lastCommentStart = start;
       lastCommentStop = stop;
       // The comment text begins one character before 'start', hence the + 1.
       commentCharCount += stop - start + 1;

       recordCommentLocation(start, stop);
     }
   }

   /**
    * @return true if the first look-ahead character equals {@code c}
    */
   private boolean is(int c) {
     final int current = internalState.lookahead[0];
     return current == c;
   }

   /**
    * @return true if the first look-ahead character is negative, which is how the end of the
    *     source is represented in the look-ahead window
    */
   private boolean isEos() {
     final int current = internalState.lookahead[0];
     return current < 0;
   }

   /**
    * @param n index into the look-ahead window, asserted to be within its bounds
    * @return the look-ahead character at index {@code n}
    */
   private int lookahead(int n) {
     assert (0 <= n && n < NUM_LOOKAHEAD);
     final int[] window = internalState.lookahead;
     return window[n];
   }

   // Get the current source code position, i.e. the position of the first look-ahead character.
   private int position() {
     final int[] positions = internalState.lookaheadPos;
     return positions[0];
   }

   /**
    * Scans the whole source into the token list. The list starts with a dummy entry and ends
    * with the EOS token; comment tokens are not added. Each entry's location runs from the
    * position before the token was scanned to the position after it.
    */
   private void scanFile() {
     // First node inserted as a dummy.
     final TokenData dummy = new TokenData();
     internalState.lastToken = dummy;
     internalState.tokens.add(dummy);

     Token scanned;
     do {
       // The scan methods store the token value into this entry while scanning.
       internalState.lastToken = new TokenData();

       int begin;
       do {
         skipWhiteSpace();
         begin = position();
         scanned = scanToken();
       } while (scanned == Token.COMMENT);
       final int end = position();

       internalState.lastToken.token = scanned;
       internalState.lastToken.location = new Location(begin, end);
       internalState.tokens.add(internalState.lastToken);
     } while (scanned != Token.EOS);
   }

   /**
    * Scans an identifier starting at the current position, stores its text as the value of the
    * token being built and returns the result of {@link Token#lookup} for that text.
    *
    * @param allowDollars when false, scanning stops in front of a '$' character
    */
   private Token scanIdentifier(boolean allowDollars) {
     assert (isIdentifierStart(lookahead(0)));
     final int begin = position();
     for (int ch = lookahead(0);
         isIdentifierPart(ch) && (allowDollars || ch != '$');
         ch = lookahead(0)) {
       advance();
     }

     // Use a substring of the source string instead of copying all the
     // characters to the token value buffer.
     final String text = source.substring(begin, position());
     internalState.lastToken.value = text;
     return Token.lookup(text);
   }

   /**
    * Scans a decimal number: digits, an optional fraction (a '.' directly followed by a digit)
    * and an optional exponent. The scanned text becomes the value of the token being built.
    *
    * @return {@link Token#DOUBLE_LITERAL} when a fraction or exponent was scanned,
    *     {@link Token#INTEGER_LITERAL} otherwise, or {@link Token#ILLEGAL} when the exponent
    *     has no digits or the number is directly followed by an identifier start
    */
   private Token scanNumber() {
     assert (isDecimalDigit(lookahead(0)) || is('.'));
     final int begin = position();
     boolean sawFractionOrExponent = false;

     while (isDecimalDigit(lookahead(0))) {
       advance();
     }

     if (is('.') && isDecimalDigit(lookahead(1))) {
       sawFractionOrExponent = true;
       advance();  // Consume .
       while (isDecimalDigit(lookahead(0))) {
         advance();
       }
     }

     if (isE()) {
       sawFractionOrExponent = true;
       advance();
       if (is('+') || is('-')) {
         advance();
       }
       if (!isDecimalDigit(lookahead(0))) {
         return Token.ILLEGAL;
       }
       while (isDecimalDigit(lookahead(0))) {
         advance();
       }
     } else if (isIdentifierStart(lookahead(0))) {
       // Number literals must not be followed directly by an identifier.
       return Token.ILLEGAL;
     }

     internalState.lastToken.value = source.substring(begin, position());
     if (sawFractionOrExponent) {
       return Token.DOUBLE_LITERAL;
     }
     return Token.INTEGER_LITERAL;
   }

   /**
    * @return true if the first look-ahead character is an exponent marker, 'e' or 'E'
    */
   private boolean isE() {
     final int current = lookahead(0);
     return current == 'e' || current == 'E';
   }

   /**
    * Scans a hexadecimal literal. The digits after the 0x/0X prefix become the value of the
    * token being built.
    *
    * @return {@link Token#HEX_LITERAL}, or {@link Token#ILLEGAL} when no hex digit follows the
    *     prefix or the digits are directly followed by an identifier start
    */
   private Token scanHexNumber() {
     assert (isDecimalDigit(lookahead(0)) && (lookahead(1) == 'x' || lookahead(1) == 'X'));
     // Skip 0x/0X.
     advance();
     advance();

     final int begin = position();
     if (!isHexDigit(lookahead(0))) {
       return Token.ILLEGAL;
     }
     // At least one hex digit is present; consume the whole run.
     do {
       advance();
     } while (isHexDigit(lookahead(0)));

     if (isIdentifierStart(lookahead(0))) {
       return Token.ILLEGAL;
     }
     final int end = position();
     internalState.lastToken.value = source.substring(begin, end);
     return Token.HEX_LITERAL;
   }

   /**
    * Scans the opening of a string literal: consumes the quote (or the three quotes of a
    * multi-line string), pushes the {@code IN_STRING} mode and then scans the string content.
    *
    * @param isRaw true to scan the content as a raw string
    * @return the token produced by the content scan
    */
   private Token scanString(boolean isRaw) {
     final int quote = lookahead(0);
     assert (is('\'') || is('"'));
     advance();

     // Two more quotes directly after the first one open a multi-line string.
     final boolean multiLine = lookahead(0) == quote && lookahead(1) == quote;
     if (multiLine) {
       advance();
       advance();
       // according to the dart guide, when multi-line strings start immediately
       // with a \n, the \n is not part of the string:
       if (is('\n')) {
         advance();
       }
     }

     internalState.pushMode(InternalState.Mode.IN_STRING, quote, multiLine);
     return isRaw ? scanRawString() : scanWithinString(true);
   }

   /**
    * Scans the content of a raw string up to its closing quote (or closing three quotes for a
    * multi-line string). Characters are copied verbatim; no escapes or interpolation are
    * processed. The string mode is popped before returning, in every case.
    *
    * @return {@link Token#STRING} with the content stored as the value of the token being
    *     built, or {@link Token#ILLEGAL} when the source ends or, for a single-line string,
    *     a line feed is reached before the closing quote
    */
   private Token scanRawString() {
     assert (internalState.getMode() == InternalState.Mode.IN_STRING);
     int quote = internalState.getQuote();
     boolean multiLine = internalState.isMultiLine();
     // TODO(floitsch): Do we really need a StringBuffer to accumulate the characters?
     StringBuilder tokenValueBuffer = new StringBuilder();
     while (true) {
       if (isEos()) {
         // Unterminated string (either multi-line or not).
         internalState.popMode();
         return Token.ILLEGAL;
       }
       int c = lookahead(0);
       advance();
       if (c == quote) {
         if (!multiLine) {
           // Done parsing the string literal.
           break;
         } else if (lookahead(0) == quote && lookahead(1) == quote) {
           // Done parsing the multi-line string literal.
           advance();
           advance();
           break;
         }
         // A lone quote inside a multi-line string is ordinary content.
       } else if (c == '\n' && !multiLine) {
         // The line feed was already consumed by the advance() above. Advancing a
         // second time here would swallow the first character of the following line.
         internalState.popMode();
         // unterminated (non multi-line) string
         return Token.ILLEGAL;
       }
       tokenValueBuffer.appendCodePoint(c);
     }
     internalState.lastToken.value = tokenValueBuffer.toString();
     internalState.popMode();
     return Token.STRING;
   }

   /**
    * Scan within a string watching for embedded expressions (string
    * interpolation). This function returns 4 kinds of tokens:
    * <ul>
    *   <li> {@link Token#STRING} when {@code start} is true and no embedded
    *   expressions are found (default to string literals when no interpolation
    *   was used).
    *   <li> {@link Token#STRING_SEGMENT} when the string is interrupted with an
    *   embedded expression.
    *   <li> {@link Token#STRING_EMBED_EXP_START} when an embedded expression is
    *   found right away (the lookahead is "${").
    *   <li> {@link Token#STRING_LAST_SEGMENT} when {@code start} is false and no
    *   more embedded expressions are found.
    * </ul>
    */
   private Token scanWithinString(boolean start) {
     assert (internalState.getMode() == InternalState.Mode.IN_STRING);
     int quote = internalState.getQuote();
     boolean multiLine = internalState.isMultiLine();
     StringBuffer tokenValueBuffer = new StringBuffer();
     while (true) {
       if (isEos()) {
         // Unterminated string (either multi-line or not).
         internalState.resetModes();
         return Token.EOS;
       }
       int c = lookahead(0);
       if (c == quote) {
         advance();
         if (!multiLine) {
           // Done parsing string constant.
           break;
         } else if (lookahead(0) == quote && lookahead(1) == quote) {
           // Done parsing multi-line string constant.
           advance();
           advance();
           break;
         }
       } else if (c == '\n' && !multiLine) {
         advance();
         internalState.popMode();
         // unterminated (non multi-line) string
         return Token.ILLEGAL;
       } else if (c == '\\') {
         advance();
         if (isEos()) {
           // Unterminated string (either multi-line or not).
           internalState.resetModes();
           return Token.EOS;
         }
         c = lookahead(0);
         advance();
         switch (c) {
           case '\n':
             reportError(position() - 1, ParserErrorCode.ESCAPED_NEWLINE);
             c = '\n';
             break;
           case 'b':
             c = 0x08;
             break;
           case 'f':
             c = 0x0C;
             break;
           case 'n':
             c = '\n';
             break;
           case 'r':
             c = '\r';
             break;
           case 't':
             c = '\t';
             break;
           case 'v':
             c = 0x0B;
             break;
           case 'x':
           case 'u':
             // Parse Unicode escape sequences, which are of the form (backslash) xXX, (backslash)
             // uXXXX or (backslash) u{X*} where X is a hexadecimal digit - the delimited form must
             // be between 1 and 6 digits.
             int len = (c == 'u') ? 4 : 2;
             if (isEos()) {
               // Unterminated string (either multi-line or not).
               internalState.resetModes();
               return Token.EOS;
             }
             c = lookahead(0);
             int unicodeCodePoint = 0;
             // count of characters remaining or negative if delimited
             if (c == '{') {
               len = -1;
               advance();
               if (isEos()) {
                 // Unterminated string (either multi-line or not).
                 internalState.resetModes();
                 return Token.EOS;
               }
               c = lookahead(0);
             }
             while (len != 0) {
               advance();
               int digit = Character.getNumericValue(c);
               if (digit < 0 || digit > 15) {
                 // TODO(jat): how to handle an error?  We would prefer to give a better error
                 // message about an invalid Unicode escape sequence
                 return Token.ILLEGAL;
               }
               unicodeCodePoint = unicodeCodePoint * 16 + digit;
               c = lookahead(0);
               if (len-- < 0 && c == '}') {
                 advance();
                 break;
               }
               if (isEos()) {
                 // Unterminated string (either multi-line or not).
                 internalState.resetModes();
                 return Token.EOS;
               }
               if (len < -6) {
                 // TODO(jat): better way to indicate error
                 // too many characters for a delimited character
                 return Token.ILLEGAL;
               }
             }
             c = unicodeCodePoint;
             // Unicode escapes must specify a valid Unicode scalar value, and may not specify
             // UTF16 surrogates.
             if (!Character.isValidCodePoint(c) || (c < 0x10000
                 && (Character.isHighSurrogate((char) c) || Character.isLowSurrogate((char) c)))) {
               // TODO(jat): better way to indicate error
               return Token.ILLEGAL;
             }
             // TODO(jat): any other checks?  We could use Character.isDefined, but then we risk
             // version skew with the JRE's Unicode data.  For now, assume anything in the Unicode
             // range besides surrogates are fine.
             break;

           default:
             // any other character following a backslash is just itself
             // see Dart guide 3.3
             break;
         }
       } else if (c == '$') {
         // TODO(sigmund): add support for named embedded expressions and
         // function embedded expressions for string templates.
         if (tokenValueBuffer.length() == 0) {
           advance();
           int nextChar = lookahead(0);
           if (nextChar == '{') {
             advance();
             internalState.pushMode(InternalState.Mode.IN_STRING_EMBEDDED_EXPRESSION, quote,
                 multiLine);
           } else {
             internalState.pushMode(InternalState.Mode.IN_STRING_EMBEDDED_EXPRESSION_IDENTIFIER,
                 quote, multiLine);
           }
           return Token.STRING_EMBED_EXP_START;
         } else {
           // Encountered the beginning of an embedded expression (string
           // interpolation), return the current segment, and keep the "$" for
           // the next token.
           internalState.lastToken.value = tokenValueBuffer.toString();
           return Token.STRING_SEGMENT;
         }
       } else {
         advance();
       }
       tokenValueBuffer.appendCodePoint(c);
     }

     internalState.lastToken.value = tokenValueBuffer.toString();
     internalState.popMode();
     if (start) {
       return Token.STRING;
     } else {
       return Token.STRING_LAST_SEGMENT;
     }
   }

   /**
    * Scans one token starting at the current lookahead position.
    *
    * <p>The scanner mode is consulted first: in {@code IN_STRING} scanning is delegated to
    * {@code scanWithinString}, and the two "$identifier" interpolation modes produce the
    * identifier and then the end-of-embedded-expression token. In every other mode the token
    * is selected from the character at {@code lookahead(0)}.
    *
    * @return the scanned token; {@link Token#EOS} when {@code isEos()} holds and no other
    *         rule matched, {@link Token#ILLEGAL} for input that cannot start a token
    */
   private Token scanToken() {
     switch (internalState.getMode()) {
       case IN_STRING:
         return scanWithinString(false);
       case IN_STRING_EMBEDDED_EXPRESSION_IDENTIFIER:
         // We are inside a string looking for an identifier. Ex: "$foo".
         internalState.replaceMode(InternalState.Mode.IN_STRING_EMBEDDED_EXPRESSION_END);
         int c = lookahead(0);
         if (isIdentifierStart(c) && c != '$') {
           boolean allowDollars = false;
           return scanIdentifier(allowDollars);
         } else {
           internalState.popMode();
           if (!isEos()) {
             // c is a code point held in an int. String.valueOf(int) would store its
             // decimal digits (e.g. "32" for a space) rather than the character itself.
             internalState.lastToken.value = new String(Character.toChars(c));
           }
           return Token.ILLEGAL;
         }
       case IN_STRING_EMBEDDED_EXPRESSION_END:
         // We scanned the identifier of a string-interpolation. Now we return the
         // end-of-embedded-expression token.
         internalState.popMode();
         return Token.STRING_EMBED_EXP_END;
       default:
         // Any other mode: scan a regular token below.
         break;
     }

     switch (lookahead(0)) {
       case '"':
       case '\'': {
         boolean isRaw = false;
         return scanString(isRaw);
       }

       case '<':
         // < <= << <<=
         advance();
         if (is('=')) {
           return select(Token.LTE);
         }
         if (is('<')) {
           return select('=', Token.ASSIGN_SHL, Token.SHL);
         }
         return Token.LT;

       case '>':
         // > >= >> >>=
         advance();
         if (is('=')) {
           return select(Token.GTE);
         }
         if (is('>')) {
           // >> >>=
           advance();
           if (is('=')) {
             return select(Token.ASSIGN_SAR);
           }
           return Token.SAR;
         }
         return Token.GT;

       case '=':
         // = == === =>
         advance();
         if (is('>')) {
           return select(Token.ARROW);
         }
         if (is('=')) {
           return select('=', Token.EQ_STRICT, Token.EQ);
         }
         return Token.ASSIGN;

       case '!':
         // ! != !==
         advance();
         if (is('=')) {
           return select('=', Token.NE_STRICT, Token.NE);
         }
         return Token.NOT;

       case '+':
         // + ++ +=
         advance();
         if (is('+')) {
           return select(Token.INC);
         }
         if (is('=')) {
           return select(Token.ASSIGN_ADD);
         }
         return Token.ADD;

       case '-':
         // - -- -=
         advance();
         if (is('-')) {
           return select(Token.DEC);
         }
         if (is('=')) {
           return select(Token.ASSIGN_SUB);
         }
         return Token.SUB;

       case '*':
         // * *=
         return select('=', Token.ASSIGN_MUL, Token.MUL);

       case '%':
         // % %=
         return select('=', Token.ASSIGN_MOD, Token.MOD);

       case '/':
         // / // /* /=
         advance();
         if (is('/')) {
           return skipSingleLineComment();
         }
         if (is('*')) {
           return skipMultiLineComment();
         }
         if (is('=')) {
           return select(Token.ASSIGN_DIV);
         }
         return Token.DIV;

       case '&':
         // & && &=
         advance();
         if (is('&')) {
           return select(Token.AND);
         }
         if (is('=')) {
           return select(Token.ASSIGN_BIT_AND);
         }
         return Token.BIT_AND;

       case '|':
         // | || |=
         advance();
         if (is('|')) {
           return select(Token.OR);
         }
         if (is('=')) {
           return select(Token.ASSIGN_BIT_OR);
         }
         return Token.BIT_OR;

       case '^':
         // ^ ^=
         return select('=', Token.ASSIGN_BIT_XOR, Token.BIT_XOR);

       case '.':
         // . .. ... <number>
         if (isDecimalDigit(lookahead(1))) {
           return scanNumber();
         } else {
           advance();
           if (lookahead(0) == '.') {
             if (lookahead(1) == '.') {
               advance();
               advance();
               return Token.ELLIPSIS;
             }
             advance();
             return Token.CASCADE;
           }
           return Token.PERIOD;
         }

       case ':':
         return select(Token.COLON);

       case ';':
         return select(Token.SEMICOLON);

       case ',':
         return select(Token.COMMA);

       case '(':
         return select(Token.LPAREN);

       case ')':
         return select(Token.RPAREN);

       case '[':
         // [ [] []=
         advance();
         if (is(']')) {
           return select('=', Token.ASSIGN_INDEX, Token.INDEX);
         }
         return Token.LBRACK;

       case ']':
         return select(Token.RBRACK);

       case '{':
         internalState.openBrace();
         return select(Token.LBRACE);

       case '}':
         // When closeBrace() reports true, this brace ends an embedded string expression
         // rather than an ordinary block.
         if (internalState.closeBrace()) {
           internalState.popMode();
           return select(Token.STRING_EMBED_EXP_END);
         }
         return select(Token.RBRACE);

       case '?':
         return select(Token.CONDITIONAL);

       case '~':
         // ~ ~/ ~/=
         advance();
         if (is('/')) {
           if (lookahead(1) == '=') {
             advance();
             return select(Token.ASSIGN_TRUNC);
           } else {
             return select(Token.TRUNC);
           }
         } else {
           return Token.BIT_NOT;
         }

       case '@':
         // Raw strings.
         advance();
         if (is('\'') || is('"')) {
           // The '@' prefix is still scanned as a raw string, but reported as deprecated.
           reportError(position() - 1, ParserErrorCode.DEPRECATED_RAW_STRING);
           Token token = scanString(true);
           return token;
         } else {
           return Token.AT;
         }

       case '#':
         return scanDirective();

       case 'r':
         // An 'r' directly followed by a quote starts a raw string; otherwise it begins
         // an identifier.
         if (lookahead(1) == '\'' || lookahead(1) == '"') {
           advance();
           return scanString(true);
         }
         return scanIdentifier(true);

       default:
         if (isIdentifierStart(lookahead(0))) {
           boolean allowDollars = true;
           return scanIdentifier(allowDollars);
         }
         if (isDecimalDigit(lookahead(0))) {
           if (lookahead(0) == '0' && (lookahead(1) == 'x' || lookahead(1) == 'X')) {
             return scanHexNumber();
           } else {
             return scanNumber();
           }
         }
         if (isEos()) {
           return Token.EOS;
         }
         return select(Token.ILLEGAL);
     }
   }

   /**
    * Forwards an error to the registered listener; does nothing when no listener is set.
    *
    * @param offset position passed to {@code SourceInfo} as the start of the error; the
    *        third {@code SourceInfo} argument is {@code position() - offset}
    * @param errorCode the error being reported
    */
   private void reportError(int offset, ParserErrorCode errorCode) {
     if (listener == null) {
       return;
     }
     int extent = position() - offset;
     SourceInfo errorInfo = new SourceInfo(sourceReference, offset, extent);
     listener.onError(new DartCompilationError(errorInfo, errorCode));
   }

   /**
    * Scans a token that begins with '#': either a leading "#!" line, which is returned as a
    * comment, or a directive such as #library, #import, #source, and #resource.
    *
    * @return {@link Token#COMMENT} for a "#!" line at position 0, the token found by
    *         {@code Token.lookup} for a recognized directive, and {@link Token#ILLEGAL}
    *         otherwise
    */
   private Token scanDirective() {
     assert (is('#'));
     final int hashPos = position();

     // A "#!" at the very start of the source: consume up to the line terminator and
     // record the span as a comment.
     if (hashPos == 0 && lookahead(1) == '!') {
       while (!(isEos() || isLineTerminator(lookahead(0)))) {
         advance();
       }
       commentLocation(hashPos, internalState.lookaheadPos[0]);
       return Token.COMMENT;
     }

     // Directives must start at the beginning of a line.
     boolean atLineStart = hashPos <= 0 || isLineTerminator(source.codePointBefore(hashPos));
     if (!atLineStart) {
       return select(Token.ILLEGAL);
     }

     // Consume '#' followed by the run of characters in 'a'..'z', then look the text up.
     advance();
     for (int ch = lookahead(0); ch >= 'a' && ch <= 'z'; ch = lookahead(0)) {
       advance();
     }
     Token directive = Token.lookup(source.substring(hashPos, position()));
     if (directive == Token.IDENTIFIER) {
       // lookup() did not recognize the text as a directive.
       return Token.ILLEGAL;
     }
     return directive;
   }

   /**
    * Consumes the current character, then consumes the following one as well if it equals
    * {@code next}.
    *
    * @param next the character that selects the longer token
    * @param yes token returned when {@code next} was present (and consumed)
    * @param no token returned when it was not
    */
   private Token select(int next, Token yes, Token no) {
     advance();
     if (lookahead(0) == next) {
       advance();
       return yes;
     }
     return no;
   }

   /**
    * Consumes the current character and returns the given token.
    *
    * @param token the token to return once the character has been consumed
    * @return {@code token}, unchanged
    */
   private Token select(Token token) {
     advance();
     return token;
   }

   /**
    * Consumes a multi-line comment, tracking nested comment openers and closers, and records
    * its span through {@code commentLocation}. Must be called with the scanner positioned on
    * the '*' of the opening delimiter.
    *
    * @return {@link Token#COMMENT} when the outermost comment is closed, or
    *         {@link Token#ILLEGAL} when the input ends first
    */
   private Token skipMultiLineComment() {
     assert (is('*'));
     // The recorded span starts one position before the current '*'.
     final int commentStart = internalState.lookaheadPos[0] - 1;
     int depth = 1;
     advance();
     while (!isEos()) {
       int ch = lookahead(0);
       advance();
       if (ch == '*' && is('/')) {
         depth--;
         if (depth == 0) {
           // Outermost closer: consume the '/' and record the comment span.
           Token comment = select(Token.COMMENT);
           commentLocation(commentStart, internalState.lookaheadPos[0]);
           return comment;
         }
         advance();
       } else if (ch == '/' && is('*')) {
         // Nested opener.
         depth++;
         advance();
       }
     }
     // Unterminated multi-line comment: the span up to the end is still recorded.
     commentLocation(commentStart, internalState.lookaheadPos[0]);
     return Token.ILLEGAL;
   }

   /**
    * Consumes a single-line comment up to (not including) the next line terminator or the
    * end of input, and records its span through {@code commentLocation}. Must be called with
    * the scanner positioned on the second '/'.
    *
    * @return always {@link Token#COMMENT}
    */
   private Token skipSingleLineComment() {
     assert (is('/'));
     // The recorded span starts one position before the current '/'.
     final int commentStart = internalState.lookaheadPos[0] - 1;
     advance();
     while (!(isEos() || isLineTerminator(lookahead(0)))) {
       advance();
     }
     commentLocation(commentStart, internalState.lookaheadPos[0]);
     return Token.COMMENT;
   }

   /**
    * Advances past line terminators and whitespace characters. Only acts in the
    * {@code DEFAULT} and {@code IN_STRING_EMBEDDED_EXPRESSION} modes; in any other mode the
    * scanner position is left untouched.
    */
   private void skipWhiteSpace() {
     Mode current = internalState.getMode();
     boolean skippingAllowed = current == InternalState.Mode.DEFAULT
         || current == InternalState.Mode.IN_STRING_EMBEDDED_EXPRESSION;
     if (!skippingAllowed) {
       return;
     }
     for (int c = lookahead(0); isLineTerminator(c) || isWhiteSpace(c); c = lookahead(0)) {
       advance();
     }
   }
 }