blob: a019d93eb19113686d42f2d0ba7f7e5c16b6e83e [file] [log] [blame]
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
package com.google.dart.compiler.parser;
import com.google.dart.compiler.DartCompilationError;
import com.google.dart.compiler.DartCompilerListener;
import com.google.dart.compiler.Source;
import com.google.dart.compiler.common.SourceInfo;
import com.google.dart.compiler.metrics.DartEventType;
import com.google.dart.compiler.metrics.Tracer;
import com.google.dart.compiler.metrics.Tracer.TraceEvent;
import com.google.dart.compiler.parser.DartScanner.InternalState.Mode;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
/**
* The Dart scanner. Should normally be used only by {@link DartParser}.
*/
public class DartScanner {
/**
 * An immutable pair of character offsets delimiting a region of a source file.
 */
public static class Location {
  /** Sentinel for "no location"; note that its value is {@code null}. */
  public static final Location NONE = null;

  private final int begin;
  private final int end;

  /**
   * Creates a location running from {@code begin} to {@code end}.
   *
   * @param begin the offset where the region starts
   * @param end the offset where the region stops
   */
  public Location(int begin, int end) {
    this.begin = begin;
    this.end = end;
  }

  /**
   * Creates a location whose begin and end offsets are both {@code begin}.
   *
   * @param begin the single offset used for both ends
   */
  public Location(int begin) {
    this(begin, begin);
  }

  /**
   * @return the offset where the region starts
   */
  public int getBegin() {
    return begin;
  }

  /**
   * @return the offset where the region stops
   */
  public int getEnd() {
    return end;
  }

  /**
   * @return the two offsets formatted as {@code begin::end}
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(begin).append("::").append(end);
    return text.toString();
  }
}
/**
 * A snapshot of the scanner position, as handed out by {@code getState()} and consumed by
 * {@code restoreState(State)}.
 */
public static class State {
  /**
   * Pairs a token with the absolute offset of the slot it occupied.
   */
  static class RollbackToken {
    public final int absoluteOffset;
    final Token replacedToken;

    public RollbackToken(int tokenOffset, Token token) {
      this.absoluteOffset = tokenOffset;
      this.replacedToken = token;
    }
  }

  /** Index into the scanner's token list captured when this snapshot was taken. */
  final int baseOffset;

  /* Stack of tokens present before setPeek() */
  Stack<RollbackToken> rollbackTokens;

  State(int baseOffset) {
    this.baseOffset = baseOffset;
  }

  @Override
  public String toString() {
    return "ofs=".concat(Integer.toString(baseOffset));
  }
}
/**
 * Stores the entire state for the scanner: the character look-ahead window, the list of scanned
 * tokens together with the index of the current one, and the stack of string-scanning states
 * used to handle string interpolation.
 */
protected static class InternalState {
  /**
   * The scanning modes. {@link #DEFAULT} is reported when no string state is active; the other
   * values describe where inside a string literal the scanner currently is.
   */
  enum Mode {
    /** Not inside any string literal. */
    DEFAULT,

    /** Inside the literal text of a string. */
    IN_STRING,

    /**
     * Inside a string, scanning a string-interpolation expression.
     * Ex: "${foo}".
     */
    IN_STRING_EMBEDDED_EXPRESSION,

    /**
     * Inside a string, about to scan a string-interpolation identifier.
     * Ex: positioned just before "foo" in "$foo bc".
     */
    IN_STRING_EMBEDDED_EXPRESSION_IDENTIFIER,

    /**
     * Inside a string, just after having scanned a string-interpolation identifier.
     * Ex: positioned just after "foo" in "$foo bc".
     */
    IN_STRING_EMBEDDED_EXPRESSION_END
  }

  /**
   * Maintains the state of scanning strings, including interpolated
   * expressions/identifiers, nested braces for terminating an interpolated
   * expression, the quote character used to start/end the string, and whether
   * it is a multiline string.
   */
  public static class StringState {
    private int bracesCount;
    private Mode mode;
    private final boolean multiLine;
    private final int quote;

    /**
     * Creates the state for a newly entered mode. If the mode is
     * {@link Mode#IN_STRING_EMBEDDED_EXPRESSION}, the opening brace of the expression is
     * counted immediately.
     *
     * @param mode the string scanning mode being entered
     * @param quote the codepoint of the quote character bounding the string
     * @param multiLine true if the string is a multi-line string
     */
    public StringState(Mode mode, int quote, boolean multiLine) {
      this.bracesCount = mode == Mode.IN_STRING_EMBEDDED_EXPRESSION ? 1 : 0;
      this.mode = mode;
      this.quote = quote;
      this.multiLine = multiLine;
    }

    /**
     * Mark that we have seen an opening brace. Only counted while scanning an embedded
     * expression.
     */
    public void openBrace() {
      if (mode == Mode.IN_STRING_EMBEDDED_EXPRESSION) {
        bracesCount++;
      }
    }

    /**
     * Mark that we have seen a closing brace. Only counted while scanning an embedded
     * expression.
     *
     * @return true if the current mode is now complete and should be popped
     *         off the stack
     */
    public boolean closeBrace() {
      if (mode != Mode.IN_STRING_EMBEDDED_EXPRESSION) {
        return false;
      }
      bracesCount--;
      return bracesCount == 0;
    }

    /**
     * @return the string scanning mode.
     */
    public Mode getMode() {
      return mode;
    }

    /**
     * @return the codepoint of the quote character used to bound the current
     *         string.
     */
    public int getQuote() {
      return quote;
    }

    /**
     * @return true if the current string is a multi-line string.
     */
    public boolean isMultiLine() {
      return multiLine;
    }

    /**
     * @param mode the string scanning mode.
     */
    public void setMode(Mode mode) {
      this.mode = mode;
    }

    @Override
    public String toString() {
      StringBuilder buf = new StringBuilder();
      buf.append(mode).append("/quote=").appendCodePoint(quote);
      if (multiLine) {
        buf.append("/multiline");
      }
      return buf.toString();
    }
  }

  /** Look-ahead code points; index 0 is the current character and -1 marks end of source. */
  private int[] lookahead = new int[NUM_LOOKAHEAD];
  /** Source offsets of the corresponding entries in {@link #lookahead}. */
  private int[] lookaheadPos = new int[NUM_LOOKAHEAD];
  /** Offset of the next source character to be pulled into the look-ahead window. */
  private int nextLookaheadPos;
  /** Every token scanned so far; assigned by the scanner's constructor. */
  private ArrayList<TokenData> tokens;
  /** The token record currently being filled in by the scanner. */
  private TokenData lastToken;
  // Current offset in the token list
  int currentOffset;

  // The following fields store data used for parsing string interpolation.
  // The scanner splits the interpolated string in segments, alternating
  // strings and expressions so that the parser can construct the embedded
  // expressions as it goes. The following information is used to ensure that
  // the string is closed with matching quotes, and to deal with parsing
  // ambiguity of "}" (which closes both embedded expressions and braces
  // within embedded expressions).

  /** The string scanning state stack. */
  private List<StringState> stringStateStack = new ArrayList<StringState>();

  public InternalState() {
    currentOffset = 0;
  }

  /**
   * Renders the current offset, the current token, the full token list and, when inside a
   * string, the opening quote(s). Safe to call before the token list has been assigned or
   * filled, in which case the missing parts are simply omitted.
   */
  @Override
  public String toString() {
    StringBuilder ret = new StringBuilder();
    ret.append("currentOffset(");
    ret.append(currentOffset);
    ret.append(")");
    // The token list is null until the scanner constructor assigns it and empty until the
    // file has been scanned; never index into it blindly.
    final int tokenCount = (tokens == null) ? 0 : tokens.size();
    if (currentOffset > -1 && currentOffset < tokenCount) {
      ret.append(" = [");
      appendToken(ret, tokens.get(currentOffset));
      ret.append("], ");
    }
    ret.append("[");
    for (int i = 0; i < tokenCount; i++) {
      if (i > 0) {
        ret.append(", ");
      }
      appendToken(ret, tokens.get(i));
    }
    ret.append("]");
    if (getMode() != InternalState.Mode.DEFAULT) {
      ret.append("(within string starting with ");
      ret.appendCodePoint(getQuote());
      if (isMultiLine()) {
        ret.appendCodePoint(getQuote());
        ret.appendCodePoint(getQuote());
      }
      ret.append(')');
    }
    return ret.toString();
  }

  /**
   * Appends a token and, when present, its value in parentheses.
   */
  private static void appendToken(StringBuilder out, TokenData tok) {
    out.append(tok.token);
    if (tok.value != null) {
      out.append(" (").append(tok.value).append(")");
    }
  }

  /**
   * @return the current scanning mode, or {@link Mode#DEFAULT} when no string state is active
   */
  protected Mode getMode() {
    return stringStateStack.isEmpty() ? Mode.DEFAULT : getCurrentState().getMode();
  }

  /**
   * Mark that we have seen an open brace. Does nothing when no string state is active.
   */
  protected void openBrace() {
    if (!stringStateStack.isEmpty()) {
      getCurrentState().openBrace();
    }
  }

  /**
   * Mark that we have seen a close brace.
   *
   * @return true if the current mode is now complete and should be popped; always false when
   *         no string state is active
   */
  protected boolean closeBrace() {
    if (!stringStateStack.isEmpty()) {
      return getCurrentState().closeBrace();
    }
    return false;
  }

  /**
   * Pop the current mode. Does nothing when no string state is active.
   */
  protected void popMode() {
    if (!stringStateStack.isEmpty()) {
      stringStateStack.remove(stringStateStack.size() - 1);
    }
  }

  /**
   * Push a new string state on top of the stack.
   *
   * @param mode the mode to push
   * @param quote the codepoint of the quote character bounding the string
   * @param multiLine true if the string is a multi-line string
   */
  protected void pushMode(Mode mode, int quote, boolean multiLine) {
    stringStateStack.add(new StringState(mode, quote, multiLine));
  }

  /**
   * Change the mode of the state on top of the stack, keeping its quote and multi-line flag.
   * Must only be called while a string state is active.
   *
   * @param mode the mode that replaces the current one
   */
  protected void replaceMode(Mode mode) {
    getCurrentState().setMode(mode);
  }

  /**
   * Remove all modes, returning to the default state.
   */
  public void resetModes() {
    stringStateStack.clear();
  }

  /**
   * @return the quote codepoint of the current string state
   */
  private int getQuote() {
    return getCurrentState().getQuote();
  }

  /**
   * @return the current string scanning state; the stack must not be empty
   */
  private StringState getCurrentState() {
    assert !stringStateStack.isEmpty() : "called with empty state stack";
    return stringStateStack.get(stringStateStack.size() - 1);
  }

  /**
   * @return true if the current string state belongs to a multi-line string
   */
  private boolean isMultiLine() {
    return getCurrentState().isMultiLine();
  }
}
/**
 * One entry of the scanned token list: the token kind, where it was found and, for tokens
 * that carry text, the associated value.
 */
private static class TokenData {
  Token token;
  Location location;
  String value;

  /**
   * @return the token's own text, followed by the value in parentheses when there is one
   */
  @Override
  public String toString() {
    final String name = token.toString();
    if (value == null) {
      return name;
    }
    return name + "(" + value + ")";
  }
}
/** Number of source characters kept in the look-ahead window (sizes the look-ahead arrays). */
private static final int NUM_LOOKAHEAD = 2;
/**
 * @return true if {@code c} is one of the ASCII digits '0' through '9'
 */
private static boolean isDecimalDigit(int c) {
  return '0' <= c && c <= '9';
}
/**
 * @return true if {@code c} is an ASCII hexadecimal digit in either letter case
 */
private static boolean isHexDigit(int c) {
  if (isDecimalDigit(c)) {
    return true;
  }
  final boolean lowerHex = 'a' <= c && c <= 'f';
  final boolean upperHex = 'A' <= c && c <= 'F';
  return lowerHex || upperHex;
}
/**
 * @return true if {@code c} may appear inside an identifier, i.e. it is a decimal digit or a
 *         valid identifier start character
 */
private static boolean isIdentifierPart(int c) {
  return isDecimalDigit(c) || isIdentifierStart(c);
}
/**
 * @return true if {@code c} may begin an identifier: an ASCII letter, '_' or '$'
 */
private static boolean isIdentifierStart(int c) {
  if (c == '_' || c == '$') {
    return true;
  }
  final boolean lowerLetter = 'a' <= c && c <= 'z';
  final boolean upperLetter = 'A' <= c && c <= 'Z';
  return lowerLetter || upperLetter;
}
/**
 * @return true if {@code c} is a carriage return or a line feed
 */
private static boolean isLineTerminator(int c) {
  switch (c) {
    case '\r':
    case '\n':
      return true;
    default:
      return false;
  }
}
/**
 * @return true if {@code c} is a space or a horizontal tab (line terminators are not included)
 */
private static boolean isWhiteSpace(int c) {
  switch (c) {
    case ' ':
    case '\t':
      return true;
    default:
      return false;
  }
}
// Running total of characters attributed to comments (see commentLocation).
private int commentCharCount;
// Bounds of the most recently counted comment, used to avoid counting one twice.
private int lastCommentStart;
private int lastCommentStop;
// The text being scanned.
private String source;
// Look-ahead window, token list and string-mode stack.
private InternalState internalState;
// Passed to SourceInfo when reporting errors; may be null.
private Source sourceReference;
// Receives scan errors; may be null, in which case errors are not reported.
private DartCompilerListener listener;
/**
 * Creates a scanner over {@code source} starting at offset 0, with no source reference and no
 * error listener.
 *
 * @param source the text to scan
 */
public DartScanner(String source) {
this(source, 0, null, null);
}
/**
 * Creates a scanner over {@code source} that begins scanning at offset {@code start}, with no
 * source reference and no error listener.
 *
 * @param source the text to scan
 * @param start the offset in {@code source} of the first character to scan
 */
public DartScanner(String source, int start) {
  // Forward the requested start offset rather than a fixed 0.
  this(source, start, null, null);
}
/**
 * Creates a scanner and immediately scans all of {@code source} into tokens. Because scanning
 * happens here, {@link #recordCommentLocation(int, int)} may be invoked before a subclass
 * constructor has run.
 *
 * @param source the text to scan
 * @param start the offset in {@code source} of the first character to scan
 * @param sourceReference the source attached to reported errors; may be null
 * @param listener receives scan errors; may be null
 */
public DartScanner(String source, int start, Source sourceReference, DartCompilerListener listener) {
  TraceEvent logEvent = null;
  if (Tracer.canTrace()) {
    logEvent = Tracer.start(DartEventType.SCANNER);
  }
  try {
    this.source = source;
    this.sourceReference = sourceReference;
    this.listener = listener;

    InternalState state = new InternalState();
    state.tokens = new ArrayList<TokenData>(source.length() / 2);
    // Every look-ahead slot starts out pointing at the start offset.
    // TODO Determine if line & column should be relative to 0 or 'start'
    state.nextLookaheadPos = start;
    for (int slot = state.lookaheadPos.length - 1; slot >= 0; slot--) {
      state.lookaheadPos[slot] = start;
    }
    internalState = state;

    // Prime the character look-ahead window.
    int primed = 0;
    while (primed < NUM_LOOKAHEAD) {
      advance();
      primed++;
    }

    // Tokenize the whole source up front.
    scanFile();
  } finally {
    Tracer.end(logEvent);
  }
}
/**
 * Returns the number of characters of source code that were scanned, taken from the offset of
 * the next character the look-ahead would load.
 */
public int getCharCount() {
  final int scannedUpTo = internalState.nextLookaheadPos;
  return scannedUpTo;
}
/**
 * Returns {@link #getCharCount()} minus the number of characters that were attributed to
 * comments while scanning.
 */
public int getNonCommentCharCount() {
  final int total = getCharCount();
  return total - commentCharCount;
}
/**
 * Returns the value of the token {@code n + 1} positions after the current one. Unlike
 * {@link #peekTokenValue(int)}, the position is only guarded by an assertion.
 *
 * @param n zero-based look-ahead distance
 */
public String getPeekTokenValue(int n) {
  final int index = internalState.currentOffset + n + 1;
  assert (0 <= n && index < internalState.tokens.size());
  TokenData peeked = internalState.tokens.get(index);
  return peeked.value;
}
/**
 * Captures the scanner's current token offset in a new {@link State}, which can later be
 * handed to {@link #restoreState(State)}.
 */
public State getState() {
  final int offset = internalState.currentOffset;
  return new State(offset);
}
/**
 * Returns the index of the current token within the scanned token list.
 */
public int getOffset() {
  final int offset = internalState.currentOffset;
  return offset;
}
/**
 * Returns the token at the current offset.
 */
public Token getToken() {
  TokenData current = internalState.tokens.get(internalState.currentOffset);
  return current.token;
}
/**
 * Returns the source location of the token at the current offset.
 */
public Location getTokenLocation() {
  TokenData current = internalState.tokens.get(internalState.currentOffset);
  return current.location;
}
/**
 * Returns the location of the token {@code n + 1} positions after the current one. Reading
 * past the end of the token stream is not valid, so in that case the location of the last
 * token (the EOS token) is returned instead.
 */
public Location peekTokenLocation(int n) {
  final int size = internalState.tokens.size();
  final int wanted = internalState.currentOffset + n + 1;
  final int index = (wanted < size) ? wanted : size - 1;
  return internalState.tokens.get(index).location;
}
/**
 * Returns the value of the token at the current offset, i.e. the token most recently returned
 * by {@link #next()}.
 */
public String getTokenValue() {
  TokenData current = internalState.tokens.get(internalState.currentOffset);
  return current.value;
}
/**
 * Returns the value of the token {@code n + 1} positions after the current one, or
 * {@code null} (the default value of an EOS token) when that position lies beyond the end of
 * the token stream.
 */
public String peekTokenValue(int n) {
  final int index = internalState.currentOffset + n + 1;
  if (index >= internalState.tokens.size()) {
    return null;
  }
  return internalState.tokens.get(index).value;
}
/**
 * Moves to the next token and returns it. Once the last token has been reached the offset
 * stops advancing, so that token keeps being returned.
 */
public Token next() {
  // Do not advance the current offset beyond the end of the token stream.
  final int following = internalState.currentOffset + 1;
  if (following < internalState.tokens.size()) {
    internalState.currentOffset = following;
  }
  return getToken();
}
/**
 * Token look-ahead past the token returned by {@link #next()}: {@code peek(0)} is the token
 * immediately following the current one. Positions beyond the end of the token stream yield
 * {@link Token#EOS}.
 */
public Token peek(int n) {
  final int index = internalState.currentOffset + n + 1;
  return (index < internalState.tokens.size())
      ? internalState.tokens.get(index).token
      : Token.EOS;
}
/**
 * Rewinds (or forwards) the scanner to the token offset stored in a state object previously
 * returned from {@link #getState()}.
 */
public void restoreState(State oldState) {
  final int savedOffset = oldState.baseOffset;
  internalState.currentOffset = savedOffset;
}
/**
 * Overwrites the token kind stored {@code n + 1} positions after the current token.
 *
 * @param n zero-based look-ahead distance
 * @param token the token kind to store in that slot
 */
public void setPeek(int n, Token token) {
  final int index = internalState.currentOffset + n + 1;
  assert (0 <= n && index < internalState.tokens.size());
  TokenData slot = internalState.tokens.get(index);
  slot.token = token;
}
/**
 * Overwrites the token kind stored at absolute index {@code n} of the token list (not relative
 * to the current offset).
 *
 * @param n absolute index into the token list
 * @param token the token kind to store in that slot
 */
public void setAbsolutePeek(int n, Token token) {
  assert (0 <= n && n < internalState.tokens.size());
  TokenData slot = internalState.tokens.get(n);
  slot.token = token;
}
/**
 * Describes the scanner through its internal state, or falls back to the default
 * {@code Object} description while the internal state has not been created yet.
 */
@Override
public String toString() {
  return (internalState == null) ? super.toString() : internalState.toString();
}
/**
 * A hook into low-level scanning machinery. Use with care and only as directed.<p>
 * Record the location of a comment. Given a source string <code>source,</code>
 * the actual comment string is <code>source.substring(start - 1, stop)</code>
 * because the comment cannot be recognized until its second character is
 * scanned.<p>
 * Note: A single comment may be scanned multiple times. If the scanner has
 * to backtrack it will re-scan comments until it no longer has to backtrack.
 * Clients are responsible for filtering duplicate comment locations.<p>
 * Warning: This method may be called during initialization of the scanner in
 * the <code>DartScanner</code> constructor. Fields defined in the subclass
 * that implements this method may not have been initialized before the first
 * invocation.<p>
 * The default implementation does nothing.<p>
 * NOTE(review): the comment-skipping methods in this class appear to pass the offset of the
 * comment's first character as <code>start</code> and the offset just past the comment as
 * <code>stop</code>, which differs from the description above - confirm which contract
 * subclasses rely on.
 * @param start the character position of the second character in the comment
 * @param stop the character position of the final character in the comment
 */
protected void recordCommentLocation(int start, int stop) {
}
/**
 * Consumes one character: shifts the look-ahead window left by one slot and loads the next
 * code point of the source (or the end-of-source marker -1) into the last slot.
 */
private void advance() {
  final int last = NUM_LOOKAHEAD - 1;
  System.arraycopy(internalState.lookahead, 1, internalState.lookahead, 0, last);
  System.arraycopy(internalState.lookaheadPos, 1, internalState.lookaheadPos, 0, last);

  final int nextPos = internalState.nextLookaheadPos;
  final int sourceLength = source.length();
  if (nextPos >= sourceLength) {
    // Let the last look-ahead position be past the source. This makes
    // the position information for the last token correct.
    internalState.lookahead[last] = -1;
    internalState.lookaheadPos[last] = sourceLength;
    // Keep the next look-ahead position parked at the end of the source.
    internalState.nextLookaheadPos = sourceLength;
    return;
  }
  internalState.lookahead[last] = source.codePointAt(nextPos);
  internalState.lookaheadPos[last] = nextPos;
  // Step by a whole code point so that surrogate pairs are consumed together.
  internalState.nextLookaheadPos = source.offsetByCodePoints(nextPos, 1);
}
/**
 * Called when a comment is identified. Adds the comment's size to the running comment
 * character count and then delegates to {@link #recordCommentLocation(int, int)}. This
 * provides a light weight way to track how much of the code is made up of comments without
 * having to keep all comments. A comment that does not extend past the previously recorded
 * one (neither a later start nor a later stop) is ignored.
 *
 * @param start the start position reported for the comment
 * @param stop the stop position reported for the comment
 */
private void commentLocation(int start, int stop) {
  final boolean alreadyCounted = start <= lastCommentStart && stop <= lastCommentStop;
  if (!alreadyCounted) {
    lastCommentStart = start;
    lastCommentStop = stop;
    // NOTE(review): treats stop as inclusive; callers in this class seem to pass the offset
    // just past the comment - confirm whether the +1 is intended.
    commentCharCount += stop - start + 1;
    recordCommentLocation(start, stop);
  }
}
/**
 * @return true if the current (first look-ahead) character equals {@code c}
 */
private boolean is(int c) {
  final int current = internalState.lookahead[0];
  return current == c;
}
/**
 * @return true if the current character is the (negative) end-of-source marker
 */
private boolean isEos() {
  final int current = internalState.lookahead[0];
  return current < 0;
}
/**
 * @param n look-ahead slot, from 0 (the current character) up to NUM_LOOKAHEAD - 1
 * @return the code point held in that slot
 */
private int lookahead(int n) {
  assert (n >= 0 && n < NUM_LOOKAHEAD);
  final int[] window = internalState.lookahead;
  return window[n];
}
/**
 * @return the source offset of the current (first look-ahead) character
 */
private int position() {
  final int[] offsets = internalState.lookaheadPos;
  return offsets[0];
}
/**
 * Scans the whole source into the token list. A placeholder entry is stored first, followed
 * by one entry per token up to and including the EOS token. Comment tokens are skipped and
 * never stored.
 */
private void scanFile() {
  // First node inserted as a dummy.
  TokenData placeholder = new TokenData();
  internalState.lastToken = placeholder;
  internalState.tokens.add(placeholder);

  Token token;
  do {
    // The scan methods write the token value into lastToken, so publish the fresh record
    // before scanning.
    TokenData data = new TokenData();
    internalState.lastToken = data;

    int begin;
    do {
      skipWhiteSpace();
      begin = position();
      token = scanToken();
    } while (token == Token.COMMENT);

    data.token = token;
    data.location = new Location(begin, position());
    internalState.tokens.add(data);
  } while (token != Token.EOS);
}
/**
 * Scans an identifier or keyword starting at the current character, stores its text as the
 * value of the token being built and returns whatever {@code Token.lookup} reports for it.
 *
 * @param allowDollars when false, scanning stops in front of any '$'
 */
private Token scanIdentifier(boolean allowDollars) {
  assert (isIdentifierStart(lookahead(0)));
  final int begin = position();
  for (int ch = lookahead(0);
      isIdentifierPart(ch) && (allowDollars || ch != '$');
      ch = lookahead(0)) {
    advance();
  }
  // Use a substring of the source string instead of copying all the
  // characters to the token value buffer.
  final String text = source.substring(begin, position());
  internalState.lastToken.value = text;
  return Token.lookup(text);
}
/**
 * Scans a decimal number: optional integer digits, an optional fraction and an optional
 * exponent. The literal's text becomes the token value.
 *
 * @return {@link Token#DOUBLE_LITERAL} when a fraction or exponent is present,
 *         {@link Token#INTEGER_LITERAL} otherwise, or {@link Token#ILLEGAL} for an exponent
 *         without digits or a number directly followed by an identifier character
 */
private Token scanNumber() {
  assert (isDecimalDigit(lookahead(0)) || is('.'));
  final int begin = position();
  boolean sawFractionOrExponent = false;

  // Integer part (may be empty when the literal starts with '.').
  while (isDecimalDigit(lookahead(0))) {
    advance();
  }

  // Fraction: only when the '.' is directly followed by a digit.
  if (is('.') && isDecimalDigit(lookahead(1))) {
    sawFractionOrExponent = true;
    advance(); // Consume .
    do {
      advance();
    } while (isDecimalDigit(lookahead(0)));
  }

  if (isE()) {
    sawFractionOrExponent = true;
    advance();
    if (is('+') || is('-')) {
      advance();
    }
    if (!isDecimalDigit(lookahead(0))) {
      return Token.ILLEGAL;
    }
    do {
      advance();
    } while (isDecimalDigit(lookahead(0)));
  } else if (isIdentifierStart(lookahead(0))) {
    // Number literals must not be followed directly by an identifier.
    return Token.ILLEGAL;
  }

  internalState.lastToken.value = source.substring(begin, position());
  if (sawFractionOrExponent) {
    return Token.DOUBLE_LITERAL;
  }
  return Token.INTEGER_LITERAL;
}
/**
 * @return true if the current character is an exponent marker, 'e' or 'E'
 */
private boolean isE() {
  final int current = lookahead(0);
  return current == 'e' || current == 'E';
}
/**
 * Scans a hexadecimal literal. The token value is the digit text after the "0x"/"0X" prefix.
 *
 * @return {@link Token#HEX_LITERAL}, or {@link Token#ILLEGAL} when no hex digit follows the
 *         prefix or the digits are directly followed by an identifier character
 */
private Token scanHexNumber() {
  assert (isDecimalDigit(lookahead(0)) && (lookahead(1) == 'x' || lookahead(1) == 'X'));
  // Skip 0x/0X.
  advance();
  advance();

  final int digitsStart = position();
  if (!isHexDigit(lookahead(0))) {
    return Token.ILLEGAL;
  }
  do {
    advance();
  } while (isHexDigit(lookahead(0)));

  if (isIdentifierStart(lookahead(0))) {
    return Token.ILLEGAL;
  }
  internalState.lastToken.value = source.substring(digitsStart, position());
  return Token.HEX_LITERAL;
}
/**
 * Starts scanning a string literal whose opening quote is the current character. Detects the
 * triple-quote (multi-line) form, enters string mode and hands over to the raw or the
 * interpolating string scanner.
 *
 * @param isRaw true to scan the contents as a raw string
 */
private Token scanString(boolean isRaw) {
  final int quote = lookahead(0);
  assert (is('\'') || is('"'));
  advance();

  // Two more quotes right after the opening one make this a multi-line string.
  final boolean multiLine = lookahead(0) == quote && lookahead(1) == quote;
  if (multiLine) {
    advance();
    advance();
    // according to the dart guide, when multi-line strings start immediately
    // with a \n, the \n is not part of the string:
    if (is('\n')) {
      advance();
    }
  }

  internalState.pushMode(InternalState.Mode.IN_STRING, quote, multiLine);
  return isRaw ? scanRawString() : scanWithinString(true);
}
/**
 * Scans the contents of a raw string up to its closing quote(s). No escapes or interpolation
 * are interpreted: every character is copied verbatim into the token value. The string mode
 * pushed by {@code scanString} is popped before returning.
 *
 * @return {@link Token#STRING} on success, or {@link Token#ILLEGAL} when the source ends or,
 *         for a single-line string, a newline is reached before the closing quote
 */
private Token scanRawString() {
  assert (internalState.getMode() == InternalState.Mode.IN_STRING);
  int quote = internalState.getQuote();
  boolean multiLine = internalState.isMultiLine();
  // TODO(floitsch): Do we really need a StringBuffer to accumulate the characters?
  StringBuilder tokenValueBuffer = new StringBuilder();
  while (true) {
    if (isEos()) {
      // Unterminated string (either multi-line or not).
      internalState.popMode();
      return Token.ILLEGAL;
    }
    int c = lookahead(0);
    advance();
    if (c == quote) {
      if (!multiLine) {
        // Done parsing the string literal.
        break;
      } else if (lookahead(0) == quote && lookahead(1) == quote) {
        // Done parsing the multi-line string literal.
        advance();
        advance();
        break;
      }
      // A lone quote inside a multi-line string is ordinary content.
    } else if (c == '\n' && !multiLine) {
      // unterminated (non multi-line) string. The newline itself was already consumed by
      // the advance() above; do not advance again, or the first character of the next line
      // would be swallowed into the illegal token.
      internalState.popMode();
      return Token.ILLEGAL;
    }
    tokenValueBuffer.appendCodePoint(c);
  }
  internalState.lastToken.value = tokenValueBuffer.toString();
  internalState.popMode();
  return Token.STRING;
}
/**
 * Scan within a string watching for embedded expressions (string
 * interpolation). This function returns 4 kinds of tokens on success:
 * <ul>
 * <li> {@link Token#STRING} when {@code start} is true and no embedded
 * expressions are found (default to string literals when no interpolation
 * was used).
 * <li> {@link Token#STRING_SEGMENT} when the string is interrupted with an
 * embedded expression.
 * <li> {@link Token#STRING_EMBED_EXP_START} when an embedded expression is
 * found right away (the lookahead is "$" or "${").
 * <li> {@link Token#STRING_LAST_SEGMENT} when {@code start} is false and no
 * more embedded expressions are found.
 * </ul>
 * On malformed input it returns {@link Token#EOS} (source ended inside the string; all string
 * modes are reset) or {@link Token#ILLEGAL} (newline inside a single-line string, or an
 * invalid Unicode escape).
 *
 * @param start true when scanning begins right after the opening quote(s), false when
 *        resuming after an embedded expression
 */
private Token scanWithinString(boolean start) {
  assert (internalState.getMode() == InternalState.Mode.IN_STRING);
  int quote = internalState.getQuote();
  boolean multiLine = internalState.isMultiLine();
  // Accumulates the decoded segment text; StringBuilder, as in scanRawString.
  StringBuilder tokenValueBuffer = new StringBuilder();
  while (true) {
    if (isEos()) {
      // Unterminated string (either multi-line or not).
      internalState.resetModes();
      return Token.EOS;
    }
    int c = lookahead(0);
    if (c == quote) {
      advance();
      if (!multiLine) {
        // Done parsing string constant.
        break;
      } else if (lookahead(0) == quote && lookahead(1) == quote) {
        // Done parsing multi-line string constant.
        advance();
        advance();
        break;
      }
      // A lone quote inside a multi-line string falls through and is appended below.
    } else if (c == '\n' && !multiLine) {
      advance();
      internalState.popMode();
      // unterminated (non multi-line) string
      return Token.ILLEGAL;
    } else if (c == '\\') {
      advance();
      if (isEos()) {
        // Unterminated string (either multi-line or not).
        internalState.resetModes();
        return Token.EOS;
      }
      c = lookahead(0);
      advance();
      switch (c) {
        case '\n':
          reportError(position() - 1, ParserErrorCode.ESCAPED_NEWLINE);
          c = '\n';
          break;
        case 'b':
          c = 0x08;
          break;
        case 'f':
          c = 0x0C;
          break;
        case 'n':
          c = '\n';
          break;
        case 'r':
          c = '\r';
          break;
        case 't':
          c = '\t';
          break;
        case 'v':
          c = 0x0B;
          break;
        case 'x':
        case 'u':
          // Parse Unicode escape sequences, which are of the form (backslash) xXX, (backslash)
          // uXXXX or (backslash) u{X*} where X is a hexadecimal digit - the delimited form must
          // be between 1 and 6 digits.
          int len = (c == 'u') ? 4 : 2;
          if (isEos()) {
            // Unterminated string (either multi-line or not).
            internalState.resetModes();
            return Token.EOS;
          }
          c = lookahead(0);
          int unicodeCodePoint = 0;
          // count of characters remaining or negative if delimited
          if (c == '{') {
            len = -1;
            advance();
            if (isEos()) {
              // Unterminated string (either multi-line or not).
              internalState.resetModes();
              return Token.EOS;
            }
            c = lookahead(0);
          }
          while (len != 0) {
            advance();
            // Only ASCII hexadecimal digits are valid here; other characters that merely
            // carry a numeric value in Unicode must be rejected.
            if (!isHexDigit(c)) {
              // TODO(jat): how to handle an error? We would prefer to give a better error
              // message about an invalid Unicode escape sequence
              return Token.ILLEGAL;
            }
            int digit = Character.digit(c, 16);
            unicodeCodePoint = unicodeCodePoint * 16 + digit;
            c = lookahead(0);
            if (len-- < 0 && c == '}') {
              advance();
              break;
            }
            if (isEos()) {
              // Unterminated string (either multi-line or not).
              internalState.resetModes();
              return Token.EOS;
            }
            if (len < -6) {
              // TODO(jat): better way to indicate error
              // too many characters for a delimited character
              return Token.ILLEGAL;
            }
          }
          c = unicodeCodePoint;
          // Unicode escapes must specify a valid Unicode scalar value, and may not specify
          // UTF16 surrogates.
          if (!Character.isValidCodePoint(c) || (c < 0x10000
              && (Character.isHighSurrogate((char) c) || Character.isLowSurrogate((char) c)))) {
            // TODO(jat): better way to indicate error
            return Token.ILLEGAL;
          }
          // TODO(jat): any other checks? We could use Character.isDefined, but then we risk
          // version skew with the JRE's Unicode data. For now, assume anything in the Unicode
          // range besides surrogates are fine.
          break;
        default:
          // any other character following a backslash is just itself
          // see Dart guide 3.3
          break;
      }
    } else if (c == '$') {
      // TODO(sigmund): add support for named embedded expressions and
      // function embedded expressions for string templates.
      if (tokenValueBuffer.length() == 0) {
        advance();
        int nextChar = lookahead(0);
        if (nextChar == '{') {
          advance();
          internalState.pushMode(InternalState.Mode.IN_STRING_EMBEDDED_EXPRESSION, quote,
              multiLine);
        } else {
          internalState.pushMode(InternalState.Mode.IN_STRING_EMBEDDED_EXPRESSION_IDENTIFIER,
              quote, multiLine);
        }
        return Token.STRING_EMBED_EXP_START;
      } else {
        // Encountered the beginning of an embedded expression (string
        // interpolation), return the current segment, and keep the "$" for
        // the next token.
        internalState.lastToken.value = tokenValueBuffer.toString();
        return Token.STRING_SEGMENT;
      }
    } else {
      advance();
    }
    tokenValueBuffer.appendCodePoint(c);
  }
  internalState.lastToken.value = tokenValueBuffer.toString();
  internalState.popMode();
  if (start) {
    return Token.STRING;
  } else {
    return Token.STRING_LAST_SEGMENT;
  }
}
/**
 * Scans and returns the next token at the current position. When a string mode is active the
 * token is produced according to that mode (string text, interpolation identifier, or the end
 * of an interpolation); otherwise the token is chosen from the current character. Tokens that
 * carry text store it as the value of the token being built.
 *
 * @return the scanned token; {@link Token#COMMENT} for comments, {@link Token#EOS} at the end
 *         of the source and {@link Token#ILLEGAL} for unrecognized input
 */
private Token scanToken() {
  switch (internalState.getMode()) {
    case IN_STRING:
      return scanWithinString(false);
    case IN_STRING_EMBEDDED_EXPRESSION_IDENTIFIER:
      // We are inside a string looking for an identifier. Ex: "$foo".
      internalState.replaceMode(InternalState.Mode.IN_STRING_EMBEDDED_EXPRESSION_END);
      int c = lookahead(0);
      if (isIdentifierStart(c) && c != '$') {
        boolean allowDollars = false;
        return scanIdentifier(allowDollars);
      } else {
        internalState.popMode();
        if (!isEos()) {
          // Store the offending character itself; String.valueOf(int) would store its
          // decimal code point instead.
          internalState.lastToken.value = new String(Character.toChars(c));
        }
        return Token.ILLEGAL;
      }
    case IN_STRING_EMBEDDED_EXPRESSION_END:
      // We scanned the identifier of a string-interpolation. Now we return the
      // end-of-embedded-expression token.
      internalState.popMode();
      return Token.STRING_EMBED_EXP_END;
    default:
      // fall through
  }
  switch (lookahead(0)) {
    case '"':
    case '\'': {
      boolean isRaw = false;
      return scanString(isRaw);
    }
    case '<':
      // < <= << <<=
      advance();
      if (is('='))
        return select(Token.LTE);
      if (is('<'))
        return select('=', Token.ASSIGN_SHL, Token.SHL);
      return Token.LT;
    case '>':
      // > >= >> >>=
      advance();
      if (is('='))
        return select(Token.GTE);
      if (is('>')) {
        // >> >>=
        advance();
        if (is('='))
          return select(Token.ASSIGN_SAR);
        return Token.SAR;
      }
      return Token.GT;
    case '=':
      // = == === =>
      advance();
      if (is('>')) {
        return select(Token.ARROW);
      }
      if (is('='))
        return select('=', Token.EQ_STRICT, Token.EQ);
      return Token.ASSIGN;
    case '!':
      // ! != !==
      advance();
      if (is('='))
        return select('=', Token.NE_STRICT, Token.NE);
      return Token.NOT;
    case '+':
      // + ++ +=
      advance();
      if (is('+'))
        return select(Token.INC);
      if (is('='))
        return select(Token.ASSIGN_ADD);
      return Token.ADD;
    case '-':
      // - -- -=
      advance();
      if (is('-'))
        return select(Token.DEC);
      if (is('='))
        return select(Token.ASSIGN_SUB);
      return Token.SUB;
    case '*':
      // * *=
      return select('=', Token.ASSIGN_MUL, Token.MUL);
    case '%':
      // % %=
      return select('=', Token.ASSIGN_MOD, Token.MOD);
    case '/':
      // / // /* /=
      advance();
      if (is('/'))
        return skipSingleLineComment();
      if (is('*'))
        return skipMultiLineComment();
      if (is('='))
        return select(Token.ASSIGN_DIV);
      return Token.DIV;
    case '&':
      // & && &=
      advance();
      if (is('&'))
        return select(Token.AND);
      if (is('='))
        return select(Token.ASSIGN_BIT_AND);
      return Token.BIT_AND;
    case '|':
      // | || |=
      advance();
      if (is('|'))
        return select(Token.OR);
      if (is('='))
        return select(Token.ASSIGN_BIT_OR);
      return Token.BIT_OR;
    case '^':
      // ^ ^=
      return select('=', Token.ASSIGN_BIT_XOR, Token.BIT_XOR);
    case '.':
      // . .. ... <number>
      if (isDecimalDigit(lookahead(1))) {
        return scanNumber();
      } else {
        advance();
        if (lookahead(0) == '.') {
          if (lookahead(1) == '.') {
            advance();
            advance();
            return Token.ELLIPSIS;
          }
          advance();
          return Token.CASCADE;
        }
        return Token.PERIOD;
      }
    case ':':
      return select(Token.COLON);
    case ';':
      return select(Token.SEMICOLON);
    case ',':
      return select(Token.COMMA);
    case '(':
      return select(Token.LPAREN);
    case ')':
      return select(Token.RPAREN);
    case '[':
      // [ [] []=
      advance();
      if (is(']')) {
        return select('=', Token.ASSIGN_INDEX, Token.INDEX);
      }
      return Token.LBRACK;
    case ']':
      return select(Token.RBRACK);
    case '{':
      // Count the brace so that the matching "}" is not mistaken for the end of an
      // embedded expression.
      internalState.openBrace();
      return select(Token.LBRACE);
    case '}':
      // A brace that balances the "${" closes the embedded expression.
      if (internalState.closeBrace()) {
        internalState.popMode();
        return select(Token.STRING_EMBED_EXP_END);
      }
      return select(Token.RBRACE);
    case '?':
      return select(Token.CONDITIONAL);
    case '~':
      // ~ ~/ ~/=
      advance();
      if (is('/')) {
        if (lookahead(1) == '=') {
          advance();
          return select(Token.ASSIGN_TRUNC);
        } else {
          return select(Token.TRUNC);
        }
      } else {
        return Token.BIT_NOT;
      }
    case '@':
      // Raw strings.
      advance();
      if (is('\'') || is('"')) {
        reportError(position() - 1, ParserErrorCode.DEPRECATED_RAW_STRING);
        Token token = scanString(true);
        return token;
      } else {
        return Token.AT;
      }
    case '#':
      return scanDirective();
    case 'r':
      // An 'r' directly followed by a quote starts a raw string; otherwise it is an identifier.
      if (lookahead(1) == '\'' || lookahead(1) == '"') {
        advance();
        return scanString(true);
      }
      return scanIdentifier(true);
    default:
      if (isIdentifierStart(lookahead(0))) {
        boolean allowDollars = true;
        return scanIdentifier(allowDollars);
      }
      if (isDecimalDigit(lookahead(0))) {
        if (lookahead(0) == '0' && (lookahead(1) == 'x' || lookahead(1) == 'X')) {
          return scanHexNumber();
        } else {
          return scanNumber();
        }
      }
      if (isEos())
        return Token.EOS;
      return select(Token.ILLEGAL);
  }
}
/**
 * Reports an error covering the source from {@code offset} up to the current position. Does
 * nothing when no listener was supplied.
 *
 * @param offset the source offset where the erroneous text starts
 * @param errorCode the kind of error to report
 */
private void reportError(int offset, ParserErrorCode errorCode) {
  if (listener == null) {
    return;
  }
  final int length = position() - offset;
  SourceInfo where = new SourceInfo(sourceReference, offset, length);
  listener.onError(new DartCompilationError(where, errorCode));
}
/**
 * Scans a token starting with '#': a directive such as #library, #import, #source or
 * #resource, or a "#!" line at the very start of the source, which is treated as a comment.
 *
 * @return the directive's token, {@link Token#COMMENT} for a leading "#!" line, or
 *         {@link Token#ILLEGAL} when the '#' is not at the start of a line or the text is not
 *         a known directive
 */
private Token scanDirective() {
  assert (is('#'));
  final int start = position();

  // Skip over the #! if it exists and consider it a comment
  if (start == 0 && lookahead(1) == '!') {
    while (!isEos() && !isLineTerminator(lookahead(0))) {
      advance();
    }
    commentLocation(start, internalState.lookaheadPos[0]);
    return Token.COMMENT;
  }

  // Directives must start at the beginning of a line
  if (start > 0 && !isLineTerminator(source.codePointBefore(start))) {
    return select(Token.ILLEGAL);
  }

  // Determine which directive is being specified: '#' followed by lower-case letters.
  advance();
  for (int ch = lookahead(0); ch >= 'a' && ch <= 'z'; ch = lookahead(0)) {
    advance();
  }
  final String directiveText = source.substring(start, position());
  final Token found = Token.lookup(directiveText);
  if (found == Token.IDENTIFIER) {
    return Token.ILLEGAL;
  }
  return found;
}
/**
 * Consumes the current character, then consumes the following one too if it equals
 * {@code next}.
 *
 * @param next the character that selects the longer token
 * @param yes the token returned when {@code next} was present (and consumed)
 * @param no the token returned otherwise
 */
private Token select(int next, Token yes, Token no) {
  advance();
  if (lookahead(0) == next) {
    advance();
    return yes;
  }
  return no;
}
/**
 * Consumes the current character and returns {@code token} unchanged.
 */
private Token select(Token token) {
  final Token selected = token;
  advance();
  return selected;
}
/**
 * Skips a block comment, honoring nested comments. Called with the current character on the
 * '*' that follows the opening '/'. The comment's extent is passed to
 * {@link #commentLocation(int, int)} whether or not it is terminated.
 *
 * @return {@link Token#COMMENT}, or {@link Token#ILLEGAL} when the source ends before the
 *         comment is closed
 */
private Token skipMultiLineComment() {
  assert (is('*'));
  final int start = position() - 1;
  int depth = 1;
  advance();
  while (!isEos()) {
    final int first = lookahead(0);
    advance();
    final boolean closes = first == '*' && is('/');
    final boolean opens = first == '/' && is('*');
    if (closes) {
      depth--;
      if (depth == 0) {
        // Consume the final '/' before measuring where the comment stops.
        advance();
        commentLocation(start, position());
        return Token.COMMENT;
      }
      advance();
    } else if (opens) {
      depth++;
      advance();
    }
  }
  commentLocation(start, position());
  // Unterminated multi-line comment.
  return Token.ILLEGAL;
}
/**
 * Skips a line comment up to, but not including, the line terminator or the end of the
 * source. Called with the current character on the second '/'. The comment's extent is passed
 * to {@link #commentLocation(int, int)}.
 *
 * @return always {@link Token#COMMENT}
 */
private Token skipSingleLineComment() {
  assert (is('/'));
  final int start = position() - 1;
  advance();
  while (true) {
    if (isEos() || isLineTerminator(lookahead(0))) {
      break;
    }
    advance();
  }
  commentLocation(start, position());
  return Token.COMMENT;
}
/**
 * Skips spaces, tabs and line terminators in front of the next token. Nothing is skipped
 * unless the scanner is in the default mode or inside an embedded string expression.
 */
private void skipWhiteSpace() {
  final Mode mode = internalState.getMode();
  final boolean skippingAllowed = mode == InternalState.Mode.DEFAULT
      || mode == InternalState.Mode.IN_STRING_EMBEDDED_EXPRESSION;
  if (!skippingAllowed) {
    return;
  }
  for (int c = lookahead(0); isLineTerminator(c) || isWhiteSpace(c); c = lookahead(0)) {
    advance();
  }
}
}