|  | // Copyright (c) 2012, the Dart project authors.  Please see the AUTHORS file | 
|  | // for details. All rights reserved. Use of this source code is governed by a | 
|  | // BSD-style license that can be found in the LICENSE file. | 
|  |  | 
|  | #include "vm/scanner.h" | 
|  |  | 
|  | #include "platform/assert.h" | 
|  | #include "vm/dart.h" | 
|  | #include "vm/flags.h" | 
|  | #include "vm/object.h" | 
|  | #include "vm/object_store.h" | 
|  | #include "vm/symbols.h" | 
|  | #include "vm/token.h" | 
|  | #include "vm/unicode.h" | 
|  |  | 
|  | namespace dart { | 
|  |  | 
|  | // Shorthand for zone() and thread(); usable only where those methods are in scope. | 
|  | #define Z (zone()) | 
|  | #define T (thread()) | 
|  |  | 
|  |  | 
|  | class ScanContext : public ZoneAllocated { | 
|  | public: | 
|  | explicit ScanContext(Scanner* scanner) | 
|  | : next_(scanner->saved_context_), | 
|  | string_delimiter_(scanner->string_delimiter_), | 
|  | string_is_multiline_(scanner->string_is_multiline_), | 
|  | brace_level_(scanner->brace_level_) {} | 
|  |  | 
|  | void CopyTo(Scanner* scanner) { | 
|  | scanner->string_delimiter_ = string_delimiter_; | 
|  | scanner->string_is_multiline_ = string_is_multiline_; | 
|  | scanner->brace_level_ = brace_level_; | 
|  | } | 
|  |  | 
|  | ScanContext* next() const { return next_; } | 
|  |  | 
|  | private: | 
|  | ScanContext* next_; | 
|  | const char string_delimiter_; | 
|  | const bool string_is_multiline_; | 
|  | const int brace_level_; | 
|  | }; | 
|  |  | 
|  |  | 
// Keyword tables shared by all scanners; Scanner::InitOnce() fills them in.
// keywords_ has one entry per keyword. keywords_char_offset_[c - 'a'] is the
// index of the first keywords_ entry starting with the lowercase letter c, or
// Token::kNumKeywords when no keyword starts with c.
|  | Scanner::KeywordTable Scanner::keywords_[Token::kNumKeywords]; | 
|  | int Scanner::keywords_char_offset_[Scanner::kNumLowercaseChars]; | 
|  |  | 
|  |  | 
|  | void Scanner::Reset() { | 
|  | // Non-changing newline properties. | 
|  | newline_token_.kind = Token::kNEWLINE; | 
|  | newline_token_.literal = NULL; | 
|  | // We don't preserve the column information. | 
|  | newline_token_.position.column = 0; | 
|  |  | 
|  | // Non-changing empty string token properties. | 
|  | empty_string_token_.kind = Token::kSTRING; | 
|  | empty_string_token_.literal = &Symbols::Empty(); | 
|  | empty_string_token_.position.column = 0; | 
|  |  | 
|  | lookahead_pos_ = -1; | 
|  | token_start_ = 0; | 
|  | c0_ = '\0'; | 
|  | newline_seen_ = false; | 
|  | prev_token_line_ = 1; | 
|  | saved_context_ = NULL; | 
|  | string_delimiter_ = '\0'; | 
|  | string_is_multiline_ = false; | 
|  | brace_level_ = 0; | 
|  | c0_pos_.line = 1; | 
|  | c0_pos_.column = 0; | 
|  | ReadChar(); | 
|  | } | 
|  |  | 
|  |  | 
|  | Scanner::Scanner(const String& src, const String& private_key) | 
|  | : source_(src), | 
|  | source_length_(src.Length()), | 
|  | saved_context_(NULL), | 
|  | private_key_(String::ZoneHandle(private_key.raw())), | 
|  | char_at_func_(src.CharAtFunc()), | 
|  | thread_(Thread::Current()), | 
|  | zone_(thread_->zone()) { | 
|  | Reset(); | 
|  | } | 
|  |  | 
|  |  | 
|  | Scanner::~Scanner() {} | 
|  |  | 
|  |  | 
|  | void Scanner::ErrorMsg(const char* msg) { | 
|  | current_token_.kind = Token::kERROR; | 
|  | current_token_.literal = &String::ZoneHandle(Z, Symbols::New(T, msg)); | 
|  | current_token_.position = c0_pos_; | 
|  | token_start_ = lookahead_pos_; | 
|  | current_token_.offset = lookahead_pos_; | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::PushContext() { | 
|  | ScanContext* ctx = new (Z) ScanContext(this); | 
|  | saved_context_ = ctx; | 
|  | string_delimiter_ = '\0'; | 
|  | string_is_multiline_ = false; | 
|  | brace_level_ = 1;  // Account for the opening ${ token. | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::PopContext() { | 
|  | ASSERT(saved_context_ != NULL); | 
|  | ASSERT(brace_level_ == 0); | 
|  | ASSERT(string_delimiter_ == '\0'); | 
|  | ScanContext* ctx = saved_context_; | 
|  | ctx->CopyTo(this); | 
|  | saved_context_ = ctx->next(); | 
|  | ASSERT(string_delimiter_ != '\0'); | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::BeginStringLiteral(const char delimiter) { | 
|  | string_delimiter_ = delimiter; | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::EndStringLiteral() { | 
|  | string_delimiter_ = '\0'; | 
|  | string_is_multiline_ = false; | 
|  | } | 
|  |  | 
|  |  | 
|  | bool Scanner::IsLetter(int32_t c) { | 
|  | return (('A' <= c) && (c <= 'Z')) || (('a' <= c) && (c <= 'z')); | 
|  | } | 
|  |  | 
|  |  | 
|  | bool Scanner::IsDecimalDigit(int32_t c) { | 
|  | return '0' <= c && c <= '9'; | 
|  | } | 
|  |  | 
|  |  | 
|  | bool Scanner::IsNumberStart(int32_t ch) { | 
|  | return IsDecimalDigit(ch) || ch == '.'; | 
|  | } | 
|  |  | 
|  |  | 
|  | bool Scanner::IsHexDigit(int32_t c) { | 
|  | return IsDecimalDigit(c) || (('A' <= c) && (c <= 'F')) || | 
|  | (('a' <= c) && (c <= 'f')); | 
|  | } | 
|  |  | 
|  |  | 
|  | bool Scanner::IsIdentStartChar(int32_t c) { | 
|  | return IsLetter(c) || (c == '_') || (c == '$'); | 
|  | } | 
|  |  | 
|  |  | 
|  | bool Scanner::IsIdentChar(int32_t c) { | 
|  | return IsLetter(c) || IsDecimalDigit(c) || (c == '_') || (c == '$'); | 
|  | } | 
|  |  | 
|  |  | 
|  | bool Scanner::IsIdent(const String& str) { | 
|  | if (!str.IsOneByteString()) { | 
|  | return false; | 
|  | } | 
|  | if (str.Length() == 0 || !IsIdentStartChar(CallCharAt()(str, 0))) { | 
|  | return false; | 
|  | } | 
|  | for (int i = 1; i < str.Length(); i++) { | 
|  | if (!IsIdentChar(CallCharAt()(str, i))) { | 
|  | return false; | 
|  | } | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  |  | 
|  | // This method is used when parsing integers in Dart code. We | 
|  | // are reusing the Scanner's handling of number literals in that situation. | 
|  | bool Scanner::IsValidInteger(const String& str, | 
|  | bool* is_positive, | 
|  | const String** value) { | 
|  | Scanner s(str, Symbols::Empty()); | 
|  | TokenDescriptor tokens[3]; | 
|  | s.Scan(); | 
|  | tokens[0] = s.current_token(); | 
|  | s.Scan(); | 
|  | tokens[1] = s.current_token(); | 
|  | s.Scan(); | 
|  | tokens[2] = s.current_token(); | 
|  |  | 
|  | if ((tokens[0].kind == Token::kINTEGER) && (tokens[1].kind == Token::kEOS)) { | 
|  | *is_positive = true; | 
|  | *value = tokens[0].literal; | 
|  | return true; | 
|  | } | 
|  | if (((tokens[0].kind == Token::kADD) || (tokens[0].kind == Token::kSUB)) && | 
|  | (tokens[1].kind == Token::kINTEGER) && (tokens[2].kind == Token::kEOS)) { | 
|  | // Check there is no space between "+/-" and number. | 
|  | if ((tokens[0].offset + 1) != tokens[1].offset) { | 
|  | return false; | 
|  | } | 
|  | *is_positive = tokens[0].kind == Token::kADD; | 
|  | *value = tokens[1].literal; | 
|  | return true; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::ReadChar() { | 
|  | if (lookahead_pos_ < source_length_) { | 
|  | if (c0_ == '\n') { | 
|  | newline_seen_ = true; | 
|  | c0_pos_.line++; | 
|  | c0_pos_.column = 0; | 
|  | if (CallCharAt()(source_, lookahead_pos_) == '\r') { | 
|  | // Replace a sequence of '\r' '\n' with a single '\n'. | 
|  | if (LookaheadChar(1) == '\n') { | 
|  | lookahead_pos_++; | 
|  | } | 
|  | } | 
|  | } | 
|  | lookahead_pos_++; | 
|  | c0_pos_.column++; | 
|  | c0_ = LookaheadChar(0); | 
|  | // Replace '\r' with '\n'. | 
|  | if (c0_ == '\r') { | 
|  | c0_ = '\n'; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | // Look ahead 'how_many' characters. Returns the character, or '\0' if | 
|  | // the lookahead position is beyond the end of the string. Does not | 
|  | // normalize line end characters into '\n'. | 
|  | int32_t Scanner::LookaheadChar(int how_many) { | 
|  | ASSERT(how_many >= 0); | 
|  | int32_t lookahead_char = '\0'; | 
|  | if (lookahead_pos_ + how_many < source_length_) { | 
|  | lookahead_char = CallCharAt()(source_, lookahead_pos_ + how_many); | 
|  | } | 
|  | return lookahead_char; | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::ConsumeWhiteSpace() { | 
|  | while (c0_ == ' ' || c0_ == '\t' || c0_ == '\n') { | 
|  | ReadChar(); | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::ConsumeLineComment() { | 
|  | ASSERT(c0_ == '/'); | 
|  | while (c0_ != '\n' && c0_ != '\0') { | 
|  | ReadChar(); | 
|  | } | 
|  | ReadChar(); | 
|  | current_token_.kind = Token::kWHITESP; | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::ConsumeBlockComment() { | 
|  | ASSERT(c0_ == '*'); | 
|  | ReadChar(); | 
|  | int nesting_level = 1; | 
|  |  | 
|  | while (true) { | 
|  | const char c = c0_; | 
|  | ReadChar(); | 
|  | if (c0_ == '\0') { | 
|  | break; | 
|  | } | 
|  | if (c == '/' && c0_ == '*') { | 
|  | nesting_level++; | 
|  | ReadChar();  // Consume asterisk. | 
|  | } else if (c == '*' && c0_ == '/') { | 
|  | nesting_level--; | 
|  | ReadChar();  // Consume slash. | 
|  | if (nesting_level == 0) { | 
|  | break; | 
|  | } | 
|  | } | 
|  | } | 
|  | current_token_.kind = | 
|  | (nesting_level == 0) ? Token::kWHITESP : Token::kILLEGAL; | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::ScanIdentChars(bool allow_dollar) { | 
|  | ASSERT(IsIdentStartChar(c0_)); | 
|  | ASSERT(allow_dollar || (c0_ != '$')); | 
|  | int ident_length = 0; | 
|  | int ident_pos = lookahead_pos_; | 
|  | int32_t ident_char0 = CallCharAt()(source_, ident_pos); | 
|  | while (IsIdentChar(c0_) && (allow_dollar || (c0_ != '$'))) { | 
|  | ReadChar(); | 
|  | ident_length++; | 
|  | } | 
|  |  | 
|  | // Check whether the characters we read are a known keyword. | 
|  | // Note, can't use strcmp since token_chars is not null-terminated. | 
|  | if (('a' <= ident_char0) && (ident_char0 <= 'z')) { | 
|  | int i = keywords_char_offset_[ident_char0 - 'a']; | 
|  | while ((i < Token::kNumKeywords) && | 
|  | (keywords_[i].keyword_chars[0] <= ident_char0)) { | 
|  | if (keywords_[i].keyword_len == ident_length) { | 
|  | const char* keyword = keywords_[i].keyword_chars; | 
|  | int char_pos = 1; | 
|  | while ((char_pos < ident_length) && | 
|  | (keyword[char_pos] == | 
|  | CallCharAt()(source_, ident_pos + char_pos))) { | 
|  | char_pos++; | 
|  | } | 
|  | if (char_pos == ident_length) { | 
|  | current_token_.literal = keywords_[i].keyword_symbol; | 
|  | current_token_.kind = keywords_[i].kind; | 
|  | return; | 
|  | } | 
|  | } | 
|  | i++; | 
|  | } | 
|  | } | 
|  |  | 
|  | // We did not read a keyword. | 
|  | current_token_.kind = Token::kIDENT; | 
|  | String& literal = String::ZoneHandle(Z); | 
|  | if (ident_char0 == Library::kPrivateIdentifierStart) { | 
|  | // Private identifiers are mangled on a per library basis. | 
|  | literal = String::SubString(T, source_, ident_pos, ident_length); | 
|  | literal = Symbols::FromConcat(T, literal, private_key_); | 
|  | } else { | 
|  | literal = Symbols::New(T, source_, ident_pos, ident_length); | 
|  | } | 
|  | current_token_.literal = &literal; | 
|  | } | 
|  |  | 
|  |  | 
|  | // Parse integer or double number literal of format: | 
|  | // NUMBER = INTEGER | DOUBLE | 
|  | // INTEGER = D+ | (("0x" | "0X") H+) | 
|  | // DOUBLE = ((D+ ["." D*]) | ("." D+)) [ EXPONENT ] | 
|  | // EXPONENT = ("e" | "E") ["+" | "-"] D+ | 
|  | void Scanner::ScanNumber(bool dec_point_seen) { | 
|  | ASSERT(IsDecimalDigit(c0_)); | 
|  | char first_digit = c0_; | 
|  |  | 
|  | Recognize(dec_point_seen ? Token::kDOUBLE : Token::kINTEGER); | 
|  | if (!dec_point_seen && first_digit == '0' && (c0_ == 'x' || c0_ == 'X')) { | 
|  | ReadChar(); | 
|  | if (!IsHexDigit(c0_)) { | 
|  | ErrorMsg("hexadecimal digit expected"); | 
|  | return; | 
|  | } | 
|  | while (IsHexDigit(c0_)) { | 
|  | ReadChar(); | 
|  | } | 
|  | } else { | 
|  | while (IsDecimalDigit(c0_)) { | 
|  | ReadChar(); | 
|  | } | 
|  | if (c0_ == '.' && !dec_point_seen && IsDecimalDigit(LookaheadChar(1))) { | 
|  | Recognize(Token::kDOUBLE); | 
|  | while (IsDecimalDigit(c0_)) { | 
|  | ReadChar(); | 
|  | } | 
|  | } | 
|  | if (((c0_ == 'e') || (c0_ == 'E')) && | 
|  | (IsDecimalDigit(LookaheadChar(1)) || (LookaheadChar(1) == '-') || | 
|  | (LookaheadChar(1) == '+'))) { | 
|  | Recognize(Token::kDOUBLE); | 
|  | if ((c0_ == '-') || (c0_ == '+')) { | 
|  | ReadChar(); | 
|  | } | 
|  | if (!IsDecimalDigit(c0_)) { | 
|  | ErrorMsg("missing exponent digits"); | 
|  | return; | 
|  | } | 
|  | while (IsDecimalDigit(c0_)) { | 
|  | ReadChar(); | 
|  | } | 
|  | } | 
|  | } | 
|  | if (current_token_.kind != Token::kILLEGAL) { | 
|  | intptr_t len = lookahead_pos_ - token_start_; | 
|  | const String& str = | 
|  | String::ZoneHandle(Z, Symbols::New(T, source_, token_start_, len)); | 
|  | current_token_.literal = &str; | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::SkipLine() { | 
|  | while (c0_ != '\n' && c0_ != '\0') { | 
|  | ReadChar(); | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::ScanScriptTag() { | 
|  | ReadChar(); | 
|  | ASSERT(c0_ == '!'); | 
|  | Recognize(Token::kSCRIPTTAG); | 
|  | // The script tag extends to the end of the line. Just treat this | 
|  | // similar to a line comment. | 
|  | SkipLine(); | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::ScanLiteralString(bool is_raw) { | 
|  | ASSERT(!IsScanningString()); | 
|  | ASSERT(c0_ == '"' || c0_ == '\''); | 
|  |  | 
|  | // Entering string scanning mode. | 
|  | BeginStringLiteral(c0_); | 
|  | ReadChar(); | 
|  |  | 
|  | if ((c0_ == string_delimiter_) && (LookaheadChar(1) == string_delimiter_)) { | 
|  | string_is_multiline_ = true; | 
|  | ReadChar();  // Skip two additional string delimiters. | 
|  | ReadChar(); | 
|  | } | 
|  | ScanLiteralStringChars(is_raw, string_is_multiline_); | 
|  | } | 
|  |  | 
|  |  | 
|  | bool Scanner::ScanHexDigits(int digits, int32_t* value) { | 
|  | *value = 0; | 
|  | for (int i = 0; i < digits; ++i) { | 
|  | ReadChar(); | 
|  | if (!IsHexDigit(c0_)) { | 
|  | ErrorMsg("too few hexadecimal digits"); | 
|  | return false; | 
|  | } | 
|  | *value <<= 4; | 
|  | *value |= Utils::HexDigitToInt(c0_); | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  |  | 
|  | bool Scanner::ScanHexDigits(int min_digits, int max_digits, int32_t* value) { | 
|  | *value = 0; | 
|  | ReadChar(); | 
|  | for (int i = 0; i < max_digits; ++i) { | 
|  | if (!IsHexDigit(c0_)) { | 
|  | if (i < min_digits) { | 
|  | ErrorMsg("hexadecimal digit expected"); | 
|  | return false; | 
|  | } | 
|  | break; | 
|  | } | 
|  | *value <<= 4; | 
|  | *value |= Utils::HexDigitToInt(c0_); | 
|  | ReadChar(); | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::ScanEscapedCodePoint(int32_t* code_point) { | 
|  | ASSERT(c0_ == 'u' || c0_ == 'x'); | 
|  | bool is_valid; | 
|  | if (c0_ == 'x') { | 
|  | is_valid = ScanHexDigits(2, code_point); | 
|  | } else if (c0_ == 'u' && LookaheadChar(1) != '{') { | 
|  | is_valid = ScanHexDigits(4, code_point); | 
|  | } else { | 
|  | ReadChar();  // Skip left curly bracket. | 
|  | is_valid = ScanHexDigits(1, 6, code_point); | 
|  | if (is_valid) { | 
|  | if (c0_ != '}') { | 
|  | ErrorMsg("expected '}' after character code"); | 
|  | return; | 
|  | } | 
|  | } | 
|  | } | 
|  | if (is_valid && (Utf::IsOutOfRange(*code_point))) { | 
|  | ErrorMsg("invalid code point"); | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::ScanLiteralStringChars(bool is_raw, bool remove_whitespace) { | 
|  | GrowableArray<int32_t> string_chars(64); | 
|  |  | 
|  | ASSERT(IsScanningString()); | 
|  | // We are at the first character of a string literal piece. A string literal | 
|  | // can be broken up into multiple pieces by string interpolation. | 
|  | while (true) { | 
|  | if ((c0_ == '\0') || ((c0_ == '\n') && !string_is_multiline_)) { | 
|  | ErrorMsg("unterminated string literal"); | 
|  | EndStringLiteral(); | 
|  | return; | 
|  | } | 
|  | if (c0_ == '\\' && !is_raw) { | 
|  | // Parse escape sequence. | 
|  | int32_t escape_char = '\0'; | 
|  | ReadChar(); | 
|  | switch (c0_) { | 
|  | case 'n': | 
|  | escape_char = '\n'; | 
|  | break; | 
|  | case 'r': | 
|  | escape_char = '\r'; | 
|  | break; | 
|  | case 'f': | 
|  | escape_char = '\f'; | 
|  | break; | 
|  | case 't': | 
|  | escape_char = '\t'; | 
|  | break; | 
|  | case 'b': | 
|  | escape_char = '\b'; | 
|  | break; | 
|  | case 'v': | 
|  | escape_char = '\v'; | 
|  | break; | 
|  | case 'u': | 
|  | case 'x': { | 
|  | ScanEscapedCodePoint(&escape_char); | 
|  | break; | 
|  | } | 
|  | default: | 
|  | if ((c0_ == '\0') || ((c0_ == '\n') && !string_is_multiline_)) { | 
|  | ErrorMsg("unterminated string literal"); | 
|  | EndStringLiteral(); | 
|  | return; | 
|  | } | 
|  | escape_char = c0_; | 
|  | break; | 
|  | } | 
|  | string_chars.Add(escape_char); | 
|  | } else if (c0_ == '$' && !is_raw) { | 
|  | // Scanned a string piece. | 
|  | ASSERT(string_chars.data() != NULL); | 
|  | // Strings are canonicalized: Allocate a symbol. | 
|  | current_token_.literal = &String::ZoneHandle( | 
|  | Z, Symbols::FromUTF32(T, string_chars.data(), string_chars.length())); | 
|  | // Preserve error tokens. | 
|  | if (current_token_.kind != Token::kERROR) { | 
|  | current_token_.kind = Token::kSTRING; | 
|  | } | 
|  | return; | 
|  | } else if (c0_ == string_delimiter_) { | 
|  | // Check if we are at the end of the string literal. | 
|  | if (!string_is_multiline_ || ((LookaheadChar(1) == string_delimiter_) && | 
|  | (LookaheadChar(2) == string_delimiter_))) { | 
|  | if (string_is_multiline_) { | 
|  | ReadChar();  // Skip two string delimiters. | 
|  | ReadChar(); | 
|  | } | 
|  | // Preserve error tokens. | 
|  | if (current_token_.kind == Token::kERROR) { | 
|  | ReadChar(); | 
|  | } else { | 
|  | Recognize(Token::kSTRING); | 
|  | ASSERT(string_chars.data() != NULL); | 
|  | // Strings are canonicalized: Allocate a symbol. | 
|  | current_token_.literal = | 
|  | &String::ZoneHandle(Z, Symbols::FromUTF32(T, string_chars.data(), | 
|  | string_chars.length())); | 
|  | } | 
|  | EndStringLiteral(); | 
|  | return; | 
|  | } else { | 
|  | string_chars.Add(string_delimiter_); | 
|  | } | 
|  | } else { | 
|  | // Test for a two part utf16 sequence, and decode to a code point | 
|  | // if we find one. | 
|  | int32_t ch1 = c0_; | 
|  | if (Utf16::IsLeadSurrogate(ch1)) { | 
|  | const int32_t ch2 = LookaheadChar(1); | 
|  | if (Utf16::IsTrailSurrogate(ch2)) { | 
|  | ch1 = Utf16::Decode(ch1, ch2); | 
|  | ReadChar(); | 
|  | } | 
|  | } | 
|  | string_chars.Add(ch1); | 
|  | } | 
|  | // The first line of a multi-line string is discarded if it only | 
|  | // contains whitespace. | 
|  | if (remove_whitespace && (string_chars.Last() == '\n')) { | 
|  | bool whitespace_only = true; | 
|  | // Last character is the newline, don't inspect it. | 
|  | const intptr_t len = string_chars.length() - 1; | 
|  | for (int i = 0; i < len; i++) { | 
|  | int32_t ch = string_chars[i]; | 
|  | if ((ch != ' ') && (ch != '\t')) { | 
|  | // Non-whitespace character, keep the first line. | 
|  | whitespace_only = false; | 
|  | break; | 
|  | } | 
|  | } | 
|  | if (whitespace_only) { | 
|  | string_chars.Clear();  // Discard characters on first line. | 
|  | } | 
|  | remove_whitespace = false; | 
|  | } | 
|  | ReadChar(); | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::Scan() { | 
|  | newline_seen_ = false; | 
|  |  | 
|  | do { | 
|  | if (!IsScanningString()) { | 
|  | ConsumeWhiteSpace(); | 
|  | } | 
|  | token_start_ = lookahead_pos_; | 
|  | current_token_.offset = lookahead_pos_; | 
|  | current_token_.position = c0_pos_; | 
|  | current_token_.literal = NULL; | 
|  | current_token_.kind = Token::kILLEGAL; | 
|  | if (IsScanningString()) { | 
|  | if (c0_ == '$') { | 
|  | ReadChar();  // Skip the '$' character. | 
|  | if (IsIdentStartChar(c0_) && (c0_ != '$')) { | 
|  | ScanIdentNoDollar(); | 
|  | current_token_.kind = Token::kINTERPOL_VAR; | 
|  | } else if (c0_ == '{') { | 
|  | Recognize(Token::kINTERPOL_START); | 
|  | PushContext(); | 
|  | } else { | 
|  | ErrorMsg("illegal character after $ in string interpolation"); | 
|  | EndStringLiteral(); | 
|  | break; | 
|  | } | 
|  | } else { | 
|  | ScanLiteralStringChars(false, false); | 
|  | } | 
|  | break; | 
|  | } | 
|  | switch (c0_) { | 
|  | case '\0': | 
|  | current_token_.kind = Token::kEOS; | 
|  | break; | 
|  |  | 
|  | case '+':  // +  ++  += | 
|  | Recognize(Token::kADD); | 
|  | if (c0_ == '+') { | 
|  | Recognize(Token::kINCR); | 
|  | } else if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_ADD); | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '-':  // -  --  -= | 
|  | Recognize(Token::kSUB); | 
|  | if (c0_ == '-') { | 
|  | Recognize(Token::kDECR); | 
|  | } else if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_SUB); | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '*':  // *  *= | 
|  | Recognize(Token::kMUL); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_MUL); | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '%':  // %  %= | 
|  | Recognize(Token::kMOD); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_MOD); | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '/':  //  /  /=  //  /* | 
|  | Recognize(Token::kDIV); | 
|  | if (c0_ == '/') { | 
|  | ConsumeLineComment(); | 
|  | } else if (c0_ == '*') { | 
|  | ConsumeBlockComment(); | 
|  | } else if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_DIV); | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '&':  // &  &=  && | 
|  | Recognize(Token::kBIT_AND); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_AND); | 
|  | } else if (c0_ == '&') { | 
|  | Recognize(Token::kAND); | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '|':  // |  |=  || | 
|  | Recognize(Token::kBIT_OR); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_OR); | 
|  | } else if (c0_ == '|') { | 
|  | Recognize(Token::kOR); | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '^':  // ^  ^= | 
|  | Recognize(Token::kBIT_XOR); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_XOR); | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '[':  // [  []  []= | 
|  | Recognize(Token::kLBRACK); | 
|  | if (c0_ == ']') { | 
|  | Recognize(Token::kINDEX); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_INDEX); | 
|  | } | 
|  | } | 
|  | break; | 
|  |  | 
|  | case ']':  //  ] | 
|  | Recognize(Token::kRBRACK); | 
|  | break; | 
|  |  | 
|  | case '<':  // <  <=  <<  <<= | 
|  | Recognize(Token::kLT); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kLTE); | 
|  | } else if (c0_ == '<') { | 
|  | Recognize(Token::kSHL); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_SHL); | 
|  | } | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '>':  // >  >=  >>  >>= | 
|  | Recognize(Token::kGT); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kGTE); | 
|  | } else if (c0_ == '>') { | 
|  | Recognize(Token::kSHR); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_SHR); | 
|  | } | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '!':  // !  != | 
|  | Recognize(Token::kNOT); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kNE); | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '~': | 
|  | Recognize(Token::kBIT_NOT); | 
|  | if (c0_ == '/') { | 
|  | Recognize(Token::kTRUNCDIV); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_TRUNCDIV); | 
|  | } | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '=':  // =  ==  => | 
|  | Recognize(Token::kASSIGN); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kEQ); | 
|  | } else if (c0_ == '>') { | 
|  | Recognize(Token::kARROW); | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '.':  // .  ..  Number | 
|  | Recognize(Token::kPERIOD); | 
|  | if (c0_ == '.') { | 
|  | Recognize(Token::kCASCADE); | 
|  | } else if (IsDecimalDigit(c0_)) { | 
|  | ScanNumber(true); | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '?':  // ?  ?.  ??  ??= | 
|  | Recognize(Token::kCONDITIONAL); | 
|  | if (c0_ == '.') { | 
|  | Recognize(Token::kQM_PERIOD); | 
|  | } else if (c0_ == '?') { | 
|  | Recognize(Token::kIFNULL); | 
|  | if (c0_ == '=') { | 
|  | Recognize(Token::kASSIGN_COND); | 
|  | } | 
|  | } | 
|  | break; | 
|  |  | 
|  | case ':': | 
|  | Recognize(Token::kCOLON); | 
|  | break; | 
|  |  | 
|  | case ';': | 
|  | Recognize(Token::kSEMICOLON); | 
|  | break; | 
|  |  | 
|  | case '{': | 
|  | Recognize(Token::kLBRACE); | 
|  | if (IsNestedContext()) { | 
|  | brace_level_++; | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '}': | 
|  | Recognize(Token::kRBRACE); | 
|  | if (IsNestedContext()) { | 
|  | ASSERT(brace_level_ > 0); | 
|  | brace_level_--; | 
|  | if (brace_level_ == 0) { | 
|  | current_token_.kind = Token::kINTERPOL_END; | 
|  | PopContext(); | 
|  | } | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '(': | 
|  | Recognize(Token::kLPAREN); | 
|  | break; | 
|  |  | 
|  | case ')': | 
|  | Recognize(Token::kRPAREN); | 
|  | break; | 
|  |  | 
|  | case ',': | 
|  | Recognize(Token::kCOMMA); | 
|  | break; | 
|  |  | 
|  | case '@': | 
|  | Recognize(Token::kAT); | 
|  | break; | 
|  |  | 
|  | case 'r': | 
|  | if ((LookaheadChar(1) == '"') || (LookaheadChar(1) == '\'')) { | 
|  | ReadChar(); | 
|  | ScanLiteralString(true); | 
|  | } else { | 
|  | ScanIdent(); | 
|  | } | 
|  | break; | 
|  |  | 
|  | case '"': | 
|  | case '\'': | 
|  | ScanLiteralString(false); | 
|  | break; | 
|  |  | 
|  | case '#': | 
|  | if (LookaheadChar(1) == '!') { | 
|  | ScanScriptTag(); | 
|  | } else { | 
|  | Recognize(Token::kHASH); | 
|  | } | 
|  | break; | 
|  |  | 
|  | default: | 
|  | if (IsIdentStartChar(c0_)) { | 
|  | ScanIdent(); | 
|  | } else if (IsDecimalDigit(c0_)) { | 
|  | ScanNumber(false); | 
|  | } else { | 
|  | char msg[128]; | 
|  | char utf8_char[5]; | 
|  | int len = Utf8::Encode(c0_, utf8_char); | 
|  | utf8_char[len] = '\0'; | 
|  | OS::SNPrint(msg, sizeof(msg), "unexpected character: '%s' (U+%04X)\n", | 
|  | utf8_char, c0_); | 
|  | ErrorMsg(msg); | 
|  | ReadChar(); | 
|  | } | 
|  | } | 
|  | } while (current_token_.kind == Token::kWHITESP); | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::ScanAll(TokenCollector* collector) { | 
|  | Reset(); | 
|  | do { | 
|  | Scan(); | 
|  | bool inserted_new_lines = false; | 
|  | for (intptr_t diff = current_token_.position.line - prev_token_line_; | 
|  | diff > 0; diff--) { | 
|  | newline_token_.position.line = current_token_.position.line - diff; | 
|  | collector->AddToken(newline_token_); | 
|  | inserted_new_lines = true; | 
|  | } | 
|  | if (inserted_new_lines && | 
|  | ((current_token_.kind == Token::kINTERPOL_VAR) || | 
|  | (current_token_.kind == Token::kINTERPOL_START))) { | 
|  | // NOTE: If this changes, be sure to update | 
|  | // Script::GenerateLineNumberArray to stay in sync. | 
|  | empty_string_token_.position.line = current_token_.position.line; | 
|  | collector->AddToken(empty_string_token_); | 
|  | } | 
|  | collector->AddToken(current_token_); | 
|  | prev_token_line_ = current_token_.position.line; | 
|  | } while (current_token_.kind != Token::kEOS); | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::ScanTo(intptr_t token_index) { | 
|  | ASSERT(token_index >= 0); | 
|  | intptr_t index = 0; | 
|  | Reset(); | 
|  | do { | 
|  | Scan(); | 
|  | bool inserted_new_lines = false; | 
|  | for (intptr_t diff = current_token_.position.line - prev_token_line_; | 
|  | diff > 0; diff--) { | 
|  | // Advance the index to account for tokens added in ScanAll. | 
|  | index++; | 
|  | inserted_new_lines = true; | 
|  | } | 
|  | if (inserted_new_lines && | 
|  | ((current_token_.kind == Token::kINTERPOL_VAR) || | 
|  | (current_token_.kind == Token::kINTERPOL_START))) { | 
|  | // Advance the index to account for tokens added in ScanAll. | 
|  | index++; | 
|  | } | 
|  | index++; | 
|  | prev_token_line_ = current_token_.position.line; | 
|  | } while ((token_index >= index) && (current_token_.kind != Token::kEOS)); | 
|  | } | 
|  |  | 
|  |  | 
|  | void Scanner::InitOnce() { | 
|  | ASSERT(Isolate::Current() == Dart::vm_isolate()); | 
|  | for (int i = 0; i < kNumLowercaseChars; i++) { | 
|  | keywords_char_offset_[i] = Token::kNumKeywords; | 
|  | } | 
|  | for (int i = 0; i < Token::kNumKeywords; i++) { | 
|  | Token::Kind token = static_cast<Token::Kind>(Token::kFirstKeyword + i); | 
|  | keywords_[i].kind = token; | 
|  | keywords_[i].keyword_chars = Token::Str(token); | 
|  | keywords_[i].keyword_len = strlen(Token::Str(token)); | 
|  | keywords_[i].keyword_symbol = &Symbols::Token(token); | 
|  |  | 
|  | int ch = keywords_[i].keyword_chars[0] - 'a'; | 
|  | if (keywords_char_offset_[ch] == Token::kNumKeywords) { | 
|  | keywords_char_offset_[ch] = i; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | }  // namespace dart |