// Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
//
// Scanner class for the Dart language. The scanner reads source text
// and produces a stream of tokens which is used by the parser.
//
#ifndef RUNTIME_VM_SCANNER_H_
#define RUNTIME_VM_SCANNER_H_
#include "vm/growable_array.h"
#include "vm/token.h"
namespace dart {
// Forward declarations, so that this header can name these types without
// needing their full definitions. In this header String and ScanContext are
// used only through references and pointers.
class Array;
class Library;
class RawString;
class ScanContext;
class String;
// Scanner tokenizes source text on behalf of the parser.
// A call to Scan() scans the source one token at a time.
// The scanned token is returned by current_token().
// ScanAll() scans the entire source text and collects the tokens in the
// provided TokenCollector.
// NOTE(review): ValueObject is inherited privately here (the default for
// 'class'), while TokenCollector below inherits it publicly - confirm that
// the difference is intentional.
class Scanner : ValueObject {
public:
// Pointer to a function that takes a String and an index and returns a
// uint16_t; presumably the character (code unit) at that index - see
// CallCharAt() and char_at_func_.
typedef uint16_t (*CharAtFunc)(const String& str, intptr_t index);
// SourcePosition describes a text location in user friendly
// terms of line number and column.
struct SourcePosition {
int line;
int column;
};
// TokenDescriptor defines the kind of a token and its location in
// the source text.
struct TokenDescriptor {
Token::Kind kind;
int offset; // Offset in source string.
SourcePosition position; // Text position in source.
const String* literal; // Identifier, number or string literal.
};
// Receiver interface for the tokens gathered by ScanAll(). The default
// AddToken() does nothing; subclasses override it to record tokens.
// Copying and assignment are disabled.
class TokenCollector : public ValueObject {
public:
TokenCollector() {}
virtual ~TokenCollector() {}
// Called with a token descriptor; the base implementation ignores it.
virtual void AddToken(const TokenDescriptor& token) {}
private:
DISALLOW_COPY_AND_ASSIGN(TokenCollector);
};
// Initializes scanner to scan string source.
// NOTE(review): source_ and private_key_ are reference members, so the
// arguments presumably have to outlive the Scanner - confirm against the
// constructor definition.
Scanner(const String& source, const String& private_key);
~Scanner();
// Scans one token at a time.
void Scan();
// Scans the entire source and collects tokens in the provided collector.
void ScanAll(TokenCollector* collector);
// Scans to specified token position.
// Use CurrentPosition() to extract line and column number.
void ScanTo(intptr_t token_index);
// Info about most recently recognized token.
const TokenDescriptor& current_token() const { return current_token_; }
// Was there a line break before the current token?
bool NewlineBeforeToken() const { return newline_seen_; }
// Source code line number and column of current token.
const SourcePosition& CurrentPosition() const {
return current_token_.position;
}
// Static one-time setup. Presumably fills the static keywords_ and
// keywords_char_offset_ tables - confirm in the implementation.
static void InitOnce();
// Return true if str is an identifier.
bool IsIdent(const String& str);
// Does the token stream contain a valid integer literal.
// is_positive and value are pointer parameters, presumably outputs for the
// sign and the literal text - confirm in the implementation.
static bool IsValidInteger(const String& str,
bool* is_positive,
const String** value);
// Character classification predicates for identifiers; judging by the
// names, whether c may start / may appear in an identifier.
static bool IsIdentStartChar(int32_t c);
static bool IsIdentChar(int32_t c);
private:
friend class ScanContext;
// Number of entries in keywords_char_offset_; presumably one per lowercase
// letter 'a'..'z'.
static const int kNumLowercaseChars = 26;
// Element type of the static keywords_ table: a token kind together with
// the keyword's characters, their length and a String symbol for it.
struct KeywordTable {
Token::Kind kind;
const char* keyword_chars;
int keyword_len;
const String* keyword_symbol;
};
// Rewind scanner position to token 0.
void Reset();
// Reads next lookahead character.
void ReadChar();
// Read and discard characters up to end of line.
void SkipLine();
// Recognizes token 'kind' and reads next character in input.
void Recognize(Token::Kind kind) {
ReadChar();
current_token_.kind = kind;
}
// Returns a lookahead character; how_many presumably selects how far ahead
// of the current position to look - confirm in the implementation.
int32_t LookaheadChar(int how_many);
// Error hook taking a message; what is done with the message is defined in
// the implementation.
void ErrorMsg(const char* msg);
// These functions return true if the given character is a letter,
// a decimal digit, a hexadecimal digit, etc.
static bool IsLetter(int32_t c);
static bool IsDecimalDigit(int32_t c);
static bool IsNumberStart(int32_t);
static bool IsHexDigit(int32_t c);
// Skips up to next non-whitespace character.
void ConsumeWhiteSpace();
// Skips characters up to end of line.
void ConsumeLineComment();
// Skips characters up to matching '*/'.
void ConsumeBlockComment();
// Is this scanner currently scanning a string literal.
// A string_delimiter_ of '\0' means that it is not.
bool IsScanningString() const { return string_delimiter_ != '\0'; }
void BeginStringLiteral(const char delimiter);
void EndStringLiteral();
// Is this scanner currently scanning a string interpolation expression.
// True exactly when saved_context_ is non-NULL.
bool IsNestedContext() const { return saved_context_ != NULL; }
void PushContext();
void PopContext();
// Starts reading a string literal.
void ScanLiteralString(bool is_raw);
// Read the characters of a string literal. Remove whitespace up to
// and including the first newline character if remove_whitespace
// is true.
void ScanLiteralStringChars(bool is_raw, bool remove_whitespace);
// Reads a fixed number of hexadecimal digits.
// The bool result presumably reports success, with the number stored
// through value - confirm in the implementation.
bool ScanHexDigits(int digits, int32_t* value);
// Reads a variable number of hexadecimal digits.
bool ScanHexDigits(int min_digits, int max_digits, int32_t* value);
// Reads an escaped code point from within a string literal.
void ScanEscapedCodePoint(int32_t* escaped_char);
// Reads identifier.
void ScanIdentChars(bool allow_dollar);
// Convenience wrappers around ScanIdentChars() with allow_dollar set to
// true and false respectively.
void ScanIdent() { ScanIdentChars(true); }
void ScanIdentNoDollar() { ScanIdentChars(false); }
// Reads a number literal.
void ScanNumber(bool dec_point_seen);
void ScanScriptTag();
// Accessors for the character-reading function, thread and zone members.
CharAtFunc CallCharAt() const { return char_at_func_; }
Thread* thread() const { return thread_; }
Zone* zone() const { return zone_; }
// NOTE: data members are initialized in declaration order, so reordering
// the fields below would change the constructor's initialization order.
TokenDescriptor current_token_; // Current token.
TokenDescriptor newline_token_; // Newline token.
TokenDescriptor empty_string_token_; // Token for "".
const String& source_; // The source text being tokenized.
intptr_t source_length_; // The length of the source text.
intptr_t lookahead_pos_; // Position of lookahead character
// within source_.
intptr_t token_start_; // Begin of current token in source_.
int32_t c0_; // Lookahead character.
bool newline_seen_; // Newline before current token.
intptr_t prev_token_line_; // Line number of the previous token.
// The following fields keep track whether we are scanning a string literal
// and its interpolated expressions.
ScanContext* saved_context_;
int32_t string_delimiter_;
bool string_is_multiline_;
int brace_level_;
const String& private_key_;
SourcePosition c0_pos_; // Source position of lookahead character c0_.
const CharAtFunc char_at_func_;
Thread* thread_;
Zone* zone_;
// Static tables shared by all scanners: the keyword entries and a table
// with one slot per lowercase letter (presumably an index into keywords_
// by a keyword's first character - confirm in the implementation).
static KeywordTable keywords_[Token::kNumKeywords];
static int keywords_char_offset_[kNumLowercaseChars];
};
} // namespace dart
#endif // RUNTIME_VM_SCANNER_H_