// Copyright (c) 2011, the Dart project authors.  Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
//
// Scanner class for the Dart language. The scanner reads source text
// and produces a stream of tokens which is used by the parser.
//

#ifndef RUNTIME_VM_SCANNER_H_
#define RUNTIME_VM_SCANNER_H_

#include "vm/growable_array.h"
#include "vm/token.h"

namespace dart {

// Forward declarations. Incomplete types are sufficient for this header:
// String and ScanContext are only used behind references and pointers here.
class Array;
class Library;
class RawString;
class ScanContext;
class String;

// A call to Scan() scans the source one token at a time.
// The scanned token is returned by current_token().
// ScanAll() scans the entire source text and passes the tokens to a
// TokenCollector.
class Scanner : ValueObject {
 public:
  // Signature of the function stored in char_at_func_.
  // NOTE(review): presumably returns the 16-bit code unit of str at index;
  // confirm against the functions assigned in the constructor.
  typedef uint16_t (*CharAtFunc)(const String& str, intptr_t index);

  // SourcePosition describes a text location in user friendly
  // terms of line number and column.
  struct SourcePosition {
    int line;
    int column;
  };

  // TokenDescriptor defines the kind of a token and its location in
  // the source text.
  struct TokenDescriptor {
    Token::Kind kind;
    int offset;               // Offset in source string.
    SourcePosition position;  // Text position in source.
    const String* literal;    // Identifier, number or string literal.
  };

  // Receiver of token descriptors, handed to ScanAll(). AddToken() is
  // virtual and its default implementation does nothing.
  // NOTE(review): presumably ScanAll() calls AddToken() once per scanned
  // token; confirm against the definition.
  class TokenCollector : public ValueObject {
   public:
    TokenCollector() {}
    virtual ~TokenCollector() {}
    virtual void AddToken(const TokenDescriptor& token) {}

   private:
    DISALLOW_COPY_AND_ASSIGN(TokenCollector);
  };

  // Initializes scanner to scan string source.
  // NOTE(review): source_ and private_key_ are reference members, so the
  // arguments presumably have to outlive the scanner; confirm.
  Scanner(const String& source, const String& private_key);
  ~Scanner();

  // Scans one token at a time.
  void Scan();

  // Scans the entire source and collects tokens in the provided collector.
  void ScanAll(TokenCollector* collector);

  // Scans to specified token position.
  // Use CurrentPosition() to extract line and column number.
  void ScanTo(intptr_t token_index);

  // Info about most recently recognized token.
  const TokenDescriptor& current_token() const { return current_token_; }

  // Was there a line break before the current token?
  bool NewlineBeforeToken() const { return newline_seen_; }

  // Source code line number and column of current token.
  const SourcePosition& CurrentPosition() const {
    return current_token_.position;
  }

  // NOTE(review): presumably performs the one-time setup of the static
  // keyword tables declared at the end of this class; confirm.
  static void InitOnce();

  // Return true if str is an identifier.
  bool IsIdent(const String& str);

  // Returns true if str is a valid integer literal.
  // NOTE(review): is_positive and value look like out-parameters for the
  // sign and the literal text; confirm against the definition.
  static bool IsValidInteger(const String& str,
                             bool* is_positive,
                             const String** value);

  // Character classification for identifiers: IsIdentStartChar tests a
  // candidate first character, IsIdentChar a candidate subsequent one.
  static bool IsIdentStartChar(int32_t c);
  static bool IsIdentChar(int32_t c);

 private:
  friend class ScanContext;

  // Number of entries in keywords_char_offset_.
  // NOTE(review): presumably one slot per letter 'a'..'z'; confirm.
  static const int kNumLowercaseChars = 26;

  // One entry of the static keywords_ table.
  struct KeywordTable {
    Token::Kind kind;
    const char* keyword_chars;
    int keyword_len;
    const String* keyword_symbol;
  };

  // Rewind scanner position to token 0.
  void Reset();

  // Reads next lookahead character.
  void ReadChar();

  // Read and discard characters up to end of line.
  void SkipLine();

  // Recognizes token 'kind' and reads next character in input.
  // The lookahead is advanced first, then the kind is stored in
  // current_token_.
  void Recognize(Token::Kind kind) {
    ReadChar();
    current_token_.kind = kind;
  }

  // NOTE(review): presumably returns the character how_many positions
  // ahead without consuming input; confirm against the definition.
  int32_t LookaheadChar(int how_many);

  // Reports a scanning error described by msg.
  // NOTE(review): how the error is surfaced is not visible in this header.
  void ErrorMsg(const char* msg);

  // These functions return true if the given character is a letter,
  // a decimal digit, a hexadecimal digit, etc.
  static bool IsLetter(int32_t c);
  static bool IsDecimalDigit(int32_t c);
  static bool IsNumberStart(int32_t);
  static bool IsHexDigit(int32_t c);

  // Skips up to next non-whitespace character.
  void ConsumeWhiteSpace();

  // Skips characters up to end of line.
  void ConsumeLineComment();

  // Skips characters up to matching '*/'.
  void ConsumeBlockComment();

  // Is this scanner currently scanning a string literal.
  // A string_delimiter_ of '\0' means no string literal is open.
  bool IsScanningString() const { return string_delimiter_ != '\0'; }
  void BeginStringLiteral(const char delimiter);
  void EndStringLiteral();

  // Is this scanner currently scanning a string interpolation expression.
  // True whenever a saved context exists (saved_context_ is non-NULL).
  bool IsNestedContext() const { return saved_context_ != NULL; }
  void PushContext();
  void PopContext();

  // Starts reading a string literal.
  void ScanLiteralString(bool is_raw);

  // Read the characters of a string literal. Remove whitespace up to
  // and including the first newline character if remove_whitespace
  // is true.
  void ScanLiteralStringChars(bool is_raw, bool remove_whitespace);

  // Reads a fixed number of hexadecimal digits.
  // NOTE(review): presumably stores the result in *value and returns
  // false on malformed input; confirm against the definition.
  bool ScanHexDigits(int digits, int32_t* value);

  // Reads a variable number of hexadecimal digits, bounded by min_digits
  // and max_digits.
  bool ScanHexDigits(int min_digits, int max_digits, int32_t* value);

  // Reads an escaped code point from within a string literal.
  void ScanEscapedCodePoint(int32_t* escaped_char);

  // Reads identifier. ScanIdent() allows '$' in the identifier
  // (allow_dollar == true), ScanIdentNoDollar() does not.
  void ScanIdentChars(bool allow_dollar);
  void ScanIdent() { ScanIdentChars(true); }
  void ScanIdentNoDollar() { ScanIdentChars(false); }

  // Reads a number literal.
  // NOTE(review): dec_point_seen presumably tells the scanner that a '.'
  // has already been consumed; confirm against the callers.
  void ScanNumber(bool dec_point_seen);

  void ScanScriptTag();

  // Accessor for the character-reading function stored in char_at_func_.
  CharAtFunc CallCharAt() const { return char_at_func_; }

  Thread* thread() const { return thread_; }
  Zone* zone() const { return zone_; }

  TokenDescriptor current_token_;       // Current token.
  TokenDescriptor newline_token_;       // Newline token.
  TokenDescriptor empty_string_token_;  // Token for "".
  const String& source_;                // The source text being tokenized.
  intptr_t source_length_;              // The length of the source text.
  intptr_t lookahead_pos_;              // Position of lookahead character
                                        // within source_.
  intptr_t token_start_;                // Start of current token in source_.
  int32_t c0_;                          // Lookahead character.
  bool newline_seen_;                   // Newline before current token.
  intptr_t prev_token_line_;            // Line number of the previous token.

  // The following fields keep track whether we are scanning a string literal
  // and its interpolated expressions.
  ScanContext* saved_context_;  // Non-NULL while in a nested context.
  int32_t string_delimiter_;    // '\0' when not inside a string literal.
  bool string_is_multiline_;
  int brace_level_;

  // Second constructor argument.
  // NOTE(review): its use is not visible in this header.
  const String& private_key_;

  SourcePosition c0_pos_;  // Source position of lookahead character c0_.

  // Function used to read characters; exposed through CallCharAt().
  const CharAtFunc char_at_func_;

  Thread* thread_;
  Zone* zone_;

  // Static keyword lookup tables shared by all scanners.
  // NOTE(review): keywords_char_offset_ presumably indexes keywords_ by
  // first letter; confirm against InitOnce().
  static KeywordTable keywords_[Token::kNumKeywords];
  static int keywords_char_offset_[kNumLowercaseChars];
};

}  // namespace dart

#endif  // RUNTIME_VM_SCANNER_H_
