| // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| #ifndef VM_REGEXP_PARSER_H_ |
| #define VM_REGEXP_PARSER_H_ |
| |
| // SNIP |
| |
| namespace dart { |
| |
| // SNIP |
| |
| // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
| class RegExpBuilder: public ZoneObject { |
| public: |
| explicit RegExpBuilder(Zone* zone); |
| void AddCharacter(uc16 character); |
| // "Adds" an empty expression. Does nothing except consume a |
| // following quantifier |
| void AddEmpty(); |
| void AddAtom(RegExpTree* tree); |
| void AddAssertion(RegExpTree* tree); |
| void NewAlternative(); // '|' |
| void AddQuantifierToAtom( |
| int min, int max, RegExpQuantifier::QuantifierType type); |
| RegExpTree* ToRegExp(); |
| |
| private: |
| void FlushCharacters(); |
| void FlushText(); |
| void FlushTerms(); |
| Zone* zone() const { return zone_; } |
| |
| Zone* zone_; |
| bool pending_empty_; |
| ZoneList<uc16>* characters_; |
| BufferedZoneList<RegExpTree, 2> terms_; |
| BufferedZoneList<RegExpTree, 2> text_; |
| BufferedZoneList<RegExpTree, 2> alternatives_; |
| #ifdef DEBUG |
| enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
| #define LAST(x) last_added_ = x; |
| #else |
| #define LAST(x) |
| #endif |
| }; |
| |
| |
| class RegExpParser BASE_EMBEDDED { |
| public: |
| RegExpParser(FlatStringReader* in, |
| Handle<String>* error, |
| bool multiline_mode, |
| Zone* zone); |
| |
| static bool ParseRegExp(FlatStringReader* input, |
| bool multiline, |
| RegExpCompileData* result, |
| Zone* zone); |
| |
| RegExpTree* ParsePattern(); |
| RegExpTree* ParseDisjunction(); |
| RegExpTree* ParseGroup(); |
| RegExpTree* ParseCharacterClass(); |
| |
| // Parses a {...,...} quantifier and stores the range in the given |
| // out parameters. |
| bool ParseIntervalQuantifier(int* min_out, int* max_out); |
| |
| // Parses and returns a single escaped character. The character |
| // must not be 'b' or 'B' since they are usually handle specially. |
| uc32 ParseClassCharacterEscape(); |
| |
| // Checks whether the following is a length-digit hexadecimal number, |
| // and sets the value if it is. |
| bool ParseHexEscape(int length, uc32* value); |
| |
| uc32 ParseOctalLiteral(); |
| |
| // Tries to parse the input as a back reference. If successful it |
| // stores the result in the output parameter and returns true. If |
| // it fails it will push back the characters read so the same characters |
| // can be reparsed. |
| bool ParseBackReferenceIndex(int* index_out); |
| |
| CharacterRange ParseClassAtom(uc16* char_class); |
| RegExpTree* ReportError(Vector<const char> message); |
| void Advance(); |
| void Advance(int dist); |
| void Reset(int pos); |
| |
| // Reports whether the pattern might be used as a literal search string. |
| // Only use if the result of the parse is a single atom node. |
| bool simple(); |
| bool contains_anchor() { return contains_anchor_; } |
| void set_contains_anchor() { contains_anchor_ = true; } |
| int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } |
| int position() { return next_pos_ - 1; } |
| bool failed() { return failed_; } |
| |
| static const int kMaxCaptures = 1 << 16; |
| static const uc32 kEndMarker = (1 << 21); |
| |
| private: |
| enum SubexpressionType { |
| INITIAL, |
| CAPTURE, // All positive values represent captures. |
| POSITIVE_LOOKAHEAD, |
| NEGATIVE_LOOKAHEAD, |
| GROUPING |
| }; |
| |
| class RegExpParserState : public ZoneObject { |
| public: |
| RegExpParserState(RegExpParserState* previous_state, |
| SubexpressionType group_type, |
| int disjunction_capture_index, |
| Zone* zone) |
| : previous_state_(previous_state), |
| builder_(new(zone) RegExpBuilder(zone)), |
| group_type_(group_type), |
| disjunction_capture_index_(disjunction_capture_index) {} |
| // Parser state of containing expression, if any. |
| RegExpParserState* previous_state() { return previous_state_; } |
| bool IsSubexpression() { return previous_state_ != NULL; } |
| // RegExpBuilder building this regexp's AST. |
| RegExpBuilder* builder() { return builder_; } |
| // Type of regexp being parsed (parenthesized group or entire regexp). |
| SubexpressionType group_type() { return group_type_; } |
| // Index in captures array of first capture in this sub-expression, if any. |
| // Also the capture index of this sub-expression itself, if group_type |
| // is CAPTURE. |
| int capture_index() { return disjunction_capture_index_; } |
| |
| private: |
| // Linked list implementation of stack of states. |
| RegExpParserState* previous_state_; |
| // Builder for the stored disjunction. |
| RegExpBuilder* builder_; |
| // Stored disjunction type (capture, look-ahead or grouping), if any. |
| SubexpressionType group_type_; |
| // Stored disjunction's capture index (if any). |
| int disjunction_capture_index_; |
| }; |
| |
| Isolate* isolate() { return isolate_; } |
| Zone* zone() const { return zone_; } |
| |
| uc32 current() { return current_; } |
| bool has_more() { return has_more_; } |
| bool has_next() { return next_pos_ < in()->length(); } |
| uc32 Next(); |
| FlatStringReader* in() { return in_; } |
| void ScanForCaptures(); |
| |
| Isolate* isolate_; |
| Zone* zone_; |
| Handle<String>* error_; |
| ZoneList<RegExpCapture*>* captures_; |
| FlatStringReader* in_; |
| uc32 current_; |
| int next_pos_; |
| // The capture count is only valid after we have scanned for captures. |
| int capture_count_; |
| bool has_more_; |
| bool multiline_; |
| bool simple_; |
| bool contains_anchor_; |
| bool is_scanned_for_captures_; |
| bool failed_; |
| }; |
| |
| // SNIP |
| |
| } // namespace dart |
| |
| #endif // VM_REGEXP_PARSER_H_ |