| // Copyright 2012 the V8 project authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef V8_REGEXP_REGEXP_BYTECODE_GENERATOR_H_ |
| #define V8_REGEXP_REGEXP_BYTECODE_GENERATOR_H_ |
| |
| #include "vm/regexp/label.h" |
| #include "vm/regexp/regexp-bytecodes.h" |
| #include "vm/regexp/regexp-macro-assembler.h" |
| |
| namespace dart { |
| |
| class RegExpBytecodeWriter { |
| public: |
| explicit RegExpBytecodeWriter(Zone* zone); |
| virtual ~RegExpBytecodeWriter() = default; |
| |
| // Helpers for peephole optimization (see RegExpBytecodePeepholeOptimization). |
| template <typename T> |
| void OverwriteValue(T value, int absolute_offset); |
| // The emitted byte range MUST start and end at a bytecode boundary. |
| void EmitRawBytecodeStream(const uint8_t* data, int length); |
| void EmitRawBytecodeStream(const RegExpBytecodeWriter* src_writer, |
| int src_offset, |
| int length); |
| void Finalize(RegExpBytecode bc); |
| |
| // Bytecode buffer access: the current pc_ and the underlying byte vector. |
| // TODO(jgruber): Remove access to details, at least the non-const accessors. |
| int pc() const { return pc_; } |
| ZoneVector<uint8_t>& buffer() { return buffer_; } |
| const ZoneVector<uint8_t>& buffer() const { return buffer_; } |
| |
| // Code emission: a single value of type T, or a single bytecode. |
| template <typename T> |
| inline void Emit(T value, int offset); |
| inline void EmitBytecode(RegExpBytecode bc); |
| |
| // Update bookkeeping at bytecode boundaries. |
| inline void ResetPc(int new_pc); |
| // Reset all state. |
| void Reset(); |
| |
| // Templated code emission: the bytecode / operand type is a template argument. |
| template <RegExpBytecode bytecode, typename... Args> |
| void Emit(Args... args); |
| template <RegExpBytecodeOperandType OperandType, typename T> |
| void EmitOperand(T value, int offset); |
| template <RegExpBytecodeOperandType OperandType, typename T> |
| auto GetCheckedBasicOperandValue(T value); |
| |
| // Runtime versions: the operand type is passed as a regular function argument. |
| template <typename T> |
| void EmitOperand(RegExpBytecodeOperandType type, T value, int offset); |
| |
| int length() const { return pc_; } |
| void CopyBufferTo(uint8_t* a) const; |
| |
| ZoneMap<int, int>& jump_edges() { return jump_edges_; } |
| const ZoneMap<int, int>& jump_edges() const { return jump_edges_; } |
| |
| void PatchJump(int target, int absolute_offset); |
| |
| #ifdef DEBUG |
| // Emit padding (DEBUG only; otherwise EMIT_PADDING is a no-op). NOTE(review): takes a single `offset`, not a start/end pair -- confirm the padded range in the definition. |
| inline void EmitPadding(int offset); |
| #define EMIT_PADDING(offset) EmitPadding(offset) |
| #else |
| #define EMIT_PADDING(offset) ((void)0) |
| #endif |
| |
| protected: |
| // The buffer into which code and relocation info are generated. |
| static constexpr int kInitialBufferSizeInBytes = 1 * KB; |
| static constexpr size_t kMaxBufferGrowthInBytes = 1 * MB; |
| ZoneVector<uint8_t> buffer_; |
| |
| // The program counter. Always points at the beginning of a bytecode while |
| // we generate the ByteArray. Points to the end when we are done. Returned by both pc() and length(). |
| int pc_; |
| |
| private: |
| // Stores jump edges emitted for the bytecode (used by |
| // RegExpBytecodePeepholeOptimization). |
| // Key: jump source (offset in buffer_ where jump destination is stored). |
| // Value: jump destination (offset in buffer_ to jump to). |
| ZoneMap<int, int> jump_edges_; |
| |
| #ifdef DEBUG |
| // End of the bytecode we are currently emitting (exclusive). Absolute value |
| // greater than `pc_`. |
| int end_of_bc_; |
| // Position (absolute) within the current bytecode. This value is updated with |
| // every operand and is guaranteed to be between `pc_` and `end_of_bc_`. |
| int pc_within_bc_; |
| #endif |
| |
| // TODO(jgruber): Reasonable protected/private organisation once the dust has |
| // settled. |
| inline void EnsureCapacity(size_t size); |
| void ExpandBuffer(size_t new_size); |
| }; |
| |
| // An assembler/generator for the Irregexp byte code: a RegExpMacroAssembler that is also a RegExpBytecodeWriter. |
| class RegExpBytecodeGenerator : public RegExpMacroAssembler, |
| public RegExpBytecodeWriter { |
| public: |
| // Create an assembler. |
| // |
| // NOTE(review): this comment used to describe instructions and relocation |
| // information sharing one buffer, a `buffer_size` argument and CodeDesc |
| // (globals.h). None of these appear in the signature below, which takes only |
| // an Isolate*, a Zone* and a Mode. The byte buffer visible from this header |
| // is the inherited RegExpBytecodeWriter::buffer_ (a ZoneVector<uint8_t>); |
| // confirm its growth and lifetime in the implementation. |
| RegExpBytecodeGenerator(Isolate* isolate, Zone* zone, Mode mode); |
| ~RegExpBytecodeGenerator() override; |
| void Bind(V8Label* label) override; |
| void AdvanceCurrentPosition(int by) override; // Signed cp change. |
| void PopCurrentPosition() override; |
| void PushCurrentPosition() override; |
| void Backtrack() override; |
| void GoTo(V8Label* label) override; |
| void PushBacktrack(V8Label* label) override; |
| bool Succeed() override; |
| void Fail() override; |
| void PopRegister(int register_index) override; |
| void PushRegister(int register_index, |
| StackCheckFlag check_stack_limit) override; |
| void AdvanceRegister(int register_index, int by) override; // r[reg] += by. |
| void SetCurrentPositionFromEnd(int by) override; |
| void SetRegister(int register_index, int to) override; |
| void WriteCurrentPositionToRegister(int register_index, |
| int cp_offset) override; |
| void ClearRegisters(int reg_from, int reg_to) override; |
| void ReadCurrentPositionFromRegister(int reg) override; |
| void WriteStackPointerToRegister(int register_index) override; |
| void ReadStackPointerFromRegister(int register_index) override; |
| void CheckPosition(int cp_offset, V8Label* on_outside_input) override; |
| void CheckSpecialClassRanges(StandardCharacterSet type, |
| V8Label* on_no_match) override; |
| void LoadCurrentCharacterImpl(int cp_offset, |
| V8Label* on_end_of_input, |
| bool check_bounds, |
| int characters, |
| int eats_at_least) override; |
| void CheckCharacter(unsigned c, V8Label* on_equal) override; |
| void CheckCharacterAfterAnd(unsigned c, |
| unsigned mask, |
| V8Label* on_equal) override; |
| void CheckCharacterGT(uint16_t limit, V8Label* on_greater) override; |
| void CheckCharacterLT(uint16_t limit, V8Label* on_less) override; |
| void CheckFixedLengthLoop(V8Label* on_tos_equals_current_position) override; |
| void CheckAtStart(int cp_offset, V8Label* on_at_start) override; |
| void CheckNotAtStart(int cp_offset, V8Label* on_not_at_start) override; |
| void CheckNotCharacter(unsigned c, V8Label* on_not_equal) override; |
| void CheckNotCharacterAfterAnd(unsigned c, |
| unsigned mask, |
| V8Label* on_not_equal) override; |
| void CheckNotCharacterAfterMinusAnd(uint16_t c, |
| uint16_t minus, |
| uint16_t mask, |
| V8Label* on_not_equal) override; |
| void CheckCharacterInRange(uint16_t from, |
| uint16_t to, |
| V8Label* on_in_range) override; |
| void CheckCharacterNotInRange(uint16_t from, |
| uint16_t to, |
| V8Label* on_not_in_range) override; |
| bool CheckCharacterInRangeArray(const ZoneList<CharacterRange>* ranges, |
| V8Label* on_in_range) override { |
| // Disabled in the interpreter, because 1) there is no constant pool that |
| // could store the ByteArray pointer, 2) bytecode size limits are not as |
| // restrictive as code (e.g. branch distances on arm), 3) bytecode for |
| // large character classes is already quite compact. |
| // TODO(jgruber): Consider using BytecodeArrays (with a constant pool) |
| // instead of plain ByteArrays; then we could implement |
| // CheckCharacterInRangeArray in the interpreter. |
| return false; |
| } |
| bool CheckCharacterNotInRangeArray(const ZoneList<CharacterRange>* ranges, |
| V8Label* on_not_in_range) override { |
| return false; |
| } |
| void CheckBitInTable(const TypedData& table, V8Label* on_bit_set) override; |
| void SkipUntilBitInTable(int cp_offset, |
| const TypedData& table, |
| const TypedData& nibble_table, |
| int advance_by, |
| V8Label* on_match, |
| V8Label* on_no_match) override; |
| void SkipUntilCharAnd(int cp_offset, |
| int advance_by, |
| unsigned character, |
| unsigned mask, |
| int eats_at_least, |
| V8Label* on_match, |
| V8Label* on_no_match) override; |
| void SkipUntilChar(int cp_offset, |
| int advance_by, |
| unsigned character, |
| V8Label* on_match, |
| V8Label* on_no_match) override; |
| void SkipUntilCharPosChecked(int cp_offset, |
| int advance_by, |
| unsigned character, |
| int eats_at_least, |
| V8Label* on_match, |
| V8Label* on_no_match) override; |
| void SkipUntilCharOrChar(int cp_offset, |
| int advance_by, |
| unsigned char1, |
| unsigned char2, |
| V8Label* on_match, |
| V8Label* on_no_match) override; |
| void SkipUntilGtOrNotBitInTable(int cp_offset, |
| int advance_by, |
| unsigned character, |
| const TypedData& table, |
| V8Label* on_match, |
| V8Label* on_no_match) override; |
| void SkipUntilOneOfMasked(int cp_offset, |
| int advance_by, |
| unsigned both_chars, |
| unsigned both_mask, |
| int max_offset, |
| unsigned chars1, |
| unsigned mask1, |
| unsigned chars2, |
| unsigned mask2, |
| V8Label* on_match1, |
| V8Label* on_match2, |
| V8Label* on_failure) override; |
| void SkipUntilOneOfMasked3(const SkipUntilOneOfMasked3Args& args) override; |
| void CheckNotBackReference(int start_reg, |
| bool read_backward, |
| V8Label* on_no_match) override; |
| void CheckNotBackReferenceIgnoreCase(int start_reg, |
| bool read_backward, |
| bool unicode, |
| V8Label* on_no_match) override; |
| void IfRegisterLT(int register_index, |
| int comparand, |
| V8Label* on_less_than) override; |
| void IfRegisterGE(int register_index, |
| int comparand, |
| V8Label* on_greater_or_equal) override; |
| void IfRegisterEqPos(int register_index, V8Label* on_equal) override; |
| void RecordComment(std::string_view comment) override {} |
| // MacroAssembler* masm() override { return nullptr; } |
| |
| IrregexpImplementation Implementation() override; |
| ObjectPtr GetCode(const String& source, RegExpFlags flags) override; |
| |
| private: |
| template <RegExpBytecode bytecode, typename... Args> |
| void Emit(Args... args); |
| using RegExpBytecodeWriter::Emit; |
| |
| void EmitSkipTable(const TypedData& table); |
| |
| V8Label backtrack_; |
| |
| Isolate* isolate_; |
| |
| DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpBytecodeGenerator); |
| }; |
| |
| } // namespace dart |
| |
| #endif // V8_REGEXP_REGEXP_BYTECODE_GENERATOR_H_ |