blob: 40b8ab8954050c46c528dacc9ec1b96daa2a74ca [file] [edit]
// Copyright 2025 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_REGEXP_REGEXP_BYTECODES_INL_H_
#define V8_REGEXP_REGEXP_BYTECODES_INL_H_
#include "vm/regexp/regexp-bytecodes.h"
// Include the non-inl header before the rest of the headers.
#include <array>
#include <limits>
#include <string_view>
#include <type_traits>
#include "vm/regexp/regexp-macro-assembler.h" // For StackCheckFlag
namespace dart {
// Maps a RegExpBytecodeOperandType to compile-time properties of that operand
// type (encoded size, alignment and, for basic types, the C++ value type and
// its valid range). Only the specializations generated by the macros in this
// file are defined.
template <RegExpBytecodeOperandType>
struct RegExpOperandTypeTraits;

// Generates the traits specialization for a basic operand type |Name| that is
// stored as |CType|. The valid range is the full range of |CType| and the
// operand is aligned to its own size.
#define DECLARE_BASIC_OPERAND_TYPE_TRAITS(Name, CType)                      \
  template <>                                                               \
  struct RegExpOperandTypeTraits<RegExpBytecodeOperandType::k##Name> {      \
    static_assert(!std::is_pointer_v<CType>);                               \
    static constexpr uint8_t kSize = sizeof(CType);                         \
    using kCType = CType;                                                   \
    static constexpr bool kIsBasic = true;                                  \
    static constexpr kCType kMinValue = std::numeric_limits<kCType>::min(); \
    static constexpr kCType kMaxValue = std::numeric_limits<kCType>::max(); \
    static constexpr size_t kAlignment = kSize;                             \
  };
BASIC_BYTECODE_OPERAND_TYPE_LIST(DECLARE_BASIC_OPERAND_TYPE_TRAITS)
// The generator macro is only needed for the expansion above; do not let it
// leak into files that include this header.
#undef DECLARE_BASIC_OPERAND_TYPE_TRAITS
// Generates the traits specialization for a basic operand type |Name| that is
// stored as |CType| but whose valid values are restricted to
// [|MinValue|, |MaxValue|]. For non-enum types the limits are checked at
// compile time to lie within the range of |CType|. The operand is aligned to
// its own size.
#define DECLARE_BASIC_OPERAND_TYPE_LIMITS_TRAITS(Name, CType, MinValue, \
                                                 MaxValue)              \
  template <>                                                           \
  struct RegExpOperandTypeTraits<RegExpBytecodeOperandType::k##Name> {  \
    static_assert(!std::is_pointer_v<CType>);                           \
    static constexpr uint8_t kSize = sizeof(CType);                     \
    using kCType = CType;                                               \
    static constexpr bool kIsBasic = true;                              \
    static_assert(std::is_enum_v<kCType> ||                             \
                  MinValue >= std::numeric_limits<kCType>::min());      \
    static_assert(std::is_enum_v<kCType> ||                             \
                  MaxValue <= std::numeric_limits<kCType>::max());      \
    static constexpr kCType kMinValue = MinValue;                       \
    static constexpr kCType kMaxValue = MaxValue;                       \
    static constexpr size_t kAlignment = kSize;                         \
  };
BASIC_BYTECODE_OPERAND_TYPE_LIMITS_LIST(
    DECLARE_BASIC_OPERAND_TYPE_LIMITS_TRAITS)
// The generator macro is only needed for the expansion above; do not let it
// leak into files that include this header.
#undef DECLARE_BASIC_OPERAND_TYPE_LIMITS_TRAITS
// Generates the traits specialization for a special (non-basic) operand type
// |Name| with an explicitly given encoded |Size| and |Alignment|. The size is
// checked at compile time to be a multiple of the alignment. Special types
// expose no kCType or value range.
#define DECLARE_SPECIAL_OPERAND_TYPE_TRAITS(Name, Size, Alignment)     \
  template <>                                                          \
  struct RegExpOperandTypeTraits<RegExpBytecodeOperandType::k##Name> { \
    static constexpr uint8_t kSize = Size;                             \
    static constexpr bool kIsBasic = false;                            \
    static constexpr size_t kAlignment = Alignment;                    \
    static_assert(Utils::IsAligned(kSize, kAlignment));                \
  };
SPECIAL_BYTECODE_OPERAND_TYPE_LIST(DECLARE_SPECIAL_OPERAND_TYPE_TRAITS)
// The generator macro is only needed for the expansion above; do not let it
// leak into files that include this header.
#undef DECLARE_SPECIAL_OPERAND_TYPE_TRAITS
namespace detail {
// Returns the number of non-type template arguments it was instantiated with.
template <auto... Args>
constexpr int CountOf() {
  constexpr int kArgumentCount = sizeof...(Args);
  return kArgumentCount;
}
// Splits a parenthesized, comma-separated list such as "(a, b, c)" into its N
// individual names. Surrounding spaces are stripped from each name. The
// returned views refer to the characters of |raw_names|.
template <size_t N>
consteval std::array<std::string_view, N> SplitNames(const char* raw_names) {
  const std::string_view text(raw_names);
  // The list must be wrapped in '(' and ')'.
  DCHECK_EQ(text.front(), '(');
  DCHECK_EQ(text.back(), ')');
  // Index of the closing ')'; it terminates the last name.
  const size_t close_paren = text.size() - 1;
  std::array<std::string_view, N> pieces;
  // Start scanning right after the opening '('.
  size_t cursor = 1;
  for (size_t index = 0; index < N; ++index) {
    const size_t separator = text.find(',', cursor);
    // DCHECK_EQ(index == N - 1, separator == std::string_view::npos);
    // Skip leading spaces of the current name.
    const size_t first = text.find_first_not_of(" ", cursor);
    // The name ends at the next comma, or at ')' when no comma is left.
    size_t limit = close_paren;
    if (separator != std::string_view::npos) {
      limit = separator;
    }
    // Walk back over the delimiter and any trailing spaces.
    const size_t last = text.find_last_not_of(" ,)", limit) + 1;
    pieces[index] = text.substr(first, last - first);
    cursor = separator + 1;
  }
  return pieces;
}
// Computes the packed byte offset of every operand of a bytecode.
// The first operand is placed after sizeof(RegExpBytecode). Each operand is
// aligned to the kAlignment of its operand type; an operand that would cross a
// kBytecodeAlignment boundary is moved to the start of the next
// kBytecodeAlignment-sized block.
template <RegExpBytecodeOperandType... operand_types>
consteval auto CalculateAlignedOffsets() {
  constexpr int N = sizeof...(operand_types);
  constexpr std::array<uint8_t, N> kOperandSizes = {
      RegExpOperandTypeTraits<operand_types>::kSize...};
  constexpr std::array<uint8_t, N> kOperandAlignments = {
      RegExpOperandTypeTraits<operand_types>::kAlignment...};

  std::array<int, N> result{};
  // Operands start right behind the bytecode itself.
  int cursor = sizeof(RegExpBytecode);
  for (size_t index = 0; index < N; ++index) {
    const uint8_t size = kOperandSizes[index];
    const size_t alignment = kOperandAlignments[index];
    cursor = Utils::RoundUp(cursor, alignment);
    // An operand that does not fit into the remainder of the current block
    // starts a new block instead.
    const auto used_in_block = cursor % kBytecodeAlignment;
    if (used_in_block + size > kBytecodeAlignment) {
      cursor = Utils::RoundUp(cursor, kBytecodeAlignment);
    }
    result[index] = cursor;
    cursor += size;
  }
  return result;
}
// Bundles the compile-time layout information of a bytecode whose operands
// have the types |ops|, in that order.
template <RegExpBytecodeOperandType... ops>
struct RegExpBytecodeOperandsTraits {
// Number of operands.
static constexpr int kOperandCount = sizeof...(ops);
// Type of each operand, in declaration order.
static constexpr std::array<RegExpBytecodeOperandType, kOperandCount>
kOperandTypes = {ops...};
// Encoded size of each operand, taken from RegExpOperandTypeTraits.
static constexpr std::array<uint8_t, kOperandCount> kOperandSizes = {
RegExpOperandTypeTraits<ops>::kSize...};
// Required alignment of each operand, taken from RegExpOperandTypeTraits.
static constexpr std::array<uint8_t, kOperandCount> kOperandAlignments = {
RegExpOperandTypeTraits<ops>::kAlignment...};
// Offset of each operand as computed by CalculateAlignedOffsets.
static constexpr std::array<int, kOperandCount> kOperandOffsets =
CalculateAlignedOffsets<ops...>();
// Total size of the bytecode including operands: the end of the last operand
// (or sizeof(RegExpBytecode) when there are no operands), rounded up to
// kBytecodeAlignment.
static constexpr int kSize = Utils::RoundUp(
kOperandCount == 0 ? sizeof(RegExpBytecode)
: kOperandOffsets.back() + kOperandSizes.back(),
kBytecodeAlignment);
};
// Per-bytecode operand names. Only the specializations generated below from
// REGEXP_BYTECODE_LIST are defined.
template <RegExpBytecode bc>
struct RegExpBytecodeOperandNames;
// Generates the specialization for one bytecode:
// - Operand: an enum with one enumerator per name in |OpNames|.
// - kCount: the number of operand names.
// - kNames: the operand names as strings, obtained by splitting the
//   stringified |OpNames| list.
// - Name(op): the string for |op|, looked up in kNames by enumerator value.
// |OpTypes| is not used by this macro.
#define DECLARE_OPERAND_NAMES(CamelName, OpNames, OpTypes) \
template <> \
struct RegExpBytecodeOperandNames<RegExpBytecode::k##CamelName> { \
enum class Operand { UNPAREN(OpNames) }; \
using enum Operand; \
static constexpr size_t kCount = detail::CountOf<UNPAREN(OpNames)>(); \
static constexpr auto kNames = detail::SplitNames<kCount>(#OpNames); \
static /*constexpr*/ std::string_view Name(Operand op) { \
return kNames[static_cast<size_t>(op)]; \
} \
};
REGEXP_BYTECODE_LIST(DECLARE_OPERAND_NAMES)
#undef DECLARE_OPERAND_NAMES
// Compile-time metadata and typed accessors for the operands of bytecode |bc|,
// whose operands have the types |OpTypes| (in order).
template <RegExpBytecode bc, RegExpBytecodeOperandType... OpTypes>
class RegExpBytecodeOperandsBase {
 public:
  static constexpr RegExpBytecode kBytecode = bc;
  using Operand = RegExpBytecodeOperandNames<bc>::Operand;
  using Traits = RegExpBytecodeOperandsTraits<OpTypes...>;
  // Number of operands.
  static constexpr int kCount = Traits::kOperandCount;
  // Size of the bytecode including all operands.
  static constexpr int kTotalSize = Traits::kSize;

  // Position of |op| in the operand list.
  static constexpr int Index(Operand op) { return static_cast<uint8_t>(op); }
  // Encoded size of |op|.
  static constexpr int Size(Operand op) {
    return Traits::kOperandSizes[Index(op)];
  }
  // Offset of |op| relative to the start of the bytecode.
  static constexpr int Offset(Operand op) {
    return Traits::kOperandOffsets[Index(op)];
  }
  // Operand type of |op|.
  static constexpr RegExpBytecodeOperandType Type(Operand op) {
    return Traits::kOperandTypes[Index(op)];
  }
  // Name of |op| as recorded in RegExpBytecodeOperandNames.
  static constexpr std::string_view Name(Operand op) {
    return RegExpBytecodeOperandNames<bc>::Name(op);
  }

  // Returns a tuple with one std::integral_constant<Operand, ...> per operand,
  // in operand order.
  static consteval auto GetOperandsTuple() {
    return []<size_t... Is>(std::index_sequence<Is...>) {
      return std::tuple_cat([]<size_t I>() {
        constexpr auto id = static_cast<Operand>(I);
        return std::tuple(std::integral_constant<Operand, id>{});
      }.template operator()<Is>()...);
    }(std::make_index_sequence<kCount>{});
  }

  // Calls |f| templatized by Operand for each Operand in the Operands list.
  // Example:
  //   using Operands = RegExpBytecodeOperands<RegExpBytecode::...>;
  //   size_t op_sizes = 0;
  //   Operands::ForEachOperand([&]<auto op>() {
  //     op_sizes += Operands::Size(op);
  //   });
  // The list of operands is fixed at compile time, so the calls are expanded
  // by the compiler rather than driven by a runtime loop.
  template <typename Func>
  static constexpr void ForEachOperand(Func&& f) {
    constexpr auto filtered_ops = GetOperandsTuple();
    std::apply([&](auto... ops) { (..., f.template operator()<ops.value>()); },
               filtered_ops);
  }

  // Similar to ForEachOperand, but additionally provides the current index as
  // a template argument. The index is a sequential index of operands.
  template <typename Func>
  static constexpr void ForEachOperandWithIndex(Func&& f) {
    constexpr auto filtered_ops = GetOperandsTuple();
    [&]<size_t... I>(std::index_sequence<I...>) {
      (...,
       f.template operator()<
           std::tuple_element_t<I, decltype(filtered_ops)>::value /* Operand */,
           I /* Index */>());
    }(std::make_index_sequence<std::tuple_size_v<decltype(filtered_ops)>>{});
  }

  // Similar to ForEachOperand, but calls |f| only for operands whose type is
  // |OpType|.
  template <RegExpBytecodeOperandType OpType, typename Func>
  static constexpr void ForEachOperandOfType(Func&& f) {
    ForEachOperand([&]<auto operand>() {
      if constexpr (Type(operand) == OpType) {
        f.template operator()<operand>();
      }
    });
  }

  // Reads the basic operand |op| of the bytecode located at |pc| and returns
  // it by value as the operand type's kCType. |no_gc| is not used by the body;
  // it only has to be supplied by the caller.
  template <Operand op>
    requires(RegExpOperandTypeTraits<Type(op)>::kIsBasic)
  static auto Get(const uint8_t* pc, const DisallowGarbageCollection& no_gc) {
    DCHECK_EQ(RegExpBytecodes::FromPtr(pc), bc);
    constexpr RegExpBytecodeOperandType OperandType = Type(op);
    constexpr int offset = Offset(op);
    using CType = RegExpOperandTypeTraits<OperandType>::kCType;
    ASSERT(Utils::IsAligned(offset, sizeof(CType)));
    return *reinterpret_cast<const CType*>(pc + offset);
  }

  // Reads the basic operand |op| of the bytecode that starts at byte |offset|
  // inside |bytecode|. |zone| is unused for basic operands.
  template <Operand op>
    requires(RegExpOperandTypeTraits<Type(op)>::kIsBasic)
  static auto Get(const TypedData& bytecode, int offset, Zone* zone) {
    // Basic operand types won't allocate, so we can always fallback to the
    // GC-unsafe version.
    DisallowGarbageCollection no_gc;
    return Get<op>(reinterpret_cast<const uint8_t*>(bytecode.DataAddr(offset)),
                   no_gc);
  }

  // Returns a pointer to the first byte of the bit table operand |op| of the
  // bytecode located at |pc|. No data is copied. |no_gc| is taken by const
  // reference, matching the basic-operand overload.
  template <Operand op>
    requires(Type(op) == RegExpBytecodeOperandType::kBitTable)
  static auto Get(const uint8_t* pc, const DisallowGarbageCollection& no_gc) {
    static_assert(Size(op) == RegExpMacroAssembler::kTableSize / kBitsPerByte);
    DCHECK_EQ(RegExpBytecodes::FromPtr(pc), bc);
    constexpr int offset = Offset(op);
    return pc + offset;
  }

  // Copies the bit table operand |op| of the bytecode that starts at byte
  // |offset| inside |bytecode| into a ZoneVector constructed with |zone|.
  template <Operand op>
    requires(Type(op) == RegExpBytecodeOperandType::kBitTable)
  static auto Get(const TypedData& bytecode, int offset, Zone* zone) {
    static_assert(Size(op) == RegExpMacroAssembler::kTableSize / kBitsPerByte);
    // DCHECK_EQ(RegExpBytecodes::FromPtr(bytecode->begin() + offset), bc);
    constexpr int op_offset = Offset(op);
    const uint8_t* start =
        reinterpret_cast<const uint8_t*>(bytecode.DataAddr(0)) + offset +
        op_offset;
    const uint8_t* end = start + Size(op);
    return ZoneVector<uint8_t>(start, end, zone);
  }
};
} // namespace detail
// Expands to |x| followed by the remaining arguments. __VA_OPT__ emits the
// separating comma only when at least one further argument is present, so an
// empty argument list does not leave a trailing comma.
#define PACK_OPTIONAL(x, ...) x __VA_OPT__(, ) __VA_ARGS__
// Generates the RegExpBytecodeOperands specialization for one bytecode. The
// class derives from detail::RegExpBytecodeOperandsBase instantiated with the
// bytecode and its operand types (|OpTypes| may be empty, hence PACK_OPTIONAL)
// and declares an Operand enum with one enumerator per name in |OpNames|.
// NOTE(review): the Operand enum declared here is a distinct type from the
// Operand alias in RegExpBytecodeOperandsBase and hides it in this scope --
// confirm that this is intended.
#define DECLARE_OPERANDS(CamelName, OpNames, OpTypes) \
template <> \
class RegExpBytecodeOperands<RegExpBytecode::k##CamelName> final \
: public detail::RegExpBytecodeOperandsBase<PACK_OPTIONAL( \
RegExpBytecode::k##CamelName, UNPAREN(OpTypes))>, \
public AllStatic { \
public: \
enum class Operand { UNPAREN(OpNames) }; \
using enum Operand; \
};
REGEXP_BYTECODE_LIST(DECLARE_OPERANDS)
#undef DECLARE_OPERANDS
namespace detail {

// The tables below are `inline constexpr` rather than `static constexpr`:
// they are defined in a header and read from inline constexpr member
// functions, so every translation unit must refer to the same entity.

// Name of every bytecode, in REGEXP_BYTECODE_LIST order.
#define DECLARE_BYTECODE_NAMES(CamelName, ...) #CamelName,
inline constexpr const char* kBytecodeNames[] = {
    REGEXP_BYTECODE_LIST(DECLARE_BYTECODE_NAMES)};
#undef DECLARE_BYTECODE_NAMES

// Total size (bytecode plus operands) of every bytecode, in
// REGEXP_BYTECODE_LIST order.
#define DECLARE_BYTECODE_SIZES(CamelName, ...) \
  RegExpBytecodeOperands<RegExpBytecode::k##CamelName>::kTotalSize,
inline constexpr uint8_t kBytecodeSizes[] = {
    REGEXP_BYTECODE_LIST(DECLARE_BYTECODE_SIZES)};
#undef DECLARE_BYTECODE_SIZES

// Encoded size of every operand type, in BYTECODE_OPERAND_TYPE_LIST order.
#define DECLARE_OPERAND_TYPE_SIZE(Name, ...) \
  RegExpOperandTypeTraits<RegExpBytecodeOperandType::k##Name>::kSize,
inline constexpr uint8_t kOperandTypeSizes[] = {
    BYTECODE_OPERAND_TYPE_LIST(DECLARE_OPERAND_TYPE_SIZE)};
#undef DECLARE_OPERAND_TYPE_SIZE

}  // namespace detail
// static
// Turns the runtime value |bytecode| into a template argument: calls
// f.template operator()<bytecode>() for the matching enumerator and returns
// the result of that call.
template <typename Func>
decltype(auto) RegExpBytecodes::DispatchOnBytecode(RegExpBytecode bytecode,
                                                   Func&& f) {
  switch (bytecode) {
#define REGEXP_BYTECODE_DISPATCH_CASE(CamelName, ...) \
  case RegExpBytecode::k##CamelName:                  \
    return f.template operator()<RegExpBytecode::k##CamelName>();
    REGEXP_BYTECODE_LIST(REGEXP_BYTECODE_DISPATCH_CASE)
#undef REGEXP_BYTECODE_DISPATCH_CASE
  }
  // Every listed bytecode returns from inside the switch.
  UNREACHABLE();
}
// static
// Returns the name of |bytecode|, looked up via its ToByte() value.
constexpr const char* RegExpBytecodes::Name(RegExpBytecode bytecode) {
  const auto raw_byte = ToByte(bytecode);
  return Name(raw_byte);
}
// static
// Returns the name stored in detail::kBytecodeNames for the raw bytecode
// value |bytecode|, which must be less than kCount.
constexpr const char* RegExpBytecodes::Name(uint8_t bytecode) {
  DCHECK_LT(bytecode, kCount);
  const char* const name = detail::kBytecodeNames[bytecode];
  return name;
}
// static
// Returns the size of |bytecode|, looked up via its ToByte() value.
constexpr uint8_t RegExpBytecodes::Size(RegExpBytecode bytecode) {
  const auto raw_byte = ToByte(bytecode);
  return Size(raw_byte);
}
// static
// Returns the size stored in detail::kBytecodeSizes for the raw bytecode
// value |bytecode|, which must be less than kCount.
constexpr uint8_t RegExpBytecodes::Size(uint8_t bytecode) {
  DCHECK_LT(bytecode, kCount);
  const uint8_t size = detail::kBytecodeSizes[bytecode];
  return size;
}
// static
// Returns the encoded size of the operand type |type|, read from
// detail::kOperandTypeSizes at the enumerator's integer value.
constexpr uint8_t RegExpBytecodes::Size(RegExpBytecodeOperandType type) {
  const int type_index = static_cast<int>(type);
  return detail::kOperandTypeSizes[type_index];
}
} // namespace dart
#endif // V8_REGEXP_REGEXP_BYTECODES_INL_H_