blob: e984c1bd16bc2fa2b8d5bbb1583791c8567f4f83 [file] [log] [blame]
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
#include "vm/symbols.h"
#include "vm/handles.h"
#include "vm/handles_impl.h"
#include "vm/hash_table.h"
#include "vm/isolate.h"
#include "vm/object.h"
#include "vm/object_store.h"
#include "vm/raw_object.h"
#include "vm/snapshot_ids.h"
#include "vm/unicode.h"
#include "vm/visitor.h"
namespace dart {
RawString* Symbols::predefined_[Symbols::kNumberOfOneCharCodeSymbols];
String* Symbols::symbol_handles_[Symbols::kMaxPredefinedId];
static const char* names[] = {
NULL,
#define DEFINE_SYMBOL_LITERAL(symbol, literal) \
literal,
PREDEFINED_SYMBOLS_LIST(DEFINE_SYMBOL_LITERAL)
#undef DEFINE_SYMBOL_LITERAL
"", // matches kKwTableStart.
#define DEFINE_KEYWORD_SYMBOL_INDEX(t, s, p, a) \
s,
DART_KEYWORD_LIST(DEFINE_KEYWORD_SYMBOL_INDEX)
#undef DEFINE_KEYWORD_SYMBOL_INDEX
};
DEFINE_FLAG(bool, dump_symbol_stats, false, "Dump symbol table statistics");
RawString* StringFrom(const uint8_t* data, intptr_t len, Heap::Space space) {
return String::FromLatin1(data, len, space);
}
RawString* StringFrom(const uint16_t* data, intptr_t len, Heap::Space space) {
return String::FromUTF16(data, len, space);
}
RawString* StringFrom(const int32_t* data, intptr_t len, Heap::Space space) {
return String::FromUTF32(data, len, space);
}
template<typename CharType>
class CharArray {
public:
CharArray(const CharType* data, intptr_t len)
: data_(data), len_(len) {
hash_ = String::Hash(data, len);
}
RawString* ToSymbol() const {
String& result = String::Handle(StringFrom(data_, len_, Heap::kOld));
result.SetCanonical();
result.SetHash(hash_);
return result.raw();
}
bool Equals(const String& other) const {
return other.Equals(data_, len_);
}
intptr_t Hash() const { return hash_; }
private:
const CharType* data_;
intptr_t len_;
intptr_t hash_;
};
typedef CharArray<uint8_t> Latin1Array;
typedef CharArray<uint16_t> UTF16Array;
typedef CharArray<int32_t> UTF32Array;
class StringSlice {
public:
StringSlice(const String& str, intptr_t begin_index, intptr_t length)
: str_(str), begin_index_(begin_index), len_(length) {
hash_ = is_all() ? str.Hash() : String::Hash(str, begin_index, length);
}
RawString* ToSymbol() const;
bool Equals(const String& other) const {
return other.Equals(str_, begin_index_, len_);
}
intptr_t Hash() const { return hash_; }
private:
bool is_all() const { return begin_index_ == 0 && len_ == str_.Length(); }
const String& str_;
intptr_t begin_index_;
intptr_t len_;
intptr_t hash_;
};
RawString* StringSlice::ToSymbol() const {
if (is_all() && str_.IsOld()) {
str_.SetCanonical();
return str_.raw();
} else {
String& result = String::Handle(
String::SubString(str_, begin_index_, len_, Heap::kOld));
result.SetCanonical();
result.SetHash(hash_);
return result.raw();
}
}
class ConcatString {
public:
ConcatString(const String& str1, const String& str2)
: str1_(str1), str2_(str2), hash_(String::HashConcat(str1, str2)) {}
RawString* ToSymbol() const;
bool Equals(const String& other) const {
return other.EqualsConcat(str1_, str2_);
}
intptr_t Hash() const { return hash_; }
private:
const String& str1_;
const String& str2_;
intptr_t hash_;
};
RawString* ConcatString::ToSymbol() const {
String& result = String::Handle(String::Concat(str1_, str2_, Heap::kOld));
result.SetCanonical();
result.SetHash(hash_);
return result.raw();
}
class SymbolTraits {
public:
static bool IsMatch(const Object& a, const Object& b) {
return String::Cast(a).Equals(String::Cast(b));
}
template<typename CharType>
static bool IsMatch(const CharArray<CharType>& array, const Object& obj) {
return array.Equals(String::Cast(obj));
}
static bool IsMatch(const StringSlice& slice, const Object& obj) {
return slice.Equals(String::Cast(obj));
}
static bool IsMatch(const ConcatString& concat, const Object& obj) {
return concat.Equals(String::Cast(obj));
}
static uword Hash(const Object& key) {
return String::Cast(key).Hash();
}
template<typename CharType>
static uword Hash(const CharArray<CharType>& array) {
return array.Hash();
}
static uword Hash(const StringSlice& slice) {
return slice.Hash();
}
static uword Hash(const ConcatString& concat) {
return concat.Hash();
}
template<typename CharType>
static RawObject* NewKey(const CharArray<CharType>& array) {
return array.ToSymbol();
}
static RawObject* NewKey(const StringSlice& slice) {
return slice.ToSymbol();
}
static RawObject* NewKey(const ConcatString& concat) {
return concat.ToSymbol();
}
};
typedef UnorderedHashSet<SymbolTraits> SymbolTable;
const char* Symbols::Name(SymbolId symbol) {
ASSERT((symbol > kIllegal) && (symbol < kNullCharId));
return names[symbol];
}
const String& Symbols::Keyword(Token::Kind keyword) {
const int kw_index = keyword - Token::kFirstKeyword;
ASSERT((0 <= kw_index) && (kw_index < Token::kNumKeywords));
// First keyword symbol is in symbol_handles_[kKwTableStart + 1].
const intptr_t keyword_id = Symbols::kKwTableStart + 1 + kw_index;
ASSERT(symbol_handles_[keyword_id] != NULL);
return *symbol_handles_[keyword_id];
}
void Symbols::InitOnce(Isolate* vm_isolate) {
// Should only be run by the vm isolate.
ASSERT(Isolate::Current() == Dart::vm_isolate());
ASSERT(vm_isolate == Dart::vm_isolate());
Zone* zone = Thread::Current()->zone();
// Create and setup a symbol table in the vm isolate.
SetupSymbolTable(vm_isolate);
// Create all predefined symbols.
ASSERT((sizeof(names) / sizeof(const char*)) == Symbols::kNullCharId);
SymbolTable table(zone, vm_isolate->object_store()->symbol_table());
// First set up all the predefined string symbols.
// Create symbols for language keywords. Some keywords are equal to
// symbols we already created, so use New() instead of Add() to ensure
// that the symbols are canonicalized.
for (intptr_t i = 1; i < Symbols::kNullCharId; i++) {
String* str = String::ReadOnlyHandle();
*str = OneByteString::New(names[i], Heap::kOld);
str->Hash();
str->SetCanonical();
bool present = table.Insert(*str);
ASSERT(!present);
symbol_handles_[i] = str;
}
// Add Latin1 characters as Symbols, so that Symbols::FromCharCode is fast.
for (intptr_t c = 0; c < kNumberOfOneCharCodeSymbols; c++) {
intptr_t idx = (kNullCharId + c);
ASSERT(idx < kMaxPredefinedId);
ASSERT(Utf::IsLatin1(c));
uint8_t ch = static_cast<uint8_t>(c);
String* str = String::ReadOnlyHandle();
*str = OneByteString::New(&ch, 1, Heap::kOld);
str->Hash();
str->SetCanonical();
bool present = table.Insert(*str);
ASSERT(!present);
predefined_[c] = str->raw();
symbol_handles_[idx] = str;
}
vm_isolate->object_store()->set_symbol_table(table.Release());
}
void Symbols::InitOnceFromSnapshot(Isolate* vm_isolate) {
// Should only be run by the vm isolate.
ASSERT(Isolate::Current() == Dart::vm_isolate());
ASSERT(vm_isolate == Dart::vm_isolate());
Zone* zone = Thread::Current()->zone();
SymbolTable table(zone, vm_isolate->object_store()->symbol_table());
// Lookup all the predefined string symbols and language keyword symbols
// and cache them in the read only handles for fast access.
for (intptr_t i = 1; i < Symbols::kNullCharId; i++) {
String* str = String::ReadOnlyHandle();
const unsigned char* name =
reinterpret_cast<const unsigned char*>(names[i]);
*str ^= table.GetOrNull(Latin1Array(name, strlen(names[i])));
ASSERT(!str->IsNull());
ASSERT(str->HasHash());
ASSERT(str->IsCanonical());
symbol_handles_[i] = str;
}
// Lookup Latin1 character Symbols and cache them in read only handles,
// so that Symbols::FromCharCode is fast.
for (intptr_t c = 0; c < kNumberOfOneCharCodeSymbols; c++) {
intptr_t idx = (kNullCharId + c);
ASSERT(idx < kMaxPredefinedId);
ASSERT(Utf::IsLatin1(c));
uint8_t ch = static_cast<uint8_t>(c);
String* str = String::ReadOnlyHandle();
*str ^= table.GetOrNull(Latin1Array(&ch, 1));
ASSERT(!str->IsNull());
ASSERT(str->HasHash());
ASSERT(str->IsCanonical());
predefined_[c] = str->raw();
symbol_handles_[idx] = str;
}
vm_isolate->object_store()->set_symbol_table(table.Release());
}
void Symbols::AddPredefinedSymbolsToIsolate() {
// Should only be run by regular Dart isolates.
Thread* thread = Thread::Current();
Isolate* isolate = thread->isolate();
Zone* zone = thread->zone();
ASSERT(isolate != Dart::vm_isolate());
String& str = String::Handle(zone);
SymbolTable table(zone, isolate->object_store()->symbol_table());
// Set up all the predefined string symbols and create symbols for
// language keywords.
for (intptr_t i = 1; i < Symbols::kNullCharId; i++) {
str = OneByteString::New(names[i], Heap::kOld);
str.Hash();
str.SetCanonical();
bool present = table.Insert(str);
ASSERT(!present);
}
// Add Latin1 characters as Symbols, so that Symbols::FromCharCode is fast.
for (intptr_t c = 0; c < kNumberOfOneCharCodeSymbols; c++) {
intptr_t idx = (kNullCharId + c);
ASSERT(idx < kMaxPredefinedId);
ASSERT(Utf::IsLatin1(c));
uint8_t ch = static_cast<uint8_t>(c);
str = OneByteString::New(&ch, 1, Heap::kOld);
str.Hash();
str.SetCanonical();
bool present = table.Insert(str);
ASSERT(!present);
}
isolate->object_store()->set_symbol_table(table.Release());
}
void Symbols::SetupSymbolTable(Isolate* isolate) {
ASSERT(isolate != NULL);
// Setup the symbol table used within the String class.
const intptr_t initial_size = (isolate == Dart::vm_isolate()) ?
kInitialVMIsolateSymtabSize : kInitialSymtabSize;
Array& array =
Array::Handle(HashTables::New<SymbolTable>(initial_size, Heap::kOld));
isolate->object_store()->set_symbol_table(array);
}
void Symbols::GetStats(Isolate* isolate, intptr_t* size, intptr_t* capacity) {
ASSERT(isolate != NULL);
SymbolTable table(isolate->object_store()->symbol_table());
*size = table.NumOccupied();
*capacity = table.NumEntries();
table.Release();
}
RawString* Symbols::New(const char* cstr, intptr_t len) {
ASSERT((cstr != NULL) && (len >= 0));
const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(cstr);
return Symbols::FromUTF8(utf8_array, len);
}
RawString* Symbols::FromUTF8(const uint8_t* utf8_array, intptr_t array_len) {
if (array_len == 0 || utf8_array == NULL) {
return FromLatin1(reinterpret_cast<uint8_t*>(NULL), 0);
}
Utf8::Type type;
intptr_t len = Utf8::CodeUnitCount(utf8_array, array_len, &type);
ASSERT(len != 0);
Zone* zone = Thread::Current()->zone();
if (type == Utf8::kLatin1) {
uint8_t* characters = zone->Alloc<uint8_t>(len);
Utf8::DecodeToLatin1(utf8_array, array_len, characters, len);
return FromLatin1(characters, len);
}
ASSERT((type == Utf8::kBMP) || (type == Utf8::kSupplementary));
uint16_t* characters = zone->Alloc<uint16_t>(len);
Utf8::DecodeToUTF16(utf8_array, array_len, characters, len);
return FromUTF16(characters, len);
}
RawString* Symbols::FromLatin1(const uint8_t* latin1_array, intptr_t len) {
return NewSymbol(Latin1Array(latin1_array, len));
}
RawString* Symbols::FromUTF16(const uint16_t* utf16_array, intptr_t len) {
return NewSymbol(UTF16Array(utf16_array, len));
}
RawString* Symbols::FromUTF32(const int32_t* utf32_array, intptr_t len) {
return NewSymbol(UTF32Array(utf32_array, len));
}
RawString* Symbols::FromConcat(const String& str1, const String& str2) {
if (str1.Length() == 0) {
return New(str2);
} else if (str2.Length() == 0) {
return New(str1);
} else {
return NewSymbol(ConcatString(str1, str2));
}
}
// TODO(srdjan): If this becomes performance critical code, consider looking
// up symbol from hash of pieces instead of concatenating them first into
// a string.
RawString* Symbols::FromConcatAll(
const GrowableHandlePtrArray<const String>& strs) {
const intptr_t strs_length = strs.length();
GrowableArray<intptr_t> lengths(strs_length);
intptr_t len_sum = 0;
const intptr_t kOneByteChar = 1;
intptr_t char_size = kOneByteChar;
for (intptr_t i = 0; i < strs_length; i++) {
const String& str = strs[i];
const intptr_t str_len = str.Length();
if ((String::kMaxElements - len_sum) < str_len) {
Exceptions::ThrowOOM();
UNREACHABLE();
}
len_sum += str_len;
lengths.Add(str_len);
char_size = Utils::Maximum(char_size, str.CharSize());
}
const bool is_one_byte_string = char_size == kOneByteChar;
Zone* zone = Thread::Current()->zone();
if (is_one_byte_string) {
uint8_t* buffer = zone->Alloc<uint8_t>(len_sum);
const uint8_t* const orig_buffer = buffer;
for (intptr_t i = 0; i < strs_length; i++) {
NoSafepointScope no_safepoint;
intptr_t str_len = lengths[i];
if (str_len > 0) {
const String& str = strs[i];
ASSERT(str.IsOneByteString() || str.IsExternalOneByteString());
const uint8_t* src_p = str.IsOneByteString() ?
OneByteString::CharAddr(str, 0) :
ExternalOneByteString::CharAddr(str, 0);
memmove(buffer, src_p, str_len);
buffer += str_len;
}
}
ASSERT(len_sum == buffer - orig_buffer);
return Symbols::FromLatin1(orig_buffer, len_sum);
} else {
uint16_t* buffer = zone->Alloc<uint16_t>(len_sum);
const uint16_t* const orig_buffer = buffer;
for (intptr_t i = 0; i < strs_length; i++) {
NoSafepointScope no_safepoint;
intptr_t str_len = lengths[i];
if (str_len > 0) {
const String& str = strs[i];
if (str.IsTwoByteString()) {
memmove(buffer, TwoByteString::CharAddr(str, 0), str_len * 2);
} else if (str.IsExternalTwoByteString()) {
memmove(buffer, ExternalTwoByteString::CharAddr(str, 0), str_len * 2);
} else {
// One-byte to two-byte string copy.
ASSERT(str.IsOneByteString() || str.IsExternalOneByteString());
const uint8_t* src_p = str.IsOneByteString() ?
OneByteString::CharAddr(str, 0) :
ExternalOneByteString::CharAddr(str, 0);
for (int n = 0; n < str_len; n++) {
buffer[n] = src_p[n];
}
}
buffer += str_len;
}
}
ASSERT(len_sum == buffer - orig_buffer);
return Symbols::FromUTF16(orig_buffer, len_sum);
}
}
// StringType can be StringSlice, ConcatString, or {Latin1,UTF16,UTF32}Array.
template<typename StringType>
RawString* Symbols::NewSymbol(const StringType& str) {
Thread* thread = Thread::Current();
Isolate* isolate = thread->isolate();
Zone* zone = thread->zone();
String& symbol = String::Handle(zone);
{
Isolate* vm_isolate = Dart::vm_isolate();
SymbolTable table(zone, vm_isolate->object_store()->symbol_table());
symbol ^= table.GetOrNull(str);
table.Release();
}
if (symbol.IsNull()) {
SymbolTable table(zone, isolate->object_store()->symbol_table());
symbol ^= table.InsertNewOrGet(str);
isolate->object_store()->set_symbol_table(table.Release());
}
ASSERT(symbol.IsSymbol());
ASSERT(symbol.HasHash());
return symbol.raw();
}
template<typename StringType>
RawString* Symbols::Lookup(const StringType& str) {
Thread* thread = Thread::Current();
Isolate* isolate = thread->isolate();
Zone* zone = thread->zone();
String& symbol = String::Handle(zone);
{
Isolate* vm_isolate = Dart::vm_isolate();
SymbolTable table(zone, vm_isolate->object_store()->symbol_table());
symbol ^= table.GetOrNull(str);
table.Release();
}
if (symbol.IsNull()) {
SymbolTable table(zone, isolate->object_store()->symbol_table());
symbol ^= table.GetOrNull(str);
table.Release();
}
ASSERT(symbol.IsNull() || symbol.IsSymbol());
ASSERT(symbol.IsNull() || symbol.HasHash());
return symbol.raw();
}
RawString* Symbols::LookupFromConcat(const String& str1, const String& str2) {
if (str1.Length() == 0) {
return Lookup(str2);
} else if (str2.Length() == 0) {
return Lookup(str1);
} else {
return Lookup(ConcatString(str1, str2));
}
}
RawString* Symbols::New(const String& str) {
if (str.IsSymbol()) {
return str.raw();
}
return New(str, 0, str.Length());
}
RawString* Symbols::New(const String& str, intptr_t begin_index, intptr_t len) {
return NewSymbol(StringSlice(str, begin_index, len));
}
RawString* Symbols::NewFormatted(const char* format, ...) {
va_list args;
va_start(args, format);
RawString* result = NewFormattedV(format, args);
NoSafepointScope no_safepoint;
va_end(args);
return result;
}
RawString* Symbols::NewFormattedV(const char* format, va_list args) {
va_list args_copy;
va_copy(args_copy, args);
intptr_t len = OS::VSNPrint(NULL, 0, format, args_copy);
va_end(args_copy);
Zone* zone = Thread::Current()->zone();
char* buffer = zone->Alloc<char>(len + 1);
OS::VSNPrint(buffer, (len + 1), format, args);
return Symbols::New(buffer);
}
RawString* Symbols::FromCharCode(int32_t char_code) {
if (char_code > kMaxOneCharCodeSymbol) {
return FromUTF32(&char_code, 1);
}
return predefined_[char_code];
}
void Symbols::DumpStats() {
if (FLAG_dump_symbol_stats) {
intptr_t size = -1;
intptr_t capacity = -1;
// First dump VM symbol table stats.
GetStats(Dart::vm_isolate(), &size, &capacity);
OS::Print("VM Isolate: Number of symbols : %" Pd "\n", size);
OS::Print("VM Isolate: Symbol table capacity : %" Pd "\n", capacity);
// Now dump regular isolate symbol table stats.
GetStats(Isolate::Current(), &size, &capacity);
OS::Print("Isolate: Number of symbols : %" Pd "\n", size);
OS::Print("Isolate: Symbol table capacity : %" Pd "\n", capacity);
// TODO(koda): Consider recording growth and collision stats in HashTable,
// in DEBUG mode.
}
}
intptr_t Symbols::LookupVMSymbol(RawObject* obj) {
for (intptr_t i = 1; i < Symbols::kMaxPredefinedId; i++) {
if (symbol_handles_[i]->raw() == obj) {
return (i + kMaxPredefinedObjectIds);
}
}
return kInvalidIndex;
}
RawObject* Symbols::GetVMSymbol(intptr_t object_id) {
ASSERT(IsVMSymbolId(object_id));
intptr_t i = (object_id - kMaxPredefinedObjectIds);
if ((i > kIllegal) && (i < Symbols::kMaxPredefinedId)) {
return symbol_handles_[i]->raw();
}
return Object::null();
}
} // namespace dart