| // Copyright (c) 2012, the Dart project authors.  Please see the AUTHORS file | 
 | // for details. All rights reserved. Use of this source code is governed by a | 
 | // BSD-style license that can be found in the LICENSE file. | 
 |  | 
 | #include "vm/symbols.h" | 
 |  | 
 | #include "platform/unicode.h" | 
 | #include "vm/canonical_tables.h" | 
 | #include "vm/handles.h" | 
 | #include "vm/hash_table.h" | 
 | #include "vm/heap/safepoint.h" | 
 | #include "vm/isolate.h" | 
 | #include "vm/object.h" | 
 | #include "vm/object_store.h" | 
 | #include "vm/raw_object.h" | 
 | #include "vm/reusable_handles.h" | 
 | #include "vm/visitor.h" | 
 |  | 
 | namespace dart { | 
 |  | 
 | StringPtr Symbols::predefined_[Symbols::kNumberOfOneCharCodeSymbols]; | 
 | String* Symbols::symbol_handles_[Symbols::kMaxPredefinedId]; | 
 |  | 
 | static const char* const names[] = { | 
 |     // clang-format off | 
 |   nullptr, | 
 | #define DEFINE_SYMBOL_LITERAL(symbol, literal) literal, | 
 |   PREDEFINED_SYMBOLS_LIST(DEFINE_SYMBOL_LITERAL) | 
 | #undef DEFINE_SYMBOL_LITERAL | 
 |   "",  // matches kTokenTableStart. | 
 | #define DEFINE_TOKEN_SYMBOL_INDEX(t, s, p, a) s, | 
 |   DART_TOKEN_LIST(DEFINE_TOKEN_SYMBOL_INDEX) | 
 |   DART_KEYWORD_LIST(DEFINE_TOKEN_SYMBOL_INDEX) | 
 | #undef DEFINE_TOKEN_SYMBOL_INDEX | 
 |     // clang-format on | 
 | }; | 
 |  | 
 | StringPtr StringFrom(const uint8_t* data, intptr_t len, Heap::Space space) { | 
 |   return String::FromLatin1(data, len, space); | 
 | } | 
 |  | 
 | StringPtr StringFrom(const uint16_t* data, intptr_t len, Heap::Space space) { | 
 |   return String::FromUTF16(data, len, space); | 
 | } | 
 |  | 
 | StringPtr StringSlice::ToSymbol() const { | 
 |   if (is_all() && str_.IsOld()) { | 
 |     str_.SetCanonical(); | 
 |     return str_.ptr(); | 
 |   } else { | 
 |     String& result = | 
 |         String::Handle(String::SubString(str_, begin_index_, len_, Heap::kOld)); | 
 |     result.SetCanonical(); | 
 |     result.SetHash(hash_); | 
 |     return result.ptr(); | 
 |   } | 
 | } | 
 |  | 
 | StringPtr ConcatString::ToSymbol() const { | 
 |   String& result = String::Handle(String::Concat(str1_, str2_, Heap::kOld)); | 
 |   result.SetCanonical(); | 
 |   result.SetHash(hash_); | 
 |   return result.ptr(); | 
 | } | 
 |  | 
 | const char* Symbols::Name(SymbolId symbol) { | 
 |   ASSERT((symbol > kIllegal) && (symbol < kNullCharId)); | 
 |   return names[symbol]; | 
 | } | 
 |  | 
 | const String& Symbols::Token(Token::Kind token) { | 
 |   const int tok_index = token; | 
 |   ASSERT((0 <= tok_index) && (tok_index < Token::kNumTokens)); | 
 |   // First keyword symbol is in symbol_handles_[kTokenTableStart + 1]. | 
 |   const intptr_t token_id = Symbols::kTokenTableStart + 1 + tok_index; | 
 |   ASSERT(symbol_handles_[token_id] != nullptr); | 
 |   return *symbol_handles_[token_id]; | 
 | } | 
 |  | 
 | void Symbols::Init(IsolateGroup* vm_isolate_group) { | 
 |   // Should only be run by the vm isolate. | 
 |   ASSERT(IsolateGroup::Current() == Dart::vm_isolate_group()); | 
 |   ASSERT(vm_isolate_group == Dart::vm_isolate_group()); | 
 |   Zone* zone = Thread::Current()->zone(); | 
 |  | 
 |   // Create and setup a symbol table in the vm isolate. | 
 |   SetupSymbolTable(vm_isolate_group); | 
 |  | 
 |   // Create all predefined symbols. | 
 |   ASSERT((sizeof(names) / sizeof(const char*)) == Symbols::kNullCharId); | 
 |  | 
 |   CanonicalStringSet table(zone, | 
 |                            vm_isolate_group->object_store()->symbol_table()); | 
 |  | 
 |   // First set up all the predefined string symbols. | 
 |   // Create symbols for language keywords. Some keywords are equal to | 
 |   // symbols we already created, so use New() instead of Add() to ensure | 
 |   // that the symbols are canonicalized. | 
 |   for (intptr_t i = 1; i < Symbols::kNullCharId; i++) { | 
 |     String* str = String::ReadOnlyHandle(); | 
 |     *str = OneByteString::New(names[i], Heap::kOld); | 
 |     str->Hash(); | 
 |     *str ^= table.InsertOrGet(*str); | 
 |     str->SetCanonical();  // Make canonical once entered. | 
 |     symbol_handles_[i] = str; | 
 |   } | 
 |  | 
 |   // Add Latin1 characters as Symbols, so that Symbols::FromCharCode is fast. | 
 |   for (intptr_t c = 0; c < kNumberOfOneCharCodeSymbols; c++) { | 
 |     intptr_t idx = (kNullCharId + c); | 
 |     ASSERT(idx < kMaxPredefinedId); | 
 |     ASSERT(Utf::IsLatin1(c)); | 
 |     uint8_t ch = static_cast<uint8_t>(c); | 
 |     String* str = String::ReadOnlyHandle(); | 
 |     *str = OneByteString::New(&ch, 1, Heap::kOld); | 
 |     str->Hash(); | 
 |     *str ^= table.InsertOrGet(*str); | 
 |     ASSERT(predefined_[c] == nullptr); | 
 |     str->SetCanonical();  // Make canonical once entered. | 
 |     predefined_[c] = str->ptr(); | 
 |     symbol_handles_[idx] = str; | 
 |   } | 
 |  | 
 |   vm_isolate_group->object_store()->set_symbol_table(table.Release()); | 
 | } | 
 |  | 
 | void Symbols::InitFromSnapshot(IsolateGroup* vm_isolate_group) { | 
 |   // Should only be run by the vm isolate. | 
 |   ASSERT(IsolateGroup::Current() == Dart::vm_isolate_group()); | 
 |   ASSERT(vm_isolate_group == Dart::vm_isolate_group()); | 
 |   Zone* zone = Thread::Current()->zone(); | 
 |  | 
 |   CanonicalStringSet table(zone, | 
 |                            vm_isolate_group->object_store()->symbol_table()); | 
 |  | 
 |   // Lookup all the predefined string symbols and language keyword symbols | 
 |   // and cache them in the read only handles for fast access. | 
 |   for (intptr_t i = 1; i < Symbols::kNullCharId; i++) { | 
 |     String* str = String::ReadOnlyHandle(); | 
 |     const unsigned char* name = | 
 |         reinterpret_cast<const unsigned char*>(names[i]); | 
 |     *str ^= table.GetOrNull(Latin1Array(name, strlen(names[i]))); | 
 |     ASSERT(!str->IsNull()); | 
 |     ASSERT(str->HasHash()); | 
 |     ASSERT(str->IsCanonical()); | 
 |     symbol_handles_[i] = str; | 
 |   } | 
 |  | 
 |   // Lookup Latin1 character Symbols and cache them in read only handles, | 
 |   // so that Symbols::FromCharCode is fast. | 
 |   for (intptr_t c = 0; c < kNumberOfOneCharCodeSymbols; c++) { | 
 |     intptr_t idx = (kNullCharId + c); | 
 |     ASSERT(idx < kMaxPredefinedId); | 
 |     ASSERT(Utf::IsLatin1(c)); | 
 |     uint8_t ch = static_cast<uint8_t>(c); | 
 |     String* str = String::ReadOnlyHandle(); | 
 |     *str ^= table.GetOrNull(Latin1Array(&ch, 1)); | 
 |     ASSERT(!str->IsNull()); | 
 |     ASSERT(str->HasHash()); | 
 |     ASSERT(str->IsCanonical()); | 
 |     predefined_[c] = str->ptr(); | 
 |     symbol_handles_[idx] = str; | 
 |   } | 
 |  | 
 |   vm_isolate_group->object_store()->set_symbol_table(table.Release()); | 
 | } | 
 |  | 
 | void Symbols::SetupSymbolTable(IsolateGroup* isolate_group) { | 
 |   ASSERT(isolate_group != nullptr); | 
 |  | 
 |   // Setup the symbol table used within the String class. | 
 |   const intptr_t initial_size = (isolate_group == Dart::vm_isolate_group()) | 
 |                                     ? kInitialVMIsolateSymtabSize | 
 |                                     : kInitialSymtabSize; | 
 |   class WeakArray& array = WeakArray::Handle( | 
 |       HashTables::New<CanonicalStringSet>(initial_size, Heap::kOld)); | 
 |   isolate_group->object_store()->set_symbol_table(array); | 
 | } | 
 |  | 
 | void Symbols::GetStats(IsolateGroup* isolate_group, | 
 |                        intptr_t* size, | 
 |                        intptr_t* capacity) { | 
 |   ASSERT(isolate_group != nullptr); | 
 |   CanonicalStringSet table(isolate_group->object_store()->symbol_table()); | 
 |   *size = table.NumOccupied(); | 
 |   *capacity = table.NumEntries(); | 
 |   table.Release(); | 
 | } | 
 |  | 
 | StringPtr Symbols::New(Thread* thread, const char* cstr, intptr_t len) { | 
 |   ASSERT((cstr != nullptr) && (len >= 0)); | 
 |   const uint8_t* utf8_array = reinterpret_cast<const uint8_t*>(cstr); | 
 |   return Symbols::FromUTF8(thread, utf8_array, len); | 
 | } | 
 |  | 
 | StringPtr Symbols::FromUTF8(Thread* thread, | 
 |                             const uint8_t* utf8_array, | 
 |                             intptr_t array_len) { | 
 |   if (array_len == 0 || utf8_array == nullptr) { | 
 |     return FromLatin1(thread, static_cast<uint8_t*>(nullptr), 0); | 
 |   } | 
 |   Utf8::Type type; | 
 |   intptr_t len = Utf8::CodeUnitCount(utf8_array, array_len, &type); | 
 |   ASSERT(len != 0); | 
 |   Zone* zone = thread->zone(); | 
 |   if (type == Utf8::kLatin1) { | 
 |     uint8_t* characters = zone->Alloc<uint8_t>(len); | 
 |     if (!Utf8::DecodeToLatin1(utf8_array, array_len, characters, len)) { | 
 |       Utf8::ReportInvalidByte(utf8_array, array_len, len); | 
 |       return String::null(); | 
 |     } | 
 |     return FromLatin1(thread, characters, len); | 
 |   } | 
 |   ASSERT((type == Utf8::kBMP) || (type == Utf8::kSupplementary)); | 
 |   uint16_t* characters = zone->Alloc<uint16_t>(len); | 
 |   if (!Utf8::DecodeToUTF16(utf8_array, array_len, characters, len)) { | 
 |     Utf8::ReportInvalidByte(utf8_array, array_len, len); | 
 |     return String::null(); | 
 |   } | 
 |   return FromUTF16(thread, characters, len); | 
 | } | 
 |  | 
 | StringPtr Symbols::FromLatin1(Thread* thread, | 
 |                               const uint8_t* latin1_array, | 
 |                               intptr_t len) { | 
 |   return NewSymbol(thread, Latin1Array(latin1_array, len)); | 
 | } | 
 |  | 
 | StringPtr Symbols::FromUTF16(Thread* thread, | 
 |                              const uint16_t* utf16_array, | 
 |                              intptr_t len) { | 
 |   return NewSymbol(thread, UTF16Array(utf16_array, len)); | 
 | } | 
 |  | 
 | StringPtr Symbols::FromConcat(Thread* thread, | 
 |                               const String& str1, | 
 |                               const String& str2) { | 
 |   if (str1.Length() == 0) { | 
 |     return New(thread, str2); | 
 |   } else if (str2.Length() == 0) { | 
 |     return New(thread, str1); | 
 |   } else { | 
 |     return NewSymbol(thread, ConcatString(str1, str2)); | 
 |   } | 
 | } | 
 |  | 
 | StringPtr Symbols::FromGet(Thread* thread, const String& str) { | 
 |   return FromConcat(thread, GetterPrefix(), str); | 
 | } | 
 |  | 
 | StringPtr Symbols::FromSet(Thread* thread, const String& str) { | 
 |   return FromConcat(thread, SetterPrefix(), str); | 
 | } | 
 |  | 
 | StringPtr Symbols::FromDot(Thread* thread, const String& str) { | 
 |   return FromConcat(thread, str, Dot()); | 
 | } | 
 |  | 
 | // TODO(srdjan): If this becomes performance critical code, consider looking | 
 | // up symbol from hash of pieces instead of concatenating them first into | 
 | // a string. | 
 | StringPtr Symbols::FromConcatAll( | 
 |     Thread* thread, | 
 |     const GrowableHandlePtrArray<const String>& strs) { | 
 |   const intptr_t strs_length = strs.length(); | 
 |   GrowableArray<intptr_t> lengths(strs_length); | 
 |  | 
 |   intptr_t len_sum = 0; | 
 |   const intptr_t kOneByteChar = 1; | 
 |   intptr_t char_size = kOneByteChar; | 
 |  | 
 |   for (intptr_t i = 0; i < strs_length; i++) { | 
 |     const String& str = strs[i]; | 
 |     const intptr_t str_len = str.Length(); | 
 |     if ((String::kMaxElements - len_sum) < str_len) { | 
 |       Exceptions::ThrowOOM(); | 
 |       UNREACHABLE(); | 
 |     } | 
 |     len_sum += str_len; | 
 |     lengths.Add(str_len); | 
 |     char_size = Utils::Maximum(char_size, str.CharSize()); | 
 |   } | 
 |   const bool is_one_byte_string = char_size == kOneByteChar; | 
 |  | 
 |   Zone* zone = thread->zone(); | 
 |   if (is_one_byte_string) { | 
 |     uint8_t* buffer = zone->Alloc<uint8_t>(len_sum); | 
 |     const uint8_t* const orig_buffer = buffer; | 
 |     for (intptr_t i = 0; i < strs_length; i++) { | 
 |       NoSafepointScope no_safepoint; | 
 |       intptr_t str_len = lengths[i]; | 
 |       if (str_len > 0) { | 
 |         const String& str = strs[i]; | 
 |         ASSERT(str.IsOneByteString()); | 
 |         const uint8_t* src_p = OneByteString::DataStart(str); | 
 |         memmove(buffer, src_p, str_len); | 
 |         buffer += str_len; | 
 |       } | 
 |     } | 
 |     ASSERT(len_sum == buffer - orig_buffer); | 
 |     return Symbols::FromLatin1(thread, orig_buffer, len_sum); | 
 |   } else { | 
 |     uint16_t* buffer = zone->Alloc<uint16_t>(len_sum); | 
 |     const uint16_t* const orig_buffer = buffer; | 
 |     for (intptr_t i = 0; i < strs_length; i++) { | 
 |       NoSafepointScope no_safepoint; | 
 |       intptr_t str_len = lengths[i]; | 
 |       if (str_len > 0) { | 
 |         const String& str = strs[i]; | 
 |         if (str.IsTwoByteString()) { | 
 |           memmove(buffer, TwoByteString::DataStart(str), str_len * 2); | 
 |         } else { | 
 |           // One-byte to two-byte string copy. | 
 |           ASSERT(str.IsOneByteString()); | 
 |           const uint8_t* src_p = OneByteString::DataStart(str); | 
 |           for (int n = 0; n < str_len; n++) { | 
 |             buffer[n] = src_p[n]; | 
 |           } | 
 |         } | 
 |         buffer += str_len; | 
 |       } | 
 |     } | 
 |     ASSERT(len_sum == buffer - orig_buffer); | 
 |     return Symbols::FromUTF16(thread, orig_buffer, len_sum); | 
 |   } | 
 | } | 
 |  | 
 | // StringType can be StringSlice, ConcatString, or {Latin1,UTF16}Array. | 
 | template <typename StringType> | 
 | StringPtr Symbols::NewSymbol(Thread* thread, const StringType& str) { | 
 |   REUSABLE_OBJECT_HANDLESCOPE(thread); | 
 |   REUSABLE_SMI_HANDLESCOPE(thread); | 
 |   REUSABLE_WEAK_ARRAY_HANDLESCOPE(thread); | 
 |   String& symbol = String::Handle(thread->zone()); | 
 |   dart::Object& key = thread->ObjectHandle(); | 
 |   Smi& value = thread->SmiHandle(); | 
 |   class WeakArray& data = thread->WeakArrayHandle(); | 
 |   { | 
 |     auto vm_isolate_group = Dart::vm_isolate_group(); | 
 |     data = vm_isolate_group->object_store()->symbol_table(); | 
 |     CanonicalStringSet table(&key, &value, &data); | 
 |     symbol ^= table.GetOrNull(str); | 
 |     table.Release(); | 
 |   } | 
 |   if (symbol.IsNull()) { | 
 |     IsolateGroup* group = thread->isolate_group(); | 
 |     ObjectStore* object_store = group->object_store(); | 
 |     RELEASE_ASSERT(thread->CanAcquireSafepointLocks()); | 
 |  | 
 |     // Most common case: The symbol is already in the table. | 
 |     { | 
 |       // We do allow lock-free concurrent read access to the symbol table. | 
 |       // Both, the array in the ObjectStore as well as elements in the array | 
 |       // are accessed via store-release/load-acquire barriers. | 
 |       data = object_store->symbol_table(); | 
 |       CanonicalStringSet table(&key, &value, &data); | 
 |       symbol ^= table.GetOrNull(str); | 
 |       table.Release(); | 
 |     } | 
 |     // Otherwise we'll have to get exclusive access and get-or-insert it. | 
 |     if (symbol.IsNull()) { | 
 |       SafepointMutexLocker ml(group->symbols_mutex()); | 
 |       data = object_store->symbol_table(); | 
 |       CanonicalStringSet table(&key, &value, &data); | 
 |       symbol ^= table.InsertNewOrGet(str); | 
 |       object_store->set_symbol_table(table.Release()); | 
 |     } | 
 |   } | 
 |   ASSERT(symbol.IsSymbol()); | 
 |   ASSERT(symbol.HasHash()); | 
 |   return symbol.ptr(); | 
 | } | 
 |  | 
 | template <typename StringType> | 
 | StringPtr Symbols::Lookup(Thread* thread, const StringType& str) { | 
 |   REUSABLE_OBJECT_HANDLESCOPE(thread); | 
 |   REUSABLE_SMI_HANDLESCOPE(thread); | 
 |   REUSABLE_ARRAY_HANDLESCOPE(thread); | 
 |   String& symbol = String::Handle(thread->zone()); | 
 |   dart::Object& key = thread->ObjectHandle(); | 
 |   Smi& value = thread->SmiHandle(); | 
 |   class WeakArray& data = thread->WeakArrayHandle(); | 
 |   { | 
 |     auto vm_isolate_group = Dart::vm_isolate_group(); | 
 |     data = vm_isolate_group->object_store()->symbol_table(); | 
 |     CanonicalStringSet table(&key, &value, &data); | 
 |     symbol ^= table.GetOrNull(str); | 
 |     table.Release(); | 
 |   } | 
 |   if (symbol.IsNull()) { | 
 |     IsolateGroup* group = thread->isolate_group(); | 
 |     ObjectStore* object_store = group->object_store(); | 
 |     // See `Symbols::NewSymbol` for more information why we separate the two | 
 |     // cases. | 
 |     if (thread->OwnsSafepoint()) { | 
 |       data = object_store->symbol_table(); | 
 |       CanonicalStringSet table(&key, &value, &data); | 
 |       symbol ^= table.GetOrNull(str); | 
 |       table.Release(); | 
 |     } else { | 
 |       data = object_store->symbol_table(); | 
 |       CanonicalStringSet table(&key, &value, &data); | 
 |       symbol ^= table.GetOrNull(str); | 
 |       table.Release(); | 
 |     } | 
 |   } | 
 |   ASSERT(symbol.IsNull() || symbol.IsSymbol()); | 
 |   ASSERT(symbol.IsNull() || symbol.HasHash()); | 
 |   return symbol.ptr(); | 
 | } | 
 |  | 
 | StringPtr Symbols::LookupFromConcat(Thread* thread, | 
 |                                     const String& str1, | 
 |                                     const String& str2) { | 
 |   if (str1.Length() == 0) { | 
 |     return Lookup(thread, str2); | 
 |   } else if (str2.Length() == 0) { | 
 |     return Lookup(thread, str1); | 
 |   } else { | 
 |     return Lookup(thread, ConcatString(str1, str2)); | 
 |   } | 
 | } | 
 |  | 
 | StringPtr Symbols::LookupFromGet(Thread* thread, const String& str) { | 
 |   return LookupFromConcat(thread, GetterPrefix(), str); | 
 | } | 
 |  | 
 | StringPtr Symbols::LookupFromSet(Thread* thread, const String& str) { | 
 |   return LookupFromConcat(thread, SetterPrefix(), str); | 
 | } | 
 |  | 
 | StringPtr Symbols::LookupFromDot(Thread* thread, const String& str) { | 
 |   return LookupFromConcat(thread, str, Dot()); | 
 | } | 
 |  | 
 | StringPtr Symbols::New(Thread* thread, const String& str) { | 
 |   if (str.IsSymbol()) { | 
 |     return str.ptr(); | 
 |   } | 
 |   return New(thread, str, 0, str.Length()); | 
 | } | 
 |  | 
 | StringPtr Symbols::New(Thread* thread, | 
 |                        const String& str, | 
 |                        intptr_t begin_index, | 
 |                        intptr_t len) { | 
 |   return NewSymbol(thread, StringSlice(str, begin_index, len)); | 
 | } | 
 |  | 
 | StringPtr Symbols::NewFormatted(Thread* thread, const char* format, ...) { | 
 |   va_list args; | 
 |   va_start(args, format); | 
 |   StringPtr result = NewFormattedV(thread, format, args); | 
 |   NoSafepointScope no_safepoint; | 
 |   va_end(args); | 
 |   return result; | 
 | } | 
 |  | 
 | StringPtr Symbols::NewFormattedV(Thread* thread, | 
 |                                  const char* format, | 
 |                                  va_list args) { | 
 |   va_list args_copy; | 
 |   va_copy(args_copy, args); | 
 |   intptr_t len = Utils::VSNPrint(nullptr, 0, format, args_copy); | 
 |   va_end(args_copy); | 
 |  | 
 |   Zone* zone = Thread::Current()->zone(); | 
 |   char* buffer = zone->Alloc<char>(len + 1); | 
 |   Utils::VSNPrint(buffer, (len + 1), format, args); | 
 |  | 
 |   return Symbols::New(thread, buffer); | 
 | } | 
 |  | 
 | StringPtr Symbols::FromCharCode(Thread* thread, uint16_t char_code) { | 
 |   if (char_code > kMaxOneCharCodeSymbol) { | 
 |     return FromUTF16(thread, &char_code, 1); | 
 |   } | 
 |   return predefined_[char_code]; | 
 | } | 
 |  | 
 | void Symbols::DumpStats(IsolateGroup* isolate_group) { | 
 |   intptr_t size = -1; | 
 |   intptr_t capacity = -1; | 
 |   // First dump VM symbol table stats. | 
 |   GetStats(Dart::vm_isolate_group(), &size, &capacity); | 
 |   OS::PrintErr("VM Isolate: Number of symbols : %" Pd "\n", size); | 
 |   OS::PrintErr("VM Isolate: Symbol table capacity : %" Pd "\n", capacity); | 
 |   // Now dump regular isolate symbol table stats. | 
 |   GetStats(isolate_group, &size, &capacity); | 
 |   OS::PrintErr("Isolate: Number of symbols : %" Pd "\n", size); | 
 |   OS::PrintErr("Isolate: Symbol table capacity : %" Pd "\n", capacity); | 
 |   // TODO(koda): Consider recording growth and collision stats in HashTable, | 
 |   // in DEBUG mode. | 
 | } | 
 |  | 
 | void Symbols::DumpTable(IsolateGroup* isolate_group) { | 
 |   OS::PrintErr("symbols:\n"); | 
 |   CanonicalStringSet table(isolate_group->object_store()->symbol_table()); | 
 |   table.Dump(); | 
 |   table.Release(); | 
 | } | 
 |  | 
 | }  // namespace dart |