[VM/Compiler/AOT] Bare instructions 7: Trampolines for out-of-range calls

This CL adds support to the ImageWriter for writing opaque blocks of
trampoline bytes.
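
The relocator hands such a block to the writer as an ImageWriterCommand
carrying the raw buffer and its length. A condensed sketch, based on the
command type and relocator helpers added further down in this CL:

  // The trampoline slot is a zero-filled buffer; the relocator patches the
  // actual jump pattern into it once the final destination offset is known.
  uint8_t* trampoline_bytes = new uint8_t[kTrampolineSize];
  memset(trampoline_bytes, 0x00, kTrampolineSize);
  commands_->Add(
      ImageWriterCommand(next_text_offset_, trampoline_bytes, kTrampolineSize));
  next_text_offset_ += kTrampolineSize;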

The AOT code relocator is adapted to handle the limited range of pc-relative
calls and to insert trampolines where needed. The algorithm tries to minimize
the number of trampolines added; for small applications that number will be 0
(a sketch of the range check follows the list below).

The unconditional pc-relative calls have limited range:
  * on ARM (+/-32 MB)
  * on ARM64 (+/-128 MB)
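
Whether a pending call can still reach a given .text offset is decided by a
simple range check, taken from the relocator changes below (the bounds are
per-architecture constants on PcRelativeCallPattern):

  bool CodeRelocator::IsTargetInRangeFor(UnresolvedCall* unresolved_call,
                                         intptr_t target_text_offset) {
    const auto forward_distance =
        target_text_offset - unresolved_call->text_offset;
    return PcRelativeCallPattern::kLowerCallingRange < forward_distance &&
           forward_distance < PcRelativeCallPattern::kUpperCallingRange;
  }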

To avoid verbose code for doubly-linked lists, this CL adds intrusive_dlist.h.
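
A minimal usage sketch of the new list (API as added in intrusive_dlist.h in
this CL; Item is a hypothetical element type used only for illustration):

  // Hypothetical payload type that becomes part of an intrusive list.
  class Item : public IntrusiveDListEntry<Item> {
    // Arbitrary payload.
  };

  IntrusiveDList<Item> worklist;
  Item a, b;
  worklist.Append(&a);
  worklist.Append(&b);
  while (!worklist.IsEmpty()) {
    Item* item = worklist.RemoveFirst();
    // Process item.
  }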

Issue https://github.com/dart-lang/sdk/issues/33274

Change-Id: I0354cf4b2dd58ed5de25d67fc818f0603a2ec501
Reviewed-on: https://dart-review.googlesource.com/c/89283
Commit-Queue: Martin Kustermann <kustermann@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
diff --git a/runtime/tests/vm/dart/bare_instructions_trampolines_test.dart b/runtime/tests/vm/dart/bare_instructions_trampolines_test.dart
new file mode 100644
index 0000000..b7d734d
--- /dev/null
+++ b/runtime/tests/vm/dart/bare_instructions_trampolines_test.dart
@@ -0,0 +1,12 @@
+// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+// VMOptions=--always-generate-trampolines-for-testing --use-bare-instructions
+
+// We use a reasonably sized test and run it with the above options.
+import 'hello_fuchsia_test.dart' as test;
+
+main(args) {
+  test.main(args);
+}
diff --git a/runtime/tests/vm/vm.status b/runtime/tests/vm/vm.status
index dda2b1d..a7a4aad 100644
--- a/runtime/tests/vm/vm.status
+++ b/runtime/tests/vm/vm.status
@@ -106,6 +106,9 @@
 dart/spawn_infinite_loop_test: SkipByDesign # VM shutdown test
 dart/spawn_shutdown_test: SkipByDesign # VM Shutdown test
 
+[ $runtime != dart_precompiled ]
+dart/bare_instructions_trampolines_test: SkipByDesign # This test is for VM AOT only.
+
 [ $system == fuchsia ]
 cc/CorelibIsolateStartup: Skip # OOM crash can bring down the OS.
 cc/Read: Fail # TODO(zra): Investigate, ../../dart/runtime/bin/file_test.cc: 34: error: expected: !file->WriteByte(1)
diff --git a/runtime/vm/compiler/relocation.cc b/runtime/vm/compiler/relocation.cc
index fc568c3..910032e 100644
--- a/runtime/vm/compiler/relocation.cc
+++ b/runtime/vm/compiler/relocation.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2018, the Dart project authors.  Please see the AUTHORS file
+// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.
 
@@ -14,151 +14,81 @@
 #if defined(DART_PRECOMPILER) && !defined(TARGET_ARCH_DBC) &&                  \
     !defined(TARGET_ARCH_IA32)
 
-class InstructionsMapTraits {
- public:
-  struct Pair {
-    RawInstructions* instructions;
-    intptr_t inst_nr;
+// Only for testing.
+DEFINE_FLAG(bool,
+            always_generate_trampolines_for_testing,
+            false,
+            "Always generate trampolines (for testing purposes).");
 
-    Pair() : instructions(nullptr), inst_nr(-1) {}
-    Pair(RawInstructions* i, intptr_t nr) : instructions(i), inst_nr(nr) {}
-  };
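+// Each trampoline occupies a slot of the maximum preferred code alignment,
+// so inserting trampolines keeps the .text offsets of subsequent
+// instructions objects aligned.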
+const intptr_t kTrampolineSize = OS::kMaxPreferredCodeAlignment;
 
-  typedef const RawInstructions* Key;
-  typedef const intptr_t Value;
-
-  static Key KeyOf(Pair kv) { return kv.instructions; }
-  static Value ValueOf(Pair kv) { return kv.inst_nr; }
-  static inline intptr_t Hashcode(Key key) {
-    return reinterpret_cast<intptr_t>(key);
-  }
-  static inline bool IsKeyEqual(Pair pair, Key key) {
-    return pair.instructions == key;
-  }
-};
-
-typedef DirectChainedHashMap<InstructionsMapTraits> InstructionsMap;
+CodeRelocator::CodeRelocator(Thread* thread,
+                             GrowableArray<RawCode*>* code_objects,
+                             GrowableArray<ImageWriterCommand>* commands)
+    : StackResource(thread),
+      code_objects_(code_objects),
+      commands_(commands),
+      kind_type_and_offset_(Smi::Handle(thread->zone())),
+      target_(Object::Handle(thread->zone())),
+      destination_(Code::Handle(thread->zone())) {}
 
 void CodeRelocator::Relocate(bool is_vm_isolate) {
-  auto zone = Thread::Current()->zone();
-  intptr_t next_text_offsets = 0;
+  Zone* zone = Thread::Current()->zone();
+  auto& current_caller = Code::Handle(zone);
+  auto& call_targets = Array::Handle(zone);
 
-  // Keeps track of mapping from Code to the index in [commands_] at which the
-  // code object's instructions are located. This allows us to calculate the
-  // distance to the destination using commands_[index].expected_offset.
-  InstructionsMap instructions_map;
-
-  // The callers which has an unresolved call.
-  GrowableArray<RawCode*> callers;
-  // The offset from the instruction at which the call happens.
-  GrowableArray<intptr_t> call_offsets;
-  // Type entry-point type we call in the destination.
-  GrowableArray<Code::CallEntryPoint> call_entry_points;
-  // The offset in the .text segment where the call happens.
-  GrowableArray<intptr_t> text_offsets;
-  // The target of the forward call.
-  GrowableArray<RawCode*> callees;
-
-  auto& targets = Array::Handle(zone);
-  auto& kind_type_and_offset = Smi::Handle(zone);
-  auto& target = Object::Handle(zone);
-  auto& destination = Code::Handle(zone);
-  auto& instructions = Instructions::Handle(zone);
-  auto& caller = Code::Handle(zone);
+  // Find out the size of the largest [RawInstructions] object.
   for (intptr_t i = 0; i < code_objects_->length(); ++i) {
-    caller = (*code_objects_)[i];
-    instructions = caller.instructions();
-
-    // If two [Code] objects point to the same [Instructions] object, we'll just
-    // use the first one (they are equivalent for all practical purposes).
-    if (instructions_map.HasKey(instructions.raw())) {
-      continue;
+    current_caller = (*code_objects_)[i];
+    const intptr_t size = current_caller.instructions()->Size();
+    if (size > max_instructions_size_) {
+      max_instructions_size_ = size;
     }
-    instructions_map.Insert({instructions.raw(), commands_->length()});
 
-    // First we'll add the instructions of [caller] itself.
-    const intptr_t active_code_text_offsets = next_text_offsets;
-    commands_->Add(ImageWriterCommand(
-        next_text_offsets, ImageWriterCommand::InsertInstructionOfCode,
-        caller.raw()));
-
-    next_text_offsets += instructions.raw()->Size();
-
-    targets = caller.static_calls_target_table();
-    if (!targets.IsNull()) {
-      StaticCallsTable calls(targets);
-      for (auto call : calls) {
-        kind_type_and_offset = call.Get<Code::kSCallTableKindAndOffset>();
-        auto kind = Code::KindField::decode(kind_type_and_offset.Value());
-        auto offset = Code::OffsetField::decode(kind_type_and_offset.Value());
-        auto entry_point =
-            Code::EntryPointField::decode(kind_type_and_offset.Value());
-
-        if (kind == Code::kCallViaCode) {
-          continue;
-        }
-
-        target = call.Get<Code::kSCallTableFunctionTarget>();
-        if (target.IsFunction()) {
-          auto& fun = Function::Cast(target);
-          ASSERT(fun.HasCode());
-          destination = fun.CurrentCode();
-          ASSERT(!destination.IsStubCode());
-        } else {
-          target = call.Get<Code::kSCallTableCodeTarget>();
-          ASSERT(target.IsCode());
-          destination = Code::Cast(target).raw();
-        }
-
-        const intptr_t start_of_call =
-            active_code_text_offsets + instructions.HeaderSize() + offset;
-
-        callers.Add(caller.raw());
-        callees.Add(destination.raw());
-        text_offsets.Add(start_of_call);
-        call_offsets.Add(offset);
-        call_entry_points.Add(entry_point);
+    call_targets = current_caller.static_calls_target_table();
+    if (!call_targets.IsNull()) {
+      StaticCallsTable calls(call_targets);
+      const intptr_t num_calls = calls.Length();
+      if (num_calls > max_calls_) {
+        max_calls_ = num_calls;
       }
     }
   }
 
-  auto& callee = Code::Handle(zone);
-  auto& caller_instruction = Instructions::Handle(zone);
-  auto& destination_instruction = Instructions::Handle(zone);
-  for (intptr_t i = 0; i < callees.length(); ++i) {
-    caller = callers[i];
-    callee = callees[i];
-    const intptr_t text_offset = text_offsets[i];
-    const intptr_t call_offset = call_offsets[i];
-    const bool use_unchecked_entry =
-        call_entry_points[i] == Code::kUncheckedEntry;
-    caller_instruction = caller.instructions();
-    destination_instruction = callee.instructions();
+  // Emit all instructions and do relocations on the way.
+  for (intptr_t i = 0; i < code_objects_->length(); ++i) {
+    current_caller = (*code_objects_)[i];
 
-    const uword entry_point = use_unchecked_entry ? callee.UncheckedEntryPoint()
-                                                  : callee.EntryPoint();
-    const intptr_t unchecked_offset =
-        destination_instruction.HeaderSize() +
-        (entry_point - destination_instruction.PayloadStart());
-
-    auto map_entry = instructions_map.Lookup(destination_instruction.raw());
-    auto& dst = (*commands_)[map_entry->inst_nr];
-    ASSERT(dst.op == ImageWriterCommand::InsertInstructionOfCode);
-
-    const int32_t distance =
-        (dst.expected_offset + unchecked_offset) - text_offset;
-    {
-      NoSafepointScope no_safepoint_scope;
-
-      PcRelativeCallPattern call(caller_instruction.PayloadStart() +
-                                 call_offset);
-      ASSERT(call.IsValid());
-      call.set_distance(static_cast<int32_t>(distance));
-      ASSERT(call.distance() == distance);
+    const intptr_t code_text_offset = next_text_offset_;
+    if (!AddInstructionsToText(current_caller.raw())) {
+      continue;
     }
+
+    call_targets = current_caller.static_calls_target_table();
+    ScanCallTargets(current_caller, call_targets, code_text_offset);
+
+    // Any unresolved calls to this instruction can be fixed now.
+    ResolveUnresolvedCallsTargeting(current_caller.instructions());
+
+    // If we have forward/backwards calls which are almost out-of-range, we'll
+    // create trampolines now.
+    BuildTrampolinesForAlmostOutOfRangeCalls();
+  }
+
+  // We're guaranteed to have all calls resolved, since
+  //   * backwards calls are resolved eagerly
+  //   * forward calls are resolved once the target is written
+  ASSERT(all_unresolved_calls_.IsEmpty());
+  ASSERT(unresolved_calls_by_destination_.IsEmpty());
+
+  // Any trampolines we created must be patched with the right offsets.
+  for (auto unresolved_trampoline : unresolved_trampolines_) {
+    ResolveTrampoline(unresolved_trampoline);
+    delete unresolved_trampoline;
   }
 
   // We're done now, so we clear out the targets tables.
+  auto& caller = Code::Handle(zone);
   if (!is_vm_isolate) {
     for (intptr_t i = 0; i < code_objects_->length(); ++i) {
       caller = (*code_objects_)[i];
@@ -167,6 +97,266 @@
   }
 }
 
+bool CodeRelocator::AddInstructionsToText(RawCode* code) {
+  RawInstructions* instructions = Code::InstructionsOf(code);
+
+  // If two [Code] objects point to the same [Instructions] object, we'll just
+  // use the first one (they are equivalent for all practical purposes).
+  if (text_offsets_.HasKey(instructions)) {
+    return false;
+  }
+  text_offsets_.Insert({instructions, next_text_offset_});
+  commands_->Add(ImageWriterCommand(next_text_offset_, code));
+  next_text_offset_ += instructions->Size();
+
+  return true;
+}
+
+void CodeRelocator::AddTrampolineToText(RawInstructions* destination,
+                                        uint8_t* trampoline_bytes,
+                                        intptr_t trampoline_length) {
+  commands_->Add(ImageWriterCommand(next_text_offset_, trampoline_bytes,
+                                    trampoline_length));
+  trampoline_text_offsets_.Insert({destination, next_text_offset_});
+  next_text_offset_ += trampoline_length;
+}
+
+void CodeRelocator::ScanCallTargets(const Code& code,
+                                    const Array& call_targets,
+                                    intptr_t code_text_offset) {
+  if (call_targets.IsNull()) {
+    return;
+  }
+  StaticCallsTable calls(call_targets);
+  for (auto call : calls) {
+    kind_type_and_offset_ = call.Get<Code::kSCallTableKindAndOffset>();
+    auto kind = Code::KindField::decode(kind_type_and_offset_.Value());
+    auto offset = Code::OffsetField::decode(kind_type_and_offset_.Value());
+    auto entry_point =
+        Code::EntryPointField::decode(kind_type_and_offset_.Value());
+
+    if (kind == Code::kCallViaCode) {
+      continue;
+    }
+
+    target_ = call.Get<Code::kSCallTableFunctionTarget>();
+    if (target_.IsFunction()) {
+      auto& fun = Function::Cast(target_);
+      ASSERT(fun.HasCode());
+      destination_ = fun.CurrentCode();
+      ASSERT(!destination_.IsStubCode());
+    } else {
+      target_ = call.Get<Code::kSCallTableCodeTarget>();
+      ASSERT(target_.IsCode());
+      destination_ = Code::Cast(target_).raw();
+    }
+
+    const intptr_t text_offset =
+        code_text_offset + Instructions::HeaderSize() + offset;
+    UnresolvedCall unresolved_call(code.raw(), offset, entry_point, text_offset,
+                                   destination_.raw());
+    if (!TryResolveBackwardsCall(&unresolved_call)) {
+      EnqueueUnresolvedCall(new UnresolvedCall(unresolved_call));
+    }
+  }
+}
+
+void CodeRelocator::EnqueueUnresolvedCall(UnresolvedCall* unresolved_call) {
+  // Add it to the min-heap by .text offset.
+  all_unresolved_calls_.Append(unresolved_call);
+
+  // Add it to callers of destination.
+  RawInstructions* destination = Code::InstructionsOf(unresolved_call->callee);
+  if (!unresolved_calls_by_destination_.HasKey(destination)) {
+    unresolved_calls_by_destination_.Insert(
+        {destination, new SameDestinationUnresolvedCallsList()});
+  }
+  unresolved_calls_by_destination_.LookupValue(destination)
+      ->Append(unresolved_call);
+}
+
+void CodeRelocator::EnqueueUnresolvedTrampoline(
+    UnresolvedTrampoline* unresolved_trampoline) {
+  unresolved_trampolines_.Add(unresolved_trampoline);
+}
+
+bool CodeRelocator::TryResolveBackwardsCall(UnresolvedCall* unresolved_call) {
+  auto callee = Code::InstructionsOf(unresolved_call->callee);
+  auto map_entry = text_offsets_.Lookup(callee);
+  if (map_entry == nullptr) return false;
+
+  ResolveCall(unresolved_call);
+  return true;
+}
+
+void CodeRelocator::ResolveUnresolvedCallsTargeting(
+    const RawInstructions* instructions) {
+  if (unresolved_calls_by_destination_.HasKey(instructions)) {
+    SameDestinationUnresolvedCallsList* calls =
+        unresolved_calls_by_destination_.LookupValue(instructions);
+    auto it = calls->Begin();
+    while (it != calls->End()) {
+      UnresolvedCall* unresolved_call = *it;
+      ++it;
+      ASSERT(Code::InstructionsOf(unresolved_call->callee) == instructions);
+      ResolveCall(unresolved_call);
+
+      // Remove the call from both lists.
+      calls->Remove(unresolved_call);
+      all_unresolved_calls_.Remove(unresolved_call);
+
+      delete unresolved_call;
+    }
+    ASSERT(calls->IsEmpty());
+    delete calls;
+    bool ok = unresolved_calls_by_destination_.Remove(instructions);
+    ASSERT(ok);
+  }
+}
+
+void CodeRelocator::ResolveCall(UnresolvedCall* unresolved_call) {
+  const auto destination_text =
+      FindDestinationInText(Code::InstructionsOf(unresolved_call->callee),
+                            unresolved_call->call_entry_point);
+
+  ResolveCallToDestination(unresolved_call, destination_text);
+}
+
+void CodeRelocator::ResolveCallToDestination(UnresolvedCall* unresolved_call,
+                                             intptr_t destination_text) {
+  const intptr_t call_text_offset = unresolved_call->text_offset;
+  const intptr_t call_offset = unresolved_call->call_offset;
+
+  auto caller = Code::InstructionsOf(unresolved_call->caller);
+  const int32_t distance = destination_text - call_text_offset;
+  {
+    NoSafepointScope no_safepoint_scope;
+
+    PcRelativeCallPattern call(Instructions::PayloadStart(caller) +
+                               call_offset);
+    ASSERT(call.IsValid());
+    call.set_distance(static_cast<int32_t>(distance));
+    ASSERT(call.distance() == distance);
+  }
+
+  unresolved_call->caller = nullptr;
+  unresolved_call->callee = nullptr;
+}
+
+void CodeRelocator::ResolveTrampoline(
+    UnresolvedTrampoline* unresolved_trampoline) {
+  const intptr_t trampoline_text_offset = unresolved_trampoline->text_offset;
+  const uword trampoline_start =
+      reinterpret_cast<uword>(unresolved_trampoline->trampoline_bytes);
+  auto call_entry_point = unresolved_trampoline->call_entry_point;
+
+  auto callee = Code::InstructionsOf(unresolved_trampoline->callee);
+  auto destination_text = FindDestinationInText(callee, call_entry_point);
+  const int32_t distance = destination_text - trampoline_text_offset;
+
+  PcRelativeTrampolineJumpPattern pattern(trampoline_start);
+  pattern.Initialize();
+  pattern.set_distance(distance);
+  ASSERT(pattern.distance() == distance);
+}
+
+bool CodeRelocator::IsTargetInRangeFor(UnresolvedCall* unresolved_call,
+                                       intptr_t target_text_offset) {
+  const auto forward_distance =
+      target_text_offset - unresolved_call->text_offset;
+  return PcRelativeCallPattern::kLowerCallingRange < forward_distance &&
+         forward_distance < PcRelativeCallPattern::kUpperCallingRange;
+}
+
+void CodeRelocator::BuildTrampolinesForAlmostOutOfRangeCalls() {
+  while (!all_unresolved_calls_.IsEmpty()) {
+    UnresolvedCall* first_unresolved_call = all_unresolved_calls_.First();
+
+    // If we can emit another instructions object without causing the unresolved
+    // forward calls to become out-of-range, we'll not resolve it yet (maybe the
+    // target function will come very soon and we don't need a trampoline at
+    // all).
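+    // The boundary is conservative: it assumes the largest instructions
+    // object comes next and that, in the worst case, a trampoline is needed
+    // for every destination that still has pending calls plus one for every
+    // call in that next object.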
+    const intptr_t future_boundary =
+        next_text_offset_ + max_instructions_size_ +
+        kTrampolineSize *
+            (unresolved_calls_by_destination_.Length() + max_calls_);
+    if (IsTargetInRangeFor(first_unresolved_call, future_boundary) &&
+        !FLAG_always_generate_trampolines_for_testing) {
+      break;
+    }
+
+    // We have a "critical" [first_unresolved_call] we have to resolve.  If an
+    // existing trampoline is in range, we use it; otherwise we create a new
+    // trampoline.
+
+    // In the worst case we'll make a new trampoline here, in which case the
+    // current text offset must be in range for the "critical"
+    // [first_unresolved_call].
+    ASSERT(IsTargetInRangeFor(first_unresolved_call, next_text_offset_));
+
+    // See if there is already a trampoline we could use.
+    intptr_t trampoline_text_offset = -1;
+    auto callee = Code::InstructionsOf(first_unresolved_call->callee);
+    auto old_trampoline_entry = trampoline_text_offsets_.Lookup(callee);
+    if (old_trampoline_entry != nullptr &&
+        !FLAG_always_generate_trampolines_for_testing) {
+      const intptr_t offset = old_trampoline_entry->value;
+      if (IsTargetInRangeFor(first_unresolved_call, offset)) {
+        trampoline_text_offset = offset;
+      }
+    }
+
+    // If there is no trampoline yet, we'll create a new one.
+    if (trampoline_text_offset == -1) {
+      // The ownership of the trampoline bytes will be transferred to the
+      // [ImageWriter], which will eventually write out the bytes and delete the
+      // buffer.
+      auto trampoline_bytes = new uint8_t[kTrampolineSize];
+      memset(trampoline_bytes, 0x00, kTrampolineSize);
+      auto unresolved_trampoline = new UnresolvedTrampoline{
+          first_unresolved_call->call_entry_point,
+          first_unresolved_call->callee,
+          trampoline_bytes,
+          next_text_offset_,
+      };
+      AddTrampolineToText(callee, trampoline_bytes, kTrampolineSize);
+      EnqueueUnresolvedTrampoline(unresolved_trampoline);
+      trampoline_text_offset = unresolved_trampoline->text_offset;
+    }
+
+    // Let the unresolved call to [destination] jump to the trampoline
+    // instead.
+    auto destination = Code::InstructionsOf(first_unresolved_call->callee);
+    ResolveCallToDestination(first_unresolved_call, trampoline_text_offset);
+
+    // Remove this unresolved call from the global list and the per-destination
+    // list.
+    auto calls = unresolved_calls_by_destination_.LookupValue(destination);
+    calls->Remove(first_unresolved_call);
+    all_unresolved_calls_.Remove(first_unresolved_call);
+    delete first_unresolved_call;
+
+    // If this destination no longer has any unresolved calls, remove it.
+    if (calls->IsEmpty()) {
+      unresolved_calls_by_destination_.Remove(destination);
+      delete calls;
+    }
+  }
+}
+
+intptr_t CodeRelocator::FindDestinationInText(
+    const RawInstructions* destination,
+    Code::CallEntryPoint call_entry_point) {
+  const uword entry_point = call_entry_point == Code::kUncheckedEntry
+                                ? Instructions::UncheckedEntryPoint(destination)
+                                : Instructions::EntryPoint(destination);
+  const uword payload_offset =
+      entry_point - Instructions::PayloadStart(destination);
+  const intptr_t unchecked_offset = Instructions::HeaderSize() + payload_offset;
+  auto destination_offset = text_offsets_.LookupValue(destination);
+  return destination_offset + unchecked_offset;
+}
+
 #endif  // defined(DART_PRECOMPILER) && !defined(TARGET_ARCH_DBC) &&           \
         // !defined(TARGET_ARCH_IA32)
 
diff --git a/runtime/vm/compiler/relocation.h b/runtime/vm/compiler/relocation.h
index 4c2714f..3917432 100644
--- a/runtime/vm/compiler/relocation.h
+++ b/runtime/vm/compiler/relocation.h
@@ -7,6 +7,7 @@
 
 #include "vm/allocation.h"
 #include "vm/image_snapshot.h"
+#include "vm/intrusive_dlist.h"
 #include "vm/object.h"
 #include "vm/type_testing_stubs.h"
 #include "vm/visitor.h"
@@ -16,6 +17,97 @@
 #if defined(DART_PRECOMPILER) && !defined(TARGET_ARCH_DBC) &&                  \
     !defined(TARGET_ARCH_IA32)
 
+// Represents a pc-relative call which has not been patched up with the final
+// destination.
+class UnresolvedCall : public IntrusiveDListEntry<UnresolvedCall>,
+                       public IntrusiveDListEntry<UnresolvedCall, 2> {
+ public:
+  UnresolvedCall(RawCode* caller,
+                 intptr_t call_offset,
+                 Code::CallEntryPoint call_entry_point,
+                 intptr_t text_offset,
+                 RawCode* callee)
+      : caller(caller),
+        call_offset(call_offset),
+        call_entry_point(call_entry_point),
+        text_offset(text_offset),
+        callee(callee) {}
+
+  UnresolvedCall(const UnresolvedCall& other)
+      : caller(other.caller),
+        call_offset(other.call_offset),
+        call_entry_point(other.call_entry_point),
+        text_offset(other.text_offset),
+        callee(other.callee) {}
+
+  // The caller which has an unresolved call.
+  RawCode* caller;
+  // The offset from the payload of the calling code which performs the call.
+  intptr_t call_offset;
+  // The entry-point type we call in the destination.
+  Code::CallEntryPoint call_entry_point;
+  // The offset in the .text segment where the call happens.
+  intptr_t text_offset;
+  // The target of the forward call.
+  RawCode* callee;
+};
+
+// A list of all unresolved calls.
+using AllUnresolvedCallsList = IntrusiveDList<UnresolvedCall>;
+
+// A list of all unresolved calls which call the same destination.
+using SameDestinationUnresolvedCallsList = IntrusiveDList<UnresolvedCall, 2>;
+
+// Represents a trampoline which has not been patched up with the final
+// destination.
+//
+// The [CodeRelocator] will insert trampolines into the ".text" segment which
+// increase the range of PC-relative calls.  If a pc-relative call in normal
+// code is too far away from its destination, it will call a trampoline
+// instead (which will tail-call the destination).
+struct UnresolvedTrampoline {
+  // The entry-point type we call in the destination.
+  Code::CallEntryPoint call_entry_point;
+  // The target of the forward call.
+  RawCode* callee;
+
+  // The trampoline buffer.
+  uint8_t* trampoline_bytes;
+  // The offset in the .text segment where the trampoline starts.
+  intptr_t text_offset;
+};
+
+template <typename ValueType, ValueType kNoValue>
+class InstructionsMapTraits {
+ public:
+  struct Pair {
+    RawInstructions* instructions;
+    ValueType value;
+
+    Pair() : instructions(nullptr), value(kNoValue) {}
+    Pair(RawInstructions* i, const ValueType& value)
+        : instructions(i), value(value) {}
+  };
+
+  typedef const RawInstructions* Key;
+  typedef const ValueType Value;
+
+  static Key KeyOf(Pair kv) { return kv.instructions; }
+  static ValueType ValueOf(Pair kv) { return kv.value; }
+  static inline intptr_t Hashcode(Key key) {
+    return reinterpret_cast<intptr_t>(key);
+  }
+  static inline bool IsKeyEqual(Pair pair, Key key) {
+    return pair.instructions == key;
+  }
+};
+
+using InstructionsPosition =
+    DirectChainedHashMap<InstructionsMapTraits<intptr_t, -1>>;
+
+using InstructionsUnresolvedCalls = DirectChainedHashMap<
+    InstructionsMapTraits<SameDestinationUnresolvedCallsList*, nullptr>>;
+
 // Relocates the given code objects by patching the instructions with the
 // correct pc offsets.
 //
@@ -39,15 +131,56 @@
  private:
   CodeRelocator(Thread* thread,
                 GrowableArray<RawCode*>* code_objects,
-                GrowableArray<ImageWriterCommand>* commands)
-      : StackResource(thread),
-        code_objects_(code_objects),
-        commands_(commands) {}
+                GrowableArray<ImageWriterCommand>* commands);
 
   void Relocate(bool is_vm_isolate);
 
-  GrowableArray<RawCode*>* code_objects_;
+  bool AddInstructionsToText(RawCode* code);
+  void AddTrampolineToText(RawInstructions* destination,
+                           uint8_t* trampoline_bytes,
+                           intptr_t trampoline_length);
+  void ScanCallTargets(const Code& code,
+                       const Array& call_targets,
+                       intptr_t code_text_offset);
+
+  void EnqueueUnresolvedCall(UnresolvedCall* unresolved_call);
+  void EnqueueUnresolvedTrampoline(UnresolvedTrampoline* unresolved_trampoline);
+
+  bool TryResolveBackwardsCall(UnresolvedCall* unresolved_call);
+  void ResolveUnresolvedCallsTargeting(const RawInstructions* instructions);
+  void ResolveCall(UnresolvedCall* unresolved_call);
+  void ResolveCallToDestination(UnresolvedCall* unresolved_call,
+                                intptr_t destination_text);
+  void ResolveTrampoline(UnresolvedTrampoline* unresolved_trampoline);
+
+  void BuildTrampolinesForAlmostOutOfRangeCalls();
+
+  intptr_t FindDestinationInText(const RawInstructions* destination,
+                                 Code::CallEntryPoint call_entry_point);
+
+  bool IsTargetInRangeFor(UnresolvedCall* unresolved_call,
+                          intptr_t target_text_offset);
+
+  const GrowableArray<RawCode*>* code_objects_;
   GrowableArray<ImageWriterCommand>* commands_;
+
+  // The size of largest instructions object in bytes.
+  intptr_t max_instructions_size_ = 0;
+  // The maximum number of pc-relative calls in an instructions object.
+  intptr_t max_calls_ = 0;
+
+  // Data structures used for relocation.
+  intptr_t next_text_offset_ = 0;
+  InstructionsPosition text_offsets_;
+  InstructionsPosition trampoline_text_offsets_;
+  InstructionsUnresolvedCalls unresolved_calls_by_destination_;
+  AllUnresolvedCallsList all_unresolved_calls_;
+  GrowableArray<UnresolvedTrampoline*> unresolved_trampolines_;
+
+  // Reusable handles for [ScanCallTargets].
+  Smi& kind_type_and_offset_;
+  Object& target_;
+  Code& destination_;
 };
 
 #endif  // defined(DART_PRECOMPILER) && !defined(TARGET_ARCH_DBC) &&           \
diff --git a/runtime/vm/heap/heap.cc b/runtime/vm/heap/heap.cc
index da5e831..4ce0f61 100644
--- a/runtime/vm/heap/heap.cc
+++ b/runtime/vm/heap/heap.cc
@@ -689,6 +689,15 @@
 }
 
 bool Heap::Verify(MarkExpectation mark_expectation) const {
+#if defined(DART_PRECOMPILED_RUNTIME)
+  // We cannot simply walk the heap pages which contain instructions, because
+  // they also contain trampolines inserted at AOT compilation time.
+  const bool have_bare_trampolines =
+      FLAG_precompiled_mode && FLAG_use_bare_instructions;
+  if (have_bare_trampolines) {
+    return true;
+  }
+#endif
   HeapIterationScope heap_iteration_scope(Thread::Current());
   return VerifyGC(mark_expectation);
 }
diff --git a/runtime/vm/image_snapshot.cc b/runtime/vm/image_snapshot.cc
index 9be1458..bf7dada 100644
--- a/runtime/vm/image_snapshot.cc
+++ b/runtime/vm/image_snapshot.cc
@@ -111,6 +111,15 @@
           heap_->SetObjectId(instructions, offset);
           break;
         }
+        case ImageWriterCommand::InsertBytesOfTrampoline: {
+          auto trampoline_bytes = inst.insert_trampoline_bytes.buffer;
+          auto trampoline_length = inst.insert_trampoline_bytes.buffer_length;
+          const intptr_t offset = next_text_offset_;
+          instructions_.Add(
+              InstructionsData(trampoline_bytes, trampoline_length, offset));
+          next_text_offset_ += trampoline_length;
+          break;
+        }
         default:
           UNREACHABLE();
       }
@@ -279,6 +288,9 @@
   // will allocate on the Dart heap.
   for (intptr_t i = 0; i < instructions_.length(); i++) {
     InstructionsData& data = instructions_[i];
+    const bool is_trampoline = data.trampoline_bytes != nullptr;
+    if (is_trampoline) continue;
+
     data.insns_ = &Instructions::Handle(zone, data.raw_insns_);
     ASSERT(data.raw_code_ != NULL);
     data.code_ = &Code::Handle(zone, data.raw_code_);
@@ -406,18 +418,37 @@
   TypeTestingStubFinder tts;
   intptr_t text_offset = 0;
 
+  ASSERT(offset_space_ != V8SnapshotProfileWriter::kSnapshot);
   for (intptr_t i = 0; i < instructions_.length(); i++) {
-    auto& instr = instructions_[i];
-    ASSERT((instr.text_offset_ - instructions_[0].text_offset_) == text_offset);
+    auto& data = instructions_[i];
+    const bool is_trampoline = data.trampoline_bytes != nullptr;
+    ASSERT((data.text_offset_ - instructions_[0].text_offset_) == text_offset);
+
+    if (is_trampoline) {
+      if (profile_writer_ != nullptr) {
+        const intptr_t offset = Image::kHeaderSize + text_offset;
+        profile_writer_->SetObjectTypeAndName({offset_space_, offset},
+                                              "Trampolines",
+                                              /*name=*/nullptr);
+        profile_writer_->AttributeBytesTo({offset_space_, offset},
+                                          data.trampline_length);
+      }
+
+      const auto start = reinterpret_cast<uword>(data.trampoline_bytes);
+      const auto end = start + data.trampline_length;
+      text_offset += WriteByteSequence(start, end);
+      delete[] data.trampoline_bytes;
+      data.trampoline_bytes = nullptr;
+      continue;
+    }
 
     const intptr_t instr_start = text_offset;
 
-    const Instructions& insns = *instr.insns_;
-    const Code& code = *instr.code_;
+    const Instructions& insns = *data.insns_;
+    const Code& code = *data.code_;
 
     if (profile_writer_ != nullptr) {
       const intptr_t offset = Image::kHeaderSize + text_offset;
-      ASSERT(offset_space_ != V8SnapshotProfileWriter::kSnapshot);
       profile_writer_->SetObjectTypeAndName({offset_space_, offset},
                                             "Instructions",
                                             /*name=*/nullptr);
@@ -451,9 +482,7 @@
       WriteWordLiteralText(marked_tags);
       beginning += sizeof(uword);
       text_offset += sizeof(uword);
-
-      WriteByteSequence(beginning, entry);
-      text_offset += (entry - beginning);
+      text_offset += WriteByteSequence(beginning, entry);
 
       ASSERT((text_offset - instr_start) == insns.HeaderSize());
     }
@@ -512,8 +541,7 @@
       ASSERT(Utils::IsAligned(entry, sizeof(uword)));
       ASSERT(Utils::IsAligned(end, sizeof(uword)));
 
-      WriteByteSequence(entry, end);
-      text_offset += (end - entry);
+      text_offset += WriteByteSequence(entry, end);
     }
 
     ASSERT((text_offset - instr_start) == insns.raw()->Size());
@@ -618,11 +646,12 @@
   assembly_stream_.Print(".cfi_endproc\n");
 }
 
-void AssemblyImageWriter::WriteByteSequence(uword start, uword end) {
+intptr_t AssemblyImageWriter::WriteByteSequence(uword start, uword end) {
   for (uword* cursor = reinterpret_cast<uword*>(start);
        cursor < reinterpret_cast<uword*>(end); cursor++) {
     WriteWordLiteralText(*cursor);
   }
+  return end - start;
 }
 
 BlobImageWriter::BlobImageWriter(Thread* thread,
@@ -639,6 +668,14 @@
       instructions_blob_stream_(instructions_blob_buffer, alloc, initial_size) {
 }
 
+intptr_t BlobImageWriter::WriteByteSequence(uword start, uword end) {
+  for (uword* cursor = reinterpret_cast<uword*>(start);
+       cursor < reinterpret_cast<uword*>(end); cursor++) {
+    instructions_blob_stream_.WriteWord(*cursor);
+  }
+  return end - start;
+}
+
 void BlobImageWriter::WriteText(WriteStream* clustered_stream, bool vm) {
   // This header provides the gap to make the instructions snapshot look like a
   // HeapPage.
@@ -653,12 +690,24 @@
 
   NoSafepointScope no_safepoint;
   for (intptr_t i = 0; i < instructions_.length(); i++) {
-    auto& instr = instructions_[i];
-    const Instructions& insns = *instructions_[i].insns_;
-    AutoTraceImage(insns, 0, &this->instructions_blob_stream_);
-    ASSERT((instr.text_offset_ - instructions_[0].text_offset_) == text_offset);
+    auto& data = instructions_[i];
+    const bool is_trampoline = data.trampoline_bytes != nullptr;
+    ASSERT((data.text_offset_ - instructions_[0].text_offset_) == text_offset);
+
+    if (is_trampoline) {
+      const auto start = reinterpret_cast<uword>(data.trampoline_bytes);
+      const auto end = start + data.trampline_length;
+      text_offset += WriteByteSequence(start, end);
+      delete[] data.trampoline_bytes;
+      data.trampoline_bytes = nullptr;
+      continue;
+    }
 
     const intptr_t instr_start = text_offset;
+
+    const Instructions& insns = *instructions_[i].insns_;
+    AutoTraceImage(insns, 0, &this->instructions_blob_stream_);
+
     uword beginning = reinterpret_cast<uword>(insns.raw_ptr());
     uword entry = beginning + Instructions::HeaderSize();
     uword payload_size = insns.Size();
@@ -684,12 +733,7 @@
     instructions_blob_stream_.WriteWord(marked_tags);
     text_offset += sizeof(uword);
     beginning += sizeof(uword);
-
-    for (uword* cursor = reinterpret_cast<uword*>(beginning);
-         cursor < reinterpret_cast<uword*>(end); cursor++) {
-      instructions_blob_stream_.WriteWord(*cursor);
-      text_offset += sizeof(uword);
-    }
+    text_offset += WriteByteSequence(beginning, end);
 
     ASSERT((text_offset - instr_start) == insns.raw()->Size());
   }
diff --git a/runtime/vm/image_snapshot.h b/runtime/vm/image_snapshot.h
index 939a653..1bc4260 100644
--- a/runtime/vm/image_snapshot.h
+++ b/runtime/vm/image_snapshot.h
@@ -5,6 +5,7 @@
 #ifndef RUNTIME_VM_IMAGE_SNAPSHOT_H_
 #define RUNTIME_VM_IMAGE_SNAPSHOT_H_
 
+#include <memory>
 #include <utility>
 
 #include "platform/assert.h"
@@ -102,20 +103,29 @@
 // A command which instructs the image writer to emit something into the ".text"
 // segment.
 //
-// For now this just supports emitting the instructions of a [Code] object.
-// In the future we might also want to add support for emitting trampolines.
+// For now this supports
+//
+//   * emitting the instructions of a [Code] object
+//   * emitting a trampoline of a certain size
+//
 struct ImageWriterCommand {
   enum Opcode {
     InsertInstructionOfCode,
+    InsertBytesOfTrampoline,
   };
 
-  ImageWriterCommand(intptr_t expected_offset,
-                     ImageWriterCommand::Opcode opcode,
-                     RawCode* code)
+  ImageWriterCommand(intptr_t expected_offset, RawCode* code)
       : expected_offset(expected_offset),
-        op(opcode),
+        op(ImageWriterCommand::InsertInstructionOfCode),
         insert_instruction_of_code({code}) {}
 
+  ImageWriterCommand(intptr_t expected_offset,
+                     uint8_t* trampoline_bytes,
+                     intptr_t trampoine_length)
+      : expected_offset(expected_offset),
+        op(ImageWriterCommand::InsertBytesOfTrampoline),
+        insert_trampoline_bytes({trampoline_bytes, trampoine_length}) {}
+
   // The offset (relative to the very first [ImageWriterCommand]) we expect
   // this [ImageWriterCommand] to have.
   intptr_t expected_offset;
@@ -125,6 +135,10 @@
     struct {
       RawCode* code;
     } insert_instruction_of_code;
+    struct {
+      uint8_t* buffer;
+      intptr_t buffer_length;
+    } insert_trampoline_bytes;
   };
 };
 
@@ -177,7 +191,20 @@
     InstructionsData(RawInstructions* insns,
                      RawCode* code,
                      intptr_t text_offset)
-        : raw_insns_(insns), raw_code_(code), text_offset_(text_offset) {}
+        : raw_insns_(insns),
+          raw_code_(code),
+          text_offset_(text_offset),
+          trampoline_bytes(nullptr),
+          trampline_length(0) {}
+
+    InstructionsData(uint8_t* trampoline_bytes,
+                     intptr_t trampline_length,
+                     intptr_t text_offset)
+        : raw_insns_(nullptr),
+          raw_code_(nullptr),
+          text_offset_(text_offset),
+          trampoline_bytes(trampoline_bytes),
+          trampline_length(trampline_length) {}
 
     union {
       RawInstructions* raw_insns_;
@@ -188,6 +215,9 @@
       const Code* code_;
     };
     intptr_t text_offset_;
+
+    uint8_t* trampoline_bytes;
+    intptr_t trampline_length;
   };
 
   struct ObjectData {
@@ -279,7 +309,7 @@
  private:
   void FrameUnwindPrologue();
   void FrameUnwindEpilogue();
-  void WriteByteSequence(uword start, uword end);
+  intptr_t WriteByteSequence(uword start, uword end);
   void WriteWordLiteralText(uword value) {
 // Padding is helpful for comparing the .S with --disassemble.
 #if defined(ARCH_IS_64_BIT)
@@ -312,6 +342,8 @@
   }
 
  private:
+  intptr_t WriteByteSequence(uword start, uword end);
+
   WriteStream instructions_blob_stream_;
 
   DISALLOW_COPY_AND_ASSIGN(BlobImageWriter);
diff --git a/runtime/vm/instructions_arm.cc b/runtime/vm/instructions_arm.cc
index f9137ac..1ba7633 100644
--- a/runtime/vm/instructions_arm.cc
+++ b/runtime/vm/instructions_arm.cc
@@ -144,6 +144,37 @@
   return start;
 }
 
+void InstructionPattern::EncodeLoadWordImmediate(uword end,
+                                                 Register reg,
+                                                 intptr_t value) {
+  uint16_t low16 = value & 0xffff;
+  uint16_t high16 = (value >> 16) & 0xffff;
+
+  // movw reg, #imm_lo
+  uint32_t movw_instr = 0xe3000000;
+  movw_instr |= (low16 >> 12) << 16;
+  movw_instr |= (reg << 12);
+  movw_instr |= (low16 & 0xfff);
+
+  // movt reg, #imm_hi
+  uint32_t movt_instr = 0xe3400000;
+  movt_instr |= (high16 >> 12) << 16;
+  movt_instr |= (reg << 12);
+  movt_instr |= (high16 & 0xfff);
+
+  uint32_t* cursor = reinterpret_cast<uint32_t*>(end);
+  *(--cursor) = movt_instr;
+  *(--cursor) = movw_instr;
+
+#if defined(DEBUG)
+  Register decoded_reg;
+  intptr_t decoded_value;
+  DecodeLoadWordImmediate(end, &decoded_reg, &decoded_value);
+  ASSERT(reg == decoded_reg);
+  ASSERT(value == decoded_value);
+#endif
+}
+
 static bool IsLoadWithOffset(int32_t instr,
                              Register base,
                              intptr_t* offset,
@@ -326,19 +357,64 @@
 }
 
 bool PcRelativeCallPattern::IsValid() const {
-  // bl <offset>
+  // bl.<cond> <offset>
   const uint32_t word = *reinterpret_cast<uint32_t*>(pc_);
   const uint32_t cond_all = 0xe0;
   const uint32_t branch_link = 0x0b;
   return (word >> 24) == (cond_all | branch_link);
 }
 
-bool PcRelativeJumpPattern::IsValid() const {
-  // b <offset>
-  const uint32_t word = *reinterpret_cast<uint32_t*>(pc_);
-  const uint32_t cond_all = 0xe0;
-  const uint32_t branch_nolink = 0x0a;
-  return (word >> 24) == (cond_all | branch_nolink);
+void PcRelativeTrampolineJumpPattern::Initialize() {
+#if !defined(DART_PRECOMPILED_RUNTIME)
+  uint32_t* add_pc =
+      reinterpret_cast<uint32_t*>(pattern_start_ + 2 * Instr::kInstrSize);
+  *add_pc = kAddPcEncoding;
+  set_distance(0);
+#else
+  UNREACHABLE();
+#endif
+}
+
+int32_t PcRelativeTrampolineJumpPattern::distance() {
+#if !defined(DART_PRECOMPILED_RUNTIME)
+  const uword end = pattern_start_ + 2 * Instr::kInstrSize;
+  Register reg;
+  intptr_t value;
+  InstructionPattern::DecodeLoadWordImmediate(end, &reg, &value);
+  value -= kDistanceOffset;
+  ASSERT(reg == TMP);
+  return value;
+#else
+  UNREACHABLE();
+  return 0;
+#endif
+}
+
+void PcRelativeTrampolineJumpPattern::set_distance(int32_t distance) {
+#if !defined(DART_PRECOMPILED_RUNTIME)
+  const uword end = pattern_start_ + 2 * Instr::kInstrSize;
+  InstructionPattern::EncodeLoadWordImmediate(end, TMP,
+                                              distance + kDistanceOffset);
+#else
+  UNREACHABLE();
+#endif
+}
+
+bool PcRelativeTrampolineJumpPattern::IsValid() const {
+#if !defined(DART_PRECOMPILED_RUNTIME)
+  const uword end = pattern_start_ + 2 * Instr::kInstrSize;
+  Register reg;
+  intptr_t value;
+  InstructionPattern::DecodeLoadWordImmediate(end, &reg, &value);
+
+  uint32_t* add_pc =
+      reinterpret_cast<uint32_t*>(pattern_start_ + 2 * Instr::kInstrSize);
+
+  return reg == TMP && *add_pc == kAddPcEncoding;
+#else
+  UNREACHABLE();
+  return false;
+#endif
 }
 
 intptr_t TypeTestingStubCallPattern::GetSubtypeTestCachePoolIndex() {
diff --git a/runtime/vm/instructions_arm.h b/runtime/vm/instructions_arm.h
index 70a7636..9f2d4c3 100644
--- a/runtime/vm/instructions_arm.h
+++ b/runtime/vm/instructions_arm.h
@@ -37,6 +37,14 @@
                                        Register* reg,
                                        intptr_t* value);
 
+  // Encodes a load immediate sequence ending at 'end' (the last instruction of
+  // the load sequence is the instruction before the one at end).
+  //
+  // Supports only a subset of [DecodeLoadWordImmediate], namely:
+  //   movw r, #lower16
+  //   movt r, #upper16
+  static void EncodeLoadWordImmediate(uword end, Register reg, intptr_t value);
+
   // Decodes a load sequence ending at 'end' (the last instruction of the
   // load sequence is the instruction before the one at end).  Returns the
   // address of the first instruction in the sequence.  Returns the register
@@ -160,6 +168,10 @@
 
 class PcRelativeCallPattern : public ValueObject {
  public:
+  // 24 bit signed integer which will get multiplied by 4.
+  static const intptr_t kLowerCallingRange = -(1 << 25);
+  static const intptr_t kUpperCallingRange = (1 << 25) - 1;
+
   explicit PcRelativeCallPattern(uword pc) : pc_(pc) {}
 
   static const int kLengthInBytes = 1 * Instr::kInstrSize;
@@ -188,34 +200,47 @@
   uword pc_;
 };
 
-class PcRelativeJumpPattern : public ValueObject {
+// Instruction pattern for a tail call to a signed 32-bit PC-relative offset
+//
+// The AOT compiler can emit PC-relative calls. If the destination of such a
+// call is not in range for the "bl.<cond> <offset>" instruction, the AOT
+// compiler will emit a trampoline which is in range. That trampoline will
+// then tail-call to the final destination (also via PC-relative offset, but it
+// supports a full signed 32-bit offset).
+//
+// The pattern of the trampoline looks like:
+//
+//     movw TMP, #lower16
+//     movt TMP, #upper16
+//     add  PC, PC, TMP lsl #0
+//
+class PcRelativeTrampolineJumpPattern : public ValueObject {
  public:
-  explicit PcRelativeJumpPattern(uword pc) : pc_(pc) {}
-
-  static const int kLengthInBytes = 1 * Instr::kInstrSize;
-
-  int32_t distance() {
-#if !defined(DART_PRECOMPILED_RUNTIME)
-    return Assembler::DecodeBranchOffset(*reinterpret_cast<int32_t*>(pc_));
-#else
-    UNREACHABLE();
-    return 0;
-#endif
+  explicit PcRelativeTrampolineJumpPattern(uword pattern_start)
+      : pattern_start_(pattern_start) {
+    USE(pattern_start_);
   }
 
-  void set_distance(int32_t distance) {
-#if !defined(DART_PRECOMPILED_RUNTIME)
-    int32_t* word = reinterpret_cast<int32_t*>(pc_);
-    *word = Assembler::EncodeBranchOffset(distance, *word);
-#else
-    UNREACHABLE();
-#endif
-  }
+  static const int kLengthInBytes = 3 * Instr::kInstrSize;
 
+  void Initialize();
+
+  int32_t distance();
+  void set_distance(int32_t distance);
   bool IsValid() const;
 
  private:
-  uword pc_;
+  // This offset must be applied to account for the fact that
+  //   a) the actual "branch" is only in the 3rd instruction
+  //   b) when reading the PC it reports current instruction + 8
+  static const intptr_t kDistanceOffset = -4 * Instr::kInstrSize;
+
+  // add  PC, PC, TMP lsl #0
+  static const uint32_t kAddPcEncoding =
+      (ADD << kOpcodeShift) | (AL << kConditionShift) | (PC << kRnShift) |
+      (PC << kRdShift) | (TMP << kRmShift);
+
+  uword pattern_start_;
 };
 
 }  // namespace dart
diff --git a/runtime/vm/instructions_arm64.cc b/runtime/vm/instructions_arm64.cc
index 2bf7c37..a5e9543 100644
--- a/runtime/vm/instructions_arm64.cc
+++ b/runtime/vm/instructions_arm64.cc
@@ -464,11 +464,60 @@
   return (word >> 26) == branch_link;
 }
 
-bool PcRelativeJumpPattern::IsValid() const {
-  // b <offset>
-  const uint32_t word = *reinterpret_cast<uint32_t*>(pc_);
-  const uint32_t branch_nolink = 0x5;
-  return (word >> 26) == branch_nolink;
+void PcRelativeTrampolineJumpPattern::Initialize() {
+#if !defined(DART_PRECOMPILED_RUNTIME)
+  uint32_t* pattern = reinterpret_cast<uint32_t*>(pattern_start_);
+  pattern[0] = kAdrEncoding;
+  pattern[1] = kMovzEncoding;
+  pattern[2] = kAddTmpTmp2;
+  pattern[3] = kJumpEncoding;
+  set_distance(0);
+#else
+  UNREACHABLE();
+#endif
+}
+
+int32_t PcRelativeTrampolineJumpPattern::distance() {
+#if !defined(DART_PRECOMPILED_RUNTIME)
+  uint32_t* pattern = reinterpret_cast<uint32_t*>(pattern_start_);
+  const uint32_t adr = pattern[0];
+  const uint32_t movz = pattern[1];
+  const uint32_t lower16 =
+      (((adr >> 5) & ((1 << 19) - 1)) << 2) | ((adr >> 29) & 0x3);
+  const uint32_t higher16 = (movz >> kImm16Shift) & 0xffff;
+  return (higher16 << 16) | lower16;
+#else
+  UNREACHABLE();
+  return 0;
+#endif
+}
+
+void PcRelativeTrampolineJumpPattern::set_distance(int32_t distance) {
+#if !defined(DART_PRECOMPILED_RUNTIME)
+  uint32_t* pattern = reinterpret_cast<uint32_t*>(pattern_start_);
+  uint32_t low16 = distance & 0xffff;
+  uint32_t high16 = (distance >> 16) & 0xffff;
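+  // The low 16 bits are encoded in the adr immediate (immlo in bits 29..30,
+  // immhi starting at bit 5); the high 16 bits go into the movz immediate,
+  // which materializes them in bits 16..31 of TMP2.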
+  pattern[0] = kAdrEncoding | ((low16 & 0x3) << 29) | ((low16 >> 2) << 5);
+  pattern[1] = kMovzEncoding | (high16 << kImm16Shift);
+  ASSERT(IsValid());
+#else
+  UNREACHABLE();
+#endif
+}
+
+bool PcRelativeTrampolineJumpPattern::IsValid() const {
+#if !defined(DART_PRECOMPILED_RUNTIME)
+  const uint32_t adr_mask = (3 << 29) | (((1 << 19) - 1) << 5);
+  const uint32_t movz_mask = 0xffff << 5;
+  uint32_t* pattern = reinterpret_cast<uint32_t*>(pattern_start_);
+  return (pattern[0] & ~adr_mask) == kAdrEncoding &&
+         (pattern[1] & ~movz_mask) == kMovzEncoding &&
+         pattern[2] == kAddTmpTmp2 &&
+         pattern[3] == kJumpEncoding;
+#else
+  UNREACHABLE();
+  return false;
+#endif
 }
 
 intptr_t TypeTestingStubCallPattern::GetSubtypeTestCachePoolIndex() {
diff --git a/runtime/vm/instructions_arm64.h b/runtime/vm/instructions_arm64.h
index 18a8cc3..88e523a 100644
--- a/runtime/vm/instructions_arm64.h
+++ b/runtime/vm/instructions_arm64.h
@@ -180,6 +180,10 @@
 
 class PcRelativeCallPattern : public ValueObject {
  public:
+  // 26 bit signed integer which will get multiplied by 4.
+  static const intptr_t kLowerCallingRange = -(1 << 27);
+  static const intptr_t kUpperCallingRange = (1 << 27) - 1;
+
   explicit PcRelativeCallPattern(uword pc) : pc_(pc) {}
 
   static const int kLengthInBytes = 1 * Instr::kInstrSize;
@@ -208,34 +212,55 @@
   uword pc_;
 };
 
-class PcRelativeJumpPattern : public ValueObject {
+// Instruction pattern for a tail call to a signed 32-bit PC-relative offset
+//
+// The AOT compiler can emit PC-relative calls. If the destination of such a
+// call is not in range for the "bl <offset>" instruction, the AOT compiler will
+// emit a trampoline which is in range. That trampoline will then tail-call to
+// the final destination (also via PC-relative offset, but it supports a full
+// signed 32-bit offset).
+//
+// The pattern of the trampoline looks like:
+//
+//     adr TMP, #lower16  (same as TMP = PC + #lower16)
+//     movz TMP2, #higher16 lsl 16
+//     add TMP, TMP, TMP2, SXTW
+//     br TMP
+//
+class PcRelativeTrampolineJumpPattern : public ValueObject {
  public:
-  explicit PcRelativeJumpPattern(uword pc) : pc_(pc) {}
-
-  static const int kLengthInBytes = 1 * Instr::kInstrSize;
-
-  int32_t distance() {
-#if !defined(DART_PRECOMPILED_RUNTIME)
-    return Assembler::DecodeImm26BranchOffset(*reinterpret_cast<int32_t*>(pc_));
-#else
-    UNREACHABLE();
-    return 0;
-#endif
+  explicit PcRelativeTrampolineJumpPattern(uword pattern_start)
+      : pattern_start_(pattern_start) {
+    USE(pattern_start_);
   }
 
-  void set_distance(int32_t distance) {
-#if !defined(DART_PRECOMPILED_RUNTIME)
-    int32_t* word = reinterpret_cast<int32_t*>(pc_);
-    *word = Assembler::EncodeImm26BranchOffset(distance, *word);
-#else
-    UNREACHABLE();
-#endif
-  }
+  static const int kLengthInBytes = 4 * Instr::kInstrSize;
 
+  void Initialize();
+
+  int32_t distance();
+  void set_distance(int32_t distance);
   bool IsValid() const;
 
  private:
-  uword pc_;
+  // This offset must be applied to account for the fact that
+  //   a) the actual "branch" is only in the 3rd instruction
+  //   b) when reading the PC it reports current instruction + 8
+  static const intptr_t kDistanceOffset = -5 * Instr::kInstrSize;
+
+  // adr TMP, #lower16  (same as TMP = PC + #lower16)
+  static const uint32_t kAdrEncoding = (1 << 28) | (TMP << kRdShift);
+
+  // movz TMP2, #higher16 lsl 16
+  static const uint32_t kMovzEncoding = MOVZ | (1 << kHWShift) | TMP2;
+
+  // add TMP, TMP, TMP2, SXTW
+  static const uint32_t kAddTmpTmp2 = 0x8b31c210;
+
+  // br TMP
+  static const uint32_t kJumpEncoding = BR | (TMP << kRnShift);
+
+  uword pattern_start_;
 };
 
 }  // namespace dart
diff --git a/runtime/vm/instructions_x64.h b/runtime/vm/instructions_x64.h
index 8258214..b41a95d 100644
--- a/runtime/vm/instructions_x64.h
+++ b/runtime/vm/instructions_x64.h
@@ -115,6 +115,9 @@
 // callq *[rip+offset]
 class PcRelativeCallPattern : public InstructionPattern<PcRelativeCallPattern> {
  public:
+  static const intptr_t kLowerCallingRange = -(DART_UINT64_C(1) << 31);
+  static const intptr_t kUpperCallingRange = (DART_UINT64_C(1) << 31) - 1;
+
   explicit PcRelativeCallPattern(uword pc) : InstructionPattern(pc) {}
 
   int32_t distance() {
@@ -137,29 +140,50 @@
   static const int kLengthInBytes = 5;
 };
 
-// jmpq *[rip+offset]
-class PcRelativeJumpPattern : public InstructionPattern<PcRelativeJumpPattern> {
+// Instruction pattern for a tail call to a signed 32-bit PC-relative offset
+//
+// The AOT compiler can emit PC-relative calls. If the destination of such a
+// call is not in range for the "call <offset>" instruction, the AOT
+// compiler will emit a trampoline which is in range. That trampoline will
+// then tail-call to the final destination (also via PC-relative offset, but it
+// supports a full signed 32-bit offset).
+//
+// The pattern of the trampoline looks like:
+//
+//     jmp $rip + <offset>
+//
+// (Strictly speaking, the pc-relative call distance on X64 is big enough, but
+// to keep the AOT relocation code (i.e. relocation.cc) platform independent
+// and to allow testing of trampolines on X64 we have it nonetheless.)
+class PcRelativeTrampolineJumpPattern : public ValueObject {
  public:
-  explicit PcRelativeJumpPattern(uword pc) : InstructionPattern(pc) {}
+  explicit PcRelativeTrampolineJumpPattern(uword pattern_start)
+      : pattern_start_(pattern_start) {}
+
+  void Initialize() {
+    uint8_t* pattern = reinterpret_cast<uint8_t*>(pattern_start_);
+    pattern[0] = 0xe9;
+  }
 
   int32_t distance() {
-    return *reinterpret_cast<int32_t*>(start() + 2) + kLengthInBytes;
+    return *reinterpret_cast<int32_t*>(pattern_start_ + 1) + kLengthInBytes;
   }
 
   void set_distance(int32_t distance) {
     // [distance] is relative to the start of the instruction, x64 considers the
     // offset relative to next PC.
-    *reinterpret_cast<int32_t*>(start() + 2) = distance - kLengthInBytes;
+    *reinterpret_cast<int32_t*>(pattern_start_ + 1) = distance - kLengthInBytes;
   }
 
-  static const int* pattern() {
-    static const int kPattern[kLengthInBytes] = {0xff, 0x25, -1, -1, -1, -1};
-    return kPattern;
+  bool IsValid() const {
+    uint8_t* pattern = reinterpret_cast<uint8_t*>(pattern_start_);
+    return pattern[0] == 0xe9;
   }
 
-  static int pattern_length_in_bytes() { return kLengthInBytes; }
+ private:
+  static const int kLengthInBytes = 5;
 
-  static const int kLengthInBytes = 6;
+  uword pattern_start_;
 };
 
 }  // namespace dart
diff --git a/runtime/vm/intrusive_dlist.h b/runtime/vm/intrusive_dlist.h
new file mode 100644
index 0000000..8980789
--- /dev/null
+++ b/runtime/vm/intrusive_dlist.h
@@ -0,0 +1,233 @@
+// Copyright (c) 2019, the dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE.md file.
+
+#ifndef RUNTIME_VM_INTRUSIVE_DLIST_H_
+#define RUNTIME_VM_INTRUSIVE_DLIST_H_
+
+#include "platform/assert.h"
+#include "platform/globals.h"
+
+namespace dart {
+
+// Abstraction for "intrusive" doubly-linked list in a type-safe way in C++.
+//
+// By "intrusive" we mean that a class Item containing various field members
+// has also next/previous pointers to the next/previous Item.
+//
+// To change a class Item to be part of such a doubly-linked list, one only
+// needs to inherit from IntrusiveDListEntry<Item> (in addition to the normal
+// base class).
+//
+// To change a class Item to be part of multiple doubly-linked lists, one can
+// inherit multiple times from IntrusiveDListEntry<Item, N>, each time with a
+// different value for <N>.
+//
+// To insert an object into a list, one uses an IntrusiveDList<Item, N>. It
+// provides support for insertion/iteration/deletion.
+//
+// Usage example:
+//
+//    class Base {
+//      <arbitrary state here>
+//    };
+//
+//    class Item : public Base,
+//                 public IntrusiveDListEntry<Item>,
+//                 public IntrusiveDListEntry<Item, 2> {
+//      <arbitrary state here>
+//    };
+//
+//    Item a1, a2, a3;
+//
+//    IntrusiveDList<Item> all;
+//    all.Append(a2);
+//    all.Prepend(a1);
+//    all.Append(a3);
+//
+//    IntrusiveDList<Item, 2> idle, ready;
+//    idle.Append(a1);
+//    ready.Append(a2);
+//    ready.Append(a3);
+//
+
+template <typename T, int N>
+class IntrusiveDList;
+
+template <typename T, int N = 1>
+class IntrusiveDListEntry {
+ public:
+  IntrusiveDListEntry() {}
+
+  ~IntrusiveDListEntry() {
+    // Either this is an unlinked entry or a head/anchor.
+    ASSERT((next_ == nullptr && prev_ == nullptr) ||
+           (next_ == this && prev_ == this));
+  }
+
+ private:
+  void MakeHead() {
+    ASSERT(next_ == nullptr && prev_ == nullptr);
+    next_ = prev_ = this;
+  }
+
+  // This uses the C++ compiler's ability to convert between classes inside a
+  // (possibly multi-) inheritance hierarchy.
+  //
+  // The non-typesafe C equivalent of this is:
+  //
+  //     ((uint8*)this) - offsetof(ContainerType, list_entry);
+  //
+  T* container() { return static_cast<T*>(this); }
+
+  void Append(IntrusiveDListEntry<T, N>* entry) {
+    ASSERT(entry->next_ == nullptr && entry->prev_ == nullptr);
+    entry->next_ = next_;
+    entry->prev_ = this;
+    next_ = entry;
+    entry->next_->prev_ = entry;
+  }
+
+  void Prepend(IntrusiveDListEntry<T, N>* entry) {
+    ASSERT(entry->next_ == nullptr && entry->prev_ == nullptr);
+    entry->next_ = this;
+    entry->prev_ = prev_;
+    prev_ = entry;
+    entry->prev_->next_ = entry;
+  }
+
+  void Remove() {
+    ASSERT(prev_->next_ == this);
+    ASSERT(next_->prev_ == this);
+    ASSERT(prev_ != this && next_ != this);
+
+    prev_->next_ = next_;
+    next_->prev_ = prev_;
+
+    next_ = nullptr;
+    prev_ = nullptr;
+  }
+
+  bool IsEmpty() {
+    bool result = next_ == this;
+    ASSERT(result == (prev_ == this));
+    return result;
+  }
+
+  bool IsLinked() {
+    ASSERT((next_ == nullptr) == (prev_ == nullptr));
+    return next_ != nullptr;
+  }
+
+  IntrusiveDListEntry<T, N>* Prev() { return prev_; }
+
+  IntrusiveDListEntry<T, N>* Next() { return next_; }
+
+  friend class IntrusiveDList<T, N>;
+
+  IntrusiveDListEntry<T, N>* next_ = nullptr;
+  IntrusiveDListEntry<T, N>* prev_ = nullptr;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrusiveDListEntry);
+};
+
+template <typename T, int N = 1>
+class IntrusiveDList {
+ public:
+  typedef IntrusiveDListEntry<T, N> Entry;
+
+  template <typename ContainerType, int I = 1>
+  class Iterator {
+   public:
+    Iterator(IntrusiveDList<ContainerType, I>* head,
+             IntrusiveDListEntry<ContainerType, I>* entry)
+        : head_(head), entry_(entry) {}
+
+    inline ContainerType* operator->() { return entry_->container(); }
+
+    inline ContainerType* operator*() { return entry_->container(); }
+
+    inline bool operator==(const Iterator<ContainerType, I>& other) const {
+      return entry_ == other.entry_;
+    }
+
+    inline bool operator!=(const Iterator<ContainerType, I>& other) const {
+      return !(*this == other);
+    }
+
+    inline Iterator<ContainerType, I>& operator++() {
+      entry_ = entry_->Next();
+      return *this;
+    }
+
+   private:
+    friend IntrusiveDList;
+
+    IntrusiveDList<ContainerType, I>* head_;
+    IntrusiveDListEntry<ContainerType, I>* entry_;
+  };
+
+  inline IntrusiveDList() { head_.MakeHead(); }
+
+  inline void Append(T* a) { head_.Prepend(convert(a)); }
+
+  inline void Prepend(T* a) { head_.Append(convert(a)); }
+
+  // NOTE: This function only checks whether [a] is linked inside *some*
+  // [IntrusiveDList] with this entry index, not necessarily this one.
+  inline bool IsInList(T* a) { return convert(a)->IsLinked(); }
+
+  inline void Remove(T* a) { convert(a)->Remove(); }
+
+  inline bool IsEmpty() { return head_.IsEmpty(); }
+
+  inline T* First() {
+    ASSERT(!IsEmpty());
+    return head_.Next()->container();
+  }
+
+  inline T* Last() {
+    ASSERT(!IsEmpty());
+    return head_.Prev()->container();
+  }
+
+  inline T* RemoveFirst() {
+    ASSERT(!IsEmpty());
+    auto entry = head_.Next();
+    T* container = entry->container();
+    entry->Remove();
+    return container;
+  }
+
+  inline T* RemoveLast() {
+    ASSERT(!IsEmpty());
+    auto entry = head_.Prev();
+    T* container = entry->container();
+    entry->Remove();
+    return container;
+  }
+
+  inline Iterator<T, N> Begin() { return Iterator<T, N>(this, head_.Next()); }
+
+  inline Iterator<T, N> End() { return Iterator<T, N>(this, &head_); }
+
+  inline Iterator<T, N> begin() { return Begin(); }
+
+  inline Iterator<T, N> end() { return End(); }
+
+  inline Iterator<T, N> Erase(const Iterator<T, N>& iterator) {
+    ASSERT(iterator.head_ == this);
+    Iterator<T, N> next(this, iterator.entry_->Next());
+    iterator.entry_->Remove();
+    return next;
+  }
+
+ private:
+  Entry head_;
+
+  Entry* convert(T* entry) { return static_cast<Entry*>(entry); }
+};
+
+}  // namespace dart
+
+#endif  // RUNTIME_VM_INTRUSIVE_DLIST_H_
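
Because the entry index <N> lets a single object sit on several lists at the same time, a typical use is to keep one list of all objects plus a second list that acts as a FIFO work queue via Append()/RemoveFirst(). A minimal sketch under that assumption (the WorkItem and Drain names are illustrative, not part of this CL):

  #include "vm/intrusive_dlist.h"

  namespace dart {

  // Illustrative only: a work item that lives on two lists at once.
  // Entry index 1 tracks all items; entry index 2 acts as a FIFO run queue.
  class WorkItem : public IntrusiveDListEntry<WorkItem>,
                   public IntrusiveDListEntry<WorkItem, 2> {
   public:
    explicit WorkItem(int id) : id_(id) {}
    int id() const { return id_; }

   private:
    int id_;
  };

  void Drain() {
    WorkItem a(1), b(2);

    IntrusiveDList<WorkItem> all;       // Membership list (entry index 1).
    IntrusiveDList<WorkItem, 2> ready;  // Run queue (entry index 2).

    all.Append(&a);
    all.Append(&b);
    ready.Append(&a);
    ready.Append(&b);

    // Append()/RemoveFirst() gives FIFO order: a is processed before b.
    while (!ready.IsEmpty()) {
      WorkItem* item = ready.RemoveFirst();
      // ... process item ...
      all.Remove(item);  // Entries must be unlinked before destruction.
    }
  }

  }  // namespace dart
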
diff --git a/runtime/vm/intrusive_dlist_test.cc b/runtime/vm/intrusive_dlist_test.cc
new file mode 100644
index 0000000..c917b85
--- /dev/null
+++ b/runtime/vm/intrusive_dlist_test.cc
@@ -0,0 +1,155 @@
+// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+#include "vm/intrusive_dlist.h"
+#include "vm/unit_test.h"
+
+namespace dart {
+
+class Base {
+ public:
+  explicit Base(int arg) : base(arg) {}
+
+  int base;
+};
+
+class Item : public Base,
+             public IntrusiveDListEntry<Item>,
+             public IntrusiveDListEntry<Item, 2> {
+ public:
+  explicit Item(int arg0, int arg1) : Base(arg0), item(arg1) {}
+
+  int item;
+};
+
+UNIT_TEST_CASE(IntrusiveDListMultiEntryTest) {
+  Item a1(1, 11), a2(2, 12), a3(3, 13);
+
+  IntrusiveDList<Item> all;
+  IntrusiveDList<Item, 2> ready;
+
+  EXPECT(all.IsEmpty());
+  EXPECT(ready.IsEmpty());
+
+  all.Append(&a2);
+  all.Append(&a3);
+  all.Prepend(&a1);
+  EXPECT_EQ(all.First()->item, 11);
+  EXPECT_EQ(all.Last()->item, 13);
+
+  ready.Append(&a1);
+  ready.Append(&a2);
+  EXPECT_EQ(ready.First()->item, 11);
+  EXPECT_EQ(ready.Last()->item, 12);
+
+  int i = 0;
+  for (auto it = all.Begin(); it != all.End(); ++it) {
+    i++;
+    EXPECT_EQ(it->base, i);
+    EXPECT_EQ((*it)->base, i);
+    EXPECT_EQ(it->item, 10 + i);
+    EXPECT_EQ((*it)->item, 10 + i);
+  }
+  EXPECT_EQ(i, 3);
+
+  i = 0;
+  for (auto it = ready.Begin(); it != ready.End(); ++it) {
+    i++;
+    EXPECT_EQ(it->base, i);
+    EXPECT_EQ((*it)->base, i);
+    EXPECT_EQ(it->item, 10 + i);
+    EXPECT_EQ((*it)->item, 10 + i);
+  }
+  EXPECT_EQ(i, 2);
+
+  ready.Remove(&a1);
+  ready.Remove(&a2);
+
+  all.Remove(&a1);
+  all.Remove(&a2);
+  all.Remove(&a3);
+
+  EXPECT(all.IsEmpty());
+  EXPECT(ready.IsEmpty());
+}
+
+UNIT_TEST_CASE(IntrusiveDListRemoveFirstTest) {
+  Item a1(1, 11), a2(2, 12), a3(3, 13);
+
+  IntrusiveDList<Item> all;
+
+  all.Append(&a2);
+  all.Append(&a3);
+  all.Prepend(&a1);
+
+  EXPECT_EQ(&a1, all.RemoveFirst());
+  EXPECT_EQ(&a2, all.RemoveFirst());
+  EXPECT_EQ(&a3, all.RemoveFirst());
+
+  EXPECT(all.IsEmpty());
+}
+
+UNIT_TEST_CASE(IntrusiveDListRemoveLastTest) {
+  Item a1(1, 11), a2(2, 12), a3(3, 13);
+
+  IntrusiveDList<Item> all;
+
+  all.Append(&a2);
+  all.Append(&a3);
+  all.Prepend(&a1);
+
+  EXPECT_EQ(&a3, all.RemoveLast());
+  EXPECT_EQ(&a2, all.RemoveLast());
+  EXPECT_EQ(&a1, all.RemoveLast());
+  EXPECT(all.IsEmpty());
+}
+
+UNIT_TEST_CASE(IntrusiveDListIsInList) {
+  Item a1(1, 11), a2(2, 12), a3(3, 13);
+
+  IntrusiveDList<Item> all;
+
+  all.Append(&a2);
+  all.Append(&a3);
+  all.Prepend(&a1);
+
+  EXPECT(all.IsInList(&a1));
+  EXPECT(all.IsInList(&a2));
+  EXPECT(all.IsInList(&a3));
+
+  EXPECT_EQ(&a1, all.RemoveFirst());
+  EXPECT(!all.IsInList(&a1));
+  EXPECT_EQ(&a3, all.RemoveLast());
+  EXPECT(!all.IsInList(&a3));
+  EXPECT_EQ(&a2, all.RemoveFirst());
+  EXPECT(!all.IsInList(&a2));
+
+  EXPECT(all.IsEmpty());
+}
+
+UNIT_TEST_CASE(IntrusiveDListEraseIterator) {
+  Item a1(1, 11), a2(2, 12), a3(3, 13);
+
+  IntrusiveDList<Item> all;
+
+  all.Append(&a2);
+  all.Append(&a3);
+  all.Prepend(&a1);
+
+  auto it = all.Begin();
+  it = all.Erase(++it);
+  EXPECT_EQ(*it, &a3);
+  EXPECT_EQ(*it, all.Last());
+
+  it = all.Erase(all.Begin());
+  EXPECT_EQ(*it, &a3);
+  EXPECT_EQ(*it, all.First());
+  EXPECT_EQ(*it, all.Last());
+
+  it = all.Erase(all.Begin());
+  EXPECT(it == all.End());
+  EXPECT(all.IsEmpty());
+}
+
+}  // namespace dart
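
One detail worth calling out from the last test: Erase() unlinks the element the iterator points at and returns an iterator positioned on the following element, so elements can be removed safely while walking the list. A short sketch of that filter pattern, reusing the Item class from the test above (RemoveOddItems is an illustrative name, not part of this CL):

  // Illustrative only: remove elements while iterating. Erase() returns an
  // iterator on the element after the removed one, so the loop only
  // advances manually when nothing was erased.
  static void RemoveOddItems(IntrusiveDList<Item>* all) {
    for (auto it = all->Begin(); it != all->End();) {
      if ((*it)->item % 2 != 0) {
        it = all->Erase(it);  // Unlinks *it and yields the next position.
      } else {
        ++it;
      }
    }
  }
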
diff --git a/runtime/vm/vm_sources.gni b/runtime/vm/vm_sources.gni
index 82b9645..9bfbb25 100644
--- a/runtime/vm/vm_sources.gni
+++ b/runtime/vm/vm_sources.gni
@@ -119,6 +119,7 @@
   "instructions_x64.h",
   "interpreter.cc",
   "interpreter.h",
+  "intrusive_dlist.h",
   "isolate.cc",
   "isolate.h",
   "isolate_reload.cc",
@@ -394,6 +395,7 @@
   "instructions_arm_test.cc",
   "instructions_ia32_test.cc",
   "instructions_x64_test.cc",
+  "intrusive_dlist_test.cc",
   "isolate_reload_test.cc",
   "isolate_test.cc",
   "json_test.cc",