[vm] Update the profiler to account for the interpreter.

Also, remove paranoid checks from ProfilerDartStackWalker by
 - setting the VM tag to Dart only after initializing the exit slot
 - setting the VM tag to Runtime for leaf runtime functions
 - bailing out when in the prologue of the invocation stub's callee

Change-Id: Ifd1caee2203f8863b17fc7d0072de32290fd0e60
Reviewed-on: https://dart-review.googlesource.com/c/84140
Commit-Queue: Ryan Macnak <rmacnak@google.com>
Reviewed-by: Régis Crelier <regis@google.com>
diff --git a/runtime/vm/interpreter.cc b/runtime/vm/interpreter.cc
index 3ef8dad..f79d1c5 100644
--- a/runtime/vm/interpreter.cc
+++ b/runtime/vm/interpreter.cc
@@ -1086,6 +1086,7 @@
   *pc = reinterpret_cast<uint32_t*>(bytecode->ptr()->entry_point_);
   pc_ = reinterpret_cast<uword>(*pc);  // For the profiler.
   *FP = callee_fp;
+  fp_ = callee_fp;  // For the profiler.
   *SP = *FP - 1;
   return true;
 }
@@ -1371,6 +1372,7 @@
 #define HANDLE_RETURN                                                          \
   do {                                                                         \
     pp_ = InterpreterHelpers::FrameCode(FP)->ptr()->object_pool_;              \
+    fp_ = FP; /* For the profiler. */                                          \
   } while (0)
 
 // Runtime call helpers: handle invocation and potential exception after return.
@@ -1461,6 +1463,7 @@
   // FrameArguments(...) returns a pointer to the first argument.
   *SP = FrameArguments(*FP, materialization_arg_count) - 1;
   *FP = SavedCallerFP(*FP);
+  fp_ = *FP;  // For the profiler.
 
   // Restore pp.
   pp_ = InterpreterHelpers::FrameCode(*FP)->ptr()->object_pool_;
@@ -1631,14 +1634,6 @@
   }
 #endif
 
-  // Save current VM tag and mark thread as executing Dart code.
-  const uword vm_tag = thread->vm_tag();
-  thread->set_vm_tag(VMTag::kDartInterpretedTagId);
-
-  // Save current top stack resource and reset the list.
-  StackResource* top_resource = thread->top_resource();
-  thread->set_top_resource(NULL);
-
   // Setup entry frame:
   //
   //                        ^
@@ -1693,8 +1688,19 @@
   // object pool.
   pc = reinterpret_cast<uint32_t*>(bytecode->ptr()->entry_point_);
   pc_ = reinterpret_cast<uword>(pc);  // For the profiler.
+  fp_ = FP;                           // For the profiler.
   pp_ = bytecode->ptr()->object_pool_;
 
+  // Save current VM tag and mark thread as executing Dart code. For the
+  // profiler, do this *after* setting up the entry frame (compare the machine
+  // code entry stubs).
+  const uword vm_tag = thread->vm_tag();
+  thread->set_vm_tag(VMTag::kDartInterpretedTagId);
+
+  // Save current top stack resource and reset the list.
+  StackResource* top_resource = thread->top_resource();
+  thread->set_top_resource(NULL);
+
   // Cache some frequently used values in the frame.
   RawBool* true_value = Bool::True().raw();
   RawBool* false_value = Bool::False().raw();
@@ -2365,6 +2371,7 @@
     // Restore SP, FP and PP. Push result and dispatch.
     SP = FrameArguments(FP, argc);
     FP = SavedCallerFP(FP);
+    fp_ = FP;  // For the profiler.
     pp_ = InterpreterHelpers::FrameCode(FP)->ptr()->object_pool_;
     *SP = result;
     DISPATCH();
@@ -2918,6 +2925,7 @@
     SP = FrameArguments(FP, 0);
     RawObject** args = SP - argc;
     FP = SavedCallerFP(FP);
+    fp_ = FP;  // For the profiler.
     if (has_dart_caller) {
       pp_ = InterpreterHelpers::FrameCode(FP)->ptr()->object_pool_;
     }
diff --git a/runtime/vm/object.cc b/runtime/vm/object.cc
index de5035e..a158cd3 100644
--- a/runtime/vm/object.cc
+++ b/runtime/vm/object.cc
@@ -15205,6 +15205,16 @@
   return obj.IsFunction();
 }
 
+bool Code::IsBytecode() const {
+#if defined(DART_PRECOMPILED_RUNTIME)
+  return false;
+#else
+  const Object& obj = Object::Handle(owner());
+  if (!obj.IsFunction()) return false;
+  return Function::Cast(obj).Bytecode() == raw();
+#endif
+}
+
 void Code::DisableDartCode() const {
   DEBUG_ASSERT(IsMutatorOrAtSafepoint());
   ASSERT(IsFunctionCode());
@@ -15283,7 +15293,7 @@
     GrowableArray<TokenPosition>* token_positions) const {
   const CodeSourceMap& map = CodeSourceMap::Handle(code_source_map());
   if (map.IsNull()) {
-    ASSERT(!IsFunctionCode() ||
+    ASSERT(!IsFunctionCode() || IsBytecode() ||
            (Isolate::Current()->object_store()->megamorphic_miss_code() ==
             this->raw()));
     return;  // VM stub, allocation stub, or megamorphic miss function.
diff --git a/runtime/vm/object.h b/runtime/vm/object.h
index ea227ec..dd36b85 100644
--- a/runtime/vm/object.h
+++ b/runtime/vm/object.h
@@ -5183,6 +5183,7 @@
   bool IsAllocationStubCode() const;
   bool IsStubCode() const;
   bool IsFunctionCode() const;
+  bool IsBytecode() const;
 
   void DisableDartCode() const;
 
diff --git a/runtime/vm/profiler.cc b/runtime/vm/profiler.cc
index ce581ef..f5b9a86 100644
--- a/runtime/vm/profiler.cc
+++ b/runtime/vm/profiler.cc
@@ -520,7 +520,6 @@
                           uword pc,
                           uword fp,
                           uword sp,
-                          bool exited_dart_code,
                           bool allocation_sample,
                           intptr_t skip_count = 0)
       : ProfilerStackWalker((thread->isolate() != NULL)
@@ -529,127 +528,126 @@
                             sample,
                             sample_buffer,
                             skip_count),
+        thread_(thread),
         pc_(reinterpret_cast<uword*>(pc)),
         fp_(reinterpret_cast<uword*>(fp)),
         sp_(reinterpret_cast<uword*>(sp)),
         stack_upper_(stack_upper),
-        stack_lower_(stack_lower),
-        has_exit_frame_(exited_dart_code) {
-    if (exited_dart_code) {
-// On Windows and Fuchsia the profiler does not run on the thread being
-// profiled.
-#if defined(HOST_OS_WINDOWS) || defined(HOST_OS_FUCHSIA)
-      const StackFrameIterator::CrossThreadPolicy cross_thread_policy =
-          StackFrameIterator::kAllowCrossThreadIteration;
+        stack_lower_(stack_lower) {}
+
+  bool IsInterpretedFrame(uword* fp) {
+#if defined(DART_PRECOMPILED_RUNTIME)
+    return false;
 #else
-      const StackFrameIterator::CrossThreadPolicy cross_thread_policy =
-          StackFrameIterator::kNoCrossThreadIteration;
+    Interpreter* interpreter = thread_->interpreter();
+    if (interpreter == nullptr) return false;
+    return interpreter->HasFrame(reinterpret_cast<uword>(fp));
 #endif
-      StackFrameIterator iterator(ValidationPolicy::kDontValidateFrames, thread,
-                                  cross_thread_policy);
-      pc_ = NULL;
-      fp_ = NULL;
-      sp_ = NULL;
-      if (!iterator.HasNextFrame()) {
-        return;
-      }
-      // Ensure we are able to get to the exit frame.
-      StackFrame* frame = iterator.NextFrame();
-      if (!frame->IsExitFrame()) {
-        return;
-      }
-      // Skip the exit frame.
-      if (!iterator.HasNextFrame()) {
-        return;
-      }
-      frame = iterator.NextFrame();
-      // Record frame details of the first frame from which we start walking.
-      pc_ = reinterpret_cast<uword*>(frame->pc());
-      fp_ = reinterpret_cast<uword*>(frame->fp());
-      sp_ = reinterpret_cast<uword*>(frame->sp());
-      is_interpreted_frame_ = frame->is_interpreted();
-    }
   }
 
   void walk() {
-    sample_->set_exit_frame_sample(has_exit_frame_);
-    if (!ValidFramePointer()) {
+    RELEASE_ASSERT(StubCode::HasBeenInitialized());
+    if (thread_->isolate()->IsDeoptimizing()) {
       sample_->set_ignore_sample(true);
       return;
     }
-    ASSERT(ValidFramePointer());
-    uword return_pc = InitialReturnAddress();
-    if (StubCode::InInvocationStub(return_pc, is_interpreted_frame_)) {
-      // Edge case- we have called out from the Invocation Stub but have not
-      // created the stack frame of the callee. Attempt to locate the exit
-      // frame before walking the stack.
-      if (!NextExit() || !ValidFramePointer()) {
-        // Nothing to sample.
+
+    uword* exit_fp = reinterpret_cast<uword*>(thread_->top_exit_frame_info());
+    bool in_interpreted_frame;
+    bool has_exit_frame = exit_fp != 0;
+    if (has_exit_frame) {
+      if (IsInterpretedFrame(exit_fp)) {
+        // Exited from interpreter.
+        pc_ = 0;
+        fp_ = exit_fp;
+        in_interpreted_frame = true;
+        RELEASE_ASSERT(IsInterpretedFrame(fp_));
+      } else {
+        // Exited from compiled code.
+        pc_ = 0;
+        fp_ = exit_fp;
+        in_interpreted_frame = false;
+      }
+
+      // Skip exit frame.
+      pc_ = CallerPC(in_interpreted_frame);
+      fp_ = CallerFP(in_interpreted_frame);
+
+      // Can only move between interpreted and compiled frames after an exit
+      // frame.
+      RELEASE_ASSERT(IsInterpretedFrame(fp_) == in_interpreted_frame);
+    } else {
+      if (thread_->vm_tag() == VMTag::kDartCompiledTagId) {
+        // Running compiled code.
+        // Use the FP and PC from the thread interrupt or simulator; already set
+        // in the constructor.
+        in_interpreted_frame = false;
+      } else if (thread_->vm_tag() == VMTag::kDartInterpretedTagId) {
+        // Running interpreter.
+#if defined(DART_PRECOMPILED_RUNTIME)
+        UNREACHABLE();
+#else
+        pc_ = reinterpret_cast<uword*>(thread_->interpreter()->get_pc());
+        fp_ = reinterpret_cast<uword*>(thread_->interpreter()->get_fp());
+#endif
+        in_interpreted_frame = true;
+        RELEASE_ASSERT(IsInterpretedFrame(fp_));
+      } else {
+        // No Dart on the stack; caller shouldn't use this walker.
+        UNREACHABLE();
+      }
+    }
+
+    sample_->set_exit_frame_sample(has_exit_frame);
+
+    for (;;) {
+      // Skip entry frame.
+      if (StubCode::InInvocationStub(reinterpret_cast<uword>(pc_),
+                                     in_interpreted_frame)) {
+        pc_ = 0;
+        fp_ = ExitLink(in_interpreted_frame);
+        if (fp_ == 0) {
+          break;  // End of Dart stack.
+        }
+        in_interpreted_frame = IsInterpretedFrame(fp_);
+
+        // Skip exit frame.
+        pc_ = CallerPC(in_interpreted_frame);
+        fp_ = CallerFP(in_interpreted_frame);
+
+        // At least one frame between exit and next entry frame.
+        RELEASE_ASSERT(!StubCode::InInvocationStub(reinterpret_cast<uword>(pc_),
+                                                   in_interpreted_frame));
+      }
+
+#if !defined(TARGET_ARCH_DBC)
+      RawCode* marker = PCMarker(in_interpreted_frame);
+      if (marker == StubCode::InvokeDartCode_entry()->code() ||
+          marker == StubCode::InvokeDartCodeFromBytecode_entry()->code()) {
+        // During the prologue of a function, CallerPC will return the caller's
+        // caller. For most frames, the missing PC will be added during profile
+        // processing. However, during this stack walk, it can cause us to fail
+        // to identify the entry frame and lead the stack walk into the weeds.
+        RELEASE_ASSERT(!has_exit_frame);
         sample_->set_ignore_sample(true);
         return;
       }
-    }
-    while (true) {
+#endif
+
       if (!Append(reinterpret_cast<uword>(pc_))) {
-        return;
+        break;  // Sample is full.
       }
-      if (!Next()) {
-        return;
-      }
+
+      pc_ = CallerPC(in_interpreted_frame);
+      fp_ = CallerFP(in_interpreted_frame);
+
+      // Can only move between interpreted and compiled frames after an exit
+      // frame.
+      RELEASE_ASSERT(IsInterpretedFrame(fp_) == in_interpreted_frame);
     }
   }
 
  private:
-  bool Next() {
-    if (!ValidFramePointer()) {
-      return false;
-    }
-    if (StubCode::InInvocationStub(reinterpret_cast<uword>(pc_),
-                                   is_interpreted_frame_)) {
-      // In invocation stub.
-      return NextExit();
-    }
-    // In regular Dart frame.
-    uword* new_pc = CallerPC();
-    uword* new_fp = CallerFP();
-    if (!IsCalleeFrameOf(reinterpret_cast<uword>(new_fp),
-                         reinterpret_cast<uword>(fp_))) {
-      // FP didn't move to a caller (higher address on most architectures).
-      return false;
-    }
-    // Success, update fp and pc.
-    fp_ = new_fp;
-    pc_ = new_pc;
-    if (StubCode::InInvocationStub(reinterpret_cast<uword>(pc_),
-                                   is_interpreted_frame_)) {
-      // In invocation stub.
-      return NextExit();
-    }
-    return true;
-  }
-
-  bool NextExit() {
-    if (!ValidFramePointer()) {
-      return false;
-    }
-    uword* new_fp = ExitLink();
-    if (new_fp == NULL) {
-      // No exit link.
-      return false;
-    }
-    if (new_fp <= fp_) {
-      // FP didn't move to a higher address.
-      return false;
-    }
-    if (!ValidFramePointer(new_fp)) {
-      return false;
-    }
-    // Success, update fp and pc.
-    fp_ = new_fp;
-    pc_ = CallerPC();
-    return true;
-  }
-
   uword InitialReturnAddress() const {
     ASSERT(sp_ != NULL);
     // MSan/ASan are unaware of frames initialized by generated code.
@@ -658,27 +656,44 @@
     return *(sp_);
   }
 
-  uword* CallerPC() const {
+  uword* CallerPC(bool interp) const {
     ASSERT(fp_ != NULL);
-    uword* caller_pc_ptr = fp_ + kSavedCallerPcSlotFromFp;
+    uword* caller_pc_ptr =
+        fp_ + (interp ? kKBCSavedCallerPcSlotFromFp : kSavedCallerPcSlotFromFp);
     // MSan/ASan are unaware of frames initialized by generated code.
     MSAN_UNPOISON(caller_pc_ptr, kWordSize);
     ASAN_UNPOISON(caller_pc_ptr, kWordSize);
     return reinterpret_cast<uword*>(*caller_pc_ptr);
   }
 
-  uword* CallerFP() const {
+  uword* CallerFP(bool interp) const {
     ASSERT(fp_ != NULL);
-    uword* caller_fp_ptr = fp_ + kSavedCallerFpSlotFromFp;
+    uword* caller_fp_ptr =
+        fp_ + (interp ? kKBCSavedCallerFpSlotFromFp : kSavedCallerFpSlotFromFp);
     // MSan/ASan are unaware of frames initialized by generated code.
     MSAN_UNPOISON(caller_fp_ptr, kWordSize);
     ASAN_UNPOISON(caller_fp_ptr, kWordSize);
     return reinterpret_cast<uword*>(*caller_fp_ptr);
   }
 
-  uword* ExitLink() const {
+  RawCode* PCMarker(bool interp) const {
     ASSERT(fp_ != NULL);
-    uword* exit_link_ptr = fp_ + kExitLinkSlotFromEntryFp;
+    if (interp) {
+      // We don't need this extra check for the interpreter because its frame
+      // build is atomic from the profiler's point of view.
+      return NULL;
+    }
+    uword* pc_marker_ptr = fp_ + kPcMarkerSlotFromFp;
+    // MSan/ASan are unaware of frames initialized by generated code.
+    MSAN_UNPOISON(pc_marker_ptr, kWordSize);
+    ASAN_UNPOISON(pc_marker_ptr, kWordSize);
+    return reinterpret_cast<RawCode*>(*pc_marker_ptr);
+  }
+
+  uword* ExitLink(bool interp) const {
+    ASSERT(fp_ != NULL);
+    uword* exit_link_ptr =
+        fp_ + (interp ? kKBCExitLinkSlotFromEntryFp : kExitLinkSlotFromEntryFp);
     // MSan/ASan are unaware of frames initialized by generated code.
     MSAN_UNPOISON(exit_link_ptr, kWordSize);
     ASAN_UNPOISON(exit_link_ptr, kWordSize);
@@ -696,13 +711,12 @@
     return (cursor >= stack_lower_) && (cursor < stack_upper_);
   }
 
+  Thread* const thread_;
   uword* pc_;
   uword* fp_;
   uword* sp_;
-  bool is_interpreted_frame_;
   const uword stack_upper_;
   const uword stack_lower_;
-  bool has_exit_frame_;
 };
 
 // If the VM is compiled without frame pointers (which is the default on
@@ -1186,7 +1200,7 @@
   } else if (exited_dart_code) {
     ProfilerDartStackWalker dart_exit_stack_walker(
         thread, sample, sample_buffer, stack_lower, stack_upper, pc, fp, sp,
-        exited_dart_code, true);
+        /* allocation_sample*/ true);
     dart_exit_stack_walker.walk();
   } else {
     // Fall back.
@@ -1387,7 +1401,7 @@
   const bool exited_dart_code = thread->HasExitedDartCode();
   ProfilerDartStackWalker dart_stack_walker(thread, sample, sample_buffer,
                                             stack_lower, stack_upper, pc, fp,
-                                            sp, exited_dart_code, false);
+                                            sp, /* allocation_sample*/ false);
 
   // All memory access is done inside CollectSample.
   CollectSample(isolate, exited_dart_code, in_dart_code, sample,
diff --git a/runtime/vm/runtime_entry_arm.cc b/runtime/vm/runtime_entry_arm.cc
index 8f44e23..4b19f0d 100644
--- a/runtime/vm/runtime_entry_arm.cc
+++ b/runtime/vm/runtime_entry_arm.cc
@@ -45,7 +45,11 @@
 void RuntimeEntry::Call(Assembler* assembler, intptr_t argument_count) const {
   if (is_leaf()) {
     ASSERT(argument_count == this->argument_count());
-    __ BranchLinkOffset(THR, Thread::OffsetFromThread(this));
+    __ LoadFromOffset(kWord, TMP, THR, Thread::OffsetFromThread(this));
+    __ str(TMP, Address(THR, Thread::vm_tag_offset()));
+    __ blx(TMP);
+    __ LoadImmediate(TMP, VMTag::kDartCompiledTagId);
+    __ str(TMP, Address(THR, Thread::vm_tag_offset()));
   } else {
     // Argument count is not checked here, but in the runtime entry for a more
     // informative error message.
diff --git a/runtime/vm/runtime_entry_arm64.cc b/runtime/vm/runtime_entry_arm64.cc
index 04a80e6..250e05f 100644
--- a/runtime/vm/runtime_entry_arm64.cc
+++ b/runtime/vm/runtime_entry_arm64.cc
@@ -61,7 +61,10 @@
     __ ReserveAlignedFrameSpace(0);
     __ mov(CSP, SP);
     __ ldr(TMP, Address(THR, Thread::OffsetFromThread(this)));
+    __ str(TMP, Address(THR, Thread::vm_tag_offset()));
     __ blr(TMP);
+    __ LoadImmediate(TMP, VMTag::kDartCompiledTagId);
+    __ str(TMP, Address(THR, Thread::vm_tag_offset()));
     __ mov(SP, R25);
     __ mov(CSP, R23);
   } else {
diff --git a/runtime/vm/runtime_entry_ia32.cc b/runtime/vm/runtime_entry_ia32.cc
index 917f6cc..26f300e 100644
--- a/runtime/vm/runtime_entry_ia32.cc
+++ b/runtime/vm/runtime_entry_ia32.cc
@@ -30,8 +30,10 @@
 void RuntimeEntry::Call(Assembler* assembler, intptr_t argument_count) const {
   if (is_leaf()) {
     ASSERT(argument_count == this->argument_count());
-    ExternalLabel label(GetEntryPoint());
-    __ call(&label);
+    __ movl(EAX, Immediate(GetEntryPoint()));
+    __ movl(Assembler::VMTagAddress(), EAX);
+    __ call(EAX);
+    __ movl(Assembler::VMTagAddress(), Immediate(VMTag::kDartCompiledTagId));
   } else {
     // Argument count is not checked here, but in the runtime entry for a more
     // informative error message.
diff --git a/runtime/vm/runtime_entry_x64.cc b/runtime/vm/runtime_entry_x64.cc
index 105d71c..0875c69 100644
--- a/runtime/vm/runtime_entry_x64.cc
+++ b/runtime/vm/runtime_entry_x64.cc
@@ -29,7 +29,9 @@
     ASSERT(argument_count == this->argument_count());
     COMPILE_ASSERT(CallingConventions::kVolatileCpuRegisters & (1 << RAX));
     __ movq(RAX, Address(THR, Thread::OffsetFromThread(this)));
+    __ movq(Assembler::VMTagAddress(), RAX);
     __ CallCFunction(RAX);
+    __ movq(Assembler::VMTagAddress(), Immediate(VMTag::kDartCompiledTagId));
   } else {
     // Argument count is not checked here, but in the runtime entry for a more
     // informative error message.
diff --git a/runtime/vm/stub_code_arm.cc b/runtime/vm/stub_code_arm.cc
index 4d2a63a..7209ac5 100644
--- a/runtime/vm/stub_code_arm.cc
+++ b/runtime/vm/stub_code_arm.cc
@@ -967,10 +967,6 @@
   __ LoadFromOffset(kWord, R9, THR, Thread::vm_tag_offset());
   __ Push(R9);
 
-  // Mark that the thread is executing Dart code.
-  __ LoadImmediate(R9, VMTag::kDartCompiledTagId);
-  __ StoreToOffset(kWord, R9, THR, Thread::vm_tag_offset());
-
   // Save top resource and top exit frame info. Use R4-6 as temporary registers.
   // StackFrameIterator reads the top exit frame info saved in this frame.
   __ LoadFromOffset(kWord, R9, THR, Thread::top_exit_frame_info_offset());
@@ -988,6 +984,11 @@
 #endif
   __ Push(R9);
 
+  // Mark that the thread is executing Dart code. Do this after initializing the
+  // exit link for the profiler.
+  __ LoadImmediate(R9, VMTag::kDartCompiledTagId);
+  __ StoreToOffset(kWord, R9, THR, Thread::vm_tag_offset());
+
   // Load arguments descriptor array into R4, which is passed to Dart code.
   __ ldr(R4, Address(R1, VMHandles::kOffsetOfRawPtrInHandle));
 
diff --git a/runtime/vm/stub_code_arm64.cc b/runtime/vm/stub_code_arm64.cc
index e980206..f1ff4f6 100644
--- a/runtime/vm/stub_code_arm64.cc
+++ b/runtime/vm/stub_code_arm64.cc
@@ -1048,10 +1048,6 @@
   __ LoadFromOffset(R4, THR, Thread::vm_tag_offset());
   __ Push(R4);
 
-  // Mark that the thread is executing Dart code.
-  __ LoadImmediate(R6, VMTag::kDartCompiledTagId);
-  __ StoreToOffset(R6, THR, Thread::vm_tag_offset());
-
   // Save top resource and top exit frame info. Use R6 as a temporary register.
   // StackFrameIterator reads the top exit frame info saved in this frame.
   __ LoadFromOffset(R6, THR, Thread::top_resource_offset());
@@ -1063,6 +1059,11 @@
   ASSERT(kExitLinkSlotFromEntryFp == -22);
   __ Push(R6);
 
+  // Mark that the thread is executing Dart code. Do this after initializing the
+  // exit link for the profiler.
+  __ LoadImmediate(R6, VMTag::kDartCompiledTagId);
+  __ StoreToOffset(R6, THR, Thread::vm_tag_offset());
+
   // Load arguments descriptor array into R4, which is passed to Dart code.
   __ LoadFromOffset(R4, R1, VMHandles::kOffsetOfRawPtrInHandle);
 
@@ -1187,10 +1188,6 @@
   __ LoadFromOffset(R4, THR, Thread::vm_tag_offset());
   __ Push(R4);
 
-  // Mark that the thread is executing Dart code.
-  __ LoadImmediate(R6, VMTag::kDartCompiledTagId);
-  __ StoreToOffset(R6, THR, Thread::vm_tag_offset());
-
   // Save top resource and top exit frame info. Use R6 as a temporary register.
   // StackFrameIterator reads the top exit frame info saved in this frame.
   __ LoadFromOffset(R6, THR, Thread::top_resource_offset());
@@ -1202,6 +1199,11 @@
   ASSERT(kExitLinkSlotFromEntryFp == -22);
   __ Push(R6);
 
+  // Mark that the thread is executing Dart code. Do this after initializing the
+  // exit link for the profiler.
+  __ LoadImmediate(R6, VMTag::kDartCompiledTagId);
+  __ StoreToOffset(R6, THR, Thread::vm_tag_offset());
+
   // Load arguments descriptor array into R4, which is passed to Dart code.
   __ mov(R4, R1);
 
diff --git a/runtime/vm/stub_code_ia32.cc b/runtime/vm/stub_code_ia32.cc
index 48024d7..bab20db 100644
--- a/runtime/vm/stub_code_ia32.cc
+++ b/runtime/vm/stub_code_ia32.cc
@@ -756,9 +756,6 @@
   __ movl(ECX, Assembler::VMTagAddress());
   __ pushl(ECX);
 
-  // Mark that the thread is executing Dart code.
-  __ movl(Assembler::VMTagAddress(), Immediate(VMTag::kDartCompiledTagId));
-
   // Save top resource and top exit frame info. Use EDX as a temporary register.
   // StackFrameIterator reads the top exit frame info saved in this frame.
   __ movl(EDX, Address(THR, Thread::top_resource_offset()));
@@ -771,6 +768,10 @@
   __ pushl(EDX);
   __ movl(Address(THR, Thread::top_exit_frame_info_offset()), Immediate(0));
 
+  // Mark that the thread is executing Dart code. Do this after initializing the
+  // exit link for the profiler.
+  __ movl(Assembler::VMTagAddress(), Immediate(VMTag::kDartCompiledTagId));
+
   // Load arguments descriptor array into EDX.
   __ movl(EDX, Address(EBP, kArgumentsDescOffset));
   __ movl(EDX, Address(EDX, VMHandles::kOffsetOfRawPtrInHandle));
diff --git a/runtime/vm/stub_code_x64.cc b/runtime/vm/stub_code_x64.cc
index fbf6e94..1ce8502 100644
--- a/runtime/vm/stub_code_x64.cc
+++ b/runtime/vm/stub_code_x64.cc
@@ -964,9 +964,6 @@
   __ movq(RAX, Assembler::VMTagAddress());
   __ pushq(RAX);
 
-  // Mark that the thread is executing Dart code.
-  __ movq(Assembler::VMTagAddress(), Immediate(VMTag::kDartCompiledTagId));
-
   // Save top resource and top exit frame info. Use RAX as a temporary register.
   // StackFrameIterator reads the top exit frame info saved in this frame.
   __ movq(RAX, Address(THR, Thread::top_resource_offset()));
@@ -990,6 +987,10 @@
 
   __ movq(Address(THR, Thread::top_exit_frame_info_offset()), Immediate(0));
 
+  // Mark that the thread is executing Dart code. Do this after initializing the
+  // exit link for the profiler.
+  __ movq(Assembler::VMTagAddress(), Immediate(VMTag::kDartCompiledTagId));
+
   // Load arguments descriptor array into R10, which is passed to Dart code.
   __ movq(R10, Address(kArgDescReg, VMHandles::kOffsetOfRawPtrInHandle));
 
@@ -1105,9 +1106,6 @@
   __ movq(RAX, Assembler::VMTagAddress());
   __ pushq(RAX);
 
-  // Mark that the thread is executing Dart code.
-  __ movq(Assembler::VMTagAddress(), Immediate(VMTag::kDartCompiledTagId));
-
   // Save top resource and top exit frame info. Use RAX as a temporary register.
   // StackFrameIterator reads the top exit frame info saved in this frame.
   __ movq(RAX, Address(THR, Thread::top_resource_offset()));
@@ -1130,6 +1128,10 @@
   }
 #endif
 
+  // Mark that the thread is executing Dart code. Do this after initializing the
+  // exit link for the profiler.
+  __ movq(Assembler::VMTagAddress(), Immediate(VMTag::kDartCompiledTagId));
+
   // Load arguments descriptor array into R10, which is passed to Dart code.
   __ movq(R10, kArgDescReg);