[vm/bytecode] Trigger JIT-compilation from interpreter

Performance on Richards benchmark (no bytecode in platform, release build):

Interpreter (--enable-interpreter --compilation-counter-threshold=-1)
Richards(RunTime): 177228.34433333334 us.

Interpreter + bytecode compilation with default threshold 10 (--enable-interpreter)
Richards(RunTime): 930.8050725919032 us.
Varies from run to run, spikes up to 4178.005912317328 us (no OSR yet, only background compilation; the trigger policy is sketched after the results below).

Bytecode compilation (--use-bytecode-compiler)
Richards(RunTime): 830.1371626556016 us.

Default pipeline:
Richards(RunTime): 880.1557113946327 us.

Performance on Richards benchmark (platform with bytecode, release build):

Interpreter (--enable-interpreter --compilation-counter-threshold=-1)
Richards(RunTime): 354553.6346666667 us.

Interpreter + compilation with default threshold 10 (--enable-interpreter)
Richards(RunTime): 934.7108855140189 us.

Bytecode compilation (--use-bytecode-compiler)
Richards(RunTime): 822.74178403949 us.


Change-Id: Ic13e9f7c7d98f2930e6de4c4f74bac4d28478249
Reviewed-on: https://dart-review.googlesource.com/75781
Reviewed-by: Zach Anderson <zra@google.com>
Reviewed-by: RĂ©gis Crelier <regis@google.com>
Commit-Queue: Alexander Markov <alexmarkov@google.com>
diff --git a/runtime/vm/compiler/backend/flow_graph.h b/runtime/vm/compiler/backend/flow_graph.h
index a027b8a..3a87672 100644
--- a/runtime/vm/compiler/backend/flow_graph.h
+++ b/runtime/vm/compiler/backend/flow_graph.h
@@ -128,9 +128,12 @@
   }
 
   intptr_t CurrentContextEnvIndex() const {
-    return FLAG_use_bytecode_compiler
-               ? -1
-               : EnvIndex(parsed_function().current_context_var());
+#if !defined(DART_PRECOMPILED_RUNTIME)
+    if (function().HasBytecode()) {
+      return -1;
+    }
+#endif  // !defined(DART_PRECOMPILED_RUNTIME)
+    return EnvIndex(parsed_function().current_context_var());
   }
 
   intptr_t RawTypeArgumentEnvIndex() const {
diff --git a/runtime/vm/compiler/backend/flow_graph_compiler.cc b/runtime/vm/compiler/backend/flow_graph_compiler.cc
index efb9313..cc2eb01 100644
--- a/runtime/vm/compiler/backend/flow_graph_compiler.cc
+++ b/runtime/vm/compiler/backend/flow_graph_compiler.cc
@@ -1038,7 +1038,7 @@
 
 void FlowGraphCompiler::FinalizeVarDescriptors(const Code& code) {
   // TODO(alexmarkov): revise local vars descriptors when compiling bytecode
-  if (code.is_optimized() || FLAG_use_bytecode_compiler) {
+  if (code.is_optimized() || flow_graph().function().HasBytecode()) {
     // Optimized code does not need variable descriptors. They are
     // only stored in the unoptimized version.
     code.set_var_descriptors(Object::empty_var_descriptors());
diff --git a/runtime/vm/compiler/backend/il.cc b/runtime/vm/compiler/backend/il.cc
index 35c7077..0dd586c 100644
--- a/runtime/vm/compiler/backend/il.cc
+++ b/runtime/vm/compiler/backend/il.cc
@@ -3991,7 +3991,7 @@
   }
 
 #if !defined(TARGET_ARCH_DBC)
-  if ((compiler->is_optimizing() || FLAG_use_bytecode_compiler) &&
+  if ((compiler->is_optimizing() || compiler->function().HasBytecode()) &&
       HasICData()) {
     ASSERT(HasICData());
     if (ic_data()->NumberOfUsedChecks() > 0) {
diff --git a/runtime/vm/compiler/backend/type_propagator.cc b/runtime/vm/compiler/backend/type_propagator.cc
index 67ac074..6f63f4d 100644
--- a/runtime/vm/compiler/backend/type_propagator.cc
+++ b/runtime/vm/compiler/backend/type_propagator.cc
@@ -938,7 +938,7 @@
     return CompileType::Dynamic();
   }
 
-  if (FLAG_use_bytecode_compiler &&
+  if (function.HasBytecode() &&
       graph_entry->parsed_function().node_sequence() == nullptr) {
     // TODO(alexmarkov): Consider adding node_sequence() and scope.
     return CompileType::Dynamic();
diff --git a/runtime/vm/compiler/frontend/bytecode_flow_graph_builder.cc b/runtime/vm/compiler/frontend/bytecode_flow_graph_builder.cc
index 6cacf81..2499872 100644
--- a/runtime/vm/compiler/frontend/bytecode_flow_graph_builder.cc
+++ b/runtime/vm/compiler/frontend/bytecode_flow_graph_builder.cc
@@ -1303,10 +1303,8 @@
 }
 
 FlowGraph* BytecodeFlowGraphBuilder::BuildGraph() {
-  if (function().is_native()) {
-    // Use default flow graph builder for native methods.
-    return nullptr;
-  }
+  // Use default flow graph builder for native methods.
+  ASSERT(!function().is_native());
 
   const Code& bytecode = Code::Handle(Z, function().Bytecode());
 
diff --git a/runtime/vm/compiler/frontend/kernel_binary_flowgraph.cc b/runtime/vm/compiler/frontend/kernel_binary_flowgraph.cc
index 5f0e767..38a9a18 100644
--- a/runtime/vm/compiler/frontend/kernel_binary_flowgraph.cc
+++ b/runtime/vm/compiler/frontend/kernel_binary_flowgraph.cc
@@ -1909,7 +1909,8 @@
 
   SetOffset(kernel_offset);
 
-  if (FLAG_use_bytecode_compiler && function.IsBytecodeAllowed(Z)) {
+  if ((FLAG_use_bytecode_compiler || FLAG_enable_interpreter) &&
+      function.IsBytecodeAllowed(Z) && !function.is_native()) {
     if (!function.HasBytecode()) {
       bytecode_metadata_helper_.ReadMetadata(function);
     }
@@ -1918,9 +1919,8 @@
           flow_graph_builder_, parsed_function(),
           &(flow_graph_builder_->ic_data_array_));
       FlowGraph* flow_graph = bytecode_compiler.BuildGraph();
-      if (flow_graph != nullptr) {
-        return flow_graph;
-      }
+      ASSERT(flow_graph != nullptr);
+      return flow_graph;
     }
   }
 
diff --git a/runtime/vm/compiler/jit/compiler.cc b/runtime/vm/compiler/jit/compiler.cc
index 8c9eff9..30a6d36 100644
--- a/runtime/vm/compiler/jit/compiler.cc
+++ b/runtime/vm/compiler/jit/compiler.cc
@@ -140,8 +140,7 @@
 
 bool UseKernelFrontEndFor(ParsedFunction* parsed_function) {
   const Function& function = parsed_function->function();
-  return (function.kernel_offset() > 0) ||
-         (FLAG_use_bytecode_compiler && function.HasBytecode()) ||
+  return (function.kernel_offset() > 0) || function.HasBytecode() ||
          (function.kind() == RawFunction::kNoSuchMethodDispatcher) ||
          (function.kind() == RawFunction::kInvokeFieldDispatcher);
 }
@@ -262,6 +261,10 @@
     if (function.HasBytecode()) {
       // Verify that InterpretCall stub code was installed.
       ASSERT(function.CurrentCode() == StubCode::InterpretCall_entry()->code());
+      // If the interpreter is enabled and there is bytecode, the LazyCompile
+      // stub (which calls CompileFunction) should proceed to InterpretCall in
+      // order to enter the interpreter. In that case, compilation is postponed
+      // and triggered later by the interpreter via OptimizeInvokedFunction.
       return;
     }
     // No bytecode, fall back to compilation.
@@ -1008,7 +1011,7 @@
     CompileParsedFunctionHelper helper(parsed_function, optimized, osr_id);
 
     if (Compiler::IsBackgroundCompilation()) {
-      ASSERT(optimized && function.is_background_optimizable());
+      ASSERT(function.is_background_optimizable());
       if (isolate->IsTopLevelParsing() ||
           (loading_invalidation_gen_at_start !=
            isolate->loading_invalidation_gen())) {
@@ -1027,48 +1030,49 @@
         function.SetWasCompiled(true);
       }
     } else {
-      if (optimized) {
-        if (Compiler::IsBackgroundCompilation()) {
-          // Try again later, background compilation may abort because of
-          // state change during compilation.
-          if (FLAG_trace_compiler) {
-            THR_Print("Aborted background compilation: %s\n",
-                      function.ToFullyQualifiedCString());
-          }
-          {
-            // If it was a bailout, then disable optimization.
-            Error& error = Error::Handle();
-            // We got an error during compilation.
-            error = thread->sticky_error();
-            thread->clear_sticky_error();
-
-            if (error.raw() == Object::background_compilation_error().raw()) {
-              if (FLAG_trace_compiler) {
-                THR_Print(
-                    "--> disabling background optimizations for '%s' (will "
-                    "try to re-compile on isolate thread again)\n",
+      if (Compiler::IsBackgroundCompilation()) {
+        // Try again later, background compilation may abort because of
+        // state change during compilation.
+        if (FLAG_trace_compiler) {
+          THR_Print("Aborted background compilation: %s\n",
                     function.ToFullyQualifiedCString());
-              }
-
-              // Ensure we don't attempt to re-compile the function on the
-              // background compiler.
-              function.set_is_background_optimizable(false);
-
-              // Trigger another optimization soon on the main thread.
-              function.SetUsageCounter(FLAG_optimization_counter_threshold);
-            } else if ((error.IsLanguageError() &&
-                        LanguageError::Cast(error).kind() ==
-                            Report::kBailout) ||
-                       error.IsUnhandledException()) {
-              if (FLAG_trace_compiler) {
-                THR_Print("--> disabling optimizations for '%s'\n",
-                          function.ToFullyQualifiedCString());
-              }
-              function.SetIsOptimizable(false);
-            }
-          }
-          return Error::null();
         }
+        {
+          // If it was a bailout, then disable optimization.
+          Error& error = Error::Handle();
+          // We got an error during compilation.
+          error = thread->sticky_error();
+          thread->clear_sticky_error();
+
+          if (error.raw() == Object::background_compilation_error().raw()) {
+            if (FLAG_trace_compiler) {
+              THR_Print(
+                  "--> disabling background optimizations for '%s' (will "
+                  "try to re-compile on isolate thread again)\n",
+                  function.ToFullyQualifiedCString());
+            }
+
+            // Ensure we don't attempt to re-compile the function on the
+            // background compiler.
+            function.set_is_background_optimizable(false);
+
+            // Trigger another compilation attempt soon on the main thread.
+            function.SetUsageCounter(optimized
+                                         ? FLAG_optimization_counter_threshold
+                                         : FLAG_compilation_counter_threshold);
+          } else if ((error.IsLanguageError() &&
+                      LanguageError::Cast(error).kind() == Report::kBailout) ||
+                     error.IsUnhandledException()) {
+            if (FLAG_trace_compiler) {
+              THR_Print("--> disabling optimizations for '%s'\n",
+                        function.ToFullyQualifiedCString());
+            }
+            function.SetIsOptimizable(false);
+          }
+        }
+        return Error::null();
+      }
+      if (optimized) {
         // Optimizer bailed out. Disable optimizations and never try again.
         if (trace_compiler) {
           THR_Print("--> disabling optimizations for '%s'\n",
@@ -1294,9 +1298,14 @@
   } else {
     event_name = "CompileFunctionOptimized";
   }
+  // TODO(alexmarkov): Consider adding a separate event for unoptimized
+  // compilation triggered from the interpreter.
   TIMELINE_FUNCTION_COMPILATION_DURATION(thread, event_name, function);
 #endif  // !defined(PRODUCT)
 
+  // If running with the interpreter, do the unoptimized compilation first.
+  const bool optimized = !FLAG_enable_interpreter || function.WasCompiled();
+
   // If we are in the optimizing in the mutator/Dart thread, then
   // this is either an OSR compilation or background compilation is
   // not currently allowed.
@@ -1306,8 +1315,7 @@
          !function.is_background_optimizable());
   CompilationPipeline* pipeline =
       CompilationPipeline::New(thread->zone(), function);
-  return CompileFunctionHelper(pipeline, function, true, /* optimized */
-                               osr_id);
+  return CompileFunctionHelper(pipeline, function, optimized, osr_id);
 }
 
 // This is only used from unit tests.
@@ -1765,14 +1773,8 @@
       while (running_ && !function.IsNull() && !isolate_->IsTopLevelParsing()) {
         // Check that we have aggregated and cleared the stats.
         ASSERT(thread->compiler_stats()->IsCleared());
-        // If running with interpreter, do the unoptimized compilation first.
-        if (FLAG_enable_interpreter &&
-            (function.unoptimized_code() == Code::null())) {
-          Compiler::EnsureUnoptimizedCode(thread, function);
-        } else {
-          Compiler::CompileOptimizedFunction(thread, function,
-                                             Compiler::kNoOSRDeoptId);
-        }
+        Compiler::CompileOptimizedFunction(thread, function,
+                                           Compiler::kNoOSRDeoptId);
 #ifndef PRODUCT
         Isolate* isolate = thread->isolate();
         isolate->aggregate_compiler_stats()->Add(*thread->compiler_stats());
diff --git a/runtime/vm/compiler/jit/compiler.h b/runtime/vm/compiler/jit/compiler.h
index 1636109..9dc8396 100644
--- a/runtime/vm/compiler/jit/compiler.h
+++ b/runtime/vm/compiler/jit/compiler.h
@@ -112,6 +112,9 @@
                                          const Function& function);
 
   // Generates optimized code for function.
+  // If the interpreter is used and the function has not been compiled yet,
+  // this generates unoptimized code instead (effectively the first round
+  // of optimization).
   //
   // Returns the code object if compilation succeeds.  Returns an Error if
   // there is a compilation error.  If optimization fails, but there is no
diff --git a/runtime/vm/dart.cc b/runtime/vm/dart.cc
index 18bb9b2..d2dcdb2 100644
--- a/runtime/vm/dart.cc
+++ b/runtime/vm/dart.cc
@@ -136,12 +136,6 @@
   }
 #endif
 
-  if (FLAG_use_bytecode_compiler) {
-    // Interpreter is not able to trigger compilation yet.
-    // TODO(alexmarkov): Revise
-    FLAG_enable_interpreter = false;
-  }
-
   if (FLAG_enable_interpreter) {
 #if defined(USING_SIMULATOR) || defined(TARGET_ARCH_DBC)
     return strdup(
diff --git a/runtime/vm/flag_list.h b/runtime/vm/flag_list.h
index 8ce163e..e631158 100644
--- a/runtime/vm/flag_list.h
+++ b/runtime/vm/flag_list.h
@@ -65,6 +65,9 @@
     "Collects all dynamic function names to identify unique targets")          \
   P(compactor_tasks, int, 2,                                                   \
     "The number of tasks to use for parallel compaction.")                     \
+  P(compilation_counter_threshold, int, 10,                                    \
+    "Usage-counter value at which an interpreted function is compiled; "      \
+    "-1 means never")                                                          \
   P(concurrent_sweep, bool, USING_MULTICORE,                                   \
     "Concurrent sweep for old generation.")                                    \
   R(dedup_instructions, true, bool, false,                                     \
diff --git a/runtime/vm/interpreter.cc b/runtime/vm/interpreter.cc
index 3c4cfb1..728ad6d 100644
--- a/runtime/vm/interpreter.cc
+++ b/runtime/vm/interpreter.cc
@@ -2119,6 +2119,17 @@
         INVOKE_RUNTIME(DRT_StackOverflow, args);
       }
     }
+    RawFunction* function = FrameFunction(FP);
+    int32_t counter = ++(function->ptr()->usage_counter_);
+    if (UNLIKELY(FLAG_compilation_counter_threshold >= 0 &&
+                 counter >= FLAG_compilation_counter_threshold &&
+                 !Function::HasCode(function))) {
+      SP[1] = 0;  // Unused code result.
+      SP[2] = function;
+      Exit(thread, FP, SP + 3, pc);
+      NativeArguments native_args(thread, 1, SP + 2, SP + 1);
+      INVOKE_RUNTIME(DRT_OptimizeInvokedFunction, native_args);
+    }
     DISPATCH();
   }
 
diff --git a/runtime/vm/object.cc b/runtime/vm/object.cc
index 256aaae..127170e 100644
--- a/runtime/vm/object.cc
+++ b/runtime/vm/object.cc
@@ -5986,6 +5986,7 @@
 
 void Function::AttachBytecode(const Code& value) const {
   DEBUG_ASSERT(IsMutatorOrAtSafepoint());
+  ASSERT(FLAG_enable_interpreter || FLAG_use_bytecode_compiler);
   // Finish setting up code before activating it.
   value.set_owner(*this);
   StorePointer(&raw_ptr()->bytecode_, value.raw());
@@ -5993,7 +5994,7 @@
   // We should not have loaded the bytecode if the function had code.
   ASSERT(!HasCode());
 
-  if (!FLAG_use_bytecode_compiler) {
+  if (FLAG_enable_interpreter) {
     // Set the code entry_point to InterpretCall stub.
     SetInstructions(Code::Handle(StubCode::InterpretCall_entry()->code()));
   }
@@ -6099,7 +6100,7 @@
 #if defined(DART_PRECOMPILED_RUNTIME)
   UNREACHABLE();
 #else
-  ASSERT(Thread::Current()->IsMutatorThread());
+  DEBUG_ASSERT(IsMutatorOrAtSafepoint());
   ASSERT(value.IsNull() || !value.is_optimized());
   StorePointer(&raw_ptr()->unoptimized_code_, value.raw());
 #endif
diff --git a/runtime/vm/runtime_entry.cc b/runtime/vm/runtime_entry.cc
index 330f368..95542e0 100644
--- a/runtime/vm/runtime_entry.cc
+++ b/runtime/vm/runtime_entry.cc
@@ -2147,9 +2147,15 @@
 #if !defined(DART_PRECOMPILED_RUNTIME)
   const Function& function = Function::CheckedHandle(zone, arguments.ArgAt(0));
   ASSERT(!function.IsNull());
-  ASSERT(function.HasCode());
 
-  if (Compiler::CanOptimizeFunction(thread, function)) {
+  // If running with the interpreter, do the unoptimized compilation first.
+  const bool unoptimized_compilation =
+      FLAG_enable_interpreter && !function.WasCompiled();
+
+  ASSERT(unoptimized_compilation || function.HasCode());
+
+  if (unoptimized_compilation ||
+      Compiler::CanOptimizeFunction(thread, function)) {
     if (FLAG_background_compilation) {
       Field& field = Field::Handle(zone, isolate->GetDeoptimizingBoxedField());
       while (!field.IsNull()) {