[vm/aot] Support optimization of modulo against a constant power-of-two.

Improves MD5 performance on ARM32 by 15%, from 1.25s to 1.05s.

Cq-Include-Trybots: luci.dart.try:vm-kernel-optcounter-threshold-linux-release-x64-try, vm-kernel-precomp-linux-debug-x64-try, vm-kernel-precomp-linux-release-simarm-try, vm-kernel-precomp-linux-release-simarm64-try, vm-kernel-precomp-linux-release-x64-try, vm-kernel-precomp-mac-release-simarm64-try, vm-kernel-precomp-win-release-x64-try
Change-Id: Ia7a0614da662a80db051362fff7a4acacbc2455f
Reviewed-on: https://dart-review.googlesource.com/c/88723
Commit-Queue: Samir Jindel <sjindel@google.com>
Reviewed-by: Vyacheslav Egorov <vegorov@google.com>
diff --git a/runtime/vm/compiler/aot/aot_call_specializer.cc b/runtime/vm/compiler/aot/aot_call_specializer.cc
index 1e6a8b3..6e363f4 100644
--- a/runtime/vm/compiler/aot/aot_call_specializer.cc
+++ b/runtime/vm/compiler/aot/aot_call_specializer.cc
@@ -415,6 +415,44 @@
          TryOptimizeDoubleOperation(instr, op_kind);
 }
 
+// Rewrites `x % c` as `x & (|c| - 1)` when the constant |c| is a power of two.
+// The mask yields the non-negative remainder for negative `x` and `c` as well.
+// Returns the replacement definition, or nullptr when this does not apply.
+Definition* AotCallSpecializer::TryOptimizeMod(TemplateDartCall<0>* instr,
+                                               Token::Kind op_kind,
+                                               Value* left_value,
+                                               Value* right_value) {
+  if (!right_value->BindsToConstant()) return nullptr;
+  const Object& rhs = right_value->BoundConstant();
+  if (!rhs.IsSmi() && !rhs.IsMint()) return nullptr;  // Casts need an integer.
+  const int64_t value =
+      rhs.IsSmi() ? Smi::Cast(rhs).Value() : Mint::Cast(rhs).value();
+  if (value == kMinInt64) return nullptr;  // -value would overflow (UB).
+  const int64_t modulus = (value < 0) ? -value : value;
+  if (!Utils::IsPowerOfTwo(modulus)) return nullptr;
+  if (!Smi::IsValid(modulus - 1)) return nullptr;  // Mask is built as a Smi.
+
+  left_value = PrepareStaticOpInput(left_value, kMintCid, instr);
+
+#if defined(TARGET_ARCH_ARM)
+  Definition* right_definition = new (Z) UnboxedConstantInstr(
+      Smi::ZoneHandle(Z, Smi::New(modulus - 1)), kUnboxedInt32);
+  InsertBefore(instr, right_definition, /*env=*/nullptr, FlowGraph::kValue);
+  right_definition = new (Z)
+      UnboxedIntConverterInstr(kUnboxedInt32, kUnboxedInt64,
+                               new (Z) Value(right_definition), DeoptId::kNone);
+  InsertBefore(instr, right_definition, /*env=*/nullptr, FlowGraph::kValue);
+#else
+  Definition* right_definition = new (Z) UnboxedConstantInstr(
+      Smi::ZoneHandle(Z, Smi::New(modulus - 1)), kUnboxedInt64);
+  InsertBefore(instr, right_definition, /*env=*/nullptr, FlowGraph::kValue);
+#endif
+  right_value = new (Z) Value(right_definition);
+  return new (Z)
+      BinaryInt64OpInstr(Token::kBIT_AND, left_value, right_value,
+                         DeoptId::kNone, Instruction::kNotSpeculative);
+}
+
 bool AotCallSpecializer::TryOptimizeIntegerOperation(TemplateDartCall<0>* instr,
                                                      Token::Kind op_kind) {
   if (instr->type_args_len() != 0) {
@@ -500,10 +538,13 @@
         }
         break;
       }
-#if defined(TARGET_ARCH_X64) || defined(TARGET_ARCH_ARM64)
-        // TODO(ajcbik): 32-bit archs too?
       case Token::kMOD:
+        replacement = TryOptimizeMod(instr, op_kind, left_value, right_value);
+        if (replacement != nullptr) break;
       case Token::kTRUNCDIV:
+#if !defined(TARGET_ARCH_X64) && !defined(TARGET_ARCH_ARM64)
+        // TODO(ajcbik): 32-bit archs too?
+        break;
 #endif
       case Token::kSHL:
       case Token::kSHR:
diff --git a/runtime/vm/compiler/aot/aot_call_specializer.h b/runtime/vm/compiler/aot/aot_call_specializer.h
index 404f2ba..c40aa8b 100644
--- a/runtime/vm/compiler/aot/aot_call_specializer.h
+++ b/runtime/vm/compiler/aot/aot_call_specializer.h
@@ -66,6 +66,11 @@
   bool TryExpandCallThroughGetter(const Class& receiver_class,
                                   InstanceCallInstr* call);
 
+  Definition* TryOptimizeMod(TemplateDartCall<0>* instr,
+                             Token::Kind op_kind,
+                             Value* left_value,
+                             Value* right_value);  // nullptr if no rewrite.
+
   Precompiler* precompiler_;
 
   bool has_unique_no_such_method_;
diff --git a/tests/language_2/vm/bitnot_int_test.dart b/tests/language_2/vm/bitnot_int_test.dart
new file mode 100644
index 0000000..f52d66b
--- /dev/null
+++ b/tests/language_2/vm/bitnot_int_test.dart
@@ -0,0 +1,45 @@
+// Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+// VMOptions=--no_background_compilation --optimization_counter_threshold=10
+
+import "package:expect/expect.dart";
+
+// Tests for 64-bit integer bit-not (~), including the int32/int64 limits.
+
+final int maxInt32 = 2147483647;
+final int minInt32 = -2147483648;
+final int maxInt64 = 0x7fffffffffffffff;
+final int minInt64 = 0x8000000000000000;  // Sign bit only; equals ~maxInt64.
+
+int bitnot(int x) {
+  return ~x;  // Every check in this file goes through this helper.
+}
+
+doConstant() {
+  Expect.equals(0, bitnot(-1));  // Values around zero: ~x == -x - 1.
+  Expect.equals(-1, bitnot(0));
+  Expect.equals(-2, bitnot(1));
+  // At the signed 32-bit and 64-bit limits, ~max == min and ~min == max.
+  Expect.equals(minInt32, bitnot(maxInt32));
+  Expect.equals(maxInt32, bitnot(minInt32));
+  Expect.equals(minInt64, bitnot(maxInt64));
+  Expect.equals(maxInt64, bitnot(minInt64)); // sic!
+}
+
+doVar() {
+  int d = 0;
+  for (int i = -88; i < 10; i++) {
+    d += bitnot(i);
+  }
+  Expect.equals(3773, d);  // Sum of ~i == -i - 1 over i in [-88, 9].
+}
+
+main() {
+  // Repeat tests to enter JIT (when applicable): 20 runs vs. threshold of 10.
+  for (int i = 0; i < 20; i++) {
+    doConstant();
+    doVar();
+  }
+}
diff --git a/tests/language_2/vm/modtruncdiv_int_test.dart b/tests/language_2/vm/modtruncdiv_int_test.dart
index 7a84d1a..b51811d 100644
--- a/tests/language_2/vm/modtruncdiv_int_test.dart
+++ b/tests/language_2/vm/modtruncdiv_int_test.dart
@@ -79,6 +79,13 @@
   Expect.equals(1, mod(maxInt32 + 1, maxInt32));
   Expect.equals(maxInt32 - 2, mod(minInt32 - 1, maxInt32));
   Expect.equals(0, mod(minInt32 + 1, maxInt32));
+
+  Expect.equals(15, mod(-1, 16));
+  Expect.equals(15, mod(-17, 16));
+  Expect.equals(100, mod(100, 1 << 32));
+  Expect.equals((1 << 32) - 1, mod((1 << 35) - 1, 1 << 32));
+  Expect.equals(maxInt64, mod(-1, 1 << 63));
+  Expect.equals(0, mod(minInt64, 1 << 63));
 }
 
 doTruncDivConstants() {