[vm] Adds a flag to dump dynamic bytecode traces in a debug build.
Also adds a program in pkg/vm/bin/dump_bytecode_ngrams.dart that
analyzes the traces.
Change-Id: I39c9645ca858195a41e001dab47a7b1398f4b15e
Reviewed-on: https://dart-review.googlesource.com/c/83340
Commit-Queue: Zach Anderson <zra@google.com>
Reviewed-by: Alexander Markov <alexmarkov@google.com>
diff --git a/pkg/vm/bin/dump_bytecode_ngrams.dart b/pkg/vm/bin/dump_bytecode_ngrams.dart
new file mode 100644
index 0000000..dead72c
--- /dev/null
+++ b/pkg/vm/bin/dump_bytecode_ngrams.dart
@@ -0,0 +1,65 @@
+// Copyright (c) 2018, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:io' as io;
+
+import 'package:args/args.dart' show ArgParser, ArgResults;
+import 'package:vm/bytecode/ngrams.dart';
+
+final ArgParser _argParser = new ArgParser(allowTrailingOptions: true)
+ ..addFlag('basic-blocks',
+ help: 'Only allow control flow as the last instruction in a window',
+ defaultsTo: true)
+ ..addOption('output',
+ abbr: 'o', help: 'Path to output file', defaultsTo: null)
+ ..addFlag('merge-pushes',
+ help: 'Do not distinguish among different kinds of Push opcodes',
+ defaultsTo: false)
+ ..addFlag('sort',
+ abbr: 's', help: 'Sort the output by ngram frequency', defaultsTo: true)
+ ..addOption('threshold',
+ abbr: 't', help: 'Minimum ngram count threshold', defaultsTo: "1")
+ ..addOption('window', abbr: 'w', help: 'Window size', defaultsTo: "3");
+
+final String _usage = '''
+Usage: dump_bytecode_ngrams [options] input.trace
+
+Dumps stats about a dynamic bytecode instruction trace produced with the
+Dart VM option --interpreter-trace-file, e.g.:
+
+\$ dart --enable-interpreter --interpreter-trace-file=trace program.dart
+
+Options:
+${_argParser.usage}
+''';
+
+const int _badUsageExitCode = 1;
+
+int main(List<String> arguments) {
+ final ArgResults options = _argParser.parse(arguments);
+ if ((options.rest.length != 1) || (options['output'] == null)) {
+ print(_usage);
+ return _badUsageExitCode;
+ }
+
+ final basicBlocks = options['basic-blocks'];
+ final input = options.rest.single;
+ final output = options['output'];
+ final mergePushes = options['merge-pushes'];
+ final windowSize = int.parse(options['window']);
+ final sort = options['sort'];
+ final threshold = int.parse(options['threshold']);
+
+ if (!(new io.File(input).existsSync())) {
+ print("The file '$input' does not exist");
+ print(_usage);
+ return _badUsageExitCode;
+ }
+
+ NGramReader nGramReader = new NGramReader(input);
+ nGramReader.readAllNGrams(windowSize,
+ basicBlocks: basicBlocks, mergePushes: mergePushes);
+ nGramReader.writeNGramStats(output, sort: sort, minCount: threshold);
+ return 0;
+}
diff --git a/pkg/vm/lib/bytecode/dbc.dart b/pkg/vm/lib/bytecode/dbc.dart
index e5045c1..aef5aa9 100644
--- a/pkg/vm/lib/bytecode/dbc.dart
+++ b/pkg/vm/lib/bytecode/dbc.dart
@@ -304,6 +304,39 @@
bool isJump(Opcode opcode) => BytecodeFormats[opcode].encoding == Encoding.kT;
+bool isThrow(Opcode opcode) => opcode == Opcode.kThrow;
+
+bool isCall(Opcode opcode) {
+ switch (opcode) {
+ case Opcode.kIndirectStaticCall:
+ case Opcode.kInstanceCall:
+ case Opcode.kNativeCall:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool isReturn(Opcode opcode) => opcode == Opcode.kReturnTOS;
+
+bool isControlFlow(Opcode opcode) =>
+ isJump(opcode) || isThrow(opcode) || isCall(opcode) || isReturn(opcode);
+
+bool isPush(Opcode opcode) {
+ switch (opcode) {
+ case Opcode.kPush:
+ case Opcode.kPushConstant:
+ case Opcode.kPushNull:
+ case Opcode.kPushTrue:
+ case Opcode.kPushFalse:
+ case Opcode.kPushInt:
+ case Opcode.kPushStatic:
+ return true;
+ default:
+ return false;
+ }
+}
+
// Bytecode instructions reference constant pool indices using
// unsigned 16-bit operands.
const int constantPoolIndexLimit = 1 << 16;
diff --git a/pkg/vm/lib/bytecode/disassembler.dart b/pkg/vm/lib/bytecode/disassembler.dart
index afe5a82..153e837 100644
--- a/pkg/vm/lib/bytecode/disassembler.dart
+++ b/pkg/vm/lib/bytecode/disassembler.dart
@@ -6,20 +6,32 @@
import 'dart:typed_data';
+import 'package:kernel/ast.dart' show listEquals, listHashCode;
+
import 'dbc.dart';
import 'exceptions.dart';
-class _Instruction {
+class Instruction {
final Opcode opcode;
final List<int> operands;
- _Instruction(this.opcode, this.operands);
+ Instruction(this.opcode, this.operands);
+
+ @override
+ int get hashCode => opcode.index.hashCode ^ listHashCode(operands);
+
+ @override
+ bool operator ==(other) {
+ return (other is Instruction) &&
+ (opcode == other.opcode) &&
+ listEquals(operands, other.operands);
+ }
}
class BytecodeDisassembler {
static const int kOpcodeMask = 0xFF;
static const int kBitsPerInt = 64;
- List<_Instruction> _instructions;
+ List<Instruction> _instructions;
int _labelCount;
Map<int, String> _labels;
Map<int, List<String>> _markers;
@@ -40,9 +52,9 @@
// TODO(alexmarkov): endianness?
Uint32List words = uint8list.buffer.asUint32List();
- _instructions = new List<_Instruction>(words.length);
+ _instructions = new List<Instruction>(words.length);
for (int i = 0; i < words.length; i++) {
- _instructions[i] = _decodeInstruction(words[i]);
+ _instructions[i] = decodeInstruction(words[i]);
}
_labelCount = 0;
@@ -50,10 +62,10 @@
_markers = <int, List<String>>{};
}
- _Instruction _decodeInstruction(int word) {
+ Instruction decodeInstruction(int word) {
final opcode = Opcode.values[word & kOpcodeMask];
final format = BytecodeFormats[opcode];
- return new _Instruction(opcode, _decodeOperands(format, word));
+ return new Instruction(opcode, _decodeOperands(format, word));
}
List<int> _decodeOperands(Format format, int word) {
@@ -140,12 +152,12 @@
if (markers != null) {
markers.forEach(out.writeln);
}
- _writeInstruction(out, i, _instructions[i]);
+ writeInstruction(out, i, _instructions[i]);
}
return out.toString();
}
- void _writeInstruction(StringBuffer out, int bci, _Instruction instr) {
+ void writeInstruction(StringBuffer out, int bci, Instruction instr) {
final format = BytecodeFormats[instr.opcode];
assert(format != null);
@@ -192,7 +204,9 @@
case Operand.xeg:
return (value < 0) ? 'FP[$value]' : 'r$value';
case Operand.tgt:
- return _labels[bci + value] ?? (throw 'Label not found');
+ return (_labels == null)
+ ? value.toString()
+ : _labels[bci + value] ?? (throw 'Label not found');
case Operand.spe:
return SpecialIndex.values[value]
.toString()
diff --git a/pkg/vm/lib/bytecode/ngrams.dart b/pkg/vm/lib/bytecode/ngrams.dart
new file mode 100644
index 0000000..040b51b
--- /dev/null
+++ b/pkg/vm/lib/bytecode/ngrams.dart
@@ -0,0 +1,147 @@
+// Copyright (c) 2018, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+library vm.bytecode.ngrams;
+
+import 'dart:io';
+import 'dart:typed_data';
+
+import 'package:kernel/ast.dart' show listEquals, listHashCode;
+
+import 'dbc.dart';
+import 'disassembler.dart' show Instruction, BytecodeDisassembler;
+
+bool isControlFlowInstr(Instruction instr) => isControlFlow(instr.opcode);
+
+class NGram {
+ List<Instruction> instrs;
+ List<int> _words;
+ BytecodeDisassembler _disassembler;
+
+ NGram(List<int> words, {bool mergePushes = false}) {
+ _disassembler = new BytecodeDisassembler();
+ _words = words;
+ instrs = new List<Instruction>(words.length);
+ for (int i = 0; i < instrs.length; i++) {
+ instrs[i] = _disassembler.decodeInstruction(words[i]);
+ }
+ if (mergePushes) {
+ _mergePushes(instrs);
+ }
+ _canonicalize(instrs);
+ }
+
+ /// Tests if any instructions that are not the last instruction in the window
+ /// are a jump, throw, or call.
+ bool get controlFlowIsNotLast =>
+ instrs.sublist(0, instrs.length - 1).any(isControlFlowInstr);
+
+ @override
+ int get hashCode => listHashCode(instrs);
+
+ @override
+ bool operator ==(other) =>
+ (other is NGram) && listEquals(instrs, other.instrs);
+
+ @override
+ String toString() {
+ StringBuffer out = new StringBuffer();
+ for (var instr in instrs) {
+ _disassembler.writeInstruction(out, 0, instr);
+ }
+ return out.toString();
+ }
+
+ /// Rewrites all Push-like instructions as 'Push r0'.
+ static void _mergePushes(List<Instruction> instrs) {
+ for (int i = 0; i < instrs.length; i++) {
+ if (isPush(instrs[i].opcode)) {
+ instrs[i] = new Instruction(Opcode.kPush, <int>[0]);
+ }
+ }
+ }
+
+ /// Rewrites the operands of instructions so that ngrams that differ only in
+ /// operands can be considered the same.
+ ///
+ /// Each type of operand is considered to come from a different space, and
+ /// each operand is re-indexed in that space starting from 0 such that each
+ /// distinct operand before canonicalization remains distinct afterwords. E.g.
+ ///
+ /// Push r3
+ /// Push r3
+ /// Push r4
+ ///
+ /// Becomes
+ ///
+ /// Push r0
+ /// Push r0
+ /// Push r1
+ static void _canonicalize(List<Instruction> instrs) {
+ Map<Operand, Map<int, int>> operandMaps = <Operand, Map<int, int>>{
+ // No mapping for Operand.none.
+ Operand.imm: <int, int>{},
+ Operand.lit: <int, int>{},
+ Operand.reg: <int, int>{},
+ Operand.xeg: <int, int>{},
+ Operand.tgt: <int, int>{},
+ // No mapping for Operand.spe.
+ };
+ for (Instruction instr in instrs) {
+ Format fmt = BytecodeFormats[instr.opcode];
+ for (int i = 0; i < instr.operands.length; i++) {
+ Operand op = fmt.operands[i];
+ if (!operandMaps.containsKey(op)) {
+ continue;
+ }
+ int newOperand = operandMaps[op]
+ .putIfAbsent(instr.operands[i], () => operandMaps[op].length);
+ instr.operands[i] = newOperand;
+ }
+ }
+ }
+}
+
+class NGramReader {
+ Uint32List _words;
+
+ Map<NGram, int> _ngramCounts = <NGram, int>{};
+
+ NGramReader(String traceFilename) {
+ File traceFile = File(traceFilename);
+ Uint8List data = traceFile.readAsBytesSync();
+ _words = Uint32List.view(data.buffer);
+ }
+
+ Map<NGram, int> get ngramCounts => _ngramCounts;
+
+ void readAllNGrams(int windowSize,
+ {bool basicBlocks: true, bool mergePushes: false}) {
+ int offset = 0;
+ while (offset + windowSize < _words.length) {
+ Uint32List window = _words.sublist(offset, offset + windowSize);
+ offset += 1;
+ NGram ngram = new NGram(window, mergePushes: mergePushes);
+ if (basicBlocks && ngram.controlFlowIsNotLast) {
+ continue;
+ }
+ _ngramCounts.update(ngram, (count) => count + 1, ifAbsent: () => 1);
+ }
+ }
+
+ void writeNGramStats(String outputFilename,
+ {bool sort = true, int minCount = 1000}) {
+ File outputFile = new File(outputFilename);
+ IOSink file = outputFile.openWrite();
+ List<MapEntry<NGram, int>> entries =
+ _ngramCounts.entries.where((e) => e.value > minCount).toList();
+ if (sort) {
+ entries.sort((e1, e2) => e2.value - e1.value);
+ }
+ entries.forEach((e) {
+ file.write("count: ${e.value}\n${e.key}\n");
+ });
+ file.close();
+ }
+}
diff --git a/runtime/vm/interpreter.cc b/runtime/vm/interpreter.cc
index f4edb7f..8455c42 100644
--- a/runtime/vm/interpreter.cc
+++ b/runtime/vm/interpreter.cc
@@ -32,6 +32,15 @@
ULLONG_MAX,
"Trace interpreter execution after instruction count reached.");
+DEFINE_FLAG(charp,
+ interpreter_trace_file,
+ NULL,
+ "File to write a dynamic instruction trace to.");
+DEFINE_FLAG(uint64_t,
+ interpreter_trace_file_max_bytes,
+ 100 * MB,
+ "Maximum size in bytes of the interpreter trace file");
+
#define LIKELY(cond) __builtin_expect((cond), 1)
#define UNLIKELY(cond) __builtin_expect((cond), 0)
@@ -559,10 +568,36 @@
last_setjmp_buffer_ = NULL;
DEBUG_ONLY(icount_ = 1); // So that tracing after 0 traces first bytecode.
+
+#if defined(DEBUG)
+ trace_file_bytes_written_ = 0;
+ trace_file_ = NULL;
+ if (FLAG_interpreter_trace_file != NULL) {
+ Dart_FileOpenCallback file_open = Dart::file_open_callback();
+ if (file_open != NULL) {
+ trace_file_ = file_open(FLAG_interpreter_trace_file, /* write */ true);
+ trace_buffer_ = new KBCInstr[kTraceBufferInstrs];
+ trace_buffer_idx_ = 0;
+ }
+ }
+#endif
}
Interpreter::~Interpreter() {
delete[] stack_;
+#if defined(DEBUG)
+ if (trace_file_ != NULL) {
+ FlushTraceBuffer();
+ // Close the file.
+ Dart_FileCloseCallback file_close = Dart::file_close_callback();
+ if (file_close != NULL) {
+ file_close(trace_file_);
+ trace_file_ = NULL;
+ delete[] trace_buffer_;
+ trace_buffer_ = NULL;
+ }
+ }
+#endif
}
// Get the active Interpreter for the current isolate.
@@ -592,6 +627,44 @@
THR_Print("Disassembler not supported in this mode.\n");
}
}
+
+DART_FORCE_INLINE bool Interpreter::IsWritingTraceFile() const {
+ return (trace_file_ != NULL) &&
+ (trace_file_bytes_written_ < FLAG_interpreter_trace_file_max_bytes);
+}
+
+void Interpreter::FlushTraceBuffer() {
+ Dart_FileWriteCallback file_write = Dart::file_write_callback();
+ if (file_write == NULL) {
+ return;
+ }
+ if (trace_file_bytes_written_ >= FLAG_interpreter_trace_file_max_bytes) {
+ return;
+ }
+ const intptr_t bytes_to_write = Utils::Minimum(
+ static_cast<uint64_t>(trace_buffer_idx_ * sizeof(KBCInstr)),
+ FLAG_interpreter_trace_file_max_bytes - trace_file_bytes_written_);
+ if (bytes_to_write == 0) {
+ return;
+ }
+ file_write(trace_buffer_, bytes_to_write, trace_file_);
+ trace_file_bytes_written_ += bytes_to_write;
+ trace_buffer_idx_ = 0;
+}
+
+DART_NOINLINE void Interpreter::WriteInstructionToTrace(uint32_t* pc) {
+ Dart_FileWriteCallback file_write = Dart::file_write_callback();
+ if (file_write == NULL) {
+ return;
+ }
+ if (trace_buffer_idx_ < kTraceBufferInstrs) {
+ trace_buffer_[trace_buffer_idx_++] = static_cast<KBCInstr>(*pc);
+ }
+ if (trace_buffer_idx_ == kTraceBufferInstrs) {
+ FlushTraceBuffer();
+ }
+}
+
#endif // defined(DEBUG)
// Calls into the Dart runtime are based on this interface.
@@ -1205,6 +1278,9 @@
if (IsTracingExecution()) { \
TraceInstruction(pc - 1); \
} \
+ if (IsWritingTraceFile()) { \
+ WriteInstructionToTrace(pc - 1); \
+ } \
icount_++;
#else
#define TRACE_INSTRUCTION
diff --git a/runtime/vm/interpreter.h b/runtime/vm/interpreter.h
index 5dc3dc4..6150561 100644
--- a/runtime/vm/interpreter.h
+++ b/runtime/vm/interpreter.h
@@ -198,6 +198,19 @@
// Prints bytecode instruction at given pc for instruction tracing.
void TraceInstruction(uint32_t* pc) const;
+
+ bool IsWritingTraceFile() const;
+ void FlushTraceBuffer();
+ void WriteInstructionToTrace(uint32_t* pc);
+
+ void* trace_file_;
+ uint64_t trace_file_bytes_written_;
+
+ static const intptr_t kTraceBufferSizeInBytes = 10 * KB;
+ static const intptr_t kTraceBufferInstrs =
+ kTraceBufferSizeInBytes / sizeof(KBCInstr);
+ KBCInstr* trace_buffer_;
+ intptr_t trace_buffer_idx_;
#endif // defined(DEBUG)
// Longjmp support for exceptions.