Speedup package:crypto (with a focus on md5) (#892)

diff --git a/pkgs/crypto/CHANGELOG.md b/pkgs/crypto/CHANGELOG.md
index 4aee58d..520a2e1 100644
--- a/pkgs/crypto/CHANGELOG.md
+++ b/pkgs/crypto/CHANGELOG.md
@@ -1,6 +1,7 @@
 ## 3.0.7-wip
 
 - Run `dart format` with the new style.
+- Performance improvements.
 
 ## 3.0.6
 
diff --git a/pkgs/crypto/benchmark/benchmark.dart b/pkgs/crypto/benchmark/benchmark.dart
new file mode 100644
index 0000000..6e41d1f
--- /dev/null
+++ b/pkgs/crypto/benchmark/benchmark.dart
@@ -0,0 +1,149 @@
+// Copyright (c) 2025, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:io' show exit, stderr;
+import 'dart:typed_data';
+
+import 'package:convert/convert.dart';
+import 'package:crypto/crypto.dart';
+
+void main(List<String> args) {
+  Hash? function;
+  int? customSize;
+
+  void setFunction(Hash newFunction, String message) {
+    if (function != null) {
+      stderr.writeln('Hash function already set.');
+      exit(1);
+    }
+    function = newFunction;
+    print('Using hash function $message');
+  }
+
+  for (var arg in args) {
+    if (arg == 'md5') {
+      setFunction(md5, 'md5');
+    } else if (arg == 'sha1') {
+      setFunction(sha1, 'sha1');
+    } else if (arg == 'sha256') {
+      setFunction(sha256, 'sha256');
+    } else if (arg == 'sha224') {
+      setFunction(sha224, 'sha224');
+    } else if (arg == 'sha384') {
+      setFunction(sha384, 'sha384');
+    } else if (arg == 'sha512') {
+      setFunction(sha512, 'sha512');
+    } else if (arg == 'sha512224') {
+      setFunction(sha512224, 'sha512/224');
+    } else if (arg == 'sha512256') {
+      setFunction(sha512256, 'sha512/256');
+    } else if (arg.startsWith('--custom=')) {
+      customSize = int.parse(arg.substring('--custom='.length));
+    } else {
+      stderr.writeln('Unknown argument: $arg');
+      exit(1);
+    }
+  }
+  if (function == null) {
+    setFunction(md5, 'md5');
+  }
+
+  if (customSize != null) {
+    doIterationsChunk(function!, mb: customSize, iterations: 1, doPrint: true);
+    return;
+  }
+
+  // Warmup.
+  doIterationsChunk(function!, mb: 1, iterations: 100, doPrint: false);
+
+  // Benchmarks.
+  print('One chunk input');
+  doIterationsChunk(function!, mb: 1, iterations: 1000, doPrint: true);
+  doIterationsChunk(function!, mb: 10, iterations: 100, doPrint: true);
+  doIterationsChunk(function!, mb: 100, iterations: 10, doPrint: true);
+  doIterationsChunk(function!, mb: 1000, iterations: 1, doPrint: true);
+
+  print('');
+  print('Add in 1024 byte chunks:');
+  doIterationsSmallChunks(function!,
+      chunkSize: 1024, mb: 1, iterations: 1000, doPrint: true);
+
+  print('');
+  print('Add in 100 byte chunks:');
+  doIterationsSmallChunks(function!,
+      chunkSize: 100, mb: 1, iterations: 1000, doPrint: true);
+
+  print('');
+  print('Add in 4 byte chunks:');
+  doIterationsSmallChunks(function!,
+      chunkSize: 4, mb: 1, iterations: 1000, doPrint: true);
+}
+
+void doIterationsChunk(Hash function,
+    {required int mb, required int iterations, required bool doPrint}) {
+  var data = Uint8List(1024 * 1024 * mb);
+  var runtimesInMs = <double>[];
+  for (var i = 0; i < iterations; i++) {
+    runtimesInMs.add(hashChunk(data, function));
+  }
+  if (doPrint) {
+    printStats(runtimesInMs, data.length, iterations);
+  }
+}
+
+void doIterationsSmallChunks(Hash function,
+    {required int chunkSize,
+    required int mb,
+    required int iterations,
+    required bool doPrint}) {
+  var data = Uint8List(chunkSize);
+  var runtimesInMs = <double>[];
+  var addIterations = mb * 1024 * 1024 ~/ chunkSize;
+  for (var i = 0; i < iterations; i++) {
+    runtimesInMs.add(hashSmallChunks(data, addIterations, function));
+  }
+  if (doPrint) {
+    printStats(runtimesInMs, data.length * addIterations, iterations);
+  }
+}
+
+double hashChunk(Uint8List data, Hash function) {
+  var stopwatch = Stopwatch()..start();
+  var hash = function.convert(data);
+  stopwatch.stop();
+  if (hash.bytes.isEmpty) throw StateError('This should never happen');
+  return stopwatch.elapsedMicroseconds / 1000;
+}
+
+double hashSmallChunks(Uint8List data, int addTimes, Hash function) {
+  var stopwatch = Stopwatch()..start();
+
+  var output = AccumulatorSink<Digest>();
+  var input = function.startChunkedConversion(output);
+  for (var i = 0; i < addTimes; i++) {
+    input.add(data);
+  }
+
+  input.close();
+  var hash = output.events.single;
+
+  stopwatch.stop();
+  if (hash.bytes.isEmpty) throw StateError('This should never happen');
+  return stopwatch.elapsedMicroseconds / 1000;
+}
+
+void printStats(List<double> runtimesInMs, int dataLength, int iterations) {
+  var mb = dataLength / 1024 / 1024;
+  runtimesInMs.sort();
+  var sum = runtimesInMs.reduce((value, element) => value + element);
+  var averageRuntimeInMs = sum / runtimesInMs.length;
+  var averageKbPerMs = dataLength / 1024 / averageRuntimeInMs;
+  var medianRuntimeInMs = runtimesInMs[runtimesInMs.length ~/ 2];
+  var medianKbPerMs = dataLength / 1024 / medianRuntimeInMs;
+  print(
+      'Processed ${mb.toStringAsFixed(2)} mb of data with an average/median of '
+      '${averageKbPerMs.toStringAsFixed(2)} / '
+      '${medianKbPerMs.toStringAsFixed(2)} '
+      'kb per ms.');
+}
diff --git a/pkgs/crypto/lib/src/hash_sink.dart b/pkgs/crypto/lib/src/hash_sink.dart
index fcd8aa9..c979a9f 100644
--- a/pkgs/crypto/lib/src/hash_sink.dart
+++ b/pkgs/crypto/lib/src/hash_sink.dart
@@ -4,8 +4,6 @@
 
 import 'dart:typed_data';
 
-import 'package:typed_data/typed_data.dart';
-
 import 'digest.dart';
 import 'utils.dart';
 
@@ -19,11 +17,24 @@
   /// Whether the hash function operates on big-endian words.
   final Endian _endian;
 
-  /// The words in the current chunk.
+  /// A [ByteData] view of the current chunk of data.
   ///
-  /// This is an instance variable to avoid re-allocating, but its data isn't
-  /// used across invocations of [_iterate].
-  final Uint32List _currentChunk;
+  /// This is an instance variable to avoid re-allocating.
+  ByteData? _byteDataView;
+
+  /// The actual chunk of bytes currently accumulating.
+  ///
+  /// The same allocation will be reused over and over again; once full it is
+  /// passed to the underlying hashing algorithm for processing.
+  final Uint8List _chunk;
+
+  /// The index of the next insertion into the chunk.
+  int _chunkNextIndex;
+
+  /// A [Uint32List] (in specified endian) copy of the chunk.
+  ///
+  /// This is an instance variable to avoid re-allocating.
+  final Uint32List _chunk32;
 
   /// Messages with more than 2^53-1 bits are not supported.
   ///
@@ -35,9 +46,6 @@
   /// The length of the input data so far, in bytes.
   int _lengthInBytes = 0;
 
-  /// Data that has yet to be processed by the hash function.
-  final _pendingData = Uint8Buffer();
-
   /// Whether [close] has been called.
   bool _isClosed = false;
 
@@ -66,7 +74,9 @@
   })  : _endian = endian,
         assert(signatureBytes >= 8),
         _signatureBytes = signatureBytes,
-        _currentChunk = Uint32List(chunkSizeInWords);
+        _chunk = Uint8List(chunkSizeInWords * bytesPerWord),
+        _chunkNextIndex = 0,
+        _chunk32 = Uint32List(chunkSizeInWords);
 
   /// Runs a single iteration of the hash computation, updating [digest] with
   /// the result.
@@ -79,8 +89,38 @@
   void add(List<int> data) {
     if (_isClosed) throw StateError('Hash.add() called after close().');
     _lengthInBytes += data.length;
-    _pendingData.addAll(data);
-    _iterate();
+    _addData(data);
+  }
+
+  void _addData(List<int> data) {
+    var dataIndex = 0;
+    var chunkNextIndex = _chunkNextIndex;
+    final size = _chunk.length;
+    _byteDataView ??= _chunk.buffer.asByteData();
+    while (true) {
+      // Check if there is enough data left in [data] for a full chunk.
+      var restEnd = chunkNextIndex + data.length - dataIndex;
+      if (restEnd < size) {
+        // There is not enough data, so just add into [_chunk].
+        _chunk.setRange(chunkNextIndex, restEnd, data, dataIndex);
+        _chunkNextIndex = restEnd;
+        return;
+      }
+
+      // There is enough data to fill the chunk. Fill it and process it.
+      _chunk.setRange(chunkNextIndex, size, data, dataIndex);
+      dataIndex += size - chunkNextIndex;
+
+      // Now do endian conversion to words.
+      var j = 0;
+      do {
+        _chunk32[j] = _byteDataView!.getUint32(j * bytesPerWord, _endian);
+        j++;
+      } while (j < _chunk32.length);
+
+      updateHash(_chunk32);
+      chunkNextIndex = 0;
+    }
   }
 
   @override
@@ -88,9 +128,8 @@
     if (_isClosed) return;
     _isClosed = true;
 
-    _finalizeData();
-    _iterate();
-    assert(_pendingData.isEmpty);
+    _finalizeAndProcessData();
+    assert(_chunkNextIndex == 0);
     _sink.add(Digest(_byteDigest()));
     _sink.close();
   }
@@ -108,65 +147,38 @@
     return byteDigest;
   }
 
-  /// Iterates through [_pendingData], updating the hash computation for each
-  /// chunk.
-  void _iterate() {
-    var pendingDataBytes = _pendingData.buffer.asByteData();
-    var pendingDataChunks = _pendingData.length ~/ _currentChunk.lengthInBytes;
-    for (var i = 0; i < pendingDataChunks; i++) {
-      // Copy words from the pending data buffer into the current chunk buffer.
-      for (var j = 0; j < _currentChunk.length; j++) {
-        _currentChunk[j] = pendingDataBytes.getUint32(
-          i * _currentChunk.lengthInBytes + j * bytesPerWord,
-          _endian,
-        );
-      }
-
-      // Run the hash function on the current chunk.
-      updateHash(_currentChunk);
-    }
-
-    // Remove all pending data up to the last clean chunk break.
-    _pendingData.removeRange(
-      0,
-      pendingDataChunks * _currentChunk.lengthInBytes,
-    );
-  }
-
-  /// Finalizes [_pendingData].
+  /// Finalizes the data and finishes the hash.
   ///
   /// This adds a 1 bit to the end of the message, and expands it with 0 bits to
   /// pad it out.
-  void _finalizeData() {
-    // Pad out the data with 0x80, eight or sixteen 0s, and as many more 0s
-    // as we need to land cleanly on a chunk boundary.
-    _pendingData.add(0x80);
-
-    final contentsLength = _lengthInBytes + 1 /* 0x80 */ + _signatureBytes;
-    final finalizedLength = _roundUp(
-      contentsLength,
-      _currentChunk.lengthInBytes,
-    );
-
-    for (var i = 0; i < finalizedLength - contentsLength; i++) {
-      _pendingData.add(0);
-    }
-
+  void _finalizeAndProcessData() {
     if (_lengthInBytes > _maxMessageLengthInBytes) {
       throw UnsupportedError(
         'Hashing is unsupported for messages with more than 2^53 bits.',
       );
     }
 
+    final contentsLength = _lengthInBytes + 1 /* 0x80 */ + _signatureBytes;
+    final finalizedLength = _roundUp(
+      contentsLength,
+      _chunk.lengthInBytes,
+    );
+
+    // Prepare the finalization data.
+    var padding = Uint8List(finalizedLength - _lengthInBytes);
+    // Pad out the data with 0x80, eight or sixteen 0s, and as many more 0s
+    // as we need to land cleanly on a chunk boundary.
+    padding[0] = 0x80;
+
+    // The rest is already 0-bytes.
+
     var lengthInBits = _lengthInBytes * bitsPerByte;
 
     // Add the full length of the input data as a 64-bit value at the end of the
     // hash. Note: we're only writing out 64 bits, so skip ahead 8 if the
     // signature is 128-bit.
-    final offset = _pendingData.length + (_signatureBytes - 8);
-
-    _pendingData.addAll(Uint8List(_signatureBytes));
-    var byteData = _pendingData.buffer.asByteData();
+    final offset = padding.length - 8;
+    var byteData = padding.buffer.asByteData();
 
     // We're essentially doing byteData.setUint64(offset, lengthInBits, _endian)
     // here, but that method isn't supported on dart2js so we implement it
@@ -180,6 +192,8 @@
       byteData.setUint32(offset, lowBits, _endian);
       byteData.setUint32(offset + bytesPerWord, highBits, _endian);
     }
+
+    _addData(padding);
   }
 
   /// Rounds [val] up to the next multiple of [n], as long as [n] is a power of
diff --git a/pkgs/crypto/lib/src/md5.dart b/pkgs/crypto/lib/src/md5.dart
index 8139325..7fc8066 100644
--- a/pkgs/crypto/lib/src/md5.dart
+++ b/pkgs/crypto/lib/src/md5.dart
@@ -78,34 +78,26 @@
 
   @override
   void updateHash(Uint32List chunk) {
-    assert(chunk.length == 16);
+    // This makes the VM get rid of some "GenericCheckBound" calls.
+    // See also https://github.com/dart-lang/sdk/issues/60753.
+    // ignore: unnecessary_statements
+    chunk[15];
 
-    var a = digest[0];
-    var b = digest[1];
-    var c = digest[2];
+    // Access [3] first to get rid of some "GenericCheckBound" calls.
     var d = digest[3];
+    var c = digest[2];
+    var b = digest[1];
+    var a = digest[0];
 
-    int e;
-    int f;
+    var e = 0;
+    var f = 0;
 
-    for (var i = 0; i < 64; i++) {
-      if (i < 16) {
-        e = (b & c) | ((~b & mask32) & d);
-        f = i;
-      } else if (i < 32) {
-        e = (d & b) | ((~d & mask32) & c);
-        f = ((5 * i) + 1) % 16;
-      } else if (i < 48) {
-        e = b ^ c ^ d;
-        f = ((3 * i) + 5) % 16;
-      } else {
-        e = c ^ (b | (~d & mask32));
-        f = (7 * i) % 16;
-      }
-
+    @pragma('vm:prefer-inline')
+    void round(int i) {
       var temp = d;
       d = c;
       c = b;
+
       b = add32(
         b,
         rotl32(
@@ -113,9 +105,37 @@
           _shiftAmounts[i],
         ),
       );
+
       a = temp;
     }
 
+    for (var i = 0; i < 16; i++) {
+      e = (b & c) | ((~b & mask32) & d);
+      // Doing `i % 16` would get rid of a "GenericCheckBound" call in the VM,
+      // but is slightly slower anyway.
+      // See also https://github.com/dart-lang/sdk/issues/60753.
+      f = i;
+      round(i);
+    }
+
+    for (var i = 16; i < 32; i++) {
+      e = (d & b) | ((~d & mask32) & c);
+      f = ((5 * i) + 1) % 16;
+      round(i);
+    }
+
+    for (var i = 32; i < 48; i++) {
+      e = b ^ c ^ d;
+      f = ((3 * i) + 5) % 16;
+      round(i);
+    }
+
+    for (var i = 48; i < 64; i++) {
+      e = c ^ (b | (~d & mask32));
+      f = (7 * i) % 16;
+      round(i);
+    }
+
     digest[0] = add32(a, digest[0]);
     digest[1] = add32(b, digest[1]);
     digest[2] = add32(c, digest[2]);
diff --git a/pkgs/crypto/tool/md5sum.dart b/pkgs/crypto/tool/md5sum.dart
new file mode 100644
index 0000000..c6b2a58
--- /dev/null
+++ b/pkgs/crypto/tool/md5sum.dart
@@ -0,0 +1,18 @@
+// Copyright (c) 2025, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:io';
+
+import 'package:crypto/crypto.dart';
+
+void main(List<String> args) {
+  for (var arg in args) {
+    var data = File(arg).readAsBytesSync();
+    var stopwatch = Stopwatch()..start();
+    var hash = md5.convert(data);
+    stopwatch.stop();
+    print('Hashed to ${hash.toString()} in '
+        '${stopwatch.elapsedMilliseconds} ms');
+  }
+}