blob: b189afa46c31fc1112b5417916647e79c576d866 [file] [log] [blame]
// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'dart:collection' show SplayTreeMap;
/// Assigns names from [nameSequence] to items using a naive algorithm.
///
/// Items are assigned the next available name in decreasing frequency
/// order. This allocation order is unstable in that small changes in the input
/// cause large changes in the output. To see this, consider a typical
/// distribution that has a 'tail' where large numbers of items occur two or
/// three times. If the small change causes one of the items that occurs twice
/// to occur four times, of then all the items that occur three times will be
/// assigned names that are 'shifted down' in the allocation order, as will many
/// of the items that occur twice.
///
/// See [semistableFrequencyAssignment] for meaning of parameters.
void naiveFrequencyAssignment(
int items,
Iterable<String> nameSequence,
int Function(int) hashOf,
int Function(int) countOf,
void Function(int, String) assign) {
Iterator<String> nameIterator = nameSequence.iterator..moveNext();
for (int item = 0; item < items; item++) {
assign(item, nameIterator.current);
nameIterator.moveNext();
}
}
/// Assigns names from [nameSequence] to items, trying to avoid the unstable
/// allocation of [naiveFrequencyAssignment] by assigning items to their
/// hashing-based 'preferred' slots if possible.
///
/// - [items]: number of items to be assigned names. Items are numbered from
/// zero. Items must be sorted in order of decreasing [countOf].
///
/// - [nameSequence]: Potentially unbounded sequence of valid names in
/// increasing size.
///
/// - [hashOf]: Function returning a stable hash code for item `i`.
///
/// - [countOf]: Function returning the frequency or number of occurrences of
/// item `i`.
///
/// - [assign]: Function to register the assignment of a name to item `i`.
void semistableFrequencyAssignment(
int items,
Iterable<String> nameSequence,
int Function(int) hashOf,
int Function(int) countOf,
void Function(int, String) assign) {
// Overallocate 3x the number of names so that the last pool is large enough
// to substantially reduce collisions. Round up to the next power of two so
// that slightly changing the number of items does not usually change the size
// of the largest pool.
int maxIndex = 1 << (items * 3).bitLength;
List<String> names = nameSequence.take(maxIndex).toList(growable: false);
List<_Pool> pools = _Pool.makePools(names);
// First cohort with unassigned items.
_Cohort? firstCohort = _Cohort.makeCohorts(items, countOf);
for (var pool in pools) {
// Completely allocate smaller pools before allocating larger
// pools. Completely allocate each cohort in turn.
// TODO(sra): If the next several cohorts all fit in the current pool, the
// allocation will not change the bytes saved by minification. Consider
// allocating the preferred slot from several cohorts before allocating a
// non-preferred slot. This should increase the number of items allocated to
// their preferred slot.
firstCohort = firstCohort?.skipEmpty();
for (var startCohort = firstCohort;
startCohort != null;
startCohort = startCohort.next) {
if (pool.remaining == 0) break;
// Pass 1: assign members of cohort their preferred slot if available.
List<int> assigned = [];
for (var item in startCohort.unassigned) {
if (pool.remaining == 0) break;
int hash = hashOf(item);
int slot = hash % pool.size;
if (pool.slotIsAvailable(slot)) {
assign(item, pool.allocate(slot));
assigned.add(item);
}
}
startCohort.unassigned.removeAll(assigned);
// Pass 2: assign members of cohort their second 'rehash' slot if
// available, or the next available slot.
assigned.clear();
for (var item in startCohort.unassigned) {
if (pool.remaining == 0) break;
int hash = hashOf(item);
int rehashSlot = (5 * hash + 7) % pool.size;
int slot = pool.firstAvailableSlotFrom(rehashSlot);
assign(item, pool.allocate(slot));
assigned.add(item);
}
startCohort.unassigned.removeAll(assigned);
}
}
// Perform naive assignment of any items left unassigned above (should be
// none).
for (var pool in pools) {
while (pool.remaining > 0) {
firstCohort = firstCohort?.skipEmpty();
if (firstCohort == null) break;
var item = firstCohort.unassigned.first;
String name = pool.allocate(pool.firstAvailableSlot());
assign(item, name);
firstCohort.unassigned.remove(item);
}
}
}
/// A [_Pool] is a set of identifiers of the same length from which names are
/// allocated.
class _Pool {
final List<String?> _names = [];
// Keep the unused (available) slots in an ordered set for efficiently finding
// the next available slot (i.e. linear rehash). We are concerned about
// efficiency because the smaller pools are completely allocated, making
// worst-case linear rehashing quadratic. Using an ordered map brings this
// down to O(N log N).
//
// We would prefer an ordered Set, but SplayTreeSet does not have methods to
// find keys adjacent to a query key.
final SplayTreeMap<int, bool> _availableSlots = SplayTreeMap();
static List<_Pool> makePools(Iterable<String> names) {
List<_Pool> pools = [];
for (var name in names) {
int length = name.length;
while (pools.length < length) pools.add(_Pool());
_Pool pool = pools[length - 1];
pool._availableSlots[pool._names.length] = true;
pool._names.add(name);
}
return pools;
}
int get size => _names.length;
int get remaining => _availableSlots.length;
bool slotIsAvailable(int slot) => _names[slot] != null;
int firstAvailableSlot() => _availableSlots.keys.first;
/// Returns [start] if slot [start] is free, otherwise returns the next
/// available slot.
int firstAvailableSlotFrom(int start) =>
_availableSlots.firstKeyAfter(start - 1) ??
_availableSlots.firstKeyAfter(-1) ??
(throw StateError('No entries left in pool'));
String allocate(int slot) {
String name = _names[slot]!;
_names[slot] = null;
_availableSlots.remove(slot);
return name;
}
@override
String toString() => '_Pool(${_names.length}, ${_availableSlots.length})';
}
/// A [_Cohort] is a set of entities which occur with the same frequency. The
/// entities are identified by integers.
class _Cohort {
_Cohort? next; // Next cohort in decreasing frequency.
final int count; // This is the cohort of items occurring [count] times.
Set<int> unassigned = Set();
_Cohort(this.count);
_Cohort? skipEmpty() {
_Cohort? cohort = this;
while (cohort != null && cohort.remaining == 0) cohort = cohort.next;
return cohort;
}
int get remaining => unassigned.length;
static _Cohort? makeCohorts(int items, int Function(int) countOf) {
// Build _Cohorts.
_Cohort? first, current;
int lastCount = -1;
for (int item = 0; item < items; item++) {
int count = countOf(item);
if (count != lastCount) {
lastCount = count;
_Cohort next = _Cohort(count);
if (current == null) {
first = next;
} else {
current.next = next;
}
current = next;
}
current!.unassigned.add(item);
}
return first;
}
@override
String toString() => '_Cohort($count, $remaining)';
}