blob: f89d4f430e3cd197901775332035517a77b24c0b [file] [log] [blame]
// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
library dart_style.src.line_splitter;
import 'package:collection/priority_queue.dart';
import 'chunk.dart';
import 'debug.dart' as debug;
import 'line_writer.dart';
import 'rule/rule.dart';
import 'rule_set.dart';
/// To ensure the solver doesn't go totally pathological on giant code, we cap
/// it at a fixed number of attempts.
///
/// If the optimal solution isn't found after this many tries, it just uses the
/// best it found so far.
const _maxAttempts = 50000;
/// Takes a set of chunks and determines the best values for its rules in order
/// to fit it inside the page boundary.
///
/// This problem is exponential in the number of rules and a single expression
/// in Dart can be quite large, so it isn't feasible to brute force this. For
/// example:
///
/// outer(
/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8));
///
/// There are 509,607,936 ways this can be split.
///
/// The problem is even harder because we may not be able to easily tell if a
/// given solution is the best one. It's possible that there is *no* solution
/// that fits in the page (due to long strings or identifiers) so the winning
/// solution may still have overflow characters. This makes it hard to know
/// when we are done and can stop looking.
///
/// There are a couple of pieces of domain knowledge we use to cope with this:
///
/// - Changing a rule from unsplit to split will never lower its cost. A
/// solution with all rules unsplit will always be the one with the lowest
/// cost (zero). Conversely, setting all of its rules to the maximum split
/// value will always have the highest cost.
///
/// (You might think there is a converse rule about overflow characters. The
/// solution with the fewest splits will have the most overflow, and the
/// solution with the most splits will have the least overflow. Alas, because
/// of indentation, that isn't always the case. Adding a split may *increase*
/// overflow in some cases.)
///
/// - If all of the chunks for a rule are inside lines that already fit in the
/// page, then splitting that rule will never improve the solution.
///
/// We start off with a [SolveState] where all rules are unbound (which
/// implicitly treats them as unsplit). For a given solve state, we can produce
/// a set of expanded states that takes some of the rules in the first long
/// line and bind them to split values. This always produces new solve states
/// with higher cost (but often fewer overflow characters) than the parent
/// state.
///
/// We take these expanded states and add them to a work list sorted by cost.
/// Since unsplit rules always have lower cost solutions, we know that no state
/// we enqueue later will ever have a lower cost than the ones we already have
/// enqueued.
///
/// Then we keep pulling states off the work list and expanding them and adding
/// the results back into the list. We do this until we hit a solution where
/// all characters fit in the page. The first one we find will have the lowest
/// cost and we're done.
///
/// We also keep running track of the best solution we've found so far that
/// has the fewest overflow characters and the lowest cost. If no solution fits,
/// we'll use this one.
///
/// As a final escape hatch for pathologically nasty code, after trying some
/// fixed maximum number of solve states, we just bail and return the best
/// solution found so far.
///
/// Even with the above algorithmic optimizations, complex code may still
/// require a lot of exploring to find an optimal solution. To make that fast,
/// this code is carefully profiled and optimized. If you modify this, make
/// sure to test against the benchmark to ensure you don't regress performance.
class LineSplitter {
final LineWriter _writer;
/// The list of chunks being split.
final List<Chunk> _chunks;
/// The set of soft rules whose values are being selected.
final List<Rule> _rules;
/// The number of characters of additional indentation to apply to each line.
///
/// This is used when formatting blocks to get the output into the right
/// column based on where the block appears.
final int _blockIndentation;
/// The starting column of the first line.
final int _firstLineIndent;
/// The list of solve states to explore further.
///
/// This is sorted lowest-cost first. This ensures that as soon as we find a
/// solution that fits in the page, we know it will be the lowest cost one
/// and can stop looking.
final _workList = new HeapPriorityQueue<SolveState>();
/// The lowest cost solution found so far.
SolveState _bestSolution;
/// Creates a new splitter for [_writer] that tries to fit [chunks] into the
/// page width.
LineSplitter(this._writer, List<Chunk> chunks, int blockIndentation,
int firstLineIndent,
{bool flushLeft: false})
: _chunks = chunks,
// Collect the set of soft rules that we need to select values for.
_rules = chunks
.map((chunk) => chunk.rule)
.where((rule) => rule != null && rule is! HardSplitRule)
.toSet()
.toList(growable: false),
_blockIndentation = blockIndentation,
_firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation {
// Store the rule's index in the rule so we can get from a chunk to a rule
// index quickly.
for (var i = 0; i < _rules.length; i++) {
_rules[i].index = i;
}
}
/// Determine the best way to split the chunks into lines that fit in the
/// page, if possible.
///
/// Returns a [SplitSet] that defines where each split occurs and the
/// indentation of each line.
///
/// [firstLineIndent] is the number of characters of whitespace to prefix the
/// first line of output with.
SplitSet apply() {
// Start with a completely unbound, unsplit solution.
_workList.add(new SolveState(this, new RuleSet(_rules.length)));
var attempts = 0;
while (!_workList.isEmpty) {
var state = _workList.removeFirst();
if (state.isBetterThan(_bestSolution)) {
_bestSolution = state;
// Since we sort solutions by cost the first solution we find that
// fits is the winner.
if (_bestSolution.overflowChars == 0) break;
}
if (debug.traceSplitter) {
var best = state == _bestSolution ? " (best)" : "";
debug.log("$state$best");
debug.dumpLines(_chunks, _firstLineIndent, state.splits);
debug.log();
}
if (attempts++ > _maxAttempts) break;
// Try bumping the rule values for rules whose chunks are on long lines.
state.expand();
}
if (debug.traceSplitter) {
debug.log("$_bestSolution (winner)");
debug.dumpLines(_chunks, _firstLineIndent, _bestSolution.splits);
debug.log();
}
return _bestSolution.splits;
}
}
/// A possibly incomplete solution in the line splitting search space.
///
/// A single [SolveState] binds some subset of the rules to values while
/// leaving the rest unbound. If every rule is bound, the solve state describes
/// a complete solution to the line splitting problem. Even if rules are
/// unbound, a state can also usually be used as a solution by treating all
/// unbound rules as unsplit. (The usually is because a state that constrains
/// an unbound rule to split can't be used with that rule unsplit.)
///
/// From a given solve state, we can explore the search tree to more refined
/// solve states by producing new ones that add more bound rules to the current
/// state.
class SolveState implements Comparable<SolveState> {
final LineSplitter _splitter;
final RuleSet _ruleValues;
/// The unbound rules in this state that can be bound to produce new more
/// refined states.
///
/// Keeping this set small is the key to make the entire line splitter
/// perform well. If we consider too make rules at each state, our
/// exploration of the solution space is too branchy and we waste time on
/// dead end solutions.
///
/// Here is the key insight. The line splitter treats any unbound rule as
/// being unsplit. This means refining a solution always means taking a rule
/// that is unsplit and making it split. That monotonically increases the
/// cost, but may help fit the solution inside the page.
///
/// We want to keep the cost low, so the only reason to consider making a
/// rule split is if it reduces an overflowing line. It's also the case that
/// splitting an earlier rule will often reshuffle the rest of the line.
///
/// Taking that into account, the only rules we consider binding to extend a
/// solve state are *unbound rules inside the first line that is overflowing*.
/// Even if a line has dozens of rules, this generally keeps the branching
/// down to a few. It also means rules inside lines that already fit are
/// never touched.
///
/// There is one other set of rules that go in here. Sometimes a bound rule
/// in the solve state constrains some other unbound rule to split. In that
/// case, we also consider that active so we know to not leave it at zero.
final _liveRules = new Set<Rule>();
/// The set of splits chosen for this state.
SplitSet get splits => _splits;
SplitSet _splits;
/// The number of characters that do not fit inside the page with this set of
/// splits.
int get overflowChars => _overflowChars;
int _overflowChars;
/// Whether we can treat this state as a complete solution by leaving its
/// unbound rules unsplit.
///
/// This is generally true but will be false if the state contains any
/// unbound rules that are constrained to not be zero by other bound rules.
/// This avoids picking a solution that leaves those rules at zero when they
/// aren't allowed to be.
bool _isComplete = true;
SolveState(this._splitter, this._ruleValues) {
_calculateSplits();
_calculateCost();
}
/// Orders this state relative to [other].
///
/// This is the best-first ordering that the [LineSplitter] uses in its
/// worklist. It prefers cheaper states even if they overflow because this
/// ensures it finds the best solution first as soon as it finds one that
/// fits in the page so it can early out.
int compareTo(SolveState other) {
// TODO(rnystrom): It may be worth sorting by the estimated lowest number
// of overflow characters first. That doesn't help in cases where there is
// a solution that fits, but may help in corner cases where there is no
// fitting solution.
if (splits.cost != other.splits.cost) {
return splits.cost.compareTo(other.splits.cost);
}
if (overflowChars != other.overflowChars) {
return overflowChars.compareTo(other.overflowChars);
}
// Distinguish states based on the rule values just so that states with the
// same cost range but different rule values don't get considered identical
// by HeapPriorityQueue.
for (var rule in _splitter._rules) {
var value = _ruleValues.getValue(rule);
var otherValue = other._ruleValues.getValue(rule);
if (value != otherValue) return value.compareTo(otherValue);
}
// If we get here, this state is identical to [other].
return 0;
}
/// Returns `true` if this state is a better solution to use as the final
/// result than [other].
bool isBetterThan(SolveState other) {
// If this state contains an unbound rule that we know can't be left
// unsplit, we can't pick this as a solution.
if (!_isComplete) return false;
// Anything is better than nothing.
if (other == null) return true;
// Prefer the solution that fits the most in the page.
if (overflowChars != other.overflowChars) {
return overflowChars < other.overflowChars;
}
// Otherwise, prefer the best cost.
return splits.cost < other.splits.cost;
}
/// Enqueues more solve states to consider based on this one.
///
/// For each unbound rule in this state that occurred in the first long line,
/// enqueue solve states that bind that rule to each value it can have and
/// bind all previous rules to zero. (In other words, try all subsolutions
/// where that rule becomes the first new rule to split at.)
void expand() {
var unsplitRules = _ruleValues.clone();
// Walk down the rules looking for unbound ones to try.
var triedRules = 0;
for (var rule in _splitter._rules) {
if (_liveRules.contains(rule)) {
// We found one worth trying, so try all of its values.
for (var value = 1; value < rule.numValues; value++) {
var boundRules = unsplitRules.clone();
var mustSplitRules;
var valid = boundRules.tryBind(_splitter._rules, rule, value, (rule) {
if (mustSplitRules == null) mustSplitRules = [];
mustSplitRules.add(rule);
});
// Make sure we don't violate the constraints of the bound rules.
if (!valid) continue;
var state = new SolveState(_splitter, boundRules);
// If some unbound rules are constrained to split, remember that.
if (mustSplitRules != null) {
state._isComplete = false;
state._liveRules.addAll(mustSplitRules);
}
_splitter._workList.add(state);
}
// Stop once we've tried all of the ones we can.
if (++triedRules == _liveRules.length) break;
}
// Fill in previous unbound rules with zero.
if (!_ruleValues.contains(rule)) {
// Pass a dummy callback because zero will never fail. (If it would
// have, that rule would already be bound to some other value.)
if (!unsplitRules.tryBind(_splitter._rules, rule, 0, (_) {})) {
break;
}
}
}
}
/// Calculates the [SplitSet] for this solve state, assuming any unbound
/// rules are set to zero.
void _calculateSplits() {
// Figure out which expression nesting levels got split and need to be
// assigned columns.
var usedNestingLevels = new Set();
for (var i = 0; i < _splitter._chunks.length - 1; i++) {
var chunk = _splitter._chunks[i];
if (chunk.rule.isSplit(_getValue(chunk.rule), chunk)) {
usedNestingLevels.add(chunk.nesting);
chunk.nesting.clearTotalUsedIndent();
}
}
for (var nesting in usedNestingLevels) {
nesting.refreshTotalUsedIndent(usedNestingLevels);
}
_splits = new SplitSet(_splitter._chunks.length);
for (var i = 0; i < _splitter._chunks.length - 1; i++) {
var chunk = _splitter._chunks[i];
if (chunk.rule.isSplit(_getValue(chunk.rule), chunk)) {
var indent = 0;
if (!chunk.flushLeftAfter) {
// Add in the chunk's indent.
indent = _splitter._blockIndentation + chunk.indent;
// And any expression nesting.
indent += chunk.nesting.totalUsedIndent;
}
_splits.add(i, indent);
}
}
}
/// Gets the value to use for [rule], either the bound value or `0` if it
/// isn't bound.
int _getValue(Rule rule) {
if (rule is HardSplitRule) return 0;
return _ruleValues.getValue(rule);
}
/// Evaluates the cost (i.e. the relative "badness") of splitting the line
/// into [lines] physical lines based on the current set of rules.
void _calculateCost() {
assert(_splits != null);
// Calculate the length of each line and apply the cost of any spans that
// get split.
var cost = 0;
_overflowChars = 0;
var length = _splitter._firstLineIndent;
// The unbound rules in use by the current line. This will be null after
// the first long line has completed.
var currentLineRules = [];
endLine(int end) {
// Track lines that went over the length. It is only rules contained in
// long lines that we may want to split.
if (length > _splitter._writer.pageWidth) {
_overflowChars += length - _splitter._writer.pageWidth;
// Only try rules that are in the first long line, since we know at
// least one of them *will* be split.
if (currentLineRules != null && currentLineRules.isNotEmpty) {
_liveRules.addAll(currentLineRules);
currentLineRules = null;
}
} else {
// The line fit, so don't keep track of its rules.
if (currentLineRules != null) {
currentLineRules.clear();
}
}
}
// The set of spans that contain chunks that ended up splitting. We store
// these in a set so a span's cost doesn't get double-counted if more than
// one split occurs in it.
var splitSpans = new Set();
for (var i = 0; i < _splitter._chunks.length; i++) {
var chunk = _splitter._chunks[i];
length += chunk.text.length;
// Ignore the split after the last chunk.
if (i == _splitter._chunks.length - 1) break;
if (_splits.shouldSplitAt(i)) {
endLine(i);
splitSpans.addAll(chunk.spans);
// Include the cost of the nested block.
if (chunk.blockChunks.isNotEmpty) {
cost +=
_splitter._writer.formatBlock(chunk, _splits.getColumn(i)).cost;
}
// Start the new line.
length = _splits.getColumn(i);
} else {
if (chunk.spaceWhenUnsplit) length++;
// Include the nested block inline, if any.
length += chunk.unsplitBlockLength;
// If we might be in the first overly long line, keep track of any
// unbound rules we encounter. These are ones that we'll want to try to
// bind to shorten the long line.
if (currentLineRules != null &&
chunk.rule != null &&
!chunk.isHardSplit &&
!_ruleValues.contains(chunk.rule)) {
currentLineRules.add(chunk.rule);
}
}
}
// Add the costs for the rules that split.
_ruleValues.forEach(_splitter._rules, (rule, value) {
// A rule may be bound to zero if another rule constrains it to not split.
if (value != 0) cost += rule.cost;
});
// Add the costs for the spans containing splits.
for (var span in splitSpans) cost += span.cost;
// Finish the last line.
endLine(_splitter._chunks.length);
_splits.setCost(cost);
}
String toString() {
var buffer = new StringBuffer();
buffer.writeAll(
_splitter._rules.map((rule) {
var valueLength = "${rule.fullySplitValue}".length;
var value = "?";
if (_ruleValues.contains(rule)) {
value = "${_ruleValues.getValue(rule)}";
}
value = value.padLeft(valueLength);
if (_liveRules.contains(rule)) {
value = debug.bold(value);
} else {
value = debug.gray(value);
}
return value;
}),
" ");
buffer.write(" \$${splits.cost}");
if (overflowChars > 0) buffer.write(" (${overflowChars} over)");
if (!_isComplete) buffer.write(" (incomplete)");
if (splits == null) buffer.write(" invalid");
return buffer.toString();
}
}