blob: 420c509fe381fe06e9ebdb49ab09a2440cf80022 [file] [log] [blame]
// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
library dart_style.src.line_splitting.line_splitter;
import '../chunk.dart';
import '../debug.dart' as debug;
import '../line_writer.dart';
import '../rule/rule.dart';
import 'rule_set.dart';
import 'solve_state.dart';
import 'solve_state_queue.dart';
/// To ensure the solver doesn't go totally pathological on giant code, we cap
/// it at a fixed number of attempts.
///
/// If the optimal solution isn't found after this many tries, it just uses the
/// best it found so far.
const _maxAttempts = 5000;
/// Takes a set of chunks and determines the best values for its rules in order
/// to fit it inside the page boundary.
///
/// This problem is exponential in the number of rules and a single expression
/// in Dart can be quite large, so it isn't feasible to brute force this. For
/// example:
///
/// outer(
/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8));
///
/// There are 509,607,936 ways this can be split.
///
/// The problem is even harder because we may not be able to easily tell if a
/// given solution is the best one. It's possible that there is *no* solution
/// that fits in the page (due to long strings or identifiers) so the winning
/// solution may still have overflow characters. This makes it hard to know
/// when we are done and can stop looking.
///
/// There are a couple of pieces of domain knowledge we use to cope with this:
///
/// - Changing a rule from unsplit to split will never lower its cost. A
/// solution with all rules unsplit will always be the one with the lowest
/// cost (zero). Conversely, setting all of its rules to the maximum split
/// value will always have the highest cost.
///
/// (You might think there is a converse rule about overflow characters. The
/// solution with the fewest splits will have the most overflow, and the
/// solution with the most splits will have the least overflow. Alas, because
/// of indentation, that isn't always the case. Adding a split may *increase*
/// overflow in some cases.)
///
/// - If all of the chunks for a rule are inside lines that already fit in the
/// page, then splitting that rule will never improve the solution.
///
/// - If two partial solutions have the same cost and the bound rules don't
/// affect any of the remaining unbound rules, then whichever partial
/// solution is currently better will always be the winner regardless of what
/// the remaining unbound rules are bound to.
///
/// We start off with a [SolveState] where all rules are unbound (which
/// implicitly treats them as unsplit). For a given solve state, we can produce
/// a set of expanded states that takes some of the rules in the first long
/// line and bind them to split values. This always produces new solve states
/// with higher cost (but often fewer overflow characters) than the parent
/// state.
///
/// We take these expanded states and add them to a work list sorted by cost.
/// Since unsplit rules always have lower cost solutions, we know that no state
/// we enqueue later will ever have a lower cost than the ones we already have
/// enqueued.
///
/// Then we keep pulling states off the work list and expanding them and adding
/// the results back into the list. We do this until we hit a solution where
/// all characters fit in the page. The first one we find will have the lowest
/// cost and we're done.
///
/// We also keep running track of the best solution we've found so far that
/// has the fewest overflow characters and the lowest cost. If no solution fits,
/// we'll use this one.
///
/// When enqueing a solution, we can sometimes collapse it and a previously
/// queued one by preferring one or the other. If two solutions have the same
/// cost and we can prove that they won't diverge later as unbound rules are
/// set, we can pick the winner now and discard the other. This lets us avoid
/// redundantly exploring entire subtrees of the solution space.
///
/// As a final escape hatch for pathologically nasty code, after trying some
/// fixed maximum number of solve states, we just bail and return the best
/// solution found so far.
///
/// Even with the above algorithmic optimizations, complex code may still
/// require a lot of exploring to find an optimal solution. To make that fast,
/// this code is carefully profiled and optimized. If you modify this, make
/// sure to test against the benchmark to ensure you don't regress performance.
class LineSplitter {
final LineWriter writer;
/// The list of chunks being split.
final List<Chunk> chunks;
/// The set of soft rules whose values are being selected.
final List<Rule> rules;
/// The number of characters of additional indentation to apply to each line.
///
/// This is used when formatting blocks to get the output into the right
/// column based on where the block appears.
final int blockIndentation;
/// The starting column of the first line.
final int firstLineIndent;
/// The queue of solve states to explore further.
///
/// This is sorted lowest-cost first. This ensures that as soon as we find a
/// solution that fits in the page, we know it will be the lowest cost one
/// and can stop looking.
final _queue = new SolveStateQueue();
/// The lowest cost solution found so far.
SolveState _bestSolution;
/// Creates a new splitter for [_writer] that tries to fit [chunks] into the
/// page width.
LineSplitter(this.writer, List<Chunk> chunks, int blockIndentation,
int firstLineIndent,
{bool flushLeft: false})
: chunks = chunks,
// Collect the set of rules that we need to select values for.
rules = chunks
.map((chunk) => chunk.rule)
.where((rule) => rule != null)
.toSet()
.toList(growable: false),
blockIndentation = blockIndentation,
firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation {
_queue.bindSplitter(this);
// Store the rule's index in the rule so we can get from a chunk to a rule
// index quickly.
for (var i = 0; i < rules.length; i++) {
rules[i].index = i;
}
// Now that every used rule has an index, tell the rules to discard any
// constraints on unindexed rules.
for (var rule in rules) {
rule.forgetUnusedRules();
}
}
/// Determine the best way to split the chunks into lines that fit in the
/// page, if possible.
///
/// Returns a [SplitSet] that defines where each split occurs and the
/// indentation of each line.
///
/// [firstLineIndent] is the number of characters of whitespace to prefix the
/// first line of output with.
SplitSet apply() {
// Start with a completely unbound, unsplit solution.
_queue.add(new SolveState(this, new RuleSet(rules.length)));
var attempts = 0;
while (_queue.isNotEmpty) {
var state = _queue.removeFirst();
if (state.isBetterThan(_bestSolution)) {
_bestSolution = state;
// Since we sort solutions by cost the first solution we find that
// fits is the winner.
if (_bestSolution.overflowChars == 0) break;
}
if (debug.traceSplitter) {
var best = state == _bestSolution ? " (best)" : "";
debug.log("$state$best");
debug.dumpLines(chunks, firstLineIndent, state.splits);
debug.log();
}
if (attempts++ > _maxAttempts) break;
// Try bumping the rule values for rules whose chunks are on long lines.
state.expand();
}
if (debug.traceSplitter) {
debug.log("$_bestSolution (winner)");
debug.dumpLines(chunks, firstLineIndent, _bestSolution.splits);
debug.log();
}
return _bestSolution.splits;
}
void enqueue(SolveState state) {
_queue.add(state);
}
}