blob: 3291e280ed961f421dabec5166ba6213e2180fe5 [file] [log] [blame]
// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
library dart_style.src.line_splitter;
import 'dart:math' as math;
import 'chunk.dart';
import 'debug.dart';
import 'line_prefix.dart';
/// The number of spaces in a single level of indentation.
const spacesPerIndent = 2;
/// The number of indentation levels in a single level of expression nesting.
const indentsPerNest = 2;
/// Cost or indent value used to indication no solution could be found.
const invalidSplits = -1;
// TODO(rnystrom): This needs to be updated to take into account how it works
// now.
/// Takes a series of [Chunk]s and determines the best way to split them into
/// lines of output that fit within the page width (if possible).
/// Trying all possible combinations is exponential in the number of
/// [SplitParam]s and expression nesting levels both of which can be quite large
/// with things like long method chains containing function literals, lots of
/// named parameters, etc. To tame that, this uses dynamic programming. The
/// basic process is:
/// Given a suffix of the entire line, we walk over the tokens keeping track
/// of any splits we find until we fill the page width (or run out of line).
/// If we reached the end of the line without crossing the page width, we're
/// fine and the suffix is good as it is.
/// If we went over, at least one of those splits must be applied to keep the
/// suffix in bounds. For each of those splits, we split at that point and
/// apply the same algorithm for the remainder of the line. We get the results
/// of all of those, choose the one with the lowest cost, and
/// that's the best solution.
/// The fact that this recurses while only removing a small prefix from the
/// line (the chunks before the first split), means this is exponential.
/// Thankfully, though, the best set of splits for a suffix of the line depends
/// only on:
/// - The starting position of the suffix.
/// - The set of expression nesting levels currently being split up to that
/// point.
/// For example, consider the following:
/// outer(inner(argument1, argument2, argument3));
/// If the suffix we are considering is "argument2, ..." then we need to
/// know if we previously split after "outer(", "inner(", or both. The
/// answer determines how much leading indentation "argument2, ..." will
/// have.
/// Thus, whenever we calculate an ideal set of splits for some suffix, we
/// memoize it. When later recursive calls descend to that suffix again, we can
/// reuse it.
class LineSplitter {
/// The string used for newlines.
final String _lineEnding;
/// The number of characters allowed in a single line.
final int _pageWidth;
/// The list of chunks being split.
final List<Chunk> _chunks;
/// The set of spans that wrap around [_chunks].
final List<Span> _spans;
/// The leading indentation at the beginning of the first line.
final int _indent;
/// Memoization table for the best set of splits for the remainder of the
/// line following a given prefix.
final _bestSplits = <LinePrefix, SplitSet>{};
/// Creates a new splitter that tries to fit a series of chunks within a
/// given page width.
LineSplitter(this._lineEnding, this._pageWidth, this._chunks, this._spans,
this._indent) {
/// Convert the line to a [String] representation.
/// It will determine how best to split it into multiple lines of output and
/// return a single string that may contain one or more newline characters.
/// Returns a two-element list. The first element will be an [int] indicating
/// where in [buffer] the selection start point should appear if it was
/// contained in the formatted list of chunks. Otherwise it will be `null`.
/// Likewise, the second element will be non-`null` if the selection endpoint
/// is within the list of chunks.
List<int> apply(StringBuffer buffer) {
if (debugFormatter) dumpLine(_chunks, _indent);
var nestingDepth = _flattenNestingLevels();
// Hack. The formatter doesn't handle formatting very deeply nested code
// well. It can make performance spiral into a pit of sadness. Fortunately,
// we only tend to see expressions pathologically deeply nested in
// generated code that isn't read by humans much anyway. To avoid burning
// too much time on these, harden any splits containing more than a certain
// level of nesting.
// The number here was chosen empirically based on formatting the repo. It
// was picked to get the best performance while affecting the minimum amount
// of results.
// TODO(rnystrom): Do something smarter.
if (nestingDepth > 9) {
for (var chunk in _chunks) {
if (chunk.param != null && nestingDepth - chunk.nesting > 9) {
var splits = _findBestSplits(new LinePrefix());
var selection = [null, null];
// Write each chunk and the split after it.
buffer.write(" " * (_indent * spacesPerIndent));
for (var i = 0; i < _chunks.length; i++) {
var chunk = _chunks[i];
// If this chunk contains one of the selection markers, tell the writer
// where it ended up in the final output.
if (chunk.selectionStart != null) {
selection[0] = buffer.length + chunk.selectionStart;
if (chunk.selectionEnd != null) {
selection[1] = buffer.length + chunk.selectionEnd;
if (i == _chunks.length - 1) {
// Don't write trailing whitespace after the last chunk.
} else if (splits.shouldSplitAt(i)) {
if (chunk.isDouble) buffer.write(_lineEnding);
var indent = chunk.indent + splits.getNesting(i);
buffer.write(" " * (indent * spacesPerIndent));
// Should have a valid set of splits when we get here.
assert(indent != invalidSplits);
} else {
if (chunk.spaceWhenUnsplit) buffer.write(" ");
return selection;
/// Removes any unused nesting levels from the chunks.
/// The line splitter considers every possible combination of mapping
/// indentation to nesting levels when trying to find the best solution. For
/// example, it may assign 4 spaces of indentation to level 1, 8 spaces to
/// level 3, etc.
/// It's fairly common for a nesting level to not actually appear at the
/// boundary of a chunk. The source visitor may enter more than one level of
/// nesting at a point where a split cannot happen. In that case, there's no
/// point in trying to assign an indentation level to that nesting level. It
/// will never be used because no line will begin at that level of
/// indentation.
/// Worse, if the splitter *does* consider these levels, it can dramatically
/// increase solving time. To avoid that, this renumbers all of the nesting
/// levels in the chunks to not have any of these unused gaps.
/// Returns the number of distinct nesting levels remaining after flattening.
/// This may be zero if the chunks have no nesting (i.e. just statement-level
/// indentation).
int _flattenNestingLevels() {
var nestingLevels = _chunks
.map((chunk) => chunk.nesting)
.where((nesting) => nesting != -1)
var nestingMap = {-1: -1};
for (var i = 0; i < nestingLevels.length; i++) {
nestingMap[nestingLevels[i]] = i;
for (var chunk in _chunks) {
chunk.nesting = nestingMap[chunk.nesting];
return nestingLevels.length;
/// Finds the best set of splits to apply to the remainder of the line
/// following [prefix].
SplitSet _findBestSplits(LinePrefix prefix) {
// Use the memoized result if we have it.
if (_bestSplits.containsKey(prefix)) return _bestSplits[prefix];
var indent = prefix.getNextLineIndent(_chunks, _indent);
var bestSplits;
var lowestCost;
// If there are no required splits, consider not splitting any of the soft
// splits (if there are any) as one possible solution.
if (!_suffixContainsHardSplits(prefix)) {
var splits = new SplitSet();
var cost = _evaluateCost(prefix, indent, splits);
if (cost != invalidSplits) {
bestSplits = splits;
lowestCost = cost;
// If we fit the whole suffix without any splitting, that's going to be
// the best solution, so don't bother trying any others.
if (cost < Cost.overflowChar) {
_bestSplits[prefix] = bestSplits;
return bestSplits;
// For each split in the suffix, calculate the best cost where that is the
// first split applied. This recurses so that for each split, we consider
// all of the possible sets of splits *after* it and determine the best
// cost subset out of all of those.
var skippedParams = new Set();
var length = indent * spacesPerIndent;
// Don't consider the last chunk, since there's no point in splitting on it.
for (var i = prefix.length; i < _chunks.length - 1; i++) {
var split = _chunks[i];
// We must skip over this chunk if it cannot be split.
if (_canSplit(prefix, split, skippedParams)) {
var splitParams = _getSplitParams(prefix, i, split);
// Find all the params we did *not* split in the prefix that appear in
// the suffix so we can ensure they aren't split there either.
var unsplitParams = prefix.unsplitParams.toSet();
for (var param in skippedParams) {
if (_suffixContainsParam(i, param)) unsplitParams.add(param);
// Create new prefixes that go all the way up to the split. There can be
// multiple solutions here since there are different ways to handle a
// jump in nesting depth.
var longerPrefixes = prefix.expand(
_chunks, unsplitParams, splitParams, i + 1);
for (var longerPrefix in longerPrefixes) {
// Given the nesting stack for this split, see what we can do with the
// rest of the line.
var remaining = _findBestSplits(longerPrefix);
// If it wasn't possible to split the suffix given this nesting stack,
// skip it.
if (remaining == null) continue;
var splits = remaining.add(i, longerPrefix.nestingIndent);
var cost = _evaluateCost(prefix, indent, splits);
// If the suffix is invalid (because of a mis-matching multisplit),
// skip it.
if (cost == invalidSplits) continue;
if (lowestCost == null ||
cost < lowestCost ||
(cost == lowestCost && splits.weight > bestSplits.weight)) {
lowestCost = cost;
bestSplits = splits;
// If we go past the end of the page and we've already found a solution
// that fits, then no other solution that involves overflowing will beat
// that, so stop.
length += split.text.length;
if (split.spaceWhenUnsplit) length++;
if (length > _pageWidth &&
lowestCost != null &&
lowestCost < Cost.overflowChar) {
// If we can't leave this split unsplit (because it's hard or has a
// param that the prefix already forced to split), then stop.
if (split.isHardSplit) break;
if (prefix.splitParams.contains(split.param)) break;
_bestSplits[prefix] = bestSplits;
return bestSplits;
/// Gets whether the splitter can split [chunk] given [prefix] and
/// [skippedParams] which come before it.
/// This returns `false` if the prefix or skipped params imply that this
/// chunk's param must also not be applied.
bool _canSplit(LinePrefix prefix, Chunk chunk,
Set<SplitParam> skippedParams) {
// Can always split on a hard split.
if (chunk.param == null) return true;
// If we didn't split the param in the prefix, we can't split on the same
// param in the suffix.
if (prefix.unsplitParams.contains(chunk.param)) return false;
// If we already skipped over the chunk's param,
// have to skip over it on this chunk too.
if (skippedParams.contains(chunk.param)) return false;
isParamSkipped(param) {
if (skippedParams.contains(param)) return false;
// If any param implied by this one is skipped, then splitting on the
// starting param would imply it should be split, which violates that,
// so don't allow the root one to be split.
for (var implied in param.implies) {
if (!isParamSkipped(implied)) return false;
return true;
return isParamSkipped(chunk.param);
/// Get the set of params we have forced to split in [prefix] (including
/// [split] which is also forced to split) that also appear in the suffix.
/// We rebuild the set from scratch so that splits that no longer appear in
/// the shorter suffix are discarded. This helps keep the set small in the
/// prefix, which maximizes the memoization hits.
Set<SplitParam> _getSplitParams(LinePrefix prefix, int index, Chunk split) {
var splitParams = new Set();
addParam(param) {
if (_suffixContainsParam(index, param)) splitParams.add(param);
// Recurse into the params that are implied by this one.
// Consider this split too.
if (split.param != null) addParam(split.param);
return splitParams;
/// Gets whether the suffix after [prefix] contains any mandatory splits.
/// This includes both hard splits and splits that depend on params that were
/// set in the prefix.
bool _suffixContainsHardSplits(LinePrefix prefix) {
for (var i = prefix.length; i < _chunks.length - 1; i++) {
if (_chunks[i].isHardSplit || (_chunks[i].isSoftSplit &&
prefix.splitParams.contains(_chunks[i].param))) {
return true;
return false;
/// Gets whether the suffix of the line after index [split] contains a soft
/// split using [param].
bool _suffixContainsParam(int split, SplitParam param) {
if (param == null) return false;
// TODO(rnystrom): Consider caching the set of params that appear at every
// suffix.
for (var i = split + 1; i < _chunks.length; i++) {
if (_chunks[i].isSoftSplit && _chunks[i].param == param) {
return true;
return false;
/// Evaluates the cost (i.e. the relative "badness") of splitting the line
/// into [lines] physical lines based on the current set of params.
int _evaluateCost(LinePrefix prefix, int indent, SplitSet splits) {
assert(splits != null);
// Calculate the length of each line and apply the cost of any spans that
// get split.
var cost = 0;
var length = indent * spacesPerIndent;
var params = new Set();
var splitIndexes = [];
endLine() {
// Punish lines that went over the length. We don't rule these out
// completely because it may be that the only solution still goes over
// (for example with long string literals).
if (length > _pageWidth) {
cost += (length - _pageWidth) * Cost.overflowChar;
for (var i = prefix.length; i < _chunks.length; i++) {
var chunk = _chunks[i];
length += chunk.text.length;
if (i < _chunks.length - 1) {
if (splits.shouldSplitAt(i)) {
if (chunk.param != null && !params.contains(chunk.param)) {
// Don't double-count params if multiple splits share the same
// param.
// TODO(rnystrom): Is this needed? Can we actually let splits that
// share a param accumulate cost?
cost += chunk.param.cost;
// Start the new line.
length = (chunk.indent + splits.getNesting(i)) * spacesPerIndent;
} else {
if (chunk.spaceWhenUnsplit) length++;
// See which spans got split. We avoid iterators here for performance.
for (var i = 0; i < _spans.length; i++) {
var span = _spans[i];
for (var j = 0; j < splitIndexes.length; j++) {
var index = splitIndexes[j];
// If the split is contained within a span (and is not the tail end of
// it), the span got split.
if (index >= span.start && index < span.end) {
cost += span.cost;
// Finish the last line.
return cost;
/// An immutable, persistent set of enabled soft split [Chunk]s.
/// For each chunk, this tracks if it has been split and, if so, what the
/// chosen level of expression nesting is for the following line.
/// Internally, this uses a sparse parallel list where each element corresponds
/// to the nesting level of the chunk at that index in the chunk list, or `null`
/// if there is no active split there. This had about a 10% perf improvement
/// over using a [Set] of splits or a persistent linked list of split
/// index/nesting pairs.
class SplitSet {
List<int> _splitNesting;
/// Creates a new empty split set.
SplitSet() : this._(const []);
/// Returns a new [SplitSet] containing the union of this one and the split
/// at [splitIndex] with [nestingIndent].
SplitSet add(int splitIndex, int nestingIndent) {
var newNesting = new List(math.max(splitIndex + 1, _splitNesting.length));
newNesting.setAll(0, _splitNesting);
newNesting[splitIndex] = nestingIndent;
return new SplitSet._(newNesting);
/// Returns `true` if the chunk at [splitIndex] should be split.
bool shouldSplitAt(int splitIndex) =>
splitIndex < _splitNesting.length && _splitNesting[splitIndex] != null;
/// Gets the nesting level of the split chunk at [splitIndex].
int getNesting(int splitIndex) => _splitNesting[splitIndex];
/// Determines the "weight" of the set.
/// This is the sum of the positions where splits occur. Having more splits
/// increases weight but, more importantly, having a split closer to the end
/// increases its weight.
/// This is used to break a tie when two [SplitSets] have the same cost. When
/// that occurs, we prefer splits later in the line since that keeps most
/// code towards the top lines. This occurs frequently in argument lists.
/// Since every argument split has the same cost, a long argument list can be
/// split in two a number of equal-cost ways. The weight is used to select
/// the one that puts the most arguments on the first line(s).
int get weight {
var result = 0;
for (var i = 0; i < _splitNesting.length; i++) {
if (_splitNesting[i] != null) result += i;
return result;
String toString() {
var result = [];
for (var i = 0; i < _splitNesting.length; i++) {
if (_splitNesting[i] != null) {
return result.join(" ");