lib/src/line_splitting/line_splitter.dart - dart_style - Git at Google

 // Copyright (c) 2015, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.

 library dart_style.src.line_splitting.line_splitter;

 import '../chunk.dart';
 import '../debug.dart' as debug;
 import '../line_writer.dart';
 import '../rule/rule.dart';
 import 'rule_set.dart';
 import 'solve_state.dart';
 import 'solve_state_queue.dart';

 /// To ensure the solver doesn't go totally pathological on giant code, we cap
 /// it at a fixed number of attempts.
 ///
 /// If the optimal solution isn't found after this many tries, it just uses the
 /// best it found so far.
 const _maxAttempts = 5000;

 /// Takes a set of chunks and determines the best values for its rules in order
 /// to fit it inside the page boundary.
 ///
 /// This problem is exponential in the number of rules and a single expression
 /// in Dart can be quite large, so it isn't feasible to brute force this. For
 /// example:
 ///
 ///     outer(
 ///         fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
 ///         fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
 ///         fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
 ///         fn(1 + 2, 3 + 4, 5 + 6, 7 + 8));
 ///
 /// There are 509,607,936 ways this can be split.
 ///
 /// The problem is even harder because we may not be able to easily tell if a
 /// given solution is the best one. It's possible that there is *no* solution
 /// that fits in the page (due to long strings or identifiers) so the winning
 /// solution may still have overflow characters. This makes it hard to know
 /// when we are done and can stop looking.
 ///
 /// There are a couple of pieces of domain knowledge we use to cope with this:
 ///
 /// - Changing a rule from unsplit to split will never lower its cost. A
 ///   solution with all rules unsplit will always be the one with the lowest
 ///   cost (zero). Conversely, setting all of its rules to the maximum split
 ///   value will always have the highest cost.
 ///
 ///   (You might think there is a converse rule about overflow characters. The
 ///   solution with the fewest splits will have the most overflow, and the
 ///   solution with the most splits will have the least overflow. Alas, because
 ///   of indentation, that isn't always the case. Adding a split may *increase*
 ///   overflow in some cases.)
 ///
 /// - If all of the chunks for a rule are inside lines that already fit in the
 ///   page, then splitting that rule will never improve the solution.
 ///
 /// - If two partial solutions have the same cost and the bound rules don't
 ///   affect any of the remaining unbound rules, then whichever partial
 ///   solution is currently better will always be the winner regardless of what
 ///   the remaining unbound rules are bound to.
 ///
 /// We start off with a [SolveState] where all rules are unbound (which
 /// implicitly treats them as unsplit). For a given solve state, we can produce
 /// a set of expanded states that takes some of the rules in the first long
 /// line and bind them to split values. This always produces new solve states
 /// with higher cost (but often fewer overflow characters) than the parent
 /// state.
 ///
 /// We take these expanded states and add them to a work list sorted by cost.
 /// Since unsplit rules always have lower cost solutions, we know that no state
 /// we enqueue later will ever have a lower cost than the ones we already have
 /// enqueued.
 ///
 /// Then we keep pulling states off the work list and expanding them and adding
 /// the results back into the list. We do this until we hit a solution where
 /// all characters fit in the page. The first one we find will have the lowest
 /// cost and we're done.
 ///
 /// We also keep running track of the best solution we've found so far that
 /// has the fewest overflow characters and the lowest cost. If no solution fits,
 /// we'll use this one.
 ///
 /// When enqueing a solution, we can sometimes collapse it and a previously
 /// queued one by preferring one or the other. If two solutions have the same
 /// cost and we can prove that they won't diverge later as unbound rules are
 /// set, we can pick the winner now and discard the other. This lets us avoid
 /// redundantly exploring entire subtrees of the solution space.
 ///
 /// As a final escape hatch for pathologically nasty code, after trying some
 /// fixed maximum number of solve states, we just bail and return the best
 /// solution found so far.
 ///
 /// Even with the above algorithmic optimizations, complex code may still
 /// require a lot of exploring to find an optimal solution. To make that fast,
 /// this code is carefully profiled and optimized. If you modify this, make
 /// sure to test against the benchmark to ensure you don't regress performance.
 class LineSplitter {
   final LineWriter writer;

   /// The list of chunks being split.
   final List<Chunk> chunks;

   /// The set of soft rules whose values are being selected.
   final List<Rule> rules;

   /// The number of characters of additional indentation to apply to each line.
   ///
   /// This is used when formatting blocks to get the output into the right
   /// column based on where the block appears.
   final int blockIndentation;

   /// The starting column of the first line.
   final int firstLineIndent;

   /// The queue of solve states to explore further.
   ///
   /// This is sorted lowest-cost first. This ensures that as soon as we find a
   /// solution that fits in the page, we know it will be the lowest cost one
   /// and can stop looking.
   final _queue = new SolveStateQueue();

   /// The lowest cost solution found so far.
   SolveState _bestSolution;

   /// Creates a new splitter for [_writer] that tries to fit [chunks] into the
   /// page width.
   LineSplitter(this.writer, List<Chunk> chunks, int blockIndentation,
       int firstLineIndent,
       {bool flushLeft: false})
       : chunks = chunks,
         // Collect the set of rules that we need to select values for.
         rules = chunks
             .map((chunk) => chunk.rule)
             .where((rule) => rule != null)
             .toSet()
             .toList(growable: false),
         blockIndentation = blockIndentation,
         firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation {
     _queue.bindSplitter(this);

     // Store the rule's index in the rule so we can get from a chunk to a rule
     // index quickly.
     for (var i = 0; i < rules.length; i++) {
       rules[i].index = i;
     }

     // Now that every used rule has an index, tell the rules to discard any
     // constraints on unindexed rules.
     for (var rule in rules) {
       rule.forgetUnusedRules();
     }
   }

   /// Determine the best way to split the chunks into lines that fit in the
   /// page, if possible.
   ///
   /// Returns a [SplitSet] that defines where each split occurs and the
   /// indentation of each line.
   ///
   /// [firstLineIndent] is the number of characters of whitespace to prefix the
   /// first line of output with.
   SplitSet apply() {
     // Start with a completely unbound, unsplit solution.
     _queue.add(new SolveState(this, new RuleSet(rules.length)));

     var attempts = 0;
     while (_queue.isNotEmpty) {
       var state = _queue.removeFirst();

       if (state.isBetterThan(_bestSolution)) {
         _bestSolution = state;

         // Since we sort solutions by cost the first solution we find that
         // fits is the winner.
         if (_bestSolution.overflowChars == 0) break;
       }

       if (debug.traceSplitter) {
         var best = state == _bestSolution ? " (best)" : "";
         debug.log("$state$best");
         debug.dumpLines(chunks, firstLineIndent, state.splits);
         debug.log();
       }

       if (attempts++ > _maxAttempts) break;

       // Try bumping the rule values for rules whose chunks are on long lines.
       state.expand();
     }

     if (debug.traceSplitter) {
       debug.log("$_bestSolution (winner)");
       debug.dumpLines(chunks, firstLineIndent, _bestSolution.splits);
       debug.log();
     }

     return _bestSolution.splits;
   }

   void enqueue(SolveState state) {
     _queue.add(state);
   }
 }
	// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
	// for details. All rights reserved. Use of this source code is governed by a
	// BSD-style license that can be found in the LICENSE file.

	library dart_style.src.line_splitting.line_splitter;

	import '../chunk.dart';
	import '../debug.dart' as debug;
	import '../line_writer.dart';
	import '../rule/rule.dart';
	import 'rule_set.dart';
	import 'solve_state.dart';
	import 'solve_state_queue.dart';

	/// To ensure the solver doesn't go totally pathological on giant code, we cap
	/// it at a fixed number of attempts.
	///
	/// If the optimal solution isn't found after this many tries, it just uses the
	/// best it found so far.
	const _maxAttempts = 5000;

	/// Takes a set of chunks and determines the best values for its rules in order
	/// to fit it inside the page boundary.
	///
	/// This problem is exponential in the number of rules and a single expression
	/// in Dart can be quite large, so it isn't feasible to brute force this. For
	/// example:
	///
	/// outer(
	/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
	/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
	/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8),
	/// fn(1 + 2, 3 + 4, 5 + 6, 7 + 8));
	///
	/// There are 509,607,936 ways this can be split.
	///
	/// The problem is even harder because we may not be able to easily tell if a
	/// given solution is the best one. It's possible that there is no solution
	/// that fits in the page (due to long strings or identifiers) so the winning
	/// solution may still have overflow characters. This makes it hard to know
	/// when we are done and can stop looking.
	///
	/// There are a couple of pieces of domain knowledge we use to cope with this:
	///
	/// - Changing a rule from unsplit to split will never lower its cost. A
	/// solution with all rules unsplit will always be the one with the lowest
	/// cost (zero). Conversely, setting all of its rules to the maximum split
	/// value will always have the highest cost.
	///
	/// (You might think there is a converse rule about overflow characters. The
	/// solution with the fewest splits will have the most overflow, and the
	/// solution with the most splits will have the least overflow. Alas, because
	/// of indentation, that isn't always the case. Adding a split may increase
	/// overflow in some cases.)
	///
	/// - If all of the chunks for a rule are inside lines that already fit in the
	/// page, then splitting that rule will never improve the solution.
	///
	/// - If two partial solutions have the same cost and the bound rules don't
	/// affect any of the remaining unbound rules, then whichever partial
	/// solution is currently better will always be the winner regardless of what
	/// the remaining unbound rules are bound to.
	///
	/// We start off with a [SolveState] where all rules are unbound (which
	/// implicitly treats them as unsplit). For a given solve state, we can produce
	/// a set of expanded states that takes some of the rules in the first long
	/// line and bind them to split values. This always produces new solve states
	/// with higher cost (but often fewer overflow characters) than the parent
	/// state.
	///
	/// We take these expanded states and add them to a work list sorted by cost.
	/// Since unsplit rules always have lower cost solutions, we know that no state
	/// we enqueue later will ever have a lower cost than the ones we already have
	/// enqueued.
	///
	/// Then we keep pulling states off the work list and expanding them and adding
	/// the results back into the list. We do this until we hit a solution where
	/// all characters fit in the page. The first one we find will have the lowest
	/// cost and we're done.
	///
	/// We also keep running track of the best solution we've found so far that
	/// has the fewest overflow characters and the lowest cost. If no solution fits,
	/// we'll use this one.
	///
	/// When enqueing a solution, we can sometimes collapse it and a previously
	/// queued one by preferring one or the other. If two solutions have the same
	/// cost and we can prove that they won't diverge later as unbound rules are
	/// set, we can pick the winner now and discard the other. This lets us avoid
	/// redundantly exploring entire subtrees of the solution space.
	///
	/// As a final escape hatch for pathologically nasty code, after trying some
	/// fixed maximum number of solve states, we just bail and return the best
	/// solution found so far.
	///
	/// Even with the above algorithmic optimizations, complex code may still
	/// require a lot of exploring to find an optimal solution. To make that fast,
	/// this code is carefully profiled and optimized. If you modify this, make
	/// sure to test against the benchmark to ensure you don't regress performance.
	class LineSplitter {
	final LineWriter writer;

	/// The list of chunks being split.
	final List<Chunk> chunks;

	/// The set of soft rules whose values are being selected.
	final List<Rule> rules;

	/// The number of characters of additional indentation to apply to each line.
	///
	/// This is used when formatting blocks to get the output into the right
	/// column based on where the block appears.
	final int blockIndentation;

	/// The starting column of the first line.
	final int firstLineIndent;

	/// The queue of solve states to explore further.
	///
	/// This is sorted lowest-cost first. This ensures that as soon as we find a
	/// solution that fits in the page, we know it will be the lowest cost one
	/// and can stop looking.
	final _queue = new SolveStateQueue();

	/// The lowest cost solution found so far.
	SolveState _bestSolution;

	/// Creates a new splitter for [_writer] that tries to fit [chunks] into the
	/// page width.
	LineSplitter(this.writer, List<Chunk> chunks, int blockIndentation,
	int firstLineIndent,
	{bool flushLeft: false})
	: chunks = chunks,
	// Collect the set of rules that we need to select values for.
	rules = chunks
	.map((chunk) => chunk.rule)
	.where((rule) => rule != null)
	.toSet()
	.toList(growable: false),
	blockIndentation = blockIndentation,
	firstLineIndent = flushLeft ? 0 : firstLineIndent + blockIndentation {
	_queue.bindSplitter(this);

	// Store the rule's index in the rule so we can get from a chunk to a rule
	// index quickly.
	for (var i = 0; i < rules.length; i++) {
	rules[i].index = i;
	}

	// Now that every used rule has an index, tell the rules to discard any
	// constraints on unindexed rules.
	for (var rule in rules) {
	rule.forgetUnusedRules();
	}
	}

	/// Determine the best way to split the chunks into lines that fit in the
	/// page, if possible.
	///
	/// Returns a [SplitSet] that defines where each split occurs and the
	/// indentation of each line.
	///
	/// [firstLineIndent] is the number of characters of whitespace to prefix the
	/// first line of output with.
	SplitSet apply() {
	// Start with a completely unbound, unsplit solution.
	_queue.add(new SolveState(this, new RuleSet(rules.length)));

	var attempts = 0;
	while (_queue.isNotEmpty) {
	var state = _queue.removeFirst();

	if (state.isBetterThan(_bestSolution)) {
	_bestSolution = state;

	// Since we sort solutions by cost the first solution we find that
	// fits is the winner.
	if (_bestSolution.overflowChars == 0) break;
	}

	if (debug.traceSplitter) {
	var best = state == _bestSolution ? " (best)" : "";
	debug.log("$state$best");
	debug.dumpLines(chunks, firstLineIndent, state.splits);
	debug.log();
	}

	if (attempts++ > _maxAttempts) break;

	// Try bumping the rule values for rules whose chunks are on long lines.
	state.expand();
	}

	if (debug.traceSplitter) {
	debug.log("$_bestSolution (winner)");
	debug.dumpLines(chunks, firstLineIndent, _bestSolution.splits);
	debug.log();
	}

	return _bestSolution.splits;
	}

	void enqueue(SolveState state) {
	_queue.add(state);
	}
	}