blob: 05caa97813d8838c58386c54d58cbf9ad4f7c179 [file] [log] [blame]
// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'dart:collection';
import 'package:analyzer/dart/ast/token.dart';
import 'package:analyzer/source/line_info.dart';
import '../comment_type.dart';
import 'piece_writer.dart';
/// Functionality used by [AstNodeVisitor] and [SequenceBuilder] to build text
/// and pieces from the comment tokens between meaningful tokens used by AST
/// nodes.
///
/// Also handles tracking newlines between tokens and comments so that
/// information can be used to preserve discretionary blank lines in places
/// where they are allowed. These are handled along with comments because both
/// comments and whitespace are found between the linear series of [Token]s
/// produced by the analyzer parser. Likewise, both are output as whitespace
/// (in the sense of not being executable code) interleaved with the
/// [Piece]-building code that walks the actual AST and processes the code
/// tokens.
///
/// Comments are a challenge because they confound the intuitive tree-like
/// structure of the code. A comment can appear between any two tokens, and a
/// line comment can force the formatter to insert a newline in places where
/// one wouldn't otherwise make sense. When that happens, the formatter then
/// has to decide how to indent the next line.
///
/// At the same time, comments appearing in idiomatic locations like between
/// statements should be formatted gracefully and give users control over the
/// blank lines around them. To support all of that, comments are handled in a
/// couple of different ways.
///
/// Comments between top-level declarations, member declarations inside types,
/// and statements are handled directly by [SequenceBuilder]. Comments inside
/// argument lists, collection literals, and other similar constructs are
/// handled directly by [DelimitedPieceBuilder].
///
/// All other comments occur inside the middle of some expression or other
/// construct. These get directly embedded in the [TextPiece] of the code being
/// written. When that [TextPiece] is output later, it will include the comments
/// as well.
mixin CommentWriter {
  /// The writer that comments, newlines, and spaces are written to.
  PieceWriter get pieces;

  /// Line information used to map token offsets to line and column numbers.
  LineInfo get lineInfo;

  /// The tokens whose preceding comments have already been taken by calls to
  /// [takeCommentsBefore()].
  final _takenTokens = <Token>{};

  /// Returns the comments that appear before [token] and marks them as taken.
  ///
  /// The caller becomes responsible for writing the returned comments:
  /// once a token is marked, [writeCommentsBefore] skips it. If the comments
  /// for [token] were already taken, returns [CommentSequence.empty].
  CommentSequence takeCommentsBefore(Token token) {
    // `Set.add()` returns `false` when the token was already recorded, which
    // means an earlier call has already handed out its comments.
    if (!_takenTokens.add(token)) return CommentSequence.empty;

    return _collectComments(token);
  }

  /// Writes comments that appear before [token].
  void writeCommentsBefore(Token token) {
    // Bail out early in the common case where there are no comments, so the
    // newline counts between tokens don't have to be calculated. Also skip
    // tokens whose comments some other construct has already taken.
    if (token.precedingComments == null || _takenTokens.contains(token)) {
      return;
    }

    final sequence = _collectComments(token);
    for (final (index, comment) in sequence.indexed) {
      if (sequence.isHanging(index)) {
        // Keep the comment attached to the previous token.
        pieces.writeComment(comment, hanging: true);
      } else {
        pieces.writeNewline();
        pieces.writeComment(comment);
      }

      // Line and doc comments are followed by a newline.
      final endsLine =
          comment.type == CommentType.line || comment.type == CommentType.doc;
      if (endsLine) pieces.writeNewline();
    }

    if (sequence.isEmpty) return;
    if (_needsSpaceAfterComment(token.lexeme)) pieces.writeSpace();
  }

  /// Walks the comment tokens preceding [token] and builds a
  /// [CommentSequence] holding each comment together with the number of
  /// newlines around it.
  CommentSequence _collectComments(Token token) {
    final previousToken = token.previous!;
    var previousLine = _endLine(previousToken);
    final tokenLine = _startLine(token);

    // Edge case: The analyzer includes the "\n" in the script tag's lexeme,
    // which confuses some of these calculations. We don't want to allow a
    // blank line between the script tag and a following comment anyway, so
    // just override the script tag's line.
    if (previousToken.type == TokenType.SCRIPT_TAG) previousLine = tokenLine;

    final firstComment = token.precedingComments;
    final sequence = CommentSequence._([], []);

    Token? comment = firstComment;
    while (comment != null) {
      final commentLine = _startLine(comment);
      final text = comment.lexeme.trim();
      final startsAtColumnOne = _startColumn(comment) == 1;

      // `///` starts a line doc comment, but four or more slashes does not.
      final isLineDoc = text.startsWith('///') && !text.startsWith('////');
      // `/**` starts a block doc comment, except for the empty `/**/`.
      final isBlockDoc = text.startsWith('/**') && text != '/**/';

      // Line doc comments are always indented even if they were flush left.
      final flushLeft = !isLineDoc && startsAtColumnOne;

      // Always add a blank line (if possible) before a doc comment block,
      // which is why only the first comment in the run gets the override.
      final linesBefore = isLineDoc && comment == firstComment
          ? 2
          : commentLine - previousLine;

      final CommentType type;
      if (isLineDoc || isBlockDoc) {
        type = CommentType.doc;
      } else if (comment.type == TokenType.SINGLE_LINE_COMMENT) {
        type = CommentType.line;
      } else if (commentLine == previousLine || commentLine == tokenLine) {
        // TODO(tall): I'm not sure if it makes sense to distinguish block
        // comments with newlines around them from other block comments in the
        // new Piece representation. Consider merging CommentType.inlineBlock
        // and CommentType.block into a single type.
        type = CommentType.inlineBlock;
      } else {
        type = CommentType.block;
      }

      sequence._add(
          linesBefore,
          SourceComment(text, type,
              offset: comment.offset, flushLeft: flushLeft));

      previousLine = _endLine(comment);
      comment = comment.next;
    }

    sequence._setLinesBeforeNextToken(tokenLine - previousLine);
    return sequence;
  }

  /// Returns `true` if a space should be output between the last comment that
  /// was just written and the [token] lexeme that will be written next.
  bool _needsSpaceAfterComment(String token) {
    // No space goes before a closing delimiter, a separator, or the empty
    // string (for EOF). Everything else gets one.
    const noSpaceBefore = {')', ']', '}', ',', ';', ''};
    return !noSpaceBefore.contains(token);
  }

  /// Gets the 1-based line number that the beginning of [token] lies on.
  int _startLine(Token token) {
    final location = lineInfo.getLocation(token.offset);
    return location.lineNumber;
  }

  /// Gets the 1-based line number that the end of [token] lies on.
  int _endLine(Token token) {
    final location = lineInfo.getLocation(token.end);
    return location.lineNumber;
  }

  /// Gets the 1-based column number that the beginning of [token] lies on.
  int _startColumn(Token token) {
    final location = lineInfo.getLocation(token.offset);
    return location.columnNumber;
  }
}
/// A comment in the source, with a bit of information about the surrounding
/// whitespace.
class SourceComment {
  /// The text of the comment, including `//`, `/*`, and `*/`.
  final String text;

  /// The kind of comment this is.
  final CommentType type;

  /// Whether this comment starts at column one in the source.
  ///
  /// Comments that start at the start of the line will not be indented in the
  /// output. This way, commented out chunks of code do not get erroneously
  /// re-indented.
  final bool flushLeft;

  /// The offset in the original source code at which this comment begins.
  ///
  /// Used to track selection markers within the comment.
  final int offset;

  SourceComment(
    this.text,
    this.type, {
    required this.flushLeft,
    required this.offset,
  });

  /// Whether this comment contains a mandatory newline, either because it's a
  /// comment that should be on its own line or is a multi-line block comment.
  bool get containsNewline {
    // Every kind other than an inline block comment goes on its own line.
    if (type != CommentType.inlineBlock) return true;

    // An inline block comment only forces a newline if its text spans lines.
    return text.contains('\n');
  }

  @override
  String toString() {
    final typeName = type.toString().replaceAll('CommentType.', '');
    return '`$text` $typeName';
  }
}
/// A list of source code comments and the number of newlines between them, as
/// well as the number of newlines before the first comment and after the last
/// comment.
///
/// If there are no comments, this just tracks the number of newlines between
/// a pair of tokens.
///
/// This class is not simply a list of "comment + newline" pairs because we want
/// to know the number of newlines before the first comment and after the last.
/// That means there is always one more newline count than there are comments,
/// including the degenerate case where there are no comments but one newline
/// count.
///
/// For example, this code:
///
/// ```dart
/// a /* c1 */
/// /* c2 */
///
/// /* c3 */
///
///
/// b
/// ```
///
/// Produces a sequence like:
///
/// * 0 newlines between `a` and `/* c1 */`
/// * Comment `/* c1 */`
/// * 1 newline between `/* c1 */` and `/* c2 */`
/// * Comment `/* c2 */`
/// * 2 newlines between `/* c2 */` and `/* c3 */`
/// * Comment `/* c3 */`
/// * 3 newlines between `/* c3 */` and `b`
class CommentSequence extends ListBase<SourceComment> {
  /// A shared sequence with no comments and a single newline count of zero.
  static const CommentSequence empty = CommentSequence._([0], []);

  /// The number of newlines between a pair of comments or the preceding or
  /// following tokens.
  ///
  /// This list is always one element longer than [_comments].
  final List<int> _linesBetween;

  /// The comments in the sequence, in source order.
  final List<SourceComment> _comments;

  const CommentSequence._(this._linesBetween, this._comments);

  /// The number of newlines between the comment at [commentIndex] and the
  /// preceding comment or token.
  int linesBefore(int commentIndex) {
    return _linesBetween[commentIndex];
  }

  /// The number of newlines between the comment at [commentIndex] and the
  /// following comment or token.
  int linesAfter(int commentIndex) {
    return _linesBetween[commentIndex + 1];
  }

  /// Whether the comment at [commentIndex] should be attached to the preceding
  /// token.
  bool isHanging(int commentIndex) {
    // Only a comment on the same line as what precedes it can hang; a comment
    // is never moved up to a preceding line.
    if (linesBefore(commentIndex) == 0) {
      // Doc comments and non-inline `/* ... */` comments are always pushed to
      // the next line. Only inline block comments and line comments are
      // allowed to hang at the end of a line.
      final kind = _comments[commentIndex].type;
      return kind == CommentType.inlineBlock || kind == CommentType.line;
    }

    return false;
  }

  /// Whether the comment at [commentIndex] should be attached to the following
  /// token.
  bool isLeading(int commentIndex) {
    // Code on a later line is never pulled up to the comment, and only inline
    // block comments may share a line with the code that follows them.
    return linesAfter(commentIndex) <= 0 &&
        _comments[commentIndex].type == CommentType.inlineBlock;
  }

  /// The number of newlines between the last comment and the next token.
  ///
  /// If there are no comments, this is the number of lines between the next
  /// token and the preceding one.
  int get linesBeforeNextToken {
    return _linesBetween.last;
  }

  /// Whether there are any blank lines (i.e. more than one newline) between any
  /// pair of comments or between the comments and surrounding code.
  bool get containsBlank {
    for (final lines in _linesBetween) {
      if (lines > 1) return true;
    }

    return false;
  }

  /// The number of comments in the sequence.
  @override
  int get length {
    return _comments.length;
  }

  /// Always throws: the list interface of a sequence is read-only.
  @override
  set length(int newLength) {
    throw UnsupportedError('Comment sequence can\'t be modified.');
  }

  /// The comment at [index].
  @override
  SourceComment operator [](int index) {
    return _comments[index];
  }

  /// Always throws: the list interface of a sequence is read-only.
  @override
  void operator []=(int index, SourceComment value) {
    throw UnsupportedError('Comment sequence can\'t be modified.');
  }

  /// Appends [comment] along with the number of newlines that precede it.
  void _add(int linesBefore, SourceComment comment) {
    _linesBetween.add(linesBefore);
    _comments.add(comment);
  }

  /// Records the number of lines between the end of the last comment and the
  /// beginning of the next token.
  void _setLinesBeforeNextToken(int linesAfter) {
    _linesBetween.add(linesAfter);
  }

  /// Creates a new sequence that is this sequence followed by [other].
  ///
  /// Sums the trailing newline of the left sequence and the leading newline
  /// of the right sequence.
  ///
  /// If either sequence has no comments, the other one is returned as-is
  /// without allocating a new sequence.
  CommentSequence concatenate(CommentSequence other) {
    if (isEmpty) return other;
    if (other.isEmpty) return this;

    final lastIndex = _linesBetween.length - 1;
    final mergedLines = [
      // Every newline count from the left sequence, except the last.
      ..._linesBetween.sublist(0, lastIndex),
      // The seam: the trailing count of the left sequence plus the leading
      // count of the right sequence.
      _linesBetween[lastIndex] + other._linesBetween[0],
      // The remaining newline counts of the right sequence.
      ...other._linesBetween.sublist(1),
    ];

    final mergedComments = [..._comments, ...other._comments];
    return CommentSequence._(mergedLines, mergedComments);
  }

  /// Splits this sequence into two subsequences where [index] indicates the
  /// number of comments in the first returned sequence and the second
  /// sequence gets the rest.
  ///
  /// The newline count right at the split point goes to the first sequence and
  /// the second sequence gets an initial newline count of zero. For example,
  /// given this input sequence:
  ///
  /// * 4 newlines before `/* a */`
  /// * Comment `/* a */`
  /// * 5 newlines between `/* a */` and `/* b */`
  /// * Comment `/* b */`
  /// * 6 newlines between `/* b */` and `/* c */`
  /// * Comment `/* c */`
  /// * 7 newlines between `/* c */` and `/* d */`
  /// * Comment `/* d */`
  /// * 8 newlines between `/* d */` and `/* e */`
  /// * Comment `/* e */`
  /// * 9 newlines after `/* e */`
  ///
  /// Calling `splitAt(2)` yields:
  ///
  /// First sequence:
  ///
  /// * 4 newlines before `/* a */`
  /// * Comment `/* a */`
  /// * 5 newlines between `/* a */` and `/* b */`
  /// * Comment `/* b */`
  /// * 6 newlines after `/* b */`
  ///
  /// Second sequence:
  ///
  /// * 0 newlines before `/* c */`
  /// * Comment `/* c */`
  /// * 7 newlines between `/* c */` and `/* d */`
  /// * Comment `/* d */`
  /// * 8 newlines between `/* d */` and `/* e */`
  /// * Comment `/* e */`
  /// * 9 newlines after `/* e */`
  ///
  /// When [index] is `0` or equal to [length], no new sequences are
  /// allocated: this sequence is returned unchanged on the side that holds
  /// all of the comments, paired with [empty] on the other side.
  (CommentSequence, CommentSequence) splitAt(int index) {
    if (index == 0) return (CommentSequence.empty, this);
    if (index == length) return (this, CommentSequence.empty);

    final head = CommentSequence._(
        // +1 to include the newline count after the last kept comment.
        _linesBetween.sublist(0, index + 1),
        _comments.sublist(0, index));

    final tail = CommentSequence._(
        // 0 is the synthesized newline count before the first comment.
        [0, ..._linesBetween.sublist(index + 1)],
        _comments.sublist(index));

    return (head, tail);
  }
}