blob: 3e1926eb6ef0b7723c05c8780b1e62385f328ba7 [file] [log] [blame]
// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'dart:collection';
import 'package:analyzer/dart/ast/token.dart';
import 'package:analyzer/source/line_info.dart';
import '../comment_type.dart';
/// Functionality used by [AstNodeVisitor], [DelimitedListBuilder], and
/// [SequenceBuilder] to build pieces from the comment tokens between meaningful
/// tokens used by AST nodes.
///
/// Also handles tracking newlines between tokens and comments so that
/// information can be used to preserve discretionary blank lines in places
/// where they are allowed. These are handled along with comments because both
/// comments and whitespace are found between the linear series of [Token]s
/// produced by the analyzer parser. Likewise, both are output as whitespace
/// (in the sense of not being executable code) interleaved with the
/// [Piece]-building code that walks the actual AST and processes the code
/// tokens.
///
/// Comments are a challenge because they confound the intuitive tree-like
/// structure of the code. A comment can appear between any two tokens, and a
/// line comment can force the formatter to insert a newline in places where
/// one wouldn't otherwise make sense. When that happens, the formatter then
/// has to decide how to indent the next line.
///
/// At the same time, comments appearing in idiomatic locations like between
/// statements should be formatted gracefully and give users control over the
/// blank lines around them. To support all of that, comments are handled in a
/// couple of different ways.
///
/// Comments between top-level declarations, member declarations inside types,
/// and statements are handled directly by [SequenceBuilder]. Comments inside
/// argument lists, collection literals, and other similar constructs are
/// handled directly by [DelimitedListBuilder].
///
/// All other comments occur inside the middle of some expression or other
/// construct. These get directly embedded in the [TextPiece] of the code being
/// written. When that [TextPiece] is output later, it will include the comments
/// as well.
class CommentWriter {
  /// Converts token offsets into line and column numbers.
  final LineInfo _lineInfo;

  /// Tokens whose preceding comments were already handed out by
  /// [takeCommentsBefore] and so must not be yielded a second time.
  final Set<Token> _takenTokens = {};

  CommentWriter(this._lineInfo);

  /// Claims and returns the comments that appear before [token].
  ///
  /// After this call the caller owns those comments and is responsible for
  /// writing them: any later call to this method or to [commentsBefore] for
  /// the same [token] returns [CommentSequence.empty]. Used by
  /// [SequenceBuilder] and [DelimitedListBuilder] which handle comment
  /// formatting themselves.
  CommentSequence takeCommentsBefore(Token token) {
    // [Set.add] returns `false` when the token was already in the set, which
    // means an earlier call took its comments.
    if (!_takenTokens.add(token)) return CommentSequence.empty;

    return _commentsBefore(token);
  }

  /// Returns the comments that appear before [token] without claiming them.
  ///
  /// Yields [CommentSequence.empty] when the token has no preceding comments
  /// or when they were already taken by [takeCommentsBefore].
  CommentSequence commentsBefore(Token token) {
    // Check for the absence of comments first since that is the common case.
    // It lets us skip calculating the number of newlines between every pair
    // of tokens, which is slow and unnecessary.
    if (token.precedingComments == null || _takenTokens.contains(token)) {
      return CommentSequence.empty;
    }

    return _commentsBefore(token);
  }

  /// Walks the comment tokens preceding [token] and collects them, along with
  /// the newline counts around them, into a new [CommentSequence].
  CommentSequence _commentsBefore(Token token) {
    var precedingToken = token.previous!;

    // The line that the most recently visited token or comment ends on.
    var lastLine = _endLine(precedingToken);
    var tokenLine = _startLine(token);

    // Edge case: The analyzer includes the "\n" in the script tag's lexeme,
    // which confuses some of these calculations. We don't want to allow a
    // blank line between the script tag and a following comment anyway, so
    // pretend the script tag ends on the token's own line.
    if (precedingToken.type == TokenType.SCRIPT_TAG) lastLine = tokenLine;

    var firstComment = token.precedingComments;

    // The lists must be growable, so they can't be const.
    // ignore: prefer_const_constructors
    var sequence = CommentSequence._([], []);

    Token? comment = firstComment;
    while (comment != null) {
      var text = comment.lexeme.trim();
      var commentLine = _startLine(comment);

      var isLineDoc = text.startsWith('///') && !text.startsWith('////');
      var isBlockDoc = text.startsWith('/**') && text != '/**/';

      // Always add a blank line (if possible) before a doc comment block by
      // reporting two newlines before its first line.
      var linesBefore = isLineDoc && comment == firstComment
          ? 2
          : commentLine - lastLine;

      // Line doc comments are always indented even if they were flush left.
      var flushLeft = !isLineDoc && _startColumn(comment) == 1;

      CommentType type;
      if (isLineDoc || isBlockDoc) {
        type = CommentType.doc;
      } else if (comment.type == TokenType.SINGLE_LINE_COMMENT) {
        type = CommentType.line;
      } else if (commentLine == lastLine || commentLine == tokenLine) {
        // A block comment that shares a line with the neighboring code.
        type = CommentType.inlineBlock;
      } else {
        type = CommentType.block;
      }

      sequence._add(
          linesBefore,
          SourceComment(text, type,
              offset: comment.offset, flushLeft: flushLeft));

      lastLine = _endLine(comment);
      comment = comment.next;
    }

    sequence._setLinesBeforeNextToken(tokenLine - lastLine);
    return sequence;
  }

  /// Whether [to] starts on a later line than the one [from] ends on.
  bool hasNewlineBetween(Token from, Token to) {
    var fromLine = _endLine(from);
    var toLine = _startLine(to);
    return fromLine < toLine;
  }

  /// The 1-based number of the line containing the first character of
  /// [token].
  int _startLine(Token token) {
    var location = _lineInfo.getLocation(token.offset);
    return location.lineNumber;
  }

  /// The 1-based number of the line containing the end of [token].
  int _endLine(Token token) {
    var location = _lineInfo.getLocation(token.end);
    return location.lineNumber;
  }

  /// The 1-based number of the column containing the first character of
  /// [token].
  int _startColumn(Token token) {
    var location = _lineInfo.getLocation(token.offset);
    return location.columnNumber;
  }
}
/// A comment in the source, with a bit of information about the surrounding
/// whitespace.
class SourceComment {
  /// The full comment text, with its `//`, `/*`, and `*/` delimiters.
  final String text;

  /// What kind of comment this is.
  final CommentType type;

  /// Whether this comment starts at column one in the source.
  ///
  /// A comment at the very beginning of its line is not indented in the
  /// output, so that commented out chunks of code do not get erroneously
  /// re-indented.
  final bool flushLeft;

  /// How much of the original source code precedes the start of this
  /// comment.
  ///
  /// Used to track selection markers within the comment.
  final int offset;

  SourceComment(this.text, this.type,
      {required this.flushLeft, required this.offset});

  /// Whether a newline must follow this comment.
  ///
  /// True for every comment type except [CommentType.inlineBlock].
  bool get requiresNewline => !(type == CommentType.inlineBlock);

  @override
  String toString() {
    // Drop the enclosing type's name so only the bare value name is shown.
    var typeName = '$type'.replaceAll('CommentType.', '');
    return '`$text` $typeName';
  }
}
/// A list of source code comments and the number of newlines between them, as
/// well as the number of newlines before the first comment and after the last
/// comment.
///
/// If there are no comments, this just tracks the number of newlines between
/// a pair of tokens.
///
/// This class is not simply a list of "comment + newline" pairs because we want
/// to know the number of newlines before the first comment and after the last.
/// That means there is always one more newline count than there are comments,
/// including the degenerate case where there are no comments but one newline
/// count.
///
/// For example, this code:
///
/// a /* c1 */
/// /* c2 */
///
/// /* c3 */
///
///
/// b
///
/// Produces a sequence like:
///
/// * 0 newlines between `a` and `/* c1 */`
/// * Comment `/* c1 */`
/// * 1 newline between `/* c1 */` and `/* c2 */`
/// * Comment `/* c2 */`
/// * 2 newlines between `/* c2 */` and `/* c3 */`
/// * Comment `/* c3 */`
/// * 3 newlines between `/* c3 */` and `b`
class CommentSequence extends ListBase<SourceComment> {
  /// A shared sequence with no comments and a single newline count of zero.
  static const CommentSequence empty = CommentSequence._([0], []);

  /// The newline counts surrounding the comments.
  ///
  /// Element `i` is the count before comment `i` and element `i + 1` is the
  /// count after it, so a fully built sequence has one more element here than
  /// in [_comments].
  final List<int> _linesBetween;

  /// The comments, in source order.
  final List<SourceComment> _comments;

  const CommentSequence._(this._linesBetween, this._comments);

  /// Whether at least one comment in this sequence requires a newline.
  bool get requiresNewline {
    for (var comment in _comments) {
      if (comment.requiresNewline) return true;
    }

    return false;
  }

  /// The number of newlines separating the comment at [commentIndex] from
  /// the comment or token that precedes it.
  int linesBefore(int commentIndex) => _linesBetween[commentIndex];

  /// The number of newlines separating the comment at [commentIndex] from
  /// the comment or token that follows it.
  int linesAfter(int commentIndex) => _linesBetween[commentIndex + 1];

  /// Whether the comment at [commentIndex] should be attached to the
  /// preceding token.
  ///
  /// That is only the case when the comment starts on the same line as what
  /// precedes it (so it is never moved up a line) and it is a line comment or
  /// an inline block comment. Doc comments and non-inline `/* ... */`
  /// comments are always pushed to the next line.
  bool isHanging(int commentIndex) {
    if (linesBefore(commentIndex) != 0) return false;

    var type = _comments[commentIndex].type;
    if (type == CommentType.inlineBlock) return true;
    return type == CommentType.line;
  }

  /// Whether the comment at [commentIndex] should be attached to the
  /// following token.
  ///
  /// That is only the case when no newline separates the comment from what
  /// follows it (so code on the next line is never moved up) and it is an
  /// inline block comment.
  bool isLeading(int commentIndex) =>
      linesAfter(commentIndex) <= 0 &&
      _comments[commentIndex].type == CommentType.inlineBlock;

  /// The number of newlines between the last comment and the next token.
  ///
  /// When the sequence has no comments, this is the number of lines between
  /// the next token and the one before it.
  int get linesBeforeNextToken => _linesBetween.last;

  /// Whether any newline count in the sequence is greater than one, meaning
  /// there is a blank line between a pair of comments or between the comments
  /// and the surrounding code.
  bool get containsBlank {
    for (var lines in _linesBetween) {
      if (lines > 1) return true;
    }

    return false;
  }

  /// How many comments the sequence holds.
  @override
  int get length => _comments.length;

  /// Always throws because the sequence can't be resized through the list
  /// API.
  @override
  set length(int newLength) {
    throw UnsupportedError('Comment sequence can\'t be modified.');
  }

  /// The comment at [index].
  @override
  SourceComment operator [](int index) => _comments[index];

  /// Always throws because comments can't be replaced through the list API.
  @override
  void operator []=(int index, SourceComment value) {
    throw UnsupportedError('Comment sequence can\'t be modified.');
  }

  /// Appends [comment] along with the number of newlines that precede it.
  void _add(int linesBefore, SourceComment comment) {
    _comments.add(comment);
    _linesBetween.add(linesBefore);
  }

  /// Records the number of lines between the end of the last comment and the
  /// beginning of the next token.
  void _setLinesBeforeNextToken(int linesAfter) {
    _linesBetween.add(linesAfter);
  }

  /// Creates a sequence containing this sequence's comments followed by the
  /// comments of [other].
  ///
  /// The trailing newline count of this sequence and the leading newline
  /// count of [other] are summed into the single count at the seam.
  ///
  /// If either sequence has no comments, the other one is returned as-is and
  /// no counts are summed.
  CommentSequence concatenate(CommentSequence other) {
    // Don't allocate new sequences if we don't need to.
    if (isEmpty) return other;
    if (other.isEmpty) return this;

    var seam = _linesBetween.last + other._linesBetween.first;

    var mergedLines = [
      // Every count from the left sequence except its trailing one.
      ..._linesBetween.sublist(0, _linesBetween.length - 1),
      seam,
      // Every count from the right sequence except its leading one.
      ...other._linesBetween.sublist(1),
    ];

    return CommentSequence._(mergedLines, _comments + other._comments);
  }

  /// Splits this sequence in two, where the first returned sequence gets the
  /// first [index] comments and the second one gets the rest.
  ///
  /// The newline count right at the split point goes to the first sequence
  /// and the second sequence gets a synthesized leading count of zero. For
  /// example, given this input sequence:
  ///
  /// * 4 newlines before `/* a */`
  /// * Comment `/* a */`
  /// * 5 newlines between `/* a */` and `/* b */`
  /// * Comment `/* b */`
  /// * 6 newlines between `/* b */` and `/* c */`
  /// * Comment `/* c */`
  /// * 7 newlines between `/* c */` and `/* d */`
  /// * Comment `/* d */`
  /// * 8 newlines between `/* d */` and `/* e */`
  /// * Comment `/* e */`
  /// * 9 newlines after `/* e */`
  ///
  /// Calling `splitAt(2)` yields:
  ///
  /// First sequence:
  ///
  /// * 4 newlines before `/* a */`
  /// * Comment `/* a */`
  /// * 5 newlines between `/* a */` and `/* b */`
  /// * Comment `/* b */`
  /// * 6 newlines after `/* b */`
  ///
  /// Second sequence:
  ///
  /// * 0 newlines before `/* c */`
  /// * Comment `/* c */`
  /// * 7 newlines between `/* c */` and `/* d */`
  /// * Comment `/* d */`
  /// * 8 newlines between `/* d */` and `/* e */`
  /// * Comment `/* e */`
  /// * 9 newlines after `/* e */`
  ///
  /// When [index] is zero or [length], nothing is copied: this sequence is
  /// returned unchanged on the side that holds all of the comments and
  /// [CommentSequence.empty] is returned on the other side.
  (CommentSequence, CommentSequence) splitAt(int index) {
    // Don't allocate new sequences if we don't have to.
    if (index == 0) return (CommentSequence.empty, this);
    if (index == length) return (this, CommentSequence.empty);

    // The first half keeps one more newline count than it has comments so
    // that it includes the count after its last comment.
    var before = CommentSequence._(
        _linesBetween.sublist(0, index + 1), _comments.sublist(0, index));

    // The second half starts with a synthesized count of zero in place of
    // the count that went to the first half.
    var after = CommentSequence._(
        [0, ..._linesBetween.sublist(index + 1)], _comments.sublist(index));

    return (before, after);
  }
}