Handle Unicode whitespace more gracefully.
* Don't crash when trailing non-ASCII whitespace is trimmed.
* Add tests for trailing whitespace trimming.
* Add tests for other Unicode characters.
Fix #901.
diff --git a/lib/src/string_compare.dart b/lib/src/string_compare.dart
index 4306cb8..30af46e 100644
--- a/lib/src/string_compare.dart
+++ b/lib/src/string_compare.dart
@@ -1,8 +1,24 @@
-library dart_style.src.string_compare;
-
/// Returns `true` if [c] represents a whitespace code unit allowed in Dart
/// source code.
-bool _isWhitespace(int c) => (c <= 0x000D && c >= 0x0009) || c == 0x0020;
+///
+/// This follows the same rules as `String.trim()` because that's what dartfmt
+/// uses to trim trailing whitespace.
+bool _isWhitespace(int c) {
+ // Not using a set or something more elegant because this code is on the hot
+ // path and this large expression is significantly faster than a set lookup.
+ return c >= 0x0009 && c <= 0x000d || // Control characters (TAB..CR).
+ c == 0x0020 || // SPACE.
+ c == 0x0085 || // NEXT LINE (NEL).
+ c == 0x00a0 || // NO-BREAK SPACE.
+ c == 0x1680 || // OGHAM SPACE MARK.
+ c >= 0x2000 && c <= 0x200a || // EN QUAD..HAIR SPACE.
+ c == 0x2028 || // LINE SEPARATOR.
+ c == 0x2029 || // PARAGRAPH SEPARATOR.
+ c == 0x202f || // NARROW NO-BREAK SPACE.
+ c == 0x205f || // MEDIUM MATHEMATICAL SPACE.
+ c == 0x3000 || // IDEOGRAPHIC SPACE.
+ c == 0xfeff; // ZERO WIDTH NO-BREAK SPACE.
+}
/// Returns the index of the next non-whitespace character.
///
diff --git a/test/command_line_test.dart b/test/command_line_test.dart
index 7284e50..d328374 100644
--- a/test/command_line_test.dart
+++ b/test/command_line_test.dart
@@ -122,7 +122,8 @@
emits('Usage: dartfmt [options...] [files or directories...]'));
await expectLater(process.stdout, emitsThrough(contains('--overwrite')));
await expectLater(process.stdout, emitsThrough(contains('--fix')));
- await expectLater(process.stdout, neverEmits(contains('--set-exit-if-changed')));
+ await expectLater(
+ process.stdout, neverEmits(contains('--set-exit-if-changed')));
await process.shouldExit(0);
});
@@ -135,7 +136,8 @@
emits('Usage: dartfmt [options...] [files or directories...]'));
await expectLater(process.stdout, emitsThrough(contains('--overwrite')));
await expectLater(process.stdout, emitsThrough(contains('--fix')));
- await expectLater(process.stdout, emitsThrough(contains('--set-exit-if-changed')));
+ await expectLater(
+ process.stdout, emitsThrough(contains('--set-exit-if-changed')));
await process.shouldExit(0);
});
});
diff --git a/test/string_compare_test.dart b/test/string_compare_test.dart
index 226b421..3adaa71 100644
--- a/test/string_compare_test.dart
+++ b/test/string_compare_test.dart
@@ -39,26 +39,26 @@
});
test('test unicode whitespace characters', () {
- // Dart sources only allow ascii whitespace code points so we
- // should not consider the following strings equal.
+ // The formatter strips all Unicode whitespace characters from the end of
+ // comment lines, so treat those as whitespace too.
var whitespaceRunes = [
- 0x00A0,
- 0x1680,
- 0x180E,
+ 0x0020,
+ 0x0085,
+ 0x00a0,
0x2000,
- 0x200A,
+ 0x200a,
0x2028,
0x2029,
- 0x202F,
- 0x205F,
+ 0x202f,
+ 0x205f,
0x3000,
- 0xFEFF
+ 0xfeff
];
for (var rune in whitespaceRunes) {
expect(
equalIgnoringWhitespace(
'foo${String.fromCharCode(rune)}bar', 'foo bar'),
- isFalse);
+ isTrue);
}
});
diff --git a/test/utils.dart b/test/utils.dart
index e8437ac..2d1ab06 100644
--- a/test/utils.dart
+++ b/test/utils.dart
@@ -24,6 +24,7 @@
final _indentPattern = RegExp(r'\(indent (\d+)\)');
final _fixPattern = RegExp(r'\(fix ([a-x-]+)\)');
+final _unicodePattern = RegExp(r'×([0-9a-fA-F]{2,4})');
/// If tool/command_shell.dart has been compiled to a snapshot, this is the path
/// to it.
@@ -226,6 +227,10 @@
expectedOutput += lines[i] + '\n';
}
+ // Unescape special Unicode escape markers.
+ input = _unescapeUnicode(input);
+ expectedOutput = _unescapeUnicode(expectedOutput);
+
// TODO(rnystrom): Stop skipping these tests when possible.
if (description.contains('(skip:')) {
print('skipping $description');
@@ -240,6 +245,7 @@
var expected = _extractSelection(expectedOutput,
isCompilationUnit: isCompilationUnit);
+ var expectedText = expected.text;
var formatter = DartFormatter(
pageWidth: pageWidth, indent: leadingIndent, fixes: fixes);
@@ -254,9 +260,9 @@
// Fail with an explicit message because it's easier to read than
// the matcher output.
- if (actualText != expected.text) {
+ if (actualText != expectedText) {
fail('Formatting did not match expectation. Expected:\n'
- '${expected.text}\nActual:\n$actualText');
+ '$expectedText\nActual:\n$actualText');
}
expect(actual.selectionStart, equals(expected.selectionStart));
@@ -281,3 +287,15 @@
selectionStart: start == -1 ? null : start,
selectionLength: end == -1 ? null : end - start);
}
+
+/// Turns the test-only Unicode escape markers (`×` followed by 2-4 hex digits)
+/// into the real Unicode characters they encode.
+///
+/// This does not use Dart's own string escape sequences so that we don't
+/// accidentally modify the Dart code being formatted.
+String _unescapeUnicode(String input) {
+ return input.replaceAllMapped(_unicodePattern, (match) {
+ var codePoint = int.parse(match[1], radix: 16);
+ return String.fromCharCode(codePoint);
+ });
+}
diff --git a/test/whitespace/trailing.unit b/test/whitespace/trailing.unit
new file mode 100644
index 0000000..79dbeff
--- /dev/null
+++ b/test/whitespace/trailing.unit
@@ -0,0 +1,32 @@
+40 columns |
+>>> remove after line comment
+// trailing spaces after here:×20×20×20×20
+<<<
+// trailing spaces after here:
+>>> remove from empty line comment
+//×20×20×20
+<<<
+//
+>>> keep inside block comment lines
+/* one×20×20
+ two×20
+×20×20×20
+ three×20×20×20×20
+*/×20×20
+<<<
+/* one×20×20
+ two×20
+×20×20×20
+ three×20×20×20×20
+*/
+>>> after code
+main() {×20×20
+×20
+ veryLongExpression +×20×20×20
+ veryLongStatement;×20×20
+}×20×20×20×20
+<<<
+main() {
+ veryLongExpression +
+ veryLongStatement;
+}
\ No newline at end of file
diff --git a/test/whitespace/unicode.unit b/test/whitespace/unicode.unit
new file mode 100644
index 0000000..b40018e
--- /dev/null
+++ b/test/whitespace/unicode.unit
@@ -0,0 +1,65 @@
+40 columns |
+>>> preserve unicode whitespace inside comments but trim from the end
+// control middle: ×09 end: ×09 ×09
+// control middle: ×0b end: ×0b ×0b
+// space middle: ×20 end: ×20 ×20
+// control middle: ×85 end: ×85 ×85
+<<<
+// control middle: ×09 end:
+// control middle: ×0b end:
+// space middle: ×20 end:
+// control middle: ×85 end:
+>>> preserve unicode whitespace inside comments but trim from the end
+// no-break space middle: ×a0 end: ×a0 ×a0
+// ogham space mark middle: ×1680 end: ×1680 ×1680
+// en quad middle: ×2000 end: ×2000 ×2000
+// em quad middle: ×2001 end: ×2001 ×2001
+// en space middle: ×2002 end: ×2002 ×2002
+// em space middle: ×2003 end: ×2003 ×2003
+<<<
+// no-break space middle: ×a0 end:
+// ogham space mark middle: ×1680 end:
+// en quad middle: ×2000 end:
+// em quad middle: ×2001 end:
+// en space middle: ×2002 end:
+// em space middle: ×2003 end:
+>>>
+// three-per-em space middle: ×2004 end: ×2004 ×2004
+// four-per-em space middle: ×2005 end: ×2005 ×2005
+// six-per-em space middle: ×2006 end: ×2006 ×2006
+// figure space middle: ×2007 end: ×2007 ×2007
+// punctuation space middle: ×2008 end: ×2008 ×2008
+// thin space middle: ×2009 end: ×2009 ×2009
+// hair space middle: ×200a end: ×200a ×200a
+<<<
+// three-per-em space middle: ×2004 end:
+// four-per-em space middle: ×2005 end:
+// six-per-em space middle: ×2006 end:
+// figure space middle: ×2007 end:
+// punctuation space middle: ×2008 end:
+// thin space middle: ×2009 end:
+// hair space middle: ×200a end:
+>>>
+// line separator middle: ×2028 end: ×2028 ×2028
+// paragraph separator middle: ×2029 end: ×2029 ×2029
+// narrow no-break space middle: ×202f end: ×202f ×202f
+// medium mathematical space middle: ×205f end: ×205f ×205f
+// ideographic space middle: ×3000 end: ×3000 ×3000
+// zero width no-break space middle: ×feff end: ×feff ×feff
+<<<
+// line separator middle: ×2028 end:
+// paragraph separator middle: ×2029 end:
+// narrow no-break space middle: ×202f end:
+// medium mathematical space middle: ×205f end:
+// ideographic space middle: ×3000 end:
+// zero width no-break space middle: ×feff end:
+>>> unicode line endings
+// line feed middle: ×0a // end: ×0a ×0a
+// form feed middle: ×0c // end: ×0c ×0c
+// carriage return middle: ×0d // end: ×0d ×0d
+<<<
+// line feed middle:
+// end:
+
+// form feed middle: ×0c // end:
+// carriage return middle: // end: