blob: c145b5c2f4b3111a1c6c110e1a87e5dcc007c1e6 [file] [log] [blame]
// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
library fasta.quote;
import 'problems.dart' show unhandled;
import 'fasta_codes.dart' as fasta;
import 'scanner/characters.dart'
show
$BACKSLASH,
$BS,
$CLOSE_CURLY_BRACKET,
$CR,
$FF,
$LF,
$OPEN_CURLY_BRACKET,
$SPACE,
$TAB,
$VTAB,
$b,
$f,
$n,
$r,
$t,
$u,
$v,
$x,
hexDigitValue,
isHexDigit;
enum Quote {
Single,
Double,
MultiLineSingle,
MultiLineDouble,
RawSingle,
RawDouble,
RawMultiLineSingle,
RawMultiLineDouble,
}
Quote analyzeQuote(String first) {
if (first.startsWith('"""')) return Quote.MultiLineDouble;
if (first.startsWith('r"""')) return Quote.RawMultiLineDouble;
if (first.startsWith("'''")) return Quote.MultiLineSingle;
if (first.startsWith("r'''")) return Quote.RawMultiLineSingle;
if (first.startsWith('"')) return Quote.Double;
if (first.startsWith('r"')) return Quote.RawDouble;
if (first.startsWith("'")) return Quote.Single;
if (first.startsWith("r'")) return Quote.RawSingle;
return unhandled(first, "analyzeQuote", -1, null);
}
// Note: based on [StringValidator.quotingFromString]
// (pkg/compiler/lib/src/string_validator.dart).
int lengthOfOptionalWhitespacePrefix(String first, int start) {
List<int> codeUnits = first.codeUnits;
for (int i = start; i < codeUnits.length; i++) {
int code = codeUnits[i];
if (code == $BACKSLASH) {
i++;
if (i < codeUnits.length) {
code = codeUnits[i];
} else {
break;
}
}
if (code == $TAB || code == $SPACE) continue;
if (code == $CR) {
if (i + 1 < codeUnits.length && codeUnits[i + 1] == $LF) {
i++;
}
return i + 1;
}
if (code == $LF) {
return i + 1;
}
break; // Not a white-space character.
}
return start;
}
int firstQuoteLength(String first, Quote quote) {
switch (quote) {
case Quote.Single:
case Quote.Double:
return 1;
case Quote.MultiLineSingle:
case Quote.MultiLineDouble:
return lengthOfOptionalWhitespacePrefix(first, 3);
case Quote.RawSingle:
case Quote.RawDouble:
return 2;
case Quote.RawMultiLineSingle:
case Quote.RawMultiLineDouble:
return lengthOfOptionalWhitespacePrefix(first, 4);
}
return unhandled("$quote", "firstQuoteLength", -1, null);
}
int lastQuoteLength(Quote quote) {
switch (quote) {
case Quote.Single:
case Quote.Double:
case Quote.RawSingle:
case Quote.RawDouble:
return 1;
case Quote.MultiLineSingle:
case Quote.MultiLineDouble:
case Quote.RawMultiLineSingle:
case Quote.RawMultiLineDouble:
return 3;
}
return unhandled("$quote", "lastQuoteLength", -1, null);
}
String unescapeFirstStringPart(String first, Quote quote, Object location,
UnescapeErrorListener listener) {
return unescape(first.substring(firstQuoteLength(first, quote)), quote,
location, listener);
}
String unescapeLastStringPart(String last, Quote quote, Object location,
bool isLastQuoteSynthetic, UnescapeErrorListener listener) {
int end = last.length - (isLastQuoteSynthetic ? 0 : lastQuoteLength(quote));
return unescape(last.substring(0, end), quote, location, listener);
}
String unescapeString(
String string, Object location, UnescapeErrorListener listener) {
Quote quote = analyzeQuote(string);
return unescape(
string.substring(firstQuoteLength(string, quote),
string.length - lastQuoteLength(quote)),
quote,
location,
listener);
}
String unescape(String string, Quote quote, Object location,
UnescapeErrorListener listener) {
switch (quote) {
case Quote.Single:
case Quote.Double:
return !string.contains("\\")
? string
: unescapeCodeUnits(string.codeUnits, false, location, listener);
case Quote.MultiLineSingle:
case Quote.MultiLineDouble:
return !string.contains("\\") && !string.contains("\r")
? string
: unescapeCodeUnits(string.codeUnits, false, location, listener);
case Quote.RawSingle:
case Quote.RawDouble:
return string;
case Quote.RawMultiLineSingle:
case Quote.RawMultiLineDouble:
return !string.contains("\r")
? string
: unescapeCodeUnits(string.codeUnits, true, location, listener);
}
return unhandled("$quote", "unescape", -1, null);
}
// Note: based on
// [StringValidator.validateString](pkg/compiler/lib/src/string_validator.dart).
String unescapeCodeUnits(List<int> codeUnits, bool isRaw, Object location,
UnescapeErrorListener listener) {
// Can't use Uint8List or Uint16List here, the code units may be larger.
List<int> result = new List<int>(codeUnits.length);
int resultOffset = 0;
for (int i = 0; i < codeUnits.length; i++) {
int code = codeUnits[i];
if (code == $CR) {
if (i + 1 < codeUnits.length && codeUnits[i + 1] == $LF) {
i++;
}
code = $LF;
} else if (!isRaw && code == $BACKSLASH) {
if (codeUnits.length == ++i) {
listener.handleUnescapeError(
fasta.messageInvalidUnicodeEscape, location, i, 1);
return new String.fromCharCodes(codeUnits);
}
code = codeUnits[i];
/// `\n` for newline, equivalent to `\x0A`.
/// `\r` for carriage return, equivalent to `\x0D`.
/// `\f` for form feed, equivalent to `\x0C`.
/// `\b` for backspace, equivalent to `\x08`.
/// `\t` for tab, equivalent to `\x09`.
/// `\v` for vertical tab, equivalent to `\x0B`.
/// `\xXX` for hex escape.
/// `\uXXXX` or `\u{XX?X?X?X?X?}` for Unicode hex escape.
if (code == $n) {
code = $LF;
} else if (code == $r) {
code = $CR;
} else if (code == $f) {
code = $FF;
} else if (code == $b) {
code = $BS;
} else if (code == $t) {
code = $TAB;
} else if (code == $v) {
code = $VTAB;
} else if (code == $x) {
// Expect exactly 2 hex digits.
int begin = i;
if (codeUnits.length <= i + 2) {
listener.handleUnescapeError(fasta.messageInvalidHexEscape, location,
begin, codeUnits.length + 1 - begin);
return new String.fromCharCodes(codeUnits);
}
code = 0;
for (int j = 0; j < 2; j++) {
int digit = codeUnits[++i];
if (!isHexDigit(digit)) {
listener.handleUnescapeError(
fasta.messageInvalidHexEscape, location, begin, i + 1 - begin);
return new String.fromCharCodes(codeUnits);
}
code = (code << 4) + hexDigitValue(digit);
}
} else if (code == $u) {
int begin = i;
if (codeUnits.length == i + 1) {
listener.handleUnescapeError(fasta.messageInvalidUnicodeEscape,
location, begin, codeUnits.length + 1 - begin);
return new String.fromCharCodes(codeUnits);
}
code = codeUnits[i + 1];
if (code == $OPEN_CURLY_BRACKET) {
// Expect 1-6 hex digits followed by '}'.
if (codeUnits.length == ++i) {
listener.handleUnescapeError(fasta.messageInvalidUnicodeEscape,
location, begin, i + 1 - begin);
return new String.fromCharCodes(codeUnits);
}
code = 0;
for (int j = 0; j < 7; j++) {
if (codeUnits.length == ++i) {
listener.handleUnescapeError(fasta.messageInvalidUnicodeEscape,
location, begin, i + 1 - begin);
return new String.fromCharCodes(codeUnits);
}
int digit = codeUnits[i];
if (j != 0 && digit == $CLOSE_CURLY_BRACKET) break;
if (!isHexDigit(digit)) {
listener.handleUnescapeError(fasta.messageInvalidUnicodeEscape,
location, begin, i + 2 - begin);
return new String.fromCharCodes(codeUnits);
}
code = (code << 4) + hexDigitValue(digit);
}
} else {
// Expect exactly 4 hex digits.
if (codeUnits.length <= i + 4) {
listener.handleUnescapeError(fasta.messageInvalidUnicodeEscape,
location, begin, codeUnits.length + 1 - begin);
return new String.fromCharCodes(codeUnits);
}
code = 0;
for (int j = 0; j < 4; j++) {
int digit = codeUnits[++i];
if (!isHexDigit(digit)) {
listener.handleUnescapeError(fasta.messageInvalidUnicodeEscape,
location, begin, i + 1 - begin);
return new String.fromCharCodes(codeUnits);
}
code = (code << 4) + hexDigitValue(digit);
}
}
if (code > 0x10FFFF) {
listener.handleUnescapeError(
fasta.messageInvalidCodePoint, location, begin, i + 1 - begin);
return new String.fromCharCodes(codeUnits);
}
} else {
// Nothing, escaped character is passed through;
}
}
result[resultOffset++] = code;
}
return new String.fromCharCodes(result, 0, resultOffset);
}
abstract class UnescapeErrorListener {
void handleUnescapeError(
fasta.Message message, covariant location, int offset, int length);
}