// blob: 75f59f54a81352479f999f2239cc5b80834047fb [file] [log] [blame]
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// Check the validity of string literals.
library stringvalidator;
import 'dart:collection';
import 'common.dart';
import 'package:front_end/src/fasta/scanner.dart' show Token;
import 'tree/dartstring.dart' show DartString;
import 'tree/nodes.dart' show StringQuoting;
import 'package:front_end/src/fasta/scanner/characters.dart';
/// Validates the contents of string-literal tokens and wraps them as
/// [DartString]s.
///
/// Problems found during validation are reported through [reporter].
class StringValidator {
  /// Receives the diagnostics produced by [stringParseError].
  final DiagnosticReporter reporter;

  StringValidator(this.reporter);

  /// Validates one lexical part of a (possibly interpolated) string literal.
  ///
  /// The text of the part is taken from [token]. When [isFirst] is true the
  /// left quote described by [quoting] is stripped from the front of the
  /// text; when [isLast] is true the right quote is stripped from the end.
  /// What remains is handed to [validateString].
  ///
  /// Returns the resulting [DartString], or `null` if validation failed.
  DartString validateInterpolationPart(Token token, StringQuoting quoting,
      {bool isFirst = false, bool isLast = false}) {
    String source = token.lexeme;
    int leftQuote = isFirst ? quoting.leftQuoteLength : 0;
    int rightQuote = isLast ? quoting.rightQuoteLength : 0;
    String content = copyWithoutQuotes(source, leftQuote, rightQuote);
    return validateString(
        token, token.charOffset + leftQuote, content, quoting);
  }

  /// Determines the quoting used by the string literal that starts
  /// [sourceString].
  ///
  /// [sourceString] must begin with an optional `r` followed by a single or
  /// double quote character. The left quote length handed to
  /// [StringQuoting.getQuoting] is 1 for a single-quoted literal and 3 for a
  /// triple-quoted one; the `r` prefix is not counted. For a triple-quoted
  /// literal whose first line consists only of spaces and tabs, the length of
  /// that line including its CR, LF or CR+LF terminator is added as well.
  /// Each of those whitespace and newline characters may be preceded by a
  /// backslash, which is counted too.
  static StringQuoting quotingFromString(String sourceString) {
    List<int> units = sourceString.codeUnits;

    // Returns the code unit at [offset], or -1 when [offset] is past the end.
    // -1 never equals a character constant, so running off the end of the
    // text simply fails every comparison below. Because of string
    // interpolation the text is not guaranteed to contain the closing quote,
    // so reading past the end is an expected situation.
    int unitAt(int offset) => offset < units.length ? units[offset] : -1;

    int position = 0;
    bool raw = false;
    int leftQuoteLength = 1;
    int quoteChar = unitAt(position);
    if (quoteChar == $r) {
      raw = true;
      position++;
      quoteChar = unitAt(position);
    }
    assert(quoteChar == $SQ || quoteChar == $DQ);
    // The string has at least one quote. Check if it has three.
    // If it only has two, the string must be an empty string literal,
    // and end after the second quote.
    if (position + 2 < units.length && units[position + 1] == quoteChar) {
      position += 2;
      // If not, there is a bug in the parser.
      assert(units[position] == quoteChar);
      leftQuoteLength = 3;
      // Check if a multiline string starts with optional whitespace followed
      // by a newline (CR, LF or CR+LF).
      // We also accept if these characters are escaped by a backslash.
      int newLineLength = 1;
      while (true) {
        position++;
        int code = unitAt(position);
        if (code == $BACKSLASH) {
          newLineLength++;
          position++;
          code = unitAt(position);
        }
        if (code == $TAB || code == $SPACE) {
          newLineLength++;
          continue;
        }
        if (code == $CR) {
          if (unitAt(position + 1) == $LF) {
            newLineLength++;
          }
          leftQuoteLength += newLineLength;
        } else if (code == $LF) {
          leftQuoteLength += newLineLength;
        }
        // Anything else means the first line has real content (or the text
        // ended); nothing beyond the three quotes is stripped.
        break;
      }
    }
    return StringQuoting.getQuoting(quoteChar, raw, leftQuoteLength);
  }

  /// Returns [string] without its [initial] first and [terminal] last
  /// characters.
  ///
  /// This is intended to be used to remove quotes from string literals
  /// (including an initial 'r' for raw strings). Both counts must be
  /// non-negative and together must not exceed the length of [string].
  String copyWithoutQuotes(String string, int initial, int terminal) {
    assert(0 <= initial);
    assert(0 <= terminal);
    assert(initial + terminal <= string.length);
    return string.substring(initial, string.length - terminal);
  }

  /// Reports [message], suffixed with `@ ` and [offset], as a generic error
  /// on the source span of [token].
  void stringParseError(String message, Token token, int offset) {
    reporter.reportErrorMessage(reporter.spanFromToken(token),
        MessageKind.GENERIC, {'text': "$message @ $offset"});
  }

  /// Validates the escape sequences of the string literal content [string].
  ///
  /// [token] is only used to locate reported errors, and [startOffset] is the
  /// offset from which the positions in error messages are counted. Escapes
  /// are not interpreted when [quoting] is raw.
  ///
  /// Returns a [DartString] if valid, and `null` (after reporting an error
  /// through [stringParseError]) if not.
  DartString validateString(
      Token token, int startOffset, String string, StringQuoting quoting) {
    // We check for invalid x and u escapes and for invalid Unicode code
    // points given as u-escape values. Nothing in this method checks for
    // line terminators in non-multiline strings.

    // Number of UTF-16 code units of the validated string: one per plain
    // code unit or escape, plus one more for escapes above the BMP.
    int length = 0;
    // Position used in error messages; advanced once per consumed code unit.
    int index = startOffset;
    bool containsEscape = false;
    var stringIter = string.codeUnits.iterator;
    for (HasNextIterator<int> iter = new HasNextIterator(stringIter);
        iter.hasNext;
        length++) {
      index++;
      int code = iter.next();
      if (code == $BACKSLASH) {
        // In a raw string a backslash is an ordinary character.
        if (quoting.raw) continue;
        containsEscape = true;
        if (!iter.hasNext) {
          stringParseError("Incomplete escape sequence", token, index);
          return null;
        }
        index++;
        code = iter.next();
        if (code == $x) {
          // \xHH: exactly two hex digits.
          for (int i = 0; i < 2; i++) {
            if (!iter.hasNext) {
              stringParseError("Incomplete escape sequence", token, index);
              return null;
            }
            index++;
            code = iter.next();
            if (!isHexDigit(code)) {
              stringParseError(
                  "Invalid character in escape sequence", token, index);
              return null;
            }
          }
          // A two-byte hex escape can't generate an invalid value.
          continue;
        } else if (code == $u) {
          index++;
          // 0 stands in for "no more input"; it is neither '{' nor a hex
          // digit, so it is rejected by the checks below.
          code = iter.hasNext ? iter.next() : 0;
          int value = 0;
          if (code == $OPEN_CURLY_BRACKET) {
            // \u{H...}: expect 1-6 hex digits.
            int count = 0;
            while (iter.hasNext) {
              code = iter.next();
              index++;
              if (code == $CLOSE_CURLY_BRACKET) {
                break;
              }
              if (!isHexDigit(code)) {
                stringParseError(
                    "Invalid character in escape sequence", token, index);
                return null;
              }
              count++;
              value = value * 16 + hexDigitValue(code);
            }
            // Reject a missing '}', an empty '{}' and more than six digits.
            if (code != $CLOSE_CURLY_BRACKET || count == 0 || count > 6) {
              int errorPosition = index - count;
              if (count > 6) errorPosition += 6;
              stringParseError(
                  "Invalid character in escape sequence", token, errorPosition);
              return null;
            }
          } else {
            // \uHHHH: expect four hex digits, including the one just read.
            for (int i = 0; i < 4; i++) {
              if (i > 0) {
                if (iter.hasNext) {
                  index++;
                  code = iter.next();
                } else {
                  code = 0;
                }
              }
              if (!isHexDigit(code)) {
                stringParseError(
                    "Invalid character in escape sequence", token, index);
                return null;
              }
              value = value * 16 + hexDigitValue(code);
            }
          }
          // From here on `code` is the escaped code point.
          code = value;
        }
      }
      if (code >= 0x10000) {
        // Code points above the BMP take two UTF-16 code units.
        length++;
        if (code > 0x10FFFF) {
          stringParseError("Invalid code point", token, index);
          return null;
        }
      }
    }
    // String literal successfully validated.
    if (quoting.raw || !containsEscape) {
      // A string without escapes could just as well have been raw.
      return new DartString.rawString(string, length);
    }
    return new DartString.escapedString(string, length);
  }
}