| // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| // Check the validity of string literals. |
| |
| library stringvalidator; |
| |
| import "dart:collection"; |
| |
| import "dart2jslib.dart"; |
| import "tree/tree.dart"; |
| import "util/characters.dart"; |
| import "scanner/scannerlib.dart" show Token; |
| |
/**
 * Validates the contents of string literal tokens and converts them to
 * [DartString]s.
 *
 * Problems found while validating are reported through [listener].
 */
class StringValidator {
  final DiagnosticListener listener;

  StringValidator(this.listener);

  /**
   * Validates one literal part of a string interpolation.
   *
   * The text of the part is taken from [token]. The opening quote (of length
   * `quoting.leftQuoteLength`) is stripped only if [isFirst] is true, and the
   * closing quote (of length `quoting.rightQuoteLength`) only if [isLast] is
   * true. The remaining text is passed to [validateString], whose result is
   * returned.
   */
  DartString validateInterpolationPart(Token token, StringQuoting quoting,
                                       {bool isFirst: false,
                                        bool isLast: false}) {
    String source = token.value;
    int leftQuote = isFirst ? quoting.leftQuoteLength : 0;
    int rightQuote = isLast ? quoting.rightQuoteLength : 0;
    String content = copyWithoutQuotes(source, leftQuote, rightQuote);
    return validateString(token,
                          token.charOffset + leftQuote,
                          content,
                          quoting);
  }

  /**
   * Determines the quoting used by the string literal source [sourceString].
   *
   * Detects an optional leading `r` (raw string), the quote character (single
   * or double quote) and the length of the opening quote: 1 for a
   * single-quoted string, 3 for a triple-quoted string, plus the length of an
   * initial run of (optionally backslash-escaped) whitespace ending in a
   * newline directly after a triple quote. The `r` prefix is not counted in
   * that length; it is passed separately to [StringQuoting.getQuoting].
   */
  static StringQuoting quotingFromString(String sourceString) {
    Iterator<int> source = sourceString.codeUnits.iterator;
    bool raw = false;
    int leftQuoteLength = 1;
    source.moveNext();
    int quoteChar = source.current;
    if (quoteChar == $r) {
      raw = true;
      source.moveNext();
      quoteChar = source.current;
    }
    assert(quoteChar == $SQ || quoteChar == $DQ);
    // String has at least one quote. Check if it has three.
    // If it only has two, the string must be an empty string literal,
    // and end after the second quote.
    if (source.moveNext() && source.current == quoteChar && source.moveNext()) {
      int code = source.current;
      assert(code == quoteChar);  // If not, there is a bug in the parser.
      leftQuoteLength = 3;

      // Check if a multiline string starts with optional whitespace followed by
      // a newline (CR, LF or CR+LF).
      // We also accept it if these characters are escaped by a backslash.
      //
      // Starts at 1 to account for the newline character itself; every
      // skipped backslash, whitespace character and the LF of a CR+LF pair
      // adds one more.
      int newLineLength = 1;
      while (true) {
        // Due to string-interpolations we are not guaranteed to see the
        // trailing quoting characters. The invocations to `moveNext()` may
        // therefore return false and the `current`-getter return `null`. The
        // code does not need to handle this specially (as it will not find the
        // newline characters).
        source.moveNext();
        code = source.current;
        if (code == $BACKSLASH) {
          newLineLength++;
          source.moveNext();
          code = source.current;
        }
        if (code == $TAB || code == $SPACE) {
          newLineLength++;
          continue;
        }
        if (code == $CR) {
          if (source.moveNext() && source.current == $LF) {
            newLineLength++;
          }
          leftQuoteLength += newLineLength;
        } else if (code == $LF) {
          leftQuoteLength += newLineLength;
        }
        // Anything else means the first line has real content: the opening
        // quote is just the three quote characters.
        break;
      }
    }
    return StringQuoting.getQuoting(quoteChar, raw, leftQuoteLength);
  }

  /**
   * Return the string [string] without its [initial] first and [terminal] last
   * characters. This is intended to be used to remove quotes from string
   * literals (including an initial 'r' for raw strings).
   */
  String copyWithoutQuotes(String string, int initial, int terminal) {
    assert(0 <= initial);
    assert(0 <= terminal);
    assert(initial + terminal <= string.length);
    return string.substring(initial, string.length - terminal);
  }

  /**
   * Reports [message] as a fatal generic error on [token] through [listener].
   *
   * The reported text is [message] followed by " @ " and [offset].
   */
  void stringParseError(String message, Token token, int offset) {
    listener.reportFatalError(
        token, MessageKind.GENERIC, {'text': "$message @ $offset"});
  }

  /**
   * Validates the escape sequences and special characters of a string literal.
   *
   * [string] is the literal's content without quotes, [startOffset] is the
   * offset used as the base for positions in error messages, and [token] is
   * the token errors are reported on. Escape sequences are only interpreted
   * when `quoting.raw` is false.
   *
   * Returns a DartString if valid, and null if not.
   */
  DartString validateString(Token token,
                            int startOffset,
                            String string,
                            StringQuoting quoting) {
    // This checks for invalid x and u escapes, for invalid Unicode scalar
    // values (either directly or as u-escape values) and for unpaired UTF-16
    // surrogates. Line terminators in non-multiline strings are not checked
    // in this method.
    //
    // [length] counts the UTF-16 code units of the resulting string: one per
    // source character or escape, plus one extra for escape values that
    // need a surrogate pair.
    int length = 0;
    int index = startOffset;
    bool containsEscape = false;
    bool previousWasLeadSurrogate = false;
    bool invalidUtf16 = false;
    var stringIter = string.codeUnits.iterator;
    for (HasNextIterator<int> iter = new HasNextIterator(stringIter);
         iter.hasNext;
         length++) {
      index++;
      int code = iter.next();
      // In a raw string a backslash is an ordinary character. It is not
      // treated as an escape, but it still goes through the surrogate check
      // below like any other character.
      if (code == $BACKSLASH && !quoting.raw) {
        containsEscape = true;
        if (!iter.hasNext) {
          stringParseError("Incomplete escape sequence", token, index);
          return null;
        }
        index++;
        code = iter.next();
        if (code == $x) {
          // Expect exactly two hex digits.
          int value = 0;
          for (int i = 0; i < 2; i++) {
            if (!iter.hasNext) {
              stringParseError("Incomplete escape sequence", token, index);
              return null;
            }
            index++;
            code = iter.next();
            if (!isHexDigit(code)) {
              stringParseError("Invalid character in escape sequence",
                               token, index);
              return null;
            }
            value = value * 16 + hexDigitValue(code);
          }
          // A two-digit hex escape can't generate an invalid value by itself,
          // but it must not be skipped by the surrogate check below: a lead
          // surrogate followed by such an escape is unpaired.
          code = value;
        } else if (code == $u) {
          index++;
          code = iter.hasNext ? iter.next() : 0;
          int value = 0;
          if (code == $OPEN_CURLY_BRACKET) {
            // expect 1-6 hex digits.
            int count = 0;
            while (iter.hasNext) {
              code = iter.next();
              index++;
              if (code == $CLOSE_CURLY_BRACKET) {
                break;
              }
              if (!isHexDigit(code)) {
                stringParseError("Invalid character in escape sequence",
                                 token, index);
                return null;
              }
              count++;
              value = value * 16 + hexDigitValue(code);
            }
            if (code != $CLOSE_CURLY_BRACKET || count == 0 || count > 6) {
              int errorPosition = index - count;
              if (count > 6) errorPosition += 6;
              stringParseError("Invalid character in escape sequence",
                               token, errorPosition);
              return null;
            }
          } else {
            // Expect four hex digits, including the one just read.
            for (int i = 0; i < 4; i++) {
              if (i > 0) {
                if (iter.hasNext) {
                  index++;
                  code = iter.next();
                } else {
                  // 0 is not a hex digit, so running out of input is
                  // reported by the check below.
                  code = 0;
                }
              }
              if (!isHexDigit(code)) {
                stringParseError("Invalid character in escape sequence",
                                 token, index);
                return null;
              }
              value = value * 16 + hexDigitValue(code);
            }
          }
          code = value;
        }
      }
      // Values above the BMP take two UTF-16 code units.
      if (code >= 0x10000) length++;
      // This handles both unescaped characters and the value of x and unicode
      // escapes.
      if (previousWasLeadSurrogate) {
        if (!isUtf16TrailSurrogate(code)) {
          invalidUtf16 = true;
          break;
        }
        previousWasLeadSurrogate = false;
      } else if (isUtf16LeadSurrogate(code)) {
        previousWasLeadSurrogate = true;
      } else if (!isUnicodeScalarValue(code)) {
        invalidUtf16 = true;
        break;
      }
    }
    // A lead surrogate at the very end of the string is also unpaired.
    if (previousWasLeadSurrogate || invalidUtf16) {
      stringParseError("Invalid Utf16 surrogate", token, index);
      return null;
    }
    // String literal successfully validated.
    if (quoting.raw || !containsEscape) {
      // A string without escapes could just as well have been raw.
      return new DartString.rawString(string, length);
    }
    return new DartString.escapedString(string, length);
  }
}