blob: 4e005cdf3a075d9fb2498c3a20b27e6648e22598 [file] [log] [blame]
// Copyright (c) 2017, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
library fasta.scanner.recover;
import '../fasta_codes.dart'
show
FastaCode,
codeAsciiControlCharacter,
codeEncoding,
codeExpectedHexDigit,
codeMissingExponent,
codeNonAsciiIdentifier,
codeNonAsciiWhitespace,
codeUnexpectedDollarInString,
codeUnmatchedToken,
codeUnterminatedComment,
codeUnterminatedString;
import 'token.dart' show StringToken, SymbolToken, Token;
import 'error_token.dart' show NonAsciiIdentifierToken, ErrorToken;
import 'precedence.dart' as Precedence;
import 'precedence.dart' show PrecedenceInfo;
/// Recover from errors in [tokens]. The original sources are provided as
/// [bytes]. [lineStarts] are the beginning character offsets of lines, and
/// must be updated if recovery is performed rewriting the original source
/// code.
///
/// Returns a rewritten token stream in which all collected [ErrorToken]s come
/// first (terminated by a leading synthetic EOF as `previousToken`), followed
/// by the recovered "good" tokens, terminated by an EOF token.
Token defaultRecoveryStrategy(
    List<int> bytes, Token tokens, List<int> lineStarts) {
  // See [Parser.reportErrorToken](../parser/src/parser.dart) for how
  // it currently handles lexical errors. In addition, notice how the parser
  // calls [handleInvalidExpression], [handleInvalidFunctionBody], and
  // [handleInvalidTypeReference] to allow the listener to recover its internal
  // state. See [package:compiler/src/parser/element_listener.dart] for an
  // example of how these events are used.
  //
  // In addition, the scanner will attempt a bit of recovery when braces don't
  // match up during brace grouping. See
  // [ArrayBasedScanner.discardBeginGroupUntil](array_based_scanner.dart). For
  // more details on brace grouping see
  // [AbstractScanner.unmatchedBeginGroup](abstract_scanner.dart).

  /// Tokens with errors.
  ErrorToken error;

  /// Used for appending to [error].
  ErrorToken errorTail;

  /// Tokens without errors.
  Token good;

  /// Used for appending to [good].
  Token goodTail;

  /// The previous token appended to [good]. Since tokens are single linked
  /// lists, this allows us to rewrite the current token without scanning all
  /// of [good]. This is supposed to be the token immediately before
  /// [goodTail], that is, `beforeGoodTail.next == goodTail`.
  Token beforeGoodTail;

  /// Merges the run of [NonAsciiIdentifierToken]s from [first] through
  /// [errorTail], together with any adjacent good identifier fragments, into
  /// one synthesized identifier token, and returns it with `next` set to the
  /// token that follows the merged span.
  Token recoverIdentifier(NonAsciiIdentifierToken first) {
    List<int> codeUnits = <int>[];

    // True if the previous good token is an identifier and ends right where
    // [first] starts. This is the case for input like `blåbærgrød`. In this
    // case, the scanner produces this sequence of tokens:
    //
    //  [
    //    StringToken("bl"),
    //    NonAsciiIdentifierToken("å"),
    //    StringToken("b"),
    //    NonAsciiIdentifierToken("æ"),
    //    StringToken("rgr"),
    //    NonAsciiIdentifierToken("ø"),
    //    StringToken("d"),
    //    EOF,
    //  ]
    bool prepend = false;

    // True if following token is also an identifier that starts right where
    // [errorTail] ends. This is the case for "b" above.
    bool append = false;
    if (goodTail != null) {
      if (goodTail.info == Precedence.IDENTIFIER_INFO &&
          goodTail.charEnd == first.charOffset) {
        prepend = true;
      }
    }
    Token next = errorTail.next;
    // NOTE(review): `charOffset + 1` assumes each error token covers exactly
    // one code unit — presumably true for [NonAsciiIdentifierToken]; confirm
    // for non-BMP characters.
    if (next.info == Precedence.IDENTIFIER_INFO &&
        errorTail.charOffset + 1 == next.charOffset) {
      append = true;
    }
    if (prepend) {
      codeUnits.addAll(goodTail.lexeme.codeUnits);
    }
    // Append the code unit of every error token in the run, [first] through
    // [errorTail] inclusive.
    NonAsciiIdentifierToken current = first;
    while (current != errorTail) {
      codeUnits.add(current.character);
      current = current.next as NonAsciiIdentifierToken;
    }
    codeUnits.add(errorTail.character);
    int charOffset = first.charOffset;
    if (prepend) {
      // The merged token replaces [goodTail], so back the good list up by one.
      charOffset = goodTail.charOffset;
      if (beforeGoodTail == null) {
        // We're prepending the first good token, so the new token will become
        // the first good token.
        good = null;
        goodTail = null;
        beforeGoodTail = null;
      } else {
        goodTail = beforeGoodTail;
      }
    }
    if (append) {
      codeUnits.addAll(next.lexeme.codeUnits);
      next = next.next;
    }
    String value = new String.fromCharCodes(codeUnits);
    return synthesizeToken(charOffset, value, Precedence.IDENTIFIER_INFO)
      ..next = next;
  }

  /// Replaces a numeric literal with a missing exponent by a synthesized
  /// "NaN" double literal.
  Token recoverExponent() {
    return synthesizeToken(errorTail.charOffset, "NaN", Precedence.DOUBLE_INFO)
      ..next = errorTail.next;
  }

  /// Recovers from an unterminated string by skipping the rest of the input.
  Token recoverString() {
    // TODO(ahe): Improve this.
    return skipToEof(errorTail);
  }

  /// Replaces a hex literal with no digits (`0x`) by a synthesized "-1" int
  /// literal.
  Token recoverHexDigit() {
    return synthesizeToken(errorTail.charOffset, "-1", Precedence.INT_INFO)
      ..next = errorTail.next;
  }

  /// Recovers from a stray `$` in a string by skipping the rest of the input.
  Token recoverStringInterpolation() {
    // TODO(ahe): Improve this.
    return skipToEof(errorTail);
  }

  /// Recovers from an unterminated comment by skipping the rest of the input.
  Token recoverComment() {
    // TODO(ahe): Improve this.
    return skipToEof(errorTail);
  }

  /// Recovers from an unmatched brace by simply continuing after it.
  Token recoverUnmatched() {
    // TODO(ahe): Try to use top-level keywords (such as `class`, `typedef`,
    // and `enum`) and indentation to recover.
    return errorTail.next;
  }

  for (Token current = tokens; !current.isEof; current = current.next) {
    if (current is ErrorToken) {
      ErrorToken first = current as ErrorToken;
      Token next = current;
      bool treatAsWhitespace = false;
      // Collect the maximal run of consecutive error tokens sharing the same
      // error code, appending each to the [error] list.
      do {
        current = next;
        if (errorTail == null) {
          error = next as ErrorToken;
        } else {
          errorTail.next = next;
          next.previousToken = errorTail;
        }
        errorTail = next as ErrorToken;
        next = next.next;
      } while (
          next is ErrorToken && first.errorCode == (next as ErrorToken).errorCode);
      // Dispatch on the run's error code; recovery handlers return the token
      // to resume from (whose `next` must already be linked).
      FastaCode code = first.errorCode;
      if (code == codeEncoding ||
          code == codeNonAsciiWhitespace ||
          code == codeAsciiControlCharacter) {
        treatAsWhitespace = true;
      } else if (code == codeNonAsciiIdentifier) {
        current = recoverIdentifier(first);
        assert(current.next != null);
      } else if (code == codeMissingExponent) {
        current = recoverExponent();
        assert(current.next != null);
      } else if (code == codeUnterminatedString) {
        current = recoverString();
        assert(current.next != null);
      } else if (code == codeExpectedHexDigit) {
        current = recoverHexDigit();
        assert(current.next != null);
      } else if (code == codeUnexpectedDollarInString) {
        current = recoverStringInterpolation();
        assert(current.next != null);
      } else if (code == codeUnterminatedComment) {
        current = recoverComment();
        assert(current.next != null);
      } else if (code == codeUnmatchedToken) {
        current = recoverUnmatched();
        assert(current.next != null);
      } else {
        // Unknown error codes are ignored, as if they were whitespace.
        treatAsWhitespace = true;
      }
      // Skip appending to the good list; the error tokens were consumed above.
      if (treatAsWhitespace) continue;
    }
    // Append [current] to the list of good tokens.
    if (goodTail == null) {
      good = current;
    } else {
      goodTail.next = current;
      current.previousToken = goodTail;
    }
    beforeGoodTail = goodTail;
    goodTail = current;
  }

  // Stitch the final stream together: [error] list first, then [good] list.
  // NOTE(review): if [tokens] contained no error tokens, [error] is null here
  // and this dereference throws — presumably callers only invoke recovery
  // when at least one error token exists; confirm at call sites.
  error.previousToken = new SymbolToken.eof(-1)..next = error;
  Token tail;
  if (good != null) {
    errorTail.next = good;
    good.previousToken = errorTail;
    tail = goodTail;
  } else {
    tail = errorTail;
  }
  // Ensure the stream is EOF-terminated.
  if (!tail.isEof)
    tail.next = new SymbolToken.eof(tail.end)..previousToken = tail;
  return error;
}
/// Creates a synthetic [StringToken] holding [value] at [charOffset] with the
/// given precedence [info].
Token synthesizeToken(int charOffset, String value, PrecedenceInfo info) =>
    new StringToken.fromString(info, value, charOffset);
/// Advances through the token stream starting at [token] and returns the
/// first EOF token encountered ([token] itself if it is already EOF).
Token skipToEof(Token token) {
  Token current = token;
  for (; !current.isEof; current = current.next) {
    // Nothing to do; the loop header performs the walk.
  }
  return current;
}
/// Returns the closing brace that matches [openBrace], or `null` if
/// [openBrace] is not a recognized opening brace.
String closeBraceFor(String openBrace) {
  const Map<String, String> closers = const {
    '(': ')',
    '[': ']',
    '{': '}',
    '<': '>',
    r'${': '}',
  };
  return closers[openBrace];
}