| // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| library _fe_analyzer_shared.scanner.token; |
| |
| import 'token.dart' as analyzer; |
| import 'token.dart' show Token, TokenType; |
| |
| import 'token_constants.dart' show IDENTIFIER_TOKEN; |
| |
| import 'string_canonicalizer.dart'; |
| |
| /** |
| * A String-valued token. Represents identifiers, string literals, |
| * number literals, comments, and error tokens, using the corresponding |
| * precedence info. |
| */ |
| class StringToken extends analyzer.SimpleToken implements analyzer.StringToken { |
| /** |
| * The length threshold above which substring tokens are computed lazily. |
| * |
| * For string tokens that are substrings of the program source, the actual |
| * substring extraction is performed lazily. This is beneficial because |
| * not all scanned code are actually used. For unused parts, the substrings |
| * are never computed and allocated. |
| */ |
| static const int LAZY_THRESHOLD = 4; |
| |
| dynamic /* String | LazySubstring */ valueOrLazySubstring; |
| |
| /** |
| * Creates a non-lazy string token. If [canonicalize] is true, the string |
| * is canonicalized before the token is created. |
| */ |
| StringToken.fromString(TokenType type, String value, int charOffset, |
| {bool canonicalize: false, analyzer.CommentToken? precedingComments}) |
| : valueOrLazySubstring = canonicalizedString( |
| value, /* start = */ 0, value.length, canonicalize), |
| super(type, charOffset, precedingComments); |
| |
| /** |
| * Creates a lazy string token. If [canonicalize] is true, the string |
| * is canonicalized before the token is created. |
| */ |
| StringToken.fromSubstring( |
| TokenType type, String data, int start, int end, int charOffset, |
| {bool canonicalize: false, analyzer.CommentToken? precedingComments}) |
| : super(type, charOffset, precedingComments) { |
| int length = end - start; |
| if (length <= LAZY_THRESHOLD) { |
| valueOrLazySubstring = |
| canonicalizedString(data, start, end, canonicalize); |
| } else { |
| valueOrLazySubstring = |
| new _LazySubstring(data, start, length, canonicalize); |
| } |
| } |
| |
| /** |
| * Creates a lazy string token. If [asciiOnly] is false, the byte array |
| * is passed through a UTF-8 decoder. |
| */ |
| StringToken.fromUtf8Bytes(TokenType type, List<int> data, int start, int end, |
| bool asciiOnly, int charOffset, |
| {analyzer.CommentToken? precedingComments}) |
| : super(type, charOffset, precedingComments) { |
| int length = end - start; |
| if (length <= LAZY_THRESHOLD) { |
| valueOrLazySubstring = decodeUtf8(data, start, end, asciiOnly); |
| } else { |
| valueOrLazySubstring = new _LazySubstring(data, start, length, asciiOnly); |
| } |
| } |
| |
| StringToken._(TokenType type, this.valueOrLazySubstring, int charOffset, |
| [analyzer.CommentToken? precedingComments]) |
| : super(type, charOffset, precedingComments); |
| |
| @override |
| String get lexeme { |
| if (valueOrLazySubstring is String) { |
| return valueOrLazySubstring; |
| } else { |
| assert(valueOrLazySubstring is _LazySubstring); |
| dynamic data = valueOrLazySubstring.data; |
| int start = valueOrLazySubstring.start; |
| int end = start + (valueOrLazySubstring as _LazySubstring).length; |
| if (data is String) { |
| valueOrLazySubstring = canonicalizedString( |
| data, start, end, valueOrLazySubstring.boolValue); |
| } else { |
| valueOrLazySubstring = |
| decodeUtf8(data, start, end, valueOrLazySubstring.boolValue); |
| } |
| return valueOrLazySubstring; |
| } |
| } |
| |
| @override |
| bool get isIdentifier => identical(kind, IDENTIFIER_TOKEN); |
| |
| @override |
| String toString() => lexeme; |
| |
| static final StringCanonicalizer canonicalizer = new StringCanonicalizer(); |
| |
| static String canonicalizedString( |
| String s, int start, int end, bool canonicalize) { |
| if (!canonicalize) return s; |
| return canonicalizer.canonicalize(s, start, end, /* asciiOnly = */ false); |
| } |
| |
| static String decodeUtf8(List<int> data, int start, int end, bool asciiOnly) { |
| return canonicalizer.canonicalize(data, start, end, asciiOnly); |
| } |
| |
| @override |
| Token copy() => new StringToken._( |
| type, valueOrLazySubstring, charOffset, copyComments(precedingComments)); |
| |
| @override |
| String value() => lexeme; |
| } |
| |
| /** |
| * A String-valued token that does not exist in the original source. |
| */ |
| class SyntheticStringToken extends StringToken |
| implements analyzer.SyntheticStringToken { |
| SyntheticStringToken(TokenType type, String value, int offset, |
| [analyzer.CommentToken? precedingComments]) |
| : super._(type, value, offset, precedingComments); |
| |
| @override |
| int get length => 0; |
| |
| @override |
| Token copy() => new SyntheticStringToken( |
| type, valueOrLazySubstring, offset, copyComments(precedingComments)); |
| } |
| |
| class CommentToken extends StringToken implements analyzer.CommentToken { |
| @override |
| analyzer.SimpleToken? parent; |
| |
| /** |
| * Creates a lazy comment token. If [canonicalize] is true, the string |
| * is canonicalized before the token is created. |
| */ |
| CommentToken.fromSubstring( |
| TokenType type, String data, int start, int end, int charOffset, |
| {bool canonicalize: false}) |
| : super.fromSubstring(type, data, start, end, charOffset, |
| canonicalize: canonicalize); |
| |
| /** |
| * Creates a non-lazy comment token. |
| */ |
| CommentToken.fromString(TokenType type, String lexeme, int charOffset) |
| : super.fromString(type, lexeme, charOffset); |
| |
| /** |
| * Creates a lazy string token. If [asciiOnly] is false, the byte array |
| * is passed through a UTF-8 decoder. |
| */ |
| CommentToken.fromUtf8Bytes(TokenType type, List<int> data, int start, int end, |
| bool asciiOnly, int charOffset) |
| : super.fromUtf8Bytes(type, data, start, end, asciiOnly, charOffset); |
| |
| CommentToken._(TokenType type, valueOrLazySubstring, int charOffset) |
| : super._(type, valueOrLazySubstring, charOffset); |
| |
| @override |
| CommentToken copy() => |
| new CommentToken._(type, valueOrLazySubstring, charOffset); |
| |
| @override |
| void remove() { |
| if (previous != null) { |
| previous!.setNextWithoutSettingPrevious(next); |
| next?.previous = previous; |
| } else { |
| assert(parent!.precedingComments == this); |
| parent!.precedingComments = next as CommentToken; |
| } |
| } |
| } |
| |
| class LanguageVersionToken extends CommentToken |
| implements analyzer.LanguageVersionToken { |
| @override |
| int major; |
| |
| @override |
| int minor; |
| |
| LanguageVersionToken.from(String text, int offset, this.major, this.minor) |
| : super.fromString(TokenType.SINGLE_LINE_COMMENT, text, offset); |
| |
| LanguageVersionToken.fromSubstring( |
| String string, int start, int end, int tokenStart, this.major, this.minor, |
| {bool canonicalize: false}) |
| : super.fromSubstring( |
| TokenType.SINGLE_LINE_COMMENT, string, start, end, tokenStart, |
| canonicalize: canonicalize); |
| |
| LanguageVersionToken.fromUtf8Bytes(List<int> bytes, int start, int end, |
| int tokenStart, this.major, this.minor) |
| : super.fromUtf8Bytes( |
| TokenType.SINGLE_LINE_COMMENT, bytes, start, end, true, tokenStart); |
| |
| @override |
| LanguageVersionToken copy() => |
| new LanguageVersionToken.from(lexeme, offset, major, minor); |
| } |
| |
| class DartDocToken extends CommentToken |
| implements analyzer.DocumentationCommentToken { |
| /** |
| * Creates a lazy comment token. If [canonicalize] is true, the string |
| * is canonicalized before the token is created. |
| */ |
| DartDocToken.fromSubstring( |
| TokenType type, String data, int start, int end, int charOffset, |
| {bool canonicalize: false}) |
| : super.fromSubstring(type, data, start, end, charOffset, |
| canonicalize: canonicalize); |
| |
| /** |
| * Creates a lazy string token. If [asciiOnly] is false, the byte array |
| * is passed through a UTF-8 decoder. |
| */ |
| DartDocToken.fromUtf8Bytes(TokenType type, List<int> data, int start, int end, |
| bool asciiOnly, int charOffset) |
| : super.fromUtf8Bytes(type, data, start, end, asciiOnly, charOffset); |
| |
| DartDocToken._(TokenType type, valueOrLazySubstring, int charOffset) |
| : super._(type, valueOrLazySubstring, charOffset); |
| |
| @override |
| DartDocToken copy() => |
| new DartDocToken._(type, valueOrLazySubstring, charOffset); |
| } |
| |
| /** |
| * This class represents the necessary information to compute a substring |
| * lazily. The substring can either originate from a string or from |
| * a [:List<int>:] of UTF-8 bytes. |
| */ |
| abstract class _LazySubstring { |
| /** The original data, either a string or a List<int> */ |
| get data; |
| |
| int get start; |
| int get length; |
| |
| /** |
| * If this substring is based on a String, the [boolValue] indicates whether |
| * the resulting substring should be canonicalized. |
| * |
| * For substrings based on a byte array, the [boolValue] is true if the |
| * array only holds ASCII characters. The resulting substring will be |
| * canonicalized after decoding. |
| */ |
| bool get boolValue; |
| |
| _LazySubstring.internal(); |
| |
| factory _LazySubstring(data, int start, int length, bool b) { |
| // See comment on [CompactLazySubstring]. |
| if (start < 0x100000 && length < 0x200) { |
| int fields = (start << 9); |
| fields = fields | length; |
| fields = fields << 1; |
| if (b) fields |= 1; |
| return new _CompactLazySubstring(data, fields); |
| } else { |
| return new _FullLazySubstring(data, start, length, b); |
| } |
| } |
| } |
| |
| /** |
| * This class encodes [start], [length] and [boolValue] in a single |
| * 30 bit integer. It uses 20 bits for [start], which covers source files |
| * of 1MB. [length] has 9 bits, which covers 512 characters. |
| * |
| * The file html_dart2js.dart is currently around 1MB. |
| */ |
| class _CompactLazySubstring extends _LazySubstring { |
| final dynamic data; |
| final int fields; |
| |
| _CompactLazySubstring(this.data, this.fields) : super.internal(); |
| |
| int get start => fields >> 10; |
| int get length => (fields >> 1) & 0x1ff; |
| bool get boolValue => (fields & 1) == 1; |
| } |
| |
| class _FullLazySubstring extends _LazySubstring { |
| final dynamic data; |
| final int start; |
| final int length; |
| final bool boolValue; |
| _FullLazySubstring(this.data, this.start, this.length, this.boolValue) |
| : super.internal(); |
| } |
| |
| bool isUserDefinableOperator(String value) { |
| return isBinaryOperator(value) || |
| isMinusOperator(value) || |
| isTernaryOperator(value) || |
| isUnaryOperator(value); |
| } |
| |
| bool isUnaryOperator(String value) => identical(value, "~"); |
| |
| bool isBinaryOperator(String value) { |
| return identical(value, "==") || |
| identical(value, "[]") || |
| identical(value, "*") || |
| identical(value, "/") || |
| identical(value, "%") || |
| identical(value, "~/") || |
| identical(value, "+") || |
| identical(value, "<<") || |
| identical(value, ">>") || |
| identical(value, ">>>") || |
| identical(value, ">=") || |
| identical(value, ">") || |
| identical(value, "<=") || |
| identical(value, "<") || |
| identical(value, "&") || |
| identical(value, "^") || |
| identical(value, "|"); |
| } |
| |
| bool isTernaryOperator(String value) => identical(value, "[]="); |
| |
| bool isMinusOperator(String value) => identical(value, "-"); |