lib/src/tokenizer_base.dart - csslib - Git at Google

 // Copyright (c) 2012, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.
 // Generated by scripts/tokenizer_gen.py.

 part of '../parser.dart';

 /// Tokenizer state to support look ahead for Less' nested selectors.
 ///
 /// An immutable snapshot of a [TokenizerBase]'s scan offsets and selector
 /// flags, taken at construction time. Pass it to [TokenizerBase.restore] to
 /// rewind the tokenizer to this point.
 class TokenizerState {
   /// The tokenizer's current scan offset when the snapshot was taken.
   final int index;

   /// The start offset of the token being scanned when the snapshot was taken.
   final int startIndex;

   /// Captured value of [TokenizerBase.inSelectorExpression].
   final bool inSelectorExpression;

   /// Captured value of [TokenizerBase.inSelector].
   final bool inSelector;

   /// Copies the scanning state out of [base].
   TokenizerState(TokenizerBase base)
       : inSelector = base.inSelector,
         inSelectorExpression = base.inSelectorExpression,
         startIndex = base._startIndex,
         index = base._index;
 }

 /// The base class for our tokenizer. The hand coded parts are in this file,
 /// with the generated parts in the subclass Tokenizer.
 ///
 /// Tracks the scan position (`_index`) and the start offset of the token
 /// being built (`_startIndex`) within the source text, and provides the
 /// character-level helpers and `finish*` routines that complete a token.
 abstract class TokenizerBase {
   /// Source file used to create the spans attached to tokens.
   final SourceFile _file;

   /// The text being tokenized.
   final String _text;

   /// When true, [finishWhitespace] and [finishMultiLineComment] skip what they
   /// scanned and return the result of [next] instead of a WHITESPACE or
   /// COMMENT token.
   // TODO: this seems like a bug – this field *is* used
   // ignore: prefer_final_fields
   bool _inString;

   /// Changes tokenization when in a pseudo function expression.  If true then
   /// minus signs are handled as operators instead of identifiers.
   bool inSelectorExpression = false;

   /// Changes tokenization when in selectors. If true, it prevents identifiers
   /// from being treated as units. This would break things like ":lang(fr)" or
   /// the HTML (unknown) tag name "px", which is legal to use in a selector.
   // TODO(jmesserly): is this a problem elsewhere? "fr" for example will be
   // processed as a "fraction" unit token, preventing it from working in
   // places where an identifier is expected. This was breaking selectors like:
   //     :lang(fr)
   // The assumption that "fr" always means fraction (and similar issue with
   // other units) doesn't seem valid. We probably should defer this
   // analysis until we reach places in the parser where units are expected.
   // I'm not sure this is tokenizing as described in the specs:
   //     http://dev.w3.org/csswg/css-syntax/
   //     http://dev.w3.org/csswg/selectors4/
   bool inSelector = false;

   /// Offset in [_text] of the next code unit to read.
   int _index = 0;

   /// Offset in [_text] where the token currently being scanned starts.
   int _startIndex = 0;

   /// Creates a tokenizer over [_text], optionally starting at offset [_index].
   TokenizerBase(this._file, this._text, this._inString, [this._index = 0]);

   /// Returns the next token; implemented by the subclass.
   Token next();

   /// Implemented by the subclass.
   int getIdentifierKind();

   /// Snapshot of Tokenizer scanning state.
   TokenizerState get mark => TokenizerState(this);

   /// Restore Tokenizer scanning state.
   void restore(TokenizerState markedData) {
     _index = markedData.index;
     _startIndex = markedData.startIndex;
     inSelectorExpression = markedData.inSelectorExpression;
     inSelector = markedData.inSelector;
   }

   /// Consumes and returns the next code unit, or 0 at the end of the text.
   int _nextChar() => _index < _text.length ? _text.codeUnitAt(_index++) : 0;

   /// Returns the code unit [offset] positions ahead without consuming it, or
   /// 0 if that position is at or past the end of the text.
   int _peekChar([int offset = 0]) {
     final position = _index + offset;
     return position < _text.length ? _text.codeUnitAt(position) : 0;
   }

   /// Consumes the next code unit only if it equals [ch].
   ///
   /// Returns whether a code unit was consumed.
   bool _maybeEatChar(int ch) {
     if (_index >= _text.length || _text.codeUnitAt(_index) != ch) return false;
     _index++;
     return true;
   }

   /// Whether [first], followed by the upcoming characters, starts a number:
   /// a digit, `.digit`, or a sign followed by a digit or `.digit`.
   bool _nextCharsAreNumber(int first) {
     if (TokenizerHelpers.isDigit(first)) return true;
     var second = _peekChar();
     if (first == TokenChar.DOT) return TokenizerHelpers.isDigit(second);
     if (first == TokenChar.PLUS || first == TokenChar.MINUS) {
       return TokenizerHelpers.isDigit(second) ||
           (second == TokenChar.DOT && TokenizerHelpers.isDigit(_peekChar(1)));
     }
     return false;
   }

   /// Builds a token of [kind] spanning `_startIndex` to `_index`.
   Token _finishToken(int kind) {
     return Token(kind, _file.span(_startIndex, _index));
   }

   /// Builds an error token spanning `_startIndex` to `_index`, with an
   /// optional [message].
   Token _errorToken([String? message]) {
     return ErrorToken(
         TokenKind.ERROR, _file.span(_startIndex, _index), message);
   }

   /// Scans a run of whitespace.
   ///
   /// Outside of strings the run ends at (and includes) the first newline, or
   /// just before the first non-whitespace character. Reaching the end of the
   /// text yields an END_OF_FILE token.
   Token finishWhitespace() {
     // Step back one position so the loop re-reads the character before the
     // current offset (presumably the whitespace the caller just consumed).
     _index--;
     while (_index < _text.length) {
       final ch = _text.codeUnitAt(_index++);
       if (ch == TokenChar.SPACE ||
           ch == TokenChar.TAB ||
           ch == TokenChar.RETURN) {
         continue;
       }
       if (ch == TokenChar.NEWLINE) {
         if (!_inString) {
           return _finishToken(TokenKind.WHITESPACE); // note the newline?
         }
         continue;
       }
       // Not whitespace: un-read it so it starts the following token.
       _index--;
       return _inString ? next() : _finishToken(TokenKind.WHITESPACE);
     }
     return _finishToken(TokenKind.END_OF_FILE);
   }

   /// Scans to the end of a `/* ... */` comment, honouring nested comments.
   ///
   /// Returns an error token if the text ends before the comment is closed.
   Token finishMultiLineComment() {
     var nesting = 1;
     do {
       var ch = _nextChar();
       if (ch == 0) {
         return _errorToken();
       } else if (ch == TokenChar.ASTERISK) {
         if (_maybeEatChar(TokenChar.SLASH)) {
           nesting--;
         }
       } else if (ch == TokenChar.SLASH) {
         if (_maybeEatChar(TokenChar.ASTERISK)) {
           nesting++;
         }
       }
     } while (nesting > 0);

     return _inString ? next() : _finishToken(TokenKind.COMMENT);
   }

   /// Consumes consecutive decimal digits, if any.
   void eatDigits() {
     while (_index < _text.length &&
         TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
       _index++;
     }
   }

   /// Returns the value (0-15) of the hex digit code unit [c], or -1 if [c]
   /// is not a hex digit.
   static int _hexDigit(int c) {
     if (c >= 48 /*0*/ && c <= 57 /*9*/) {
       return c - 48;
     } else if (c >= 97 /*a*/ && c <= 102 /*f*/) {
       return c - 87;
     } else if (c >= 65 /*A*/ && c <= 70 /*F*/) {
       return c - 55;
     } else {
       return -1;
     }
   }

   /// Reads a hexadecimal number starting at the current position.
   ///
   /// With [hexLength], exactly that many hex digits are required; -1 is
   /// returned if fewer characters remain or a non-hex character is found.
   /// Without it, digits are read until the first non-hex character or the
   /// end of the text, and the value read so far is returned (0 if none).
   int readHex([int? hexLength]) {
     int maxIndex;
     if (hexLength == null) {
       // Read up to and including the last character of the text.
       maxIndex = _text.length;
     } else {
       // TODO(jimhug): What if this is too long?
       maxIndex = _index + hexLength;
       // Only fail when fewer than [hexLength] characters remain; a sequence
       // that ends exactly at the end of the text is still readable.
       if (maxIndex > _text.length) return -1;
     }
     var result = 0;
     while (_index < maxIndex) {
       final digit = _hexDigit(_text.codeUnitAt(_index));
       if (digit == -1) {
         return hexLength == null ? result : -1;
       }
       // Multiply by 16 rather than shift by 4 since that will result in a
       // correct value for numbers that exceed the 32 bit precision of JS
       // 'integers'.
       // TODO: Figure out a better solution to integer truncation. Issue 638.
       result = (result * 16) + digit;
       _index++;
     }

     return result;
   }

   /// Scans the remainder of a number: more digits, then an optional fraction.
   ///
   /// A `.` is only taken as part of the number when a digit follows it.
   Token finishNumber() {
     eatDigits();

     if (_peekChar() == TokenChar.DOT) {
       // Handle the case of 1.toString().
       _nextChar();
       if (TokenizerHelpers.isDigit(_peekChar())) {
         eatDigits();
         return finishNumberExtra(TokenKind.DOUBLE);
       } else {
         _index--;
       }
     }

     return finishNumberExtra(TokenKind.INTEGER);
   }

   /// Scans an optional exponent (`e`/`E`, an optional sign, digits) and
   /// finishes the number token.
   ///
   /// The token is [kind] unless an exponent is present, in which case it is
   /// a DOUBLE. An identifier-start character right after the number yields
   /// an error token.
   Token finishNumberExtra(int kind) {
     if (_maybeEatChar(101 /*e*/) || _maybeEatChar(69 /*E*/)) {
       kind = TokenKind.DOUBLE;
       // Compare against character codes (TokenChar), not token kinds, and
       // accept at most one sign.
       if (!_maybeEatChar(TokenChar.MINUS)) {
         _maybeEatChar(TokenChar.PLUS);
       }
       eatDigits();
     }
     if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
       _nextChar();
       return _errorToken('illegal character in number');
     }

     return _finishToken(kind);
   }

   /// Builds a string token whose value is the code units in [buf].
   ///
   /// The kind is STRING_PART when [isPart] is true, otherwise STRING.
   Token _makeStringToken(List<int> buf, bool isPart) {
     final s = String.fromCharCodes(buf);
     final kind = isPart ? TokenKind.STRING_PART : TokenKind.STRING;
     return LiteralToken(kind, _file.span(_startIndex, _index), s);
   }

   /// Builds a STRING token whose value and span are the text from [start]
   /// to [end].
   Token makeIEFilter(int start, int end) {
     var filter = _text.substring(start, end);
     return LiteralToken(TokenKind.STRING, _file.span(start, end), filter);
   }

   /// Builds a STRING token for a raw string, taking the value directly from
   /// the source text between the delimiters.
   ///
   /// The offsets skip one prefix character plus the opening quote(s)
   /// (presumably the raw-string marker) and the closing quote(s).
   Token _makeRawStringToken(bool isMultiline) {
     String s;
     if (isMultiline) {
       // Skip initial newline in multiline strings
       var start = _startIndex + 4;
       if (_text[start] == '\n') start++;
       s = _text.substring(start, _index - 3);
     } else {
       s = _text.substring(_startIndex + 2, _index - 1);
     }
     return LiteralToken(TokenKind.STRING, _file.span(_startIndex, _index), s);
   }

   /// Scans the body of a triple-quoted string up to the closing three
   /// [quote] characters, processing backslash escapes.
   ///
   /// Returns an error token on end of text or an invalid escape.
   Token finishMultilineString(int quote) {
     var buf = <int>[];
     while (true) {
       var ch = _nextChar();
       if (ch == 0) {
         return _errorToken();
       } else if (ch == quote) {
         if (_maybeEatChar(quote)) {
           if (_maybeEatChar(quote)) {
             return _makeStringToken(buf, false);
           }
           // Two quotes not followed by a third are literal content.
           buf.add(quote);
         }
         buf.add(quote);
       } else if (ch == TokenChar.BACKSLASH) {
         var escapeVal = readEscapeSequence();
         if (escapeVal == -1) {
           return _errorToken('invalid hex escape sequence');
         } else {
           buf.add(escapeVal);
         }
       } else {
         buf.add(ch);
       }
     }
   }

   /// Scans a string delimited by [quote]; assumes the caller has already
   /// consumed the opening quote.
   ///
   /// Two further quotes start a multiline string, one further quote is an
   /// empty string, anything else is a regular string body.
   Token finishString(int quote) {
     if (_maybeEatChar(quote)) {
       if (_maybeEatChar(quote)) {
         // skip an initial newline
         _maybeEatChar(TokenChar.NEWLINE);
         return finishMultilineString(quote);
       } else {
         return _makeStringToken(<int>[], false);
       }
     }
     return finishStringBody(quote);
   }

   /// Scans a raw string delimited by [quote]; no escape processing is done.
   ///
   /// Returns an error token if the text ends before the closing quote.
   Token finishRawString(int quote) {
     if (_maybeEatChar(quote)) {
       if (_maybeEatChar(quote)) {
         return finishMultilineRawString(quote);
       } else {
         return _makeStringToken(<int>[], false);
       }
     }
     while (true) {
       var ch = _nextChar();
       if (ch == quote) {
         return _makeRawStringToken(false);
       } else if (ch == 0) {
         return _errorToken();
       }
     }
   }

   /// Scans a triple-quoted raw string up to the closing three [quote]
   /// characters.
   ///
   /// Returns an error token if the text ends first.
   Token finishMultilineRawString(int quote) {
     while (true) {
       var ch = _nextChar();
       if (ch == 0) {
         return _errorToken();
       } else if (ch == quote && _maybeEatChar(quote) && _maybeEatChar(quote)) {
         return _makeRawStringToken(true);
       }
     }
   }

   /// Scans a single-line string body up to the closing [quote], processing
   /// backslash escapes.
   ///
   /// Returns an error token on end of text or an invalid escape.
   Token finishStringBody(int quote) {
     var buf = <int>[];
     while (true) {
       var ch = _nextChar();
       if (ch == quote) {
         return _makeStringToken(buf, false);
       } else if (ch == 0) {
         return _errorToken();
       } else if (ch == TokenChar.BACKSLASH) {
         var escapeVal = readEscapeSequence();
         if (escapeVal == -1) {
           return _errorToken('invalid hex escape sequence');
         } else {
           buf.add(escapeVal);
         }
       } else {
         buf.add(ch);
       }
     }
   }

   /// Reads the escape sequence that follows a backslash and returns the
   /// character it denotes, or -1 if the sequence is invalid.
   ///
   /// Supports `\n \r \f \b \t \v`, `\xHH`, `\uHHHH` and `\u{H...}`; any other
   /// character stands for itself.
   int readEscapeSequence() {
     final ch = _nextChar();
     int hexValue;
     switch (ch) {
       case 110 /*n*/ :
         return TokenChar.NEWLINE;
       case 114 /*r*/ :
         return TokenChar.RETURN;
       case 102 /*f*/ :
         return TokenChar.FF;
       case 98 /*b*/ :
         return TokenChar.BACKSPACE;
       case 116 /*t*/ :
         return TokenChar.TAB;
       case 118 /*v*/ :
         // Vertical tab is U+000B; form feed (U+000C) is `\f` above.
         return 0x0B;
       case 120 /*x*/ :
         hexValue = readHex(2);
         break;
       case 117 /*u*/ :
         if (_maybeEatChar(TokenChar.LBRACE)) {
           hexValue = readHex();
           if (!_maybeEatChar(TokenChar.RBRACE)) {
             return -1;
           }
         } else {
           hexValue = readHex(4);
         }
         break;
       default:
         return ch;
     }

     if (hexValue == -1) return -1;

     // According to the Unicode standard the high and low surrogate halves
     // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
     // are not legal Unicode values.
     if (hexValue < 0xD800 || (hexValue > 0xDFFF && hexValue <= 0xFFFF)) {
       return hexValue;
     } else if (hexValue <= 0x10FFFF) {
       // NOTE: surrogate values (U+D800..U+DFFF) also reach this branch.
       messages.error('unicode values greater than 2 bytes not implemented yet',
           _file.span(_startIndex, _startIndex + 1));
       return -1;
     } else {
       return -1;
     }
   }

   /// Finishes a token that began with `.`: a DOUBLE if a digit follows,
   /// otherwise a DOT token.
   Token finishDot() {
     if (TokenizerHelpers.isDigit(_peekChar())) {
       eatDigits();
       return finishNumberExtra(TokenKind.DOUBLE);
     } else {
       return _finishToken(TokenKind.DOT);
     }
   }
 }
	// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
	// for details. All rights reserved. Use of this source code is governed by a
	// BSD-style license that can be found in the LICENSE file.
	// Generated by scripts/tokenizer_gen.py.

	part of '../parser.dart';

	/// Tokenizer state to support look ahead for Less' nested selectors.
	///
	/// An immutable snapshot of a [TokenizerBase]'s scan offsets and selector
	/// flags, taken at construction time. Pass it to [TokenizerBase.restore] to
	/// rewind the tokenizer to this point.
	class TokenizerState {
	  /// The tokenizer's current scan offset when the snapshot was taken.
	  final int index;

	  /// The start offset of the token being scanned when the snapshot was taken.
	  final int startIndex;

	  /// Captured value of [TokenizerBase.inSelectorExpression].
	  final bool inSelectorExpression;

	  /// Captured value of [TokenizerBase.inSelector].
	  final bool inSelector;

	  /// Copies the scanning state out of [base].
	  TokenizerState(TokenizerBase base)
	      : inSelector = base.inSelector,
	        inSelectorExpression = base.inSelectorExpression,
	        startIndex = base._startIndex,
	        index = base._index;
	}

	/// The base class for our tokenizer. The hand coded parts are in this file,
	/// with the generated parts in the subclass Tokenizer.
	///
	/// Tracks the scan position (`_index`) and the start offset of the token
	/// being built (`_startIndex`) within the source text, and provides the
	/// character-level helpers and `finish*` routines that complete a token.
	abstract class TokenizerBase {
	  /// Source file used to create the spans attached to tokens.
	  final SourceFile _file;

	  /// The text being tokenized.
	  final String _text;

	  /// When true, [finishWhitespace] and [finishMultiLineComment] skip what they
	  /// scanned and return the result of [next] instead of a WHITESPACE or
	  /// COMMENT token.
	  // TODO: this seems like a bug – this field *is* used
	  // ignore: prefer_final_fields
	  bool _inString;

	  /// Changes tokenization when in a pseudo function expression. If true then
	  /// minus signs are handled as operators instead of identifiers.
	  bool inSelectorExpression = false;

	  /// Changes tokenization when in selectors. If true, it prevents identifiers
	  /// from being treated as units. This would break things like ":lang(fr)" or
	  /// the HTML (unknown) tag name "px", which is legal to use in a selector.
	  // TODO(jmesserly): is this a problem elsewhere? "fr" for example will be
	  // processed as a "fraction" unit token, preventing it from working in
	  // places where an identifier is expected. This was breaking selectors like:
	  //     :lang(fr)
	  // The assumption that "fr" always means fraction (and similar issue with
	  // other units) doesn't seem valid. We probably should defer this
	  // analysis until we reach places in the parser where units are expected.
	  // I'm not sure this is tokenizing as described in the specs:
	  //     http://dev.w3.org/csswg/css-syntax/
	  //     http://dev.w3.org/csswg/selectors4/
	  bool inSelector = false;

	  /// Offset in [_text] of the next code unit to read.
	  int _index = 0;

	  /// Offset in [_text] where the token currently being scanned starts.
	  int _startIndex = 0;

	  /// Creates a tokenizer over [_text], optionally starting at offset [_index].
	  TokenizerBase(this._file, this._text, this._inString, [this._index = 0]);

	  /// Returns the next token; implemented by the subclass.
	  Token next();

	  /// Implemented by the subclass.
	  int getIdentifierKind();

	  /// Snapshot of Tokenizer scanning state.
	  TokenizerState get mark => TokenizerState(this);

	  /// Restore Tokenizer scanning state.
	  void restore(TokenizerState markedData) {
	    _index = markedData.index;
	    _startIndex = markedData.startIndex;
	    inSelectorExpression = markedData.inSelectorExpression;
	    inSelector = markedData.inSelector;
	  }

	  /// Consumes and returns the next code unit, or 0 at the end of the text.
	  int _nextChar() => _index < _text.length ? _text.codeUnitAt(_index++) : 0;

	  /// Returns the code unit [offset] positions ahead without consuming it, or
	  /// 0 if that position is at or past the end of the text.
	  int _peekChar([int offset = 0]) {
	    final position = _index + offset;
	    return position < _text.length ? _text.codeUnitAt(position) : 0;
	  }

	  /// Consumes the next code unit only if it equals [ch].
	  ///
	  /// Returns whether a code unit was consumed.
	  bool _maybeEatChar(int ch) {
	    if (_index >= _text.length || _text.codeUnitAt(_index) != ch) return false;
	    _index++;
	    return true;
	  }

	  /// Whether [first], followed by the upcoming characters, starts a number:
	  /// a digit, `.digit`, or a sign followed by a digit or `.digit`.
	  bool _nextCharsAreNumber(int first) {
	    if (TokenizerHelpers.isDigit(first)) return true;
	    var second = _peekChar();
	    if (first == TokenChar.DOT) return TokenizerHelpers.isDigit(second);
	    if (first == TokenChar.PLUS || first == TokenChar.MINUS) {
	      return TokenizerHelpers.isDigit(second) ||
	          (second == TokenChar.DOT && TokenizerHelpers.isDigit(_peekChar(1)));
	    }
	    return false;
	  }

	  /// Builds a token of [kind] spanning `_startIndex` to `_index`.
	  Token _finishToken(int kind) {
	    return Token(kind, _file.span(_startIndex, _index));
	  }

	  /// Builds an error token spanning `_startIndex` to `_index`, with an
	  /// optional [message].
	  Token _errorToken([String? message]) {
	    return ErrorToken(
	        TokenKind.ERROR, _file.span(_startIndex, _index), message);
	  }

	  /// Scans a run of whitespace.
	  ///
	  /// Outside of strings the run ends at (and includes) the first newline, or
	  /// just before the first non-whitespace character. Reaching the end of the
	  /// text yields an END_OF_FILE token.
	  Token finishWhitespace() {
	    // Step back one position so the loop re-reads the character before the
	    // current offset (presumably the whitespace the caller just consumed).
	    _index--;
	    while (_index < _text.length) {
	      final ch = _text.codeUnitAt(_index++);
	      if (ch == TokenChar.SPACE ||
	          ch == TokenChar.TAB ||
	          ch == TokenChar.RETURN) {
	        continue;
	      }
	      if (ch == TokenChar.NEWLINE) {
	        if (!_inString) {
	          return _finishToken(TokenKind.WHITESPACE); // note the newline?
	        }
	        continue;
	      }
	      // Not whitespace: un-read it so it starts the following token.
	      _index--;
	      return _inString ? next() : _finishToken(TokenKind.WHITESPACE);
	    }
	    return _finishToken(TokenKind.END_OF_FILE);
	  }

	  /// Scans to the end of a `/* ... */` comment, honouring nested comments.
	  ///
	  /// Returns an error token if the text ends before the comment is closed.
	  Token finishMultiLineComment() {
	    var nesting = 1;
	    do {
	      var ch = _nextChar();
	      if (ch == 0) {
	        return _errorToken();
	      } else if (ch == TokenChar.ASTERISK) {
	        if (_maybeEatChar(TokenChar.SLASH)) {
	          nesting--;
	        }
	      } else if (ch == TokenChar.SLASH) {
	        if (_maybeEatChar(TokenChar.ASTERISK)) {
	          nesting++;
	        }
	      }
	    } while (nesting > 0);

	    return _inString ? next() : _finishToken(TokenKind.COMMENT);
	  }

	  /// Consumes consecutive decimal digits, if any.
	  void eatDigits() {
	    while (_index < _text.length &&
	        TokenizerHelpers.isDigit(_text.codeUnitAt(_index))) {
	      _index++;
	    }
	  }

	  /// Returns the value (0-15) of the hex digit code unit [c], or -1 if [c]
	  /// is not a hex digit.
	  static int _hexDigit(int c) {
	    if (c >= 48 /*0*/ && c <= 57 /*9*/) {
	      return c - 48;
	    } else if (c >= 97 /*a*/ && c <= 102 /*f*/) {
	      return c - 87;
	    } else if (c >= 65 /*A*/ && c <= 70 /*F*/) {
	      return c - 55;
	    } else {
	      return -1;
	    }
	  }

	  /// Reads a hexadecimal number starting at the current position.
	  ///
	  /// With [hexLength], exactly that many hex digits are required; -1 is
	  /// returned if fewer characters remain or a non-hex character is found.
	  /// Without it, digits are read until the first non-hex character or the
	  /// end of the text, and the value read so far is returned (0 if none).
	  int readHex([int? hexLength]) {
	    int maxIndex;
	    if (hexLength == null) {
	      // Read up to and including the last character of the text.
	      maxIndex = _text.length;
	    } else {
	      // TODO(jimhug): What if this is too long?
	      maxIndex = _index + hexLength;
	      // Only fail when fewer than [hexLength] characters remain; a sequence
	      // that ends exactly at the end of the text is still readable.
	      if (maxIndex > _text.length) return -1;
	    }
	    var result = 0;
	    while (_index < maxIndex) {
	      final digit = _hexDigit(_text.codeUnitAt(_index));
	      if (digit == -1) {
	        return hexLength == null ? result : -1;
	      }
	      // Multiply by 16 rather than shift by 4 since that will result in a
	      // correct value for numbers that exceed the 32 bit precision of JS
	      // 'integers'.
	      // TODO: Figure out a better solution to integer truncation. Issue 638.
	      result = (result * 16) + digit;
	      _index++;
	    }

	    return result;
	  }

	  /// Scans the remainder of a number: more digits, then an optional fraction.
	  ///
	  /// A `.` is only taken as part of the number when a digit follows it.
	  Token finishNumber() {
	    eatDigits();

	    if (_peekChar() == TokenChar.DOT) {
	      // Handle the case of 1.toString().
	      _nextChar();
	      if (TokenizerHelpers.isDigit(_peekChar())) {
	        eatDigits();
	        return finishNumberExtra(TokenKind.DOUBLE);
	      } else {
	        _index--;
	      }
	    }

	    return finishNumberExtra(TokenKind.INTEGER);
	  }

	  /// Scans an optional exponent (`e`/`E`, an optional sign, digits) and
	  /// finishes the number token.
	  ///
	  /// The token is [kind] unless an exponent is present, in which case it is
	  /// a DOUBLE. An identifier-start character right after the number yields
	  /// an error token.
	  Token finishNumberExtra(int kind) {
	    if (_maybeEatChar(101 /*e*/) || _maybeEatChar(69 /*E*/)) {
	      kind = TokenKind.DOUBLE;
	      // Compare against character codes (TokenChar), not token kinds, and
	      // accept at most one sign.
	      if (!_maybeEatChar(TokenChar.MINUS)) {
	        _maybeEatChar(TokenChar.PLUS);
	      }
	      eatDigits();
	    }
	    if (_peekChar() != 0 && TokenizerHelpers.isIdentifierStart(_peekChar())) {
	      _nextChar();
	      return _errorToken('illegal character in number');
	    }

	    return _finishToken(kind);
	  }

	  /// Builds a string token whose value is the code units in [buf].
	  ///
	  /// The kind is STRING_PART when [isPart] is true, otherwise STRING.
	  Token _makeStringToken(List<int> buf, bool isPart) {
	    final s = String.fromCharCodes(buf);
	    final kind = isPart ? TokenKind.STRING_PART : TokenKind.STRING;
	    return LiteralToken(kind, _file.span(_startIndex, _index), s);
	  }

	  /// Builds a STRING token whose value and span are the text from [start]
	  /// to [end].
	  Token makeIEFilter(int start, int end) {
	    var filter = _text.substring(start, end);
	    return LiteralToken(TokenKind.STRING, _file.span(start, end), filter);
	  }

	  /// Builds a STRING token for a raw string, taking the value directly from
	  /// the source text between the delimiters.
	  ///
	  /// The offsets skip one prefix character plus the opening quote(s)
	  /// (presumably the raw-string marker) and the closing quote(s).
	  Token _makeRawStringToken(bool isMultiline) {
	    String s;
	    if (isMultiline) {
	      // Skip initial newline in multiline strings
	      var start = _startIndex + 4;
	      if (_text[start] == '\n') start++;
	      s = _text.substring(start, _index - 3);
	    } else {
	      s = _text.substring(_startIndex + 2, _index - 1);
	    }
	    return LiteralToken(TokenKind.STRING, _file.span(_startIndex, _index), s);
	  }

	  /// Scans the body of a triple-quoted string up to the closing three
	  /// [quote] characters, processing backslash escapes.
	  ///
	  /// Returns an error token on end of text or an invalid escape.
	  Token finishMultilineString(int quote) {
	    var buf = <int>[];
	    while (true) {
	      var ch = _nextChar();
	      if (ch == 0) {
	        return _errorToken();
	      } else if (ch == quote) {
	        if (_maybeEatChar(quote)) {
	          if (_maybeEatChar(quote)) {
	            return _makeStringToken(buf, false);
	          }
	          // Two quotes not followed by a third are literal content.
	          buf.add(quote);
	        }
	        buf.add(quote);
	      } else if (ch == TokenChar.BACKSLASH) {
	        var escapeVal = readEscapeSequence();
	        if (escapeVal == -1) {
	          return _errorToken('invalid hex escape sequence');
	        } else {
	          buf.add(escapeVal);
	        }
	      } else {
	        buf.add(ch);
	      }
	    }
	  }

	  /// Scans a string delimited by [quote]; assumes the caller has already
	  /// consumed the opening quote.
	  ///
	  /// Two further quotes start a multiline string, one further quote is an
	  /// empty string, anything else is a regular string body.
	  Token finishString(int quote) {
	    if (_maybeEatChar(quote)) {
	      if (_maybeEatChar(quote)) {
	        // skip an initial newline
	        _maybeEatChar(TokenChar.NEWLINE);
	        return finishMultilineString(quote);
	      } else {
	        return _makeStringToken(<int>[], false);
	      }
	    }
	    return finishStringBody(quote);
	  }

	  /// Scans a raw string delimited by [quote]; no escape processing is done.
	  ///
	  /// Returns an error token if the text ends before the closing quote.
	  Token finishRawString(int quote) {
	    if (_maybeEatChar(quote)) {
	      if (_maybeEatChar(quote)) {
	        return finishMultilineRawString(quote);
	      } else {
	        return _makeStringToken(<int>[], false);
	      }
	    }
	    while (true) {
	      var ch = _nextChar();
	      if (ch == quote) {
	        return _makeRawStringToken(false);
	      } else if (ch == 0) {
	        return _errorToken();
	      }
	    }
	  }

	  /// Scans a triple-quoted raw string up to the closing three [quote]
	  /// characters.
	  ///
	  /// Returns an error token if the text ends first.
	  Token finishMultilineRawString(int quote) {
	    while (true) {
	      var ch = _nextChar();
	      if (ch == 0) {
	        return _errorToken();
	      } else if (ch == quote && _maybeEatChar(quote) && _maybeEatChar(quote)) {
	        return _makeRawStringToken(true);
	      }
	    }
	  }

	  /// Scans a single-line string body up to the closing [quote], processing
	  /// backslash escapes.
	  ///
	  /// Returns an error token on end of text or an invalid escape.
	  Token finishStringBody(int quote) {
	    var buf = <int>[];
	    while (true) {
	      var ch = _nextChar();
	      if (ch == quote) {
	        return _makeStringToken(buf, false);
	      } else if (ch == 0) {
	        return _errorToken();
	      } else if (ch == TokenChar.BACKSLASH) {
	        var escapeVal = readEscapeSequence();
	        if (escapeVal == -1) {
	          return _errorToken('invalid hex escape sequence');
	        } else {
	          buf.add(escapeVal);
	        }
	      } else {
	        buf.add(ch);
	      }
	    }
	  }

	  /// Reads the escape sequence that follows a backslash and returns the
	  /// character it denotes, or -1 if the sequence is invalid.
	  ///
	  /// Supports `\n \r \f \b \t \v`, `\xHH`, `\uHHHH` and `\u{H...}`; any other
	  /// character stands for itself.
	  int readEscapeSequence() {
	    final ch = _nextChar();
	    int hexValue;
	    switch (ch) {
	      case 110 /*n*/ :
	        return TokenChar.NEWLINE;
	      case 114 /*r*/ :
	        return TokenChar.RETURN;
	      case 102 /*f*/ :
	        return TokenChar.FF;
	      case 98 /*b*/ :
	        return TokenChar.BACKSPACE;
	      case 116 /*t*/ :
	        return TokenChar.TAB;
	      case 118 /*v*/ :
	        // Vertical tab is U+000B; form feed (U+000C) is `\f` above.
	        return 0x0B;
	      case 120 /*x*/ :
	        hexValue = readHex(2);
	        break;
	      case 117 /*u*/ :
	        if (_maybeEatChar(TokenChar.LBRACE)) {
	          hexValue = readHex();
	          if (!_maybeEatChar(TokenChar.RBRACE)) {
	            return -1;
	          }
	        } else {
	          hexValue = readHex(4);
	        }
	        break;
	      default:
	        return ch;
	    }

	    if (hexValue == -1) return -1;

	    // According to the Unicode standard the high and low surrogate halves
	    // used by UTF-16 (U+D800 through U+DFFF) and values above U+10FFFF
	    // are not legal Unicode values.
	    if (hexValue < 0xD800 || (hexValue > 0xDFFF && hexValue <= 0xFFFF)) {
	      return hexValue;
	    } else if (hexValue <= 0x10FFFF) {
	      // NOTE: surrogate values (U+D800..U+DFFF) also reach this branch.
	      messages.error('unicode values greater than 2 bytes not implemented yet',
	          _file.span(_startIndex, _startIndex + 1));
	      return -1;
	    } else {
	      return -1;
	    }
	  }

	  /// Finishes a token that began with `.`: a DOUBLE if a digit follows,
	  /// otherwise a DOT token.
	  Token finishDot() {
	    if (TokenizerHelpers.isDigit(_peekChar())) {
	      eatDigits();
	      return finishNumberExtra(TokenKind.DOUBLE);
	    } else {
	      return _finishToken(TokenKind.DOT);
	    }
	  }
	}