blob: d5683e11f87ee093796388d968bb3e95a2f75da4 [file] [log] [blame]
// Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import "dart:typed_data";
// JSON conversion.
patch _parseJson(String json, reviver(var key, var value)) {
_BuildJsonListener listener;
if (reviver == null) {
listener = new _BuildJsonListener();
} else {
listener = new _ReviverJsonListener(reviver);
}
new _JsonParser(json, listener).parse();
return listener.result;
}
//// Implementation ///////////////////////////////////////////////////////////
// Simple API for JSON parsing.
abstract class _JsonListener {
void handleString(String value) {}
void handleNumber(num value) {}
void handleBool(bool value) {}
void handleNull() {}
void beginObject() {}
void propertyName() {}
void propertyValue() {}
void endObject() {}
void beginArray() {}
void arrayElement() {}
void endArray() {}
/** Called on failure to parse [source]. */
void fail(String source, int position, String message) {}
}
/**
* A [JsonListener] that builds data objects from the parser events.
*
* This is a simple stack-based object builder. It keeps the most recently
* seen value in a variable, and uses it depending on the following event.
*/
class _BuildJsonListener extends _JsonListener {
/**
* Stack used to handle nested containers.
*
* The current container is pushed on the stack when a new one is
* started. If the container is a [Map], there is also a current [key]
* which is also stored on the stack.
*/
List stack = [];
/** The current [Map] or [List] being built. */
var currentContainer;
/** The most recently read property key. */
String key;
/** The most recently read value. */
var value;
/** Pushes the currently active container (and key, if a [Map]). */
void pushContainer() {
if (currentContainer is Map) stack.add(key);
stack.add(currentContainer);
}
/** Pops the top container from the [stack], including a key if applicable. */
void popContainer() {
value = currentContainer;
currentContainer = stack.removeLast();
if (currentContainer is Map) key = stack.removeLast();
}
void handleString(String value) { this.value = value; }
void handleNumber(num value) { this.value = value; }
void handleBool(bool value) { this.value = value; }
void handleNull() { this.value = null; }
void beginObject() {
pushContainer();
currentContainer = {};
}
void propertyName() {
key = value;
value = null;
}
void propertyValue() {
Map map = currentContainer;
map[key] = value;
key = value = null;
}
void endObject() {
popContainer();
}
void beginArray() {
pushContainer();
currentContainer = [];
}
void arrayElement() {
List list = currentContainer;
currentContainer.add(value);
value = null;
}
void endArray() {
popContainer();
}
/** Read out the final result of parsing a JSON string. */
get result {
assert(currentContainer == null);
return value;
}
}
class _ReviverJsonListener extends _BuildJsonListener {
final _Reviver reviver;
_ReviverJsonListener(reviver(key, value)) : this.reviver = reviver;
void arrayElement() {
List list = currentContainer;
value = reviver(list.length, value);
super.arrayElement();
}
void propertyValue() {
value = reviver(key, value);
super.propertyValue();
}
get result {
return reviver(null, value);
}
}
class _JsonParser {
// A simple non-recursive state-based parser for JSON.
//
// Literal values accepted in states ARRAY_EMPTY, ARRAY_COMMA, OBJECT_COLON
// and strings also in OBJECT_EMPTY, OBJECT_COMMA.
// VALUE STRING : , } ] Transitions to
// EMPTY X X -> END
// ARRAY_EMPTY X X @ -> ARRAY_VALUE / pop
// ARRAY_VALUE @ @ -> ARRAY_COMMA / pop
// ARRAY_COMMA X X -> ARRAY_VALUE
// OBJECT_EMPTY X @ -> OBJECT_KEY / pop
// OBJECT_KEY @ -> OBJECT_COLON
// OBJECT_COLON X X -> OBJECT_VALUE
// OBJECT_VALUE @ @ -> OBJECT_COMMA / pop
// OBJECT_COMMA X -> OBJECT_KEY
// END
// Starting a new array or object will push the current state. The "pop"
// above means restoring this state and then marking it as an ended value.
// X means generic handling, @ means special handling for just that
// state - that is, values are handled generically, only punctuation
// cares about the current state.
// Values for states are chosen so bits 0 and 1 tell whether
// a string/value is allowed, and setting bits 0 through 2 after a value
// gets to the next state (not empty, doesn't allow a value).
// State building-block constants.
static const int INSIDE_ARRAY = 1;
static const int INSIDE_OBJECT = 2;
static const int AFTER_COLON = 3; // Always inside object.
static const int ALLOW_STRING_MASK = 8; // Allowed if zero.
static const int ALLOW_VALUE_MASK = 4; // Allowed if zero.
static const int ALLOW_VALUE = 0;
static const int STRING_ONLY = 4;
static const int NO_VALUES = 12;
// Objects and arrays are "empty" until their first property/element.
static const int EMPTY = 0;
static const int NON_EMPTY = 16;
static const int EMPTY_MASK = 16; // Empty if zero.
static const int VALUE_READ_BITS = NO_VALUES | NON_EMPTY;
// Actual states.
static const int STATE_INITIAL = EMPTY | ALLOW_VALUE;
static const int STATE_END = NON_EMPTY | NO_VALUES;
static const int STATE_ARRAY_EMPTY = INSIDE_ARRAY | EMPTY | ALLOW_VALUE;
static const int STATE_ARRAY_VALUE = INSIDE_ARRAY | NON_EMPTY | NO_VALUES;
static const int STATE_ARRAY_COMMA = INSIDE_ARRAY | NON_EMPTY | ALLOW_VALUE;
static const int STATE_OBJECT_EMPTY = INSIDE_OBJECT | EMPTY | STRING_ONLY;
static const int STATE_OBJECT_KEY = INSIDE_OBJECT | NON_EMPTY | NO_VALUES;
static const int STATE_OBJECT_COLON = AFTER_COLON | NON_EMPTY | ALLOW_VALUE;
static const int STATE_OBJECT_VALUE = AFTER_COLON | NON_EMPTY | NO_VALUES;
static const int STATE_OBJECT_COMMA = INSIDE_OBJECT | NON_EMPTY | STRING_ONLY;
// Character code constants.
static const int BACKSPACE = 0x08;
static const int TAB = 0x09;
static const int NEWLINE = 0x0a;
static const int CARRIAGE_RETURN = 0x0d;
static const int FORM_FEED = 0x0c;
static const int SPACE = 0x20;
static const int QUOTE = 0x22;
static const int PLUS = 0x2b;
static const int COMMA = 0x2c;
static const int MINUS = 0x2d;
static const int DECIMALPOINT = 0x2e;
static const int SLASH = 0x2f;
static const int CHAR_0 = 0x30;
static const int CHAR_9 = 0x39;
static const int COLON = 0x3a;
static const int CHAR_E = 0x45;
static const int LBRACKET = 0x5b;
static const int BACKSLASH = 0x5c;
static const int RBRACKET = 0x5d;
static const int CHAR_a = 0x61;
static const int CHAR_b = 0x62;
static const int CHAR_e = 0x65;
static const int CHAR_f = 0x66;
static const int CHAR_l = 0x6c;
static const int CHAR_n = 0x6e;
static const int CHAR_r = 0x72;
static const int CHAR_s = 0x73;
static const int CHAR_t = 0x74;
static const int CHAR_u = 0x75;
static const int LBRACE = 0x7b;
static const int RBRACE = 0x7d;
final String source;
final _JsonListener listener;
_JsonParser(this.source, this.listener);
/** Parses [source], or throws if it fails. */
void parse() {
final List<int> states = <int>[];
int state = STATE_INITIAL;
int position = 0;
int length = source.length;
while (position < length) {
int char = source.codeUnitAt(position);
switch (char) {
case SPACE:
case CARRIAGE_RETURN:
case NEWLINE:
case TAB:
position++;
break;
case QUOTE:
if ((state & ALLOW_STRING_MASK) != 0) fail(position);
position = parseString(position + 1);
state |= VALUE_READ_BITS;
break;
case LBRACKET:
if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
listener.beginArray();
states.add(state);
state = STATE_ARRAY_EMPTY;
position++;
break;
case LBRACE:
if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
listener.beginObject();
states.add(state);
state = STATE_OBJECT_EMPTY;
position++;
break;
case CHAR_n:
if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
position = parseNull(position);
state |= VALUE_READ_BITS;
break;
case CHAR_f:
if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
position = parseFalse(position);
state |= VALUE_READ_BITS;
break;
case CHAR_t:
if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
position = parseTrue(position);
state |= VALUE_READ_BITS;
break;
case COLON:
if (state != STATE_OBJECT_KEY) fail(position);
listener.propertyName();
state = STATE_OBJECT_COLON;
position++;
break;
case COMMA:
if (state == STATE_OBJECT_VALUE) {
listener.propertyValue();
state = STATE_OBJECT_COMMA;
position++;
} else if (state == STATE_ARRAY_VALUE) {
listener.arrayElement();
state = STATE_ARRAY_COMMA;
position++;
} else {
fail(position);
}
break;
case RBRACKET:
if (state == STATE_ARRAY_EMPTY) {
listener.endArray();
} else if (state == STATE_ARRAY_VALUE) {
listener.arrayElement();
listener.endArray();
} else {
fail(position);
}
state = states.removeLast() | VALUE_READ_BITS;
position++;
break;
case RBRACE:
if (state == STATE_OBJECT_EMPTY) {
listener.endObject();
} else if (state == STATE_OBJECT_VALUE) {
listener.propertyValue();
listener.endObject();
} else {
fail(position);
}
state = states.removeLast() | VALUE_READ_BITS;
position++;
break;
default:
if ((state & ALLOW_VALUE_MASK) != 0) fail(position);
position = parseNumber(char, position);
state |= VALUE_READ_BITS;
break;
}
}
if (state != STATE_END) fail(position);
}
/**
* Parses a "true" literal starting at [position].
*
* [:source[position]:] must be "t".
*/
int parseTrue(int position) {
assert(source.codeUnitAt(position) == CHAR_t);
if (source.length < position + 4) fail(position, "Unexpected identifier");
if (source.codeUnitAt(position + 1) != CHAR_r ||
source.codeUnitAt(position + 2) != CHAR_u ||
source.codeUnitAt(position + 3) != CHAR_e) {
fail(position);
}
listener.handleBool(true);
return position + 4;
}
/**
* Parses a "false" literal starting at [position].
*
* [:source[position]:] must be "f".
*/
int parseFalse(int position) {
assert(source.codeUnitAt(position) == CHAR_f);
if (source.length < position + 5) fail(position, "Unexpected identifier");
if (source.codeUnitAt(position + 1) != CHAR_a ||
source.codeUnitAt(position + 2) != CHAR_l ||
source.codeUnitAt(position + 3) != CHAR_s ||
source.codeUnitAt(position + 4) != CHAR_e) {
fail(position);
}
listener.handleBool(false);
return position + 5;
}
/**
* Parses a "null" literal starting at [position].
*
* [:source[position]:] must be "n".
*/
int parseNull(int position) {
assert(source.codeUnitAt(position) == CHAR_n);
if (source.length < position + 4) fail(position, "Unexpected identifier");
if (source.codeUnitAt(position + 1) != CHAR_u ||
source.codeUnitAt(position + 2) != CHAR_l ||
source.codeUnitAt(position + 3) != CHAR_l) {
fail(position);
}
listener.handleNull();
return position + 4;
}
/**
* Parses a string value.
*
* Initial [position] is right after the initial quote.
* Returned position right after the final quote.
*/
int parseString(int position) {
// Format: '"'([^\x00-\x1f\\\"]|'\\'[bfnrt/\\"])*'"'
// Initial position is right after first '"'.
int start = position;
while (position < source.length) {
int char = source.codeUnitAt(position++);
// BACKSLASH is larger than QUOTE and SPACE.
if (char > BACKSLASH) {
continue;
}
if (char == BACKSLASH) {
return parseStringWithEscapes(start, position - 1);
}
if (char == QUOTE) {
listener.handleString(source.substring(start, position - 1));
return position;
}
if (char < SPACE) {
fail(position - 1, "Control character in string");
}
}
fail(start - 1, "Unterminated string");
}
int parseStringWithEscapes(start, position) {
// Backslash escape detected. Collect character codes for rest of string.
int firstEscape = position;
List<int> chars = <int>[];
for (int i = start; i < firstEscape; i++) {
chars.add(source.codeUnitAt(i));
}
position++;
while (true) {
if (position == source.length) {
fail(start - 1, "Unterminated string");
}
int char = source.codeUnitAt(position);
switch (char) {
case CHAR_b: char = BACKSPACE; break;
case CHAR_f: char = FORM_FEED; break;
case CHAR_n: char = NEWLINE; break;
case CHAR_r: char = CARRIAGE_RETURN; break;
case CHAR_t: char = TAB; break;
case SLASH:
case BACKSLASH:
case QUOTE:
break;
case CHAR_u:
int hexStart = position - 1;
int value = 0;
for (int i = 0; i < 4; i++) {
position++;
if (position == source.length) {
fail(start - 1, "Unterminated string");
}
char = source.codeUnitAt(position);
char -= 0x30;
if (char < 0) fail(hexStart, "Invalid unicode escape");
if (char < 10) {
value = value * 16 + char;
} else {
char = (char | 0x20) - 0x31;
if (char < 0 || char > 5) {
fail(hexStart, "Invalid unicode escape");
}
value = value * 16 + char + 10;
}
}
char = value;
break;
default:
if (char < SPACE) fail(position, "Control character in string");
fail(position, "Unrecognized string escape");
}
do {
chars.add(char);
position++;
if (position == source.length) fail(start - 1, "Unterminated string");
char = source.codeUnitAt(position);
if (char == QUOTE) {
String result = new String.fromCharCodes(chars);
listener.handleString(result);
return position + 1;
}
if (char < SPACE) {
fail(position, "Control character in string");
}
} while (char != BACKSLASH);
position++;
}
}
int parseNumber(int char, int position) {
// Format:
// '-'?('0'|[1-9][0-9]*)('.'[0-9]+)?([eE][+-]?[0-9]+)?
int start = position;
int length = source.length;
int intValue = 0; // Collect int value while parsing.
int intSign = 1;
bool isDouble = false;
// Break this block when the end of the number literal is reached.
// At that time, position points to the next character, and isDouble
// is set if the literal contains a decimal point or an exponential.
parsing: {
if (char == MINUS) {
intSign = -1;
position++;
if (position == length) fail(position, "Missing expected digit");
char = source.codeUnitAt(position);
}
if (char < CHAR_0 || char > CHAR_9) {
fail(position, "Missing expected digit");
}
if (char == CHAR_0) {
position++;
if (position == length) break parsing;
char = source.codeUnitAt(position);
if (CHAR_0 <= char && char <= CHAR_9) {
fail(position);
}
} else {
do {
intValue = intValue * 10 + (char - CHAR_0);
position++;
if (position == length) break parsing;
char = source.codeUnitAt(position);
} while (CHAR_0 <= char && char <= CHAR_9);
}
if (char == DECIMALPOINT) {
isDouble = true;
position++;
if (position == length) fail(position, "Missing expected digit");
char = source.codeUnitAt(position);
if (char < CHAR_0 || char > CHAR_9) fail(position);
do {
position++;
if (position == length) break parsing;
char = source.codeUnitAt(position);
} while (CHAR_0 <= char && char <= CHAR_9);
}
if (char == CHAR_e || char == CHAR_E) {
isDouble = true;
position++;
if (position == length) fail(position, "Missing expected digit");
char = source.codeUnitAt(position);
if (char == PLUS || char == MINUS) {
position++;
if (position == length) fail(position, "Missing expected digit");
char = source.codeUnitAt(position);
}
if (char < CHAR_0 || char > CHAR_9) {
fail(position, "Missing expected digit");
}
do {
position++;
if (position == length) break parsing;
char = source.codeUnitAt(position);
} while (CHAR_0 <= char && char <= CHAR_9);
}
}
if (!isDouble) {
listener.handleNumber(intSign * intValue);
return position;
}
// Consider whether we can have an int/double.parse that works on part of
// a string, to avoid creating the substring.
String literal = source.substring(start, position);
// This correctly creates -0.0 for doubles.
listener.handleNumber(double.parse(literal));
return position;
}
void fail(int position, [String message]) {
if (message == null) message = "Unexpected character";
listener.fail(source, position, message);
// If the listener didn't throw, do it here.
String slice;
int sliceEnd = position + 20;
if (sliceEnd > source.length) {
slice = "'${source.substring(position)}'";
} else {
slice = "'${source.substring(position, sliceEnd)}...'";
}
throw new FormatException("Unexpected character at $position: $slice");
}
}
// UTF-8 conversion.
patch class _Utf8Encoder {
/* patch */ static List<int> _createBuffer(int size) => new Uint8List(size);
}