pkg/compiler/lib/src/js_backend/string_abbreviation.dart - sdk.git - Git at Google

 // Copyright (c) 2020, the Dart project authors.  Please see the AUTHORS file
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.

 import 'package:front_end/src/api_unstable/dart2js.dart'
     show $0, $9, $A, $Z, $a, $z;

 /// Returns a list of strings that are short valid identifiers based on the
 /// corresponding full strings in [strings].
 ///
 /// [strings] must not contain the empty string.
 /// [strings] must have no duplicates.
 List<String> abbreviateToIdentifiers(Iterable<String> strings,
     {int minLength = 6}) {
   var nodes = [for (final string in strings) _Node(string)];
   _partition(nodes, minLength, [], 0);
   return [for (final node in nodes) node.assignment];
 }

 class _Node {
   final String string;
   String assignment;
   _Node(this.string);
 }

 /// Walk the prefix tree or TRIE of [nodes], assigning compressed names built
 /// from the distinguishing characters along the path.
 ///
 /// - [index] is the position of the first potentially different character,
 ///   i.e. the current TRIE depth.
 /// - [path] contains the prefix of the compressed name at this depth.
 /// - Path compression starts after the first [minLength] characters.
 void _partition(
     List<_Node> nodes, int minLength, List<String> path, int index) {
   while (true) {
     // Handle trivial partitions.
     if (nodes.length == 0) return;
     if (nodes.length == 1 && path.length >= minLength) {
       String name = path.join();
       assert(name.isNotEmpty);
       nodes.single.assignment = name;
       return;
     }

     // Partition on the code unit at position [index], setting [terminating] if
     // some string ends at this length;
     Map<int, List<_Node>> partition = {};
     _Node terminating;

     for (final node in nodes) {
       String string = node.string;
       assert(string.length > 0);
       if (index < string.length) {
         int codeUnit = string.codeUnitAt(index);
         (partition[codeUnit] ??= []).add(node);
       } else {
         assert(terminating == null); // i.e. no duplicates.
         terminating = node;
       }
     }

     if (terminating != null) {
       terminating.assignment = path.join();
     }

     if (partition.length == 0) return;

     if (partition.length > 1) {
       var keys = partition.keys.toList();
       var keyEncodings = _discriminators(keys, path.isEmpty);
       for (int key in keys) {
         var children = partition[key];
         var discriminator = keyEncodings[key];
         _partition(children, minLength, [...path, discriminator], index + 1);
       }
       return;
     }

     assert(partition.length == 1);
     // All the strings have the same code unit at [index]. Use iteration to
     // find next partition point to avoid recursing down the whole string
     // length. The path is compressed by omitting to add fragments to [path]
     // unless we are near the start of the string.

     int codeUnit = partition.keys.single;
     // Add some characters to name to distinguish from terminating strings.
     // Add the first few legal identifier characters of the string regardless.
     if (terminating != null || path.length < minLength) {
       path.add(_isIdentifier(codeUnit, path.isEmpty)
           ? String.fromCharCode(codeUnit)
           : '_');
     }
     nodes = partition.values.single;
     index += 1;
   }
 }

 Map<int, String> _discriminators(List<int> keys, bool atStart) {
   // Assign each partition a distinguishing short string. If the partition key
   // is a valid identifier character, it can be used, otherwise we could use
   // `'_'` or an escaped code like `'x3b'` or `'u12ef'`. If we use an escape
   // like `'x3b'` then we need to be careful with the partition key `x`, as it
   // might be followed by `3b`. We avoid this problem without lookahead by
   // encoding `x` as an escape (i.e. `'x78'`) if there is another `x`-escape.

   const xCode = 0x78;
   const uCode = 0x75;

   bool hasX = false;
   bool hasU = false;

   int xEscapes = 0;
   int uEscapes = 0;
   for (int key in keys) {
     if (_isIdentifier(key, atStart)) {
       if (key == xCode) hasX = true;
       if (key == uCode) hasU = true;
     } else if (key < 256) {
       xEscapes++;
     } else {
       uEscapes++;
     }
   }

   Map<int, String> encoding = {};
   bool escapeToUnderscore = false;

   if (uEscapes + xEscapes <= 1) {
     escapeToUnderscore = true;
     xEscapes = uEscapes = 0;
   }

   if (uEscapes > 0 && hasU) {
     encoding[uCode] = 'x75';
     xEscapes++;
   }
   if (xEscapes > 0 && hasX) {
     encoding[xCode] = 'x78';
   }

   for (int key in keys) {
     if (encoding.containsKey(key)) continue;
     if (_isIdentifier(key, atStart)) {
       encoding[key] = String.fromCharCode(key);
     } else {
       if (escapeToUnderscore) {
         encoding[key] = '_';
       } else if (key < 256) {
         encoding[key] = 'x' + key.toRadixString(16).padLeft(2, '0');
       } else {
         encoding[key] = 'u' + key.toRadixString(16).padLeft(4, '0');
       }
     }
   }

   return encoding;
 }

 bool _isIdentifier(int codeUnit, bool atStart) {
   return atStart ? _isAsciiAlpha(codeUnit) : _isAsciiAlphanumeric(codeUnit);
 }

 bool _isAsciiAlphanumeric(int codeUnit) {
   return $0 <= codeUnit && codeUnit <= $9 || _isAsciiAlpha(codeUnit);
 }

 bool _isAsciiAlpha(int codeUnit) {
   return $A <= codeUnit && codeUnit <= $Z || $a <= codeUnit && codeUnit <= $z;
 }
	// Copyright (c) 2020, the Dart project authors. Please see the AUTHORS file
	// for details. All rights reserved. Use of this source code is governed by a
	// BSD-style license that can be found in the LICENSE file.

	import 'package:front_end/src/api_unstable/dart2js.dart'
	show $0, $9, $A, $Z, $a, $z;

	/// Returns a list of strings that are short valid identifiers based on the
	/// corresponding full strings in [strings].
	///
	/// [strings] must not contain the empty string.
	/// [strings] must have no duplicates.
	List<String> abbreviateToIdentifiers(Iterable<String> strings,
	{int minLength = 6}) {
	var nodes = [for (final string in strings) _Node(string)];
	_partition(nodes, minLength, [], 0);
	return [for (final node in nodes) node.assignment];
	}

	class _Node {
	final String string;
	String assignment;
	_Node(this.string);
	}

	/// Walk the prefix tree or TRIE of [nodes], assigning compressed names built
	/// from the distinguishing characters along the path.
	///
	/// - [index] is the position of the first potentially different character,
	/// i.e. the current TRIE depth.
	/// - [path] contains the prefix of the compressed name at this depth.
	/// - Path compression starts after the first [minLength] characters.
	void _partition(
	List<_Node> nodes, int minLength, List<String> path, int index) {
	while (true) {
	// Handle trivial partitions.
	if (nodes.length == 0) return;
	if (nodes.length == 1 && path.length >= minLength) {
	String name = path.join();
	assert(name.isNotEmpty);
	nodes.single.assignment = name;
	return;
	}

	// Partition on the code unit at position [index], setting [terminating] if
	// some string ends at this length;
	Map<int, List<_Node>> partition = {};
	_Node terminating;

	for (final node in nodes) {
	String string = node.string;
	assert(string.length > 0);
	if (index < string.length) {
	int codeUnit = string.codeUnitAt(index);
	(partition[codeUnit] ??= []).add(node);
	} else {
	assert(terminating == null); // i.e. no duplicates.
	terminating = node;
	}
	}

	if (terminating != null) {
	terminating.assignment = path.join();
	}

	if (partition.length == 0) return;

	if (partition.length > 1) {
	var keys = partition.keys.toList();
	var keyEncodings = _discriminators(keys, path.isEmpty);
	for (int key in keys) {
	var children = partition[key];
	var discriminator = keyEncodings[key];
	_partition(children, minLength, [...path, discriminator], index + 1);
	}
	return;
	}

	assert(partition.length == 1);
	// All the strings have the same code unit at [index]. Use iteration to
	// find next partition point to avoid recursing down the whole string
	// length. The path is compressed by omitting to add fragments to [path]
	// unless we are near the start of the string.

	int codeUnit = partition.keys.single;
	// Add some characters to name to distinguish from terminating strings.
	// Add the first few legal identifier characters of the string regardless.
	if (terminating != null \|\| path.length < minLength) {
	path.add(_isIdentifier(codeUnit, path.isEmpty)
	? String.fromCharCode(codeUnit)
	: '_');
	}
	nodes = partition.values.single;
	index += 1;
	}
	}

	Map<int, String> _discriminators(List<int> keys, bool atStart) {
	// Assign each partition a distinguishing short string. If the partition key
	// is a valid identifier character, it can be used, otherwise we could use
	// `'_'` or an escaped code like `'x3b'` or `'u12ef'`. If we use an escape
	// like `'x3b'` then we need to be careful with the partition key `x`, as it
	// might be followed by `3b`. We avoid this problem without lookahead by
	// encoding `x` as an escape (i.e. `'x78'`) if there is another `x`-escape.

	const xCode = 0x78;
	const uCode = 0x75;

	bool hasX = false;
	bool hasU = false;

	int xEscapes = 0;
	int uEscapes = 0;
	for (int key in keys) {
	if (_isIdentifier(key, atStart)) {
	if (key == xCode) hasX = true;
	if (key == uCode) hasU = true;
	} else if (key < 256) {
	xEscapes++;
	} else {
	uEscapes++;
	}
	}

	Map<int, String> encoding = {};
	bool escapeToUnderscore = false;

	if (uEscapes + xEscapes <= 1) {
	escapeToUnderscore = true;
	xEscapes = uEscapes = 0;
	}

	if (uEscapes > 0 && hasU) {
	encoding[uCode] = 'x75';
	xEscapes++;
	}
	if (xEscapes > 0 && hasX) {
	encoding[xCode] = 'x78';
	}

	for (int key in keys) {
	if (encoding.containsKey(key)) continue;
	if (_isIdentifier(key, atStart)) {
	encoding[key] = String.fromCharCode(key);
	} else {
	if (escapeToUnderscore) {
	encoding[key] = '_';
	} else if (key < 256) {
	encoding[key] = 'x' + key.toRadixString(16).padLeft(2, '0');
	} else {
	encoding[key] = 'u' + key.toRadixString(16).padLeft(4, '0');
	}
	}
	}

	return encoding;
	}

	bool _isIdentifier(int codeUnit, bool atStart) {
	return atStart ? _isAsciiAlpha(codeUnit) : _isAsciiAlphanumeric(codeUnit);
	}

	bool _isAsciiAlphanumeric(int codeUnit) {
	return $0 <= codeUnit && codeUnit <= $9 \|\| _isAsciiAlpha(codeUnit);
	}

	bool _isAsciiAlpha(int codeUnit) {
	return $A <= codeUnit && codeUnit <= $Z \|\| $a <= codeUnit && codeUnit <= $z;
	}