// Copyright (c) 2020, the Dart project authors. Please see the AUTHORS file | |
// for details. All rights reserved. Use of this source is governed by a | |
// BSD-style license that can be found in the LICENSE file. | |
import "dart:io"; // For writing directly to file. | |
import 'package:charcode/ascii.dart'; | |
import "src/uflags.dart"; | |
/// Generates Dart constant declarations for character codes. | |
/// | |
/// Usage: | |
/// ```text | |
/// charcode (character-range | rename | flags)* | |
/// ``` | |
/// | |
/// ## Character ranges | |
/// | |
/// A `character-range` is either | |
/// * a single charater, | |
/// * an escaped character `\n`, `\r`, `\t`, `\xHH`, `\uHHHH` or `\DD*`, | |
/// where `H` is a hexadecimal digit and `D` is a decimal digit. | |
/// * two characters separated by `-` or the `\d`, `\w` or `\s` escapes, or | |
/// * a sequence of character ranges. | |
/// Examples include `a`, `a-z`, or `abe-gz`. | |
/// Accepts `\n`, `\r`, `\t`, `\`, `\-`, `\xHH` and `\uHHHHHH` escapes. | |
/// | |
/// A (re)name declaration is a single or escaped character, a `=` and an | |
/// identifier name, optionally followed by a `:` and a description | |
/// Example: `x=cross:"A cross product."`. | |
/// The naming names or renames the character and adds or changes | |
/// the associated description. | |
/// It will not add the character to the output. | |
/// Following occurrences of the character will use the new name. | |
/// Example: `y y=why x-z` will generate `$y` for the character | |
/// code of "y", then `$x`, `$why` and `$z` as well. | |
/// | |
/// ## Flags | |
/// | |
/// `-o` followed by a file name makes output be written to that file | |
/// instead of to stdout. | |
/// | |
/// `-f` followed by a file name will read declarations from that file | |
/// as well as from the command line. Each line in the file is handled | |
/// like a command line argument (not including flags.) | |
/// Intended for importing a set of name declarations. | |
/// The line is not split on spaces, so it allows comments with spaces | |
/// like: | |
/// ```text | |
/// x=cross:The cross character. | |
/// ``` | |
/// | |
/// `-p` followed by zero more characters makes those characters be | |
/// used as prefix for later generated names. The default is the `$` | |
/// character. If setting an empty prefix, names starting with digits | |
/// will still use the previously configured prefix character since | |
/// identifiers cannot start with a digit. | |
/// | |
/// `-h` includes declarations for HTML entity names. | |
/// | |
/// `--` disables flags in the following command line arguments. | |
void main(List<String> args) { | |
String? outputFile; | |
var declarations = CharcodeBuilder(); | |
addAscii(declarations); | |
var flags = Flags<String>() | |
..add(FlagConfig('?', null, 'help', | |
description: "Display this usage information")) | |
..add(FlagConfig.optionalParameter("p", "p", "prefix", r"$", | |
description: "Sets prefix for later generated constants", | |
valueDescription: "PREFIX")) | |
..add(FlagConfig.requiredParameter("o", "o", "output", | |
description: "Write generated code to file instead of stdout", | |
valueDescription: "FILE")) | |
..add(FlagConfig.requiredParameter("f", "f", "input", | |
description: | |
"Read instructions from file. Each line of the file is treated as a non-flag command line entry.", | |
valueDescription: "FILE")) | |
..add(FlagConfig("h", "h", "html", | |
description: "Include HTML entity names in predefined names")); | |
for (var arg in parseFlags(flags, args, stderr.writeln)) { | |
var key = arg.key; | |
if (key == null) { | |
// Not a flag, value is command line argument. | |
declarations.parse(arg.value!); | |
} else { | |
switch (key) { | |
case "p": | |
var prefix = arg.value!; | |
if (prefix.isNotEmpty && | |
(!_isIdentifierPart(prefix) || !_startsWithNonDigit(prefix))) { | |
warn("Invalid prefix, must be valid identifier: $prefix"); | |
continue; | |
} | |
declarations.prefix = prefix; | |
break; | |
case "o": | |
if (arg.value != null) { | |
outputFile = arg.value; | |
} else { | |
warn("No file name after `-o` option"); | |
} | |
break; | |
case "f": | |
var inputFile = arg.value; | |
if (inputFile != null) { | |
var input = File(inputFile).readAsLinesSync(); | |
for (var line in input) { | |
line = line.trimRight(); | |
if (line.isNotEmpty) declarations.parse(line); | |
} | |
} else { | |
warn("No file name after `-f` option"); | |
} | |
break; | |
case "h": | |
addHtmlEntities(declarations); | |
break; | |
case "?": | |
var buffer = StringBuffer(usageString); | |
flags.writeUsage(buffer); | |
stdout | |
..write(buffer) | |
..flush(); | |
return; | |
} | |
} | |
} | |
IOSink output = stdout; | |
if (outputFile != null) { | |
output = File(outputFile).openWrite(); | |
} | |
declarations.writeTo(output); | |
if (outputFile != null) { | |
output.close(); | |
} | |
} | |
void warn(String warning) { | |
stderr.writeln(warning); | |
} | |
/// A single declaration to be written to the output. | |
class Entry implements Comparable<Entry> { | |
/// The code point. | |
final int charCode; | |
/// The non-prefixed name to be given | |
final String name; | |
/// The prefix to put before the name. | |
/// | |
/// The prefix may be empty, but it won't be if | |
/// the name be used as an identifier by itself | |
/// (which means it starts with a digit). | |
final String prefix; | |
/// Any documentation to add to the declaration. | |
final String? comment; | |
Entry(this.charCode, this.name, this.prefix, this.comment); | |
/// Emits a declaration to [output]. | |
/// | |
/// The constant declaration declares the identifier [prefix]+[name] | |
/// to have the value [codePoint]. | |
void writeTo(IOSink output) { | |
var hex = charCode.toRadixString(16).padLeft(2, "0"); | |
if (comment != null) { | |
output.writeln("/// $comment"); | |
} | |
output.writeln("const int $prefix$name = 0x$hex;"); | |
} | |
/// Orders entries by [charCode] first, then by | |
/// prefixed and name. | |
int compareTo(Entry other) { | |
var delta = charCode - other.charCode; | |
if (delta == 0) { | |
delta = prefix.compareTo(other.prefix); | |
if (delta == 0) { | |
delta = name.compareTo(other.name); | |
} | |
} | |
return delta; | |
} | |
} | |
/// A name and optional description currently assigned to a code point. | |
class CharacterDeclaration { | |
/// The unprefixed name to use for the code point. | |
final String name; | |
/// An optional description. | |
final String? description; | |
CharacterDeclaration(this.name, this.description); | |
} | |
/// Character code declaration builder. | |
/// | |
/// Remembers names and descriptions assigned to code points, | |
/// and accumulates requestes for declarations for a number | |
/// of those. | |
/// Allows the prefix to be changed between declaration | |
/// requests. | |
class CharcodeBuilder { | |
/// Prefix used for all characters being added. | |
String _prefix = r"$"; | |
/// Reserve prefix used for digits when [_prefix] is empty. | |
String _reservePrefix = r"$"; | |
final Map<int, CharacterDeclaration> _declarations = {}; | |
final Set<String> _entryNames = {}; | |
final List<Entry> _entries = []; | |
void parse(String arg) { | |
var scanner = CharScanner(arg); | |
var allowRename = true; | |
while (true) { | |
var start = scanner.index; | |
var char = scanner.next(); | |
if (char < 0) break; | |
if (char == $backslash) { | |
var escape = interpretCharEscape(scanner); | |
if (escape >= 0) { | |
char == escape; | |
} else if (interpretRangeEscape(scanner)) { | |
allowRename = false; | |
continue; | |
} | |
} | |
var peek = scanner.peek; | |
if (allowRename && peek == $equal) { | |
// A rename. | |
scanner.skip(); | |
var newName = scanner.rest; | |
if (newName.isEmpty || !_isIdentifierPart(newName)) { | |
warn("Invalid name: $arg"); | |
} else { | |
var colon = newName.indexOf(":"); | |
if (colon >= 0) { | |
var description = newName.substring(colon + 1); | |
newName = newName.substring(0, colon); | |
rename(char, newName, description); | |
} else { | |
rename(char, newName); | |
} | |
} | |
break; | |
} | |
allowRename = false; | |
if (peek != $minus) { | |
addChar(char); | |
} else { | |
var afterChar = scanner.index; | |
scanner.skip(); | |
var secondChar = scanner.next(); | |
if (secondChar < 0) { | |
// Trailing `-`. | |
addChar(char); | |
addChar(peek); | |
continue; | |
} | |
if (secondChar == $backslash) { | |
var escape = interpretCharEscape(scanner); | |
if (escape >= 0) secondChar = escape; | |
} | |
if (char <= secondChar) { | |
addCharRange(char, secondChar); | |
} else { | |
warn( | |
"Invalid range, end befroe start: ${scanner.input.substring(start, scanner.index)}"); | |
addChar(char); | |
scanner.index = afterChar; | |
} | |
} | |
} | |
} | |
/// Recognize single-character escapes. | |
/// | |
/// - `\n`: Line feed | |
/// - `\r`: Carriage return | |
/// - `\t`: Tab | |
/// - `\x..`: Hex escape, up to two digits. | |
/// - `\u......`: Hex escape, up to six digits. | |
/// - `\[0-9]+`: Decimal escape. | |
/// - `\-` and `\\`: Identitiy escapes. | |
static int interpretCharEscape(CharScanner scanner) { | |
if (scanner.hasNext) { | |
var char = scanner.next(); | |
if (char == $n) return $lf; | |
if (char == $r) return $cr; | |
if (char == $t) return $tab; | |
if (char == $u) return scanner.hex(6); | |
if (char == $x) return scanner.hex(2); | |
if (char == $backslash) return char; | |
if (char == $minus) return char; | |
if (_isDigit(char)) return scanner.dec(char); | |
scanner.index--; | |
} | |
return -1; | |
} | |
/// Renames `charCode` to `name` in following declarations. | |
/// | |
/// Returns whether the name is valid as an identifier. | |
/// If it is not valid, no change is made. | |
bool rename(int charCode, String name, [String? description]) { | |
if (!_isIdentifierPart(name)) return false; | |
_declarations.putIfAbsent( | |
charCode, () => CharacterDeclaration(name, description)); | |
return true; | |
} | |
/// Adds a single character to the output. | |
bool addChar(int charCode) { | |
var declaration = _declarations[charCode]; | |
if (declaration == null) return false; | |
var name = declaration.name; | |
if (_entryNames.contains(name)) { | |
warn("Existing declaration with name \"$name\" for character " | |
"'${String.fromCharCode(charCode)}' " | |
"(U+${charCode.toRadixString(16).toUpperCase().padLeft(4, "0")})"); | |
return false; | |
} | |
var prefix = _prefix; | |
if (!_startsWithNonDigit(declaration.name)) prefix = _reservePrefix; | |
_entries.add(Entry(charCode, name, prefix, declaration.description)); | |
_entryNames.add(name); | |
return true; | |
} | |
/// Adds a range of characters, both ends inclusive. | |
/// | |
/// If a character in the range has no name, nothing is emitted fo rit. | |
void addCharRange(int start, int end) { | |
for (var charCode = start; charCode <= end; charCode++) { | |
addChar(charCode); // Ignore return value. | |
} | |
} | |
/// The current prefix used to prefix declarations. | |
/// | |
/// May be emtpy, otherwise needs to be a valid identifier. | |
String get prefix => _prefix; | |
set prefix(String prefix) { | |
if (!_isIdentifierPart(prefix) || | |
(prefix.isNotEmpty && !_startsWithNonDigit(prefix))) { | |
throw ArgumentError.value(prefix, "Not a valid identifier start"); | |
} | |
_prefix = prefix; | |
if (prefix.isNotEmpty) { | |
_reservePrefix = prefix; | |
} | |
} | |
void writeTo(IOSink output) { | |
_entries.sort((a, b) => a.charCode - b.charCode); | |
var separator = ""; | |
for (var entry in _entries) { | |
output.write(separator); | |
entry.writeTo(output); | |
separator = "\n"; | |
} | |
} | |
bool interpretRangeEscape(CharScanner scanner) { | |
if (!scanner.hasNext) return false; | |
var char = scanner.peek; | |
if (char == $d) { | |
addCharRange($0, $9); | |
} else if (char == $s) { | |
addChar($space); | |
addChar($tab); | |
addChar($cr); | |
addChar($lf); | |
} else if (char == $w) { | |
addCharRange($0, $9); | |
addCharRange($A, $Z); | |
addCharRange($a, $z); | |
addChar($_); | |
addChar($$); | |
} else { | |
return false; | |
} | |
scanner.skip(); | |
return true; | |
} | |
} | |
/// Whether the string starts with a non-digit. | |
/// | |
/// Only used on strings that have passed [_isIdentifierPart], | |
/// and if those start with a non-digit, the string is a valid identifier. | |
bool _startsWithNonDigit(String string) => | |
string.isNotEmpty && (string.codeUnitAt(0) ^ $0) > 9; | |
/// Whether [string] contains only characters that can be in an identifier. | |
bool _isIdentifierPart(String string) { | |
for (var i = 0; i < string.length; i++) { | |
var char = string.codeUnitAt(i); | |
if (char ^ $0 <= 9) continue; | |
var lowerCaseChar = char | 0x20; | |
if (lowerCaseChar >= $a && lowerCaseChar <= $z) continue; | |
if (char == $$ || char == $_) continue; | |
return false; | |
} | |
return true; | |
} | |
class CharScanner { | |
final String input; | |
int index = 0; | |
CharScanner(this.input); | |
// The next character of input. | |
int get peek => index < input.length ? input.codeUnitAt(index) : -1; | |
bool get hasNext => index < input.length; | |
void skip() { | |
if (index >= input.length) throw StateError("Nothing to skip"); | |
index++; | |
} | |
String get rest => input.substring(index); | |
int next() => index < input.length ? input.codeUnitAt(index++) : -1; | |
/// Parses a hex number with at most [maxDigits] digits. | |
int hex(int maxDigits) { | |
var result = 0; | |
while (maxDigits > 0 && index < input.length) { | |
var char = input.codeUnitAt(index); | |
var hexValue = _hexValue(char); | |
if (hexValue < 0) break; | |
result = result * 16 + hexValue; | |
maxDigits--; | |
index++; | |
} | |
return result; | |
} | |
/// Parses a decimal number with the first digit already read. | |
int dec(int firstDigit) { | |
var result = firstDigit ^ 0x30; | |
while (index < input.length) { | |
var char = input.codeUnitAt(index); | |
var digit = char ^ 0x30; | |
if (digit > 9) break; | |
result = result * 10 + digit; | |
index++; | |
} | |
return result; | |
} | |
static int _hexValue(int char) { | |
var digit = char ^ 0x30; | |
if (digit <= 9) return digit; | |
var lc = (char | 0x20); | |
if (lc >= 0x61 && lc <= 0x66) return lc - (0x61 - 10); | |
return -1; | |
} | |
} | |
bool _isDigit(int char) => (char ^ 0x30) <= 9; | |
void addAscii(CharcodeBuilder descriptions) { | |
// ASCII character comments and default names. | |
descriptions | |
// Control characters | |
..rename(0x00, "nul", '"Null character" control character.') | |
..rename(0x01, "soh", '"Start of Header" control character.') | |
..rename(0x02, "stx", '"Start of Text" control character.') | |
..rename(0x03, "etx", '"End of Text" control character.') | |
..rename(0x04, "eot", '"End of Transmission" control character.') | |
..rename(0x05, "enq", '"Enquiry" control character.') | |
..rename(0x06, "ack", '"Acknowledgment" control character.') | |
..rename(0x07, "bel", '"Bell" control character.') | |
..rename(0x08, "bs", '"Backspace" control character.') | |
..rename(0x09, "tab", '"Horizontal Tab" control character, common name.') | |
..rename(0x0A, "lf", '"Line feed" control character.') | |
..rename(0x0B, "vt", '"Vertical Tab" control character.') | |
..rename(0x0C, "ff", '"Form feed" control character.') | |
..rename(0x0D, "cr", '"Carriage return" control character.') | |
..rename(0x0E, "so", '"Shift Out" control character.') | |
..rename(0x0F, "si", '"Shift In" control character.') | |
..rename(0x10, "dle", '"Data Link Escape" control character.') | |
..rename(0x11, "dc1", '"Device Control 1" control character (oft. XON).') | |
..rename(0x12, "dc2", '"Device Control 2" control character.') | |
..rename(0x13, "dc3", '"Device Control 3" control character (oft. XOFF).') | |
..rename(0x14, "dc4", '"Device Control 4" control character.') | |
..rename(0x15, "nak", '"Negative Acknowledgment" control character.') | |
..rename(0x16, "syn", '"Synchronous idle" control character.') | |
..rename(0x17, "etb", '"End of Transmission Block" control character.') | |
..rename(0x18, "can", '"Cancel" control character.') | |
..rename(0x19, "em", '"End of Medium" control character.') | |
..rename(0x1A, "sub", '"Substitute" control character.') | |
..rename(0x1B, "esc", '"Escape" control character.') | |
..rename(0x1C, "fs", '"File Separator" control character.') | |
..rename(0x1D, "gs", '"Group Separator" control character.') | |
..rename(0x1E, "rs", '"Record Separator" control character.') | |
..rename(0x1F, "us", '"Unit Separator" control character.') | |
// Visible characters. | |
..rename(0x20, "space", 'Space character.') | |
..rename(0x21, "exclamation", 'Character `!`.') | |
..rename(0x22, "quot", 'Character `"`, short name.') | |
..rename(0x23, "hash", 'Character `#`.') | |
..rename(0x24, "\$", 'Character `\$`.') | |
..rename(0x25, "percent", 'Character `%`.') | |
..rename(0x26, "amp", 'Character `&`, short name.') | |
..rename(0x27, "apos", 'Character "\'".') | |
..rename(0x28, "lparen", 'Character `(`.') | |
..rename(0x29, "rparen", 'Character `)`.') | |
..rename(0x2A, "asterisk", 'Character `*`.') | |
..rename(0x2B, "plus", 'Character `+`.') | |
..rename(0x2C, "comma", 'Character `,`.') | |
..rename(0x2D, "minus", 'Character `-`.') | |
..rename(0x2E, "dot", 'Character `.`.') | |
..rename(0x2F, "slash", 'Character `/`.') | |
..rename(0x30, "0", 'Character `0`.') | |
..rename(0x31, "1", 'Character `1`.') | |
..rename(0x32, "2", 'Character `2`.') | |
..rename(0x33, "3", 'Character `3`.') | |
..rename(0x34, "4", 'Character `4`.') | |
..rename(0x35, "5", 'Character `5`.') | |
..rename(0x36, "6", 'Character `6`.') | |
..rename(0x37, "7", 'Character `7`.') | |
..rename(0x38, "8", 'Character `8`.') | |
..rename(0x39, "9", 'Character `9`.') | |
..rename(0x3A, "colon", 'Character `:`.') | |
..rename(0x3B, "semicolon", 'Character `;`.') | |
..rename(0x3C, "lt", 'Character `<`.') | |
..rename(0x3D, "equal", 'Character `<`.') | |
..rename(0x3E, "gt", 'Character `>`.') | |
..rename(0x3F, "question", 'Character `?`.') | |
..rename(0x40, "at", 'Character `@`.') | |
..rename(0x41, "A", 'Character `A`.') | |
..rename(0x42, "B", 'Character `B`.') | |
..rename(0x43, "C", 'Character `C`.') | |
..rename(0x44, "D", 'Character `D`.') | |
..rename(0x45, "E", 'Character `E`.') | |
..rename(0x46, "F", 'Character `F`.') | |
..rename(0x47, "G", 'Character `G`.') | |
..rename(0x48, "H", 'Character `H`.') | |
..rename(0x49, "I", 'Character `I`.') | |
..rename(0x4A, "J", 'Character `J`.') | |
..rename(0x4B, "K", 'Character `K`.') | |
..rename(0x4C, "L", 'Character `L`.') | |
..rename(0x4D, "M", 'Character `M`.') | |
..rename(0x4E, "N", 'Character `N`.') | |
..rename(0x4F, "O", 'Character `O`.') | |
..rename(0x50, "P", 'Character `P`.') | |
..rename(0x51, "Q", 'Character `Q`.') | |
..rename(0x52, "R", 'Character `R`.') | |
..rename(0x53, "S", 'Character `S`.') | |
..rename(0x54, "T", 'Character `T`.') | |
..rename(0x55, "U", 'Character `U`.') | |
..rename(0x56, "V", 'Character `V`.') | |
..rename(0x57, "W", 'Character `W`.') | |
..rename(0x58, "X", 'Character `X`.') | |
..rename(0x59, "Y", 'Character `Y`.') | |
..rename(0x5A, "Z", 'Character `Z`.') | |
..rename(0x5B, "lbracket", 'Character `[`.') | |
..rename(0x5C, "backslash", r'Character `\`.') | |
..rename(0x5D, "rbracket", 'Character `]`.') | |
..rename(0x5E, "caret", 'Character `^`.') | |
..rename(0x5F, "_", 'Character `_`.') | |
..rename(0x60, "backquote", 'Character `` ` ``.') | |
..rename(0x61, "a", 'Character `a`.') | |
..rename(0x62, "b", 'Character `b`.') | |
..rename(0x63, "c", 'Character `c`.') | |
..rename(0x64, "d", 'Character `d`.') | |
..rename(0x65, "e", 'Character `e`.') | |
..rename(0x66, "f", 'Character `f`.') | |
..rename(0x67, "g", 'Character `g`.') | |
..rename(0x68, "h", 'Character `h`.') | |
..rename(0x69, "i", 'Character `i`.') | |
..rename(0x6A, "j", 'Character `j`.') | |
..rename(0x6B, "k", 'Character `k`.') | |
..rename(0x6C, "l", 'Character `l`.') | |
..rename(0x6D, "m", 'Character `m`.') | |
..rename(0x6E, "n", 'Character `n`.') | |
..rename(0x6F, "o", 'Character `o`.') | |
..rename(0x70, "p", 'Character `p`.') | |
..rename(0x71, "q", 'Character `q`.') | |
..rename(0x72, "r", 'Character `r`.') | |
..rename(0x73, "s", 'Character `s`.') | |
..rename(0x74, "t", 'Character `t`.') | |
..rename(0x75, "u", 'Character `u`.') | |
..rename(0x76, "v", 'Character `v`.') | |
..rename(0x77, "w", 'Character `w`.') | |
..rename(0x78, "x", 'Character `x`.') | |
..rename(0x79, "y", 'Character `y`.') | |
..rename(0x7A, "z", 'Character `z`.') | |
..rename(0x7B, "lbrace", 'Character `{`.') | |
..rename(0x7C, "bar", 'Character `|`.') | |
..rename(0x7D, "rbrace", 'Character `}`.') | |
..rename(0x7E, "tilde", 'Character `~`.') | |
// Control character | |
..rename(0x7F, "del", '"Delete" control character.'); | |
} | |
void addHtmlEntities(CharcodeBuilder descriptions) { | |
descriptions | |
..rename(0x22, "quot", "Character '\"', short name.") | |
..rename(0x27, "apos", "Character \"'\".") | |
..rename(0x3C, "lt", "Character '<'.") | |
..rename(0x3E, "gt", "Character '>'.") | |
..rename(0x00A0, "nbsp", "no-break space (non-breaking space)") | |
..rename(0x00A1, "iexcl", "inverted exclamation mark ('¡')") | |
..rename(0x00A2, "cent", "cent sign ('¢')") | |
..rename(0x00A3, "pound", "pound sign ('£')") | |
..rename(0x00A4, "curren", "currency sign ('¤')") | |
..rename(0x00A5, "yen", "yen sign (yuan sign) ('¥')") | |
..rename(0x00A6, "brvbar", "broken bar (broken vertical bar) ('¦')") | |
..rename(0x00A7, "sect", "section sign ('§')") | |
..rename( | |
0x00A8, "uml", "diaeresis (spacing diaeresis); see Germanic umlaut ('¨')") | |
..rename(0x00A9, "copy", "copyright symbol ('©')") | |
..rename(0x00AA, "ordf", "feminine ordinal indicator ('ª')") | |
..rename(0x00AB, "laquo", | |
"left-pointing double angle quotation mark (left pointing guillemet) ('«')") | |
..rename(0x00AC, "not", "not sign ('¬')") | |
..rename(0x00AD, "shy", "soft hyphen (discretionary hyphen)") | |
..rename( | |
0x00AE, "reg", "registered sign (registered trademark symbol) ('®')") | |
..rename( | |
0x00AF, "macr", "macron (spacing macron, overline, APL overbar) ('¯')") | |
..rename(0x00B0, "deg", "degree symbol ('°')") | |
..rename(0x00B1, "plusmn", "plus-minus sign (plus-or-minus sign) ('±')") | |
..rename( | |
0x00B2, "sup2", "superscript two (superscript digit two, squared) ('²')") | |
..rename(0x00B3, "sup3", | |
"superscript three (superscript digit three, cubed) ('³')") | |
..rename(0x00B4, "acute", "acute accent (spacing acute) ('´')") | |
..rename(0x00B5, "micro", "micro sign ('µ')") | |
..rename(0x00B6, "para", "pilcrow sign (paragraph sign) ('¶')") | |
..rename( | |
0x00B7, "middot", "middle dot (Georgian comma, Greek middle dot) ('·')") | |
..rename(0x00B8, "cedil", "cedilla (spacing cedilla) ('¸')") | |
..rename(0x00B9, "sup1", "superscript one (superscript digit one) ('¹')") | |
..rename(0x00BA, "ordm", "masculine ordinal indicator ('º')") | |
..rename(0x00BB, "raquo", | |
"right-pointing double angle quotation mark (right pointing guillemet) ('»')") | |
..rename(0x00BC, "frac14", | |
"vulgar fraction one quarter (fraction one quarter) ('¼')") | |
..rename( | |
0x00BD, "frac12", "vulgar fraction one half (fraction one half) ('½')") | |
..rename(0x00BE, "frac34", | |
"vulgar fraction three quarters (fraction three quarters) ('¾')") | |
..rename( | |
0x00BF, "iquest", "inverted question mark (turned question mark) ('¿')") | |
..rename(0x00C0, "Agrave", | |
"Latin capital letter A with grave accent (Latin capital letter A grave) ('À')") | |
..rename(0x00C1, "Aacute", "Latin capital letter A with acute accent ('Á')") | |
..rename(0x00C2, "Acirc", "Latin capital letter A with circumflex ('Â')") | |
..rename(0x00C3, "Atilde", "Latin capital letter A with tilde ('Ã')") | |
..rename(0x00C4, "Auml", "Latin capital letter A with diaeresis ('Ä')") | |
..rename(0x00C5, "Aring", | |
"Latin capital letter A with ring above (Latin capital letter A ring) ('Å')") | |
..rename(0x00C6, "AElig", | |
"Latin capital letter AE (Latin capital ligature AE) ('Æ')") | |
..rename(0x00C7, "Ccedil", "Latin capital letter C with cedilla ('Ç')") | |
..rename(0x00C8, "Egrave", "Latin capital letter E with grave accent ('È')") | |
..rename(0x00C9, "Eacute", "Latin capital letter E with acute accent ('É')") | |
..rename(0x00CA, "Ecirc", "Latin capital letter E with circumflex ('Ê')") | |
..rename(0x00CB, "Euml", "Latin capital letter E with diaeresis ('Ë')") | |
..rename(0x00CC, "Igrave", "Latin capital letter I with grave accent ('Ì')") | |
..rename(0x00CD, "Iacute", "Latin capital letter I with acute accent ('Í')") | |
..rename(0x00CE, "Icirc", "Latin capital letter I with circumflex ('Î')") | |
..rename(0x00CF, "Iuml", "Latin capital letter I with diaeresis ('Ï')") | |
..rename(0x00D0, "ETH", "Latin capital letter Eth ('Ð')") | |
..rename(0x00D1, "Ntilde", "Latin capital letter N with tilde ('Ñ')") | |
..rename(0x00D2, "Ograve", "Latin capital letter O with grave accent ('Ò')") | |
..rename(0x00D3, "Oacute", "Latin capital letter O with acute accent ('Ó')") | |
..rename(0x00D4, "Ocirc", "Latin capital letter O with circumflex ('Ô')") | |
..rename(0x00D5, "Otilde", "Latin capital letter O with tilde ('Õ')") | |
..rename(0x00D6, "Ouml", "Latin capital letter O with diaeresis ('Ö')") | |
..rename(0x00D7, "times", "multiplication sign ('×')") | |
..rename(0x00D8, "Oslash", | |
"Latin capital letter O with stroke (Latin capital letter O slash) ('Ø')") | |
..rename(0x00D9, "Ugrave", "Latin capital letter U with grave accent ('Ù')") | |
..rename(0x00DA, "Uacute", "Latin capital letter U with acute accent ('Ú')") | |
..rename(0x00DB, "Ucirc", "Latin capital letter U with circumflex ('Û')") | |
..rename(0x00DC, "Uuml", "Latin capital letter U with diaeresis ('Ü')") | |
..rename(0x00DD, "Yacute", "Latin capital letter Y with acute accent ('Ý')") | |
..rename(0x00DE, "THORN", "Latin capital letter THORN ('Þ')") | |
..rename(0x00DF, "szlig", | |
"Latin small letter sharp s (ess-zed); see German Eszett ('ß')") | |
..rename(0x00E0, "agrave", "Latin small letter a with grave accent ('à')") | |
..rename(0x00E1, "aacute", "Latin small letter a with acute accent ('á')") | |
..rename(0x00E2, "acirc", "Latin small letter a with circumflex ('â')") | |
..rename(0x00E3, "atilde", "Latin small letter a with tilde ('ã')") | |
..rename(0x00E4, "auml", "Latin small letter a with diaeresis ('ä')") | |
..rename(0x00E5, "aring", "Latin small letter a with ring above ('å')") | |
..rename( | |
0x00E6, "aelig", "Latin small letter ae (Latin small ligature ae) ('æ')") | |
..rename(0x00E7, "ccedil", "Latin small letter c with cedilla ('ç')") | |
..rename(0x00E8, "egrave", "Latin small letter e with grave accent ('è')") | |
..rename(0x00E9, "eacute", "Latin small letter e with acute accent ('é')") | |
..rename(0x00EA, "ecirc", "Latin small letter e with circumflex ('ê')") | |
..rename(0x00EB, "euml", "Latin small letter e with diaeresis ('ë')") | |
..rename(0x00EC, "igrave", "Latin small letter i with grave accent ('ì')") | |
..rename(0x00ED, "iacute", "Latin small letter i with acute accent ('í')") | |
..rename(0x00EE, "icirc", "Latin small letter i with circumflex ('î')") | |
..rename(0x00EF, "iuml", "Latin small letter i with diaeresis ('ï')") | |
..rename(0x00F0, "eth", "Latin small letter eth ('ð')") | |
..rename(0x00F1, "ntilde", "Latin small letter n with tilde ('ñ')") | |
..rename(0x00F2, "ograve", "Latin small letter o with grave accent ('ò')") | |
..rename(0x00F3, "oacute", "Latin small letter o with acute accent ('ó')") | |
..rename(0x00F4, "ocirc", "Latin small letter o with circumflex ('ô')") | |
..rename(0x00F5, "otilde", "Latin small letter o with tilde ('õ')") | |
..rename(0x00F6, "ouml", "Latin small letter o with diaeresis ('ö')") | |
..rename(0x00F7, "divide", "division sign (obelus) ('÷')") | |
..rename(0x00F8, "oslash", | |
"Latin small letter o with stroke (Latin small letter o slash) ('ø')") | |
..rename(0x00F9, "ugrave", "Latin small letter u with grave accent ('ù')") | |
..rename(0x00FA, "uacute", "Latin small letter u with acute accent ('ú')") | |
..rename(0x00FB, "ucirc", "Latin small letter u with circumflex ('û')") | |
..rename(0x00FC, "uuml", "Latin small letter u with diaeresis ('ü')") | |
..rename(0x00FD, "yacute", "Latin small letter y with acute accent ('ý')") | |
..rename(0x00FE, "thorn", "Latin small letter thorn ('þ')") | |
..rename(0x00FF, "yuml", "Latin small letter y with diaeresis ('ÿ')") | |
..rename(0x0152, "OElig", "Latin capital ligature oe ('Œ')") | |
..rename(0x0153, "oelig", "Latin small ligature oe ('œ')") | |
..rename(0x0160, "Scaron", "Latin capital letter s with caron ('Š')") | |
..rename(0x0161, "scaron", "Latin small letter s with caron ('š')") | |
..rename(0x0178, "Yuml", "Latin capital letter y with diaeresis ('Ÿ')") | |
..rename(0x0192, "fnof", | |
"Latin small letter f with hook (function, florin) ('ƒ')") | |
..rename(0x02C6, "circ", "modifier letter circumflex accent ('ˆ')") | |
..rename(0x02DC, "tilde", "small tilde ('˜')") | |
..rename(0x0391, "Alpha", "Greek capital letter Alpha ('Α')") | |
..rename(0x0392, "Beta", "Greek capital letter Beta ('Β')") | |
..rename(0x0393, "Gamma", "Greek capital letter Gamma ('Γ')") | |
..rename(0x0394, "Delta", "Greek capital letter Delta ('Δ')") | |
..rename(0x0395, "Epsilon", "Greek capital letter Epsilon ('Ε')") | |
..rename(0x0396, "Zeta", "Greek capital letter Zeta ('Ζ')") | |
..rename(0x0397, "Eta", "Greek capital letter Eta ('Η')") | |
..rename(0x0398, "Theta", "Greek capital letter Theta ('Θ')") | |
..rename(0x0399, "Iota", "Greek capital letter Iota ('Ι')") | |
..rename(0x039A, "Kappa", "Greek capital letter Kappa ('Κ')") | |
..rename(0x039B, "Lambda", "Greek capital letter Lambda ('Λ')") | |
..rename(0x039C, "Mu", "Greek capital letter Mu ('Μ')") | |
..rename(0x039D, "Nu", "Greek capital letter Nu ('Ν')") | |
..rename(0x039E, "Xi", "Greek capital letter Xi ('Ξ')") | |
..rename(0x039F, "Omicron", "Greek capital letter Omicron ('Ο')") | |
..rename(0x03A0, "Pi", "Greek capital letter Pi ('Π')") | |
..rename(0x03A1, "Rho", "Greek capital letter Rho ('Ρ')") | |
..rename(0x03A3, "Sigma", "Greek capital letter Sigma ('Σ')") | |
..rename(0x03A4, "Tau", "Greek capital letter Tau ('Τ')") | |
..rename(0x03A5, "Upsilon", "Greek capital letter Upsilon ('Υ')") | |
..rename(0x03A6, "Phi", "Greek capital letter Phi ('Φ')") | |
..rename(0x03A7, "Chi", "Greek capital letter Chi ('Χ')") | |
..rename(0x03A8, "Psi", "Greek capital letter Psi ('Ψ')") | |
..rename(0x03A9, "Omega", "Greek capital letter Omega ('Ω')") | |
..rename(0x03B1, "alpha", "Greek small letter alpha ('α')") | |
..rename(0x03B2, "beta", "Greek small letter beta ('β')") | |
..rename(0x03B3, "gamma", "Greek small letter gamma ('γ')") | |
..rename(0x03B4, "delta", "Greek small letter delta ('δ')") | |
..rename(0x03B5, "epsilon", "Greek small letter epsilon ('ε')") | |
..rename(0x03B6, "zeta", "Greek small letter zeta ('ζ')") | |
..rename(0x03B7, "eta", "Greek small letter eta ('η')") | |
..rename(0x03B8, "theta", "Greek small letter theta ('θ')") | |
..rename(0x03B9, "iota", "Greek small letter iota ('ι')") | |
..rename(0x03BA, "kappa", "Greek small letter kappa ('κ')") | |
..rename(0x03BB, "lambda", "Greek small letter lambda ('λ')") | |
..rename(0x03BC, "mu", "Greek small letter mu ('μ')") | |
..rename(0x03BD, "nu", "Greek small letter nu ('ν')") | |
..rename(0x03BE, "xi", "Greek small letter xi ('ξ')") | |
..rename(0x03BF, "omicron", "Greek small letter omicron ('ο')") | |
..rename(0x03C0, "pi", "Greek small letter pi ('π')") | |
..rename(0x03C1, "rho", "Greek small letter rho ('ρ')") | |
..rename(0x03C2, "sigmaf", "Greek small letter final sigma ('ς')") | |
..rename(0x03C3, "sigma", "Greek small letter sigma ('σ')") | |
..rename(0x03C4, "tau", "Greek small letter tau ('τ')") | |
..rename(0x03C5, "upsilon", "Greek small letter upsilon ('υ')") | |
..rename(0x03C6, "phi", "Greek small letter phi ('φ')") | |
..rename(0x03C7, "chi", "Greek small letter chi ('χ')") | |
..rename(0x03C8, "psi", "Greek small letter psi ('ψ')") | |
..rename(0x03C9, "omega", "Greek small letter omega ('ω')") | |
..rename(0x03D1, "thetasym", "Greek theta symbol ('ϑ')") | |
..rename(0x03D2, "upsih", "Greek Upsilon with hook symbol ('ϒ')") | |
..rename(0x03D6, "piv", "Greek pi symbol ('ϖ')") | |
..rename(0x2002, "ensp", "en space") | |
..rename(0x2003, "emsp", "em space") | |
..rename(0x2009, "thinsp", "thin space") | |
..rename(0x200C, "zwnj", "zero-width non-joiner") | |
..rename(0x200D, "zwj", "zero-width joiner") | |
..rename(0x200E, "lrm", "left-to-right mark") | |
..rename(0x200F, "rlm", "right-to-left mark") | |
..rename(0x2013, "ndash", "en dash ('–')") | |
..rename(0x2014, "mdash", "em dash ('—')") | |
..rename(0x2018, "lsquo", "left single quotation mark ('‘')") | |
..rename(0x2019, "rsquo", "right single quotation mark ('’')") | |
..rename(0x201A, "sbquo", "single low-9 quotation mark ('‚')") | |
..rename(0x201C, "ldquo", "left double quotation mark ('“')") | |
..rename(0x201D, "rdquo", "right double quotation mark ('”')") | |
..rename(0x201E, "bdquo", "double low-9 quotation mark ('„')") | |
..rename(0x2020, "dagger", "dagger, obelisk ('†')") | |
..rename(0x2021, "Dagger", "double dagger, double obelisk ('‡')") | |
..rename(0x2022, "bull", "bullet (black small circle) ('•')") | |
..rename(0x2026, "hellip", "horizontal ellipsis (three dot leader) ('…')") | |
..rename(0x2030, "permil", "per mille sign ('‰')") | |
..rename(0x2032, "prime", "prime (minutes, feet) ('′')") | |
..rename(0x2033, "Prime", "double prime (seconds, inches) ('″')") | |
..rename( | |
0x2039, "lsaquo", "single left-pointing angle quotation mark ('‹')") | |
..rename(0x203A, "rsaquo", "single right-pointing angle quotation mark ('›')") | |
..rename(0x203E, "oline", "overline (spacing overscore) ('‾')") | |
..rename(0x2044, "frasl", "fraction slash (solidus) ('⁄')") | |
..rename(0x20AC, "euro", "euro sign ('€')") | |
..rename(0x2111, "image", "black-letter capital I (imaginary part) ('ℑ')") | |
..rename(0x2118, "weierp", "script capital P (power set, Weierstrass p) ('℘')") | |
..rename(0x211C, "real", "black-letter capital R (real part symbol) ('ℜ')") | |
..rename(0x2122, "trade", "trademark symbol ('™')") | |
..rename(0x2135, "alefsym", "alef symbol (first transfinite cardinal) ('ℵ')") | |
..rename(0x2190, "larr", "leftwards arrow ('←')") | |
..rename(0x2191, "uarr", "upwards arrow ('↑')") | |
..rename(0x2192, "rarr", "rightwards arrow ('→')") | |
..rename(0x2193, "darr", "downwards arrow ('↓')") | |
..rename(0x2194, "harr", "left right arrow ('↔')") | |
..rename(0x21B5, "crarr", "downwards arrow with corner leftwards (carriage return) ('↵')") | |
..rename(0x21D0, "lArr", "leftwards double arrow ('⇐')") | |
..rename(0x21D1, "uArr", "upwards double arrow ('⇑')") | |
..rename(0x21D2, "rArr", "rightwards double arrow ('⇒')") | |
..rename(0x21D3, "dArr", "downwards double arrow ('⇓')") | |
..rename(0x21D4, "hArr", "left right double arrow ('⇔')") | |
..rename(0x2200, "forall", "for all ('∀')") | |
..rename(0x2202, "part", "partial differential ('∂')") | |
..rename(0x2203, "exist", "there exists ('∃')") | |
..rename(0x2205, "empty", "empty set (null set); see also U+8960, ⌀ ('∅')") | |
..rename(0x2207, "nabla", "del or nabla (vector differential operator) ('∇')") | |
..rename(0x2208, "isin", "element of ('∈')") | |
..rename(0x2209, "notin", "not an element of ('∉')") | |
..rename(0x220B, "ni", "contains as member ('∋')") | |
..rename(0x220F, "prod", "n-ary product (product sign) ('∏')") | |
..rename(0x2211, "sum", "n-ary summation ('∑')") | |
..rename(0x2212, "minus", "minus sign ('−')") | |
..rename(0x2217, "lowast", "asterisk operator ('∗')") | |
..rename(0x221A, "radic", "square root (radical sign) ('√')") | |
..rename(0x221D, "prop", "proportional to ('∝')") | |
..rename(0x221E, "infin", "infinity ('∞')") | |
..rename(0x2220, "ang", "angle ('∠')") | |
..rename(0x2227, "and", "logical and (wedge) ('∧')") | |
..rename(0x2228, "or", "logical or (vee) ('∨')") | |
..rename(0x2229, "cap", "intersection (cap) ('∩')") | |
..rename(0x222A, "cup", "union (cup) ('∪')") | |
..rename(0x222B, "int", "integral ('∫')") | |
..rename(0x2234, "there4", "therefore sign ('∴')") | |
..rename(0x223C, "sim", "tilde operator (varies with, similar to) ('∼')") | |
..rename(0x2245, "cong", "congruent to ('≅')") | |
..rename(0x2248, "asymp", "almost equal to (asymptotic to) ('≈')") | |
..rename(0x2260, "ne", "not equal to ('≠')") | |
..rename(0x2261, "equiv", "identical to; sometimes used for 'equivalent to' ('≡')") | |
..rename(0x2264, "le", "less-than or equal to ('≤')") | |
..rename(0x2265, "ge", "greater-than or equal to ('≥')") | |
..rename(0x2282, "sub", "subset of ('⊂')") | |
..rename(0x2283, "sup", "superset of ('⊃')") | |
..rename(0x2284, "nsub", "not a subset of ('⊄')") | |
..rename(0x2286, "sube", "subset of or equal to ('⊆')") | |
..rename(0x2287, "supe", "superset of or equal to ('⊇')") | |
..rename(0x2295, "oplus", "circled plus (direct sum) ('⊕')") | |
..rename(0x2297, "otimes", "circled times (vector product) ('⊗')") | |
..rename(0x22A5, "perp", "up tack (orthogonal to, perpendicular) ('⊥')") | |
..rename(0x22C5, "sdot", "dot operator ('⋅')") | |
..rename(0x22EE, "vellip", "vertical ellipsis ('⋮')") | |
..rename(0x2308, "lceil", "left ceiling (APL upstile) ('⌈')") | |
..rename(0x2309, "rceil", "right ceiling ('⌉')") | |
..rename(0x230A, "lfloor", "left floor (APL downstile) ('⌊')") | |
..rename(0x230B, "rfloor", "right floor ('⌋')") | |
..rename(0x2329, "lang", "left-pointing angle bracket (bra) ('〈')") | |
..rename(0x232A, "rang", "right-pointing angle bracket (ket) ('〉')") | |
..rename(0x25CA, "loz", "lozenge ('◊')") | |
..rename(0x2660, "spades", "black spade suit ('♠')") | |
..rename(0x2663, "clubs", "black club suit (shamrock) ('♣')") | |
..rename(0x2665, "hearts", "black heart suit (valentine) ('♥')") | |
..rename(0x2666, "diams", "black diamond suit ('♦')"); | |
} | |
const String usageString = r""" | |
Usage: | |
charcode [-h] [-oFILE] (<character-range> | <rename> | -pPREFIX | -fFILE)* | |
A <character-range> is either | |
- A literal character, | |
- An escaped character `\n`, `\r`, `\t`, `\xhh`, `\uhhhh` or `\dd*`, | |
where `h` is a hexadecimal digit and `d` is a decimal digit. | |
- Two characters separated by `-`, | |
- The `\d`, `\w` or `\s` escapes, or | |
- A sequence of character ranges. | |
Examples: `a`, `a-z`, `\da-fA-F`, `\x00-\uFFFF`. | |
A <rename> declaration is a single or escaped character, a `=` and an | |
identifier name, optionally followed by a `:` and a description | |
Example: x=cross:\"a cross product.\" | |
The declaration names or renames the character and adds or changes | |
the associated description. | |
It will not add the character to the output. | |
Following occurrences of the character will use the new name. | |
Example: `charcode y y=why x-z` will generate `$y` for the character | |
code of "y", then `$x`, `$why` and `$z` as well. | |
"""; |