blob: 71f7860dc4c3415ab19df30724c610e860733aff [file] [log] [blame]
// Copyright (c) 2018, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'dart:io';
import 'dart:typed_data';
import 'package:characters/src/grapheme_clusters/constants.dart';
import '../src/args.dart';
import '../src/data_files.dart';
import '../src/grapheme_category_loader.dart';
import '../src/shared.dart';
import '../src/string_literal_writer.dart';
// Generates tests for grapheme cluster splitting from the Unicode
// GraphemeBreakTest.txt file.
//
// Fetches the data files from the Unicode web site.
const defaultVerbose = false;
const testFile = 'test/src/unicode_grapheme_tests.dart';
void main(List<String> args) async {
var flags = parseArgs(args, 'generate_tests');
var output = flags.dryrun
? null
: flags.targetFile ?? File(path(packageRoot, testFile));
if (output != null && !output.existsSync()) {
try {
output.createSync(recursive: true);
} catch (e) {
stderr.writeln('Cannot find or create file: ${output.path}');
stderr.writeln('Writing to stdout');
output = null;
}
}
if (flags.update) {
// Force license file update.
await licenseFile.load(checkForUpdate: true);
}
var (categories, graphemeTests, emojiTests) = await (
loadCategories(update: flags.update, verbose: flags.verbose),
graphemeTestData.load(checkForUpdate: flags.update),
emojiTestData.load(checkForUpdate: flags.update),
).wait;
generateTests(
output,
[graphemeTests, emojiTests],
categories,
verbose: flags.verbose,
dryrun: flags.dryrun,
);
}
void generateTests(
File? output,
List<String> texts,
Uint8List categories, {
bool verbose = false,
bool dryrun = false,
}) {
var buffer = StringBuffer();
writeHeader(buffer, [
graphemeTestData,
emojiTestData,
graphemeBreakPropertyData,
emojiData,
derivedData,
]);
buffer.writeln('// ignore_for_file: lines_longer_than_80_chars');
buffer.writeln('// dart format off');
buffer.writeln();
// Example character of a category which is in the upper planes.
var upperChars = List<int>.filled(inputCategoryCount, -1);
// Example character of a category which is in the lower planes.
var lowerChars = List<int>.filled(inputCategoryCount, -1);
writeTests(
buffer,
texts,
categories,
lowerChars,
upperChars,
verbose: verbose,
dryrun: dryrun,
);
buffer.writeln('// dart format on');
writeOtherCategories(buffer, categories, lowerChars, upperChars);
if (output == null) {
stdout.write(buffer);
} else if (!dryrun) {
output.writeAsStringSync(buffer.toString());
}
}
void writeTests(
StringSink buffer,
List<String> texts,
Uint8List categories,
List<int> lowerChars,
List<int> upperChars, {
bool dryrun = false,
bool verbose = defaultVerbose,
}) async {
var writer = StringLiteralWriter(buffer, lineLength: 9999, escape: _escape);
void writeParts(List<List<int>> parts, String description) {
buffer.writeln(' [');
const indent = ' ';
for (var i = 0; i < parts.length; i++) {
buffer.write(indent);
writer.start(indent.length);
for (var char in parts[i]) {
writer.add(char);
var c = categories[char];
((char < 0x10000) ? lowerChars : upperChars)[c] = char;
}
writer.end();
buffer.writeln(',');
}
buffer
..write(' ], // ')
..writeln(description);
}
// Write grapheme cluster tests.
{
buffer.writeln('// Grapheme cluster tests.');
writeTestHeader(buffer, 'splitTests');
var test = texts[0];
var lineRE = RegExp(r'^(÷.*?)#[ \t]*(.*)', multiLine: true);
var tokensRE = RegExp(r'[÷×]|[\dA-F]+');
for (var line in lineRE.allMatches(test)) {
var description = line[2]!;
var tokens = tokensRE.allMatches(line[1]!).map((x) => x[0]!).toList();
assert(tokens.first == '÷');
assert(tokens.last == '÷');
var parts = <List<int>>[];
var chars = <int>[];
for (var i = 1; i < tokens.length; i += 2) {
var cp = int.parse(tokens[i], radix: 16);
chars.add(cp);
if (tokens[i + 1] == '÷') {
parts.add(chars);
chars = [];
}
}
writeParts(parts, description);
}
buffer.writeln('];');
}
// Write emoji cluster tests.
{
buffer.writeln('// Emoji tests.');
writeTestHeader(buffer, 'emojis');
// Emojis
var emojis = texts[1];
var lineRE = RegExp(r'^([ \dA-F]*?);[^#]*#[ \t]*(.*)', multiLine: true);
var tokensRE = RegExp(r'[\dA-F]+');
for (var line in lineRE.allMatches(emojis)) {
var description = line[2]!;
var part = <int>[];
for (var token in tokensRE.allMatches(line[1]!)) {
var value = int.parse(token[0]!, radix: 16);
part.add(value);
}
writeParts([part], description);
}
buffer.writeln('];');
}
}
bool _escape(int cp) => cp > 0xff || cp & 0x60 == 0 || cp == 0x7f;
void writeTestHeader(StringSink buffer, String testName) {
buffer
..write('const List<List<String>> ')
..write(testName)
..writeln(' = [');
}
void writeOtherCategories(
StringSink output,
Uint8List categories,
List<int> lowerChars,
List<int> upperChars,
) {
var otherCategories = lowerChars;
for (var i = 0; i < 0x110000; i++) {
if (i == 0x10000) otherCategories = upperChars;
var category = categories[i];
if (otherCategories[category] < 0) otherCategories[category] = i;
}
// BMP characters.
output
..writeln('// BMP character in each category, if any, -1 if none.')
..writeln('const lowerChars = <int>[');
for (var char in lowerChars) {
output.write(' ');
writeHex(output, char);
output.writeln(',');
}
output.writeln('];');
output
..writeln('// Non-BMP character in each category, if any, -1 if none.')
..writeln('const upperChars = <int>[');
for (var char in upperChars) {
output.write(' ');
writeHex(output, char);
output.writeln(',');
}
output.writeln('];');
}
void writeHex(StringSink out, int value) {
if (value < 0) {
out.write('-');
value = -value;
}
out
..write('0x')
..write(value.toRadixString(16));
}