// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// ----------------------------------------------------------------------
// This is a very specialized tool which was created in order to support
// adding hash values used as location markers in the LaTeX source of the
// language specification. It is intended to take its input file as the
// first argument and the output file name as the second argument. From
// docs/language a typical usage would be as follows:
// dart ../../tools/addlatexhash.dart dartLangSpec.tex tmp.tex
// This will yield a normalized variant tmp.tex of the language
// specification with hash values filled in. For more details, please
// check the language specification source itself.
// NB: This utility assumes UN*X style line endings, \n, in the LaTeX
// source file received as input; it will not work with other styles.
// TODO: The current version does not fill in hash values, it only
// standardizes the LaTeX source by removing comments and normalizing
// white space.
import 'dart:io';
import 'dart:convert';
import '../pkg/crypto/lib/crypto.dart';
// Normalization of the text, i.e., removal or normalization
// of elements that do not affect the output from latex
// A TeX comment that does not begin the line: one character other than a
// backslash, then '%', then everything up to the end of the line. Since
// '.' does not match \n, a trailing newline is never part of the match.
final commentRE = new RegExp(r'[^\\]%.*');

// An input consisting solely of one or more whitespace characters.
final whitespaceAllRE = new RegExp(r'^\s+$');

// A run of two or more spaces and/or tabs.
final whitespaceRE = new RegExp(r'[ \t]{2,}');
// normalization steps
// Returns [line] with a region around [match] removed and [glue] inserted
// in its place; returns [line] unchanged when [match] is null.
//
// The removed region runs from 'match.start + startOffset' up to, but not
// including, 'match.end + endOffset'. Both positions are clamped into
// [0, line.length], and the end is never allowed to lie before the start,
// so out-of-range offsets neither throw a RangeError nor duplicate text:
// in the degenerate case nothing is removed and [glue] is simply inserted
// at the (clamped) start position.
cutMatch(line, match, {startOffset = 0, endOffset = 0, glue = ""}) {
  if (match == null) return line;
  var len = line.length;
  var start = match.start + startOffset;
  var end = match.end + endOffset;
  if (start < 0) start = 0;
  if (start > len) start = len;
  if (end > len) end = len;
  // Also covers a negative 'end', because 'start' is already >= 0 here.
  if (end < start) end = start;
  return line.substring(0, start) + glue + line.substring(end);
}
// Looks up the first match of [re] in [line] and hands it to cutMatch
// together with the given offsets and glue. cutMatch receives null when
// [re] does not match anywhere in [line].
cutRegexp(line, re, {startOffset = 0, endOffset = 0, glue = ""}) {
  var firstHit = re.firstMatch(line);
  return cutMatch(line, firstHit,
      startOffset: startOffset, endOffset: endOffset, glue: glue);
}
// Returns the part of [line] that precedes 'match.start + offset', followed
// by [glue]; returns [line] unchanged when [match] is null.
//
// The cut position is clamped into [0, line.length], in the same spirit as
// cutMatch, so an [offset] that points outside the line keeps nothing
// (negative side) or everything (positive side) instead of raising a
// RangeError.
cutFromMatch(line, match, {offset = 0, glue = ""}) {
  if (match == null) return line;
  var len = line.length;
  var cut = match.start + offset;
  if (cut < 0) cut = 0;
  if (cut > len) cut = len;
  return line.substring(0, cut) + glue;
}
// Looks up the first match of [re] in [line] and hands it to cutFromMatch
// together with [offset] and [glue]. cutFromMatch receives null when [re]
// does not match anywhere in [line].
cutFromRegexp(line, re, {offset = 0, glue = ""}) {
  var firstHit = re.firstMatch(line);
  return cutFromMatch(line, firstHit, offset: offset, glue: glue);
}
// Whether [line] consists of whitespace only, as decided by
// whitespaceAllRE; the empty string does not qualify.
isWsOnly(line) {
  return whitespaceAllRE.hasMatch(line);
}
// Whether [line] is a pure comment line, i.e., its very first character
// is '%'. A '%' preceded by anything, even whitespace, does not count.
isCommentOnly(line) {
  return line.startsWith("%");
}
// Returns the line terminator of [line]: "\n" if [line] ends with a
// newline, otherwise the empty string.
justEol(line) {
  if (line.endsWith("\n")) return "\n";
  return "";
}
// Empties the TeX comment in [line], if any, but keeps its '%' marker.
//
// NB: dropping everything from the '%' onwards, newline included, looks
// attractive but does not work. TeX does exactly that itself, and then
// re-inserts a character that depends on its current state (S, M, or N);
// mirroring that state faithfully here is tricky. So only the text of the
// comment is deleted: the '%' stays at the end of the line, and TeX goes
// through the same states as it did for the unstripped file.
stripComment(line) {
  // A line that starts with '%' collapses to a bare comment marker.
  // Otherwise the match of commentRE begins one character before the '%',
  // so a start offset of 2 preserves both that character and the '%'.
  return isCommentOnly(line)
      ? "%\n"
      : cutRegexp(line, commentRE, startOffset: 2);
}
// Normalizes the whitespace of a single [line]:
//  - a line with nothing but whitespace shrinks to its line terminator
//    ("\n", or "" if it has none);
//  - otherwise leading whitespace is dropped and each run matched by
//    whitespaceRE is replaced by one space.
normalizeWhitespace(line) {
  var body = line.trimLeft();
  return body.isEmpty ? justEol(line) : body.replaceAll(whitespaceRE, " ");
}
// Removes redundant wsOnly and commentOnly lines from [lines] and returns
// the surviving lines as a new list; [lines] itself is not modified.
//
// The loop is a small state machine driven by what precedes 'line':
//  - in ordinary text: a wsOnly line is kept and opens a blank block, a
//    commentOnly line is dropped and opens a comment block, any other line
//    is kept;
//  - in a blank block: further wsOnly lines are dropped, commentOnly lines
//    are dropped and treated as part of the blank block, any other line
//    ends the block and is kept;
//  - in a comment block: further commentOnly lines are dropped, a wsOnly
//    line is kept and opens a blank block, any other line ends the block
//    and is kept.
// Consequently a run of wsOnly lines (possibly interleaved with
// commentOnly lines) is reduced to its first wsOnly line, and commentOnly
// lines never reach the output.
multilineNormalize(lines) {
  var afterBlankLines = false; // does 'line' succeed >0 empty lines?
  var afterCommentLines = false; // .. succeed >0 commentOnly lines?
  var newLines = [];
  for (var line in lines) {
    if (afterBlankLines && afterCommentLines) {
      // can never happen
      throw "Bug, please report to eernst@";
    } else if (afterBlankLines && !afterCommentLines) {
      // at least one line before 'line' is wsOnly
      if (!isWsOnly(line)) {
        // blank line block ended
        afterCommentLines = isCommentOnly(line);
        // special case: it seems to be safe to remove commentOnly lines
        // after wsOnly lines, so the TeX state must be predictably right;
        // next line will then be afterCommentLines and be dropped, so
        // we drop the entire comment block---which is very useful; we can
        // also consider this comment line to be an empty line, such that
        // subsequent empty lines can be considered to be in a block of
        // empty lines; note that almost all variants of this will break..
        if (afterCommentLines) {
          // _current_ 'line' is a commentOnly here
          afterBlankLines = true;
          afterCommentLines = false;
          // and do not add 'line'
        } else {
          // after blanks, but current 'line' is neither blank nor comment:
          // it is ordinary text and must be kept
          afterBlankLines = false;
          newLines.add(line);
        }
      } else {
        // blank line block continues, do not add 'line'
      }
    } else if (!afterBlankLines && afterCommentLines) {
      // at least one line before 'line' is commentOnly
      if (!isCommentOnly(line)) {
        // comment line block ended; 'line' is either ordinary text or the
        // first wsOnly line of a new blank block, and is kept in both cases
        afterBlankLines = isWsOnly(line);
        afterCommentLines = false;
        newLines.add(line);
      } else {
        // comment line block continues, do not add 'line'
      }
    } else {
      assert(!afterBlankLines && !afterCommentLines);
      // no wsOnly or commentOnly lines precede 'line'
      afterBlankLines = isWsOnly(line);
      afterCommentLines = isCommentOnly(line);
      if (!afterCommentLines) newLines.add(line);
      // else skipping commentOnly line after nonWs, nonComment text
    }
  }
  return newLines;
}
// Selecting the elements in the pipeline
// Full single-line pipeline: first strip the comment text from [line],
// then normalize the whitespace of the result.
normalize(line) {
  var withoutComment = stripComment(line);
  return normalizeWhitespace(withoutComment);
}
// Single-line pipeline that only strips the comment text from [line] and
// leaves all whitespace untouched ('sisp' presumably abbreviates
// "significant spacing").
sispNormalize(line) {
  return stripComment(line);
}
// Managing fragments with significant spacing
// '\begin{dartCode}' at the start of the input, optionally preceded by
// whitespace.
final dartCodeBeginRE = new RegExp(r'^\s*\\begin\{dartCode\}');

// '\end{dartCode}' at the start of the input, optionally preceded by
// whitespace.
final dartCodeEndRE = new RegExp(r'^\s*\\end\{dartCode\}');
// Whether [targetRE] matches somewhere in [line].
sispIs(line, targetRE) => targetRE.firstMatch(line) != null;
// Whether [line] matches dartCodeBeginRE, i.e., opens a dartCode
// environment.
sispIsDartBegin(line) {
  return sispIs(line, dartCodeBeginRE);
}
// Whether [line] matches dartCodeEndRE, i.e., closes a dartCode
// environment.
sispIsDartEnd(line) {
  return sispIs(line, dartCodeEndRE);
}
// Transform input file into output file
main ([args]) {
if (args.length != 2) {
print("Usage: addlatexhash.dart <input-file> <output-file>");
throw "Received ${args.length} arguments, expected two";
var inputFile = new File(args[0]);
var outputFile = new File(args[1]);
var lines = inputFile.readAsLinesSync();
// single-line normalization
var inDartCode = false;
var newLines = new List();
for (var line in lines) {
if (sispIsDartBegin(line)) {
inDartCode = true;
} else if (sispIsDartEnd(line)) {
inDartCode = false;
if (inDartCode) {
newLines.add(sispNormalize(line + "\n"));
} else {
newLines.add(normalize(line + "\n"));
// multi-line normalization
newLines = multilineNormalize(newLines);
// output result