// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'dart:io';
import 'package:_fe_analyzer_shared/src/scanner/scanner.dart'
show ErrorToken, ScannerConfiguration, StringScanner;
import 'package:_fe_analyzer_shared/src/scanner/token.dart'
show BeginToken, SimpleToken, Token, TokenType;
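/// A stretch of code that occurs more than once: the locations it was found
/// at ([where]) and one representative copy of its text ([example]).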
class Duplicate {
final List<FromToUri> where;
final String example;
Duplicate(this.where, this.example);
@override
String toString() => "Duplicate[$example]";
}
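/// A source location: a [uri] plus start and end character offsets within it.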
class FromToUri {
final Uri uri;
final int startOffset;
final int endOffset;
FromToUri(this.uri, this.startOffset, this.endOffset);
}
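/// A "pseudo-line" as created by [_indexLines]: its normalized [content], the
/// source and offsets it came from, and links to the previous and next
/// pseudo-line of the same source.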
class Line {
String content;
Uri uri;
int startOffset;
int endOffset;
Line? previous;
Line? next;
Line(
this.content, this.uri, this.startOffset, this.endOffset, this.previous) {
if (previous != null) {
previous!.next = this;
}
}
}
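/// A match spanning [lineCount] consecutive pseudo-lines, starting at each of
/// the lines in [startLines].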
class ExtendedLines {
List<Line> startLines;
int lineCount;
ExtendedLines(this.startLines, this.lineCount);
}
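/// A map from keys to lists of values, where the `[]=` operator appends the
/// value to the list for the key instead of overwriting it.
///
/// A small illustration (hypothetical values):
///
/// ```dart
/// MultiMap<String, int> map = new MultiMap();
/// map["a"] = 1;
/// map["a"] = 2;
/// // map.data["a"] is now [1, 2].
/// ```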
class MultiMap<K, V> {
Map<K, List<V>> data = {};
void operator []=(K key, V value) {
List<V>? lookup = data[key];
if (lookup != null) {
lookup.add(value);
} else {
data[key] = [value];
}
}
}
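/// Scans [data] and splits the resulting token stream into "pseudo-lines"
/// that end at '{', ';', the token closing an earlier '{', or end of file.
/// Each pseudo-line that isn't in [denyListed] is indexed in [mapped] by its
/// space-separated lexeme content.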
void _indexLines(MultiMap<String, Line> mapped, Set<String> denyListed,
String data, Uri uri) {
// TODO(jensj): Work directly on scanned tokens, or use parser as well?
// * Should probably only operate on body content, and not cross bracket
// boundaries, i.e. it shouldn't suggest that "foo; } } bar;" is a duplicate
// -- while it might be a duplicate, we can't replace it with a function
// call (to get rid of duplicate code).
// * If something is a string literal we could perhaps ignore its content
// so that
/*
assert(declaration.parent == _libraryTypeParameterScopeBuilder);
Map<String, Builder> members = declaration.members!;
Map<String, MemberBuilder> constructors = declaration.constructors!;
Map<String, MemberBuilder> setters = declaration.setters!;
Scope classScope = new Scope(
local: members,
setters: setters,
parent: scope.withTypeVariables(typeVariables),
debugName: "class $className",
isModifiable: false);
*/
// and
/*
assert(declaration.parent == _libraryTypeParameterScopeBuilder);
Map<String, Builder> members = declaration.members!;
Map<String, MemberBuilder> constructors = declaration.constructors!;
Map<String, MemberBuilder> setters = declaration.setters!;
Scope classScope = new Scope(
local: members,
setters: setters,
parent: scope.withTypeVariables(typeVariables),
debugName: "extension $extensionName",
isModifiable: false);
*/
// could match.
Token scannedToken = _scan(data);
if (scannedToken is ErrorToken) throw "Can't operate on erroneous data";
Token token = scannedToken;
StringBuffer sb = new StringBuffer();
String space = "";
int? startOffset;
List<Token> endGroups = [];
Line? previousLine;
void endLine(Token lastToken) {
String s = sb.toString();
int lineStart = startOffset!;
sb.clear();
space = "";
startOffset = null;
Line line = new Line(s, uri, lineStart, lastToken.charEnd, previousLine);
previousLine = line;
if (!denyListed.contains(s)) mapped[s] = line;
}
while (true) {
sb.write(space);
sb.write(token.lexeme);
space = " ";
startOffset ??= token.charOffset;
if (endGroups.isNotEmpty && endGroups.last == token) {
endLine(token);
endGroups.removeLast();
} else if (token is BeginToken &&
token.type == TokenType.OPEN_CURLY_BRACKET &&
token.endGroup != null) {
// End line on a "{".
endLine(token);
endGroups.add(token.endGroup!);
} else if (token is SimpleToken && token.type == TokenType.SEMICOLON) {
// End line on a ";".
endLine(token);
} else if (token.next!.isEof) {
endLine(token);
break;
}
token = token.next!;
}
}
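/// Processes [lines] -- all pseudo-lines with the same content -- and, via
/// [_extend], tries to grow them into multi-line duplicates. Found duplicates
/// are added to [foundDuplicates]; the lines they cover are recorded in
/// [alreadyIncluded] so overlapping matches aren't reported again.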
void _extendIndexedLines(List<Line> lines, Set<Line> alreadyIncluded,
List<Duplicate> foundDuplicates, Set<String> denyListed) {
int length = lines.length;
if (length == 1) {
// The indexed line was only seen once. That's not a duplicate!
return;
}
// We move forward in the data, so if we have found a -> b -> c and already
// processed from 'a', we shouldn't process from 'b' too. We thus don't count
// already included lines here.
for (Line line in lines) {
if (alreadyIncluded.contains(line)) {
length--;
}
}
if (length <= 1) {
// We've seen this line before, but all duplicates were included in another
// duplicate we already found. Don't process and report it again!
return;
}
// Can this potential duplicate be extended?
List<ExtendedLines>? extended = _extend(lines,
existingMatchCount: 1, leastMatches: 3, denyListed: denyListed);
if (extended == null || extended.isEmpty) return;
for (ExtendedLines extendedLine in extended) {
for (Line line in extendedLine.startLines) {
int left = extendedLine.lineCount - 1;
Line l = line;
while (left > 0) {
alreadyIncluded.add(l);
left--;
l = l.next!;
}
}
List<FromToUri> where = [];
StringBuffer sb = new StringBuffer();
String? example;
for (Line firstLine in extendedLine.startLines) {
int left = extendedLine.lineCount - 1;
Line lastLine = firstLine;
if (example == null) sb.writeln(lastLine.content);
while (left > 0) {
left--;
lastLine = lastLine.next!;
if (example == null) sb.writeln(lastLine.content);
}
where.add(new FromToUri(
firstLine.uri, firstLine.startOffset, lastLine.endOffset));
example ??= sb.toString();
}
foundDuplicates.add(new Duplicate(where, example!));
}
}
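/// Finds duplicated stretches of code in the given sources.
///
/// The keys of [data] identify the sources (and are used when reporting
/// locations); the values are the corresponding Dart source texts. With
/// [verbose] set, the found duplicates are also printed.
///
/// A minimal usage sketch (the file names are made up):
///
/// ```dart
/// List<Duplicate> duplicates = findDuplicates({
///   Uri.parse("a.dart"): new File("a.dart").readAsStringSync(),
///   Uri.parse("b.dart"): new File("b.dart").readAsStringSync(),
/// }, verbose: true);
/// ```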
List<Duplicate> findDuplicates(Map<Uri, String> data, {bool verbose = false}) {
MultiMap<String, Line> indexedLines = new MultiMap();
const Set<String> denyListed = const {"}", "return ;", ";", ") ;", "else {"};
for (MapEntry<Uri, String> entry in data.entries) {
_indexLines(indexedLines, denyListed, entry.value, entry.key);
}
// TODO(jensj): The already included approach is too simple. E.g.
/*
match1;
nomatch;
match0;
match1;
match2;
match3;
vs
match1;
nomatchX;
match0;
match1;
match2;
match3;
would first find match1 match2 match3 --- then match0 match1 match2 match3.
*/
Set<Line> alreadyIncluded = {};
List<Duplicate> result = [];
for (MapEntry<String, List<Line>> entry in indexedLines.data.entries) {
_extendIndexedLines(entry.value, alreadyIncluded, result, denyListed);
}
if (verbose) {
if (result.length == 0) {
print("Didn't find any duplicates.");
} else if (result.length == 1) {
print("Found 1 duplicate:");
} else {
print("Found ${result.length} duplicates:");
}
for (Duplicate duplicate in result) {
print("Found '${duplicate.example}' at:");
for (FromToUri where in duplicate.where) {
print("${where.uri}: ${where.startOffset} -> ${where.endOffset}");
}
print("----\n\n");
}
}
return result;
}
/// Given a list of lines that match, find duplicates that match on more lines,
/// thereby extending and possibly splitting the match.
List<ExtendedLines>? _extend(List<Line> lines,
{required int existingMatchCount,
required int leastMatches,
required Set<String> denyListed}) {
MultiMap<String, Line> mapped = new MultiMap();
// E.g. for this input
// a -> b1 -> c1 -> d1
// a -> b1 -> c1 -> d2
// a -> b2 -> c2 -> d1
// a -> b2 -> c2 -> d2
// a -> b3 -> c2 -> d1
// we'd like it to be split into
// [a, b1, c1] and [a, b2, c2]
for (Line line in lines) {
Line? next = line.next;
if (next != null) {
mapped[next.content] = next;
}
}
List<ExtendedLines>? result;
for (MapEntry<String, List<Line>> entry in mapped.data.entries) {
if (entry.value.length == 1) {
continue;
}
int newMatchCount = existingMatchCount + 1;
// Don't count e.g. '}' as an actual match -> require one additional match.
// Note that we can't simply skip counting it, as that would break the count
// we use to go back and forth between the first and last matched line.
int requiredMatches = leastMatches;
if (denyListed.contains(entry.key)) {
requiredMatches++;
}
List<ExtendedLines>? extended = _extend(entry.value,
existingMatchCount: newMatchCount,
leastMatches: requiredMatches,
denyListed: denyListed);
if (extended != null) {
// Was extended further.
(result ??= []).addAll(extended);
} else if (newMatchCount >= requiredMatches) {
// Couldn't be extended further, but this was far enough.
(result ??= []).add(new ExtendedLines(
entry.value.map((Line endLine) {
Line line = endLine;
int back = existingMatchCount;
while (back > 0) {
line = line.previous!;
back--;
}
return line;
}).toList(),
newMatchCount));
}
}
return result;
}
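/// Scans [data] with the shared front-end scanner and returns the first token
/// of the resulting token stream.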
Token _scan(String data) {
ScannerConfiguration scannerConfiguration = new ScannerConfiguration(
enableTripleShift: true,
enableExtensionMethods: true,
enableNonNullable: true,
forAugmentationLibrary: false);
StringScanner scanner =
new StringScanner(data, configuration: scannerConfiguration);
Token firstToken = scanner.tokenize();
return firstToken;
}
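/// Runs the duplicate finder on the files given in [args], or -- if no
/// arguments are given -- on source_library_builder.dart as a default.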
void main(List<String> args) {
if (args.isEmpty) {
args = [
Platform.script
.resolve("../lib/src/fasta/source/source_library_builder.dart")
.toFilePath()
];
}
bool printed = false;
for (String s in args) {
File f = new File(s);
if (!f.existsSync()) continue;
String data = f.readAsStringSync();
if (printed) print("\n\n=============\n\n");
print("Output on $s:");
findDuplicates({Uri.parse(s): data}, verbose: true);
printed = true;
}
}