// Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
import 'dart:io';
import 'package:_fe_analyzer_shared/src/scanner/scanner.dart'
show ErrorToken, ScannerConfiguration, StringScanner;
import 'package:_fe_analyzer_shared/src/scanner/token.dart'
show BeginToken, SimpleToken, Token, TokenType;
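/// A stretch of code that occurs more than once: the locations it was found
/// at ([where]) and one representative copy of its text ([example]).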
class Duplicate {
final List<FromToUri> where;
final String example;
Duplicate(this.where, this.example);
@override
String toString() => "Duplicate[$example]";
}
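/// A source location: a [uri] plus start and end character offsets within it.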
class FromToUri {
final Uri uri;
final int startOffset;
final int endOffset;
FromToUri(this.uri, this.startOffset, this.endOffset);
}
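/// A "pseudo-line" as created by [_indexLines]: its normalized [content], the
/// source and offsets it came from, and links to the previous and next
/// pseudo-line of the same source.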
class Line {
String content;
Uri uri;
int startOffset;
int endOffset;
Line? previous;
Line? next;
Line(
this.content, this.uri, this.startOffset, this.endOffset, this.previous) {
if (previous != null) {
previous!.next = this;
}
}
}
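/// A match spanning [lineCount] consecutive pseudo-lines, starting at each of
/// the lines in [startLines].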
class ExtendedLines {
List<Line> startLines;
int lineCount;
ExtendedLines(this.startLines, this.lineCount);
}
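/// A map from keys to lists of values, where the `[]=` operator appends the
/// value to the list for the key instead of overwriting it.
///
/// A small illustration (hypothetical values):
///
/// ```dart
/// MultiMap<String, int> map = new MultiMap();
/// map["a"] = 1;
/// map["a"] = 2;
/// // map.data["a"] is now [1, 2].
/// ```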
class MultiMap<K, V> {
Map<K, List<V>> data = {};
void operator []=(K key, V value) {
List<V>? lookup = data[key];
if (lookup != null) {
lookup.add(value);
} else {
data[key] = [value];
}
}
}
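/// Scans [data] and splits the resulting token stream into "pseudo-lines"
/// that end at '{', ';', the token closing an earlier '{', or end of file.
/// Each pseudo-line that isn't in [denyListed] is indexed in [mapped] by its
/// space-separated lexeme content.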
void _indexLines(MultiMap<String, Line> mapped, Set<String> denyListed,
String data, Uri uri) {
// TODO(jensj): Work directly on scanned tokens, or use parser as well?
// * Should probably only operate on body content, and not cross bracket
// boundaries, i.e. it shouldn't suggest that "foo; } } bar;" is a duplicate
// -- while it might be a duplicate, we can't replace it with a function
// call (to get rid of duplicate code).
// * If something is a string literal we could perhaps ignore its content
// so that
/*
assert(declaration.parent == _libraryTypeParameterScopeBuilder);
Map<String, Builder> members = declaration.members!;
Map<String, MemberBuilder> constructors = declaration.constructors!;
Map<String, MemberBuilder> setters = declaration.setters!;
Scope classScope = new Scope(
local: members,
setters: setters,
parent: scope.withTypeVariables(typeVariables),
debugName: "class $className",
isModifiable: false);
*/
// and
/*
assert(declaration.parent == _libraryTypeParameterScopeBuilder);
Map<String, Builder> members = declaration.members!;
Map<String, MemberBuilder> constructors = declaration.constructors!;
Map<String, MemberBuilder> setters = declaration.setters!;
Scope classScope = new Scope(
local: members,
setters: setters,
parent: scope.withTypeVariables(typeVariables),
debugName: "extension $extensionName",
isModifiable: false);
*/
// could match.
Token scannedToken = _scan(data);
if (scannedToken is ErrorToken) throw "Can't operate on erroneous data";
Token token = scannedToken;
StringBuffer sb = new StringBuffer();
String space = "";
int? startOffset;
List<Token> endGroups = [];
Line? previousLine;
void endLine(Token lastToken) {
String s = sb.toString();
int lineStart = startOffset!;
sb.clear();
space = "";
startOffset = null;
Line line = new Line(s, uri, lineStart, lastToken.charEnd, previousLine);
previousLine = line;
if (!denyListed.contains(s)) mapped[s] = line;
}
while (true) {
sb.write(space);
sb.write(token.lexeme);
space = " ";
startOffset ??= token.charOffset;
if (endGroups.isNotEmpty && endGroups.last == token) {
endLine(token);
endGroups.removeLast();
} else if (token is BeginToken &&
token.type == TokenType.OPEN_CURLY_BRACKET &&
token.endGroup != null) {
// End line on a "{".
endLine(token);
endGroups.add(token.endGroup!);
} else if (token is SimpleToken && token.type == TokenType.SEMICOLON) {
// End line on a ";".
endLine(token);
} else if (token.next!.isEof) {
endLine(token);
break;
}
token = token.next!;
}
}
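/// Processes [lines] -- all pseudo-lines with the same content -- and, via
/// [_extend], tries to grow them into multi-line duplicates. Found duplicates
/// are added to [foundDuplicates]; the lines they cover are recorded in
/// [alreadyIncluded] so overlapping matches aren't reported again.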
void _extendIndexedLines(List<Line> lines, Set<Line> alreadyIncluded,
List<Duplicate> foundDuplicates, Set<String> denyListed) {
int length = lines.length;
if (length == 1) {
// The indexed line was only seen once. That's not a duplicate!
return;
}
// We move forward in the data, so if we have found a -> b -> c and already
// processed from 'a', we shouldn't process from 'b' too. We thus don't count
// already included lines here.
for (Line line in lines) {
if (alreadyIncluded.contains(line)) {
length--;
}
}
if (length <= 1) {
// We've seen this line before, but all duplicates were included in another
// duplicate we already found. Don't process and report it again!
return;
}
// Can this potential duplicate be extended?
List<ExtendedLines>? extended = _extend(lines,
existingMatchCount: 1, leastMatches: 3, denyListed: denyListed);
if (extended == null || extended.isEmpty) return;
for (ExtendedLines extendedLine in extended) {
for (Line line in extendedLine.startLines) {
int left = extendedLine.lineCount - 1;
Line l = line;
while (left > 0) {
alreadyIncluded.add(l);
left--;
l = l.next!;
}
}
List<FromToUri> where = [];
StringBuffer sb = new StringBuffer();
String? example;
for (Line firstLine in extendedLine.startLines) {
int left = extendedLine.lineCount - 1;
Line lastLine = firstLine;
if (example == null) sb.writeln(lastLine.content);
while (left > 0) {
left--;
lastLine = lastLine.next!;
if (example == null) sb.writeln(lastLine.content);
}
where.add(new FromToUri(
firstLine.uri, firstLine.startOffset, lastLine.endOffset));
example ??= sb.toString();
}
foundDuplicates.add(new Duplicate(where, example!));
}
}
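/// Finds duplicated stretches of code in the given sources.
///
/// The keys of [data] identify the sources (and are used when reporting
/// locations); the values are the corresponding Dart source texts. With
/// [verbose] set, the found duplicates are also printed.
///
/// A minimal usage sketch (the file names are made up):
///
/// ```dart
/// List<Duplicate> duplicates = findDuplicates({
///   Uri.parse("a.dart"): new File("a.dart").readAsStringSync(),
///   Uri.parse("b.dart"): new File("b.dart").readAsStringSync(),
/// }, verbose: true);
/// ```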
List<Duplicate> findDuplicates(Map<Uri, String> data, {bool verbose = false}) {
MultiMap<String, Line> indexedLines = new MultiMap();
const Set<String> denyListed = const {"}", "return ;", ";", ") ;", "else {"};
for (MapEntry<Uri, String> entry in data.entries) {
_indexLines(indexedLines, denyListed, entry.value, entry.key);
}
// TODO(jensj): The already included approach is too simple. E.g.
/*
match1;
nomatch;
match0;
match1;
match2;
match3;
vs
match1;
nomatchX;
match0;
match1;
match2;
match3;
would first find match1 match2 match3 --- then match0 match1 match2 match3.
*/
Set<Line> alreadyIncluded = {};
List<Duplicate> result = [];
for (MapEntry<String, List<Line>> entry in indexedLines.data.entries) {
_extendIndexedLines(entry.value, alreadyIncluded, result, denyListed);
}
if (verbose) {
if (result.length == 0) {
print("Didn't find any duplicates.");
} else if (result.length == 1) {
print("Found 1 duplicate:");
} else {
print("Found ${result.length} duplicates:");
}
for (Duplicate duplicate in result) {
print("Found '${duplicate.example}' at:");
for (FromToUri where in duplicate.where) {
print("${where.uri}: ${where.startOffset} -> ${where.endOffset}");
}
print("----\n\n");
}
}
return result;
}
/// Given a list of lines that match, find duplicates that match on more lines,
/// thereby extending and possibly splitting the match.
List<ExtendedLines>? _extend(List<Line> lines,
{required int existingMatchCount,
required int leastMatches,
required Set<String> denyListed}) {
MultiMap<String, Line> mapped = new MultiMap();
// E.g. for this input
// a -> b1 -> c1 -> d1
// a -> b1 -> c1 -> d2
// a -> b2 -> c2 -> d1
// a -> b2 -> c2 -> d2
// a -> b3 -> c2 -> d1
// we'd like it to be split into
// [a, b1, c1] and [a, b2, c2]
for (Line line in lines) {
Line? next = line.next;
if (next != null) {
mapped[next.content] = next;
}
}
List<ExtendedLines>? result;
for (MapEntry<String, List<Line>> entry in mapped.data.entries) {
if (entry.value.length == 1) {
continue;
}
int newMatchCount = existingMatchCount + 1;
// Don't count e.g. '}' as an actual match -> require one additional match.
// Note that we can't simply skip counting it, as that would break the count
// we use to go back and forth between the first and last matched line.
int requiredMatches = leastMatches;
if (denyListed.contains(entry.key)) {
requiredMatches++;
}
List<ExtendedLines>? extended = _extend(entry.value,
existingMatchCount: newMatchCount,
leastMatches: requiredMatches,
denyListed: denyListed);
if (extended != null) {
// Was extended further.
(result ??= []).addAll(extended);
} else if (newMatchCount >= requiredMatches) {
// Couldn't be extended further, but this was far enough.
(result ??= []).add(new ExtendedLines(
entry.value.map((Line endLine) {
Line line = endLine;
int back = existingMatchCount;
while (back > 0) {
line = line.previous!;
back--;
}
return line;
}).toList(),
newMatchCount));
}
}
return result;
}
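/// Scans [data] with the shared front-end scanner and returns the first token
/// of the resulting token stream.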
Token _scan(String data) {
ScannerConfiguration scannerConfiguration = new ScannerConfiguration(
enableTripleShift: true,
enableExtensionMethods: true,
enableNonNullable: true,
forAugmentationLibrary: false);
StringScanner scanner =
new StringScanner(data, configuration: scannerConfiguration);
Token firstToken = scanner.tokenize();
return firstToken;
}
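/// Runs the duplicate finder on the files given in [args], or -- if no
/// arguments are given -- on source_library_builder.dart as a default.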
void main(List<String> args) {
if (args.isEmpty) {
args = [
Platform.script
.resolve("../lib/src/fasta/source/source_library_builder.dart")
.toFilePath()
];
}
bool printed = false;
for (String s in args) {
File f = new File(s);
if (!f.existsSync()) continue;
String data = f.readAsStringSync();
if (printed) print("\n\n=============\n\n");
print("Output on $s:");
findDuplicates({Uri.parse(s): data}, verbose: true);
printed = true;
}
}