runtime/tools/dartfuzz/collect_data.py - sdk.git - Git at Google

 #!/usr/bin/env python3
 # Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file
 # for details. All rights reserved. Use of this source code is governed by a
 # BSD-style license that can be found in the LICENSE file.
 #
 """Webscraper for make_a_fuzz nightly cluster run results.

 Given the uri of a make_a_fuzz run, this script will first
 extract the links pointing to each of the individual shards
 and then parse the output generated by each shard to
 find divergences reported by the dartfuzz_test.dart program,
 concatenate all output, or summarize all test results.

 Example:
   collect_data.py --type sum
       https://ci.chromium.org/p/dart/builders/ci.sandbox/fuzz-linux/303
 """

 # This script may require a one-time install of BeautifulSoup and requests:
 # sudo apt-get install python3-bs4 python3-requests

 import argparse
 import re
 import sys

 from bs4 import BeautifulSoup

 import requests


 # Matches shard raw stdout to extract divergence reports.
 # Group 1 is the report itself: it starts at "Isolate", must contain
 # " !DIVERGENCE! " and ends right before the next "Isolate ". The pattern is
 # a regular (non-raw) string, so "\n" is a real newline and "(\n|.)" lets a
 # report span several lines. A report that is not followed by another
 # "Isolate " is therefore not matched. re.MULTILINE has no effect here since
 # the pattern uses neither "^" nor "$".
 P_DIV = re.compile("(Isolate.+? !DIVERGENCE! (\n|.)+?)Isolate ", re.MULTILINE)

 # Matches shard raw stdout to extract report summaries.
 # The six groups are, in order: tests, success, rerun, skipped, timeout and
 # divergences. With re.MULTILINE the "^" anchors at the start of every line.
 P_SUM = re.compile(
     r"^Tests: (\d+) Success: (\d+) "
     r"\(Rerun: (\d+)\) Skipped: (\d+) "
     r"Timeout: (\d+) Divergences: (\d+)", re.MULTILINE)

 # Matches uri to extract shard number.
 # Group 1 is the run of digits that follows "make_a_fuzz_shard_".
 P_SHARD = re.compile(r".*make_a_fuzz_shard_(\d+)")


 def get_shard_links(uri):
     """Return the raw-stdout links of every shard of a make_a_fuzz run.

     Args:
         uri: Address of the page to scan for shard links.

     Returns:
         A list, in page order, holding the href of every anchor whose text
         contains "stdout" and whose href names a make_a_fuzz shard (trigger
         steps excluded), each with "?format=raw" appended.
     """
     resp = requests.get(uri)
     soup = BeautifulSoup(resp.text, "html.parser")
     links = []
     # find_all is the supported spelling; findAll is a deprecated alias.
     for a in soup.find_all("a"):
         if "stdout" not in a.text:
             continue
         # Anchors without an href are skipped instead of raising KeyError.
         href = a.get("href")
         if href and "make_a_fuzz_shard" in href and "__trigger__" not in href:
             links.append(href + "?format=raw")
     return links


 def print_reencoded(text):
     """Print text with non-ASCII characters dropped and escapes expanded."""
     # Re-encoding avoids breaking some terminals: anything outside ASCII is
     # discarded, then backslash escape sequences are decoded.
     ascii_bytes = text.encode("ascii", errors="ignore")
     printable = ascii_bytes.decode("unicode-escape")
     print(printable)


 def print_output_all(text):
     """Print the complete shard output through print_reencoded."""
     print_reencoded(text)


 def print_output_div(shard, text, keywords):
     """Print the divergence reports of one shard, minus filtered ones.

     Args:
         shard: Shard number as a string; shown as progress on stderr.
         text: Raw stdout of the shard.
         keywords: A report containing any of these strings is not printed.
     """
     # The carriage return keeps the progress indicator on a single line.
     sys.stderr.write("Shard: " + shard + "  \r")
     for groups in P_DIV.findall(text):
         # findall yields one tuple per match; the report is the first group.
         report = groups[0]
         if any(word in report for word in keywords):
             continue
         print_reencoded(report)


 def get_output_sum(shard, text, should_print, s=[0, 0, 0, 0, 0, 0], divs=[]):
     """Fold the summary lines of one shard into the running totals.

     Args:
         shard: Shard number as a string; used in messages and stored in divs.
         text: Raw stdout of the shard.
         should_print: When true, print the running totals on one line.
         s: Running totals in P_SUM group order (tests, success, rerun,
             skipped, timeout, divergences). Updated in place.
         divs: Shards recorded as failing so far. Updated in place.

     Returns:
         The totals list s, or None when text contains no summary line.
     """
     # NOTE: the mutable defaults are deliberate. When s and divs are not
     # passed, the default lists persist between calls and that is what
     # accumulates the totals from shard to shard.
     m = P_SUM.findall(text)
     if not m:
         # End the line so the diagnostic is not glued to whatever is
         # written next.
         sys.stderr.write(
             "Failed to parse shard %s stdout for summary\n" % shard)
         return None
     for test in m:
         # NOTE(review): only a divergence count of exactly 1 marks the shard
         # as failing -- confirm whether "> 0" was intended.
         if int(test[-1]) == 1:
             divs.append(shard)
         for i in range(len(s)):
             s[i] += int(test[i])
     if should_print:
         failing = ", ".join(divs) if divs else "none"
         # "\r" with end="" makes the next call overwrite this line.
         print(
             "Tests: %d Success: %d (Rerun: %d) Skipped: %d Timeout: %d "
             "Divergences: %d (failing shards: %s)    \r" %
             tuple(s + [failing]),
             end="")
     return s


 def get_stats(uri, output_type, keywords, output_csv):
     """Download one shard's stdout and process it as output_type demands.

     Args:
         uri: Raw-stdout link of a shard.
         output_type: "all", "div" or "sum".
         keywords: Filter words, forwarded to print_output_div for "div".
         output_csv: For "sum", suppresses the running-totals line when true.

     Returns:
         Whatever get_output_sum returns for "sum"; None for every other
         output type.
     """
     resp = requests.get(uri)
     if output_type == "sum":
         return get_output_sum(P_SHARD.findall(uri)[0], resp.text,
                               not output_csv)
     if output_type == "div":
         print_output_div(P_SHARD.findall(uri)[0], resp.text, keywords)
     elif output_type == "all":
         print_output_all(resp.text)
     return None


 def main():
     """Parse the command line, process every shard and print the result.

     Exits with status 0 on success, 2 on a command-line error, -1 when the
     run page yields no shard links and 1 when CSV output was requested but
     no shard summary could be parsed.
     """
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument(
         "--type",
         choices=("div", "sum", "all"),
         required=True,
         help=
         "Select output type (div: divergence report, sum: summary, all: complete stdout)"
     )
     parser.add_argument(
         "--filter",
         nargs="+",
         default=[],
         help="Do not include divergences containing these keywords.")
     parser.add_argument(
         "--output-csv",
         dest="output_csv",
         action="store_true",
         default=False,
         help=
         "Print output in CSV format to stdout. Only supported for --type=sum")
     parser.add_argument(
         "uri",
         type=str,
         help=
         "Uri of one make_a_fuzz run from https://ci.chromium.org/p/dart/builders/ci.sandbox/fuzz-linux."
     )
     args = parser.parse_args()
     if args.type != 'sum' and args.output_csv:
         # parser.error reports on stderr and exits with status 2, so a bad
         # flag combination no longer looks like a successful run.
         parser.error('--output-csv can only be provided for --type=sum')

     shard_links = get_shard_links(args.uri)
     if not shard_links:
         print("Invalid run")
         sys.exit(-1)

     stats = None
     for link in shard_links:
         result = get_stats(link, args.type, args.filter, args.output_csv)
         # A shard whose summary cannot be parsed yields None; keep the last
         # real totals so one bad shard does not break the CSV line below.
         if result is not None:
             stats = result
     if args.output_csv:
         if stats is None:
             sys.stderr.write("Error: no shard summary could be parsed\n")
             sys.exit(1)
         print("%d,%d,%d,%d,%d,%d" % tuple(stats))
     else:
         # Finish the line that the "\r"-terminated progress output left open.
         print("")
     sys.exit(0)


 # Run the scraper only when this file is executed as a script, not when it
 # is imported as a module.
 if __name__ == "__main__":
     main()
	#!/usr/bin/env python3
	# Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
	# for details. All rights reserved. Use of this source code is governed by a
	# BSD-style license that can be found in the LICENSE file.
	#
	"""Webscraper for make_a_fuzz nightly cluster run results.

	Given the uri of a make_a_fuzz run, this script will first
	extract the links pointing to each of the individual shards
	and then parse the output generated by each shard to
	find divergences reported by the dartfuzz_test.dart program,
	concatenate all output, or summarize all test results.

	Example:
	collect_data.py --type sum
	https://ci.chromium.org/p/dart/builders/ci.sandbox/fuzz-linux/303
	"""

	# This script may require a one time install of BeautifulSoup:
	# sudo apt-get install python3-bs4

	import argparse
	import re
	import sys

	from bs4 import BeautifulSoup

	import requests


	# Matches shard raw stdout to extract divergence reports.
	# Group 1 is the report itself: it starts at "Isolate", must contain
	# " !DIVERGENCE! " and ends right before the next "Isolate ". "(\n|.)" is
	# an alternation of newline and any other character so that a report can
	# span several lines; the pipe must not be escaped, otherwise the pattern
	# would demand a literal "|" after every newline.
	P_DIV = re.compile("(Isolate.+? !DIVERGENCE! (\n|.)+?)Isolate ", re.MULTILINE)

	# Matches shard raw stdout to extract report summaries.
	# The six groups are, in order: tests, success, rerun, skipped, timeout and
	# divergences. With re.MULTILINE the "^" anchors at the start of every line.
	P_SUM = re.compile(
	    r"^Tests: (\d+) Success: (\d+) "
	    r"\(Rerun: (\d+)\) Skipped: (\d+) "
	    r"Timeout: (\d+) Divergences: (\d+)", re.MULTILINE)

	# Matches uri to extract shard number.
	# Group 1 is the run of digits that follows "make_a_fuzz_shard_".
	P_SHARD = re.compile(r".*make_a_fuzz_shard_(\d+)")


	def get_shard_links(uri):
	    """Return the raw-stdout links of every shard of a make_a_fuzz run.

	    Args:
	        uri: Address of the page to scan for shard links.

	    Returns:
	        A list, in page order, holding the href of every anchor whose text
	        contains "stdout" and whose href names a make_a_fuzz shard (trigger
	        steps excluded), each with "?format=raw" appended.
	    """
	    resp = requests.get(uri)
	    soup = BeautifulSoup(resp.text, "html.parser")
	    links = []
	    # find_all is the supported spelling; findAll is a deprecated alias.
	    for a in soup.find_all("a"):
	        if "stdout" not in a.text:
	            continue
	        # Anchors without an href are skipped instead of raising KeyError.
	        href = a.get("href")
	        if href and "make_a_fuzz_shard" in href and "__trigger__" not in href:
	            links.append(href + "?format=raw")
	    return links


	def print_reencoded(text):
	    """Print text with non-ASCII characters dropped and escapes expanded."""
	    # Re-encoding avoids breaking some terminals: anything outside ASCII is
	    # discarded, then backslash escape sequences are decoded.
	    print(text.encode("ascii", errors="ignore").decode("unicode-escape"))


	def print_output_all(text):
	    """Print the complete shard output through print_reencoded."""
	    print_reencoded(text)


	def print_output_div(shard, text, keywords):
	    """Print the divergence reports of one shard, minus filtered ones.

	    Args:
	        shard: Shard number as a string; shown as progress on stderr.
	        text: Raw stdout of the shard.
	        keywords: A report containing any of these strings is not printed.
	    """
	    # The carriage return keeps the progress indicator on a single line.
	    sys.stderr.write("Shard: " + shard + " \r")
	    for groups in P_DIV.findall(text):
	        # findall yields one tuple per match; the report is the first group.
	        report = groups[0]
	        if any(word in report for word in keywords):
	            continue
	        print_reencoded(report)


	def get_output_sum(shard, text, should_print, s=[0, 0, 0, 0, 0, 0], divs=[]):
	    """Fold the summary lines of one shard into the running totals.

	    Args:
	        shard: Shard number as a string; used in messages and stored in divs.
	        text: Raw stdout of the shard.
	        should_print: When true, print the running totals on one line.
	        s: Running totals in P_SUM group order (tests, success, rerun,
	            skipped, timeout, divergences). Updated in place.
	        divs: Shards recorded as failing so far. Updated in place.

	    Returns:
	        The totals list s, or None when text contains no summary line.
	    """
	    # NOTE: the mutable defaults are deliberate. When s and divs are not
	    # passed, the default lists persist between calls and that is what
	    # accumulates the totals from shard to shard.
	    m = P_SUM.findall(text)
	    if not m:
	        # End the line so the diagnostic is not glued to whatever is
	        # written next.
	        sys.stderr.write(
	            "Failed to parse shard %s stdout for summary\n" % shard)
	        return None
	    for test in m:
	        # NOTE(review): only a divergence count of exactly 1 marks the shard
	        # as failing -- confirm whether "> 0" was intended.
	        if int(test[-1]) == 1:
	            divs.append(shard)
	        for i in range(len(s)):
	            s[i] += int(test[i])
	    if should_print:
	        failing = ", ".join(divs) if divs else "none"
	        # "\r" with end="" makes the next call overwrite this line.
	        print(
	            "Tests: %d Success: %d (Rerun: %d) Skipped: %d Timeout: %d "
	            "Divergences: %d (failing shards: %s) \r" %
	            tuple(s + [failing]),
	            end="")
	    return s


	def get_stats(uri, output_type, keywords, output_csv):
	    """Download one shard's stdout and process it as output_type demands.

	    Args:
	        uri: Raw-stdout link of a shard.
	        output_type: "all", "div" or "sum".
	        keywords: Filter words, forwarded to print_output_div for "div".
	        output_csv: For "sum", suppresses the running-totals line when true.

	    Returns:
	        Whatever get_output_sum returns for "sum"; None for every other
	        output type.
	    """
	    resp = requests.get(uri)

	    if output_type == "all":
	        print_output_all(resp.text)
	    elif output_type == "div":
	        shard = P_SHARD.findall(uri)[0]
	        print_output_div(shard, resp.text, keywords)
	    elif output_type == "sum":
	        shard = P_SHARD.findall(uri)[0]
	        should_print = not output_csv
	        return get_output_sum(shard, resp.text, should_print)
	    return None


	def main():
	    """Parse the command line, process every shard and print the result.

	    Exits with status 0 on success, 2 on a command-line error, -1 when the
	    run page yields no shard links and 1 when CSV output was requested but
	    no shard summary could be parsed.
	    """
	    parser = argparse.ArgumentParser(description=__doc__)
	    parser.add_argument(
	        "--type",
	        choices=("div", "sum", "all"),
	        required=True,
	        help=
	        "Select output type (div: divergence report, sum: summary, all: complete stdout)"
	    )
	    parser.add_argument(
	        "--filter",
	        nargs="+",
	        default=[],
	        help="Do not include divergences containing these keywords.")
	    parser.add_argument(
	        "--output-csv",
	        dest="output_csv",
	        action="store_true",
	        default=False,
	        help=
	        "Print output in CSV format to stdout. Only supported for --type=sum")
	    parser.add_argument(
	        "uri",
	        type=str,
	        help=
	        "Uri of one make_a_fuzz run from https://ci.chromium.org/p/dart/builders/ci.sandbox/fuzz-linux."
	    )
	    args = parser.parse_args()
	    if args.type != 'sum' and args.output_csv:
	        # parser.error reports on stderr and exits with status 2, so a bad
	        # flag combination no longer looks like a successful run.
	        parser.error('--output-csv can only be provided for --type=sum')

	    shard_links = get_shard_links(args.uri)
	    if not shard_links:
	        print("Invalid run")
	        sys.exit(-1)

	    stats = None
	    for link in shard_links:
	        result = get_stats(link, args.type, args.filter, args.output_csv)
	        # A shard whose summary cannot be parsed yields None; keep the last
	        # real totals so one bad shard does not break the CSV line below.
	        if result is not None:
	            stats = result
	    if args.output_csv:
	        if stats is None:
	            sys.stderr.write("Error: no shard summary could be parsed\n")
	            sys.exit(1)
	        print("%d,%d,%d,%d,%d,%d" % tuple(stats))
	    else:
	        # Finish the line that the "\r"-terminated progress output left open.
	        print("")
	    sys.exit(0)


	# Run the scraper only when this file is executed as a script, not when it
	# is imported as a module.
	if __name__ == "__main__":
	    main()