blob: 1683a83f7e51ba2cfa051077bd013fbcfd4e0e3b [file] [log] [blame]
#!/usr/bin/env python3
# Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
# for details. All rights reserved. Use of this source code is governed by a
# BSD-style license that can be found in the LICENSE file.
#
"""Webscraper for make_a_fuzz nightly cluster run results.
Given the uri of a make_a_fuzz run, this script will first
extract the links pointing to each of the individual shards
and then parse the output generated by each shard to
find divergences reported by the dartfuzz_test.dart program,
concatenate all output, or summarize all test results.
Example:
collect_data.py --type sum
https://ci.chromium.org/p/dart/builders/ci.sandbox/fuzz-linux/303
"""
# This script may require a one-time install of BeautifulSoup and requests:
# sudo apt-get install python3-bs4 python3-requests
import argparse
import re
import sys
from bs4 import BeautifulSoup
import requests
# Matches shard raw stdout to extract divergence reports. Group 1 is one
# complete report: it starts at an "Isolate ... !DIVERGENCE! " line and runs
# up to, but not including, the next "Isolate " marker. The terminator is a
# lookahead so it is not consumed by the match; consuming it would make
# findall() skip a divergence report that directly follows another one.
# A report that is not followed by any "Isolate " marker is not matched.
P_DIV = re.compile(r"(Isolate.+? !DIVERGENCE! (\n|.)+?)(?=Isolate )",
                   re.MULTILINE)
# Matches shard raw stdout to extract report summaries. The six groups are,
# in order: Tests, Success, Rerun, Skipped, Timeout, Divergences. With
# re.MULTILINE a summary is only recognized at the start of a line.
P_SUM = re.compile(
    r"^Tests: (\d+) Success: (\d+) "
    r"\(Rerun: (\d+)\) Skipped: (\d+) "
    r"Timeout: (\d+) Divergences: (\d+)", re.MULTILINE)
# Matches uri to extract shard number (the digits that follow
# "make_a_fuzz_shard_").
P_SHARD = re.compile(r".*make_a_fuzz_shard_(\d+)")
def get_shard_links(uri):
    """Return the raw-stdout links of the shards listed on a run page.

    Fetches `uri`, parses the response as HTML and collects the href of
    every anchor whose text contains "stdout" and whose href contains
    "make_a_fuzz_shard" but not "__trigger__". Each collected href gets
    "?format=raw" appended.

    Args:
        uri: address of the make_a_fuzz run page.

    Returns:
        A list of link strings, in page order; empty when none qualify.
    """
    resp = requests.get(uri)
    soup = BeautifulSoup(resp.text, "html.parser")
    links = []
    # find_all is the current name of the deprecated findAll alias.
    for anchor in soup.find_all("a"):
        if "stdout" not in anchor.text:
            continue
        # An anchor without an href attribute cannot be a shard link; .get()
        # avoids the KeyError that anchor["href"] raises for such anchors.
        href = anchor.get("href")
        if not href:
            continue
        if "make_a_fuzz_shard" in href and "__trigger__" not in href:
            links.append(href + "?format=raw")
    return links
def print_reencoded(text):
    """Print `text` reduced to ASCII, with backslash escapes interpreted.

    Non-ASCII characters are dropped. Backslash escape sequences in the
    remaining text (such as a literal backslash followed by "t") are then
    decoded with the "unicode-escape" codec. If the text contains a malformed
    escape sequence, the ASCII text is printed without escape decoding
    instead of raising.

    Args:
        text: the string to print.
    """
    # Re-encoding avoids breaking some terminals.
    ascii_bytes = text.encode("ascii", errors="ignore")
    try:
        printable = ascii_bytes.decode("unicode-escape")
    except UnicodeDecodeError:
        # Shard output is arbitrary program output; a stray backslash (for
        # example "\u" not followed by four hex digits) must not abort the
        # whole run, so fall back to the undecoded ASCII text.
        printable = ascii_bytes.decode("ascii")
    print(printable)
def print_output_all(text):
    """Print the complete shard stdout `text` by way of print_reencoded."""
    print_reencoded(text)
def print_output_div(shard, text, keywords):
    """Print the divergence reports found in one shard's stdout.

    A progress note naming the shard is written to stderr first. Every
    report matched by P_DIV in `text` is then printed with print_reencoded,
    except reports that contain any of the strings in `keywords`.

    Args:
        shard: shard identifier as a string (used in the progress note).
        text: raw stdout of the shard.
        keywords: iterable of strings; a report containing one is skipped.
    """
    sys.stderr.write("Shard: " + shard + " \r")
    for match in P_DIV.findall(text):
        # Group 1 (index 0 of the findall tuple) holds the whole report.
        report = match[0]
        if not any(word in report for word in keywords):
            print_reencoded(report)
def get_output_sum(shard, text, should_print, s=[0, 0, 0, 0, 0, 0], divs=[]):
    """Add the summaries found in one shard's stdout to the running totals.

    NOTE: the mutable defaults of `s` and `divs` are deliberate. They act as
    accumulators shared by all calls that do not pass them explicitly, which
    is how totals build up across shards. Pass fresh lists to start from
    zero.

    Args:
        shard: shard identifier as a string.
        text: raw stdout of the shard.
        should_print: when true, print the running totals as a progress line
            that ends in a carriage return and no newline.
        s: running totals, updated in place, in P_SUM group order (Tests,
            Success, Rerun, Skipped, Timeout, Divergences).
        divs: shards seen with at least one divergence, updated in place.

    Returns:
        `s` (the same list object) after the update, or None when `text`
        contains no summary line.
    """
    summaries = P_SUM.findall(text)
    if not summaries:
        # Terminate the line so the message does not run into later output.
        sys.stderr.write(
            "Failed to parse shard %s stdout for summary\n" % shard)
        return None
    for summary in summaries:
        # The last field is the divergence count. Any non-zero count marks
        # the shard as failing, and each shard is listed only once even if
        # several of its summaries report divergences.
        if int(summary[-1]) > 0 and shard not in divs:
            divs.append(shard)
        for i, count in enumerate(summary[:len(s)]):
            s[i] += int(count)
    if should_print:
        print(
            "Tests: %d Success: %d (Rerun: %d) Skipped: %d Timeout: %d "
            "Divergences: %d (failing shards: %s) \r" %
            tuple(s + [", ".join(divs) if divs else "none"]),
            end="")
    return s
def get_stats(uri, output_type, keywords, output_csv):
    """Fetch one shard's stdout and process it according to `output_type`.

    Args:
        uri: address of the shard's raw stdout.
        output_type: "all" prints the whole text, "div" prints divergence
            reports, "sum" accumulates the summary; any other value only
            performs the fetch.
        keywords: forwarded to print_output_div for "div".
        output_csv: for "sum", progress printing is enabled only when this
            is false.

    Returns:
        The result of get_output_sum for "sum"; None otherwise.
    """
    resp = requests.get(uri)
    if output_type == "all":
        print_output_all(resp.text)
        return None
    if output_type == "div":
        shard_id = P_SHARD.findall(uri)[0]
        print_output_div(shard_id, resp.text, keywords)
        return None
    if output_type != "sum":
        return None
    shard_id = P_SHARD.findall(uri)[0]
    return get_output_sum(shard_id, resp.text, not output_csv)
def main():
    """Parse the command line, process every shard of the run and exit.

    Exit status: 0 on success; 2 (via argparse) when --output-csv is
    combined with a type other than "sum"; -1 when the run page yields no
    shard links; 1 when CSV output was requested but no shard summary could
    be parsed.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--type",
        choices=("div", "sum", "all"),
        required=True,
        help=
        "Select output type (div: divergence report, sum: summary, all: complete stdout)"
    )
    parser.add_argument(
        "--filter",
        nargs="+",
        default=[],
        help="Do not include divergences containing these keywords.")
    parser.add_argument(
        "--output-csv",
        dest="output_csv",
        action="store_true",
        default=False,
        help=
        "Print output in CSV format to stdout. Only supported for --type=sum")
    parser.add_argument(
        "uri",
        type=str,
        help=
        "Uri of one make_a_fuzz run from https://ci.chromium.org/p/dart/builders/ci.sandbox/fuzz-linux."
    )
    args = parser.parse_args()
    if args.output_csv and args.type != "sum":
        # parser.error() writes usage plus the message to stderr and exits
        # with status 2, so misuse no longer looks like a successful run.
        parser.error("--output-csv can only be provided for --type=sum")
    shard_links = get_shard_links(args.uri)
    if not shard_links:
        print("Invalid run")
        sys.exit(-1)
    stats = None
    for link in shard_links:
        result = get_stats(link, args.type, args.filter, args.output_csv)
        # For --type=sum get_stats returns the running totals, or None for a
        # shard whose summary could not be parsed. Keep the latest totals so
        # one unparsable shard does not discard them.
        if result is not None:
            stats = result
    if args.output_csv:
        if stats is None:
            sys.stderr.write("No shard summary could be parsed\n")
            sys.exit(1)
        print("%d,%d,%d,%d,%d,%d" % tuple(stats))
    else:
        # Finish the line left open by carriage-return progress output.
        print("")
    sys.exit(0)
if __name__ == "__main__":
main()