|  | #!/usr/bin/env python3 | 
|  | # Copyright (c) 2019, the Dart project authors.  Please see the AUTHORS file | 
|  | # for details. All rights reserved. Use of this source code is governed by a | 
|  | # BSD-style license that can be found in the LICENSE file. | 
|  | # | 
|  | """Webscraper for make_a_fuzz nightly cluster run results. | 
|  |  | 
|  | Given the uri of a make_a_fuzz run, this script will first | 
|  | extract the links pointing to each of the individual shards | 
|  | and then parse the output generated by each shard to | 
|  | find divergences reported by the dartfuzz_test.dart program, | 
|  | concatenate all output, or summarize all test results. | 
|  |  | 
|  | Example: | 
|  | collect_data.py --type sum | 
|  | https://ci.chromium.org/p/dart/builders/ci.sandbox/fuzz-linux/303 | 
|  | """ | 
|  |  | 
|  | # This script may require a one time install of BeautifulSoup: | 
|  | # sudo apt-get install python3-bs4 | 
|  |  | 
|  | import argparse | 
|  | import re | 
|  | import sys | 
|  |  | 
|  | from bs4 import BeautifulSoup | 
|  |  | 
|  | import requests | 
|  |  | 
|  |  | 
# Matches shard raw stdout to extract divergence reports.
# Group 1 is the report itself: it starts at "Isolate", must contain
# " !DIVERGENCE! ", and stops just before the next "Isolate " (which is
# required, so a report with no later "Isolate " is not matched).
# The nested (\n|.) group lets the lazy body span newlines; because of it
# findall() yields 2-tuples and callers take element 0. re.MULTILINE has no
# effect here since the pattern contains no ^ or $ anchors.
P_DIV = re.compile("(Isolate.+? !DIVERGENCE! (\n|.)+?)Isolate ", re.MULTILINE)

# Matches shard raw stdout to extract report summaries.
# Anchored at the start of a line (re.MULTILINE). The six integer groups
# are, in order: Tests, Success, Rerun, Skipped, Timeout, Divergences.
P_SUM = re.compile(
    r"^Tests: (\d+) Success: (\d+) "
    r"\(Rerun: (\d+)\) Skipped: (\d+) "
    r"Timeout: (\d+) Divergences: (\d+)", re.MULTILINE)

# Matches uri to extract shard number.
# Group 1 is the run of digits that follows "make_a_fuzz_shard_".
P_SHARD = re.compile(r".*make_a_fuzz_shard_(\d+)")
|  |  | 
|  |  | 
def get_shard_links(uri):
    """Return the raw-stdout links of every shard listed on a run page.

    The page at `uri` is downloaded and parsed as HTML. An anchor is kept
    when its link text contains "stdout" and its target contains
    "make_a_fuzz_shard" but not "__trigger__".

    Args:
        uri: Address of the make_a_fuzz run page to scrape.

    Returns:
        A list of link targets in page order, each with "?format=raw"
        appended. The list is empty when no anchor qualifies.
    """
    resp = requests.get(uri)
    soup = BeautifulSoup(resp.text, "html.parser")
    links = []
    # href=True restricts the search to anchors that actually carry a
    # target; indexing a["href"] on a bare <a> would raise KeyError.
    # find_all is the current name of the deprecated findAll alias.
    for a in soup.find_all("a", href=True):
        if "stdout" not in a.text:
            continue
        href = a["href"]
        if "make_a_fuzz_shard" in href and "__trigger__" not in href:
            links.append(href + "?format=raw")
    return links
|  |  | 
|  |  | 
def print_reencoded(text):
    """Print `text` after an ASCII round trip.

    Characters that cannot be encoded as ASCII are dropped, and the
    remaining bytes are decoded with the "unicode-escape" codec, so
    backslash escape sequences in the input are interpreted before the
    result is written to stdout.
    """
    # Re-encoding avoids breaking some terminals.
    ascii_only = text.encode("ascii", errors="ignore")
    printable = ascii_only.decode("unicode-escape")
    print(printable)
|  |  | 
|  |  | 
def print_output_all(text):
    """Print a shard's complete stdout (the "all" output type).

    The text is passed unchanged to print_reencoded, which handles the
    terminal-safe re-encoding.
    """
    print_reencoded(text)
|  |  | 
|  |  | 
def print_output_div(shard, text, keywords):
    """Print the divergence reports found in one shard's stdout.

    A progress marker naming the shard is written to stderr first. Every
    P_DIV match in `text` is then printed through print_reencoded, except
    reports that contain any of the strings in `keywords`.

    Args:
        shard: Shard number as a string, used only in the progress marker.
        text: Raw stdout of the shard.
        keywords: Iterable of substrings; a report containing one is skipped.
    """
    sys.stderr.write("Shard: " + shard + "  \r")
    for groups in P_DIV.findall(text):
        # findall returns one tuple per match; element 0 is the full report.
        report = groups[0]
        if any(word in report for word in keywords):
            continue
        print_reencoded(report)
|  |  | 
|  |  | 
def get_output_sum(shard, text, should_print, s=[0, 0, 0, 0, 0, 0], divs=[]):
    """Fold one shard's summary lines into the running totals.

    NOTE: the mutable default arguments are deliberate. get_stats calls
    this function without `s` and `divs`, so the default lists persist
    between calls and act as the accumulators for the whole run. Do not
    replace them with None defaults without also changing that caller.

    Args:
        shard: Shard number as a string, recorded when the shard diverged.
        text: Raw stdout of the shard.
        should_print: When true, print the running totals on one line
            ending in a carriage return so the next call overwrites it.
        s: Running totals, six integers in P_SUM group order (Tests,
            Success, Rerun, Skipped, Timeout, Divergences). Updated in place.
        divs: Shards that reported at least one divergence. Updated in place.

    Returns:
        The updated `s` list (the same object that was passed in), or None
        when `text` contains no summary line.
    """
    matches = P_SUM.findall(text)
    if not matches:
        sys.stderr.write(
            "Failed to parse shard %s stdout for summary\n" % shard)
        return None
    for counts in matches:
        # Any non-zero divergence count marks the shard as failing; list
        # each shard only once even if several summary lines report one.
        if int(counts[-1]) > 0 and shard not in divs:
            divs.append(shard)
        for i, value in enumerate(counts):
            s[i] += int(value)
    if should_print:
        print(
            "Tests: %d Success: %d (Rerun: %d) Skipped: %d Timeout: %d "
            "Divergences: %d (failing shards: %s)    \r" %
            tuple(s + [", ".join(divs) if divs else "none"]),
            end="")
    return s
|  |  | 
|  |  | 
def get_stats(uri, output_type, keywords, output_csv):
    """Download one shard's stdout and handle it per `output_type`.

    Args:
        uri: Raw-stdout link of the shard.
        output_type: "all" prints the full text, "div" prints divergence
            reports, "sum" accumulates the summary counters.
        keywords: Filter words forwarded to print_output_div.
        output_csv: When true, the "sum" handler does not print its
            progress line.

    Returns:
        The value returned by get_output_sum for "sum"; None otherwise.
    """
    response = requests.get(uri)

    if output_type == "all":
        print_output_all(response.text)
        return None
    if output_type == "div":
        shard_id = P_SHARD.findall(uri)[0]
        print_output_div(shard_id, response.text, keywords)
        return None
    if output_type == "sum":
        shard_id = P_SHARD.findall(uri)[0]
        return get_output_sum(shard_id, response.text, not output_csv)
    return None
|  |  | 
|  |  | 
def main():
    """Parse the command line, scrape every shard of the run, and report.

    Exits with status 0 on success, with argparse's usage error when
    --output-csv is combined with a type other than "sum", with -1 when
    the run page lists no shard links, and with 1 when CSV output was
    requested but no shard produced a parsable summary.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--type",
        choices=("div", "sum", "all"),
        required=True,
        help=
        "Select output type (div: divergence report, sum: summary, all: complete stdout)"
    )
    parser.add_argument(
        "--filter",
        nargs="+",
        default=[],
        help="Do not include divergences containing these keywords.")
    parser.add_argument(
        "--output-csv",
        dest="output_csv",
        action="store_true",
        default=False,
        help=
        "Print output in CSV format to stdout. Only supported for --type=sum")
    parser.add_argument(
        "uri",
        type=str,
        help=
        "Uri of one make_a_fuzz run from https://ci.chromium.org/p/dart/builders/ci.sandbox/fuzz-linux."
    )
    args = parser.parse_args()
    if args.output_csv and args.type != "sum":
        # parser.error reports on stderr and exits non-zero, so scripts
        # driving this tool can detect the misuse.
        parser.error("--output-csv can only be provided for --type=sum")

    shard_links = get_shard_links(args.uri)

    if not shard_links:
        print("Invalid run")
        sys.exit(-1)

    # get_stats returns None for a shard whose summary could not be
    # parsed; remember the last real totals so one bad shard (in
    # particular the final one) does not break the CSV line below.
    stats = None
    for link in shard_links:
        shard_stats = get_stats(link, args.type, args.filter, args.output_csv)
        if shard_stats is not None:
            stats = shard_stats

    if args.output_csv:
        if stats is None:
            sys.stderr.write("No shard summary could be parsed\n")
            sys.exit(1)
        print("%d,%d,%d,%d,%d,%d" % tuple(stats))
    else:
        # Finish the carriage-return progress line left on the terminal.
        print("")
    sys.exit(0)
|  |  | 
|  |  | 
|  | if __name__ == "__main__": | 
|  | main() |