| #!/usr/bin/env python3 |
| # Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file |
| # for details. All rights reserved. Use of this source code is governed by a |
| # BSD-style license that can be found in the LICENSE file. |
| # |
| """Webscraper for make_a_fuzz nightly cluster run results. |
| |
| Given the uri of a make_a_fuzz run, this script will first |
| extract the links pointing to each of the individual shards |
| and then parse the output generated by each shard to |
| find divergences reported by the dartfuzz_test.dart program, |
| concatenate all output, or summarize all test results. |
| |
| Example: |
| collect_data.py --type sum |
| https://ci.chromium.org/p/dart/builders/ci.sandbox/fuzz-linux/303 |
| """ |
| |
| # This script may require a one time install of BeautifulSoup: |
| # sudo apt-get install python3-bs4 |
| |
| import argparse |
| import re |
| import sys |
| |
| from bs4 import BeautifulSoup |
| |
| import requests |
| |
| |
# Divergence reports in a shard's raw stdout. Group 1 starts at the "Isolate"
# on the same line as a "!DIVERGENCE!" marker and runs up to, but does not
# include, the next "Isolate " header. findall() yields 2-tuples because of
# the inner (\n|.) group; callers use element 0.
P_DIV = re.compile(
    "(Isolate.+? !DIVERGENCE! (\n|.)+?)Isolate ",
    flags=re.MULTILINE,
)

# Summary lines in a shard's raw stdout. The six captured counters are, in
# order: tests, success, rerun, skipped, timeout, divergences.
P_SUM = re.compile(
    r"^Tests: (\d+) Success: (\d+) "
    r"\(Rerun: (\d+)\) Skipped: (\d+) "
    r"Timeout: (\d+) Divergences: (\d+)",
    flags=re.MULTILINE,
)

# Shard number embedded in a shard uri.
P_SHARD = re.compile(r".*make_a_fuzz_shard_(\d+)")
| |
| |
def get_shard_links(uri):
    """Return the raw-stdout links of the shards of a make_a_fuzz run.

    Fetches the page at `uri` and collects the href of every anchor whose
    text contains "stdout" and whose target mentions "make_a_fuzz_shard"
    but not "__trigger__". "?format=raw" is appended to each collected href.

    Args:
        uri: Uri of the page to scan.

    Returns:
        A list of link strings in the order the anchors were found; empty
        when no anchor qualifies.
    """
    resp = requests.get(uri)
    soup = BeautifulSoup(resp.text, "html.parser")
    links = []
    # href=True restricts the search to anchors that actually carry an href
    # attribute, so the lookup below cannot raise KeyError.
    for anchor in soup.find_all("a", href=True):
        if "stdout" not in anchor.text:
            continue
        href = anchor["href"]
        if "make_a_fuzz_shard" in href and "__trigger__" not in href:
            links.append(href + "?format=raw")
    return links
| |
| |
def print_reencoded(text):
    """Print `text` reduced to ASCII with backslash escapes interpreted.

    Non-ASCII characters are dropped, then the remaining bytes are decoded
    with the "unicode-escape" codec, so a literal backslash sequence such
    as "\\t" in `text` is printed as the character it denotes.

    Args:
        text: The string to print.
    """
    # Re-encoding avoids breaking some terminals.
    ascii_bytes = text.encode("ascii", errors="ignore")
    # Shard output is arbitrary program output and may contain malformed
    # escapes (e.g. a trailing "\x"); drop those instead of raising
    # UnicodeDecodeError and aborting the whole report.
    print(ascii_bytes.decode("unicode-escape", errors="ignore"))
| |
| |
def print_output_all(text):
    """Print the complete stdout `text` of one shard via print_reencoded."""
    print_reencoded(text)
| |
| |
def print_output_div(shard, text, keywords):
    """Print the divergence reports found in one shard's stdout.

    A progress marker naming the shard is written to stderr first. Every
    report matched by P_DIV is then printed, unless it contains at least
    one of `keywords`.

    Args:
        shard: Shard number as a string; used only for the progress marker.
        text: Raw stdout of the shard.
        keywords: Iterable of substrings; a report containing any of them
            is suppressed.
    """
    sys.stderr.write("Shard: " + shard + " \r")
    for groups in P_DIV.findall(text):
        # Element 0 is the full report; element 1 is only the last
        # character consumed by the inner group.
        report = groups[0]
        if not any(word in report for word in keywords):
            print_reencoded(report)
| |
| |
def get_output_sum(shard, text, should_print, s=[0, 0, 0, 0, 0, 0], divs=[]):
    """Fold the summary lines of one shard into the running totals.

    NOTE: the mutable defaults of `s` and `divs` are deliberate. When the
    caller does not pass them, the same two lists are reused by every call,
    which is how totals accumulate across shards.

    Args:
        shard: Shard number as a string, recorded in `divs` when the shard
            reports a divergence.
        text: Raw stdout of the shard.
        should_print: When true, print the running totals on one line
            ending in a carriage return (no newline).
        s: Running counters in P_SUM group order (tests, success, rerun,
            skipped, timeout, divergences); updated in place.
        divs: Shards that reported at least one divergence; updated in
            place.

    Returns:
        `s` after the update, or None when no summary line was found in
        `text` (a message is written to stderr in that case).
    """
    summaries = P_SUM.findall(text)
    if not summaries:
        sys.stderr.write(
            "Failed to parse shard %s stdout for summary\n" % shard)
        return None
    for counters in summaries:
        # Any non-zero divergence count marks the shard as failing; list
        # each shard once even if several summaries report divergences.
        if int(counters[-1]) > 0 and shard not in divs:
            divs.append(shard)
        for i, value in enumerate(counters[:len(s)]):
            s[i] += int(value)
    if should_print:
        failing = ", ".join(divs) if divs else "none"
        print(
            "Tests: %d Success: %d (Rerun: %d) Skipped: %d Timeout: %d "
            "Divergences: %d (failing shards: %s) \r" % tuple(s + [failing]),
            end="")
    return s
| |
| |
def get_stats(uri, output_type, keywords, output_csv):
    """Fetch one shard's raw stdout and process it as `output_type` asks.

    Args:
        uri: Link to the raw stdout of a shard.
        output_type: "all" prints the whole stdout, "div" prints the
            divergence reports, "sum" accumulates the summary counters.
            Any other value only fetches the page.
        keywords: Passed to print_output_div to filter reports.
        output_csv: When true, the running summary line is not printed
            in "sum" mode.

    Returns:
        The value of get_output_sum for "sum"; None otherwise.
    """
    raw_stdout = requests.get(uri).text

    if output_type == "all":
        print_output_all(raw_stdout)
        return None

    if output_type not in ("div", "sum"):
        return None

    # Only the "div" and "sum" modes need the shard number.
    shard = P_SHARD.findall(uri)[0]
    if output_type == "div":
        print_output_div(shard, raw_stdout, keywords)
        return None
    return get_output_sum(shard, raw_stdout, not output_csv)
| |
| |
def main():
    """Parse the command line, process every shard of the run, and exit.

    Exits with status 0 on success. An invalid option combination is
    reported through the argument parser (usage message on stderr, exit
    status 2). A run page without shard links, or CSV output with no
    parsable shard summary, is reported on stderr with a non-zero status.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--type",
        choices=("div", "sum", "all"),
        required=True,
        help=
        "Select output type (div: divergence report, sum: summary, all: complete stdout)"
    )
    parser.add_argument(
        "--filter",
        nargs="+",
        default=[],
        help="Do not include divergences containing these keywords.")
    parser.add_argument(
        "--output-csv",
        dest="output_csv",
        action="store_true",
        default=False,
        help=
        "Print output in CSV format to stdout. Only supported for --type=sum")
    parser.add_argument(
        "uri",
        type=str,
        help=
        "Uri of one make_a_fuzz run from https://ci.chromium.org/p/dart/builders/ci.sandbox/fuzz-linux."
    )
    args = parser.parse_args()
    if args.output_csv and args.type != "sum":
        # parser.error() prints the usage and the message to stderr and
        # exits with status 2, so a bad invocation no longer looks like
        # a success to the calling shell.
        parser.error("--output-csv can only be provided for --type=sum")

    shard_links = get_shard_links(args.uri)
    if not shard_links:
        # Keep stdout clean for data; diagnostics go to stderr.
        print("Invalid run", file=sys.stderr)
        sys.exit(-1)

    # get_stats returns the cumulative totals, or None for a shard whose
    # summary could not be parsed; remember the last usable totals so one
    # unparsable trailing shard does not discard the result.
    stats = None
    for link in shard_links:
        shard_stats = get_stats(link, args.type, args.filter, args.output_csv)
        if shard_stats is not None:
            stats = shard_stats

    if args.output_csv:
        if stats is None:
            print("No shard summary could be parsed", file=sys.stderr)
            sys.exit(1)
        print("%d,%d,%d,%d,%d,%d" % tuple(stats))
    else:
        # Terminates the carriage-return progress line.
        print("")
    sys.exit(0)
| |
| |
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()