| #!/usr/bin/env python3 |
| # Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file |
| # for details. All rights reserved. Use of this source code is governed by a |
| # BSD-style license that can be found in the LICENSE file. |
| # |
| """ |
| This script minimizes a program generated by dartfuzz.dart. |
| """ |
| |
| import argparse |
| import multiprocessing as mp |
| import re |
| import shlex |
| import subprocess |
| |
| STATEMENT_MASK_LINE = 0 |
| STATEMENT_NUMBER_LINE = 1 |
| EXPRESSION_MASK_LINE = 2 |
| EXPRESSION_NUMBER_LINE = 3 |
| |
| |
| class MaskGen(object): |
| """ |
| Generates bitpatterns of <nbits> bits which indicate which |
| statements or expressions should be masked when passed as the |
| --smask or --emask parameter to dartfuzz.dart. |
| The patterns are generated in the following manner: |
| 11111111111111111111111111111111 |
| 11111111111111110000000000000000 |
| 00000000000000001111111111111111 |
| 11111111111111111111111100000000 |
| 11111111111111110000000011111111 |
| 11111111000000001111111111111111 |
| 00000000111111111111111111111111 |
| ... |
| If the error persists with a given a pattern it is stored in the |
| parameter <mask>. |
| """ |
| |
| def __init__(self, nbits, mask): |
| # Mask bitlength. |
| self.nbits = nbits |
| # Mask determining which statements or expressions to skip. |
| self.mask = mask |
| # Current mask modification start index. |
| self.convolution_idx = 0 |
| # Max mask with all bits set to 1. |
| self.max = (1 << (nbits + 1)) - 1 |
| # Current number of bits set in the mask modification mask. |
| self.nbitsVar = int(nbits) |
| # Current mask modification mask. |
| # This will be folded over the current value of self.mask |
| # starting from 0. |
| self.filter_mask = (1 << self.nbitsVar) - 1 |
| # All the masks already tested. |
| self.tested = set() |
| self.gen_new_mask = False |
| |
| def __iter__(self): |
| return self |
| |
| def __next__(self): |
| return self.next() |
| |
| # Update the set of tested masks. |
| # This is used to keep track of tested masks that did not produce a crash. |
| def update_tested(self, new_mask): |
| self.tested.add(new_mask) |
| |
| # Update the current mask with the new mask that produced a crash. |
| # Note: We assume that if mask |
| # 11110000 produces a crash and |
| # 00000011 produces a crash that |
| # 11110011 will also produce a crash. |
| # This might not be true for some cases. |
| def update_mask(self, new_mask): |
| new_mask = (self.mask | new_mask) & self.max |
| self.mask = new_mask |
| |
| # Count the bits of the current mask. |
| # More bits mean more statements omitted, so this is used |
| # to determine the quality of the current mask. |
| def count_bits(self): |
| return bin(self.mask).count('1') |
| |
| def inc_var(self): |
| self.convolution_idx = 0 |
| self.nbitsVar = self.nbitsVar >> 1 |
| self.filter_mask = (1 << self.nbitsVar) - 1 |
| return self.filter_mask == 0 |
| |
| def stop(self): |
| if self.convolution_idx >= self.nbits and self.inc_var() > 0: |
| # Check if the last run generated a new mask. |
| if self.gen_new_mask: |
| # Restart mask generation from beginning. |
| self.nbitsVar = self.nbits >> 1 |
| self.inc_var() |
| self.gen_new_mask = False |
| return False |
| print("STOP") |
| return True |
| return False |
| |
| def next(self): |
| while not self.stop(): |
| # Generate a new mask according to the mask generation algorithm. |
| new_mask = (self.mask | |
| (self.filter_mask << self.convolution_idx)) & self.max |
| # Update the counter variable for the mask generation algorithm. |
| self.convolution_idx += self.nbitsVar |
| # If the new_mask has new bits and has not been tested yet. |
| # We need to check both because only masks that produce an |
| # error are merged into self.mask. |
| if new_mask != self.mask and \ |
| new_mask not in self.tested: |
| # Add new mask to the set of tested masks. |
| self.tested.add(new_mask) |
| # Mark that we generated a new mask on this run. |
| self.gen_new_mask = True |
| # Return the new mask to be tested. |
| return new_mask |
| raise StopIteration() |
| |
| |
| # Generate a Dart program for the given statement (smask) and |
| # expression (emask) masks. Dartfuzz parameters for seed, ffi and |
| # fp are static and therefore contained in the dartfuzz_cmd parameter. |
| def generate_dart(dartfuzz_cmd, dart_test, smask, mask, do_expr): |
| if do_expr: |
| # Minimizing expressions. |
| cmds = shlex.split(dartfuzz_cmd) + [dart_test] + \ |
| ['--mini', '--smask', '%d' % smask, '--emask', '%d' % mask] |
| p = subprocess.Popen(cmds, stdout=subprocess.PIPE) |
| else: |
| # Minimizing statements. |
| cmds = shlex.split(dartfuzz_cmd) + [dart_test] + \ |
| ['--mini', '--smask', '%d' % mask] |
| p = subprocess.Popen(cmds, stdout=subprocess.PIPE) |
| p_stdout, p_stderr = p.communicate() |
| if p.returncode != 0: |
| raise 'Invalid return code on generate %d' % p.returncode |
| mask_new = 0 |
| if do_expr: |
| mask_new = int(p_stdout.decode().splitlines()[EXPRESSION_MASK_LINE]) |
| else: |
| mask_new = int(p_stdout.decode().splitlines()[STATEMENT_MASK_LINE]) |
| return mask_new |
| |
| |
| # Run the Dart program and check for error (by matching error_match_p). |
| def run_dart(dart_cmd, dart_test, error_match_p, timeout): |
| cmd = ['timeout', str(timeout)] + shlex.split(dart_cmd) + [dart_test] |
| p = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE) |
| p_stdout, p_stderr = p.communicate() |
| if error_match_p.search(p_stdout.decode()) or \ |
| error_match_p.search(p_stderr.decode()): |
| return True |
| if p.returncode != 0: |
| print("Warning: error return code %d" % p.returncode) |
| return False |
| |
| |
| # Run multiple tries of the given dart_cmd and check for errors by matching |
| # against error_match_p. |
| def run_dart_mp(dart_cmd, dart_test, error_match_p, tries, nthreads, timeout): |
| if tries == 1: |
| return run_dart(dart_cmd, dart_test, error_match_p, timeout) |
| pool = mp.Pool(nthreads) |
| results = [ |
| pool.apply_async(run_dart, |
| (dart_cmd, dart_test, error_match_p, timeout)) |
| for i in range(tries) |
| ] |
| worker_done = [False for i in range(tries)] |
| while True: |
| all_done = True |
| for i, result in enumerate(results): |
| if worker_done[i]: |
| continue |
| try: |
| r = result.get(timeout=0.5) |
| worker_done[i] = True |
| if r: |
| pool.close() |
| pool.terminate() |
| pool.join() |
| return True |
| except mp.TimeoutError: |
| all_done = False |
| if all_done: |
| break |
| pool.close() |
| pool.join() |
| return False |
| |
| |
| def minimize(dartfuzz_cmd, |
| dart_cmd, |
| dart_cmd_ref, |
| dart_test, |
| error_match1, |
| error_match2, |
| smask, |
| emask, |
| do_expr, |
| verbose, |
| tries, |
| tries_ref, |
| threads, |
| timeout, |
| print_every=32): |
| # Run dartfuzz command once to get the total number of statements |
| # and expressions in the generated program. |
| # Output will look like this: |
| # <Current Statement Mask> |
| # <Number of Statements> |
| # <Current Expression Mask> |
| # <Number of Expressions> |
| p = subprocess.Popen( |
| dartfuzz_cmd + " --mini " + dart_test, |
| shell=True, |
| stdout=subprocess.PIPE) |
| p_stdout, p_stderr = p.communicate() |
| if do_expr: |
| # Minimizing expressions. |
| nstmts = int(p_stdout.decode().splitlines()[EXPRESSION_NUMBER_LINE]) |
| mask_gen = MaskGen(nstmts, emask) |
| else: |
| # Minimizing statements. |
| nstmts = int(p_stdout.decode().splitlines()[STATEMENT_NUMBER_LINE]) |
| mask_gen = MaskGen(nstmts, smask) |
| # Best mask found so far. |
| min_mask = 0 |
| # Maximum number of statements or expressions masked. |
| max_bits = 0 |
| # Count number of iterations. |
| cntr = 0 |
| # Result of the last run. |
| last_err = True |
| # Compile pattern to match standard output and error for a string |
| # identifying the crash. |
| error_match_p1 = re.compile(error_match1) |
| error_match_p2 = None |
| if error_match2 is not None: |
| error_match_p2 = re.compile(error_match2) |
| for mask in mask_gen: |
| if (verbose): |
| print("Mask: %x" % mask) |
| cntr += 1 |
| if cntr % print_every == 0: |
| cntr = 0 |
| print("Best I could do so far is mask %d/%d" \ |
| % (max_bits, nstmts)) |
| if do_expr: |
| print(dartfuzz_cmd + " " + dart_test + |
| " --mini --smask 0x%x --emask 0x%x" % (smask, min_mask)) |
| else: |
| print(dartfuzz_cmd + " " + dart_test + |
| " --mini --smask 0x%x --emask 0" % (min_mask)) |
| # The return value mask_new contains the actual mask applied by |
| # dartfuzz. Due to nesting, masking one statement might lead to other |
| # statements being masked as well; mask_new will contain this updated |
| # mask. |
| mask_new = generate_dart(dartfuzz_cmd, dart_test, smask, mask, do_expr) |
| # Run generated Dart program and check for error. |
| err = run_dart_mp(dart_cmd, dart_test, error_match_p1, tries, threads, |
| timeout) |
| if err and verbose: |
| print("Matched error 1 " + error_match1) |
| err_ref = True |
| if (dart_cmd_ref is not None) and (error_match_p2 is not None): |
| err_ref = run_dart_mp(dart_cmd_ref, dart_test, error_match_p2, |
| tries_ref, threads, timeout) |
| if err_ref and verbose: |
| print("Matched error 2 " + error_match2) |
| if err and err_ref: |
| # In case the mask used for generating the Dart program lead to an |
| # error, update the mask generator accordingly. |
| mask_gen.update_mask(mask) |
| # TODO (felih): this line should be enough but there seems to be a |
| # bug in dartfuzz.dart calculating mask_new. |
| mask_gen.update_mask(mask_new) |
| max_bits = mask_gen.count_bits() |
| min_mask = mask_gen.mask |
| elif last_err: |
| # If the last run removed the error try the inverse. |
| invMaskNew = mask_gen.mask | (mask_gen.max & ~mask_new) |
| if invMaskNew != mask_gen.mask and \ |
| invMaskNew not in mask_gen.tested: |
| if (verbose): |
| print("Mask: %x (i)" % invMaskNew) |
| mask_new = generate_dart(dartfuzz_cmd, dart_test, smask, |
| invMaskNew, do_expr) |
| err = run_dart_mp(dart_cmd, dart_test, error_match_p1, tries, |
| threads, timeout) |
| if err and verbose: |
| print("Matched error 1 " + error_match1) |
| err_ref = True |
| if (dart_cmd_ref is not None) and (error_match_p2 is not None): |
| err_ref = run_dart_mp(dart_cmd_ref, dart_test, |
| error_match_p2, tries_ref, threads, |
| timeout) |
| if err_ref and verbose: |
| print("Matched error 2 " + error_match2) |
| if err and err_ref: |
| mask_gen.update_mask(invMaskNew) |
| mask_gen.update_mask(mask_new) |
| max_bits = mask_gen.count_bits() |
| min_mask = mask_gen.mask |
| last_err = err and err_ref |
| # Update the set of tested masks with the one we just tested. |
| mask_gen.update_tested(mask_new) |
| |
| print("Best I could do is %d/%d" \ |
| % (max_bits,nstmts)) |
| |
| if do_expr: |
| print(dartfuzz_cmd + " " + dart_test + |
| " --mini --smask 0x%x --emask 0x%x" % (smask, min_mask)) |
| else: |
| print(dartfuzz_cmd + " " + dart_test + |
| " --mini --smask 0x%x --emask 0" % (min_mask)) |
| |
| return min_mask |
| |
| |
| def main(): |
| parser = argparse.ArgumentParser( |
| description='Minimize a generated Dart program ' + |
| ' while maintaining an identified crash or divergence. ' + |
| 'A second Dart program can be specified for divergence testing.') |
| parser.add_argument( |
| '--dartfuzz', |
| required=True, |
| help="dartfuzz command string " |
| "e.g. dart dartfuzz.dart --no-ffi --no-fp --seed 243123600") |
| parser.add_argument( |
| '--dart', |
| help="Dart command string " |
| "e.g. ./sdk/out/ReleaseX64/dart", |
| required=True) |
| parser.add_argument( |
| '--dart-ref', |
| dest="dart_ref", |
| help="Dart command string for reference build " + |
| "(in case of divergence testing) " + "e.g. ./sdk/out/ReleaseX64/dart", |
| default=None) |
| parser.add_argument( |
| '--testfile', |
| required=True, |
| help="output filename for program generated by " |
| "dartfuzz command and passed to Dart command " |
| "e.g fuzz.dart") |
| parser.add_argument( |
| '--err', |
| help="string indicating an error for Dart cmd (no multiline matches)", |
| required=True) |
| parser.add_argument( |
| '--err-ref', |
| dest="err_ref", |
| help="string matching the diverging output for the Dart " + |
| "reference command (no multiline matches)", |
| default=None) |
| parser.add_argument( |
| '--smask', help="hexadecimal statements mask", default="0") |
| parser.add_argument( |
| '--emask', help="hexadecimal expressions mask", default="0") |
| parser.add_argument( |
| '--typ', |
| choices=["s", "e", "se"], |
| required=True, |
| help="minimize statements (s) or expressions (e) or both (se)") |
| parser.add_argument( |
| '--tries', |
| type=int, |
| default=1, |
| help='number of retries per run for Dart cmd') |
| parser.add_argument( |
| '--tries-ref', |
| type=int, |
| dest='tries_ref', |
| default=1, |
| help='number of retries per run for Dart reference cmd') |
| parser.add_argument( |
| '--threads', |
| type=int, |
| default=4, |
| help='number of threads to use for retries') |
| parser.add_argument( |
| '--timeout', type=int, default=60, help='timeout for Dart command') |
| parser.add_argument( |
| '--verbose', help="print intermediate results", action="store_true") |
| args = parser.parse_args() |
| timeout = args.timeout |
| do_stmt = "s" in args.typ |
| do_expr = "e" in args.typ |
| smask = int(args.smask, 16) |
| if do_stmt: |
| print("Minimizing Statements") |
| smask = minimize(args.dartfuzz, args.dart, args.dart_ref, |
| args.testfile, args.err, args.err_ref, smask, |
| int(args.emask, 16), False, args.verbose, args.tries, |
| args.tries_ref, args.threads, timeout) |
| if do_expr: |
| print("Minimizing Expressions") |
| minimize(args.dartfuzz, args.dart, args.dart_ref, |
| args.testfile, args.err, args.err_ref, smask, |
| int(args.emask, 16), True, args.verbose, args.tries, |
| args.tries_ref, args.threads, timeout) |
| |
| |
| if __name__ == "__main__": |
| main() |