blob: c9be86ae7adf859796121bd93c32de46e6daf452 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
# for details. All rights reserved. Use of this source code is governed by a
# BSD-style license that can be found in the LICENSE file.
#
"""
This script minimizes a program generated by dartfuzz.dart.
"""
import argparse
import multiprocessing as mp
import re
import shlex
import subprocess
STATEMENT_MASK_LINE = 0
STATEMENT_NUMBER_LINE = 1
EXPRESSION_MASK_LINE = 2
EXPRESSION_NUMBER_LINE = 3
class MaskGen(object):
"""
Generates bitpatterns of <nbits> bits which indicate which
statements or expressions should be masked when passed as the
--smask or --emask parameter to dartfuzz.dart.
The patterns are generated in the following manner:
11111111111111111111111111111111
11111111111111110000000000000000
00000000000000001111111111111111
11111111111111111111111100000000
11111111111111110000000011111111
11111111000000001111111111111111
00000000111111111111111111111111
...
If the error persists with a given a pattern it is stored in the
parameter <mask>.
"""
def __init__(self, nbits, mask):
# Mask bitlength.
self.nbits = nbits
# Mask determining which statements or expressions to skip.
self.mask = mask
# Current mask modification start index.
self.convolution_idx = 0
# Max mask with all bits set to 1.
self.max = (1 << (nbits + 1)) - 1
# Current number of bits set in the mask modification mask.
self.nbitsVar = int(nbits)
# Current mask modification mask.
# This will be folded over the current value of self.mask
# starting from 0.
self.filter_mask = (1 << self.nbitsVar) - 1
# All the masks already tested.
self.tested = set()
self.gen_new_mask = False
def __iter__(self):
return self
def __next__(self):
return self.next()
# Update the set of tested masks.
# This is used to keep track of tested masks that did not produce a crash.
def update_tested(self, new_mask):
self.tested.add(new_mask)
# Update the current mask with the new mask that produced a crash.
# Note: We assume that if mask
# 11110000 produces a crash and
# 00000011 produces a crash that
# 11110011 will also produce a crash.
# This might not be true for some cases.
def update_mask(self, new_mask):
new_mask = (self.mask | new_mask) & self.max
self.mask = new_mask
# Count the bits of the current mask.
# More bits mean more statements omitted, so this is used
# to determine the quality of the current mask.
def count_bits(self):
return bin(self.mask).count('1')
def inc_var(self):
self.convolution_idx = 0
self.nbitsVar = self.nbitsVar >> 1
self.filter_mask = (1 << self.nbitsVar) - 1
return self.filter_mask == 0
def stop(self):
if self.convolution_idx >= self.nbits and self.inc_var() > 0:
# Check if the last run generated a new mask.
if self.gen_new_mask:
# Restart mask generation from beginning.
self.nbitsVar = self.nbits >> 1
self.inc_var()
self.gen_new_mask = False
return False
print("STOP")
return True
return False
def next(self):
while not self.stop():
# Generate a new mask according to the mask generation algorithm.
new_mask = (self.mask |
(self.filter_mask << self.convolution_idx)) & self.max
# Update the counter variable for the mask generation algorithm.
self.convolution_idx += self.nbitsVar
# If the new_mask has new bits and has not been tested yet.
# We need to check both because only masks that produce an
# error are merged into self.mask.
if new_mask != self.mask and \
new_mask not in self.tested:
# Add new mask to the set of tested masks.
self.tested.add(new_mask)
# Mark that we generated a new mask on this run.
self.gen_new_mask = True
# Return the new mask to be tested.
return new_mask
raise StopIteration()
# Generate a Dart program for the given statement (smask) and
# expression (emask) masks. Dartfuzz parameters for seed, ffi and
# fp are static and therefore contained in the dartfuzz_cmd parameter.
def generate_dart(dartfuzz_cmd, dart_test, smask, mask, do_expr):
if do_expr:
# Minimizing expressions.
cmds = shlex.split(dartfuzz_cmd) + [dart_test] + \
['--mini', '--smask', '%d' % smask, '--emask', '%d' % mask]
p = subprocess.Popen(cmds, stdout=subprocess.PIPE)
else:
# Minimizing statements.
cmds = shlex.split(dartfuzz_cmd) + [dart_test] + \
['--mini', '--smask', '%d' % mask]
p = subprocess.Popen(cmds, stdout=subprocess.PIPE)
p_stdout, p_stderr = p.communicate()
if p.returncode != 0:
raise 'Invalid return code on generate %d' % p.returncode
mask_new = 0
if do_expr:
mask_new = int(p_stdout.decode().splitlines()[EXPRESSION_MASK_LINE])
else:
mask_new = int(p_stdout.decode().splitlines()[STATEMENT_MASK_LINE])
return mask_new
# Run the Dart program and check for error (by matching error_match_p).
def run_dart(dart_cmd, dart_test, error_match_p, timeout):
cmd = ['timeout', str(timeout)] + shlex.split(dart_cmd) + [dart_test]
p = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
p_stdout, p_stderr = p.communicate()
if error_match_p.search(p_stdout.decode()) or \
error_match_p.search(p_stderr.decode()):
return True
if p.returncode != 0:
print("Warning: error return code %d" % p.returncode)
return False
# Run multiple tries of the given dart_cmd and check for errors by matching
# against error_match_p.
def run_dart_mp(dart_cmd, dart_test, error_match_p, tries, nthreads, timeout):
if tries == 1:
return run_dart(dart_cmd, dart_test, error_match_p, timeout)
pool = mp.Pool(nthreads)
results = [
pool.apply_async(run_dart,
(dart_cmd, dart_test, error_match_p, timeout))
for i in range(tries)
]
worker_done = [False for i in range(tries)]
while True:
all_done = True
for i, result in enumerate(results):
if worker_done[i]:
continue
try:
r = result.get(timeout=0.5)
worker_done[i] = True
if r:
pool.close()
pool.terminate()
pool.join()
return True
except mp.TimeoutError:
all_done = False
if all_done:
break
pool.close()
pool.join()
return False
def minimize(dartfuzz_cmd,
dart_cmd,
dart_cmd_ref,
dart_test,
error_match1,
error_match2,
smask,
emask,
do_expr,
verbose,
tries,
tries_ref,
threads,
timeout,
print_every=32):
# Run dartfuzz command once to get the total number of statements
# and expressions in the generated program.
# Output will look like this:
# <Current Statement Mask>
# <Number of Statements>
# <Current Expression Mask>
# <Number of Expressions>
p = subprocess.Popen(
dartfuzz_cmd + " --mini " + dart_test,
shell=True,
stdout=subprocess.PIPE)
p_stdout, p_stderr = p.communicate()
if do_expr:
# Minimizing expressions.
nstmts = int(p_stdout.decode().splitlines()[EXPRESSION_NUMBER_LINE])
mask_gen = MaskGen(nstmts, emask)
else:
# Minimizing statements.
nstmts = int(p_stdout.decode().splitlines()[STATEMENT_NUMBER_LINE])
mask_gen = MaskGen(nstmts, smask)
# Best mask found so far.
min_mask = 0
# Maximum number of statements or expressions masked.
max_bits = 0
# Count number of iterations.
cntr = 0
# Result of the last run.
last_err = True
# Compile pattern to match standard output and error for a string
# identifying the crash.
error_match_p1 = re.compile(error_match1)
error_match_p2 = None
if error_match2 is not None:
error_match_p2 = re.compile(error_match2)
for mask in mask_gen:
if (verbose):
print("Mask: %x" % mask)
cntr += 1
if cntr % print_every == 0:
cntr = 0
print("Best I could do so far is mask %d/%d" \
% (max_bits, nstmts))
if do_expr:
print(dartfuzz_cmd + " " + dart_test +
" --mini --smask 0x%x --emask 0x%x" % (smask, min_mask))
else:
print(dartfuzz_cmd + " " + dart_test +
" --mini --smask 0x%x --emask 0" % (min_mask))
# The return value mask_new contains the actual mask applied by
# dartfuzz. Due to nesting, masking one statement might lead to other
# statements being masked as well; mask_new will contain this updated
# mask.
mask_new = generate_dart(dartfuzz_cmd, dart_test, smask, mask, do_expr)
# Run generated Dart program and check for error.
err = run_dart_mp(dart_cmd, dart_test, error_match_p1, tries, threads,
timeout)
if err and verbose:
print("Matched error 1 " + error_match1)
err_ref = True
if (dart_cmd_ref is not None) and (error_match_p2 is not None):
err_ref = run_dart_mp(dart_cmd_ref, dart_test, error_match_p2,
tries_ref, threads, timeout)
if err_ref and verbose:
print("Matched error 2 " + error_match2)
if err and err_ref:
# In case the mask used for generating the Dart program lead to an
# error, update the mask generator accordingly.
mask_gen.update_mask(mask)
# TODO (felih): this line should be enough but there seems to be a
# bug in dartfuzz.dart calculating mask_new.
mask_gen.update_mask(mask_new)
max_bits = mask_gen.count_bits()
min_mask = mask_gen.mask
elif last_err:
# If the last run removed the error try the inverse.
invMaskNew = mask_gen.mask | (mask_gen.max & ~mask_new)
if invMaskNew != mask_gen.mask and \
invMaskNew not in mask_gen.tested:
if (verbose):
print("Mask: %x (i)" % invMaskNew)
mask_new = generate_dart(dartfuzz_cmd, dart_test, smask,
invMaskNew, do_expr)
err = run_dart_mp(dart_cmd, dart_test, error_match_p1, tries,
threads, timeout)
if err and verbose:
print("Matched error 1 " + error_match1)
err_ref = True
if (dart_cmd_ref is not None) and (error_match_p2 is not None):
err_ref = run_dart_mp(dart_cmd_ref, dart_test,
error_match_p2, tries_ref, threads,
timeout)
if err_ref and verbose:
print("Matched error 2 " + error_match2)
if err and err_ref:
mask_gen.update_mask(invMaskNew)
mask_gen.update_mask(mask_new)
max_bits = mask_gen.count_bits()
min_mask = mask_gen.mask
last_err = err and err_ref
# Update the set of tested masks with the one we just tested.
mask_gen.update_tested(mask_new)
print("Best I could do is %d/%d" \
% (max_bits,nstmts))
if do_expr:
print(dartfuzz_cmd + " " + dart_test +
" --mini --smask 0x%x --emask 0x%x" % (smask, min_mask))
else:
print(dartfuzz_cmd + " " + dart_test +
" --mini --smask 0x%x --emask 0" % (min_mask))
return min_mask
def main():
parser = argparse.ArgumentParser(
description='Minimize a generated Dart program ' +
' while maintaining an identified crash or divergence. ' +
'A second Dart program can be specified for divergence testing.')
parser.add_argument(
'--dartfuzz',
required=True,
help="dartfuzz command string "
"e.g. dart dartfuzz.dart --no-ffi --no-fp --seed 243123600")
parser.add_argument(
'--dart',
help="Dart command string "
"e.g. ./sdk/out/ReleaseX64/dart",
required=True)
parser.add_argument(
'--dart-ref',
dest="dart_ref",
help="Dart command string for reference build " +
"(in case of divergence testing) " + "e.g. ./sdk/out/ReleaseX64/dart",
default=None)
parser.add_argument(
'--testfile',
required=True,
help="output filename for program generated by "
"dartfuzz command and passed to Dart command "
"e.g fuzz.dart")
parser.add_argument(
'--err',
help="string indicating an error for Dart cmd (no multiline matches)",
required=True)
parser.add_argument(
'--err-ref',
dest="err_ref",
help="string matching the diverging output for the Dart " +
"reference command (no multiline matches)",
default=None)
parser.add_argument(
'--smask', help="hexadecimal statements mask", default="0")
parser.add_argument(
'--emask', help="hexadecimal expressions mask", default="0")
parser.add_argument(
'--typ',
choices=["s", "e", "se"],
required=True,
help="minimize statements (s) or expressions (e) or both (se)")
parser.add_argument(
'--tries',
type=int,
default=1,
help='number of retries per run for Dart cmd')
parser.add_argument(
'--tries-ref',
type=int,
dest='tries_ref',
default=1,
help='number of retries per run for Dart reference cmd')
parser.add_argument(
'--threads',
type=int,
default=4,
help='number of threads to use for retries')
parser.add_argument(
'--timeout', type=int, default=60, help='timeout for Dart command')
parser.add_argument(
'--verbose', help="print intermediate results", action="store_true")
args = parser.parse_args()
timeout = args.timeout
do_stmt = "s" in args.typ
do_expr = "e" in args.typ
smask = int(args.smask, 16)
if do_stmt:
print("Minimizing Statements")
smask = minimize(args.dartfuzz, args.dart, args.dart_ref,
args.testfile, args.err, args.err_ref, smask,
int(args.emask, 16), False, args.verbose, args.tries,
args.tries_ref, args.threads, timeout)
if do_expr:
print("Minimizing Expressions")
minimize(args.dartfuzz, args.dart, args.dart_ref,
args.testfile, args.err, args.err_ref, smask,
int(args.emask, 16), True, args.verbose, args.tries,
args.tries_ref, args.threads, timeout)
if __name__ == "__main__":
main()