#!/usr/bin/env python3

'''
FuzzDiff
Written by: Dan Rosenberg

This is a simple tool designed to help out with crash analysis during fuzz
testing.  It selectively "un-fuzzes" portions of a fuzzed file that is known to
cause a crash, re-launches the targeted application, and sees if it still
crashes.  Eventually, this will yield a file that still causes the crash, but
contains a minimum set of changes from the original un-fuzzed file.

Copyright (C) 2010 Virtual Security Research, LLC. - All rights reserved

This program is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.

'''

import os, random, shutil, subprocess, time, sys, signal

# unfuzz() reverts a differing byte when a uniform random draw in [0, 1)
# exceeds THRESHOLD.  THRESHOLD is therefore the probability that a fuzzed
# byte is KEPT; the probability that it is reverted is 1 - THRESHOLD.
THRESHOLD = 0.5

# Maximum number of reduction attempts made by the main loop
ITER = 6000

# Scratch file each candidate is written to.  Its path is appended as the
# last argument of the target program, and term() deletes it on exit.
# NOTE(review): fixed, predictable name in /tmp - concurrent runs share it.
TMPFILE = "/tmp/fuzz123"

# Seconds to wait for the target program to exit before terminating it
WAIT = 2

def unfuzz(orig, fuzz, out, threshold=None):
    """Write to `out` a copy of `fuzz` with some fuzzed bytes reverted.

    The two inputs are compared byte by byte.  Matching bytes are copied
    through unchanged.  For every byte that differs, one random draw decides
    whether the original byte (revert) or the fuzzed byte (keep) is written.

    Args:
        orig: Path to the original, un-fuzzed file.
        fuzz: Path to the fuzzed file; must be the same size as `orig`.
        out: Path of the file to write (overwritten if it exists).
        threshold: A differing byte is reverted when ``random.random()`` is
            greater than this value, so it is the probability that a fuzzed
            byte is kept.  Defaults to the module-level THRESHOLD.

    Returns:
        A ``(diff, unchanged)`` tuple: the number of bytes that differ
        between `orig` and `fuzz`, and how many of those were left fuzzed
        in `out`.

    If the files differ in size, or any of them cannot be read or written,
    a message is printed and the program exits through ``term(-1)``.
    """
    if threshold is None:
        threshold = THRESHOLD

    # stat() is done inside the handler so that a missing or unreadable input
    # takes the same clean exit path as a failed open() further down.
    try:
        same_size = os.stat(orig).st_size == os.stat(fuzz).st_size
    except OSError:
        print("[*] Error opening file - bad arguments")
        term(-1)

    if not same_size:
        print("[*] Files are not the same size.")
        term(-1)

    chunk_size = 64 * 1024
    diff = 0
    unchanged = 0

    try:
        # Open as binary so fuzzed bytes are not decoded/altered
        with open(orig, "rb") as origfd, open(fuzz, "rb") as fuzzfd, \
                open(out, "wb") as outfd:
            while True:
                orig_chunk = origfd.read(chunk_size)
                if not orig_chunk:
                    break
                fuzz_chunk = fuzzfd.read(len(orig_chunk))

                # Fast path: nothing to decide in an identical chunk
                if orig_chunk == fuzz_chunk:
                    outfd.write(orig_chunk)
                    continue

                # Start from the fuzzed bytes and patch back the reverted
                # ones.  Differing bytes are visited in file order, one
                # random draw each.
                merged = bytearray(fuzz_chunk)
                pairs = zip(orig_chunk, fuzz_chunk)
                for pos, (orig_byte, fuzz_byte) in enumerate(pairs):
                    if orig_byte == fuzz_byte:
                        continue
                    diff += 1
                    if random.random() > threshold:
                        merged[pos] = orig_byte
                    else:
                        unchanged += 1
                outfd.write(merged)
    except OSError:
        print("[*] Error opening file - bad arguments")
        term(-1)

    return diff, unchanged

def term(ret):
    """Delete the temporary candidate file and exit the process.

    Cleanup is best-effort: a failure to remove TMPFILE is reported but never
    prevents the process from exiting with the requested status.

    Args:
        ret: Exit status passed to ``sys.exit()``.
    """
    print("[*] Terminating...")
    try:
        os.remove(TMPFILE)
    except FileNotFoundError:
        # No candidate was written yet; nothing to clean up.
        pass
    except OSError as err:
        # E.g. permission denied, or the path is a directory.  Report it, but
        # still exit with `ret` instead of dying with a traceback.
        print(f"[*] Could not remove {TMPFILE}: {err}")
    sys.exit(ret)

#######################
# Program entry point #
#######################

if len(sys.argv) < 4:
    print("[*] Usage: fuzzdiff [orig] [fuzzed] [program] [args]")
    sys.exit(-1)

# Command line: <orig> <fuzzed> <program> [program args...]
progargs = sys.argv[3:]
orig = sys.argv[1]
best = "fuzz.out"

# Work on a copy of the fuzzed file; `best` always holds the smallest set of
# fuzzed bytes found so far that still crashes the target.
try:
    shutil.copy(sys.argv[2], best)
except OSError:
    print("[*] Error opening file - bad fuzzfile argument")
    term(-1)

# Main loop
for i in range(ITER):
    if i % 200 == 0:
        print(f"[*] Iteration {i}/{ITER}")
    diff, unchanged = unfuzz(orig, best, TMPFILE)

    # `best` is already identical to the original: there is nothing left to
    # revert, so further iterations cannot change anything.
    if diff == 0:
        break

    # Only bother if we actually reduced the number of diffs
    if unchanged < diff:

        # Test if the result still crashes the target; the candidate file is
        # passed as the last command-line argument.
        try:
            p = subprocess.Popen(
                progargs + [TMPFILE],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
        except OSError:
            print("[*] Error running program")
            term(-1)

        # Wait up to WAIT seconds for the program to exit.  A program that
        # is still running after that is treated as "did not crash".
        try:
            rc = p.wait(timeout=WAIT)
        except subprocess.TimeoutExpired:
            # Ask politely first, then force-kill if it ignores SIGTERM
            p.terminate()
            try:
                p.wait(timeout=0.5)
            except subprocess.TimeoutExpired:
                p.kill()
                # Reap the killed child so it does not linger as a zombie
                p.wait()
            continue

        # If it segfaulted, keep the changes
        #
        # On Unix, a process killed by SIGSEGV typically returns -11
        # (i.e., -signal.SIGSEGV). This is not portable to Windows.
        if rc == -signal.SIGSEGV:
            shutil.copy(TMPFILE, best)
            print("[*] Reduced diffs from", diff, "to", unchanged)
            if unchanged == 1:
                break

print("[*] Output written to fuzz.out")
# Exit status 0: the run completed normally
term(0)