Skip to content

Commit

Permalink
Deal with uninitialized data sections in PE files and tackle some recursion issues (vivisect#622)
Browse files Browse the repository at this point in the history
rakuy0 authored Nov 16, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent 1567510 commit 6d6a5d9
Showing 9 changed files with 104 additions and 23 deletions.
83 changes: 68 additions & 15 deletions envi/codeflow.py
Original file line number Diff line number Diff line change
@@ -49,11 +49,17 @@ def __init__(self, mem, persist=False, exptable=True, recurse=True):
self._cf_recurse = recurse
self._cf_exptable = exptable
self._cf_blocks = []

self._cf_blocked = collections.OrderedDict()
self._cf_delaying = collections.defaultdict(set)
self._cf_delayed = collections.defaultdict(set)
self._calls_from = {}

self._dynamic_branch_handlers = []

def _cb_opcode(self, va, op, branches):
'''
Extend CodeFlowContext and implement this method to recieve
Extend CodeFlowContext and implement this method to receive
a callback for every newly discovered opcode.
'''
return branches
@@ -70,7 +76,7 @@ def _cb_noflow(self, va, tva):
'''
Implement this method to receive a callback when a given code
branch is skipped due to being in the noflow dictionary.
( likely due to prodedural branch to noreturn address )
( likely due to procedural branch to noreturn address )
'''
pass

@@ -89,8 +95,8 @@ def _cb_branchtable(self, tableva, ptrva, destva):

def _cb_dynamic_branch(self, va, op, bflags, branches):
'''
if codeflow finds a branch to a non-discrete value (eg. to a register)
we handle it here. by default, we simply track the dynamic branch in a global
if codeflow finds a branch to a non-discrete value (eg: to a register)
we handle it here. By default, we simply track the dynamic branch in a global
VaSet which is added to every workspace.
'''
'''
@@ -128,7 +134,7 @@ def addFunctionDef(self, fva, calls_from):

def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT):
'''
Do code flow disassembly from the specified address. Returnes a list
Do code flow disassembly from the specified address. Returns a list
of the procedural branch targets discovered during code flow...
Set persist=True to store 'opdone' and never disassemble the same thing twice
@@ -179,7 +185,8 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT):

bva, bflags = branches.pop()

# look for dynamic branches (ie. branches which don't have a known target). assume at least one branch
# look for dynamic branches (ie. branches which don't have a known target).
# Assume at least one branch
if bva is None:
self._cb_dynamic_branch(va, op, bflags, branches)

@@ -217,6 +224,9 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT):
if not self._mem.probeMemory(bva, 1, e_const.MM_EXEC):
continue

if self._mem.probeMemory(bva, 1, e_const.MM_UNINIT):
continue

if bflags & envi.BR_PROC:

# Record that the current code flow has a call from it
@@ -225,18 +235,18 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT):

if bva != nextva: # NOTE: avoid call 0 constructs

# Now we decend so we do deepest func callbacks first!
# Now we descend so we do deepest func callbacks first!
if self._cf_recurse:
# descend into functions, but make sure we don't descend into
# recursive functions
if bva in self._cf_blocks:
logger.debug("not recursing to function 0x%x (at 0x%x): it's already in analysis call path (ie. it called *this* func)",
logger.debug("not recursing to function 0x%x (at 0x%x): it's already in analysis call path (ie. it called *this* func)",
bva, va)
logger.debug("call path: \t" + ", ".join([hex(x) for x in self._cf_blocks]))
# the function that we want to make prodcedural
# the function that we want to make procedural
# called us so we can't call to make it procedural
# until it's done
cf_eps[bva] = bflags
cf_eps[bva] = (startva, bflags)
else:
logger.debug("descending into function 0x%x (from 0x%x)", bva, va)
self.addEntryPoint(bva, arch=bflags)
@@ -249,6 +259,17 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT):

# We only go up to procedural branches, not across
continue

# we're jumping to a function we're in the middle of
# it's effectively a call from, but we should block
# until the other finishes processing to avoid some...odd
# issues with noret detection
if bva in self._cf_blocks and op.iflags & envi.IF_BRANCH:
if self._cf_recurse and startva != bva:
self._cf_delayed[startva].add(bva)
self._cf_delaying[bva].add(startva)

continue
except Exception as e:
logger.warning("codeflow: %r", e, exc_info=True)

@@ -257,10 +278,23 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT):

# remove our local blocks from global block stack
self._cf_blocks.pop()
while cf_eps:
fva, arch = cf_eps.popitem()
if not self._mem.isFunction(fva):
self.addEntryPoint(fva, arch=arch)
for fva, (pva, othrarch) in cf_eps.items():
if fva in self._cf_blocks:
self._cf_blocked[fva] = (pva, othrarch)
else:
if not self._mem.isFunction(fva):
self.addEntryPoint(fva, arch=othrarch)

fallback = collections.OrderedDict()
items = list(self._cf_blocked.items())
for fva, othrarch in items:
if fva not in self._cf_blocks and not self._mem.isFunction(fva):
self._funcs.pop(fva, None)
self._cf_blocked.pop(fva, None)
self.addEntryPoint(fva, arch=othrarch)
else:
fallback[fva] = arch
self._cf_blocked = fallback

return list(calls_from.keys())

@@ -291,7 +325,26 @@ def addEntryPoint(self, va, arch=envi.ARCH_DEFAULT):
# logger.debug('addEntryPoint(0x%x): calls_from: %r', va, calls_from)

# Finally, notify the callback of a new function
self._cb_function(va, {'CallsFrom': calls_from})
# we gotta hold some of these off for a bit
if va not in self._cf_delayed:
self._cb_function(va, {'CallsFrom': calls_from})
# remove this function from any blocking lists
if va in self._cf_delaying:
todo = []
for blocked in self._cf_delaying[va]:
self._cf_delayed[blocked].discard(va)
if len(self._cf_delayed[blocked]) == 0:
todo.append(blocked)

self._cf_delaying.pop(va, None)
for ova in todo:
self._cf_delayed.pop(ova, None)
calls = self._calls_from.pop(ova, {})
self._cb_function(ova, {'CallsFrom': calls})
else:
# stash these off for later
self._calls_from[va] = calls_from

return va

def flushFunction(self, fva):
1 change: 1 addition & 0 deletions envi/const.py
Original file line number Diff line number Diff line change
@@ -18,6 +18,7 @@
MM_WRITE = 0x2
MM_EXEC = 0x1
MM_SHARED = 0x08
MM_UNINIT = 0x10

MM_READ_WRITE = MM_READ | MM_WRITE
MM_READ_EXEC = MM_READ | MM_EXEC
6 changes: 5 additions & 1 deletion vivisect/__init__.py
Original file line number Diff line number Diff line change
@@ -21,8 +21,9 @@
import envi
import envi.exc as e_exc
import envi.bits as e_bits
import envi.common as e_common
import envi.memory as e_mem
import envi.const as e_const
import envi.common as e_common
import envi.config as e_config
import envi.bytesig as e_bytesig
import envi.symstore.resolver as e_resolv
@@ -964,6 +965,9 @@ def findPointers(self, cache=True):

for mva, msize, mperm, mname in self.getMemoryMaps():

if mperm & e_const.MM_UNINIT:
continue

offset, bytes = self.getByteDef(mva)
maxsize = len(bytes) - size

10 changes: 10 additions & 0 deletions vivisect/analysis/generic/codeblocks.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
import collections

import envi
import envi.const as e_const

from vivisect.const import REF_CODE, LOC_POINTER, LOC_OP

@@ -97,6 +98,12 @@ def analyzeFunction(vw, funcva):
if rflags & envi.BR_DEREF:
continue

mmap = vw.getMemoryMap(tova)
if mmap:
mva, msize, mperm, mname = mmap
if mperm & e_const.MM_UNINIT:
continue

branch = True
todo.append(tova)

@@ -136,6 +143,9 @@ def analyzeFunction(vw, funcva):
# (like during dynamic branch analysis)
try:
bsize = blocks[bva]
if bsize == 0:
continue

tmpcb = vw.getCodeBlock(bva)
# sometimes codeblocks can be deleted if owned by multiple functions
if bva not in oldblocks or tmpcb is None:
2 changes: 2 additions & 0 deletions vivisect/analysis/generic/funcentries.py
Original file line number Diff line number Diff line change
@@ -31,6 +31,8 @@ def analyze(vw):
# Segment permissions check for likely code stuff at all
if not mapflags & e_const.MM_EXEC:
continue
if mapflags & e_const.MM_UNINIT:
continue

i = 0
maxsize = mapsize - 4
8 changes: 4 additions & 4 deletions vivisect/base.py
Original file line number Diff line number Diff line change
@@ -728,8 +728,8 @@ def _mcb_WorkspaceServer(self, name, wshost):
def _fmcb_Thunk(self, funcva, th, thunkname):
# If the function being made a thunk is registered
# in NoReturnApis, update codeflow...
if self.getMeta('NoReturnApis').get( thunkname.lower() ):
self.cfctx.addNoReturnAddr( funcva )
if self.getMeta('NoReturnApis').get(thunkname.lower()):
self.cfctx.addNoReturnAddr(funcva)

def _fmcb_CallsFrom(self, funcva, th, callsfrom):
for va in callsfrom:
@@ -823,7 +823,7 @@ def _cb_function(self, fva, fmeta):

fname = vw.getName( fva )
if vw.getMeta('NoReturnApis').get( fname.lower() ):
self._cf_noret[ fva ] = True
self._cf_noret[fva] = True

if len( vw.getFunctionBlocks( fva )) == 1:
return
@@ -833,7 +833,7 @@ def _cb_function(self, fva, fmeta):
va = lva[0]
ctup = vw.getCodeBlock(va)
if ctup and fva == ctup[2] and vw.getFunctionMeta(fva, 'BlockCount', default=0) == 1:
self._cf_noret[ fva ] = True
self._cf_noret[fva] = True
break

def _cb_branchtable(self, tablebase, tableva, destva):
2 changes: 2 additions & 0 deletions vivisect/parsers/pe.py
Original file line number Diff line number Diff line change
@@ -286,6 +286,8 @@ def loadPeIntoWorkspace(vw, pe, filename=None, baseaddr=None):
mapflags |= e_const.MM_EXEC
if chars & PE.IMAGE_SCN_CNT_CODE:
mapflags |= e_const.MM_EXEC
if chars & PE.IMAGE_SCN_CNT_UNINITIALIZED_DATA:
mapflags |= e_const.MM_UNINIT

secrva = sec.VirtualAddress
secvsize = sec.VirtualSize
1 change: 0 additions & 1 deletion vivisect/tests/testvivisect.py
Original file line number Diff line number Diff line change
@@ -373,7 +373,6 @@ def test_cli_xrefs(self):
self.assertIn("From: 0x0804fe94, To: 0x080490d0, Type: Code, Flags: 0x00010001\n", output)
self.chgrp_vw.canvas.clearCanvas()


def test_loc_types(self):
'''
Test that we have data consistency in locations
14 changes: 12 additions & 2 deletions vivisect/tools/graphutil.py
Original file line number Diff line number Diff line change
@@ -3,13 +3,17 @@
Some glue code to do workspace related things based on visgraph
'''
import time
import envi
import logging
import vivisect
import collections

import envi
import envi.const as e_const

import visgraph.pathcore as vg_pathcore
import visgraph.graphcore as vg_graphcore

import vivisect

xrskip = envi.BR_PROC | envi.BR_DEREF

logger = logging.getLogger(__name__)
@@ -475,6 +479,12 @@ def buildFunctionGraph(vw, fva, revloop=False, g=None):
if xrflags & xrskip:
continue

mmap = vw.getMemoryMap(xrto)
if mmap:
mva, msize, mperm, mname = mmap
if mperm & e_const.MM_UNINIT:
continue

if not g.hasNode(xrto):
cblock = vw.getCodeBlock(xrto)
if cblock is None:

0 comments on commit 6d6a5d9

Please sign in to comment.