diff --git a/cabal.project b/cabal.project index 81ade675..0b4126b0 100644 --- a/cabal.project +++ b/cabal.project @@ -21,7 +21,7 @@ tests: true -- NOTE: This applies to the whole package: reopt and reopt-explore package reopt - ghc-options: -Wall -Wno-error=unused-imports -Wno-error=unused-do-bind -Wno-error=unused-pattern-binds + ghc-options: -Wall -Wno-error=unused-imports -Wno-error=unused-do-bind -Wno-error=unused-pattern-binds -Wno-error=unused-matches -- ghc-options: -Wall -Werror package reopt-tools ghc-options: -Wall diff --git a/reopt.cabal b/reopt.cabal index 47446664..1137e4c5 100644 --- a/reopt.cabal +++ b/reopt.cabal @@ -158,6 +158,7 @@ executable reopt containers, directory, elf-edit, + extra, filepath, generic-lens, lens, diff --git a/reopt/Main_reopt.hs b/reopt/Main_reopt.hs index 52f7296c..fc32a941 100644 --- a/reopt/Main_reopt.hs +++ b/reopt/Main_reopt.hs @@ -6,6 +6,7 @@ module Main (main) where import Control.Exception (catch) import Control.Lens (Lens', (^.), (^..)) import Control.Monad (foldM, forM_, unless, when) +import Control.Monad.Extra (concatForM, concatMapM) import Data.Aeson qualified as Aeson import Data.Aeson.Encoding qualified as AE import Data.ByteString qualified as BS @@ -20,7 +21,7 @@ import Data.ElfEdit qualified as Elf import Data.Generics.Labels () import Data.IORef (IORef, modifyIORef', newIORef, readIORef) import Data.List qualified as List -import Data.Maybe (fromMaybe, isJust, isNothing) +import Data.Maybe (fromMaybe, isJust, isNothing, mapMaybe) import Data.Text qualified as T import Data.Version (Version (versionBranch)) import Data.Word (Word64) @@ -40,7 +41,6 @@ import System.IO.Error ( import Text.Printf (printf) import Data.Macaw.Analysis.RegisterUse qualified as Macaw -import Data.Macaw.CFG qualified as Macaw import Data.Macaw.DebugLogging qualified as Macaw import Data.Macaw.Discovery qualified as Macaw import Data.Parameterized.Some (Some (Some)) @@ -66,12 +66,8 @@ import Reopt.Occam ( toOccamManifest, ) import Reopt.Server (runServer) -import Reopt.TypeInference.ConstraintGen ( - ModuleConstraints (mcNamedTypes, mcWarnings), - genModuleConstraints, - showInferredTypes, - ) -import Reopt.TypeInference.FunTypeMaps (getAddrSymMap) + +-- import Reopt.TypeInference.FunTypeMaps (getAddrSymMap) import Reopt.TypeInference.Pretty (ppFunction) import Reopt.Utils.Exit ( checkedReadFile, @@ -87,8 +83,13 @@ import Reopt.X86 ( osPersonality, ) +import Data.Macaw.CFG (ArchAddrWidth, ArchMemAddr, MemWidth, asSegmentOff) +import Data.Macaw.Discovery (DiscoveryState (memory), FunctionExploreReason (UserRequest), markAddrsAsFunction) +import Data.Macaw.X86.SyscallInfo (SyscallPersonality) import Paths_reopt (version) -import Debug.Trace (trace) +import Reopt.CFG.FnRep (FnBlock (fbStmts), FnStmt (FnCall), FnValue (..), fnBlocks) +import Reopt.TypeInference.ConstraintGen (ModuleConstraints (..), genModuleConstraints, showInferredTypes) +import Reopt.TypeInference.FunTypeMaps (FunTypeMaps, SymAddrMap, getAddrSymMap) reoptVersion :: String reoptVersion = printf "Reopt binary reoptimizer (reopt) %s" v @@ -858,36 +859,14 @@ performReopt args elfPath = do unless (shouldRecover args) $ reoptEndNow () let sysp = osPersonality os - -- recoverX86Output <- - -- doRecoverX86 funPrefix sysp symAddrMap debugTypeMap discState - -- let recMod = recoveredModule recoverX86Output - -- let relinkerInfo = mergeRelations recoverX86Output - - -- Generate constraints - -- let moduleConstraints = - -- genModuleConstraints - -- recMod - -- (Macaw.memory discState) - -- (traceTypeUnification args) - -- (traceConstraintOrigins args) - - -- _ <- trace "About to re-run discovery" (pure ()) - -- let newAddr = fromMaybe (error "sad") $ Macaw.resolveRegionOff (Macaw.memory discState) 1 0x1149 - -- let discState' = Macaw.markAddrAsFunction Macaw.UserRequest newAddr discState - let rerunDiscState = discState - -- rerunDiscState <- - -- reoptRunDiscovery (getAddrSymMap symAddrMap) $ - -- Macaw.incCompleteDiscovery discState' (roDiscoveryOptions rOpts) - -- _ <- trace "Done re-running discovery" (pure ()) - - recoverX86Output <- doRecoverX86 funPrefix sysp symAddrMap debugTypeMap rerunDiscState - let recMod = recoveredModule recoverX86Output - let relinkerInfo = mergeRelations recoverX86Output + (recoverX86Output, recMod, moduleConstraints) <- + reoptRecoveryLoop args symAddrMap rOpts funPrefix sysp debugTypeMap discState - forM_ (recoveredDefs recMod) $ \ f -> do - trace "FUNCTION" (pure ()) - trace (show (PP.pretty f)) (pure ()) + -- forM_ (recoveredDefs recMod) $ \ f -> do + -- trace "FUNCTION" (pure ()) + -- trace (show (PP.pretty f)) (pure ()) + let relinkerInfo = mergeRelations recoverX86Output case relinkerInfoExportPath args of Nothing -> pure () Just path -> do @@ -907,13 +886,6 @@ performReopt args elfPath = do let buffer = AE.encodingToLazyByteString (AE.list id invariants) reoptWriteByteString AnnotationsFileType path buffer - let moduleConstraints = - genModuleConstraints - recMod - (Macaw.memory rerunDiscState) - (traceTypeUnification args) - (traceConstraintOrigins args) - -- FIXME: move let prettyDefs = @@ -1065,8 +1037,8 @@ displayConstraintsInformation moduleConstraints = do -- putStrLn (unlines (map (show . PP.indent 4 . PP.pretty) (mcConstraints moduleConstraints))) -- putStrLn "Constraints (solving)" -- putStrLn (unlines (map (show . PP.indent 4 . PP.pretty) (mcTyConstraints moduleConstraints))) - putStrLn "Inferred types" - putStrLn (showInferredTypes moduleConstraints) + -- putStrLn "Inferred types" + -- putStrLn (showInferredTypes moduleConstraints) main' :: IO () main' = do @@ -1096,3 +1068,62 @@ main = main' `catch` h displayError "Other error" IO.hPrint IO.stderr e IO.hPrint IO.stderr $ ioeGetErrorType e + +-- | Checks whether a given `FnStmt` has a potential code pointer address we +-- want to try and investigate. We intended for such addresses to be identified +-- via type reconstruction, but it turns out that we can just get away with +-- identifying code-pointer-sized values pointing into an executable segment. +-- However, we could double-check with the results of type reconstruction to +-- potentially avoid some spurious pointers. +fnStmtHasCandidate :: + MemWidth (ArchAddrWidth arch) => + Monad m => + FnStmt arch -> + m [ArchMemAddr arch] +fnStmtHasCandidate (FnCall _fn args _mRet) = do + concatForM args $ \(Some fnValue) -> + case fnValue of + FnCodePointer addr -> return [addr] + _ -> return [] +fnStmtHasCandidate _ = return [] + +reoptRecoveryLoop :: + Args -> + SymAddrMap 64 -> + ReoptOptions -> + BSC.ByteString -> + SyscallPersonality -> + FunTypeMaps 64 -> + Macaw.DiscoveryState X86_64 -> + ReoptM X86_64 r (RecoverX86Output, RecoveredModule X86_64, ModuleConstraints X86_64) +reoptRecoveryLoop args symAddrMap rOpts funPrefix sysp debugTypeMap = go + where + go previousDiscState = do + discState <- + reoptRunDiscovery (getAddrSymMap symAddrMap) $ + Macaw.incCompleteDiscovery previousDiscState (roDiscoveryOptions rOpts) + + recoverX86Output <- doRecoverX86 funPrefix sysp symAddrMap debugTypeMap discState + let recMod = recoveredModule recoverX86Output + + let moduleConstraints = + genModuleConstraints + recMod + (Macaw.memory discState) + (traceTypeUnification args) + (traceConstraintOrigins args) + + -- Search for new candidate function entry points + let allBlocks = concatMap fnBlocks (recoveredDefs recMod) + let allStmts = concatMap fbStmts allBlocks + candidateAddresses <- concatMapM fnStmtHasCandidate allStmts + let candidateAddressesAsSegOffs = mapMaybe (asSegmentOff (memory discState)) candidateAddresses + -- NOTE: if we mark addresses that have already been tried (even if they + -- have failed), Macaw will not add them to the unexplored frontier, so + -- there is no risk here. + let newDiscState = markAddrsAsFunction UserRequest candidateAddressesAsSegOffs discState + let unexplored = newDiscState ^. Macaw.unexploredFunctions + + if null unexplored + then return (recoverX86Output, recMod, moduleConstraints) + else go newDiscState diff --git a/scratch/stripped/function_pointer.c b/scratch/stripped/function_pointer.c index db090559..d75b3b51 100644 --- a/scratch/stripped/function_pointer.c +++ b/scratch/stripped/function_pointer.c @@ -7,10 +7,10 @@ // int apply(int (*f)(int), int i) { return f(i); } int callee(int i) { - return i + 42; + return i; +} +int higher_order(int (*f)()) { return apply(f, 13); } +int callit() { + return higher_order(callee) + 2; // need +2 otherwise Macaw doesn't see RAX used... } -// int higher_order(int (*f)()) { return apply(f, 13); } -// int callit() { -// return higher_order(callee) + 2; // need +2 otherwise Macaw doesn't see RAX used... -// } int main() { return callee(0); } diff --git a/scratch/stripped/function_pointer_stripped b/scratch/stripped/function_pointer_stripped index 72743962..978cc7f9 100644 Binary files a/scratch/stripped/function_pointer_stripped and b/scratch/stripped/function_pointer_stripped differ diff --git a/scratch/stripped/function_pointer_unstripped b/scratch/stripped/function_pointer_unstripped index c11279fd..5ebaa2ee 100755 Binary files a/scratch/stripped/function_pointer_unstripped and b/scratch/stripped/function_pointer_unstripped differ diff --git a/src/Reopt.hs b/src/Reopt.hs index 5733e151..10a506d3 100644 --- a/src/Reopt.hs +++ b/src/Reopt.hs @@ -1711,10 +1711,10 @@ headerTypeMap :: ReoptM arch r (FunTypeMaps (Macaw.ArchAddrWidth arch)) headerTypeMap hdrAnn dynDepsTypeMap symAddrMap noretMap = do - trace "typeDefs" $ forM_ (Map.assocs (typeDefs hdrAnn)) $ \ (bs, ty) -> - trace (show bs <> " ↦ " <> show ty) (pure ()) - trace "funDecls" $ forM_ (Map.assocs (funDecls hdrAnn)) $ \ (bs, ty) -> - trace (show bs <> " ↦ " <> show ty) (pure ()) + -- trace "typeDefs" $ forM_ (Map.assocs (typeDefs hdrAnn)) $ \ (bs, ty) -> + -- trace (show bs <> " ↦ " <> show ty) (pure ()) + -- trace "funDecls" $ forM_ (Map.assocs (funDecls hdrAnn)) $ \ (bs, ty) -> + -- trace (show bs <> " ↦ " <> show ty) (pure ()) globalStepStarted Events.HeaderTypeInference @@ -2296,10 +2296,10 @@ x86ArgumentAnalysis sysp resolveFunName funTypeMap discState = do Either String [Some (Value X86_64 ids)] resolveFn callSite callRegs = do -- trace ("[!!!] Resolving " <> show callSite <> ", regs: " <> show callRegs) $ - trace ("FunMap:\n" <> show funTypeMap) $ + -- trace ("FunMap:\n" <> show funTypeMap) $ case x86CallRegs mem resolveFunName resolveFunType callSite callRegs of - Left rsn -> trace "Left" $ Left (ppRegisterUseErrorReason rsn) - Right r -> trace "Right" $ Right (callArgValues r) + Left rsn -> Left (ppRegisterUseErrorReason rsn) + Right r -> Right (callArgValues r) functionDemands (x86DemandInfo sysp) mem resolveFn $ filter shouldPropagate $ Macaw.exploredFunctions discState @@ -2316,9 +2316,11 @@ x86ArgumentAnalysis sysp resolveFunName funTypeMap discState = do printf "%s: Could not determine signature at callsite %s:\n %s" (Events.ppFnEntry dnm faddr) (Events.ppSegOff callSite) msg globalStepFinished Events.FunctionArgInference () - traceM $ "Dems: " <> show dems - let fty = inferFunctionTypeFromDemands dems - traceM $ "Inferred function type: " <> show fty + -- traceM "Demand set: " + -- forM_ (Map.assocs dems) $ \(off, dem) -> + -- traceM (show off <> " ↦ " <> show dem) + -- let fty = inferFunctionTypeFromDemands dems + -- traceM $ "Inferred function type: " <> show fty pure (inferFunctionTypeFromDemands dems, summaryFails) @@ -2340,9 +2342,9 @@ doRecoverX86 :: ReoptM X86_64 r RecoverX86Output doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do - trace "Potential fun type map:" $ - forM_ (Map.assocs (addrTypeMap debugTypeMap)) $ \ (k, v) -> do - trace (show k <> " ↦ " <> show v) (pure ()) + -- trace "Potential fun type map:" $ + -- forM_ (Map.assocs (addrTypeMap debugTypeMap)) $ \ (k, v) -> do + -- trace (show k <> " ↦ " <> show v) (pure ()) -- Map names to known function types when we have explicit information. let @@ -2382,15 +2384,15 @@ doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do let resolveFunName a = Map.lookup a funNameMap x86ArgumentAnalysis sysp resolveFunName knownFunTypeMap discState - let explored = - [ nm - | Some finfo <- Macaw.exploredFunctions discState - , let faddr = Macaw.discoveredFunAddr finfo - , let nm = Map.findWithDefault (error "Address undefined in funNameMap") faddr funNameMap - ] + -- let explored = + -- [ nm + -- | Some finfo <- Macaw.exploredFunctions discState + -- , let faddr = Macaw.discoveredFunAddr finfo + -- , let nm = Map.findWithDefault (error "Address undefined in funNameMap") faddr funNameMap + -- ] - trace "Functions explored by Macaw:" $ forM_ explored $ \ nm -> - trace (" → " <> show nm) (pure ()) + -- trace "Functions explored by Macaw:" $ forM_ explored $ \ nm -> + -- trace (" → " <> show nm) (pure ()) let funTypeMap :: Map BS.ByteString (MemSegmentOff 64, X86FunTypeInfo) @@ -2404,25 +2406,25 @@ doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do , tp <- maybeToList $ Map.lookup faddr fDems ] - trace "fDems:" $ forM_ (Map.assocs fDems) $ \ (k, v) -> - trace (show k <> " ↦ " <> show v) (pure ()) - trace "Candidates were:" (pure ()) - forM_ - [ (faddr, nm) - | Some finfo <- Macaw.exploredFunctions discState - , let faddr = Macaw.discoveredFunAddr finfo - , let nm = Map.findWithDefault (error "Address undefined in funNameMap") faddr funNameMap - -- , tp <- maybeToList $ Map.lookup faddr fDems - ] $ \ (a, nm) -> - trace (show a <> " ↦ " <> show nm) (pure ()) - - trace "Actual fun type map:" $ - forM_ (Map.assocs funTypeMap) $ \ kv -> do - trace (show kv) (pure ()) + -- trace "fDems:" $ forM_ (Map.assocs fDems) $ \ (k, v) -> + -- trace (show k <> " ↦ " <> show v) (pure ()) + -- trace "Candidates were:" (pure ()) + -- forM_ + -- [ (faddr, nm) + -- | Some finfo <- Macaw.exploredFunctions discState + -- , let faddr = Macaw.discoveredFunAddr finfo + -- , let nm = Map.findWithDefault (error "Address undefined in funNameMap") faddr funNameMap + -- -- , tp <- maybeToList $ Map.lookup faddr fDems + -- ] $ \ (a, nm) -> + -- trace (show a <> " ↦ " <> show nm) (pure ()) + + -- trace "Actual fun type map:" $ + -- forM_ (Map.assocs funTypeMap) $ \ kv -> do + -- trace (show kv) (pure ()) fnDefsAndLogEvents <- fmap catMaybes $ forM (Macaw.exploredFunctions discState) $ \(Some finfo) -> do - trace ("Considering recovering " <> show (Macaw.discoveredFunAddr finfo)) $ do + -- trace ("Considering recovering " <> show (Macaw.discoveredFunAddr finfo)) $ do let faddr = Macaw.discoveredFunAddr finfo let _ = trace ("2: " <> show faddr) () let dnm = Macaw.discoveredFunSymbol finfo diff --git a/src/Reopt/CFG/FnRep.hs b/src/Reopt/CFG/FnRep.hs index fcf3e220..06b6baa9 100644 --- a/src/Reopt/CFG/FnRep.hs +++ b/src/Reopt/CFG/FnRep.hs @@ -234,11 +234,10 @@ data FnValue (arch :: Type) (tp :: M.Type) where -- | Symbol name of this function. BSC.ByteString -> FnValue arch (BVType (ArchAddrWidth arch)) - -- | Value is a function. - -- - -- The int should be in the range @[0..argCount)@, and the type repr - -- is the type of the argument. + -- | Value is a function argument. The int should be in the range + -- @[0..argCount)@, and the type repr is the type of the argument. FnArg :: !Int -> !(TypeRepr tp) -> FnValue arch tp + -- | Value is a constant pointer into the executable segment FnCodePointer :: Macaw.MemAddr (ArchAddrWidth arch) -> FnValue arch (BVType (ArchAddrWidth arch)) diff --git a/src/Reopt/CFG/LLVM.hs b/src/Reopt/CFG/LLVM.hs index 14c62674..1f0de6be 100644 --- a/src/Reopt/CFG/LLVM.hs +++ b/src/Reopt/CFG/LLVM.hs @@ -116,7 +116,6 @@ import Reopt.TypeInference.Solver ( pattern FUnknownTy, ) import Reopt.VCG.Annotations qualified as Ann -import Debug.Trace (trace) data LLVMBitCastInfo = LLVMBitCastInfo { llvmBitCastSrcType :: L.Type @@ -1334,7 +1333,8 @@ stmtToLLVM :: FnStmt arch -> BBLLVM arch () stmtToLLVM stmt = bbArchConstraints $ do - comment (show $ PP.pretty stmt) + -- This prints the FnStmt alongside the LLVM code, for debugging purposes + -- comment (show $ PP.pretty stmt) case stmt of FnComment _ -> return () FnAssignStmt (FnAssignment lhs rhs) -> do @@ -1901,7 +1901,7 @@ defineFunction aInfo archOps genOpts constraints f = do , needSwitchFailLabel = False , funBlockPhiMap = initResolvePhiMap f } - trace (show (PP.pretty (fnEntryBlock f))) (pure ()) + -- trace (show (PP.pretty (fnEntryBlock f))) (pure ()) let (postEntryFunState, entryBlockRes) = addLLVMBlock ctx initFunState (fnEntryBlock f) diff --git a/src/Reopt/CFG/Recovery.hs b/src/Reopt/CFG/Recovery.hs index fbfde896..488f24f7 100644 --- a/src/Reopt/CFG/Recovery.hs +++ b/src/Reopt/CFG/Recovery.hs @@ -89,7 +89,6 @@ import Data.Macaw.Discovery.State import Data.Macaw.Memory.Permissions qualified as Perm import Data.Macaw.Types hiding (Type) import Data.Macaw.Types qualified as M (Type) -import Debug.Trace import Data.Macaw.X86 ( x86DemandContext, x86_64CallParams, @@ -929,13 +928,11 @@ recoverStmt :: Stmt X86_64 ids -> Recover ids () recoverStmt stmtIdx stmt = do - trace ("Recovering " <> show (ppStmt pretty stmt)) (pure ()) + -- trace ("Recovering " <> show (ppStmt pretty stmt)) (pure ()) case stmt of AssignStmt asgn -> do - trace (show ("AssignStmt: " <> pretty asgn)) (pure ()) recoverAssign stmtIdx asgn WriteMem addr memRepr val -> do - trace (show ("WriteMem: " <> pretty addr)) (pure ()) ainfo <- popMemAccessInfo stmtIdx case ainfo of NotFrameAccess -> do @@ -959,7 +956,6 @@ recoverStmt stmtIdx stmt = do Comment msg -> do addFnStmt $ FnComment msg ExecArchStmt astmt0 -> do - trace "Arch Stmt..." (pure ()) -- Architecture-specific statements are assumed to always have side -- effects. astmt <- traverseF recoverValue astmt0 @@ -969,7 +965,6 @@ recoverStmt stmtIdx stmt = do -- Set recovery instruction offset modify $ \s -> s{rsBlockOff = o} ArchState _ _ -> do - trace "X" (pure ()) pure () ------------------------------------------------------------------------ @@ -1848,11 +1843,11 @@ x86CallRegs :: -- | Registers when call occurs. RegState X86Reg (Value X86_64 ids) -> Either RegisterUseErrorReason (CallRegs X86_64 ids) -x86CallRegs mem funNameMap funTypeMap callSite regs = do - nm <- trace ("Looking up name for call site " <> show callSite) $ do +x86CallRegs mem funNameMap funTypeMap _callSite regs = do + nm <- do let ipVal = regs ^. boundValue ip_reg - trace (show ipVal) $ case ipVal of - BVValue _ val -> trace "A" $ do + case ipVal of + BVValue _ val -> do let faddr = absoluteAddr (fromInteger val) callTarget <- case asSegmentOff mem faddr of @@ -1863,24 +1858,22 @@ x86CallRegs mem funNameMap funTypeMap callSite regs = do Right r Nothing -> Left $ Reason CallTargetNotFunctionEntryPoint (memWordValue (addrOffset faddr)) - RelocatableValue _ faddr -> trace "B" $ do + RelocatableValue _ faddr -> do callTarget <- case asSegmentOff mem faddr of Just r -> pure r Nothing -> Left $ Reason InvalidCallTargetAddress (memWordValue (addrOffset faddr)) - trace ("Call target: " <> show callTarget) $ case funNameMap callTarget of + case funNameMap callTarget of Just r -> - trace "Right" $ Right r + Right r Nothing -> - trace "Left" $ Left $ Reason CallTargetNotFunctionEntryPoint (memWordValue (addrOffset faddr)) - SymbolValue _ (SymbolRelocation nm _ver) -> trace "C" $ do + Left $ Reason CallTargetNotFunctionEntryPoint (memWordValue (addrOffset faddr)) + SymbolValue _ (SymbolRelocation nm _ver) -> do pure nm - v -> - trace "Indirect" $ v `seq` do - Left $ Reason IndirectCallTarget () - trace ("Looking up type for " <> show nm) $ case funTypeMap nm of + _ -> Left $ Reason IndirectCallTarget () + case funTypeMap nm of Just tp -> x86TranslateCallType mem nm regs tp - Nothing -> trace "No" $ Left $ Reason UnknownCallTargetArguments nm + Nothing -> Left $ Reason UnknownCallTargetArguments nm uninitRegs :: [Pair X86Reg (FnRegValue X86_64)] uninitRegs =