From 0d3cb65d8c0ac4d2adf7791d118ec57b5a36bb36 Mon Sep 17 00:00:00 2001 From: Valentin Robert Date: Tue, 29 Aug 2023 09:32:23 -0700 Subject: [PATCH] add-entry-point CLI argument Currently, the only options either exclude some functions or restrict discovery to a specified set of functions. But sometimes, it's nice to be able to add functions to the discovery frontier without restricting what further functions may be discovered from there. This adds a new repeatable option `--add-entry-point` that lets one add an extra entry point to start discovery from, without precluding more entry points from being discovered. Also fixes a bunch of bad comments around those CLI arguments. --- reopt/Main_reopt.hs | 24 ++++++++++++++++++++---- src/Reopt.hs | 16 ++++++++++++---- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/reopt/Main_reopt.hs b/reopt/Main_reopt.hs index c5da08ed..b5548acd 100644 --- a/reopt/Main_reopt.hs +++ b/reopt/Main_reopt.hs @@ -189,8 +189,12 @@ data Args = Args -- ^ Path to llvm-mc -- -- Only used when generating object file from assembly generated by llc. + , addEntryPoints :: ![String] + -- ^ List of extra entry points to start discovery from. This does not + -- preclude other entry points from being added through discovery. , includeAddrs :: ![String] - -- ^ List of entry points for translation + -- ^ List of allowed function entry points for discovery. This prevents + -- exploration of entry points beyond the ones explicitly listed. , excludeAddrs :: ![String] -- ^ List of function entry points that we exclude for translation. , loadBaseAddress :: !(Maybe Word64) @@ -249,6 +253,7 @@ defaultArgs = , optLevel = 2 , slashPath = "slash" , llvmMcPath = "llvm-mc" + , addEntryPoints = [] , includeAddrs = [] , excludeAddrs = [] , loadBaseAddress = Nothing @@ -440,7 +445,16 @@ slashPathP = pathP "slash" #slashPath llvmMcPathP :: Parser String llvmMcPathP = pathP "llvm-mc" #llvmMcPath --- | Used to add a new function to ignore translation of. 
+-- | Used to add extra entry points to seed discovery. +addEntryPointP :: Parser String +addEntryPointP = + strOption + ( long "add-entry-point" + <> metavar "ADDR" + <> help "Extra address to seed discovery frontier with (may be repeated)" + ) + +-- | Used to specify the only addresses we care to explore. includeAddrP :: Parser String includeAddrP = strOption @@ -449,7 +463,7 @@ includeAddrP = <> help "Address of function to include in analysis (may be repeated)" ) --- | Used to add a new function to ignore translation of. +-- | Used to specify addresses we wish **not** to explore. excludeAddrP :: Parser String excludeAddrP = strOption @@ -606,6 +620,7 @@ arguments = <*> optLevelP <*> slashPathP <*> llvmMcPathP + <*> many addEntryPointP <*> many includeAddrP <*> many excludeAddrP <*> optional loadBaseAddressP @@ -674,7 +689,8 @@ argsReoptOptions args = do gdbDebugDirs <- getGdbDebugInfoDirs True pure $ ReoptOptions - { roIncluded = includeAddrs args + { roExtraEntryPoints = addEntryPoints args + , roIncluded = includeAddrs args , roExcluded = excludeAddrs args , roVerboseMode = True , roDiscoveryOptions = args ^. #discOpts diff --git a/src/Reopt.hs b/src/Reopt.hs index f2efe30e..0d4af755 100644 --- a/src/Reopt.hs +++ b/src/Reopt.hs @@ -440,7 +440,9 @@ reoptDefaultDiscoveryOptions = -- | Information from user to control which addresses to include and -- exclude. data ReoptOptions = ReoptOptions - { roIncluded :: [String] + { roExtraEntryPoints :: [String] + -- ^ Additional entry points we want Reopt to consider + , roIncluded :: [String] -- ^ Symbols/addresses user wanted included , roExcluded :: [String] -- ^ Symbols/addresses user wanted exluded. 
@@ -462,7 +464,8 @@ data ReoptOptions = ReoptOptions defaultReoptOptions :: ReoptOptions defaultReoptOptions = ReoptOptions - { roIncluded = [] + { roExtraEntryPoints = [] + , roIncluded = [] , roExcluded = [] , roVerboseMode = False , roDiscoveryOptions = reoptDefaultDiscoveryOptions @@ -970,7 +973,7 @@ initDiscState mem initPoints regInfo symAddrMap explorePred ainfo reoptOpts = do let resolveEntry qsn | ".cold" `BS.isSuffixOf` qsnBytes qsn = Nothing | otherwise = Just Macaw.MayReturnFun - let entryPoints0 = + let noReturnEntryPoints = Map.mapMaybe resolveEntry (samAddrMap symAddrMap) & addKnownFn symAddrMap "abort" Macaw.NoReturnFun & addKnownFn symAddrMap "exit" Macaw.NoReturnFun @@ -980,7 +983,9 @@ initDiscState mem initPoints regInfo symAddrMap explorePred ainfo reoptOpts = do & addKnownFn symAddrMap "__malloc_assert" Macaw.NoReturnFun & addKnownFn symAddrMap "__stack_chk_fail" Macaw.NoReturnFun & addKnownFn symAddrMap "_ZSt9terminatev" Macaw.NoReturnFun - let entryPoints = foldl (\m a -> Map.insert a Macaw.MayReturnFun m) entryPoints0 initPoints + extraEntryPoints <- mapM (resolveSymAddr mem regInfo symAddrMap) (roExtraEntryPoints reoptOpts) + let mayReturnEntryPoints = initPoints ++ extraEntryPoints + let entryPoints = foldl (\m a -> Map.insert a Macaw.MayReturnFun m) noReturnEntryPoints mayReturnEntryPoints case (roIncluded reoptOpts, roExcluded reoptOpts) of ([], excludeNames) -> do excludeAddrs <- mapM (resolveSymAddr mem regInfo symAddrMap) excludeNames @@ -997,6 +1002,9 @@ initDiscState mem initPoints regInfo symAddrMap explorePred ainfo reoptOpts = do let initState = Macaw.emptyDiscoveryState mem (getAddrSymMap symAddrMap) ainfo & Macaw.trustedFunctionEntryPoints .~ entryPoints + -- NOTE (val) It looks a bit weird that we're not also checking + -- `explorePred a` here. Not sure that's intended, and it's + -- definitely not documented. & Macaw.exploreFnPred .~ (`Set.member` s) & Macaw.markAddrsAsFunction Macaw.InitAddr s pure $! initState