diff --git a/deps/macaw b/deps/macaw index 21e3b8f4..e05a9db2 160000 --- a/deps/macaw +++ b/deps/macaw @@ -1 +1 @@ -Subproject commit 21e3b8f461ac4eb64287ca696443d5e421420d32 +Subproject commit e05a9db243ce28cc9115d33d490bdd56c3a44dc1 diff --git a/reopt-explore/LLVM.hs b/reopt-explore/LLVM.hs index 746d0ca2..d212a813 100644 --- a/reopt-explore/LLVM.hs +++ b/reopt-explore/LLVM.hs @@ -182,7 +182,7 @@ exploreBinary args opts totalCount (index, fPath) = do recoverLogEvent summaryRef statsRef let annDecl = emptyAnnDeclarations hdrInfo <- handleEitherStringWithExit $ parseElfHeaderInfo64 fPath bs - (os, _, recovOut, constraints) <- + (os, _, recovOut, _, constraints) <- -- (os, _, recMod, constraints, _, logEvents) <- handleEitherWithExit =<< runReoptM logger (recoverX86Elf lOpts opts annDecl unnamedFunPrefix hdrInfo) diff --git a/reopt-explore/Residual.hs b/reopt-explore/Residual.hs index 64dd7a35..5f0cd5ab 100644 --- a/reopt-explore/Residual.hs +++ b/reopt-explore/Residual.hs @@ -151,7 +151,7 @@ performRecovery residualOpts reoptOpts (_idx, fPath) = do >>= either (error . show) return hdrInfo <- handleEitherStringWithExit $ parseElfHeaderInfo64 fPath bs logger <- createLogger reoptOpts fPath - (_os, ds, recovOut, _) <- + (_os, ds, recovOut, _, _) <- handleEitherWithExit =<< runReoptM logger diff --git a/reopt.cabal b/reopt.cabal index 47446664..1137e4c5 100644 --- a/reopt.cabal +++ b/reopt.cabal @@ -158,6 +158,7 @@ executable reopt containers, directory, elf-edit, + extra, filepath, generic-lens, lens, diff --git a/reopt/Main_reopt.hs b/reopt/Main_reopt.hs index fc248b2b..77f567c4 100644 --- a/reopt/Main_reopt.hs +++ b/reopt/Main_reopt.hs @@ -18,44 +18,32 @@ import Data.ElfEdit ( ) import Data.ElfEdit qualified as Elf import Data.Generics.Labels () -import Data.IORef ( - IORef, - modifyIORef', - newIORef, - readIORef, - ) -import Data.List ( - intercalate, - nub, - stripPrefix, - (\\), - ) -import Data.Macaw.Analysis.RegisterUse ( - ppRegisterUseErrorReason, - ruReason, - ) -import Data.Macaw.DebugLogging -import Data.Macaw.Discovery ( - DiscoveryOptions (..), - defaultDiscoveryOptions, - memory, - ppDiscoveryStateBlocks, - ) -import Data.Maybe ( - fromMaybe, - isJust, - isNothing, - ) -import Data.Parameterized.Some (Some (Some)) +import Data.IORef (IORef, modifyIORef', newIORef, readIORef) +import Data.List qualified as List +import Data.Maybe (fromMaybe, isJust, isNothing) import Data.Text qualified as T import Data.Version (Version (versionBranch)) import Data.Word (Word64) import GHC.Generics (Generic) import Numeric (readHex) import Options.Applicative -import Paths_reopt (version) import Prettyprinter qualified as PP import Prettyprinter.Render.Text qualified as PP +import System.Exit (exitFailure) +import System.FilePath (splitFileName) +import System.IO qualified as IO +import System.IO.Error ( + ioeGetErrorString, + ioeGetErrorType, + isUserError, + ) +import Text.Printf (printf) + +import Data.Macaw.Analysis.RegisterUse qualified as Macaw +import Data.Macaw.DebugLogging qualified as Macaw +import Data.Macaw.Discovery qualified as Macaw +import Data.Parameterized.Some (Some (Some)) + import Reopt import Reopt.ELFArchInfo (getElfArchInfo) import Reopt.EncodeInvariants ( @@ -77,11 +65,7 @@ import Reopt.Occam ( toOccamManifest, ) import Reopt.Server (runServer) -import Reopt.TypeInference.ConstraintGen ( - ModuleConstraints (mcNamedTypes, mcWarnings), - genModuleConstraints, - showInferredTypes, - ) +import Reopt.TypeInference.ConstraintGen (ModuleConstraints (..), genModuleConstraints) import Reopt.TypeInference.Pretty (ppFunction) import Reopt.Utils.Exit ( checkedReadFile, @@ -96,20 +80,13 @@ import Reopt.X86 ( osLinkName, osPersonality, ) -import System.Exit (exitFailure) -import System.FilePath (splitFileName) -import System.IO qualified as IO -import System.IO.Error ( - ioeGetErrorString, - ioeGetErrorType, - isUserError, - ) -import Text.Printf (printf) + +import Paths_reopt (version) reoptVersion :: String reoptVersion = printf "Reopt binary reoptimizer (reopt) %s" v where - v = intercalate "." $ map (printf "%d") $ versionBranch version + v = List.intercalate "." $ map (printf "%d") $ versionBranch version -- | Write a builder object to a file if defined or standard out if not. writeOutput :: Maybe FilePath -> (IO.Handle -> IO a) -> IO a @@ -125,7 +102,7 @@ unintercalate punct = reverse . go [] "" go acc "" [] = acc go acc thisAcc [] = reverse thisAcc : acc go acc thisAcc str'@(x : xs) - | Just sfx <- stripPrefix punct str' = go (reverse thisAcc : acc) "" sfx + | Just sfx <- List.stripPrefix punct str' = go (reverse thisAcc : acc) "" sfx | otherwise = go acc (x : thisAcc) xs ------------------------------------------------------------------------ @@ -155,7 +132,7 @@ data Action -- | Command line arguments. data Args = Args { reoptAction :: !Action - , debugKeys :: [DebugClass] + , debugKeys :: [Macaw.DebugClass] -- ^ Debug information TODO: See if we can omit this. , outputPath :: !(Maybe FilePath) -- ^ Path to output @@ -199,7 +176,7 @@ data Args = Args -- ^ List of function entry points that we exclude for translation. , loadBaseAddress :: !(Maybe Word64) -- ^ Address to load binary at if relocatable. - , discOpts :: !DiscoveryOptions + , discOpts :: !Macaw.DiscoveryOptions -- ^ Options affecting discovery , unnamedFunPrefix :: !BS.ByteString -- ^ Prefix for unnamed functions identified in code discovery. @@ -359,22 +336,22 @@ llvmVersionP = Just c -> pure c Nothing -> Left $ printf "Unsupported LLVM version %s" s -parseDebugFlags :: [DebugClass] -> String -> Either String [DebugClass] +parseDebugFlags :: [Macaw.DebugClass] -> String -> Either String [Macaw.DebugClass] parseDebugFlags oldKeys cl = case cl of '-' : cl' -> do ks <- getKeys cl' - return (oldKeys \\ ks) + return (oldKeys List.\\ ks) cl' -> do ks <- getKeys cl' - return (nub $ oldKeys ++ ks) + return (List.nub $ oldKeys ++ ks) where - getKeys "all" = Right allDebugKeys - getKeys s = case parseDebugKey s of + getKeys "all" = Right Macaw.allDebugKeys + getKeys s = case Macaw.parseDebugKey s of Nothing -> Left $ "Unknown debug key `" ++ s ++ "'" Just k -> Right [k] -debugKeysP :: Parser [DebugClass] +debugKeysP :: Parser [Macaw.DebugClass] debugKeysP = option (eitherReader validate) $ long "debug" @@ -387,7 +364,7 @@ debugKeysP = ++ "with comma-separated keys. Keys may be preceded by a '-' which " ++ "means disable that key.\n" ++ "Supported keys: all, " - ++ intercalate ", " (map debugKeyName allDebugKeys) + ++ List.intercalate ", " (map Macaw.debugKeyName Macaw.allDebugKeys) ) where validate s = do @@ -624,9 +601,9 @@ arguments = <*> many includeAddrP <*> many excludeAddrP <*> optional loadBaseAddressP - <*> ( DiscoveryOptions + <*> ( Macaw.DiscoveryOptions -- This was never exposed to the CLI - (exploreFunctionSymbols defaultDiscoveryOptions) + (Macaw.exploreFunctionSymbols Macaw.defaultDiscoveryOptions) <$> exploreCodeAddrInMemP <*> logAtAnalyzeFunctionP <*> logAtAnalyzeBlockP @@ -727,7 +704,7 @@ showCFG args elfPath = do initState <- reoptRunInit $ doInit (loadOptions args) hdrInfo ainfo pltFn reoptOpts (_, discState) <- doDiscovery hdrAnn hdrInfo ainfo initState reoptOpts -- Print discovery - pure $ show $ ppDiscoveryStateBlocks discState + pure $ show $ Macaw.ppDiscoveryStateBlocks discState handleEitherWithExit mr -- | Show the constraints generated by the type inference step. @@ -760,7 +737,7 @@ showConstraints args elfPath = do doRecoverX86 funPrefix sysp symAddrMap debugTypeMap discState let recMod = recoveredModule recoverX86Output - pure $ genModuleConstraints recMod (memory discState) (traceTypeUnification args) (traceConstraintOrigins args) + pure $ genModuleConstraints recMod (Macaw.memory discState) (traceTypeUnification args) (traceConstraintOrigins args) mc <- handleEitherWithExit mr @@ -803,7 +780,7 @@ collectInvariants ref evt = do let enc = encodeInvariantMsg addr invMap seq enc $ modifyIORef' ref (enc :) ReoptFunStepFailed InvariantInference (FunId addr _mnm) e -> do - let enc = encodeInvariantFailedMsg addr (ppRegisterUseErrorReason (ruReason e)) + let enc = encodeInvariantFailedMsg addr (Macaw.ppRegisterUseErrorReason (Macaw.ruReason e)) seq enc $ modifyIORef' ref (enc :) _ -> do pure () @@ -856,28 +833,33 @@ performReopt args elfPath = do funPrefix :: BSC.ByteString funPrefix = unnamedFunPrefix args - (os, initState) <- reoptX86Init (loadOptions args) rOpts origElf - let symAddrMap = initDiscSymAddrMap initState + (os, symAddrMap, debugTypeMap, discState) <- + reoptPrepareForRecovery (loadOptions args) rOpts hdrAnn funPrefix origElf when (shouldRecover args) $ checkSymbolUnused funPrefix symAddrMap - let ainfo = osArchitectureInfo os - (debugTypeMap, discState) <- doDiscovery hdrAnn origElf ainfo initState rOpts - case cfgExportPath args of Nothing -> pure () Just path -> do reoptWrite CfgFileType path $ \h -> do - PP.hPutDoc h (ppDiscoveryStateBlocks discState) + PP.hPutDoc h (Macaw.ppDiscoveryStateBlocks discState) unless (shouldRecover args) $ reoptEndNow () let sysp = osPersonality os - recoverX86Output <- - doRecoverX86 funPrefix sysp symAddrMap debugTypeMap discState - let recMod = recoveredModule recoverX86Output + (_, recoverX86Output, recMod, moduleConstraints) <- + reoptRecoveryLoop symAddrMap rOpts funPrefix sysp debugTypeMap discState + + -- forM_ (recoveredDefs recMod) $ \ f -> do + -- trace "FUNCTION" (pure ()) + -- trace (show (PP.pretty f)) (pure ()) + let relinkerInfo = mergeRelations recoverX86Output + case relinkerInfoExportPath args of + Nothing -> pure () + Just path -> do + reoptWriteByteString RelinkerInfoFileType path (Aeson.encode relinkerInfo) case fnsExportPath args of Nothing -> pure () @@ -893,19 +875,6 @@ performReopt args elfPath = do let buffer = AE.encodingToLazyByteString (AE.list id invariants) reoptWriteByteString AnnotationsFileType path buffer - case relinkerInfoExportPath args of - Nothing -> pure () - Just path -> do - reoptWriteByteString RelinkerInfoFileType path (Aeson.encode relinkerInfo) - - -- Generate constraints - let moduleConstraints = - genModuleConstraints - recMod - (memory discState) - (traceTypeUnification args) - (traceConstraintOrigins args) - -- FIXME: move let prettyDefs = @@ -1053,17 +1022,18 @@ displayConstraintsInformation :: ModuleConstraints arch -> IO () displayConstraintsInformation moduleConstraints = do putStrLn "Warnings" putStrLn (unlines (map ((++) "\t" . show) (mcWarnings moduleConstraints))) - -- putStrLn "Constraints (generated)" - -- putStrLn (unlines (map (show . PP.indent 4 . PP.pretty) (mcConstraints moduleConstraints))) - -- putStrLn "Constraints (solving)" - -- putStrLn (unlines (map (show . PP.indent 4 . PP.pretty) (mcTyConstraints moduleConstraints))) - putStrLn "Inferred types" - putStrLn (showInferredTypes moduleConstraints) + +-- putStrLn "Constraints (generated)" +-- putStrLn (unlines (map (show . PP.indent 4 . PP.pretty) (mcConstraints moduleConstraints))) +-- putStrLn "Constraints (solving)" +-- putStrLn (unlines (map (show . PP.indent 4 . PP.pretty) (mcTyConstraints moduleConstraints))) +-- putStrLn "Inferred types" +-- putStrLn (showInferredTypes moduleConstraints) main' :: IO () main' = do args <- getCommandLineArgs - setDebugKeys (args ^. #debugKeys) + Macaw.setDebugKeys (args ^. #debugKeys) case args ^. #reoptAction of DumpDisassembly file -> dumpDisassembly args file ShowCFG file -> diff --git a/src/Reopt.hs b/src/Reopt.hs index a148b32b..df7f9408 100644 --- a/src/Reopt.hs +++ b/src/Reopt.hs @@ -41,6 +41,7 @@ module Reopt ( InitDiscovery, initDiscSymAddrMap, doDiscovery, + reoptRunDiscovery, -- * Debug info discovery reoptHomeDir, @@ -54,6 +55,8 @@ module Reopt ( Reopt.TypeInference.HeaderTypes.emptyAnnDeclarations, RecoveredModule, recoveredDefs, + reoptPrepareForRecovery, + reoptRecoveryLoop, resolveHeader, updateRecoveredModule, @@ -235,7 +238,9 @@ import Reopt.ArgResolver ( ) import Reopt.CFG.FnRep ( FnArchStmt, - FnValue (FnFunctionEntryValue), + FnBlock (fbStmts), + FnStmt (FnCall), + FnValue (FnCodePointer, FnFunctionEntryValue), FoldFnValue (foldFnValue), Function (fnAddr, fnName), FunctionDecl ( @@ -309,7 +314,7 @@ import Reopt.TypeInference.FunTypeMaps ( ) import Reopt.TypeInference.Header (parseHeader) import Reopt.TypeInference.HeaderTypes ( - AnnDeclarations (funDecls), + AnnDeclarations (..), AnnFunArg (..), AnnFunType (..), AnnType (..), @@ -361,6 +366,9 @@ import Text.LLVM.PP qualified as LPP import Text.PrettyPrint.HughesPJ qualified as HPJ import Text.Printf (printf) +import Control.Monad.Extra (concatForM, concatMapM) +import Data.Macaw.CFG qualified as Macaw +import Debug.Trace import Reopt.ELFArchInfo ( InitDiscM, ProcessPLTEntries, @@ -1717,6 +1725,11 @@ headerTypeMap :: Map (ArchSegmentOff arch) Macaw.NoReturnFunStatus -> ReoptM arch r (FunTypeMaps (Macaw.ArchAddrWidth arch)) headerTypeMap hdrAnn dynDepsTypeMap symAddrMap noretMap = do + -- trace "typeDefs" $ forM_ (Map.assocs (typeDefs hdrAnn)) $ \ (bs, ty) -> + -- trace (show bs <> " ↦ " <> show ty) (pure ()) + -- trace "funDecls" $ forM_ (Map.assocs (funDecls hdrAnn)) $ \ (bs, ty) -> + -- trace (show bs <> " ↦ " <> show ty) (pure ()) + globalStepStarted Events.HeaderTypeInference let voidPtrType = PtrAnnType VoidAnnType @@ -2099,6 +2112,8 @@ resolveArgType nm tp0 = addGPReg64 nm TypedefAnnType _ tp -> resolveArgType nm tp + FunPtrAnnType _ret _args -> + addGPReg64 nm -- | This parses the types extracted from header function arguments to the -- machine code registers that the function will expect. @@ -2109,15 +2124,15 @@ argsToRegisters :: V.Vector AnnFunArg -> ArgResolver m () argsToRegisters args = go 0 - where - go :: Int -> ArgResolver m () - go argIx - | argIx >= V.length args = pure () - | otherwise = do - let arg = args V.! argIx - let nm = fromMaybe ("arg" ++ show argIx) (funArgName arg) - resolveArgType nm (funArgType arg) - go (argIx + 1) + where + go :: Int -> ArgResolver m () + go argIx + | argIx >= V.length args = pure () + | otherwise = do + let arg = args V.! argIx + let nm = fromMaybe ("arg" ++ show argIx) (funArgName arg) + resolveArgType nm (funArgType arg) + go (argIx + 1) parseReturnType :: AnnType -> Either ArgResolverError [Some X86RetInfo] parseReturnType tp0 = @@ -2130,6 +2145,7 @@ parseReturnType tp0 = DoubleAnnType -> Right [Some (RetZMM ZMMDouble 0)] PtrAnnType _ -> Right [Some (RetBV64 F.RAX)] TypedefAnnType _ tp -> parseReturnType tp + FunPtrAnnType{} -> Left $ UnsupportedReturnType (ppAnnType tp0) resolveAnnFunType :: Monad m => @@ -2263,7 +2279,7 @@ x86ArgumentAnalysis :: -- | Map from addresses to function name. (MemSegmentOff 64 -> Maybe BSC.ByteString) -> -- | Map from address to the name at that address along with type - (BSC.ByteString -> Maybe X86FunTypeInfo) -> + Map BSC.ByteString (MemSegmentOff 64, X86FunTypeInfo) -> Macaw.DiscoveryState X86_64 -> ReoptM X86_64 @@ -2271,7 +2287,8 @@ x86ArgumentAnalysis :: ( Map (MemSegmentOff 64) X86FunTypeInfo , Map (MemSegmentOff 64) (FunctionArgAnalysisFailure 64) ) -x86ArgumentAnalysis sysp resolveFunName resolveFunType discState = do +x86ArgumentAnalysis sysp resolveFunName funTypeMap discState = do + let resolveFunType fnm = snd <$> Map.lookup fnm funTypeMap -- Generate map from symbol names to known type. let mem = Macaw.memory discState -- Compute only those functions whose types are not known. @@ -2292,6 +2309,8 @@ x86ArgumentAnalysis sysp resolveFunName resolveFunType discState = do RegState X86Reg (Value X86_64 ids) -> Either String [Some (Value X86_64 ids)] resolveFn callSite callRegs = do + -- trace ("[!!!] Resolving " <> show callSite <> ", regs: " <> show callRegs) $ + -- trace ("FunMap:\n" <> show funTypeMap) $ case x86CallRegs mem resolveFunName resolveFunType callSite callRegs of Left rsn -> Left (ppRegisterUseErrorReason rsn) Right r -> Right (callArgValues r) @@ -2311,6 +2330,12 @@ x86ArgumentAnalysis sysp resolveFunName resolveFunType discState = do printf "%s: Could not determine signature at callsite %s:\n %s" (Events.ppFnEntry dnm faddr) (Events.ppSegOff callSite) msg globalStepFinished Events.FunctionArgInference () + -- traceM "Demand set: " + -- forM_ (Map.assocs dems) $ \(off, dem) -> + -- traceM (show off <> " ↦ " <> show dem) + -- let fty = inferFunctionTypeFromDemands dems + -- traceM $ "Inferred function type: " <> show fty + pure (inferFunctionTypeFromDemands dems, summaryFails) data RecoverX86Output = RecoverX86Output @@ -2330,6 +2355,10 @@ doRecoverX86 :: Macaw.DiscoveryState X86_64 -> ReoptM X86_64 r RecoverX86Output doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do + -- trace "Potential fun type map:" $ + -- forM_ (Map.assocs (addrTypeMap debugTypeMap)) $ \ (k, v) -> do + -- trace (show k <> " ↦ " <> show v) (pure ()) + -- Map names to known function types when we have explicit information. let knownFunTypeMap :: Map BS.ByteString (MemSegmentOff 64, X86FunTypeInfo) @@ -2366,8 +2395,17 @@ doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do -- Infer registers each function demands. (fDems, summaryFailures) <- do let resolveFunName a = Map.lookup a funNameMap - let resolveFunType fnm = snd <$> Map.lookup fnm knownFunTypeMap - x86ArgumentAnalysis sysp resolveFunName resolveFunType discState + x86ArgumentAnalysis sysp resolveFunName knownFunTypeMap discState + + -- let explored = + -- [ nm + -- | Some finfo <- Macaw.exploredFunctions discState + -- , let faddr = Macaw.discoveredFunAddr finfo + -- , let nm = Map.findWithDefault (error "Address undefined in funNameMap") faddr funNameMap + -- ] + + -- trace "Functions explored by Macaw:" $ forM_ explored $ \ nm -> + -- trace (" → " <> show nm) (pure ()) let funTypeMap :: Map BS.ByteString (MemSegmentOff 64, X86FunTypeInfo) @@ -2381,9 +2419,27 @@ doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState = do , tp <- maybeToList $ Map.lookup faddr fDems ] + -- trace "fDems:" $ forM_ (Map.assocs fDems) $ \ (k, v) -> + -- trace (show k <> " ↦ " <> show v) (pure ()) + -- trace "Candidates were:" (pure ()) + -- forM_ + -- [ (faddr, nm) + -- | Some finfo <- Macaw.exploredFunctions discState + -- , let faddr = Macaw.discoveredFunAddr finfo + -- , let nm = Map.findWithDefault (error "Address undefined in funNameMap") faddr funNameMap + -- -- , tp <- maybeToList $ Map.lookup faddr fDems + -- ] $ \ (a, nm) -> + -- trace (show a <> " ↦ " <> show nm) (pure ()) + + -- trace "Actual fun type map:" $ + -- forM_ (Map.assocs funTypeMap) $ \ kv -> do + -- trace (show kv) (pure ()) + fnDefsAndLogEvents <- fmap catMaybes $ forM (Macaw.exploredFunctions discState) $ \(Some finfo) -> do + -- trace ("Considering recovering " <> show (Macaw.discoveredFunAddr finfo)) $ do let faddr = Macaw.discoveredFunAddr finfo + let _ = trace ("2: " <> show faddr) () let dnm = Macaw.discoveredFunSymbol finfo let fnId = Events.funId faddr dnm let nm = Map.findWithDefault (error "Address undefined in funNameMap") faddr funNameMap @@ -2574,7 +2630,89 @@ checkSymbolUnused unnamedFunPrefix symAddrMap = do "No symbol in the binary may start with the prefix %d." (BSC.unpack unnamedFunPrefix) --- | Analyze an elf binary to extract information. +-- | Checks whether a given `FnStmt` has a potential code pointer address we +-- want to try and investigate. We intended for such addresses to be identified +-- via type reconstruction, but it turns out that we can just get away with +-- identifying code-pointer-sized values pointing into an executable segment. +-- However, we could double-check with the results of type reconstruction to +-- potentially avoid some spurious pointers. +fnStmtHasCandidate :: + MemWidth (Macaw.ArchAddrWidth arch) => + Monad m => + FnStmt arch -> + m [Macaw.ArchMemAddr arch] +fnStmtHasCandidate (FnCall _fn args _mRet) = do + concatForM args $ \(Some fnValue) -> + case fnValue of + FnCodePointer addr -> return [addr] + _ -> return [] +fnStmtHasCandidate _ = return [] + +-- | Repeatedly perform Macaw recovery and discover new potential function entry +-- points. +reoptRecoveryLoop :: + SymAddrMap 64 -> + ReoptOptions -> + BSC.ByteString -> + SyscallPersonality -> + FunTypeMaps 64 -> + Macaw.DiscoveryState X86_64 -> + ReoptM + X86_64 + r + ( Macaw.DiscoveryState X86_64 + , RecoverX86Output + , RecoveredModule X86_64 + , ModuleConstraints X86_64 + ) +reoptRecoveryLoop symAddrMap rOpts funPrefix sysp debugTypeMap = go + where + go previousDiscState = do + discState <- + reoptRunDiscovery (getAddrSymMap symAddrMap) $ + Macaw.incCompleteDiscovery previousDiscState (roDiscoveryOptions rOpts) + + recoverX86Output <- doRecoverX86 funPrefix sysp symAddrMap debugTypeMap discState + let recMod = recoveredModule recoverX86Output + + let moduleConstraints = + genModuleConstraints + recMod + (Macaw.memory discState) + (roTraceUnification rOpts) + (roTraceConstraintOrigins rOpts) + + -- Search for new candidate function entry points + let allBlocks = concatMap fnBlocks (recoveredDefs recMod) + let allStmts = concatMap fbStmts allBlocks + candidateAddresses <- concatMapM fnStmtHasCandidate allStmts + let candidateAddressesAsSegOffs = mapMaybe (asSegmentOff (Macaw.memory discState)) candidateAddresses + -- NOTE: if we mark addresses that have already been tried (even if they + -- have failed), Macaw will not add them to the unexplored frontier, so + -- there is no risk here. + let newDiscState = Macaw.markAddrsAsFunction Macaw.UserRequest candidateAddressesAsSegOffs discState + let unexplored = newDiscState ^. Macaw.unexploredFunctions + + if null unexplored + then traceM "NOLOOP" >> return (newDiscState, recoverX86Output, recMod, moduleConstraints) + else traceM "LOOP" >> go newDiscState + +reoptPrepareForRecovery :: + LoadOptions -> + ReoptOptions -> + AnnDeclarations -> + BSC.ByteString -> + Elf.ElfHeaderInfo 64 -> + ReoptM X86_64 r (X86OS, SymAddrMap 64, FunTypeMaps 64, Macaw.DiscoveryState X86_64) +reoptPrepareForRecovery loadOpts reoptOpts hdrAnn unnamedFunPrefix hdrInfo = do + (os, initState) <- reoptX86Init loadOpts reoptOpts hdrInfo + let symAddrMap = initDiscSymAddrMap initState + checkSymbolUnused unnamedFunPrefix symAddrMap + let ainfo = osArchitectureInfo os + (debugTypeMap, discState) <- doDiscovery hdrAnn hdrInfo ainfo initState reoptOpts + return (os, symAddrMap, debugTypeMap, discState) + +-- | Analyze an ELF binary to extract information. recoverX86Elf :: -- | Option to load the binary at the given address LoadOptions -> @@ -2590,30 +2728,15 @@ recoverX86Elf :: ( X86OS , Macaw.DiscoveryState X86_64 , RecoverX86Output + , RecoveredModule X86_64 , ModuleConstraints X86_64 ) recoverX86Elf loadOpts reoptOpts hdrAnn unnamedFunPrefix hdrInfo = do - (os, initState) <- reoptX86Init loadOpts reoptOpts hdrInfo - let symAddrMap = initDiscSymAddrMap initState - checkSymbolUnused unnamedFunPrefix symAddrMap - - let ainfo = osArchitectureInfo os - (debugTypeMap, discState) <- - doDiscovery hdrAnn hdrInfo ainfo initState reoptOpts - + (os, symAddrMap, debugTypeMap, discState) <- reoptPrepareForRecovery loadOpts reoptOpts hdrAnn unnamedFunPrefix hdrInfo let sysp = osPersonality os - recoverX86Output <- - doRecoverX86 unnamedFunPrefix sysp symAddrMap debugTypeMap discState - - let recMod = recoveredModule recoverX86Output - let constraints = - genModuleConstraints - recMod - (Macaw.memory discState) - (roTraceUnification reoptOpts) - (roTraceConstraintOrigins reoptOpts) - - pure (os, discState, recoverX86Output, constraints) + (finalDiscState, recoverX86Output, recMod, moduleConstraints) <- + reoptRecoveryLoop symAddrMap reoptOpts unnamedFunPrefix sysp debugTypeMap discState + pure (os, finalDiscState, recoverX86Output, recMod, moduleConstraints) -------------------------------------------------------------------------------- -- Compile the LLVM @@ -2725,7 +2848,8 @@ renderLLVMIR llvmGenOpt llvmConfig os recMod constraints = -- Generate LLVM module let archOps = LLVM.x86LLVMArchOps (show os) - (m, ann, ext, logEvents) = moduleForFunctions archOps llvmGenOpt recMod constraints + aInfo = osArchitectureInfo os + (m, ann, ext, logEvents) = moduleForFunctions aInfo archOps llvmGenOpt recMod constraints -- Render into LLVM out = HPJ.fullRender HPJ.PageMode 10000 1 pp mempty (ppLLVM llvmConfig m) in diff --git a/src/Reopt/CFG/FnRep.hs b/src/Reopt/CFG/FnRep.hs index 639dc2a0..49605a6c 100644 --- a/src/Reopt/CFG/FnRep.hs +++ b/src/Reopt/CFG/FnRep.hs @@ -69,7 +69,7 @@ import Data.Macaw.CFG ( ppApp, sexpr, ) -import Data.Macaw.Memory +import Data.Macaw.Memory qualified as Macaw import Data.Macaw.Types ( BVType, BoolType, @@ -234,11 +234,18 @@ data FnValue (arch :: Type) (tp :: M.Type) where -- | Symbol name of this function. BSC.ByteString -> FnValue arch (BVType (ArchAddrWidth arch)) - -- | Value is a function. - -- - -- The int should be in the range @[0..argCount)@, and the type repr - -- is the type of the argument. + -- | Value is a function argument. The int should be in the range + -- @[0..argCount)@, and the type repr is the type of the argument. FnArg :: !Int -> !(TypeRepr tp) -> FnValue arch tp + -- | Value is a constant pointer into the executable segment + FnCodePointer :: + Macaw.MemAddr (ArchAddrWidth arch) -> + FnValue arch (BVType (ArchAddrWidth arch)) + -- | Value is a constant pointer into the executable segment + FnTypedCodePointer :: + Macaw.MemAddr (ArchAddrWidth arch) -> + !(FunctionType arch) -> + FnValue arch (BVType (ArchAddrWidth arch)) ------------------------------------------------------------------------ -- FoldFnValue @@ -260,12 +267,12 @@ class FoldFnValue (v :: Type -> Type) where type FnArchConstraints arch = ( IsArchFn (ArchFn arch) , IsArchStmt (FnArchStmt arch) - , MemWidth (ArchAddrWidth arch) + , Macaw.MemWidth (ArchAddrWidth arch) , HasRepr (ArchFn arch (FnValue arch)) TypeRepr , HasRepr (ArchReg arch) TypeRepr ) -instance MemWidth (ArchAddrWidth arch) => PP.Pretty (FnValue arch tp) where +instance Macaw.MemWidth (ArchAddrWidth arch) => PP.Pretty (FnValue arch tp) where pretty (FnUndefined{}) = "undef" pretty (FnConstantBool b) = if b then "true" else "false" pretty (FnConstantValue w i) @@ -276,6 +283,8 @@ instance MemWidth (ArchAddrWidth arch) => PP.Pretty (FnValue arch tp) where pretty (FnReturn var) = PP.pretty var pretty (FnFunctionEntryValue _ n) = PP.pretty (BSC.unpack n) pretty (FnArg i _) = "arg" <> PP.pretty i + pretty (FnCodePointer addr) = "codeptr" <> PP.pretty addr + pretty (FnTypedCodePointer addr _fty) = "typedcodeptr" <> PP.pretty addr instance FnArchConstraints arch => PP.Pretty (FnAssignRhs arch (FnValue arch) tp) where pretty rhs = @@ -289,7 +298,7 @@ instance FnArchConstraints arch => PP.Pretty (FnAssignRhs arch (FnValue arch) tp | i >= 0 -> PP.parens ( "0x" <> PP.pretty (showHex i "") <> " : " <> "bv" - PP.<+> PP.pretty (8 * addrSize (Proxy :: Proxy (ArchAddrWidth arch))) + PP.<+> PP.pretty (8 * Macaw.addrSize (Proxy :: Proxy (ArchAddrWidth arch))) ) | otherwise -> error ("FnAddrWidthConstant given negative value: " ++ show i) @@ -307,13 +316,13 @@ instance FnArchConstraints arch => ShowF (FnAssignment arch) archWidthTypeRepr :: forall p arch. - MemWidth (ArchAddrWidth arch) => + Macaw.MemWidth (ArchAddrWidth arch) => p arch -> TypeRepr (BVType (ArchAddrWidth arch)) -archWidthTypeRepr _ = BVTypeRepr (addrWidthNatRepr (addrWidthRepr (Proxy :: Proxy (ArchAddrWidth arch)))) +archWidthTypeRepr _ = BVTypeRepr (Macaw.addrWidthNatRepr (Macaw.addrWidthRepr (Proxy :: Proxy (ArchAddrWidth arch)))) instance - (MemWidth (ArchAddrWidth arch), HasRepr (ArchFn arch f) TypeRepr) => + (Macaw.MemWidth (ArchAddrWidth arch), HasRepr (ArchFn arch f) TypeRepr) => HasRepr (FnAssignRhs arch f) TypeRepr where typeRepr rhs = @@ -336,6 +345,8 @@ instance FnArchConstraints arch => HasRepr (FnValue arch) TypeRepr where FnReturn ret -> frReturnType ret FnFunctionEntryValue{} -> archWidthTypeRepr (Proxy :: Proxy arch) FnArg _ tp -> tp + FnCodePointer{} -> archWidthTypeRepr (Proxy :: Proxy arch) + FnTypedCodePointer{} -> archWidthTypeRepr (Proxy :: Proxy arch) ------------------------------------------------------------------------ -- FnStmt @@ -406,20 +417,20 @@ instance FoldFnValue FnStmt where -- FnBlockLabel -- | A block label -newtype FnBlockLabel w = FnBlockLabel {fnBlockLabelAddr :: MemSegmentOff w} +newtype FnBlockLabel w = FnBlockLabel {fnBlockLabelAddr :: Macaw.MemSegmentOff w} deriving (Eq, Ord) -- | Render block label from segment offset. -fnBlockLabelFromAddr :: MemSegmentOff w -> FnBlockLabel w +fnBlockLabelFromAddr :: Macaw.MemSegmentOff w -> FnBlockLabel w fnBlockLabelFromAddr = FnBlockLabel instance PP.Pretty (FnBlockLabel w) where pretty (FnBlockLabel s) = let - a = segoffAddr s - o = memWordToUnsigned (addrOffset a) + a = Macaw.segoffAddr s + o = Macaw.memWordToUnsigned (Macaw.addrOffset a) in - "block_" <> PP.pretty (addrBase a) <> "_" <> PP.pretty (showHex o "") + "block_" <> PP.pretty (Macaw.addrBase a) <> "_" <> PP.pretty (showHex o "") -- | Render block label as a string fnBlockLabelString :: FnBlockLabel w -> String @@ -439,7 +450,7 @@ data FnJumpTarget arch = FnJumpTarget -- These must match the type of the jump target. } -instance MemWidth (ArchAddrWidth arch) => PP.Pretty (FnJumpTarget arch) where +instance Macaw.MemWidth (ArchAddrWidth arch) => PP.Pretty (FnJumpTarget arch) where pretty tgt = PP.pretty (fnJumpLabel tgt) PP.<+> PP.encloseSep PP.lbracket PP.rbracket " " phiVals where phiVals = V.toList $ viewSome PP.pretty <$> fnJumpPhiValues tgt @@ -514,7 +525,7 @@ data FnBlockInvariant arch where -- -- @o@ is typically negative on processors whose stacks grow down. FnStackOff :: - !(MemInt (ArchAddrWidth arch)) -> + !(Macaw.MemInt (ArchAddrWidth arch)) -> !(BoundLoc (ArchReg arch) (BVType (ArchAddrWidth arch))) -> FnBlockInvariant arch @@ -595,7 +606,7 @@ instance FoldFnValue FnBlock where -- This currently isn't the case, as Phi nodes still use `ArchReg` to index the -- nodes. However, this will be changed. data Function arch = Function - { fnAddr :: !(MemSegmentOff (ArchAddrWidth arch)) + { fnAddr :: !(Macaw.MemSegmentOff (ArchAddrWidth arch)) -- ^ The address for this function , fnType :: !(FunctionType arch) -- ^ Type of this function @@ -641,7 +652,7 @@ instance -- | A function declaration that has type information, but no recovered definition. data FunctionDecl arch = FunctionDecl - { funDeclAddr :: !(MemSegmentOff (ArchAddrWidth arch)) + { funDeclAddr :: !(Macaw.MemSegmentOff (ArchAddrWidth arch)) -- ^ Address of function in binary. , funDeclName :: !BSC.ByteString -- ^ Symbol name for function. diff --git a/src/Reopt/CFG/LLVM.hs b/src/Reopt/CFG/LLVM.hs index a633cc76..744937e5 100644 --- a/src/Reopt/CFG/LLVM.hs +++ b/src/Reopt/CFG/LLVM.hs @@ -87,7 +87,7 @@ import GHC.TypeLits #if __GLASGOW_HASKELL__ < 902 import Numeric.Natural (Natural) #endif -import Prettyprinter (pretty, viaShow) +import Prettyprinter qualified as PP import Text.LLVM qualified as L import Text.LLVM.PP qualified as L (ppType) import Text.PrettyPrint.HughesPJ qualified as HPJ @@ -96,7 +96,7 @@ import Text.Printf import Data.Macaw.Analysis.RegisterUse (BoundLoc (..)) import Data.Macaw.CFG import Data.Macaw.Types -import Data.Macaw.X86 (X86BlockPrecond (..), X86Reg (..), X86_64) +import Data.Macaw.X86 (ArchitectureInfo, X86BlockPrecond (..), X86Reg (..), X86_64, archAddrWidth) import Data.Bits (testBit) import Reopt.CFG.FnRep @@ -110,8 +110,10 @@ import Reopt.TypeInference.Solver ( TyVar, tyToLLVMType, pattern FConflictTy, + pattern FFunPtrTy, pattern FNumTy, pattern FPtrTy, + pattern FUnknownFunPtrTy, pattern FUnknownTy, ) import Reopt.VCG.Annotations qualified as Ann @@ -228,10 +230,12 @@ llvmMaskedLoad :: L.Type -> Intrinsic llvmMaskedLoad n tp tpv = do - let vstr = "v" ++ show n ++ tp - mnem = "llvm.masked.load." ++ vstr ++ ".p0" ++ vstr - args = [L.PtrTo (L.Vector n tpv), L.iT 32, L.Vector n (L.iT 1), L.Vector n tpv] - in intrinsic mnem (L.Vector n tpv) args + let + vstr = "v" ++ show n ++ tp + mnem = "llvm.masked.load." ++ vstr ++ ".p0" ++ vstr + args = [L.PtrTo (L.Vector n tpv), L.iT 32, L.Vector n (L.iT 1), L.Vector n tpv] + in + intrinsic mnem (L.Vector n tpv) args -- | @llvm.masked.store.*@ intrinsic llvmMaskedStore :: @@ -243,10 +247,12 @@ llvmMaskedStore :: L.Type -> Intrinsic llvmMaskedStore n tp tpv = do - let vstr = "v" ++ show n ++ tp - mnem = "llvm.masked.store." ++ vstr ++ ".p0" ++ vstr - args = [L.PtrTo (L.Vector n tpv), L.iT 32, L.Vector n (L.iT 1), L.Vector n tpv] - in intrinsic mnem (L.Vector n tpv) args + let + vstr = "v" ++ show n ++ tp + mnem = "llvm.masked.store." ++ vstr ++ ".p0" ++ vstr + args = [L.PtrTo (L.Vector n tpv), L.iT 32, L.Vector n (L.iT 1), L.Vector n tpv] + in + intrinsic mnem (L.Vector n tpv) args llvmIntrinsics :: [Intrinsic] llvmIntrinsics = @@ -507,7 +513,8 @@ type LLVMArchConstraints arch = -- -- This information is the same for all blocks within the function. data FunLLVMContext arch = FunLLVMContext - { archFns :: !(LLVMArchSpecificOps arch) + { archInfo :: !(ArchitectureInfo arch) + , archFns :: !(LLVMArchSpecificOps arch) -- ^ Architecture-specific functions , funLLVMGenOptions :: !LLVMGenOptions -- ^ Options for generating LLVM @@ -628,7 +635,7 @@ setAssignIdValue :: setAssignIdValue fid v = do m <- gets bbAssignValMap case Map.lookup fid m of - Just{} -> error $ "internal: Assign id " ++ show (pretty fid) ++ " already assigned." + Just{} -> error $ "internal: Assign id " ++ show (PP.pretty fid) ++ " already assigned." Nothing -> pure () modify' $ \s -> s{bbAssignValMap = Map.insert fid v (bbAssignValMap s)} @@ -646,6 +653,7 @@ valueToLLVM :: FnValue arch tp -> BBLLVM arch (L.Typed L.Value) valueToLLVM ctx avmap val = withArchConstraints ctx $ do + aInfo <- asks archInfo let ptrWidth = addrWidthNatRepr (addrWidthRepr (Proxy :: Proxy (ArchAddrWidth arch))) case val of -- A value that is actually undefined, like a non-argument register at @@ -659,7 +667,7 @@ valueToLLVM ctx avmap val = withArchConstraints ctx $ do case Map.lookup lhs avmap of Just v -> pure v Nothing -> - error $ "Could not find assignment value " ++ show (pretty lhs) + error $ "Could not find assignment value " ++ show (PP.pretty lhs) -- Value from a phi node FnPhiValue phiVar -> do case Map.lookup (unFnPhiVar phiVar) avmap of @@ -671,14 +679,14 @@ valueToLLVM ctx avmap val = withArchConstraints ctx $ do case Map.lookup lhs avmap of Just v -> pure v Nothing -> - error $ "Could not find return variable " ++ show (pretty lhs) - -- The entry pointer to a function. We do the cast as a const - -- expr as function addresses appear as constants in e.g. phi - -- nodes + error $ "Could not find return variable " ++ show (PP.pretty lhs) + -- The entry pointer to a function. We do the cast as a const expr as + -- function addresses appear as constants in e.g. phi nodes FnFunctionEntryValue ftp nm -> do let typ = natReprToLLVMType ptrWidth - let fptr :: L.Typed L.Value - fptr = L.Typed (functionTypeToLLVM ftp) (L.ValSymbol (L.Symbol (BSC.unpack nm))) + let + fptr :: L.Typed L.Value + fptr = L.Typed (functionTypeToLLVM ftp) (L.ValSymbol (L.Symbol (BSC.unpack nm))) logEvent $ LLVMLogEvent "FnFunctionEntryValue" $ LogInfoPtrToInt $ @@ -688,6 +696,39 @@ valueToLLVM ctx avmap val = withArchConstraints ctx $ do FnArg i _tp | 0 <= i, i < V.length (funArgs ctx) -> pure $ funArgs ctx V.! i | otherwise -> error $ "Illegal argument index " ++ show i + FnCodePointer addr -> do + let + ty = + L.ptrT $ + L.FunTy + L.Opaque -- TODO + [] -- TODO + False + -- FIXME: should use the base and the offset + value = + L.ValConstExpr $ + L.ConstConv + L.IntToPtr + ( L.Typed + (L.PrimType (L.Integer (fromInteger (8 * toInteger (addrWidthReprByteCount (archAddrWidth aInfo)))))) + (L.ValInteger (toInteger (addrOffset addr))) + ) + ty + return $ L.Typed ty value + FnTypedCodePointer addr fty -> do + let + ty = functionTypeToLLVM fty + -- FIXME: should use the base and the offset + value = + L.ValConstExpr $ + L.ConstConv + L.IntToPtr + ( L.Typed + (L.PrimType (L.Integer (fromInteger (8 * toInteger (addrWidthReprByteCount (archAddrWidth aInfo)))))) + (L.ValInteger (toInteger (addrOffset addr))) + ) + ty + return $ L.Typed ty value mkLLVMValue :: HasCallStack => @@ -849,15 +890,16 @@ appToLLVM :: BBLLVM arch (L.Typed L.Value) appToLLVM lhs app = bbArchConstraints $ do let typ = typeToLLVMType $ typeRepr app - let binop :: - (L.Typed L.Value -> L.Value -> BBLLVM arch (L.Typed L.Value)) -> - FnValue arch utp -> - FnValue arch utp -> - BBLLVM arch (L.Typed L.Value) - binop f x y = do - x' <- mkLLVMValue x - y' <- mkLLVMValue y - f x' (L.typedValue y') + let + binop :: + (L.Typed L.Value -> L.Value -> BBLLVM arch (L.Typed L.Value)) -> + FnValue arch utp -> + FnValue arch utp -> + BBLLVM arch (L.Typed L.Value) + binop f x y = do + x' <- mkLLVMValue x + y' <- mkLLVMValue y + f x' (L.typedValue y') case app of Eq x y -> binop (icmpop L.Ieq) x y Mux _tp c t f -> do @@ -876,23 +918,25 @@ appToLLVM lhs app = bbArchConstraints $ do MkTuple fieldTypes fields -> do let structType = L.Struct (toListFC typeToLLVMType fieldTypes) let initUndef = L.Typed structType L.ValUndef - let f :: - forall utp. - FnValue arch utp -> - (Int32 -> L.Typed L.Value -> BBLLVM arch (L.Typed L.Value)) -> - (Int32 -> L.Typed L.Value -> BBLLVM arch (L.Typed L.Value)) - f fld c i s = do - llvmFieldValue <- mkLLVMValue fld - s' <- insertValue s llvmFieldValue i - c (i + 1) s' + let + f :: + forall utp. + FnValue arch utp -> + (Int32 -> L.Typed L.Value -> BBLLVM arch (L.Typed L.Value)) -> + (Int32 -> L.Typed L.Value -> BBLLVM arch (L.Typed L.Value)) + f fld c i s = do + llvmFieldValue <- mkLLVMValue fld + s' <- insertValue s llvmFieldValue i + c (i + 1) s' foldrFC f (\_ r -> pure r) fields 0 initUndef -- :: !(P.List TypeRepr l) -> !(f (TupleType l)) -> !(P.Index l r) -> App f r TupleField _fieldTypes macawStruct idx -> do -- Make a struct llvmStruct <- mkLLVMValue macawStruct -- Get index as an Int32 - let idxVal :: Integer - idxVal = PL.indexValue idx + let + idxVal :: Integer + idxVal = PL.indexValue idx when (idxVal >= toInteger (maxBound :: Int32)) $ error $ "Index out of range " ++ show idxVal ++ "." @@ -1112,8 +1156,9 @@ llvmGEPFromPtr :: BBLLVM arch (L.Typed L.Value) llvmGEPFromPtr pointeeType ofs ptrV = do let pointerType = L.PtrTo pointeeType - let zeroV = L.Typed (L.iT 32) (L.int 0) - ofsV = L.Typed (L.iT 32) (L.int ofs) + let + zeroV = L.Typed (L.iT 32) (L.int 0) + ofsV = L.Typed (L.iT 32) (L.int ofs) -- https://llvm.org/docs/GetElementPtr.html#what-is-the-first-index-of-the-gep-instruction L.Typed pointerType <$> evalInstr (L.GEP False ptrV [zeroV, ofsV]) @@ -1208,7 +1253,7 @@ pointerForMemOp ctx ptr pointeeType = do getInferredType ptr >>= \case Just FPtrTy{} -> llvmGEPFromPtr pointeeType 0 ptrV Just FConflictTy{} -> llvmAsPtr ctx pointeeType ptrV - t -> error $ "Unexpected type at pointerForMemOp " ++ show (pretty t) + t -> error $ "Unexpected type at pointerForMemOp " ++ show (PP.pretty t) -- | Convert an assignment to a llvm expression rhsToLLVM :: @@ -1289,9 +1334,10 @@ resolveFunctionEntry dest = fromMaybe (error "fnTypes 1") (Map.lookup nm (mcExtFunTypes constraints)) - let resolvetv tv = Map.lookup tv (mcTypeMap constraints) - args = map resolvetv (fttvArgs fty) - retty = fmap resolvetv (fttvRet fty) + let + resolvetv tv = Map.lookup tv (mcTypeMap constraints) + args = map resolvetv (fttvArgs fty) + retty = fmap resolvetv (fttvRet fty) return ( L.Typed (functionTypeToLLVM' dest_ftp args retty) (L.ValSymbol sym) , args @@ -1312,7 +1358,8 @@ stmtToLLVM :: FnStmt arch -> BBLLVM arch () stmtToLLVM stmt = bbArchConstraints $ do - comment (show $ pretty stmt) + -- This prints the FnStmt alongside the LLVM code, for debugging purposes + -- comment (show $ PP.pretty stmt) case stmt of FnComment _ -> return () FnAssignStmt (FnAssignment lhs rhs) -> do @@ -1390,16 +1437,18 @@ coerceForSubtype m_vTy m_tgtTy v = do (FPtrTy{}, _) -> llvmPtrAsBV "coerceForSubtype" v (_, FPtrTy ty) -> llvmAsPtr "coerceForSubtype" (tyToLLVMType ptrWidth ty) v + (FUnknownTy, FFunPtrTy{}) -> pure v -- TODO (val) ? + (FUnknownTy, FUnknownFunPtrTy{}) -> pure v -- TODO (val) ? (t, t') -> do thisLabel <- gets bbThisLabel :: BBLLVM arch L.BlockLabel error $ show $ "Type mismatch at " - <> viaShow thisLabel + <> PP.viaShow thisLabel <> ": " - <> pretty t + <> PP.pretty t <> " and " - <> pretty t' + <> PP.pretty t' mkLLVMSubtypeValue :: forall arch tp. @@ -1414,8 +1463,9 @@ mkLLVMSubtypeValue v m_tgtTy = do addTargetPhiValues :: forall arch. FnJumpTarget arch -> BBLLVM arch () addTargetPhiValues tgt = do thisLabel <- gets bbThisLabel :: BBLLVM arch L.BlockLabel - let tgtLbl :: FnBlockLabel (ArchAddrWidth arch) - tgtLbl = fnJumpLabel tgt + let + tgtLbl :: FnBlockLabel (ArchAddrWidth arch) + tgtLbl = fnJumpLabel tgt tgtPhis <- asks (fromMaybe (error "Missing block") . Map.lookup tgtLbl . funBlockPhis) let doSubtype (Some v) (Some phiv) = @@ -1423,19 +1473,20 @@ addTargetPhiValues tgt = do values <- V.zipWithM doSubtype (fnJumpPhiValues tgt) tgtPhis -- Add this block value to phi assignment - let updateVar :: - Map L.BlockLabel (Int, L.Value) -> - L.Typed L.Value -> - BBLLVM arch (Map L.BlockLabel (Int, L.Value)) - -- updateVar prevVars (Some v@(FnPhiValue phiVar)) = do - -- constraints <- asks moduleConstraints - -- fn <- asks funName - -- let tyV = mcAssignTyVars constraints Map.! fn Map.! unFnPhiVar phiVar - -- let inferredType = mcTypeMap constraints Map.! tyV - -- thisVal <- mkLLVMValue v - -- pure $! Map.insertWith (const (first (+ 1))) thisLabel (1, L.typedValue thisVal) prevVars - updateVar prevVars v = do - pure $! Map.insertWith (const (first (+ 1))) thisLabel (1, L.typedValue v) prevVars + let + updateVar :: + Map L.BlockLabel (Int, L.Value) -> + L.Typed L.Value -> + BBLLVM arch (Map L.BlockLabel (Int, L.Value)) + -- updateVar prevVars (Some v@(FnPhiValue phiVar)) = do + -- constraints <- asks moduleConstraints + -- fn <- asks funName + -- let tyV = mcAssignTyVars constraints Map.! fn Map.! unFnPhiVar phiVar + -- let inferredType = mcTypeMap constraints Map.! tyV + -- thisVal <- mkLLVMValue v + -- pure $! Map.insertWith (const (first (+ 1))) thisLabel (1, L.typedValue thisVal) prevVars + updateVar prevVars v = do + pure $! Map.insertWith (const (first (+ 1))) thisLabel (1, L.typedValue v) prevVars m <- BBLLVM $ use $ funStateLens . funBlockPhiMapLens let curEntries = phiAssignmentForBlock m tgtLbl newEntries <- V.zipWithM updateVar curEntries values @@ -1646,9 +1697,10 @@ callAsm :: [L.Typed L.Value] -> BBLLVM arch (L.Typed L.Value) callAsm attrs resType asmCode asmArgs args = do - let argTypes = L.typedType <$> args - ftp = L.PtrTo (L.FunTy resType argTypes False) - f = L.ValAsm (asmSideeffect attrs) False asmCode asmArgs + let + argTypes = L.typedType <$> args + ftp = L.PtrTo (L.FunTy resType argTypes False) + f = L.ValAsm (asmSideeffect attrs) False asmCode asmArgs L.Typed resType <$> evalInstr (L.Call False ftp f args) -- | Call some inline assembly that does not return a value. @@ -1665,8 +1717,9 @@ callAsm_ :: callAsm_ attrs asmCode asmArgs args = do let argTypes = L.typedType <$> args let ftp = L.PtrTo (L.FunTy (L.PrimType L.Void) argTypes False) - let f :: L.Value - f = L.ValAsm (asmSideeffect attrs) False asmCode asmArgs + let + f :: L.Value + f = L.ValAsm (asmSideeffect attrs) False asmCode asmArgs call_ (L.Typed ftp f) args ------------------------------------------------------------------------ @@ -1708,9 +1761,11 @@ mkStackExpr o in Ann.BVSub (Ann.Var Ann.StackHigh) oExpr | o == 0 = Ann.Var Ann.StackHigh | otherwise = - let oExpr :: Ann.Expr Ann.BlockVar - oExpr = Ann.BVDecimal (fromInteger o) 64 - in Ann.BVAdd (Ann.Var Ann.StackHigh) oExpr + let + oExpr :: Ann.Expr Ann.BlockVar + oExpr = Ann.BVDecimal (fromInteger o) 64 + in + Ann.BVAdd (Ann.Var Ann.StackHigh) oExpr newtype BlockAnnGen a = BlockAnnGen (Except String a) deriving (Functor, Applicative, Monad, MonadError String) @@ -1735,8 +1790,9 @@ mkBoundLocExpr (StackOffLoc o tp) = if o < 0 then case tp of BVMemRepr byteCount LittleEndian -> do - let stackExpr = mkStackExpr (toInteger o) - bitCount = 8 * natValue byteCount + let + stackExpr = mkStackExpr (toInteger o) + bitCount = 8 * natValue byteCount pure $! Ann.Var (Ann.MCStack stackExpr bitCount) _ -> throwError $ "Do not support stack references with type " ++ show tp @@ -1749,10 +1805,11 @@ addPhiPrecond b prev0 = case phiFnRepVar b of Some phiVar -> do -- Get expression representing LLVM value. - let phiExpr :: Ann.Expr Ann.BlockVar - phiExpr = Ann.Var (Ann.LLVMVar (Text.pack (phiLLVMIdent b))) - -- Assert pfi expression is equal to each machine location. - vars = fnPhiVarRep phiVar : fnPhiVarLocations phiVar + let + phiExpr :: Ann.Expr Ann.BlockVar + phiExpr = Ann.Var (Ann.LLVMVar (Text.pack (phiLLVMIdent b))) + -- Assert pfi expression is equal to each machine location. + vars = fnPhiVarRep phiVar : fnPhiVarLocations phiVar let fn v prev = seq prev $ do e <- mkBoundLocExpr v let expr = Ann.Eq phiExpr e @@ -1821,6 +1878,7 @@ getBlockAnn fnm blockRes = do defineFunction :: forall arch. (LLVMArchConstraints arch, arch ~ X86_64) => + ArchitectureInfo arch -> -- | Architecture specific operations LLVMArchSpecificOps arch -> -- | Options for generating LLVM @@ -1830,19 +1888,21 @@ defineFunction :: -- | Function to translate Function arch -> LLVMTrans (L.Define, Either String Ann.FunctionAnn) -defineFunction archOps genOpts constraints f = do +defineFunction aInfo archOps genOpts constraints f = do let ptrWidth = widthVal $ addrWidthNatRepr (addrWidthRepr (Proxy :: Proxy (ArchAddrWidth arch))) - let mkInputReg :: (Some TypeRepr, TyVar) -> Int -> L.Typed L.Ident - mkInputReg (Some tp, tyv) i = - case Map.lookup tyv (mcTypeMap constraints) of - Just (FPtrTy pointee) -> L.Typed (L.PtrTo (tyToLLVMType ptrWidth pointee)) (argIdent i) - _ -> L.Typed (typeToLLVMType tp) (argIdent i) + let + mkInputReg :: (Some TypeRepr, TyVar) -> Int -> L.Typed L.Ident + mkInputReg (Some tp, tyv) i = + case Map.lookup tyv (mcTypeMap constraints) of + Just (FPtrTy pointee) -> L.Typed (L.PtrTo (tyToLLVMType ptrWidth pointee)) (argIdent i) + _ -> L.Typed (typeToLLVMType tp) (argIdent i) let fty = fromMaybe (error "fty") (Map.lookup (fnName f) (mcExtFunTypes constraints)) let argsWithTyVars = zip (fnArgTypes (fnType f)) (fttvArgs fty) - let inputArgs :: [L.Typed L.Ident] - inputArgs = zipWith mkInputReg argsWithTyVars [0 ..] + let + inputArgs :: [L.Typed L.Ident] + inputArgs = zipWith mkInputReg argsWithTyVars [0 ..] let fret = fmap (\tv -> Map.lookup tv (mcTypeMap constraints)) (fttvRet fty) @@ -1851,31 +1911,35 @@ defineFunction archOps genOpts constraints f = do id' :: (LLVMArchConstraints arch => a) -> a id' a = a - let ctx :: FunLLVMContext arch - ctx = - FunLLVMContext - { archFns = archOps - , funLLVMGenOptions = genOpts - , funAddr = fnAddr f - , funName = fnName f - , funArgs = V.fromList $ fmap L.ValIdent <$> inputArgs - , funRetType = fret - , funAllocaCount = 0 - , moduleConstraints = constraints - , funBlockPhis = phiMapFromFunction f - , withArchConstraints = id' - } + let + ctx :: FunLLVMContext arch + ctx = + FunLLVMContext + { archInfo = aInfo + , archFns = archOps + , funLLVMGenOptions = genOpts + , funAddr = fnAddr f + , funName = fnName f + , funArgs = V.fromList $ fmap L.ValIdent <$> inputArgs + , funRetType = fret + , funAllocaCount = 0 + , moduleConstraints = constraints + , funBlockPhis = phiMapFromFunction f + , withArchConstraints = id' + } -- Create ordinary blocks m0 <- gets llvmTransIntrinsicMap - let initFunState :: FunState arch - initFunState = - FunState - { nmCounter = 0 - , funIntrinsicMap = m0 - , needSwitchFailLabel = False - , funBlockPhiMap = initResolvePhiMap f - } + let + initFunState :: FunState arch + initFunState = + FunState + { nmCounter = 0 + , funIntrinsicMap = m0 + , needSwitchFailLabel = False + , funBlockPhiMap = initResolvePhiMap f + } + -- trace (show (PP.pretty (fnEntryBlock f))) (pure ()) let (postEntryFunState, entryBlockRes) = addLLVMBlock ctx initFunState (fnEntryBlock f) @@ -1892,11 +1956,13 @@ defineFunction archOps genOpts constraints f = do ++ llvmTransLogEvents s } - let entryLLVMBlock :: L.BasicBlock - entryLLVMBlock = toBasicBlock constraints (fnName f) (funBlockPhiMap finalFunState) entryBlockRes + let + entryLLVMBlock :: L.BasicBlock + entryLLVMBlock = toBasicBlock constraints (fnName f) (funBlockPhiMap finalFunState) entryBlockRes - let blocks :: [L.BasicBlock] - blocks = toBasicBlock constraints (fnName f) (funBlockPhiMap finalFunState) <$> finalBlocks + let + blocks :: [L.BasicBlock] + blocks = toBasicBlock constraints (fnName f) (funBlockPhiMap finalFunState) <$> finalBlocks let finBlocks | needSwitchFailLabel finalFunState = entryLLVMBlock : (blocks ++ [failBlock]) @@ -1916,22 +1982,23 @@ defineFunction archOps genOpts constraints f = do , L.defMetadata = Map.empty , L.defComdat = Nothing } - let funAnn :: Either String Ann.FunctionAnn - funAnn = do - blockAnnEntries <- mapM (getBlockAnn (fnName f)) (V.fromList (entryBlockRes : finalBlocks)) - let finBlockAnnMap - | needSwitchFailLabel finalFunState = - V.snoc blockAnnEntries (switchFailLabel, Ann.UnreachableBlock) - | otherwise = - blockAnnEntries - let blockObjMap = uncurry Ann.blockAnnToJSON <$> finBlockAnnMap - let addr = fromIntegral $ addrOffset $ segoffAddr $ fnAddr f - pure $! - Ann.FunctionAnn - { Ann.llvmFunName = BSC.unpack (fnName f) - , Ann.faStartAddr = addr - , Ann.blocks = blockObjMap - } + let + funAnn :: Either String Ann.FunctionAnn + funAnn = do + blockAnnEntries <- mapM (getBlockAnn (fnName f)) (V.fromList (entryBlockRes : finalBlocks)) + let finBlockAnnMap + | needSwitchFailLabel finalFunState = + V.snoc blockAnnEntries (switchFailLabel, Ann.UnreachableBlock) + | otherwise = + blockAnnEntries + let blockObjMap = uncurry Ann.blockAnnToJSON <$> finBlockAnnMap + let addr = fromIntegral $ addrOffset $ segoffAddr $ fnAddr f + pure $! + Ann.FunctionAnn + { Ann.llvmFunName = BSC.unpack (fnName f) + , Ann.faStartAddr = addr + , Ann.blocks = blockObjMap + } pure (funDef, funAnn) -- | Create function annotation from declaration. @@ -1971,12 +2038,12 @@ declareIntrinsic i = -- behavior. moduleForFunctions :: forall arch. - ( LLVMArchConstraints arch - , Show (FunctionType arch) - , FoldableFC (ArchFn arch) - , FoldableF (FnArchStmt arch) - , arch ~ X86_64 - ) => + arch ~ X86_64 => + FoldableF (FnArchStmt arch) => + FoldableFC (ArchFn arch) => + LLVMArchConstraints arch => + Show (FunctionType arch) => + ArchitectureInfo arch -> -- | Architecture specific functions LLVMArchSpecificOps arch -> -- | Options for generating LLVM @@ -1989,37 +2056,39 @@ moduleForFunctions :: , [Ann.ExternalFunctionAnn] , [LLVMLogEvent] ) -moduleForFunctions archOps genOpts recMod constraints = - let (dynIntrinsics, logEvents, definesAndAnn) = runLLVMTrans $ - forM (recoveredDefs recMod) $ \f -> do - let fId = funId (fnAddr f) (Just (fnName f)) - (d, ma) <- defineFunction archOps genOpts constraints f - pure (d, (fId, ma)) - -- FIXME: this is repeated in a bunch of places - ptrWidth = widthVal $ addrWidthNatRepr (addrWidthRepr (Proxy :: Proxy (ArchAddrWidth arch))) - namedTypes = - [ L.TypeDecl (L.Ident s) (tyToLLVMType ptrWidth ty) - | (s, ty) <- mcNamedTypes constraints - ] - llvmMod = - L.Module - { L.modSourceName = Nothing - , L.modDataLayout = [] - , L.modTypes = namedTypes - , L.modNamedMd = [] - , L.modUnnamedMd = [] - , L.modGlobals = [] - , L.modDeclares = - fmap declareIntrinsic llvmIntrinsics - ++ fmap declareIntrinsic dynIntrinsics - ++ fmap declareFunction (recoveredDecls recMod) - , L.modDefines = fst <$> definesAndAnn - , L.modInlineAsm = [] - , L.modAliases = [] - , L.modComdat = Map.empty - } - annDecls = mkExternalFunctionAnn <$> recoveredDecls recMod - in (llvmMod, snd <$> definesAndAnn, annDecls, logEvents) +moduleForFunctions aInfo archOps genOpts recMod constraints = + let + (dynIntrinsics, logEvents, definesAndAnn) = runLLVMTrans $ + forM (recoveredDefs recMod) $ \f -> do + let fId = funId (fnAddr f) (Just (fnName f)) + (d, ma) <- defineFunction aInfo archOps genOpts constraints f + pure (d, (fId, ma)) + -- FIXME: this is repeated in a bunch of places + ptrWidth = widthVal $ addrWidthNatRepr (addrWidthRepr (Proxy :: Proxy (ArchAddrWidth arch))) + namedTypes = + [ L.TypeDecl (L.Ident s) (tyToLLVMType ptrWidth ty) + | (s, ty) <- mcNamedTypes constraints + ] + llvmMod = + L.Module + { L.modSourceName = Nothing + , L.modDataLayout = [] + , L.modTypes = namedTypes + , L.modNamedMd = [] + , L.modUnnamedMd = [] + , L.modGlobals = [] + , L.modDeclares = + fmap declareIntrinsic llvmIntrinsics + ++ fmap declareIntrinsic dynIntrinsics + ++ fmap declareFunction (recoveredDecls recMod) + , L.modDefines = fst <$> definesAndAnn + , L.modInlineAsm = [] + , L.modAliases = [] + , L.modComdat = Map.empty + } + annDecls = mkExternalFunctionAnn <$> recoveredDecls recMod + in + (llvmMod, snd <$> definesAndAnn, annDecls, logEvents) -- | Returns the type that was inferred for the given value via constraint -- solving, if any. @@ -2034,6 +2103,8 @@ getInferredType (FnReturn (FnReturnVar retVar _)) = getInferredTypeForAssignIdBB getInferredType (FnArg arg _typ) = do fn <- asks funName getInferredFunctionArgType fn arg +getInferredType (FnCodePointer{}) = pure Nothing -- TODO +getInferredType (FnTypedCodePointer _args _ret) = error "TODO" -- TODO getInferredFunctionArgType :: BSC.ByteString -> Int -> BBLLVM arch (Maybe FTy) getInferredFunctionArgType fn arg = do diff --git a/src/Reopt/CFG/Recovery.hs b/src/Reopt/CFG/Recovery.hs index 4b9bb2d4..488f24f7 100644 --- a/src/Reopt/CFG/Recovery.hs +++ b/src/Reopt/CFG/Recovery.hs @@ -89,7 +89,6 @@ import Data.Macaw.Discovery.State import Data.Macaw.Memory.Permissions qualified as Perm import Data.Macaw.Types hiding (Type) import Data.Macaw.Types qualified as M (Type) - import Data.Macaw.X86 ( x86DemandContext, x86_64CallParams, @@ -547,7 +546,8 @@ recoverCValue cv = do RelocatableCValue _w addr | Just addrRef <- asSegmentOff mem addr , Perm.isExecutable (segmentFlags (segoffSegment addrRef)) -> do - throwErrorAt ReoptUnsupportedFnValueTag "Cannot lift code pointers." + pure $ FnCodePointer addr + -- throwErrorAt ReoptUnsupportedFnValueTag "Cannot lift code pointers." | otherwise -> case asAbsoluteAddr addr of Just absAddr -> emitNewAssign (toInteger absAddr) @@ -928,6 +928,7 @@ recoverStmt :: Stmt X86_64 ids -> Recover ids () recoverStmt stmtIdx stmt = do + -- trace ("Recovering " <> show (ppStmt pretty stmt)) (pure ()) case stmt of AssignStmt asgn -> do recoverAssign stmtIdx asgn @@ -955,11 +956,12 @@ recoverStmt stmtIdx stmt = do Comment msg -> do addFnStmt $ FnComment msg ExecArchStmt astmt0 -> do - -- Architecture-specific statements are assumed to always - -- have side effects. + -- Architecture-specific statements are assumed to always have side + -- effects. astmt <- traverseF recoverValue astmt0 addFnStmt (FnArchStmt (X86FnStmt astmt)) - InstructionStart o _ -> do + InstructionStart o asm -> do + addFnStmt $ FnComment asm -- added by val -- Set recovery instruction offset modify $ \s -> s{rsBlockOff = o} ArchState _ _ -> do @@ -1868,8 +1870,7 @@ x86CallRegs mem funNameMap funTypeMap _callSite regs = do Left $ Reason CallTargetNotFunctionEntryPoint (memWordValue (addrOffset faddr)) SymbolValue _ (SymbolRelocation nm _ver) -> do pure nm - _ -> - Left $ Reason IndirectCallTarget () + _ -> Left $ Reason IndirectCallTarget () case funTypeMap nm of Just tp -> x86TranslateCallType mem nm regs tp Nothing -> Left $ Reason UnknownCallTargetArguments nm @@ -1969,7 +1970,6 @@ recoverFunction sysp mem fInfo invMap nm curArgs curRets = do } runFunRecover funCtx $ do let entryBlk = fromJust $ Map.lookup entryAddr (fInfo ^. parsedBlocks) - -- Insert uninitialized register into initial block location map. let insUninit :: diff --git a/src/Reopt/TypeInference/ConstraintGen.hs b/src/Reopt/TypeInference/ConstraintGen.hs index dd5c96e9..2da43f22 100644 --- a/src/Reopt/TypeInference/ConstraintGen.hs +++ b/src/Reopt/TypeInference/ConstraintGen.hs @@ -55,13 +55,9 @@ import Data.Macaw.Types ( typeRepr, ) import Data.Parameterized (FoldableF, FoldableFC) -import Data.Parameterized.NatRepr ( - NatRepr, - intValue, - testEquality, - widthVal, - ) +import Data.Parameterized.NatRepr (NatRepr, intValue, testEquality, widthVal) import Data.Parameterized.Some (Some (Some), viewSome) +import Data.Parameterized.TraversableFC (toListFC) import Reopt.CFG.FnRep ( FnArchConstraints, FnArchStmt, @@ -89,9 +85,10 @@ import Reopt.TypeInference.Solver ( RowVar, SolverM, StructName, - Ty, + Ty (Ty), TyVar, eqTC, + funPtrTy, isNumTC, numTy, ptrAddTC, @@ -99,14 +96,16 @@ import Reopt.TypeInference.Solver ( ptrTC, runSolverM, subTypeTC, + tupleTy, unifyConstraints, - varTy, + varTy, vecTy, ) import Reopt.TypeInference.Solver qualified as S import Reopt.TypeInference.Solver.Constraints ( ConstraintProvenance (..), FnRepProvenance (..), ) +import Reopt.TypeInference.Solver.Types (TyF (..)) -- This algorithm proceeds in stages: -- 1. Give type variables to the arguments to all functions @@ -505,6 +504,16 @@ emitPtr prov pointee pointer = -- ----------------------------------------------------------------------------- -- Core algorithm +macawTypeToReoptTy :: Some TypeRepr -> Ty +macawTypeToReoptTy = viewSome go + where + go :: TypeRepr ty -> Ty + go BoolTypeRepr = numTy 1 + go (BVTypeRepr n) = numTy (fromInteger (intValue n)) + go (FloatTypeRepr _flt) = error "TODO: support float in type inference" + go (TupleTypeRepr s) = tupleTy $ toListFC go s + go (VecTypeRepr w tp) = vecTy (fromInteger (intValue w)) (go tp) + genFnValue :: FnArchConstraints arch => FnValue arch tp -> CGenM CGenBlockContext arch Ty genFnValue v = case v of @@ -516,6 +525,13 @@ genFnValue v = FnReturn frv -> funRetType frv FnFunctionEntryValue{} -> punt FnArg i _ -> argumentType i + FnCodePointer _addr -> pure $ Ty UnknownFunPtrTy + -- NOTE: not sure what to do about varags yet + FnTypedCodePointer _addr fty -> + pure $ + funPtrTy + (map macawTypeToReoptTy (fnArgTypes fty)) + (maybe (error "No return type, investigate...") macawTypeToReoptTy (fnReturnType fty)) -- FIXME (val) type? where punt = do warn "Punting on FnValue" diff --git a/src/Reopt/TypeInference/Header.hs b/src/Reopt/TypeInference/Header.hs index 81eb6915..d2194ac8 100644 --- a/src/Reopt/TypeInference/Header.hs +++ b/src/Reopt/TypeInference/Header.hs @@ -145,15 +145,34 @@ parseStructUnion (C.CStruct tag _mi _mdecl _attrs n) = C.CStructTag -> errorAt n "Struct is not supported." C.CUnionTag -> errorAt n "Union is not supported." --- | Parser derived declarators. +parseTypeDecl :: C.NodeInfo -> C.CDeclarationSpecifier C.NodeInfo -> CParser C.CTypeSpec +parseTypeDecl _ (C.CTypeSpec spec) = return spec +parseTypeDecl n _ = errorAt n "Expected type specification" + +parseTypeDecls :: C.CDeclaration C.NodeInfo -> CParser C.CTypeSpec +parseTypeDecls (C.CDecl [decl] _ n) = parseTypeDecl n decl +-- NOTE (val) So far I'm only seeing singleton lists here, not sure why. +parseTypeDecls (C.CDecl _decls _ n) = errorAt n "Expected single type spec, investigate." +parseTypeDecls (C.CStaticAssert _ _ n) = errorAt n "TODO" + +-- | Parse derived declarators. parseTypeDerivedDecl :: [C.CDerivedDeclarator C.NodeInfo] -> AnnType -> CParser AnnType parseTypeDerivedDecl [] tp = pure tp parseTypeDerivedDecl (C.CPtrDeclr _ _ : rest) tp = do parseTypeDerivedDecl rest $! PtrAnnType tp parseTypeDerivedDecl (C.CArrDeclr _ _ n : _) _tp = do errorAt n "Arrays are not supported." -parseTypeDerivedDecl (C.CFunDeclr _ _ n : _) _tp = do - errorAt n "Function declarations are not supported in this context." +parseTypeDerivedDecl (C.CFunDeclr (Right (decls, False)) _attrs _n : rest) tp = do + typeSpecs <- mapM parseTypeDecls decls + args <- mapM (parseType emptyQualMods) typeSpecs + parseTypeDerivedDecl rest $! FunPtrAnnType tp args + -- errorAt n $ "decls: " ++ show decls ++ "\nattrs: " ++ show attrs ++ "\ntp: " ++ show tp +parseTypeDerivedDecl (C.CFunDeclr (Right (_, True)) _boop n : _) _tp = do + errorAt n "True" +parseTypeDerivedDecl (C.CFunDeclr (Left {}) _boop n : _) _tp = do + errorAt n "Left" +-- parseTypeDerivedDecl (C.CFunDeclr _ _ n : _) _tp = do +-- errorAt n "Function declarations are not supported in this context." parseFullType :: [C.CDeclarationSpecifier C.NodeInfo] -> diff --git a/src/Reopt/TypeInference/HeaderTypes.hs b/src/Reopt/TypeInference/HeaderTypes.hs index d08204b6..d6d5744e 100644 --- a/src/Reopt/TypeInference/HeaderTypes.hs +++ b/src/Reopt/TypeInference/HeaderTypes.hs @@ -11,6 +11,7 @@ module Reopt.TypeInference.HeaderTypes ( ) where import Data.ByteString.Char8 qualified as BSC +import Data.List (intercalate) import Data.Map.Strict (Map) import Data.Map.Strict qualified as Map import Data.Vector qualified as V @@ -33,6 +34,7 @@ data AnnType PtrAnnType !AnnType | -- | A typedef with the name and resolved right hand side. TypedefAnnType !BSC.ByteString !AnnType + | FunPtrAnnType !AnnType ![AnnType] deriving (Eq, Show, Read) -- | Pretty print the header type for the end user. @@ -44,6 +46,7 @@ ppAnnType = \case DoubleAnnType -> "double" PtrAnnType tp -> ppAnnType tp ++ "*" TypedefAnnType nm _ -> BSC.unpack nm + FunPtrAnnType ret args -> ppAnnType ret ++ "(*?)(" ++ intercalate ", " (map ppAnnType args) ++ ")" instance PP.Pretty AnnType where pretty = \case @@ -53,6 +56,8 @@ instance PP.Pretty AnnType where DoubleAnnType -> "double" PtrAnnType tp -> PP.pretty tp <> "*" TypedefAnnType nm _ -> PP.pretty (BSC.unpack nm) + FunPtrAnnType ret args -> + PP.pretty ret PP.<+> PP.parens (PP.hcat (PP.punctuate PP.comma (map PP.pretty args))) -- | Information about function argument with optional name -- information. diff --git a/src/Reopt/TypeInference/Solver.hs b/src/Reopt/TypeInference/Solver.hs index d52b61c2..cec475c0 100644 --- a/src/Reopt/TypeInference/Solver.hs +++ b/src/Reopt/TypeInference/Solver.hs @@ -4,10 +4,13 @@ module Reopt.TypeInference.Solver ( Ty (..), TyVar, RowVar, + funPtrTy, numTy, ptrTy, ptrTy', + tupleTy, varTy, + vecTy, SolverM, runSolverM, eqTC, @@ -28,6 +31,8 @@ module Reopt.TypeInference.Solver ( FTy, pattern FNumTy, pattern FPtrTy, + pattern FFunPtrTy, + pattern FUnknownFunPtrTy, pattern FUnknownTy, pattern FNamedStruct, pattern FStructTy, @@ -96,9 +101,18 @@ ptrTy = Ty . PtrTy ptrTy' :: Ty -> Ty ptrTy' = Ty . PtrTy . singletonFieldMap 0 +tupleTy :: [Ty] -> Ty +tupleTy = Ty . TupleTy + +vecTy :: Int -> Ty -> Ty +vecTy i = Ty . VecTy i + varTy :: TyVar -> Ty varTy = Var +funPtrTy :: [Ty] -> Ty -> Ty +funPtrTy args ret = Ty (FunPtrTy args ret) + -------------------------------------------------------------------------------- -- Compilers from Ty into ITy @@ -114,6 +128,8 @@ compileTy (Ty ty) = PtrTy fm -> do fm' <- traverse nameTy fm PtrTy . RowExprVar <$> freshRowVarFM fm' + UnknownFunPtrTy -> pure UnknownFunPtrTy + FunPtrTy args ret -> FunPtrTy <$> mapM nameTy args <*> nameTy ret ConflictTy n -> pure (ConflictTy n) TupleTy ts -> TupleTy <$> traverse nameTy ts VecTy n ty' -> VecTy n <$> nameTy ty' @@ -374,6 +390,12 @@ pattern FNumTy sz = FTy (NumTy sz) pattern FPtrTy :: FTy -> FTy pattern FPtrTy ty = FTy (PtrTy ty) +pattern FFunPtrTy :: [FTy] -> FTy -> FTy +pattern FFunPtrTy args ret = FTy (FunPtrTy args ret) + +pattern FUnknownFunPtrTy :: FTy +pattern FUnknownFunPtrTy = FTy UnknownFunPtrTy + pattern FUnknownTy :: FTy pattern FUnknownTy = UnknownTy diff --git a/src/Reopt/TypeInference/Solver/Finalise.hs b/src/Reopt/TypeInference/Solver/Finalise.hs index 47cee445..3b49e4e7 100644 --- a/src/Reopt/TypeInference/Solver/Finalise.hs +++ b/src/Reopt/TypeInference/Solver/Finalise.hs @@ -78,6 +78,8 @@ finalizeTypeDefs = do if off == 0 then pure (PtrTy (rowExprVar re')) else PtrTy <$> freshRowVarFM (dropFieldMap off fm) + UnknownFunPtrTy -> pure UnknownFunPtrTy + FunPtrTy args ret -> FunPtrTy <$> mapM lookupTyVarRep args <*> lookupTyVarRep ret NumTy n -> pure (NumTy n) ConflictTy n -> pure (ConflictTy n) TupleTy ts -> TupleTy <$> traverse lookupTyVarRep ts @@ -149,6 +151,8 @@ finaliseTyF (ty, tv, _) r = where norm = \case PtrTy rv -> FTy (PtrTy (Map.findWithDefault (StructTy emptyFieldMap) rv (csRowVars r))) + UnknownFunPtrTy -> FTy UnknownFunPtrTy + FunPtrTy args ret -> FTy (FunPtrTy (map normTy args) (normTy ret)) NumTy n -> FTy (NumTy n) ConflictTy n -> FTy (ConflictTy n) TupleTy ts -> FTy (TupleTy (map normTy ts)) diff --git a/src/Reopt/TypeInference/Solver/Types.hs b/src/Reopt/TypeInference/Solver/Types.hs index 7655a37c..f618c6d7 100644 --- a/src/Reopt/TypeInference/Solver/Types.hs +++ b/src/Reopt/TypeInference/Solver/Types.hs @@ -30,6 +30,10 @@ data TyF rvar f TupleTy [f] | -- | A vector VecTy Int f + | -- | An unknown function pointer type + UnknownFunPtrTy + | -- | A known function pointer type + FunPtrTy [f] f deriving (Eq, Ord, Show, Functor, Foldable, Traversable) -- | An unrolled ITy @@ -69,8 +73,8 @@ recTyByteWidth ptrSz = offsetAfterLast . last where offsetAfterLast (o, ty) = fromIntegral o + tyByteWidth ptrSz ty --- | This shoold only be called on types which can occur within a --- RecTy, i.e., not records. +-- | This should only be called on types which can occur within a RecTy, i.e., +-- not records. tyByteWidth :: Int -> FTy -> Integer tyByteWidth ptrSz UnknownTy = fromIntegral ptrSz `div` 8 tyByteWidth _ptrSz StructTy{} = error "Saw a StructTy in tyByteWidth" @@ -79,6 +83,8 @@ tyByteWidth ptrSz (FTy ty) = case ty of NumTy n -> fromIntegral n `div` 8 PtrTy _ -> fromIntegral ptrSz `div` 8 + UnknownFunPtrTy -> fromIntegral ptrSz `div` 8 + FunPtrTy{} -> fromIntegral ptrSz `div` 8 ConflictTy n -> fromIntegral n `div` 8 TupleTy{} -> error "Saw a TupleTy in tyByteWidth" VecTy{} -> error "Saw a VecTy in tyByteWidth" @@ -98,17 +104,21 @@ recTyToLLVMType ptrSz fields = L.Struct (go 0 fields) -- c.f. typeToLLVMType tyToLLVMType :: Int -> FTy -> L.Type -tyToLLVMType ptrSz UnknownTy = - L.PrimType (L.Integer (fromIntegral ptrSz)) -tyToLLVMType _ptrSz (NamedStruct s) = L.Alias (L.Ident s) -tyToLLVMType ptrSz (StructTy fm) = recTyToLLVMType ptrSz (Map.assocs (getFieldMap fm)) -tyToLLVMType ptrSz (FTy ty) = - case ty of - NumTy n -> L.PrimType (L.Integer (fromIntegral n)) - PtrTy ty' -> L.PtrTo $ tyToLLVMType ptrSz ty' - ConflictTy n -> L.PrimType (L.Integer (fromIntegral n)) - TupleTy ts -> L.Struct (map (tyToLLVMType ptrSz) ts) - VecTy n ty' -> L.Vector (fromIntegral n) (tyToLLVMType ptrSz ty') +tyToLLVMType ptrSz = go + where + go :: FTy -> L.Type + go UnknownTy = L.PrimType (L.Integer (fromIntegral ptrSz)) + go (NamedStruct s) = L.Alias (L.Ident s) + go (StructTy fm) = recTyToLLVMType ptrSz (Map.assocs (getFieldMap fm)) + go (FTy ty) = + case ty of + NumTy n -> L.PrimType (L.Integer (fromIntegral n)) + PtrTy ty' -> L.PtrTo $ tyToLLVMType ptrSz ty' + UnknownFunPtrTy -> L.PtrTo L.Opaque + FunPtrTy args ret -> L.PtrTo $ L.FunTy (go ret) (map go args) False + ConflictTy n -> L.PrimType (L.Integer (fromIntegral n)) + TupleTy ts -> L.Struct (map go ts) + VecTy n ty' -> L.Vector (fromIntegral n) (go ty') -------------------------------------------------------------------------------- -- Instances @@ -124,6 +134,8 @@ instance (PP.Pretty f, PP.Pretty rv) => PP.Pretty (TyF rv f) where pretty = \case NumTy sz -> "i" <> PP.pretty sz PtrTy t -> "ptr " <> PP.pretty t + UnknownFunPtrTy -> "? (???)*" + FunPtrTy args ret -> PP.pretty ret <> " (" <> PP.hcat (PP.punctuate PP.comma (map PP.pretty args)) <> ")*" ConflictTy n -> "![" <> PP.pretty n <> "]" TupleTy ts -> PP.tupled (map PP.pretty ts) VecTy n ty -> "< " <> PP.pretty n <> " x " <> PP.pretty ty <> " >" @@ -150,6 +162,8 @@ instance (FreeTyVars rvar, FreeTyVars f) => FreeTyVars (TyF rvar f) where freeTyVars = \case NumTy _ -> Set.empty PtrTy t -> freeTyVars t + UnknownFunPtrTy -> Set.empty + FunPtrTy args ret -> freeTyVars ret `Set.union` Set.unions (map freeTyVars args) ConflictTy{} -> Set.empty TupleTy ts -> foldMap freeTyVars ts VecTy _ ty -> freeTyVars ty @@ -181,6 +195,8 @@ instance (FreeRowVars r, FreeRowVars f) => FreeRowVars (TyF r f) where freeRowVars = \case NumTy _ -> Set.empty PtrTy t -> freeRowVars t + UnknownFunPtrTy -> Set.empty + FunPtrTy args ret -> freeRowVars ret `Set.union` Set.unions (map freeRowVars args) ConflictTy{} -> Set.empty TupleTy ts -> foldMap freeRowVars ts VecTy _ ty -> freeRowVars ty diff --git a/tests/ReoptTests.hs b/tests/ReoptTests.hs index fd060815..db309c45 100644 --- a/tests/ReoptTests.hs +++ b/tests/ReoptTests.hs @@ -51,7 +51,7 @@ mkTest fp = T.testCase fp $ do mr <- runReoptM logger $ do recoverX86Elf loadOpts reoptOpts hdrAnn "reopt" hdrInfo - (os, discState, recovOut, moduleConstraints) <- either (fail . show) pure mr + (os, discState, recovOut, _, moduleConstraints) <- either (fail . show) pure mr let recMod = recoveredModule recovOut writeFile blocks_path $ show $ ppDiscoveryStateBlocks discState