-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
macaw-dump: A CLI for printing internal data structures
A simple library and tiny wrappers for different architectures, intended for debugging purposes. The only current capability is to run code discovery on a set of symbols in a binary (or all of them if none are specified), and pretty-print the resulting Macaw or Crucible CFGs.
- Loading branch information
1 parent
30b4579
commit 7c3bd2f
Showing
5 changed files
with
286 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
packages: base/ | ||
macaw-aarch32/ | ||
macaw-aarch32-symbolic/ | ||
macaw-dump/ | ||
macaw-semmc/ | ||
macaw-ppc/ | ||
macaw-ppc-symbolic/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
{-# LANGUAGE GADTs #-} | ||
{-# LANGUAGE ImportQualifiedPost #-} | ||
{-# LANGUAGE TypeApplications #-} | ||
|
||
module Main (main) where | ||
|
||
import Data.ByteString qualified as BS | ||
import Data.ElfEdit qualified as EE | ||
import Data.Macaw.AArch32.Symbolic () | ||
import Data.Macaw.ARM qualified as MA | ||
import Data.Macaw.Dump qualified as Dump | ||
import Data.Macaw.Symbolic qualified as MS | ||
import Data.Proxy (Proxy(..)) | ||
import Data.Set qualified as Set | ||
import System.Environment qualified as Env | ||
|
||
go :: FilePath -> IO () | ||
go exePath = do | ||
bytes <- BS.readFile exePath | ||
case EE.decodeElfHeaderInfo bytes of | ||
Left (_, msg) -> error ("Error parsing ELF header from file '" ++ show exePath ++ "': " ++ msg) | ||
Right (EE.SomeElf ehi) -> do | ||
case EE.headerClass (EE.header ehi) of | ||
EE.ELFCLASS32 -> do | ||
discState <- Dump.runDiscovery ehi MA.arm_linux_info Set.empty | ||
archVals <- | ||
case MS.archVals (Proxy @MA.ARM) Nothing of | ||
Just archVals -> pure archVals | ||
Nothing -> error "impossible" | ||
Dump.displayCfgs exePath discState archVals False | ||
EE.ELFCLASS64 -> error "Only 32-bit ARM is supported" | ||
|
||
main :: IO () | ||
main = do | ||
args <- Env.getArgs | ||
case args of | ||
[exePath] -> go exePath | ||
_ -> error "Bad arguments" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
Cabal-version: 2.2 | ||
Name: macaw-dump | ||
Version: 0.1 | ||
Author: Galois Inc. | ||
Maintainer: langston@galois.com | ||
Build-type: Simple | ||
License: BSD-3-Clause | ||
License-file: LICENSE | ||
Category: Language | ||
Synopsis: A tool to display internal Macaw data structures | ||
|
||
extra-doc-files: README.md | ||
|
||
common shared | ||
-- Specifying -Wall and -Werror can cause the project to fail to build on | ||
-- newer versions of GHC simply due to new warnings being added to -Wall. To | ||
-- prevent this from happening we manually list which warnings should be | ||
-- considered errors. We also list some warnings that are not in -Wall, though | ||
-- try to avoid "opinionated" warnings (though this judgement is clearly | ||
-- subjective). | ||
-- | ||
-- Warnings are grouped by the GHC version that introduced them, and then | ||
-- alphabetically. | ||
-- | ||
-- A list of warnings and the GHC version in which they were introduced is | ||
-- available here: | ||
-- https://ghc.gitlab.haskell.org/ghc/doc/users_guide/using-warnings.html | ||
|
||
-- Since GHC 8.10 or earlier: | ||
ghc-options: | ||
-Wall | ||
-Werror=compat-unqualified-imports | ||
-Werror=deferred-type-errors | ||
-Werror=deprecated-flags | ||
-Werror=deprecations | ||
-Werror=deriving-defaults | ||
-Werror=dodgy-foreign-imports | ||
-Werror=duplicate-exports | ||
-Werror=empty-enumerations | ||
-Werror=identities | ||
-Werror=inaccessible-code | ||
-Werror=incomplete-patterns | ||
-Werror=incomplete-record-updates | ||
-Werror=incomplete-uni-patterns | ||
-Werror=inline-rule-shadowing | ||
-Werror=missed-extra-shared-lib | ||
-Werror=missing-exported-signatures | ||
-Werror=missing-fields | ||
-Werror=missing-home-modules | ||
-Werror=missing-methods | ||
-Werror=overflowed-literals | ||
-Werror=overlapping-patterns | ||
-Werror=partial-fields | ||
-Werror=partial-type-signatures | ||
-Werror=simplifiable-class-constraints | ||
-Werror=star-binder | ||
-Werror=star-is-type | ||
-Werror=tabs | ||
-Werror=typed-holes | ||
-Werror=unrecognised-pragmas | ||
-Werror=unrecognised-warning-flags | ||
-Werror=unsupported-calling-conventions | ||
-Werror=unsupported-llvm-version | ||
-Werror=unticked-promoted-constructors | ||
-Werror=unused-imports | ||
-Werror=warnings-deprecations | ||
-Werror=wrong-do-bind | ||
|
||
if impl(ghc >= 9.2) | ||
ghc-options: | ||
-Werror=ambiguous-fields | ||
-Werror=operator-whitespace | ||
-Werror=operator-whitespace-ext-conflict | ||
-Werror=redundant-bang-patterns | ||
|
||
if impl(ghc >= 9.4) | ||
ghc-options: | ||
-Werror=forall-identifier | ||
-Werror=misplaced-pragmas | ||
-Werror=redundant-strictness-flags | ||
-Werror=type-equality-out-of-scope | ||
-Werror=type-equality-requires-operators | ||
|
||
ghc-prof-options: -O2 -fprof-auto-top | ||
default-language: Haskell2010 | ||
|
||
library | ||
import: shared | ||
|
||
build-depends: | ||
base >= 4.13, | ||
bytestring, | ||
containers, | ||
crucible, | ||
elf-edit, | ||
lens, | ||
macaw-base, | ||
macaw-symbolic, | ||
parameterized-utils, | ||
prettyprinter, | ||
text, | ||
what4 | ||
|
||
hs-source-dirs: src | ||
|
||
exposed-modules: | ||
Data.Macaw.Dump |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
{-# LANGUAGE DataKinds #-} | ||
{-# LANGUAGE FlexibleContexts #-} | ||
{-# LANGUAGE GADTs #-} | ||
{-# LANGUAGE ImportQualifiedPost #-} | ||
{-# LANGUAGE OverloadedStrings #-} | ||
{-# LANGUAGE RankNTypes #-} | ||
{-# LANGUAGE ScopedTypeVariables #-} | ||
{-# LANGUAGE TypeOperators #-} | ||
|
||
module Data.Macaw.Dump | ||
( DumpException | ||
, runDiscovery | ||
, displayCfgs | ||
) where | ||
|
||
import Control.Exception qualified as X | ||
import Control.Lens qualified as Lens | ||
import Control.Monad qualified as Monad | ||
import Data.ByteString qualified as BS | ||
import Data.ElfEdit qualified as EE | ||
import Data.Foldable qualified as F | ||
import Data.Set qualified as Set | ||
import Data.Macaw.Architecture.Info qualified as MAI | ||
import Data.Macaw.CFG qualified as MC | ||
import Data.Macaw.Discovery qualified as MD | ||
import Data.Macaw.Memory.ElfLoader qualified as MEL | ||
import Data.Macaw.Memory.LoadCommon qualified as MML | ||
import Data.Macaw.Memory qualified as MM | ||
import Data.Macaw.Symbolic qualified as MS | ||
import Data.Map qualified as Map | ||
import Data.Parameterized.Some (Some(Some)) | ||
import Data.Text.Encoding.Error qualified as Text | ||
import Data.Text.Encoding qualified as Text | ||
import Data.Text qualified as Text | ||
import Lang.Crucible.Analysis.Postdom qualified as CAP | ||
import Lang.Crucible.CFG.Core qualified as CCC | ||
import Lang.Crucible.CFG.Extension qualified as CCE | ||
import Lang.Crucible.FunctionHandle qualified as CFH | ||
import Prettyprinter qualified as PP | ||
import System.IO qualified as IO | ||
import What4.FunctionName qualified as WF | ||
import What4.ProgramLoc qualified as WPL | ||
|
||
data DumpException = ELFResolutionError String | ||
deriving Show | ||
|
||
instance X.Exception DumpException | ||
|
||
|
||
-- | Convert machine addresses into What4 positions. | ||
-- | ||
-- When possible, we map to the structured 'WPL.BinaryPos' type. However, some | ||
-- 'MM.MemSegmentOff' cannot be mapped to an absolute position (e.g., some | ||
-- addresses from shared libraries are in non-trivial segments). In those cases, | ||
-- we map to the unstructured 'WPL.Others' with a sufficiently descriptive string. | ||
-- | ||
-- TODO: import from `Testing` | ||
posFn :: (MM.MemWidth w) => Text.Text -> MM.MemSegmentOff w -> WPL.Position | ||
posFn binaryName segOff = | ||
case MM.segoffAsAbsoluteAddr segOff of | ||
Just mw -> WPL.BinaryPos binaryName (fromIntegral mw) | ||
Nothing -> WPL.OtherPos (binaryName <> Text.pack ": " <> Text.pack (show segOff)) | ||
|
||
-- | Load an ELF file into a macaw 'MM.Memory' (and symbols) | ||
-- | ||
-- Prints warnings to stderr. | ||
loadELF :: | ||
MML.LoadOptions -> | ||
EE.ElfHeaderInfo w -> | ||
IO (MM.Memory w, [MEL.MemSymbol w]) | ||
loadELF loadOpts ehi = do | ||
case MEL.resolveElfContents loadOpts ehi of | ||
Left err -> X.throwIO (ELFResolutionError err) | ||
Right (warnings, mem, _mentry, nameAddrList) -> do | ||
F.forM_ warnings $ \w -> do | ||
IO.hPutStrLn IO.stderr w | ||
return (mem, nameAddrList) | ||
|
||
-- | Run discovery on the provided symbols, or all if none are provided | ||
runDiscovery :: | ||
forall arch w. | ||
( MC.ArchAddrWidth arch ~ w | ||
, MC.ArchConstraints arch | ||
, MM.MemWidth w | ||
) => | ||
EE.ElfHeaderInfo w -> | ||
MAI.ArchitectureInfo arch -> | ||
Set.Set BS.ByteString -> | ||
IO (MD.DiscoveryState arch) | ||
runDiscovery headerInfo archInfo symbols = do | ||
(mem, nameAddrList) <- loadELF MML.defaultLoadOptions headerInfo | ||
let addrSymMap = | ||
Map.fromList | ||
[ (MEL.memSymbolStart msym, name) | ||
| msym <- nameAddrList | ||
, let name = MEL.memSymbolName msym | ||
, Set.null symbols || Set.member name symbols | ||
] | ||
pure (MD.cfgFromAddrs archInfo mem addrSymMap (Map.keys addrSymMap) []) | ||
|
||
displayCfgs :: | ||
( MC.ArchConstraints arch | ||
, MS.GenArchInfo MS.LLVMMemory arch | ||
, CCE.IsSyntaxExtension (MS.MacawExt arch) | ||
) => | ||
FilePath -> | ||
MD.DiscoveryState arch -> | ||
MS.GenArchVals mem arch -> | ||
-- | Also print Crucible CFG? | ||
Bool -> | ||
IO () | ||
displayCfgs path discState archVals printCrucible = do | ||
let funInfos = discState Lens.^. MD.funInfo | ||
halloc <- CFH.newHandleAllocator | ||
F.for_ (Map.toList funInfos) $ \(_addr, Some info) -> do | ||
IO.print (PP.pretty info) | ||
Monad.when printCrucible $ do | ||
let pos = posFn (Text.pack path) | ||
let funName = | ||
WF.functionNameFromText $ | ||
Text.decodeUtf8With Text.lenientDecode $ | ||
MD.discoveredFunName info | ||
CCC.SomeCFG ssa <- | ||
MS.mkFunCFG (MS.archFunctions archVals) halloc funName pos info | ||
IO.print (CCC.ppCFG' True (CAP.postdomInfo ssa) ssa) |