Skip to content

Commit

Permalink
macaw-dump: A CLI for printing internal data structures
Browse files Browse the repository at this point in the history
A simple library and tiny wrappers for different architectures, intended
for debugging purposes. The only current capability is to run code
discovery on a set of symbols in a binary (or all of them if none are
specified), and pretty-print the resulting Macaw or Crucible CFGs.
  • Loading branch information
langston-barrett authored and Your Name committed Oct 9, 2024
1 parent 30b4579 commit 7c3bd2f
Show file tree
Hide file tree
Showing 5 changed files with 286 additions and 0 deletions.
1 change: 1 addition & 0 deletions cabal.project.dist
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
packages: base/
macaw-aarch32/
macaw-aarch32-symbolic/
macaw-dump/
macaw-semmc/
macaw-ppc/
macaw-ppc-symbolic/
Expand Down
15 changes: 15 additions & 0 deletions macaw-aarch32-symbolic/macaw-aarch32-symbolic.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ library
default-language: Haskell2010
ghc-options: -Wall -Wcompat

executable macaw-aarch32-dump
hs-source-dirs: tools
main-is: Dump.hs
default-language: Haskell2010
ghc-options: -Wall -Wcompat
build-depends:
base,
bytestring,
containers,
elf-edit,
macaw-aarch32,
macaw-aarch32-symbolic,
macaw-dump,
macaw-symbolic

test-suite macaw-aarch32-symbolic-tests
type: exitcode-stdio-1.0
default-language: Haskell2010
Expand Down
38 changes: 38 additions & 0 deletions macaw-aarch32-symbolic/tools/Dump.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{-# LANGUAGE GADTs #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE TypeApplications #-}

module Main (main) where

import Data.ByteString qualified as BS
import Data.ElfEdit qualified as EE
import Data.Macaw.AArch32.Symbolic ()
import Data.Macaw.ARM qualified as MA
import Data.Macaw.Dump qualified as Dump
import Data.Macaw.Symbolic qualified as MS
import Data.Proxy (Proxy(..))
import Data.Set qualified as Set
import System.Environment qualified as Env

go :: FilePath -> IO ()
go exePath = do
bytes <- BS.readFile exePath
case EE.decodeElfHeaderInfo bytes of
Left (_, msg) -> error ("Error parsing ELF header from file '" ++ show exePath ++ "': " ++ msg)
Right (EE.SomeElf ehi) -> do
case EE.headerClass (EE.header ehi) of
EE.ELFCLASS32 -> do
discState <- Dump.runDiscovery ehi MA.arm_linux_info Set.empty
archVals <-
case MS.archVals (Proxy @MA.ARM) Nothing of
Just archVals -> pure archVals
Nothing -> error "impossible"
Dump.displayCfgs exePath discState archVals False
EE.ELFCLASS64 -> error "Only 32-bit ARM is supported"

main :: IO ()
main = do
args <- Env.getArgs
case args of
[exePath] -> go exePath
_ -> error "Bad arguments"
107 changes: 107 additions & 0 deletions macaw-dump/macaw-dump.cabal
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
Cabal-version: 2.2
Name: macaw-dump
Version: 0.1
Author: Galois Inc.
Maintainer: langston@galois.com
Build-type: Simple
License: BSD-3-Clause
License-file: LICENSE
Category: Language
Synopsis: A tool to display internal Macaw data structures

extra-doc-files: README.md

common shared
-- Specifying -Wall and -Werror can cause the project to fail to build on
-- newer versions of GHC simply due to new warnings being added to -Wall. To
-- prevent this from happening we manually list which warnings should be
-- considered errors. We also list some warnings that are not in -Wall, though
-- try to avoid "opinionated" warnings (though this judgement is clearly
-- subjective).
--
-- Warnings are grouped by the GHC version that introduced them, and then
-- alphabetically.
--
-- A list of warnings and the GHC version in which they were introduced is
-- available here:
-- https://ghc.gitlab.haskell.org/ghc/doc/users_guide/using-warnings.html

-- Since GHC 8.10 or earlier:
ghc-options:
-Wall
-Werror=compat-unqualified-imports
-Werror=deferred-type-errors
-Werror=deprecated-flags
-Werror=deprecations
-Werror=deriving-defaults
-Werror=dodgy-foreign-imports
-Werror=duplicate-exports
-Werror=empty-enumerations
-Werror=identities
-Werror=inaccessible-code
-Werror=incomplete-patterns
-Werror=incomplete-record-updates
-Werror=incomplete-uni-patterns
-Werror=inline-rule-shadowing
-Werror=missed-extra-shared-lib
-Werror=missing-exported-signatures
-Werror=missing-fields
-Werror=missing-home-modules
-Werror=missing-methods
-Werror=overflowed-literals
-Werror=overlapping-patterns
-Werror=partial-fields
-Werror=partial-type-signatures
-Werror=simplifiable-class-constraints
-Werror=star-binder
-Werror=star-is-type
-Werror=tabs
-Werror=typed-holes
-Werror=unrecognised-pragmas
-Werror=unrecognised-warning-flags
-Werror=unsupported-calling-conventions
-Werror=unsupported-llvm-version
-Werror=unticked-promoted-constructors
-Werror=unused-imports
-Werror=warnings-deprecations
-Werror=wrong-do-bind

if impl(ghc >= 9.2)
ghc-options:
-Werror=ambiguous-fields
-Werror=operator-whitespace
-Werror=operator-whitespace-ext-conflict
-Werror=redundant-bang-patterns

if impl(ghc >= 9.4)
ghc-options:
-Werror=forall-identifier
-Werror=misplaced-pragmas
-Werror=redundant-strictness-flags
-Werror=type-equality-out-of-scope
-Werror=type-equality-requires-operators

ghc-prof-options: -O2 -fprof-auto-top
default-language: Haskell2010

library
import: shared

build-depends:
base >= 4.13,
bytestring,
containers,
crucible,
elf-edit,
lens,
macaw-base,
macaw-symbolic,
parameterized-utils,
prettyprinter,
text,
what4

hs-source-dirs: src

exposed-modules:
Data.Macaw.Dump
125 changes: 125 additions & 0 deletions macaw-dump/src/Data/Macaw/Dump.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE ImportQualifiedPost #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}

module Data.Macaw.Dump
( DumpException
, runDiscovery
, displayCfgs
) where

import Control.Exception qualified as X
import Control.Lens qualified as Lens
import Control.Monad qualified as Monad
import Data.ByteString qualified as BS
import Data.ElfEdit qualified as EE
import Data.Foldable qualified as F
import Data.Set qualified as Set
import Data.Macaw.Architecture.Info qualified as MAI
import Data.Macaw.CFG qualified as MC
import Data.Macaw.Discovery qualified as MD
import Data.Macaw.Memory.ElfLoader qualified as MEL
import Data.Macaw.Memory.LoadCommon qualified as MML
import Data.Macaw.Memory qualified as MM
import Data.Macaw.Symbolic qualified as MS
import Data.Map qualified as Map
import Data.Parameterized.Some (Some(Some))
import Data.Text.Encoding.Error qualified as Text
import Data.Text.Encoding qualified as Text
import Data.Text qualified as Text
import Lang.Crucible.Analysis.Postdom qualified as CAP
import Lang.Crucible.CFG.Core qualified as CCC
import Lang.Crucible.CFG.Extension qualified as CCE
import Lang.Crucible.FunctionHandle qualified as CFH
import Prettyprinter qualified as PP
import System.IO qualified as IO
import What4.FunctionName qualified as WF
import What4.ProgramLoc qualified as WPL

data DumpException = ELFResolutionError String
deriving Show

instance X.Exception DumpException


-- | Convert machine addresses into What4 positions.
--
-- When possible, we map to the structured 'WPL.BinaryPos' type. However, some
-- 'MM.MemSegmentOff' cannot be mapped to an absolute position (e.g., some
-- addresses from shared libraries are in non-trivial segments). In those cases,
-- we map to the unstructured 'WPL.Others' with a sufficiently descriptive string.
--
-- TODO: import from `Testing`
posFn :: (MM.MemWidth w) => Text.Text -> MM.MemSegmentOff w -> WPL.Position
posFn binaryName segOff =
case MM.segoffAsAbsoluteAddr segOff of
Just mw -> WPL.BinaryPos binaryName (fromIntegral mw)
Nothing -> WPL.OtherPos (binaryName <> Text.pack ": " <> Text.pack (show segOff))

-- | Load an ELF file into a macaw 'MM.Memory' (and symbols)
--
-- Prints warnings to stderr.
loadELF ::
MML.LoadOptions ->
EE.ElfHeaderInfo w ->
IO (MM.Memory w, [MEL.MemSymbol w])
loadELF loadOpts ehi = do
case MEL.resolveElfContents loadOpts ehi of
Left err -> X.throwIO (ELFResolutionError err)
Right (warnings, mem, _mentry, nameAddrList) -> do
F.forM_ warnings $ \w -> do
IO.hPutStrLn IO.stderr w
return (mem, nameAddrList)

-- | Run discovery on the provided symbols, or all if none are provided
runDiscovery ::
forall arch w.
( MC.ArchAddrWidth arch ~ w
, MC.ArchConstraints arch
, MM.MemWidth w
) =>
EE.ElfHeaderInfo w ->
MAI.ArchitectureInfo arch ->
Set.Set BS.ByteString ->
IO (MD.DiscoveryState arch)
runDiscovery headerInfo archInfo symbols = do
(mem, nameAddrList) <- loadELF MML.defaultLoadOptions headerInfo
let addrSymMap =
Map.fromList
[ (MEL.memSymbolStart msym, name)
| msym <- nameAddrList
, let name = MEL.memSymbolName msym
, Set.null symbols || Set.member name symbols
]
pure (MD.cfgFromAddrs archInfo mem addrSymMap (Map.keys addrSymMap) [])

displayCfgs ::
( MC.ArchConstraints arch
, MS.GenArchInfo MS.LLVMMemory arch
, CCE.IsSyntaxExtension (MS.MacawExt arch)
) =>
FilePath ->
MD.DiscoveryState arch ->
MS.GenArchVals mem arch ->
-- | Also print Crucible CFG?
Bool ->
IO ()
displayCfgs path discState archVals printCrucible = do
let funInfos = discState Lens.^. MD.funInfo
halloc <- CFH.newHandleAllocator
F.for_ (Map.toList funInfos) $ \(_addr, Some info) -> do
IO.print (PP.pretty info)
Monad.when printCrucible $ do
let pos = posFn (Text.pack path)
let funName =
WF.functionNameFromText $
Text.decodeUtf8With Text.lenientDecode $
MD.discoveredFunName info
CCC.SomeCFG ssa <-
MS.mkFunCFG (MS.archFunctions archVals) halloc funName pos info
IO.print (CCC.ppCFG' True (CAP.postdomInfo ssa) ssa)

0 comments on commit 7c3bd2f

Please sign in to comment.