From 8dcbc41d054a4e5ad843159f58c4e43fa5dbf4b7 Mon Sep 17 00:00:00 2001 From: Sam Thomas Date: Tue, 11 Jun 2024 01:35:48 +0100 Subject: [PATCH] more rework --- fugue-core/Cargo.toml | 1 + fugue-core/src/arch/aarch64.rs | 42 +++++----- fugue-core/src/arch/arm.rs | 33 ++++---- fugue-core/src/arch/x86.rs | 136 ++++++++++++++++----------------- fugue-core/src/icfg/mod.rs | 34 +++++++++ fugue-core/src/lifter.rs | 66 +++++++++++----- fugue-core/src/project/mod.rs | 84 +++++++++++++++++--- 7 files changed, 251 insertions(+), 145 deletions(-) diff --git a/fugue-core/Cargo.toml b/fugue-core/Cargo.toml index 9398be8..c7e9324 100644 --- a/fugue-core/Cargo.toml +++ b/fugue-core/Cargo.toml @@ -20,6 +20,7 @@ ouroboros = "0.18" regex = "1" rkyv = "0.7" rustc-hash = "1.1" +sealed = "0.5" serde = { version = "1", features = ["derive"] } serde_yaml = "0.9" static_init = "1" diff --git a/fugue-core/src/arch/aarch64.rs b/fugue-core/src/arch/aarch64.rs index 6f943eb..7cd5423 100644 --- a/fugue-core/src/arch/aarch64.rs +++ b/fugue-core/src/arch/aarch64.rs @@ -3,30 +3,21 @@ use std::cell::{Cell, RefCell}; use fugue_ir::disassembly::IRBuilderArena; use fugue_ir::Address; -use thiserror::Error; - use yaxpeax_arch::*; use yaxpeax_arm::armv8::a64::Opcode; pub use yaxpeax_arm::armv8::a64::{ - DecodeError as AArch64DecoderError, InstDecoder as AArch64InstDecoder, Instruction as AArch64Instruction, + DecodeError as AArch64DecoderError, InstDecoder as AArch64InstDecoder, + Instruction as AArch64Instruction, }; use crate::ir::PCode; -use crate::lifter::{InsnLifter, LiftedInsn, LiftedInsnProperties, Lifter}; +use crate::lifter::{InsnLifter, LiftedInsn, LiftedInsnProperties, Lifter, LifterError}; pub struct AArch64InsnLifter { decoder: AArch64InstDecoder, } -#[derive(Debug, Error)] -pub enum AArch64LifterError { - #[error(transparent)] - Decoder(#[from] AArch64DecoderError), - #[error(transparent)] - Lifter(#[from] fugue_ir::error::Error), -} - impl AArch64InsnLifter { pub fn new() -> Self { Self::new_with(AArch64InstDecoder::default()) @@ -35,31 +26,32 @@ impl AArch64InsnLifter { pub fn new_with(decoder: AArch64InstDecoder) -> Self { Self { decoder } } + + pub fn boxed<'a>(self) -> Box> { + Box::new(self) + } } fn should_lift(insn: &AArch64Instruction) -> bool { match insn.opcode { - Opcode::B - | Opcode::BL - | Opcode::CBZ - | Opcode::CBNZ - | Opcode::SVC => true, + Opcode::B | Opcode::BL | Opcode::CBZ | Opcode::CBNZ | Opcode::SVC => true, _ => false, } } -impl<'a> InsnLifter<'a, AArch64Instruction> for AArch64InsnLifter { - type Error = AArch64LifterError; - +impl<'a> InsnLifter<'a> for AArch64InsnLifter { fn properties<'b>( &mut self, lifter: &mut Lifter, irb: &'a IRBuilderArena, address: Address, bytes: &'b [u8], - ) -> Result, Self::Error> { + ) -> Result, LifterError> { let mut reader = yaxpeax_arch::U8Reader::new(bytes); - let insn = self.decoder.decode(&mut reader)?; + let insn = self + .decoder + .decode(&mut reader) + .map_err(LifterError::decode)?; let size = insn.len().to_const() as u8; if should_lift(&insn) { @@ -68,7 +60,9 @@ impl<'a> InsnLifter<'a, AArch64Instruction> for AArch64InsnLifter { operations, delay_slots, length, - } = lifter.lift(irb, address, bytes)?; + } = lifter + .lift(irb, address, bytes) + .map_err(LifterError::lift)?; Ok(LiftedInsn { address, @@ -77,7 +71,6 @@ impl<'a> InsnLifter<'a, AArch64Instruction> for AArch64InsnLifter { operations: RefCell::new(Some(operations)), delay_slots, length, - data: insn, }) } else { Ok(LiftedInsn { @@ -87,7 +80,6 @@ impl<'a> InsnLifter<'a, AArch64Instruction> for AArch64InsnLifter { operations: RefCell::new(None), delay_slots: 0, length: size, - data: insn, }) } } diff --git a/fugue-core/src/arch/arm.rs b/fugue-core/src/arch/arm.rs index ebf0c5a..6f2b387 100644 --- a/fugue-core/src/arch/arm.rs +++ b/fugue-core/src/arch/arm.rs @@ -3,8 +3,6 @@ use std::cell::{Cell, RefCell}; use fugue_ir::disassembly::IRBuilderArena; use fugue_ir::Address; -use thiserror::Error; - use yaxpeax_arch::*; use yaxpeax_arm::armv7::{Opcode, Operand, Reg}; @@ -13,20 +11,12 @@ pub use yaxpeax_arm::armv7::{ }; use crate::ir::PCode; -use crate::lifter::{InsnLifter, LiftedInsn, LiftedInsnProperties, Lifter}; +use crate::lifter::{InsnLifter, LiftedInsn, LiftedInsnProperties, Lifter, LifterError}; pub struct ARMInsnLifter { decoder: ARMInstDecoder, } -#[derive(Debug, Error)] -pub enum ARMLifterError { - #[error(transparent)] - Decoder(#[from] ARMDecoderError), - #[error(transparent)] - Lifter(#[from] fugue_ir::error::Error), -} - impl ARMInsnLifter { pub fn new() -> Self { Self::new_with(ARMInstDecoder::armv7()) @@ -35,6 +25,10 @@ impl ARMInsnLifter { pub fn new_with(decoder: ARMInstDecoder) -> Self { Self { decoder } } + + pub fn boxed<'a>(self) -> Box> { + Box::new(self) + } } fn should_lift(insn: &ARMInstruction) -> bool { @@ -53,18 +47,19 @@ fn should_lift(insn: &ARMInstruction) -> bool { } } -impl<'a> InsnLifter<'a, ARMInstruction> for ARMInsnLifter { - type Error = ARMLifterError; - +impl<'a> InsnLifter<'a> for ARMInsnLifter { fn properties<'b>( &mut self, lifter: &mut Lifter, irb: &'a IRBuilderArena, address: Address, bytes: &'b [u8], - ) -> Result, Self::Error> { + ) -> Result, LifterError> { let mut reader = yaxpeax_arch::U8Reader::new(bytes); - let insn = self.decoder.decode(&mut reader)?; + let insn = self + .decoder + .decode(&mut reader) + .map_err(LifterError::decode)?; let size = insn.len().to_const() as u8; if should_lift(&insn) { @@ -73,7 +68,9 @@ impl<'a> InsnLifter<'a, ARMInstruction> for ARMInsnLifter { operations, delay_slots, length, - } = lifter.lift(irb, address, bytes)?; + } = lifter + .lift(irb, address, bytes) + .map_err(LifterError::lift)?; Ok(LiftedInsn { address, @@ -82,7 +79,6 @@ impl<'a> InsnLifter<'a, ARMInstruction> for ARMInsnLifter { operations: RefCell::new(Some(operations)), delay_slots, length, - data: insn, }) } else { Ok(LiftedInsn { @@ -92,7 +88,6 @@ impl<'a> InsnLifter<'a, ARMInstruction> for ARMInsnLifter { operations: RefCell::new(None), delay_slots: 0, length: size, - data: insn, }) } } diff --git a/fugue-core/src/arch/x86.rs b/fugue-core/src/arch/x86.rs index f07502f..972ae97 100644 --- a/fugue-core/src/arch/x86.rs +++ b/fugue-core/src/arch/x86.rs @@ -3,8 +3,6 @@ use std::cell::{Cell, RefCell}; use fugue_ir::disassembly::IRBuilderArena; use fugue_ir::Address; -use thiserror::Error; - use yaxpeax_arch::*; pub use yaxpeax_x86::{x86_32, x86_64}; @@ -18,72 +16,75 @@ pub use yaxpeax_x86::protected_mode::{ }; use crate::ir::PCode; -use crate::lifter::{InsnLifter, LiftedInsn, LiftedInsnProperties, Lifter}; +use crate::lifter::{InsnLifter, LiftedInsn, LiftedInsnProperties, Lifter, LifterError}; +#[sealed::sealed] pub trait X86Arch: Arch { fn should_lift(insn: &Self::Instruction) -> bool; } +#[sealed::sealed] impl X86Arch for x86_32 { fn should_lift(insn: &Self::Instruction) -> bool { use yaxpeax_x86::protected_mode::Opcode; return match insn.opcode() { - Opcode::JO | - Opcode::JB | - Opcode::JZ | - Opcode::JA | - Opcode::JS | - Opcode::JP | - Opcode::JL | - Opcode::JG | - Opcode::JMP | - Opcode::JNO | - Opcode::JNB | - Opcode::JNZ | - Opcode::JNA | - Opcode::JNS | - Opcode::JNP | - Opcode::JGE | - Opcode::JLE | - Opcode::JMPF | - Opcode::JMPE | - Opcode::JECXZ => true, + Opcode::JO + | Opcode::JB + | Opcode::JZ + | Opcode::JA + | Opcode::JS + | Opcode::JP + | Opcode::JL + | Opcode::JG + | Opcode::JMP + | Opcode::JNO + | Opcode::JNB + | Opcode::JNZ + | Opcode::JNA + | Opcode::JNS + | Opcode::JNP + | Opcode::JGE + | Opcode::JLE + | Opcode::JMPF + | Opcode::JMPE + | Opcode::JECXZ => true, Opcode::CALL | Opcode::CALLF => true, Opcode::RETF | Opcode::RETURN => true, _ => false, - } + }; } } +#[sealed::sealed] impl X86Arch for x86_64 { fn should_lift(insn: &Self::Instruction) -> bool { use yaxpeax_x86::amd64::Opcode; return match insn.opcode() { - Opcode::JO | - Opcode::JB | - Opcode::JZ | - Opcode::JA | - Opcode::JS | - Opcode::JP | - Opcode::JL | - Opcode::JG | - Opcode::JMP | - Opcode::JNO | - Opcode::JNB | - Opcode::JNZ | - Opcode::JNA | - Opcode::JNS | - Opcode::JNP | - Opcode::JGE | - Opcode::JLE | - Opcode::JMPF | - Opcode::JMPE => true, + Opcode::JO + | Opcode::JB + | Opcode::JZ + | Opcode::JA + | Opcode::JS + | Opcode::JP + | Opcode::JL + | Opcode::JG + | Opcode::JMP + | Opcode::JNO + | Opcode::JNB + | Opcode::JNZ + | Opcode::JNA + | Opcode::JNS + | Opcode::JNP + | Opcode::JGE + | Opcode::JLE + | Opcode::JMPF + | Opcode::JMPE => true, Opcode::CALL | Opcode::CALLF => true, Opcode::RETF | Opcode::RETURN => true, _ => false, - } + }; } } @@ -94,23 +95,6 @@ where decoder: D::Decoder, } -#[derive(Debug, Error)] -pub enum X86LifterError { - #[error(transparent)] - Decoder(anyhow::Error), - #[error(transparent)] - Lifter(#[from] fugue_ir::error::Error), -} - -impl X86LifterError { - pub fn decoder(e: E) -> Self - where - E: std::error::Error + Send + Sync + 'static, - { - Self::Decoder(e.into()) - } -} - impl X86InsnLifter where D: X86Arch, @@ -132,36 +116,46 @@ where } } -impl<'a, D> InsnLifter<'a, D::Instruction> for X86InsnLifter +impl X86InsnLifter where - D: X86Arch, - D::Instruction: 'a, + D: X86Arch + 'static, for<'b> U8Reader<'b>: Reader, ::Diff: TryInto, <::Diff as TryInto>::Error: std::error::Error + Send + Sync + 'static, { - type Error = X86LifterError; + pub fn boxed<'a>(self) -> Box> { + Box::new(self) + } +} +impl<'a, D> InsnLifter<'a> for X86InsnLifter +where + D: X86Arch, + for<'b> U8Reader<'b>: Reader, + ::Diff: TryInto, + <::Diff as TryInto>::Error: + std::error::Error + Send + Sync + 'static, +{ fn properties<'b>( &mut self, lifter: &mut Lifter, irb: &'a IRBuilderArena, address: Address, bytes: &'b [u8], - ) -> Result, Self::Error> { + ) -> Result, LifterError> { let mut reader = yaxpeax_arch::U8Reader::new(bytes); let insn = self .decoder .decode(&mut reader) - .map_err(X86LifterError::decoder)?; + .map_err(LifterError::decode)?; let size = insn .len() .to_const() .try_into() - .map_err(X86LifterError::decoder)?; + .map_err(LifterError::decode)?; if D::should_lift(&insn) { let PCode { @@ -169,7 +163,9 @@ where operations, delay_slots, length, - } = lifter.lift(irb, address, bytes)?; + } = lifter + .lift(irb, address, bytes) + .map_err(LifterError::lift)?; Ok(LiftedInsn { address, @@ -178,7 +174,6 @@ where operations: RefCell::new(Some(operations)), delay_slots, length, - data: insn, }) } else { Ok(LiftedInsn { @@ -188,7 +183,6 @@ where operations: RefCell::new(None), delay_slots: 0, length: size, - data: insn, }) } } diff --git a/fugue-core/src/icfg/mod.rs b/fugue-core/src/icfg/mod.rs index 2cc5f1c..d72dc44 100644 --- a/fugue-core/src/icfg/mod.rs +++ b/fugue-core/src/icfg/mod.rs @@ -1,3 +1,37 @@ +use std::collections::VecDeque; + +use fugue_ir::Address; + +use crate::project::{Project, ProjectRawView}; + +pub struct ICFGBuilder<'a, R> +where + R: ProjectRawView, +{ + project: &'a mut Project, + candidates: VecDeque
, +} + +impl<'a, R> ICFGBuilder<'a, R> +where + R: ProjectRawView, +{ + pub fn new(project: &'a mut Project) -> Self { + Self { + project, + candidates: VecDeque::new(), + } + } + + pub fn add_candidate(&mut self, candidate: impl Into
) { + self.candidates.push_back(candidate.into()); + } + + pub fn add_candidates(&mut self, candidates: impl IntoIterator) { + self.candidates.extend(candidates); + } +} + #[cfg(test)] mod test { use std::cell::{Cell, RefCell}; diff --git a/fugue-core/src/lifter.rs b/fugue-core/src/lifter.rs index 1ff020d..435ad5e 100644 --- a/fugue-core/src/lifter.rs +++ b/fugue-core/src/lifter.rs @@ -8,8 +8,48 @@ use fugue_ir::il::instruction::Instruction; use fugue_ir::translator::TranslationContext; use fugue_ir::{Address, Translator}; +use thiserror::Error; + use crate::ir::{Insn, PCode}; +#[derive(Debug, Error)] +pub enum LifterError { + #[error(transparent)] + Decode(anyhow::Error), + #[error(transparent)] + Lift(anyhow::Error), +} + +impl LifterError { + pub fn decode(e: E) -> Self + where + E: std::error::Error + Send + Sync + 'static, + { + Self::Decode(e.into()) + } + + pub fn decode_with(m: M) -> Self + where + M: std::fmt::Debug + std::fmt::Display + Send + Sync + 'static, + { + Self::Decode(anyhow::Error::msg(m)) + } + + pub fn lift(e: E) -> Self + where + E: std::error::Error + Send + Sync + 'static, + { + Self::Lift(e.into()) + } + + pub fn lift_with(m: M) -> Self + where + M: std::fmt::Debug + std::fmt::Display + Send + Sync + 'static, + { + Self::Lift(anyhow::Error::msg(m)) + } +} + #[derive(Clone)] #[repr(transparent)] pub struct Lifter<'a>(TranslationContext<'a>); @@ -137,17 +177,16 @@ bitflags::bitflags! { } } -pub struct LiftedInsn<'a, 'b, T: 'a = ()> { +pub struct LiftedInsn<'a, 'b> { pub address: Address, pub bytes: &'b [u8], pub properties: Cell, pub operations: RefCell>>>, pub delay_slots: u8, pub length: u8, - pub data: T, } -impl<'a, 'b, T: 'a> LiftedInsn<'a, 'b, T> { +impl<'a, 'b> LiftedInsn<'a, 'b> { pub fn address(&self) -> Address { self.address } @@ -160,14 +199,6 @@ impl<'a, 'b, T: 'a> LiftedInsn<'a, 'b, T> { &self.bytes[..self.len()] } - pub fn data(&self) -> &T { - &self.data - } - - pub fn data_mut(&mut self) -> &mut T { - &mut self.data - } - pub fn len(&self) -> usize { self.length as _ } @@ -209,16 +240,14 @@ impl<'a, 'b, T: 'a> LiftedInsn<'a, 'b, T> { } } -pub trait InsnLifter<'a, T: 'a = ()> { - type Error; - +pub trait InsnLifter<'a> { fn properties<'b>( &mut self, lifter: &mut Lifter, irb: &'a IRBuilderArena, address: Address, bytes: &'b [u8], - ) -> Result, Self::Error>; + ) -> Result, LifterError>; } #[derive(Debug, Clone, Copy, Default)] @@ -231,21 +260,19 @@ impl DefaultInsnLifter { } impl<'a> InsnLifter<'a> for DefaultInsnLifter { - type Error = Error; - fn properties<'b>( &mut self, lifter: &mut Lifter, irb: &'a IRBuilderArena, address: Address, bytes: &'b [u8], - ) -> Result, Self::Error> { + ) -> Result, LifterError> { let PCode { address, operations, delay_slots, length, - } = lifter.lift(irb, address, bytes)?; + } = lifter.lift(irb, address, bytes).map_err(LifterError::lift)?; Ok(LiftedInsn { address, @@ -254,7 +281,6 @@ impl<'a> InsnLifter<'a> for DefaultInsnLifter { properties: Cell::new(LiftedInsnProperties::default()), delay_slots, length, - data: (), }) } } diff --git a/fugue-core/src/project/mod.rs b/fugue-core/src/project/mod.rs index bcbb156..8532353 100644 --- a/fugue-core/src/project/mod.rs +++ b/fugue-core/src/project/mod.rs @@ -1,6 +1,7 @@ use std::borrow::Cow; use std::cmp::Ordering; use std::fmt::{Debug, Display}; +use std::marker::PhantomData; use std::ops::Range; use anyhow::anyhow; @@ -10,6 +11,8 @@ use heed::types::U64; use iset::IntervalMap; use thiserror::Error; +use crate::language::{Language, LanguageBuilder}; +use crate::lifter::Lifter; use crate::loader::Loadable; use crate::util::table::{MmapTable, MmapTableReader}; @@ -47,7 +50,11 @@ pub struct LoadedSegment<'a> { } impl<'a> LoadedSegment<'a> { - pub fn new(name: impl Into>, addr: impl Into
, data: impl Into>) -> Self { + pub fn new( + name: impl Into>, + addr: impl Into
, + data: impl Into>, + ) -> Self { let data = data.into(); let size = data.len(); @@ -59,7 +66,11 @@ impl<'a> LoadedSegment<'a> { } } - pub fn new_uninit(name: impl Into>, addr: impl Into
, size: usize) -> Self { + pub fn new_uninit( + name: impl Into>, + addr: impl Into
, + size: usize, + ) -> Self { Self { name: name.into(), addr: addr.into(), @@ -90,13 +101,14 @@ impl<'a> LoadedSegment<'a> { } pub fn range(&self) -> Range
{ - self.addr..self.addr+self.size + self.addr..self.addr + self.size } } impl<'a> ProjectRawViewReader<'a> for ProjectRawViewMmapedReader<'a> { fn view_bytes(&self, address: impl Into
) -> Result<&[u8], ProjectRawViewError> { let address = address.into(); + // find the interval that contains this address let Ok(index) = self.segments.binary_search_by(|segm| { let iv = segm.range(); @@ -163,7 +175,7 @@ impl ProjectRawView for ProjectRawViewMmaped { for i in 1..segments.len() { let ri = segments[i].range(); - let rj = segments[i-1].range(); + let rj = segments[i - 1].range(); if ri.start < rj.end { return Err(ProjectRawViewError::OverlappingRanges); @@ -227,7 +239,10 @@ impl ProjectRawView for ProjectRawViewInMemory { return Err(ProjectRawViewError::OverlappingRanges); } - mapping.insert(addr..last_addr, LoadedSegment::new("LOAD", addr, data.into_owned())); + mapping.insert( + addr..last_addr, + LoadedSegment::new("LOAD", addr, data.into_owned()), + ); Ok(mapping) })?; @@ -286,13 +301,25 @@ pub struct Project where R: ProjectRawView, { + entry: Option
, + language: Language, mapping: R, } +pub struct ProjectBuilder +where + R: ProjectRawView, +{ + language_builder: LanguageBuilder, + _mapping: PhantomData, +} + #[derive(Debug, Error)] pub enum ProjectError { #[error("cannot load project: {0}")] Load(anyhow::Error), + #[error("cannot initialise language: {0}")] + Language(anyhow::Error), } impl ProjectError { @@ -302,20 +329,51 @@ impl ProjectError { { Self::Load(e.into()) } + + pub fn language(e: E) -> Self + where + E: std::error::Error + Send + Sync + 'static, + { + Self::Language(e.into()) + } } -impl Project +impl ProjectBuilder where R: ProjectRawView, { - pub fn new<'a, L>(loadable: &L) -> Result + pub fn new(language_builder: LanguageBuilder) -> Self { + Self { + language_builder, + _mapping: PhantomData, + } + } + + pub fn build<'a, L>(&self, loadable: &L) -> Result, ProjectError> where L: Loadable<'a>, { - Ok(Self { + Ok(Project { + entry: loadable.entry(), + language: loadable + .language(&self.language_builder) + .map_err(ProjectError::language)?, mapping: R::new(loadable).map_err(ProjectError::load)?, }) } +} + +impl Project +where + R: ProjectRawView, +{ + pub fn language(&self) -> &Language { + &self.language + } + + pub fn lifter(&self) -> Lifter { + self.language.lifter() + } pub fn raw(&self) -> &R { &self.mapping @@ -336,9 +394,15 @@ mod test { let input = BytesOrMapping::from_file("tests/ls.elf")?; let object = Object::new(input)?; + let language_builder = LanguageBuilder::new("data")?; + + let project_builder1 = + ProjectBuilder::::new(language_builder.clone()); + let project_builder2 = ProjectBuilder::::new(language_builder); + // Create the project from the mapping object - let project1 = Project::::new(&object)?; - let project2 = Project::::new(&object)?; + let project1 = project_builder1.build(&object)?; + let project2 = project_builder2.build(&object)?; // Let's test a read from a known address... let reader1 = project1.raw().reader()?;