From 330e887696be67c6d8e812086bf425ae5579efd6 Mon Sep 17 00:00:00 2001 From: Teo Voinea <58236992+tevoinea@users.noreply.github.com> Date: Thu, 19 Oct 2023 20:13:43 +0000 Subject: [PATCH 1/2] Early days --- src/agent/Cargo.lock | 63 ++++++++++++ src/agent/coverage/Cargo.toml | 1 + src/agent/coverage/src/source.rs | 2 + src/agent/debuggable-module/Cargo.toml | 2 + src/agent/debuggable-module/src/block.rs | 116 ++++++++++++++++++++++- 5 files changed, 183 insertions(+), 1 deletion(-) diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index eb35241201..aecde54ed0 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -296,6 +296,30 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "bad64" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdfe7137c8817d4649b0f05e559c759efa8fb376cd25a33c4d069288332f1202" +dependencies = [ + "bad64-sys", + "cstr_core", + "num-derive", + "num-traits", + "static_assertions", +] + +[[package]] +name = "bad64-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95ee29370ca1576779cbf37adc0bebd518ef7956bb457f64fe499817a7774aea" +dependencies = [ + "bindgen", + "cc", + "glob", +] + [[package]] name = "base64" version = "0.13.1" @@ -415,6 +439,7 @@ version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01" dependencies = [ + "jobserver", "libc", ] @@ -558,6 +583,7 @@ name = "coverage" version = "0.1.0" dependencies = [ "anyhow", + "bad64", "cc", "clap", "cobertura", @@ -711,6 +737,16 @@ dependencies = [ "typenum", ] +[[package]] +name = "cstr_core" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd98742e4fdca832d40cab219dc2e3048de17d873248f83f17df47c1bea70956" +dependencies = [ + "cty", + "memchr", +] + [[package]] name = "ctrlc" version = "3.4.0" @@ -721,6 +757,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "cty" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35" + [[package]] name = "deadpool" version = "0.9.5" @@ -745,6 +787,7 @@ name = "debuggable-module" version = "0.1.0" dependencies = [ "anyhow", + "bad64", "clap", "elsa", "gimli 0.28.0", @@ -1688,6 +1731,15 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +[[package]] +name = "jobserver" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +dependencies = [ + "libc", +] + [[package]] name = "joinery" version = "2.1.0" @@ -2073,6 +2125,17 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "num-derive" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" +dependencies = [ + "proc-macro2 1.0.66", + "quote 1.0.32", + "syn 1.0.109", +] + [[package]] name = "num-traits" version = "0.2.16" diff --git a/src/agent/coverage/Cargo.toml b/src/agent/coverage/Cargo.toml index e1ced7050f..5548288530 100644 --- a/src/agent/coverage/Cargo.toml +++ b/src/agent/coverage/Cargo.toml @@ -41,3 +41,4 @@ coverage = { path = "../coverage" } cc = "1.0" tempfile = "3.8.0" dunce = "1.0" +bad64 = "0.6.0" diff --git a/src/agent/coverage/src/source.rs b/src/agent/coverage/src/source.rs index e06e8aa285..e3e20255ad 100644 --- a/src/agent/coverage/src/source.rs +++ b/src/agent/coverage/src/source.rs @@ -164,6 +164,8 @@ pub fn binary_to_source_coverage( Ok(source) } +// TODO: Maybe? +// #[cfg(target_arch = "x86_64")] fn instruction_offsets(module: &dyn Module, block: &Block) -> Result> { use iced_x86::Decoder; let data = module.read(block.offset, block.size)?; diff --git a/src/agent/debuggable-module/Cargo.toml b/src/agent/debuggable-module/Cargo.toml index a227432830..0709820614 100644 --- a/src/agent/debuggable-module/Cargo.toml +++ b/src/agent/debuggable-module/Cargo.toml @@ -19,6 +19,8 @@ symbolic = { version = "12.3", features = [ "symcache", ] } thiserror = "1.0" +bad64 = "0.6.0" + [dev-dependencies] clap = { version = "4.4", features = ["derive"] } diff --git a/src/agent/debuggable-module/src/block.rs b/src/agent/debuggable-module/src/block.rs index a794623ba9..f4d228b614 100644 --- a/src/agent/debuggable-module/src/block.rs +++ b/src/agent/debuggable-module/src/block.rs @@ -2,7 +2,6 @@ // Licensed under the MIT License. use anyhow::Result; -use iced_x86::Decoder; use std::collections::{BTreeMap, BTreeSet}; use crate::debuginfo::DebugInfo; @@ -19,6 +18,120 @@ pub fn sweep_module(module: &dyn Module, debuginfo: &DebugInfo) -> Result Result { + use bad64::disasm; + use bad64::Op::*; + let region = offset.region(size); + let mut visited = BTreeSet::new(); + + let data = module.read(offset, size)?; + + let mut pending = Vec::new(); + + // Schedule the function entrypoint. + pending.push(offset.0); + + // Schedule any extra jump labels in the target region. + for label in debuginfo.labels() { + // Don't duplicate function entrypoint. + if label == offset { + continue; + } + + // Don't visit labels outside of the function region. + if !region.contains(&label.0) { + continue; + } + + pending.push(label.0); + + while let Some(entry) = pending.pop() { + if !region.contains(&entry) { + continue; + } + + if visited.contains(&entry) { + continue; + } + + visited.insert(entry); + + // Reset decoder for `entry`. + let position = (entry - offset.0); + let mut decoder = disasm(data, position); + + // Decode instructions (starting from `entry`) until we reach a block + // terminator or run out of valid data. + while let Some(Ok(inst)) = decoder.next() { + match inst.op() { + // Unconditional branch + B | BR => { + // Pretty sure we only need the first operand? + let target = match inst.operands()[0] { + // Idk which one we need yet + e => { + // Using the operand, we should figure out where the branch is going to + println!("Got B | BR operand: {:?}", e); + 7 + } + }; + pending.push(target); + + // We can't fall through to the next instruction, so don't add it to + // the worklist. + break; + } + // Conditional branch + CBNZ | CBZ | B_AL | B_CC | B_CS | B_EQ | B_GE | B_GT | B_HI | B_LE | B_LS + | B_LT | B_MI | B_NE | B_NV | B_PL | B_VC | B_VS => { + // Pretty sure we only need the first operand? + let target = match inst.operands()[0] { + // Idk which one we need yet + e => { + // Using the operand, we should figure out where the branch is going to + println!("Got conditional branch operand: {:?}", e); + 7 + } + }; + pending.push(target); + + // We can fall through, so add to work list. + if let Some(Ok(next_inst)) = decoder.peekable().peek() { + pending.push(next_inst.address()); + } + + // Fall through not guaranteed, so this block is terminated. + break; + } + // TODO: Figure out what to do about BRKA BRKAS BRKB BRKBS BRKN BRKNS BRKPA BRKPAS BRKPB BRKPBS + // equivalent to int3 in x86 + BRK => { + break; + } + // return + RET => { + break; + } + // call + BL | BLR => {} // exception + // interrupt + SVC | HVC | SMC => {} + _ => { + println!("You didn't handle instruction type: {:?}", inst) + } + } + } + } + } + + panic!() +} + pub fn sweep_region( module: &dyn Module, debuginfo: &DebugInfo, @@ -26,6 +139,7 @@ pub fn sweep_region( size: u64, ) -> Result { use iced_x86::Code; + use iced_x86::Decoder; use iced_x86::FlowControl::*; let region = offset.region(size); From 6d331b93622ef1b4e0e63505ab4366370f7f7c7b Mon Sep 17 00:00:00 2001 From: Teo Voinea <58236992+tevoinea@users.noreply.github.com> Date: Mon, 23 Oct 2023 17:28:58 +0000 Subject: [PATCH 2/2] Organize the code a bit --- .../debuggable-module/src/block/arm64.rs | 237 ++++++++++++++++++ src/agent/debuggable-module/src/block/mod.rs | 77 ++++++ .../src/{block.rs => block/x86_64.rs} | 179 +------------ 3 files changed, 316 insertions(+), 177 deletions(-) create mode 100644 src/agent/debuggable-module/src/block/arm64.rs create mode 100644 src/agent/debuggable-module/src/block/mod.rs rename src/agent/debuggable-module/src/{block.rs => block/x86_64.rs} (55%) diff --git a/src/agent/debuggable-module/src/block/arm64.rs b/src/agent/debuggable-module/src/block/arm64.rs new file mode 100644 index 0000000000..da64ae9376 --- /dev/null +++ b/src/agent/debuggable-module/src/block/arm64.rs @@ -0,0 +1,237 @@ +use bad64::disasm; +use bad64::Op::*; + +use anyhow::Result; +use std::collections::{BTreeMap, BTreeSet}; + +use crate::block::Block; +use crate::debuginfo::DebugInfo; +use crate::{Module, Offset}; + +use super::Blocks; + +enum FlowControlOpClassification { + Next, + UnconditionalBranch, + ConditionalBranch, + Return, + Call, + Interrupt, + NotFlowControl, + + // TODO: Don't know what to do about these yet + IndirectBranch, + IndirectCall, + XbeginXabortXend, + Exception, +} + +impl From for FlowControlOpClassification { + fn from(val: bad64::Op) -> Self { + match val { + NOP => FlowControlOpClassification::Next, + B | BR => FlowControlOpClassification::UnconditionalBranch, + CBNZ | CBZ | B_AL | B_CC | B_CS | B_EQ | B_GE | B_GT | B_HI | B_LE | B_LS | B_LT + | B_MI | B_NE | B_NV | B_PL | B_VC | B_VS => { + FlowControlOpClassification::ConditionalBranch + } + // TODO: Figure out what to do about BRKA BRKAS BRKB BRKBS BRKN BRKNS BRKPA BRKPAS BRKPB BRKPBS + // equivalent to int3 in x86 + BRK => FlowControlOpClassification::Interrupt, + RET => FlowControlOpClassification::Return, + BL | BLR => FlowControlOpClassification::Call, + SVC | HVC | SMC => FlowControlOpClassification::Interrupt, + _ => FlowControlOpClassification::NotFlowControl, + } + } +} + +pub fn sweep_region( + module: &dyn Module, + debuginfo: &DebugInfo, + offset: Offset, + size: u64, +) -> Result { + let region = offset.region(size); + let mut visited = BTreeSet::new(); + + let data = module.read(offset, size)?; + + let mut pending = Vec::new(); + + // Schedule the function entrypoint. + pending.push(offset.0); + + // Schedule any extra jump labels in the target region. + for label in debuginfo.labels() { + // Don't duplicate function entrypoint. + if label == offset { + continue; + } + + // Don't visit labels outside of the function region. + if !region.contains(&label.0) { + continue; + } + + pending.push(label.0); + + while let Some(entry) = pending.pop() { + if !region.contains(&entry) { + continue; + } + + if visited.contains(&entry) { + continue; + } + + visited.insert(entry); + + // Reset decoder for `entry`. + let position = (entry - offset.0); + let mut decoder = disasm(data, position); + + // Decode instructions (starting from `entry`) until we reach a block + // terminator or run out of valid data. + while let Some(Ok(inst)) = decoder.next() { + let op_group: FlowControlOpClassification = inst.op().into(); + match op_group { + FlowControlOpClassification::UnconditionalBranch => { + // Pretty sure we only need the first operand? + let target = match inst.operands()[0] { + // Idk which one we need yet + e => { + // Using the operand, we should figure out where the branch is going to + println!("Got B | BR operand: {:?}", e); + 7 + } + }; + pending.push(target); + + // We can't fall through to the next instruction, so don't add it to + // the worklist. + break; + } + FlowControlOpClassification::ConditionalBranch => { + // Pretty sure we only need the first operand? + let target = match inst.operands()[0] { + // Idk which one we need yet + e => { + // Using the operand, we should figure out where the branch is going to + println!("Got conditional branch operand: {:?}", e); + 7 + } + }; + pending.push(target); + + // We can fall through, so add to work list. + if let Some(Ok(next_inst)) = decoder.peekable().peek() { + pending.push(next_inst.address()); + } + + // Fall through not guaranteed, so this block is terminated. + break; + } + FlowControlOpClassification::Interrupt => { + break; + } + FlowControlOpClassification::Return => { + break; + } + FlowControlOpClassification::Call => { + todo!() + } + _ => { + // println!("You didn't handle instruction type: {:?}", inst) + } + } + } + } + } + + let mut blocks = Blocks::default(); + + for &entry in &visited { + // Reset decoder for `entry`. + let position = entry - offset.0; + // let mut decoder = disasm(data, position); + let mut end = 0; + + let mut decoder = disasm(data, position).peekable(); + while let Some(Ok(inst)) = decoder.next() { + end = inst.address(); + let op_group: FlowControlOpClassification = inst.op().into(); + match op_group { + FlowControlOpClassification::NotFlowControl => {} + FlowControlOpClassification::IndirectBranch => { + break; + } + FlowControlOpClassification::UnconditionalBranch => { + break; + } + FlowControlOpClassification::ConditionalBranch => { + break; + } + FlowControlOpClassification::Return => { + break; + } + FlowControlOpClassification::Call => { + todo!(); + // let target = Offset(inst.near_branch_target()); + + // // If call site is `noreturn`, then next instruction is not reachable. + // let noreturn = debuginfo + // .functions() + // .find(|f| f.contains(&target)) + // .map(|f| f.noreturn) + // .unwrap_or(false); + + // if noreturn { + // break; + // } + } + FlowControlOpClassification::Exception => { + todo!(); + // Ensure that the decoder PC points to the first instruction outside + // of the block. + // + // By doing this, we always exclude UD instructions from blocks. + // decoder.set_ip(inst.ip()); + + // Invalid instruction or UD. + // break; + } + FlowControlOpClassification::Interrupt => break, + FlowControlOpClassification::Next => { + // Fall through. + } + FlowControlOpClassification::IndirectCall => { + // We dont' know the callee and can't tell if it is `noreturn`, so fall through. + } + FlowControlOpClassification::XbeginXabortXend => { + // Not yet analyzed, so fall through. + } + } + + // Based only on instruction semantics, we'd continue. But if the + // next offset is a known block entrypoint, we're at a terminator. + if let Some(Ok(next_inst)) = decoder.peek() { + if visited.contains(&next_inst.address()) { + break; + } + } + } + + let size = end.saturating_sub(entry); + + if size > 0 { + let offset = Offset(entry); + let block = Block::new(offset, size); + blocks.map.insert(offset, block); + } else { + warn!("dropping empty block {:x}..{:x}", entry, end); + } + } + + Ok(blocks) +} diff --git a/src/agent/debuggable-module/src/block/mod.rs b/src/agent/debuggable-module/src/block/mod.rs new file mode 100644 index 0000000000..1e315dfd99 --- /dev/null +++ b/src/agent/debuggable-module/src/block/mod.rs @@ -0,0 +1,77 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use anyhow::Result; +use std::collections::BTreeMap; + +use crate::debuginfo::DebugInfo; +use crate::{Module, Offset}; + +#[cfg(target_arch = "aarch64")] +pub use self::arm64::sweep_region; +#[cfg(target_arch = "x86_64")] +pub use self::x86_64::sweep_region; + +pub mod arm64; +pub mod x86_64; + +pub fn sweep_module(module: &dyn Module, debuginfo: &DebugInfo) -> Result { + let mut blocks = Blocks::default(); + + for function in debuginfo.functions() { + let function_blocks = sweep_region(module, debuginfo, function.offset, function.size)?; + blocks.map.extend(&function_blocks.map); + } + + Ok(blocks) +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct Block { + pub offset: Offset, + pub size: u64, +} + +impl Block { + pub fn new(offset: Offset, size: u64) -> Self { + Self { offset, size } + } + + pub fn contains(&self, offset: &Offset) -> bool { + self.offset.region(self.size).contains(&offset.0) + } +} + +#[derive(Clone, Debug, Default)] +pub struct Blocks { + pub map: BTreeMap, +} + +impl Blocks { + pub fn new() -> Self { + Self::default() + } + + pub fn iter(&self) -> impl Iterator { + self.map.values() + } + + pub fn find(&self, offset: &Offset) -> Option<&Block> { + self.map.values().find(|b| b.contains(offset)) + } + + pub fn extend<'b>(&mut self, blocks: impl IntoIterator) { + for &b in blocks.into_iter() { + self.map.insert(b.offset, b); + } + } +} + +impl<'b> IntoIterator for &'b Blocks { + type Item = &'b Block; + type IntoIter = std::collections::btree_map::Values<'b, Offset, Block>; + + fn into_iter(self) -> Self::IntoIter { + self.map.values() + } +} diff --git a/src/agent/debuggable-module/src/block.rs b/src/agent/debuggable-module/src/block/x86_64.rs similarity index 55% rename from src/agent/debuggable-module/src/block.rs rename to src/agent/debuggable-module/src/block/x86_64.rs index f4d228b614..efdd65fd02 100644 --- a/src/agent/debuggable-module/src/block.rs +++ b/src/agent/debuggable-module/src/block/x86_64.rs @@ -1,136 +1,11 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - use anyhow::Result; use std::collections::{BTreeMap, BTreeSet}; +use crate::block::Block; use crate::debuginfo::DebugInfo; use crate::{Module, Offset}; -pub fn sweep_module(module: &dyn Module, debuginfo: &DebugInfo) -> Result { - let mut blocks = Blocks::default(); - - for function in debuginfo.functions() { - let function_blocks = sweep_region(module, debuginfo, function.offset, function.size)?; - blocks.map.extend(&function_blocks.map); - } - - Ok(blocks) -} - -pub fn sweep_region_arm( - module: &dyn Module, - debuginfo: &DebugInfo, - offset: Offset, - size: u64, -) -> Result { - use bad64::disasm; - use bad64::Op::*; - let region = offset.region(size); - let mut visited = BTreeSet::new(); - - let data = module.read(offset, size)?; - - let mut pending = Vec::new(); - - // Schedule the function entrypoint. - pending.push(offset.0); - - // Schedule any extra jump labels in the target region. - for label in debuginfo.labels() { - // Don't duplicate function entrypoint. - if label == offset { - continue; - } - - // Don't visit labels outside of the function region. - if !region.contains(&label.0) { - continue; - } - - pending.push(label.0); - - while let Some(entry) = pending.pop() { - if !region.contains(&entry) { - continue; - } - - if visited.contains(&entry) { - continue; - } - - visited.insert(entry); - - // Reset decoder for `entry`. - let position = (entry - offset.0); - let mut decoder = disasm(data, position); - - // Decode instructions (starting from `entry`) until we reach a block - // terminator or run out of valid data. - while let Some(Ok(inst)) = decoder.next() { - match inst.op() { - // Unconditional branch - B | BR => { - // Pretty sure we only need the first operand? - let target = match inst.operands()[0] { - // Idk which one we need yet - e => { - // Using the operand, we should figure out where the branch is going to - println!("Got B | BR operand: {:?}", e); - 7 - } - }; - pending.push(target); - - // We can't fall through to the next instruction, so don't add it to - // the worklist. - break; - } - // Conditional branch - CBNZ | CBZ | B_AL | B_CC | B_CS | B_EQ | B_GE | B_GT | B_HI | B_LE | B_LS - | B_LT | B_MI | B_NE | B_NV | B_PL | B_VC | B_VS => { - // Pretty sure we only need the first operand? - let target = match inst.operands()[0] { - // Idk which one we need yet - e => { - // Using the operand, we should figure out where the branch is going to - println!("Got conditional branch operand: {:?}", e); - 7 - } - }; - pending.push(target); - - // We can fall through, so add to work list. - if let Some(Ok(next_inst)) = decoder.peekable().peek() { - pending.push(next_inst.address()); - } - - // Fall through not guaranteed, so this block is terminated. - break; - } - // TODO: Figure out what to do about BRKA BRKAS BRKB BRKBS BRKN BRKNS BRKPA BRKPAS BRKPB BRKPBS - // equivalent to int3 in x86 - BRK => { - break; - } - // return - RET => { - break; - } - // call - BL | BLR => {} // exception - // interrupt - SVC | HVC | SMC => {} - _ => { - println!("You didn't handle instruction type: {:?}", inst) - } - } - } - } - } - - panic!() -} +use super::Blocks; pub fn sweep_region( module: &dyn Module, @@ -348,53 +223,3 @@ pub fn sweep_region( Ok(blocks) } - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub struct Block { - pub offset: Offset, - pub size: u64, -} - -impl Block { - pub fn new(offset: Offset, size: u64) -> Self { - Self { offset, size } - } - - pub fn contains(&self, offset: &Offset) -> bool { - self.offset.region(self.size).contains(&offset.0) - } -} - -#[derive(Clone, Debug, Default)] -pub struct Blocks { - pub map: BTreeMap, -} - -impl Blocks { - pub fn new() -> Self { - Self::default() - } - - pub fn iter(&self) -> impl Iterator { - self.map.values() - } - - pub fn find(&self, offset: &Offset) -> Option<&Block> { - self.map.values().find(|b| b.contains(offset)) - } - - pub fn extend<'b>(&mut self, blocks: impl IntoIterator) { - for &b in blocks.into_iter() { - self.map.insert(b.offset, b); - } - } -} - -impl<'b> IntoIterator for &'b Blocks { - type Item = &'b Block; - type IntoIter = std::collections::btree_map::Values<'b, Offset, Block>; - - fn into_iter(self) -> Self::IntoIter { - self.map.values() - } -}