From edecc6f6ff34f9f5ac0272c14fd7396b302593ec Mon Sep 17 00:00:00 2001
From: Vladimir Vukicevic
Date: Wed, 22 May 2024 10:09:01 -0700
Subject: [PATCH] Synthesize sections from section map (fixes #153)

When a PDB has no section headers stream, fall back to synthesizing
section headers from the DBI section map (also known as the "OMF Segment
map"). This seems to be necessary to handle NGEN-generated PDB files
(.ni.pdb from Crossgen2).

Synthesis is skipped when OMAP "from" data is present, or when the section
map's section count differs from its logical section count. Only "SEL"
entries are turned into sections; RVAs are assigned sequentially starting
at 0x1000.

---
 src/dbi.rs | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/pdb.rs | 67 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 158 insertions(+), 2 deletions(-)

diff --git a/src/dbi.rs b/src/dbi.rs
index ce21c70..4edff9a 100644
--- a/src/dbi.rs
+++ b/src/dbi.rs
@@ -107,6 +107,19 @@ impl<'s> DebugInformation<'s> {
         let contributions_buf = buf.take(self.header.section_contribution_size as usize)?;
         DBISectionContributionIter::parse(contributions_buf.into())
     }
+
+    /// Returns an iterator that can traverse the section map in sequential order. Also known as the "OMF Segment map".
+    pub fn section_map(&self) -> Result<DBISectionMapIter<'_>> {
+        let mut buf = self.stream.parse_buffer();
+        // drop the header, modules list, and section contributions list
+        let offset = self.header_len
+            + self.header.module_list_size as usize
+            + self.header.section_contribution_size as usize;
+
+        buf.take(offset)?;
+        let section_map_buf = buf.take(self.header.section_map_size as usize)?;
+        DBISectionMapIter::parse(section_map_buf.into())
+    }
 }
 
 /// The version of the PDB format.
@@ -584,6 +597,86 @@ impl<'c> FallibleIterator for DBISectionContributionIter<'c> {
     }
 }
 
+/// See https://github.com/google/syzygy/blob/8164b24ebde9c5649c9a09e88a7fc0b0fcbd1bc5/syzygy/pdb/pdb_data.h#L172
+/// Also see https://www.virtualbox.org/browser/vbox/trunk/include/iprt/formats/codeview.h?rev=93115#L272
+/// This is also known as OMF Segment Map. In the OMF SegmentMap structure, flags and section_type
+/// are a single 16-bit value.
+#[derive(Debug, Copy, Clone)]
+pub struct DBISectionMapItem {
+    /// flags: 0x1 read, 0x2 write, 0x4 execute, 0x8 32-bit
+    pub flags: u8,
+    /// section_type: 0x1 = SEL, 0x2 = ABS, 0x10 = GROUP
+    pub section_type: u8,
+    /// Overlay number
+    pub overlay: u16,
+    /// group index, 0 if not relevant
+    pub group: u16,
+    /// Technically "frame" in OMF SegmentMap, which is complicated
+    pub section_number: u16,
+    /// Index into name table, or 0xffff
+    pub seg_name_index: u16,
+    /// Index into name table, or 0xffff
+    pub class_name_index: u16,
+    /// RVA offset of this section
+    pub rva_offset: u32,
+    /// Length of this section
+    pub section_length: u32,
+}
+
+impl DBISectionMapItem {
+    fn parse(buf: &mut ParseBuffer<'_>) -> Result<Self> {
+        Ok(Self {
+            flags: buf.parse_u8()?,
+            section_type: buf.parse_u8()?,
+            overlay: buf.parse_u16()?,
+            group: buf.parse_u16()?,
+            section_number: buf.parse_u16()?,
+            seg_name_index: buf.parse_u16()?,
+            class_name_index: buf.parse_u16()?,
+            rva_offset: buf.parse_u32()?,
+            section_length: buf.parse_u32()?,
+        })
+    }
+}
+
+/// A `DBISectionMapIter` iterates over the section map in the DBI section, producing `DBISectionMap`s.
+#[derive(Debug)]
+pub struct DBISectionMapIter<'c> {
+    /// The section count.
+    pub sec_count: u16,
+    /// The logical section count. Typically equals sec_count, if no groups are in use. (?)
+    pub sec_count_log: u16,
+    buf: ParseBuffer<'c>,
+}
+
+impl<'c> DBISectionMapIter<'c> {
+    fn parse(mut buf: ParseBuffer<'c>) -> Result<Self> {
+        let sec_count = buf.parse_u16()?;
+        let sec_count_log = buf.parse_u16()?;
+
+        Ok(Self {
+            buf,
+            sec_count,
+            sec_count_log,
+        })
+    }
+}
+
+impl<'c> FallibleIterator for DBISectionMapIter<'c> {
+    type Item = DBISectionMapItem;
+    type Error = Error;
+
+    fn next(&mut self) -> result::Result<Option<Self::Item>, Self::Error> {
+        // see if we're at EOF
+        if self.buf.is_empty() {
+            return Ok(None);
+        }
+
+        let segmap = Self::Item::parse(&mut self.buf)?;
+        Ok(Some(segmap))
+    }
+}
+
 /// A `DbgDataHdr`, which contains a series of (optional) MSF stream numbers.
 #[derive(Debug, Copy, Clone)]
 #[allow(dead_code)] // reason = "unused fields added for completeness"
diff --git a/src/pdb.rs b/src/pdb.rs
index b92f671..891ce58 100644
--- a/src/pdb.rs
+++ b/src/pdb.rs
@@ -5,7 +5,8 @@
 // http://opensource.org/licenses/MIT>, at your option. This file may not be
 // copied, modified, or distributed except according to those terms.
 
-use crate::common::*;
+use fallible_iterator::FallibleIterator;
+
 use crate::dbi::{DBIExtraStreams, DBIHeader, DebugInformation, Module};
 use crate::framedata::FrameTable;
 use crate::modi::ModuleInfo;
@@ -17,6 +18,7 @@ use crate::source::Source;
 use crate::strings::StringTable;
 use crate::symbol::SymbolTable;
 use crate::tpi::{IdInformation, TypeInformation};
+use crate::{common::*, SectionCharacteristics};
 
 // Some streams have a fixed stream index.
 // http://llvm.org/docs/PDB/index.html
@@ -242,7 +244,7 @@ impl<'s, S: Source<'s> + 's> PDB<'s, S> {
         let index = self.extra_streams()?.section_headers;
         let stream = match self.raw_stream(index)? {
             Some(stream) => stream,
-            None => return Ok(None),
+            None => return self.maybe_synthesize_section(),
         };
 
         let mut buf = stream.parse_buffer();
@@ -254,6 +256,67 @@ impl<'s, S: Source<'s> + 's> PDB<'s, S> {
         Ok(Some(headers))
     }
 
+    // If there are no section_headers in the file, attempt to synthesize sections
+    // based on the section map. This seems to be necessary to handle NGEN-generated PDB
+    // files (.ni.pdb from Crossgen2).
+    fn maybe_synthesize_section(&mut self) -> Result<Option<Vec<ImageSectionHeader>>> {
+        // If we have OMAP From data, I don't believe we can do this, because the RVAs
+        // won't map. But I'm not 100% sure of that, be conservative.
+        if self.omap_from_src()?.is_some() {
+            return Ok(None);
+        }
+
+        let debug_info = self.debug_information()?;
+        let sec_map = debug_info.section_map()?;
+        if sec_map.sec_count != sec_map.sec_count_log {
+            return Ok(None);
+        }
+        let sec_map = sec_map.collect::<Vec<_>>()?;
+
+        let mut rva = 0x1000u32; // in the absence of explicit section data, this starts at 0x1000
+        let sections = sec_map.into_iter()
+            .filter(|sm| {
+                // the section with a bogus section length also doesn't have any rwx flags,
+                // and has section_type == 2
+                sm.section_type == 1 && // "SEL" section, not ABS (0x2) or GROUP (0x10)
+                    sm.section_length != u32::MAX // shouldn't happen, but just in case
+            })
+            .map(|sm| {
+                let mut characteristics = 0u32;
+                if sm.flags & 0x1 != 0 { // R
+                    characteristics |= 0x40000000; // IMAGE_SCN_MEM_READ
+                }
+                if sm.flags & 0x2 != 0 { // W
+                    characteristics |= 0x80000000; // IMAGE_SCN_MEM_WRITE
+                }
+                if sm.flags & 0x4 != 0 { // X
+                    characteristics |= 0x20000000; // IMAGE_SCN_MEM_EXECUTE
+                    characteristics |= 0x20; // IMAGE_SCN_CNT_CODE
+                }
+
+                if sm.rva_offset != 0 {
+                    eprintln!("pdb: synthesizing section with rva_offset != 0, might not be correct! {:?}", sm);
+                }
+
+                let this_rva = rva.saturating_add(sm.rva_offset); // values come from the file: saturate, never overflow
+                rva = this_rva.saturating_add(sm.section_length); // next section starts right after this one
+                ImageSectionHeader {
+                    name: [0; 8],
+                    virtual_size: sm.section_length,
+                    virtual_address: this_rva,
+                    size_of_raw_data: sm.section_length,
+                    pointer_to_raw_data: 0,
+                    pointer_to_relocations: 0,
+                    pointer_to_line_numbers: 0,
+                    number_of_relocations: 0,
+                    number_of_line_numbers: 0,
+                    characteristics: SectionCharacteristics(characteristics),
+                }
+            }).collect::<Vec<_>>();
+
+        Ok(Some(sections))
+    }
+
     /// Retrieve the global frame data table.
     ///
     /// This table describes the stack frame layout for functions from all modules in the PDB. Not