From 9a04ce2d571b6d69137b7e5ddd2faef0ca298499 Mon Sep 17 00:00:00 2001 From: rsheeter Date: Mon, 19 Aug 2024 22:06:11 -0700 Subject: [PATCH] Fix fuzzer bug with cmap4 --- font-test-data/src/cmap.rs | 36 +++++++++++++++++++++++++++++++++++ font-test-data/src/lib.rs | 1 + read-fonts/src/tables/cmap.rs | 27 +++++++++++++++++++++++++- 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 font-test-data/src/cmap.rs diff --git a/font-test-data/src/cmap.rs b/font-test-data/src/cmap.rs new file mode 100644 index 000000000..9edefef8d --- /dev/null +++ b/font-test-data/src/cmap.rs @@ -0,0 +1,36 @@ +//! cmap test data for scenarios not readily produced with ttx + +use read_fonts::{be_buffer, be_buffer_add, test_helpers::BeBuffer}; + +/// Contains two codepoint ranges, both [6, 64]. Surely you don't duplicate them? +pub fn repetitive_cmap4() -> BeBuffer { + // + be_buffer! { + 4_u16, // uint16 format + 0_u16, // uint16 length, unused + 0_u16, // uint16 language, unused + 4_u16, // uint16 segCountX2, 2 * 2 segments + 0_u16, // uint16 searchRange, unused + 0_u16, // uint16 entrySelector, unused + 0_u16, // uint16 rangeShift, unused + // segCount endCode entries + 64_u16, // uint16 endCode[0] + 64_u16, // uint16 endCode[1] + + 0_u16, // uint16 reservedPad, unused + + // segCount startCode entries + 6_u16, // uint16 startCode[0] + 6_u16, // uint16 startCode[1] + + // segCount idDelta entries + 0_u16, // uint16 idDelta[0] + 0_u16, // uint16 idDelta[1] + + // segCount idRangeOffset entries + 0_u16, // uint16 idRangeOffset[0] + 0_u16 // uint16 idRangeOffset[1] + + // no glyphIdArray entries + } +} diff --git a/font-test-data/src/lib.rs b/font-test-data/src/lib.rs index d677477fc..407ecff28 100644 --- a/font-test-data/src/lib.rs +++ b/font-test-data/src/lib.rs @@ -1,5 +1,6 @@ //! test data shared between various fontations crates. +pub mod cmap; pub mod gdef; pub mod gpos; pub mod gsub; diff --git a/read-fonts/src/tables/cmap.rs b/read-fonts/src/tables/cmap.rs index 496fbb32b..52929945a 100644 --- a/read-fonts/src/tables/cmap.rs +++ b/read-fonts/src/tables/cmap.rs @@ -164,7 +164,16 @@ impl<'a> Iterator for Cmap4Iter<'a> { return Some((codepoint, glyph_id)); } else { self.cur_range_ix += 1; - self.cur_range = self.subtable.code_range(self.cur_range_ix)?; + let next_range = self.subtable.code_range(self.cur_range_ix)?; + // Groups should be in order and non-overlapping so make sure + // that the start code of next group is at least current_end + 1. + // Also avoid start sliding backwards if we see data where end < start by taking the max + // of next.end and curr.end as the new end. + // This prevents timeout and bizarre results in the face of numerous overlapping ranges + // https://github.com/googlefonts/fontations/issues/1100 + // cmap4 ranges are u16 so no need to stress about values past char::MAX + self.cur_range = next_range.start.max(self.cur_range.end) + ..next_range.end.max(self.cur_range.end); self.cur_start_code = self.cur_range.start as u16; } } @@ -749,4 +758,20 @@ mod tests { _ => None, }) } + + /// + /// + /// Note that this doesn't demonstrate the timeout, merely that we've eliminated the underlying + /// enthusiasm for non-ascending ranges that enabled it + #[test] + fn cmap4_bad_data() { + let buf = font_test_data::cmap::repetitive_cmap4(); + let cmap4 = Cmap4::read(FontData::new(buf.as_slice())).unwrap(); + + // we should have unique, ascending codepoints, not duplicates and overlaps + assert_eq!( + (6..=64).collect::>(), + cmap4.iter().map(|(cp, _)| cp).collect::>() + ); + } }