Skip to content

Commit

Permalink
Replaced advance_by_range and retreat_by_range with a more optimized …
Browse files Browse the repository at this point in the history
…impl. Perf hasnt changed much (1%) but code size dropped significantly.
  • Loading branch information
josephg committed Jul 24, 2024
1 parent 41328ce commit 738d3c1
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 100 deletions.
151 changes: 52 additions & 99 deletions src/listmerge/advance_retreat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::listmerge::merge::notify_for;
use crate::LV;
use crate::ost::LeafIdx;
use crate::rev_range::RangeRev;
use crate::stats::marker_a;

#[derive(Debug, Eq, PartialEq)]
pub(super) struct QueryResult {
Expand Down Expand Up @@ -68,16 +69,34 @@ impl M2Tracker {
}
}

pub(crate) fn advance_by_range(&mut self, mut range: DTRange) {
fn adv_retreat_range(&mut self, mut range: DTRange, incr: i32) {
// This method handles both advancing and retreating. In either case, because of the way
// SpanState is designed, we need to either increment or decrement the state of every
// visited item in the LV range.

// Note: When retreating, we still visit all the items in the range in earliest-to-latest
// order. This is a bit of a wild optimisation, because its possible (common even) for
// the range to include an edit which inserts a character followed by an edit which deletes
// the character. The obvious way to process that would be to first undo the delete event,
// then undo the insert.
//
// However, that requires that we visit the range in reverse order, which has worse
// performance and requires advance and retreat to be handled differently. So long as the
// *result* of running retreat() is the same, its safe to not do that, and instead treat the
// span state as an integer and just decrement it twice.
while !range.is_empty() {
// Note the delete could be reversed - but we don't really care here; we just mark the
// whole range anyway.
// let (tag, target, mut len) = self.next_action(range.start);
// if let Some(mut cursor) = self.range_tree.try_find_item(range.start) {
// crate::stats::marker_a();
// self.range_tree.emplace_cursor_unknown(cursor);
// } else {
// crate::stats::marker_b();
// }

let QueryResult {
tag,
target,
offset,
mut leaf_idx,
..
} = self.index_query(range.start);

let len = usize::min(target.len() - offset, range.len());
Expand All @@ -100,109 +119,43 @@ impl M2Tracker {
target_range.len(),
&mut notify_for(&mut self.index),
|e| {
if tag == Ins {
e.current_state.mark_inserted();
} else {
e.delete();
}
e.current_state.0 = e.current_state.0.wrapping_add_signed(incr);
}
).0;

// TODO: Emplace it if we can.
// cursor.flush(&mut self.range_tree);
self.range_tree.emplace_cursor_unknown(cursor);
}

range.truncate_keeping_right(len);
}
}


pub fn retreat_by_range(&mut self, mut range: DTRange) {
// We need to go through the range in reverse order to make sure if we visit an insert then
// delete of the same item, we un-delete before un-inserting.
// TODO: Could probably relax this restriction when I feel more comfortable about overall
// correctness.

while !range.is_empty() {
// TODO: This is gross. Clean this up. There's totally a nicer way to write this.
let last_lv = range.last();

if let Some(mut cursor) = self.range_tree.try_find_item(last_lv) {
// Try just modifying the item directly.
//
// The item will only exist in the range tree at all if it was an insert.
let (e, _offset) = cursor.0.get_item(&self.range_tree);
// let chunk_start = last_lv - offset;
let start = range.start.max(e.id.start);
cursor.0.offset = start - e.id.start;
let max_len = range.end - start;

range.end -= self.range_tree.mutate_entry(
&mut cursor,
max_len,
&mut notify_for(&mut self.index),
|e| {
e.current_state.mark_not_inserted_yet();
}
).0;
self.range_tree.emplace_cursor_unknown(cursor);
} else {
// Figure it out the "slow" way, by looking up the item in the index.
let QueryResult { tag, target, offset, mut leaf_idx } = self.index_query(last_lv);

let chunk_start = last_lv - offset;
let start = range.start.max(chunk_start);
let end = usize::min(range.end, chunk_start + target.len());

let e_offset = start - chunk_start; // Usually 0.

let len = end - start;
debug_assert!(len <= range.len());
range.end -= len;

let mut target_range = target.range(e_offset, e_offset + len);

while !target_range.is_empty() {
// STATS.with(|s| {
// let mut s = s.borrow_mut();
// s.2 += 1;
// });

// Because the tag is either entirely delete or entirely insert, its safe to move
// forwards in this child range. (Which I'm doing because that makes the code much
// easier to reason about).

// We can't reuse the pointer returned by the index_query call because we mutate
// each loop iteration.

let leaf_idx = match replace(&mut leaf_idx, LeafIdx::default()) {
LeafIdx(usize::MAX) => self.marker_at(target_range.start),
x => x,
};
// let mut cursor = self.old_range_tree.mut_cursor_before_item(target_range.start, ptr);
let (mut cursor, _pos) = self.range_tree.mut_cursor_before_item(target_range.start, leaf_idx);

target_range.start += self.range_tree.mutate_entry(
&mut cursor,
target_range.len(),
&mut notify_for(&mut self.index),
|e| {
if tag == Ins {
e.current_state.mark_not_inserted_yet();
} else {
e.current_state.undelete();
}
}
).0;

// TODO: Emplace it if we can.
// cursor.flush(&mut self.range_tree);
self.range_tree.emplace_cursor_unknown(cursor);
}
}
}
}

// self.check_index();
pub(crate) fn advance_by_range(&mut self, range: DTRange) {
self.adv_retreat_range(range, 1);
}
pub(crate) fn retreat_by_range(&mut self, range: DTRange) {
self.adv_retreat_range(range, -1);
}

// // if let Some(mut cursor) = self.range_tree.try_find_item(last_lv) {
// // // Try just modifying the item directly.
// // //
// // // The item will only exist in the range tree at all if it was an insert.
// // let (e, _offset) = cursor.0.get_item(&self.range_tree);
// // // let chunk_start = last_lv - offset;
// // let start = range.start.max(e.id.start);
// // cursor.0.offset = start - e.id.start;
// // let max_len = range.end - start;
// //
// // range.end -= self.range_tree.mutate_entry(
// // &mut cursor,
// // max_len,
// // &mut notify_for(&mut self.index),
// // |e| {
// // e.current_state.mark_not_inserted_yet();
// // }
// // ).0;
// // self.range_tree.emplace_cursor_unknown(cursor);
// // } else {
}
10 changes: 9 additions & 1 deletion src/listmerge/yjsspan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::ost::content_tree::Content;
/// Note a u16 (or even a u8) should be fine in practice. Double deletes almost never happen in
/// reality - unless someone is maliciously generating them.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
pub struct SpanState(u32);
pub struct SpanState(pub(crate) u32);

pub const NOT_INSERTED_YET: SpanState = SpanState(0);
pub const INSERTED: SpanState = SpanState(1);
Expand Down Expand Up @@ -79,6 +79,14 @@ impl SpanState {
}
}

pub(crate) fn raw_decrement(&mut self) {
debug_assert!(self.0 >= 1);
self.0 -= 1;
}
pub(crate) fn raw_increment(&mut self) {
self.0 += 1;
}

pub(crate) fn mark_inserted(&mut self) {
if *self != NOT_INSERTED_YET {
panic!("Invalid insert target - item already marked as inserted");
Expand Down

0 comments on commit 738d3c1

Please sign in to comment.