Skip to content

Commit

Permalink
Tweaks to index_tree. Small cleanups. Added pool counts
Browse files Browse the repository at this point in the history
  • Loading branch information
josephg committed Mar 27, 2024
1 parent fe448b2 commit 280e9e6
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 209 deletions.
2 changes: 1 addition & 1 deletion crates/rle/src/splitable_span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ impl<T: Clone + Eq> MergableSpan for RleRun<T> {

/// Distinct RLE run. Each distinct run expresses some value between each (start, end) pair.
#[derive(Copy, Clone, Hash, Debug, PartialEq, Eq, Default)]
pub struct RleDRun<T: Clone> {
pub struct RleDRun<T> {
pub start: usize,
pub end: usize,
pub val: T,
Expand Down
4 changes: 2 additions & 2 deletions examples/profile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ fn profile_merge(name: &str, n: usize) {
// RUSTFLAGS="-Cforce-frame-pointers=yes" cargo build --profile profiling --example profile
fn main() {
// profile_merge("clownschool", 500);
// profile_merge("git-makefile", 200);
profile_merge("git-makefile", 1);
profile_merge("git-makefile", 200);
// profile_merge("git-makefile", 1);
// profile_merge("node_nodecc", 1);
// profile_merge("clownschool", 1);
}
174 changes: 0 additions & 174 deletions src/listmerge/markers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,180 +12,6 @@ use crate::list::operation::ListOpKind;
use crate::{DTRange, LV};
use crate::ost::IndexContent;

// TODO: Consider refactoring this to be a single enum. Put len in InsPtr and use .len(). But this
// might make the code way slower.

// #[derive(Copy, Clone, Eq, PartialEq, Debug)]
// pub enum MarkerOld {
// /// For inserts, we store a pointer to the leaf node containing the inserted item. This is only
// /// used for inserts so we don't need to modify multiple entries when the inserted item is
// /// moved.
// InsPtr(NonNull<NodeLeaf<CRDTSpan, DocRangeIndex>>),
//
// /// For deletes we name the delete's target. Note this contains redundant information - since
// /// we already have a length field.
// DelTarget(RangeRev),
// }
//
// /// So this struct is a little weird. Its designed this way so I can reuse content-tree for two
// /// different structures:
// ///
// /// - When we enable and disable inserts, we need a marker (index) into the b-tree node in the range
// /// tree containing that entry. This lets us find things in O(log n) time, which improves
// /// performance for large merges. (Though at a cost of extra bookkeeping overhead for small
// /// merges).
// /// - For deletes, we need to know the delete's target. Ie, which corresponding insert inserted the
// /// item which was deleted by this edit.
// ///
// /// The cleanest implementation of this would store a TimeSpan for the ID of this edit instead of
// /// just storing a length field. And we'd use a variant of the content-tree which uses absolutely
// /// positioned items like a normal b-tree with RLE. But I don't have an implementation of that. So
// /// instead we end up with this slightly weird structure.
// #[derive(Copy, Clone, Eq, PartialEq, Debug)]
// pub struct MarkerEntry {
// pub len: usize,
// pub inner: MarkerOld,
// }
//
// impl MarkerOld {
// pub(super) fn tag(&self) -> ListOpKind {
// match self {
// MarkerOld::InsPtr(_) => ListOpKind::Ins,
// MarkerOld::DelTarget(_) => ListOpKind::Del
// }
// }
// }

// impl HasLength for MarkerEntry {
// fn len(&self) -> usize {
// self.len
// }
// }
//
// impl SplitableSpanHelpers for MarkerOld {
// fn truncate_h(&mut self, at: usize) -> Self {
// match self {
// InsPtr(_) => *self,
// MarkerOld::DelTarget(target) => DelTarget(target.truncate(at)),
// }
// }
// }
//
// impl SplitableSpanHelpers for MarkerEntry {
// fn truncate_h(&mut self, at: usize) -> Self {
// let remainder_len = self.len - at;
// self.len = at;
// MarkerEntry {
// len: remainder_len,
// inner: self.inner.truncate(at),
// }
// }
//
// fn truncate_keeping_right_h(&mut self, at: usize) -> Self {
// let left = Self {
// len: at,
// inner: self.inner.truncate_keeping_right(at)
// };
// self.len -= at;
// left
// }
// }
//
// impl MergableSpan for MarkerOld {
// fn can_append(&self, other: &Self) -> bool {
// match (self, other) {
// (InsPtr(ptr1), InsPtr(ptr2)) => {
// ptr1 == ptr2
// }
// (DelTarget(t1), DelTarget(t2)) => t1.can_append(t2),
// _ => false,
// }
// }
//
// fn append(&mut self, other: Self) {
// match (self, other) {
// (InsPtr(_), InsPtr(_)) => {},
// (DelTarget(t1), DelTarget(t2)) => t1.append(t2),
// _ => {
// panic!("Internal consistency error: Invalid append");
// },
// }
// }
//
// fn prepend(&mut self, other: Self) {
// match (self, other) {
// (InsPtr(_), InsPtr(_)) => {},
// (DelTarget(t1), DelTarget(t2)) => t1.prepend(t2),
// _ => {
// panic!("Internal consistency error: Invalid prepend");
// },
// }
// }
// }
//
// impl MergableSpan for MarkerEntry {
// fn can_append(&self, other: &Self) -> bool {
// self.inner.can_append(&other.inner)
// }
//
// fn append(&mut self, other: Self) {
// self.len += other.len;
// self.inner.append(other.inner);
// }
//
// fn prepend(&mut self, other: Self) {
// self.len += other.len;
// self.inner.prepend(other.inner);
// }
// }
//
// // impl<E: EntryTraits, I: TreeIndex<E>, const IE: usize, const LE: usize> IndexGet<usize> for MarkerEntry<YjsSpan2, DocRangeIndex, IE, LE> {
// // type Output = NonNull<NodeLeaf<YjsSpan2, DocRangeIndex, IE, LE>>;
// //
// // fn index_get(&self, _index: usize) -> Self::Output {
// // self.ptr
// // }
// // }
//
//
//
// impl Default for MarkerEntry {
// fn default() -> Self {
// MarkerEntry {
// len: 0,
// inner: InsPtr(std::ptr::NonNull::dangling()),
// }
// }
// }
//
//
// // impl MarkerEntry {
// // pub fn unwrap_ptr(&self) -> NonNull<NodeLeaf<YjsSpan2, DocRangeIndex, DEFAULT_IE, DEFAULT_LE>> {
// // if let InsPtr(ptr) = self.inner {
// // ptr
// // } else {
// // panic!("Internal consistency error: Cannot unwrap delete");
// // }
// // }
// // }
//
// impl Searchable for MarkerEntry {
// type Item = Option<NonNull<NodeLeaf<CRDTSpan, DocRangeIndex>>>;
//
// fn get_offset(&self, _loc: Self::Item) -> Option<usize> {
// panic!("Should never be used")
// }
//
// fn at_offset(&self, _offset: usize) -> Self::Item {
// if let InsPtr(ptr) = self.inner {
// Some(ptr)
// } else {
// None
// }
// }
// }


/// It's kind of upsetting that I need this.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct DelRange {
Expand Down
22 changes: 8 additions & 14 deletions src/listmerge/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -542,23 +542,10 @@ impl M2Tracker {
// debug_assert!(cg.parents.version_contains_time(&[lv_start], target.start));
// }

// let m2: Marker2 = From::from(Marker::DelTarget(RangeRev {
// span: target,
// fwd
// }));
// println!("DEL RANGE {:?} -> {:?} ({:?})", (lv_start..lv_start+len), Marker::DelTarget(RangeRev {
// span: target,
// fwd
// }), m2);

self.index.set_range((lv_start..lv_start+len).into(), Marker::Del(DelRange {
target: if fwd { target.start } else { target.end },
fwd
}).into(), fwd);
// self.index.set_range((lv_start..lv_start+len).into(), MarkerOld::DelTarget(RangeRev {
// span: target,
// fwd
// }).into(), fwd);

// if cfg!(debug_assertions) {
// self.check_index();
Expand Down Expand Up @@ -752,6 +739,8 @@ impl<'a> Iterator for TransformedOpsIter<'a> {
}
}

// println!("{:?}", self.tracker.index.count_obj_pool());

// No more plan. Stop!
// dbg!(&self.op_iter, self.plan_idx);
debug_assert!(self.op_iter.is_none());
Expand Down Expand Up @@ -1184,10 +1173,15 @@ mod test {
// node_nodecc: 72135
// git-makefile: 23166
let mut bytes = vec![];

// File::open("benchmark_data/git-makefile.dt").unwrap().read_to_end(&mut bytes).unwrap();
// let o = ListOpLog::load_from(&bytes).unwrap();
// o.checkout_tip();

println!("----");
bytes.clear();
File::open("benchmark_data/node_nodecc.dt").unwrap().read_to_end(&mut bytes).unwrap();
let o = ListOpLog::load_from(&bytes).unwrap();

o.checkout_tip();
}
}
Expand Down
56 changes: 38 additions & 18 deletions src/ost/index_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::cell::Cell;
use std::cmp::Ordering;
use std::fmt::Debug;
use std::mem;
use std::mem::swap;
use std::mem::replace;
use std::ops::{Index, IndexMut, Range};
use std::ptr::NonNull;
use rle::{HasLength, MergableSpan, RleDRun, SplitableSpan, SplitableSpanHelpers};
Expand All @@ -11,7 +11,7 @@ use crate::ost::{NODE_CHILDREN, LeafIdx, NodeIdx, LEAF_CHILDREN};
use crate::ost::content_tree::{ContentLeaf, ContentNode, ContentTree};

#[derive(Debug, Clone)]
pub(crate) struct IndexTree<V> {
pub(crate) struct IndexTree<V: Copy> {
leaves: Vec<IndexLeaf<V>>,
nodes: Vec<IndexNode>,
// upper_bound: LV,
Expand Down Expand Up @@ -166,26 +166,26 @@ impl<V: Default + IndexContent> Default for IndexTree<V> {
}
}

impl<V> Index<LeafIdx> for IndexTree<V> {
impl<V: Copy> Index<LeafIdx> for IndexTree<V> {
type Output = IndexLeaf<V>;

fn index(&self, index: LeafIdx) -> &Self::Output {
&self.leaves[index.0]
}
}
impl<V> IndexMut<LeafIdx> for IndexTree<V> {
impl<V: Copy> IndexMut<LeafIdx> for IndexTree<V> {
fn index_mut(&mut self, index: LeafIdx) -> &mut Self::Output {
&mut self.leaves[index.0]
}
}
impl<V> Index<NodeIdx> for IndexTree<V> {
impl<V: Copy> Index<NodeIdx> for IndexTree<V> {
type Output = IndexNode;

fn index(&self, index: NodeIdx) -> &Self::Output {
&self.nodes[index.0]
}
}
impl<V> IndexMut<NodeIdx> for IndexTree<V> {
impl<V: Copy> IndexMut<NodeIdx> for IndexTree<V> {
fn index_mut(&mut self, index: NodeIdx) -> &mut Self::Output {
&mut self.nodes[index.0]
}
Expand Down Expand Up @@ -1279,6 +1279,25 @@ impl<V: Default + IndexContent> IndexTree<V> {
count
}

/// Returns `(nodes, leaves)`: the number of internal nodes and the number of leaves reachable
/// from the two pool heads.
///
/// The node chain starts at `free_node_pool_head` and is followed through each node's
/// `parent` field. The leaf chain starts at `free_leaf_pool_head` and is followed through
/// each leaf's `next_leaf` field. An index of `usize::MAX` terminates either chain.
pub fn count_obj_pool(&self) -> (usize, usize) {
    // Walk the chain of internal nodes.
    let mut node_count = 0;
    let mut node_cursor = self.free_node_pool_head;
    loop {
        if node_cursor.0 == usize::MAX {
            break;
        }
        node_count += 1;
        node_cursor = self.nodes[node_cursor.0].parent;
    }

    // Walk the chain of leaves.
    let mut leaf_count = 0;
    let mut leaf_cursor = self.free_leaf_pool_head;
    loop {
        if leaf_cursor.0 == usize::MAX {
            break;
        }
        leaf_count += 1;
        leaf_cursor = self.leaves[leaf_cursor.0].next_leaf;
    }

    (node_count, leaf_count)
}

/// Iterate over the contents of the index. Note the index tree may contain extra entries
/// for items within the range, with a value of V::default.
pub fn iter(&self) -> IndexTreeIter<V> {
Expand Down Expand Up @@ -1470,14 +1489,14 @@ impl<V: Default + IndexContent> IndexTree<V> {
}

#[derive(Debug)]
pub struct IndexTreeIter<'a, V> {
pub struct IndexTreeIter<'a, V: Copy> {
tree: &'a IndexTree<V>,
leaf_idx: LeafIdx,
// leaf: &'a IndexLeaf<V>,
elem_idx: usize,
}

impl<'a, V: Clone> Iterator for IndexTreeIter<'a, V> {
impl<'a, V: Copy> Iterator for IndexTreeIter<'a, V> {
// type Item = (DTRange, V);
type Item = RleDRun<V>;

Expand Down Expand Up @@ -1647,25 +1666,26 @@ mod test {
#[test]
fn split_leaf() {
let mut tree = IndexTree::new();
tree.set_range((1..2).into(), X(100), true);
// Using 10, 20, ... so they don't merge.
tree.set_range(10.into(), X(100), true);
tree.dbg_check();
tree.set_range((2..3).into(), X(200), true);
tree.set_range((3..4).into(), X(100), true);
tree.set_range((4..5).into(), X(200), true);
tree.set_range(20.into(), X(200), true);
tree.set_range(30.into(), X(100), true);
tree.set_range(40.into(), X(200), true);
tree.dbg_check();
// dbg!(&tree);
tree.set_range((5..6).into(), X(100), true);
tree.set_range(50.into(), X(100), true);
tree.dbg_check();

// dbg!(&tree);
// dbg!(tree.iter().collect::<Vec<_>>());

tree.dbg_check_eq(&[
RleDRun::new(1..2, X(100)),
RleDRun::new(2..3, X(200)),
RleDRun::new(3..4, X(100)),
RleDRun::new(4..5, X(200)),
RleDRun::new(5..6, X(100)),
RleDRun::new(10..11, X(100)),
RleDRun::new(20..21, X(200)),
RleDRun::new(30..31, X(100)),
RleDRun::new(40..41, X(200)),
RleDRun::new(50..51, X(100)),
]);
}

Expand Down

0 comments on commit 280e9e6

Please sign in to comment.