Skip to content

Commit

Permalink
Use region dimensions in SAD and ME
Browse files Browse the repository at this point in the history
This avoids having to re-check bounds every time we perform SAD, since each region already knows its own size.
It may also save two `usize` values from being passed around during motion estimation (ME).
To enforce this, we also remove the `w` and `h` parameters everywhere they were previously passed.

This is part of a series of commits authored by @maj160 to improve performance of rav1e.
  • Loading branch information
maj160 authored and shssoichiro committed Jan 17, 2023
1 parent c2dfb35 commit e773dbe
Show file tree
Hide file tree
Showing 8 changed files with 137 additions and 138 deletions.
12 changes: 5 additions & 7 deletions benches/dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,6 @@ fn new_plane<T: Pixel>(
type DistFn<T> = fn(
plane_org: &PlaneRegion<'_, T>,
plane_ref: &PlaneRegion<'_, T>,
w: usize,
h: usize,
bit_depth: usize,
cpu: CpuFeatureLevel,
) -> u32;
Expand All @@ -108,15 +106,15 @@ fn run_dist_bench<T: Pixel>(
let input_plane = new_plane::<T>(&mut ra, w, h);
let rec_plane = new_plane::<T>(&mut ra, w, h);

let plane_org = input_plane.as_region();
let plane_ref = rec_plane.as_region();

let blk_w = bs.width();
let blk_h = bs.height();
let plane_org =
input_plane.region(Area::Rect { x: 0, y: 0, width: blk_w, height: blk_h });
let plane_ref =
rec_plane.region(Area::Rect { x: 0, y: 0, width: blk_w, height: blk_h });

b.iter(|| {
let _ =
black_box(func(&plane_org, &plane_ref, blk_w, blk_h, bit_depth, cpu));
let _ = black_box(func(&plane_org, &plane_ref, bit_depth, cpu));
})
}

Expand Down
4 changes: 2 additions & 2 deletions src/api/internal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,8 @@ impl<T: Pixel> ContextInner<T> {
bsize: BlockSize, len: usize,
reference_frame_block_importances: &mut [f32],
) {
debug_assert!(bsize.width() == IMPORTANCE_BLOCK_SIZE);
debug_assert!(bsize.height() == IMPORTANCE_BLOCK_SIZE);
let coded_data = fi.coded_frame_data.as_ref().unwrap();
let plane_org = &frame.planes[0];
let plane_ref = &reference_frame.planes[0];
Expand Down Expand Up @@ -950,8 +952,6 @@ impl<T: Pixel> ContextInner<T> {
let inter_cost = get_satd(
&region_org,
&region_ref,
bsize.width(),
bsize.height(),
bit_depth,
fi.cpu_feature_level,
) as f32;
Expand Down
17 changes: 3 additions & 14 deletions src/api/lookahead.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,6 @@ pub(crate) fn estimate_intra_costs<T: Pixel>(
let intra_cost = get_satd(
&plane_org,
&plane_after_prediction_region,
bsize.width(),
bsize.height(),
bit_depth,
cpu_feature_level,
);
Expand Down Expand Up @@ -223,10 +221,6 @@ pub(crate) fn estimate_inter_costs<T: Pixel>(
let h_in_imp_b = plane_org.cfg.height / IMPORTANCE_BLOCK_SIZE;
let w_in_imp_b = plane_org.cfg.width / IMPORTANCE_BLOCK_SIZE;
let stats = &fs.frame_me_stats.read().expect("poisoned lock")[0];
let bsize = BlockSize::from_width_and_height(
IMPORTANCE_BLOCK_SIZE,
IMPORTANCE_BLOCK_SIZE,
);

let mut inter_costs = 0;
(0..h_in_imp_b).for_each(|y| {
Expand All @@ -252,14 +246,9 @@ pub(crate) fn estimate_inter_costs<T: Pixel>(
height: IMPORTANCE_BLOCK_SIZE,
});

inter_costs += get_satd(
&region_org,
&region_ref,
bsize.width(),
bsize.height(),
bit_depth,
fi.cpu_feature_level,
) as u64;
inter_costs +=
get_satd(&region_org, &region_ref, bit_depth, fi.cpu_feature_level)
as u64;
});
});
inter_costs as f64 / (w_in_imp_b * h_in_imp_b) as f64
Expand Down
26 changes: 14 additions & 12 deletions src/asm/aarch64/dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,13 @@ const fn to_index(bsize: BlockSize) -> usize {
#[inline(always)]
#[allow(clippy::let_and_return)]
pub fn get_sad<T: Pixel>(
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, w: usize, h: usize,
bit_depth: usize, cpu: CpuFeatureLevel,
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, bit_depth: usize,
cpu: CpuFeatureLevel,
) -> u32 {
let bsize_opt = BlockSize::from_width_and_height_opt(w, h);
let bsize_opt =
BlockSize::from_width_and_height_opt(src.rect().width, src.rect().height);

let call_rust = || -> u32 { rust::get_sad(dst, src, w, h, bit_depth, cpu) };
let call_rust = || -> u32 { rust::get_sad(dst, src, bit_depth, cpu) };

#[cfg(feature = "check_asm")]
let ref_dist = call_rust();
Expand Down Expand Up @@ -110,12 +111,13 @@ pub fn get_sad<T: Pixel>(

#[inline(always)]
pub fn get_satd<T: Pixel>(
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, w: usize, h: usize,
bit_depth: usize, cpu: CpuFeatureLevel,
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, bit_depth: usize,
cpu: CpuFeatureLevel,
) -> u32 {
let bsize_opt = BlockSize::from_width_and_height_opt(w, h);
let bsize_opt =
BlockSize::from_width_and_height_opt(src.rect().width, src.rect().height);

let call_rust = || -> u32 { rust::get_satd(dst, src, w, h, bit_depth, cpu) };
let call_rust = || -> u32 { rust::get_satd(dst, src, bit_depth, cpu) };

#[cfg(feature = "check_asm")]
let ref_dist = call_rust();
Expand Down Expand Up @@ -229,8 +231,8 @@ mod test {
*s = random::<u8>() as u16 * $BD / 8;
*d = random::<u8>() as u16 * $BD / 8;
}
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::RUST);
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::RUST);

assert_eq!(rust_result, result);
} else {
Expand All @@ -242,8 +244,8 @@ mod test {
*s = random::<u8>();
*d = random::<u8>();
}
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::RUST);
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::RUST);

assert_eq!(rust_result, result);
}
Expand Down
25 changes: 14 additions & 11 deletions src/asm/x86/dist/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,13 @@ pub(crate) const fn to_index(bsize: BlockSize) -> usize {
#[inline(always)]
#[allow(clippy::let_and_return)]
pub fn get_sad<T: Pixel>(
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, w: usize, h: usize,
bit_depth: usize, cpu: CpuFeatureLevel,
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, bit_depth: usize,
cpu: CpuFeatureLevel,
) -> u32 {
let bsize_opt = BlockSize::from_width_and_height_opt(w, h);
let bsize_opt =
BlockSize::from_width_and_height_opt(src.rect().width, src.rect().height);

let call_rust = || -> u32 { rust::get_sad(dst, src, w, h, bit_depth, cpu) };
let call_rust = || -> u32 { rust::get_sad(dst, src, bit_depth, cpu) };

#[cfg(feature = "check_asm")]
let ref_dist = call_rust();
Expand Down Expand Up @@ -220,12 +221,14 @@ pub fn get_sad<T: Pixel>(
#[inline(always)]
#[allow(clippy::let_and_return)]
pub fn get_satd<T: Pixel>(
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, w: usize, h: usize,
bit_depth: usize, cpu: CpuFeatureLevel,
src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, bit_depth: usize,
cpu: CpuFeatureLevel,
) -> u32 {
let w = src.rect().width;
let h = src.rect().height;
let bsize_opt = BlockSize::from_width_and_height_opt(w, h);

let call_rust = || -> u32 { rust::get_satd(dst, src, w, h, bit_depth, cpu) };
let call_rust = || -> u32 { rust::get_satd(dst, src, bit_depth, cpu) };

#[cfg(feature = "check_asm")]
let ref_dist = call_rust();
Expand Down Expand Up @@ -565,8 +568,8 @@ mod test {
*s = random::<u8>() as u16 * $BD / 8;
*d = random::<u8>() as u16 * $BD / 8;
}
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::RUST);
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::RUST);

assert_eq!(rust_result, result);
} else {
Expand All @@ -578,8 +581,8 @@ mod test {
*s = random::<u8>();
*d = random::<u8>();
}
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::RUST);
let result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap());
let rust_result = [<get_ $DIST_TY>](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::RUST);

assert_eq!(rust_result, result);
}
Expand Down
31 changes: 12 additions & 19 deletions src/dist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,12 @@ pub(crate) mod rust {
/// Compute the sum of absolute differences over a block.
/// The region's width and height can be at most 128, the size of the largest block.
pub fn get_sad<T: Pixel>(
plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, w: usize,
h: usize, _bit_depth: usize, _cpu: CpuFeatureLevel,
plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>,
_bit_depth: usize, _cpu: CpuFeatureLevel,
) -> u32 {
debug_assert!(w <= 128 && h <= 128);
let plane_org =
plane_org.subregion(Area::Rect { x: 0, y: 0, width: w, height: h });
let plane_ref =
plane_ref.subregion(Area::Rect { x: 0, y: 0, width: w, height: h });
debug_assert!(
plane_org.rect().width <= 128 && plane_org.rect().height <= 128
);

plane_org
.rows_iter()
Expand Down Expand Up @@ -156,11 +154,12 @@ pub(crate) mod rust {
/// revert to sad on edges when these transforms do not fit into w and h.
/// 4x4 transforms instead of 8x8 transforms when width or height < 8.
pub fn get_satd<T: Pixel>(
plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, w: usize,
h: usize, _bit_depth: usize, _cpu: CpuFeatureLevel,
plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>,
_bit_depth: usize, _cpu: CpuFeatureLevel,
) -> u32 {
let w = plane_org.rect().width;
let h = plane_org.rect().height;
assert!(w <= 128 && h <= 128);
assert!(plane_org.rect().width >= w && plane_org.rect().height >= h);
assert!(plane_ref.rect().width >= w && plane_ref.rect().height >= h);

// Size of hadamard transform should be 4x4 or 8x8
Expand All @@ -186,9 +185,7 @@ pub(crate) mod rust {

// Revert to sad on edge blocks (frame edges)
if chunk_w != size || chunk_h != size {
sum += get_sad(
&chunk_org, &chunk_ref, chunk_w, chunk_h, _bit_depth, _cpu,
) as u64;
sum += get_sad(&chunk_org, &chunk_ref, _bit_depth, _cpu) as u64;
continue;
}

Expand Down Expand Up @@ -443,7 +440,7 @@ pub mod test {
let (input_plane, rec_plane) = setup_planes::<T>();

for (w, h, distortion) in blocks {
let area = Area::StartingAt { x: 32, y: 40 };
let area = Area::Rect { x: 32, y: 40, width: w, height: h };

let input_region = input_plane.region(area);
let rec_region = rec_plane.region(area);
Expand All @@ -453,8 +450,6 @@ pub mod test {
get_sad(
&input_region,
&rec_region,
w,
h,
bit_depth,
CpuFeatureLevel::default()
)
Expand Down Expand Up @@ -502,7 +497,7 @@ pub mod test {
let (input_plane, rec_plane) = setup_planes::<T>();

for (w, h, distortion) in blocks {
let area = Area::StartingAt { x: 32, y: 40 };
let area = Area::Rect { x: 32, y: 40, width: w, height: h };

let input_region = input_plane.region(area);
let rec_region = rec_plane.region(area);
Expand All @@ -512,8 +507,6 @@ pub mod test {
get_satd(
&input_region,
&rec_region,
w,
h,
bit_depth,
CpuFeatureLevel::default()
)
Expand Down
Loading

0 comments on commit e773dbe

Please sign in to comment.