Skip to content

Commit

Permalink
Well-defined lazy initialization for get_intra_edges
Browse files Browse the repository at this point in the history
Always write the top-left edge pixel so that the initialized area of the edge buffer is contiguous.

Introduce new types to make a safe rebinding pattern ergonomic:

  let mut edges: IntraEdgeBuffer<T> = Aligned::uninit_array();
  let edges: IntraEdge<T> = get_intra_edges(&mut edges, ...);
  predict_intra(&edges, ...);

IntraEdgeBuffer holds the aligned array for initialization.
IntraEdge holds references to the initialized slices and ensures the
layout required by the intra-prediction assembly.

Support passing pre-initialized data for tests and benchmarks.

Since MaybeUninit::write_slice() is only available in nightly rustc,
use std::mem::transmute() with copy_from_slice() in get_intra_edges().
  • Loading branch information
barrbrain committed Oct 27, 2023
1 parent ddbece2 commit efa9f8c
Show file tree
Hide file tree
Showing 10 changed files with 150 additions and 89 deletions.
15 changes: 5 additions & 10 deletions benches/predict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,19 @@ use rand::{Rng, SeedableRng};
use rand_chacha::ChaChaRng;
use rav1e::bench::cpu_features::CpuFeatureLevel;
use rav1e::bench::frame::*;
use rav1e::bench::partition::BlockSize;
use rav1e::bench::partition::{BlockSize, IntraEdge};
use rav1e::bench::predict::*;
use rav1e::bench::transform::TxSize;
use rav1e::bench::util::*;

pub const BLOCK_SIZE: BlockSize = BlockSize::BLOCK_32X32;

pub fn generate_block<T: Pixel>(
rng: &mut ChaChaRng, edge_buf: &mut Aligned<[T; 257]>,
) -> (Plane<T>, Vec<i16>) {
pub fn generate_block<T: Pixel>(rng: &mut ChaChaRng) -> (Plane<T>, Vec<i16>) {
let block = Plane::from_slice(
&vec![T::cast_from(0); BLOCK_SIZE.width() * BLOCK_SIZE.height()],
BLOCK_SIZE.width(),
);
let ac: Vec<i16> = (0..(32 * 32)).map(|_| rng.gen()).collect();
for v in edge_buf.data.iter_mut() {
*v = T::cast_from(rng.gen::<u8>());
}

(block, ac)
}

Expand Down Expand Up @@ -132,8 +126,9 @@ pub fn intra_bench<T: Pixel>(
b: &mut Bencher, mode: PredictionMode, variant: PredictionVariant,
) {
let mut rng = ChaChaRng::from_seed([0; 32]);
let mut edge_buf = unsafe { Aligned::uninitialized() };
let (mut block, ac) = generate_block::<T>(&mut rng, &mut edge_buf);
let edge_buf = Aligned::from_fn(|_| T::cast_from(rng.gen::<u8>()));
let edge_buf = IntraEdge::mock(&edge_buf);
let (mut block, ac) = generate_block::<T>(&mut rng);
let cpu = CpuFeatureLevel::default();
let bitdepth = match T::type_enum() {
PixelType::U8 => 8,
Expand Down
3 changes: 3 additions & 0 deletions src/api/lookahead.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::partition::{get_intra_edges, BlockSize};
use crate::predict::{IntraParam, PredictionMode};
use crate::tiling::{Area, PlaneRegion, TileRect};
use crate::transform::TxSize;
use crate::util::Aligned;
use crate::Pixel;
use rayon::iter::*;
use rust_hawktracer::*;
Expand Down Expand Up @@ -54,7 +55,9 @@ pub(crate) fn estimate_intra_costs<T: Pixel>(
});

// TODO: other intra prediction modes.
let mut edge_buf = Aligned::uninit_array();
let edge_buf = get_intra_edges(
&mut edge_buf,
&plane.as_region(),
TileBlockOffset(BlockOffset { x, y }),
0,
Expand Down
15 changes: 6 additions & 9 deletions src/asm/aarch64/predict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

use crate::context::MAX_TX_SIZE;
use crate::cpu_features::CpuFeatureLevel;
use crate::partition::BlockSize;
use crate::partition::{BlockSize, IntraEdge};
use crate::predict::rust::{
dr_intra_derivative, select_ief_strength, select_ief_upsample,
};
Expand All @@ -18,7 +18,6 @@ use crate::predict::{
};
use crate::tiling::{PlaneRegion, PlaneRegionMut};
use crate::transform::TxSize;
use crate::util::Aligned;
use crate::{Pixel, PixelType};
use libc;
use libc::{c_int, ptrdiff_t};
Expand Down Expand Up @@ -487,7 +486,7 @@ pub fn dispatch_predict_intra<T: Pixel>(
mode: PredictionMode, variant: PredictionVariant,
dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize,
ac: &[i16], angle: isize, ief_params: Option<IntraEdgeFilterParameters>,
edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, cpu: CpuFeatureLevel,
edge_buf: &IntraEdge<T>, cpu: CpuFeatureLevel,
) {
let call_rust = |dst: &mut PlaneRegionMut<'_, T>| {
rust::dispatch_predict_intra(
Expand All @@ -504,10 +503,8 @@ pub fn dispatch_predict_intra<T: Pixel>(
let dst_ptr = dst.data_ptr_mut() as *mut _;
let dst_u16 = dst.data_ptr_mut() as *mut u16;
let stride = T::to_asm_stride(dst.plane_cfg.stride) as libc::ptrdiff_t;
let edge_ptr =
edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _;
let edge_u16 =
edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const u16;
let edge_ptr = edge_buf.top_left_ptr() as *const _;
let edge_u16 = edge_buf.top_left_ptr() as *const u16;
let w = tx_size.width() as libc::c_int;
let h = tx_size.height() as libc::c_int;
let angle = angle as libc::c_int;
Expand Down Expand Up @@ -600,7 +597,7 @@ pub fn dispatch_predict_intra<T: Pixel>(
return ipred_z2(
dst.data_ptr_mut(),
stride,
edge_buf.data.as_ptr().add(2 * MAX_TX_SIZE),
edge_buf.top_left_ptr(),
angle as isize,
w,
h,
Expand All @@ -614,7 +611,7 @@ pub fn dispatch_predict_intra<T: Pixel>(
(if angle < 90 { ipred_z1 } else { ipred_z3 })(
dst.data_ptr_mut(),
stride,
edge_buf.data.as_ptr().add(2 * MAX_TX_SIZE),
edge_buf.top_left_ptr(),
angle as isize,
w,
h,
Expand Down
5 changes: 3 additions & 2 deletions src/asm/shared/predict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ mod test {
use crate::context::MAX_TX_SIZE;
use crate::cpu_features::CpuFeatureLevel;
use crate::frame::{AsRegion, Plane};
use crate::partition::BlockSize;
use crate::partition::{BlockSize, IntraEdge};
use crate::predict::dispatch_predict_intra;
use crate::predict::pred_cfl_ac;
use crate::predict::rust;
Expand All @@ -41,9 +41,10 @@ mod test {
fn pred_matches_inner<T: Pixel>(cpu: CpuFeatureLevel, bit_depth: usize) {
let tx_size = TxSize::TX_4X4;
let ac: Aligned<[i16; 32 * 32]> = Aligned::from_fn(|i| i as i16 - 16 * 32);
let edge_buf: Aligned<[T; 4 * MAX_TX_SIZE + 1]> = Aligned::from_fn(|i| {
let edge_buf = Aligned::from_fn(|i| {
T::cast_from(((i ^ 1) + 32).saturating_sub(2 * MAX_TX_SIZE))
});
let edge_buf = IntraEdge::mock(&edge_buf);

let ief_params_all = [
None,
Expand Down
12 changes: 4 additions & 8 deletions src/asm/x86/predict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,13 @@
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

use crate::context::MAX_TX_SIZE;
use crate::cpu_features::CpuFeatureLevel;
use crate::partition::BlockSize;
use crate::partition::{BlockSize, IntraEdge};
use crate::predict::{
rust, IntraEdgeFilterParameters, PredictionMode, PredictionVariant,
};
use crate::tiling::{PlaneRegion, PlaneRegionMut};
use crate::transform::TxSize;
use crate::util::Aligned;
use crate::Pixel;
use std::mem::MaybeUninit;
use v_frame::pixel::PixelType;
Expand Down Expand Up @@ -242,7 +240,7 @@ pub fn dispatch_predict_intra<T: Pixel>(
mode: PredictionMode, variant: PredictionVariant,
dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize,
ac: &[i16], angle: isize, ief_params: Option<IntraEdgeFilterParameters>,
edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, cpu: CpuFeatureLevel,
edge_buf: &IntraEdge<T>, cpu: CpuFeatureLevel,
) {
let call_rust = |dst: &mut PlaneRegionMut<'_, T>| {
rust::dispatch_predict_intra(
Expand All @@ -261,8 +259,7 @@ pub fn dispatch_predict_intra<T: Pixel>(
match T::type_enum() {
PixelType::U8 => {
let dst_ptr = dst.data_ptr_mut() as *mut _;
let edge_ptr =
edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _;
let edge_ptr = edge_buf.top_left_ptr() as *const _;
if cpu >= CpuFeatureLevel::AVX512ICL {
match mode {
PredictionMode::DC_PRED => {
Expand Down Expand Up @@ -555,8 +552,7 @@ pub fn dispatch_predict_intra<T: Pixel>(
}
PixelType::U16 => {
let dst_ptr = dst.data_ptr_mut() as *mut _;
let edge_ptr =
edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _;
let edge_ptr = edge_buf.top_left_ptr() as *const _;
let bd_max = (1 << bit_depth) - 1;
if cpu >= CpuFeatureLevel::AVX512ICL {
match mode {
Expand Down
2 changes: 2 additions & 0 deletions src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1475,7 +1475,9 @@ pub fn encode_tx_block<T: Pixel, W: Writer>(

if mode.is_intra() {
let bit_depth = fi.sequence.bit_depth;
let mut edge_buf = Aligned::uninit_array();
let edge_buf = get_intra_edges(
&mut edge_buf,
&rec.as_const(),
tile_partition_bo,
bx,
Expand Down
Loading

0 comments on commit efa9f8c

Please sign in to comment.