Skip to content

Commit

Permalink
support funny sizes
Browse files Browse the repository at this point in the history
* refine `PlaneDims` logic for uyvy dimension calculation
* relax constraints on `uyvy_to_i420::convert`
* process rows, not just blocks, with processors deferring to a
  fallback. In theory at least the AVX one could use masked instructions
  instead, but the fallback approach was easy.
* test with miri
  • Loading branch information
scottlamb committed Jun 21, 2024
1 parent a202063 commit d9e0e83
Show file tree
Hide file tree
Showing 5 changed files with 478 additions and 192 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ jobs:
runs-on: ${{ matrix.runner }}

strategy:
fail-fast: false
matrix:
runner: [ubuntu-latest, macos-latest]
steps:
Expand All @@ -31,3 +32,18 @@ jobs:
- run: cargo clippy -- --deny warnings
- run: cargo test --release
- run: cargo criterion

miri:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup Rust
uses: actions-rust-lang/setup-rust-toolchain@v1
with:
toolchain: nightly
components: miri
- name: Cache
uses: Swatinem/rust-cache@v2
with:
key: miri
- run: cargo miri test
10 changes: 3 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,10 @@ Limitations and future work:
* Returns `Err` on x86\_64 CPUs that don't support
AVX2. We'll likely add an SSE2 fallback later. As SSE2 is in the core
x86\_64 instruction set, this would mean all x86\_64 CPUs would be supported.
* Returns `Err` for frame widths that aren't a multiple
of 64 pixels (for AVX2) or 32 pixels (for NEON). This could be eased via a
scalar fallback path for the remaining pixels, or (for AVX2) masked
load/store instructions.
* Returns `Err` for frame heights that aren't a multiple of 2.
* Expects to process full horizontal lines. This is likely to
change to allow working on cropped regions and outputting to frames with
extra padding between lines as required by some APIs/devices.
change to allow working on cropped regions.
* Does not support output to a frame with padding, as required by some
APIs/devices.
* The ARM NEON code is less optimized than the AVX2 code today.

You may find the notes in [`docs/simd.md`](docs/simd.md) helpful if you are new
Expand Down
57 changes: 31 additions & 26 deletions src/frame.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,6 @@ pub unsafe trait Frame {
/// Returns true if this frame has been fully initialized.
fn initialized(&self) -> bool;

/// Marks this frame as fully initialized.
///
/// # Safety
///
/// The caller must ensure that the frame is fully initialized, including
/// any padding bytes.
unsafe fn initialize(&mut self);

/// Returns the (image format-defined) planes for read/shared access.
fn planes(&self) -> ArrayVec<FramePlaneRef, MAX_PLANES>;
}
Expand All @@ -74,6 +66,14 @@ pub unsafe trait Frame {
pub unsafe trait FrameMut: Frame {
/// Returns the (image format-defined) planes for mutation/exclusive access.
///
/// The returned `ArrayVec` holds at most `MAX_PLANES` entries.
fn planes_mut(&mut self) -> ArrayVec<FramePlaneMut, MAX_PLANES>;

/// Marks this frame as fully initialized.
///
/// Requires exclusive access to the frame (`&mut self`).
///
/// # Safety
///
/// The caller must ensure that the frame is fully initialized, including
/// any padding bytes.
unsafe fn initialize(&mut self);
}

/// Provides read-only access to a given image plane.
Expand Down Expand Up @@ -247,6 +247,16 @@ pub unsafe trait Storage {

/// Returns a raw pointer to the start of the storage.
fn as_ptr(&self) -> *const u8;
}

/// Write access to a backing buffer for a [`ConsecutiveFrame`].
///
/// # Safety
///
/// As in [`Storage`].
pub unsafe trait StorageMut: Storage {
/// Returns a raw pointer to the start of the storage.
fn as_mut_ptr(&mut self) -> *mut u8;

/// Notes that this storage is initialized, up to length `len`.
///
Expand All @@ -262,16 +272,6 @@ pub unsafe trait Storage {
unsafe fn initialize(&mut self, len: usize) {}
}

/// Write access to a backing buffer for a [`ConsecutiveFrame`].
///
/// # Safety
///
/// As in [`Storage`].
pub unsafe trait StorageMut: Storage {
/// Returns a mutable raw pointer to the start of the storage.
fn as_mut_ptr(&mut self) -> *mut u8;
}

unsafe impl Storage for Vec<u8> {
#[inline]
fn check_len(&self, len: usize) -> bool {
Expand All @@ -289,6 +289,11 @@ unsafe impl StorageMut for Vec<u8> {
fn as_mut_ptr(&mut self) -> *mut u8 {
    // Name the inherent `Vec` method explicitly so this cannot be misread as
    // a recursive call to the trait method of the same name.
    Vec::as_mut_ptr(self)
}

#[inline]
unsafe fn initialize(&mut self, len: usize) {
    // SAFETY: forwarded from this method's own contract: the caller asserts
    // that the storage is initialized up to `len`.
    // NOTE(review): `Vec::set_len` also requires `len <= capacity()`;
    // presumably guaranteed by an earlier `check_len` — confirm against the
    // trait's full `# Safety` section, which is not visible here.
    unsafe { Vec::set_len(self, len) }
}
}

macro_rules! impl_slice_storage {
Expand Down Expand Up @@ -478,14 +483,6 @@ unsafe impl<S: Storage> Frame for ConsecutiveFrame<S> {
}
planes
}

#[inline]
unsafe fn initialize(&mut self) {
    // Only the first call does any work; once the flag is set this is a no-op.
    if self.initialized {
        return;
    }
    let len = self.total_size();
    self.storage.initialize(len);
    self.initialized = true;
}
}

unsafe impl<S: StorageMut> FrameMut for ConsecutiveFrame<S> {
Expand All @@ -506,4 +503,12 @@ unsafe impl<S: StorageMut> FrameMut for ConsecutiveFrame<S> {
}
planes
}

#[inline]
unsafe fn initialize(&mut self) {
    // Only the first call does any work; once the flag is set this is a no-op.
    if self.initialized {
        return;
    }
    // Tell the backing storage how many bytes are now initialized, then
    // record that on the frame itself.
    let len = self.total_size();
    self.storage.initialize(len);
    self.initialized = true;
}
}
69 changes: 67 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,17 @@ pub enum PixelFormat {
/// [UYVY](https://fourcc.org/pixel-format/yuv-uyvy/).
///
/// Matches ffmpeg's `AV_PIX_FMT_UYVY422`: "packed YUV 4:2:2, 16bpp, Cb Y0 Cr Y1".
///
/// For odd-width images, the width is rounded up to the next multiple of 2,
/// with the final `Y` as a don't-care byte, and the final chroma values not
/// subsampled.
UYVY422,

/// [I420](https://fourcc.org/pixel-format/yuv-i420/).
///
/// Matches ffmpeg's `AV_PIX_FMT_YUV420P`: "planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)".
///
/// For odd-width and odd-height images, the final pixel is not subsampled.
I420,

/// BGRA.
Expand Down Expand Up @@ -81,7 +87,11 @@ impl PixelFormat {
match self {
PixelFormat::UYVY422 => {
sizes.push(PlaneDims {
stride: width.checked_shl(1).expect("stride should not overflow"),
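// Round to next multiple of 2, then double.
// NOTE(review): `checked_shl(1)` returns `None` only when the shift amount is
// >= the integer's bit width, so it cannot detect the doubled value
// overflowing; `checked_mul(2)` would make the `expect` below meaningful.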
stride: width
.checked_add(width & 1)
.and_then(|w| w.checked_shl(1))
.expect("stride should not overflow"),
rows: height,
});
}
Expand All @@ -91,8 +101,8 @@ impl PixelFormat {
stride: width,
rows: height,
});
// U/V planes.
let chroma_plane_size = PlaneDims {
// U/V planes.
// Overflow-safe divide by two that rounds up.
stride: (width >> 1) + (width & 1),
rows: (height >> 1) + (height & 1),
Expand Down Expand Up @@ -120,3 +130,58 @@ impl PixelFormat {
}
}
}

#[cfg(test)]
mod tests {
    /// Pins the minimum plane dimensions reported for square 1x1, 2x2 and 3x3
    /// frames, covering both odd and even sizes in each layout.
    #[test]
    fn odd_sizes() {
        // UYVY422 reports a single plane. Each case is
        // (width == height, expected stride).
        for (side, stride) in [(1, 4), (2, 4), (3, 8)] {
            let dims: Vec<_> = super::PixelFormat::UYVY422
                .min_plane_dims(side, side)
                .collect();
            assert_eq!(dims, vec![super::PlaneDims { stride, rows: side }]);
        }

        // I420 reports a full-size plane followed by two identical chroma
        // planes. Each case is (width == height, expected chroma stride == rows).
        for (side, chroma) in [(1, 1), (2, 1), (3, 2)] {
            let square = |n| super::PlaneDims { stride: n, rows: n };
            let dims: Vec<_> = super::PixelFormat::I420
                .min_plane_dims(side, side)
                .collect();
            assert_eq!(dims, vec![square(side), square(chroma), square(chroma)]);
        }
    }
}
Loading

0 comments on commit d9e0e83

Please sign in to comment.