From 3dd5ef7ac75493f233279188d82f78a32c379917 Mon Sep 17 00:00:00 2001 From: David Michael Barr Date: Thu, 8 Jul 2021 23:42:45 +0900 Subject: [PATCH] Fix fall-through of x86 dispatch_predict_intra for CpuFeatureLevel::RUST --- src/api/config/encoder.rs | 12 ++++++++++-- src/api/config/mod.rs | 6 ++++++ src/api/test.rs | 4 ++++ src/asm/x86/predict.rs | 2 +- src/bin/common.rs | 17 +++++++++++++++++ src/capi.rs | 6 ++++++ src/encoder.rs | 12 ++++++++---- src/header.rs | 20 ++++++++------------ 8 files changed, 60 insertions(+), 19 deletions(-) diff --git a/src/api/config/encoder.rs b/src/api/config/encoder.rs index c91d7f4098..9226f9997f 100644 --- a/src/api/config/encoder.rs +++ b/src/api/config/encoder.rs @@ -31,6 +31,13 @@ pub struct EncoderConfig { pub height: usize, /// Sample aspect ratio (for anamorphic video). pub sample_aspect_ratio: Rational, + /// Maximum width of the frames in pixels (for seq header) + /// 0 means to use the width setting instead. + /// Used for multiple renditions when switch frames are in use. + /// Set all renditions to have identical max_width / max_height. + pub max_width: usize, + /// Maximum height of the frames in pixels (for seq header) + pub max_height: usize, /// Video time base. pub time_base: Rational, @@ -133,7 +140,8 @@ impl EncoderConfig { height: 480, sample_aspect_ratio: Rational { num: 1, den: 1 }, time_base: Rational { num: 1, den: 30 }, - + max_width: 0, + max_height: 0, bit_depth: 8, chroma_sampling: ChromaSampling::Cs420, chroma_sample_position: ChromaSamplePosition::Unknown, @@ -213,7 +221,7 @@ impl EncoderConfig { // has the property that the scaled distortion of a 2Nx2N block is always // equal to the sum of the scaled distortions of the NxN sub-blocks it's // made of, this is a necessary property to be able to do RDO between - // multiple partition sizes properly. Unfortunately, when tx domain + // multiple partition sizes properly. 
Unfortunately, when tx domain // distortion is used, distortion is only known at the tx block level which // might be bigger than 8x8. So temporal RDO is always disabled in that case. !self.speed_settings.tx_domain_distortion diff --git a/src/api/config/mod.rs b/src/api/config/mod.rs index 51421a3c8a..3d49691439 100644 --- a/src/api/config/mod.rs +++ b/src/api/config/mod.rs @@ -313,6 +313,12 @@ impl Config { if render_height == 0 || render_height > u16::max_value() as usize { return Err(InvalidRenderHeight(render_height)); } + if config.max_width != 0 && config.width > config.max_width { + return Err(InvalidWidth(config.width)); + } + if config.max_height != 0 && config.height > config.max_height { + return Err(InvalidHeight(config.height)); + } if config.rdo_lookahead_frames > MAX_RDO_LOOKAHEAD_FRAMES || config.rdo_lookahead_frames < 1 diff --git a/src/api/test.rs b/src/api/test.rs index 9bb36aae56..f6d6ace246 100644 --- a/src/api/test.rs +++ b/src/api/test.rs @@ -1902,6 +1902,8 @@ fn log_q_exp_overflow() { width: 16, height: 16, sample_aspect_ratio: Rational::new(1, 1), + max_width: 0, + max_height: 0, bit_depth: 8, chroma_sampling: ChromaSampling::Cs420, chroma_sample_position: ChromaSamplePosition::Unknown, @@ -1967,6 +1969,8 @@ fn guess_frame_subtypes_assert() { width: 16, height: 16, sample_aspect_ratio: Rational::new(1, 1), + max_width: 0, + max_height: 0, bit_depth: 8, chroma_sampling: ChromaSampling::Cs420, chroma_sample_position: ChromaSamplePosition::Unknown, diff --git a/src/asm/x86/predict.rs b/src/asm/x86/predict.rs index db16a7b163..3b5bd2e70f 100644 --- a/src/asm/x86/predict.rs +++ b/src/asm/x86/predict.rs @@ -170,7 +170,7 @@ pub fn dispatch_predict_intra( let angle = angle as libc::c_int; match T::type_enum() { - PixelType::U8 => { + PixelType::U8 if cpu >= CpuFeatureLevel::SSSE3 => { let dst_ptr = dst.data_ptr_mut() as *mut _; let edge_ptr = edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _; diff --git a/src/bin/common.rs
b/src/bin/common.rs index 759e6627ad..6ae039f5c4 100644 --- a/src/bin/common.rs +++ b/src/bin/common.rs @@ -250,6 +250,20 @@ pub fn parse_cli() -> Result { .takes_value(true) .default_value("0") ) + .arg( + Arg::with_name("MAX_WIDTH") + .help("Maximum width coded in the sequence header. 0 uses the input video width.") + .long("max-width") + .takes_value(true) + .default_value("0") + ) + .arg( + Arg::with_name("MAX_HEIGHT") + .help("Maximum height coded in the sequence header. 0 uses the input video height.") + .long("max-height") + .takes_value(true) + .default_value("0") + ) .arg( Arg::with_name("TILES") .help("Number of tiles. Tile-cols and tile-rows are overridden\n\ @@ -716,6 +730,9 @@ fn parse_config(matches: &ArgMatches<'_>) -> Result { cfg.tile_cols = matches.value_of("TILE_COLS").unwrap().parse().unwrap(); cfg.tile_rows = matches.value_of("TILE_ROWS").unwrap().parse().unwrap(); + cfg.max_width = matches.value_of("MAX_WIDTH").unwrap().parse().unwrap(); + cfg.max_height = matches.value_of("MAX_HEIGHT").unwrap().parse().unwrap(); + cfg.tiles = matches.value_of("TILES").unwrap().parse().unwrap(); if cfg.tile_cols > 64 || cfg.tile_rows > 64 { diff --git a/src/capi.rs b/src/capi.rs index 8a3c81c8da..05cb791b0a 100644 --- a/src/capi.rs +++ b/src/capi.rs @@ -612,6 +612,12 @@ unsafe fn option_match( match key { "width" => enc.width = value.parse().map_err(|_| ())?, "height" => enc.height = value.parse().map_err(|_| ())?, + "max_width" => { + enc.max_width = value.parse().map_err(|_| ())? + } + "max_height" => { + enc.max_height = value.parse().map_err(|_| ())? + } "speed" => { enc.speed_settings = rav1e::SpeedSettings::from_preset(value.parse().map_err(|_| ())?)
diff --git a/src/encoder.rs b/src/encoder.rs index 1ccf8c8310..6b41a9430f 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -187,8 +187,12 @@ pub struct Sequence { impl Sequence { pub fn new(config: &EncoderConfig) -> Sequence { - let width_bits = 32 - (config.width as u32).leading_zeros(); - let height_bits = 32 - (config.height as u32).leading_zeros(); + let max_width = + if config.max_width > 0 { config.max_width } else { config.width }; + let max_height = + if config.max_height > 0 { config.max_height } else { config.height }; + let width_bits = 32 - ((max_width as u32) - 1).leading_zeros(); + let height_bits = 32 - ((max_height as u32) - 1).leading_zeros(); assert!(width_bits <= 16); assert!(height_bits <= 16); @@ -277,8 +281,8 @@ impl Sequence { color_description: config.color_description, mastering_display: config.mastering_display, content_light: config.content_light, - max_frame_width: config.width as u32, - max_frame_height: config.height as u32, + max_frame_width: max_width as u32, + max_frame_height: max_height as u32, frame_id_numbers_present_flag: false, frame_id_length: FRAME_ID_LENGTH, delta_frame_id_length: DELTA_FRAME_ID_LENGTH, diff --git a/src/header.rs b/src/header.rs index e650b16ac7..78c1c1fce6 100644 --- a/src/header.rs +++ b/src/header.rs @@ -837,12 +837,10 @@ impl UncompressedHeader for BitWriter { fn write_max_frame_size( &mut self, fi: &FrameInvariants, ) -> io::Result<()> { - // width_bits and height_bits will have to be moved to the sequence header OBU - // when we add support for it. 
- let width = fi.width - 1; - let height = fi.height - 1; - let width_bits = log_in_base_2(width as u32) as u32 + 1; - let height_bits = log_in_base_2(height as u32) as u32 + 1; + let width = fi.sequence.max_frame_width - 1; + let height = fi.sequence.max_frame_height - 1; + let width_bits = fi.sequence.num_bits_width; + let height_bits = fi.sequence.num_bits_height; assert!(width_bits <= 16); assert!(height_bits <= 16); self.write(4, width_bits - 1)?; @@ -858,14 +856,12 @@ impl UncompressedHeader for BitWriter { // width_bits and height_bits will have to be moved to the sequence header OBU // when we add support for it. if fi.frame_size_override_flag { - let width = fi.width - 1; - let height = fi.height - 1; - let width_bits = log_in_base_2(width as u32) as u32 + 1; - let height_bits = log_in_base_2(height as u32) as u32 + 1; + let width_bits = fi.sequence.num_bits_width; + let height_bits = fi.sequence.num_bits_height; assert!(width_bits <= 16); assert!(height_bits <= 16); - self.write(width_bits, width as u16)?; - self.write(height_bits, height as u16)?; + self.write(width_bits, (fi.width - 1) as u16)?; + self.write(height_bits, (fi.height - 1) as u16)?; } if fi.sequence.enable_superres { unimplemented!();