diff --git a/.github/workflows/rav1e.yml b/.github/workflows/rav1e.yml index 05dbc6d653..7bbb963dca 100644 --- a/.github/workflows/rav1e.yml +++ b/.github/workflows/rav1e.yml @@ -12,7 +12,6 @@ on: jobs: rustfmt-clippy: - runs-on: ubuntu-22.04 steps: @@ -193,7 +192,7 @@ jobs: - name: Check extra features if: matrix.toolchain == 'stable' && matrix.conf == 'check-extra-feats' run: | - cargo check --features=check_asm,capi,dump_lookahead_data,serialize,bench --all-targets + cargo check --features=check_asm,capi,dump_lookahead_data,serialize,bench,devel --all-targets - name: Check extra features if: matrix.toolchain == 'stable' && matrix.conf == 'check-unstable-feats' run: | diff --git a/Cargo.toml b/Cargo.toml index d7b69ff27f..280d2f03d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,9 @@ default-run = "rav1e" [features] unstable = [] +# Exposes extra flags for tuning encoder internals. +# Intended to be used by developers to find ideal internal settings. +devel = [] channel-api = ["crossbeam"] decode_test = ["aom-sys"] decode_test_dav1d = ["dav1d-sys"] diff --git a/src/api/config/encoder.rs b/src/api/config/encoder.rs index 038f3301b0..05f5392f9d 100644 --- a/src/api/config/encoder.rs +++ b/src/api/config/encoder.rs @@ -114,6 +114,50 @@ pub struct EncoderConfig { /// Settings which affect the encoding speed vs. quality trade-off. pub speed_settings: SpeedSettings, + + /// Advanced settings which are intended for use by developers. + /// Non-developers should use the default values. + pub advanced_flags: AdvancedTuning, +} + +/// Advanced settings that are intended for use by developers +/// for tuning encoder internals. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct AdvancedTuning { + /// Controls the strength of the deblock filter, as a multiplier to the default. + pub deblock_strength: f32, + /// Controls the sharpness of the deblock filter. Accepts a value from 0-7. 
+ pub deblock_sharpness: u8, + /// Controls the ratio between intra frame and inter frame quantizers, as a multiplier. + /// Default is 1.0. Higher values create a higher quantizer difference, while lower values + /// create a lower quantizer difference. A value of 0.0 would mean that I and P quantizers + /// are the same. + pub ip_ratio: f32, + /// Controls the ratio between "P"-frame and "B"-frame quantizers, as a multiplier. + /// Default is 1.0. Higher values create a higher quantizer difference, while lower values + /// create a lower quantizer difference. A value of 0.0 would mean that P and B quantizers + /// are the same. + pub pb_ratio: f32, + /// Controls the ratio between frame quantizers in the levels of the pyramid between "B"-frames, + /// as a multiplier. Default is 1.0. Higher values create a higher quantizer difference, + /// while lower values create a lower quantizer difference. A value of 0.0 would mean that + /// B0 and B1 quantizers are the same. + pub b_ratio: f32, + /// Controls the strength of temporal RDO, as a multiplier to the default. 
+ pub temporal_rdo_strength: f32, +} + +impl Default for AdvancedTuning { + fn default() -> Self { + Self { + deblock_strength: 1.0, + deblock_sharpness: 0, + ip_ratio: 1.0, + pb_ratio: 1.0, + b_ratio: 1.0, + temporal_rdo_strength: 1.0, + } + } } /// Default preset for `EncoderConfig`: it is a balance between quality and @@ -171,6 +215,7 @@ impl EncoderConfig { tile_rows: 0, tiles: 0, speed_settings: SpeedSettings::from_preset(speed), + advanced_flags: Default::default(), } } diff --git a/src/api/test.rs b/src/api/test.rs index 5d08692ffe..21ff78d1f8 100644 --- a/src/api/test.rs +++ b/src/api/test.rs @@ -2165,6 +2165,7 @@ fn log_q_exp_overflow() { }, ..Default::default() }, + advanced_flags: Default::default(), }; let config = Config::new().with_encoder_config(enc).with_threads(1); @@ -2242,6 +2243,7 @@ fn guess_frame_subtypes_assert() { }, ..Default::default() }, + advanced_flags: Default::default(), }; let config = Config::new().with_encoder_config(enc).with_threads(1); diff --git a/src/bin/common.rs b/src/bin/common.rs index 6289e27b9b..ba9220d4e6 100644 --- a/src/bin/common.rs +++ b/src/bin/common.rs @@ -249,10 +249,53 @@ pub struct CliOptions { #[clap(long, short, value_parser, help_heading = "DEBUGGING")] pub reconstruction: Option, + /// Controls the strength of the deblock filter, as a multiplier to the default. + #[cfg(feature = "devel")] + #[clap(long, value_parser = positive_float, default_value_t=1.0f32, help_heading = "ADVANCED")] + pub deblock_strength: f32, + /// Controls the sharpness of the deblock filter. Accepts a value from 0-7. + #[cfg(feature = "devel")] + #[clap(long, value_parser = clap::value_parser!(u8).range(0..=7), default_value_t=0, help_heading = "ADVANCED")] + pub deblock_sharpness: u8, + /// Controls the ratio between intra frame and inter frame quantizers, as a multiplier. + /// Higher values create a higher quantizer difference, while lower values + /// create a lower quantizer difference. 
A value of 0.0 would mean that I and P quantizers + /// are the same. + #[cfg(feature = "devel")] + #[clap(long, value_parser = positive_float, default_value_t=1.0f32, help_heading = "ADVANCED")] + pub ip_ratio: f32, + /// Controls the ratio between "P"-frame and "B"-frame quantizers, as a multiplier. + /// Default is 1.0. Higher values create a higher quantizer difference, while lower values + /// create a lower quantizer difference. A value of 0.0 would mean that P and B quantizers + /// are the same. + #[cfg(feature = "devel")] + #[clap(long, value_parser = positive_float, default_value_t=1.0f32, help_heading = "ADVANCED")] + pub pb_ratio: f32, + /// Controls the ratio between frame quantizers in the levels of the pyramid between "B"-frames, + /// as a multiplier. Default is 1.0. Higher values create a higher quantizer difference, + /// while lower values create a lower quantizer difference. A value of 0.0 would mean that + /// B0 and B1 quantizers are the same. + #[cfg(feature = "devel")] + #[clap(long, value_parser = positive_float, default_value_t=1.0f32, help_heading = "ADVANCED")] + pub b_ratio: f32, + /// Controls the strength of temporal RDO, as a multiplier to the default. 
+ #[cfg(feature = "devel")] + #[clap(long, value_parser = positive_float, default_value_t=1.0f32, help_heading = "ADVANCED")] + pub temporal_rdo_strength: f32, + #[clap(subcommand)] pub command: Option, } +#[cfg(feature = "devel")] +fn positive_float(input: &str) -> Result<f32, String> { + let value = input.parse::<f32>().map_err(|e| e.to_string())?; + if !value.is_finite() || value < 0.0 { // f32 parsing accepts "NaN" and "inf"; a bare `< 0.0` test lets both through. + return Err("Value must be a finite, non-negative number".to_string()); + } + Ok(value) +} + fn get_version() -> &'static str { static VERSION_STR: Lazy = Lazy::new(|| { format!( @@ -299,7 +342,7 @@ pub enum Commands { #[clap(long, short, value_parser)] save_config: Option, /// Load the encoder configuration from a toml file - #[clap(long, short, value_parser, conflicts_with = "save-config")] + #[clap(long, short, value_parser, conflicts_with = "save_config")] load_config: Option, }, } @@ -484,6 +527,18 @@ pub fn parse_cli() -> Result { }) } +#[cfg(feature = "devel")] +const fn parse_advanced_flags(cli: &CliOptions) -> AdvancedTuning { + AdvancedTuning { + deblock_strength: cli.deblock_strength, + deblock_sharpness: cli.deblock_sharpness, + ip_ratio: cli.ip_ratio, + pb_ratio: cli.pb_ratio, + b_ratio: cli.b_ratio, + temporal_rdo_strength: cli.temporal_rdo_strength, + } +} + fn parse_config(matches: &CliOptions) -> Result { let maybe_quantizer = matches.quantizer; let maybe_bitrate = matches.bitrate; @@ -689,5 +744,10 @@ fn parse_config(matches: &CliOptions) -> Result { cfg.speed_settings.scene_detection_mode = SceneDetectionSpeed::None; } + #[cfg(feature = "devel")] + { + cfg.advanced_flags = parse_advanced_flags(matches); + } + Ok(cfg) } diff --git a/src/encoder.rs b/src/encoder.rs index d11dbfd193..7e71d82964 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -466,7 +466,7 @@ impl FrameState { cdfs: CDFContext::new(0), context_update_tile_id: 0, max_tile_size_bytes: 0, - deblock: Default::default(), + deblock: DeblockState::new(&fi.config, fi.frame_type), segmentation: Default::default(), restoration: rs, frame_me_stats: me_stats, 
@@ -497,7 +497,7 @@ impl FrameState { cdfs: CDFContext::new(0), context_update_tile_id: 0, max_tile_size_bytes: 0, - deblock: Default::default(), + deblock: DeblockState::new(&fi.config, fi.frame_type), segmentation: Default::default(), restoration: rs, frame_me_stats: FrameMEStats::new_arc_array(fi.w_in_b, fi.h_in_b), @@ -539,6 +539,22 @@ pub struct DeblockState { pub block_delta_multi: bool, } +impl DeblockState { + pub fn new(config: &EncoderConfig, frame_type: FrameType) -> Self { + let mut state = DeblockState { ..Default::default() }; + if frame_type == FrameType::INTER { + // Apply deblock strength only to inter frames + for level in &mut state.levels { + *level = ((*level as f32) * config.advanced_flags.deblock_strength) + .min(MAX_LOOP_FILTER as f32) + .round() as u8; + } + } + state.sharpness = config.advanced_flags.deblock_sharpness; + state + } +} + impl Default for DeblockState { fn default() -> Self { DeblockState { diff --git a/src/fuzzing.rs b/src/fuzzing.rs index d6a7b5e6ad..ba95d0766a 100644 --- a/src/fuzzing.rs +++ b/src/fuzzing.rs @@ -258,6 +258,7 @@ impl Arbitrary for ArbitraryEncoder { switch_frame_interval: u.int_in_range(0..=3)?, tune: *u.choose(&[Tune::Psnr, Tune::Psychovisual])?, film_grain_params: None, + advanced_flags: Default::default(), }; let frame_count = diff --git a/src/rate.rs b/src/rate.rs index e7633777a1..fa5ba4ca87 100644 --- a/src/rate.rs +++ b/src/rate.rs @@ -14,6 +14,7 @@ use crate::quantize::{ac_q, dc_q, select_ac_qi, select_dc_qi}; use crate::util::{ bexp64, bexp_q24, blog64, clamp, q24_to_q57, q57, q57_to_q24, Pixel, }; +use debug_unreachable::debug_unreachable; use std::cmp; // The number of frame sub-types for which we track distinct parameters. @@ -71,14 +72,30 @@ const MQP_Q12: &[i32; FRAME_NSUBTYPES] = &[ (1.0 * (1 << 12) as f64) as i32, ]; -// The ratio 33_810_170.0 / 86_043_287.0 was derived by approximating the median -// of a change of 15 quantizer steps in the quantizer tables. 
-const DQP_Q57: &[i64; FRAME_NSUBTYPES] = &[ - (-(33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64, - (0.0 * (1i64 << 57) as f64) as i64, - ((33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64, - (2.0 * (33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64) as i64, -]; +#[cfg_attr(not(feature = "devel"), allow(unused_variables))] +fn dqp_q57(fti: usize, ip_ratio: f64, pb_ratio: f64, b_ratio: f64) -> i64 { + // The ratio 33_810_170.0 / 86_043_287.0 was derived by approximating the median + // of a change of 15 quantizer steps in the quantizer tables. + const BASE: f64 = (33_810_170.0 / 86_043_287.0) * (1i64 << 57) as f64; + + // If we are not in devel mode, hardcode these as constants so the compiler + // can optimize better. + #[cfg(not(feature = "devel"))] + let ip_ratio = 1.0; + #[cfg(not(feature = "devel"))] + let pb_ratio = 1.0; + #[cfg(not(feature = "devel"))] + let b_ratio = 1.0; + + match fti { + FRAME_SUBTYPE_I => (-ip_ratio * BASE) as i64, + FRAME_SUBTYPE_P => 0i64, + FRAME_SUBTYPE_B0 => (pb_ratio * BASE) as i64, + FRAME_SUBTYPE_B1 => ((pb_ratio + b_ratio) * BASE) as i64, + // SAFETY: This branch should never occur, if it does the macro will catch it in debug mode. 
+ _ => unsafe { debug_unreachable!("Unsupported frame subtype") }, + } +} // For 8-bit-depth inter frames, log_q_y is derived from log_target_q with a // linear model: @@ -703,11 +720,12 @@ impl RCState { pub(crate) fn select_first_pass_qi( &self, bit_depth: usize, fti: usize, chroma_sampling: ChromaSampling, + ip_ratio: f64, pb_ratio: f64, b_ratio: f64, ) -> QuantizerParameters { // Adjust the quantizer for the frame type, result is Q57: let log_q = ((self.pass1_log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[fti] as i64) - + DQP_Q57[fti]; + + dqp_q57(fti, ip_ratio, pb_ratio, b_ratio); QuantizerParameters::new_from_log_q( self.pass1_log_base_q, log_q, @@ -723,14 +741,24 @@ impl RCState { &self, ctx: &ContextInner, output_frameno: u64, fti: usize, maybe_prev_log_base_q: Option, log_isqrt_mean_scale: i64, ) -> QuantizerParameters { + let ip_ratio = ctx.config.advanced_flags.ip_ratio as f64; + let pb_ratio = ctx.config.advanced_flags.pb_ratio as f64; + let b_ratio = ctx.config.advanced_flags.b_ratio as f64; + // Is rate control active? if self.target_bitrate <= 0 { // Rate control is not active. // Derive quantizer directly from frame type. let bit_depth = ctx.config.bit_depth; let chroma_sampling = ctx.config.chroma_sampling; - let (log_base_q, log_q) = - Self::calc_flat_quantizer(ctx.config.quantizer as u8, bit_depth, fti); + let (log_base_q, log_q) = Self::calc_flat_quantizer( + ctx.config.quantizer as u8, + bit_depth, + fti, + ip_ratio, + pb_ratio, + b_ratio, + ); QuantizerParameters::new_from_log_q( log_base_q, log_q, @@ -752,6 +780,9 @@ impl RCState { ctx.config.bit_depth, fti, ctx.config.chroma_sampling, + ip_ratio, + pb_ratio, + b_ratio, ); } // Second pass of 2-pass mode: we know exactly how much of each frame @@ -925,7 +956,7 @@ impl RCState { // Modulate base quantizer by frame type. 
let log_q = ((log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[ftj] as i64) - + DQP_Q57[ftj]; + + dqp_q57(ftj, ip_ratio, pb_ratio, b_ratio); // All the fields here are Q57 except for the exponent, which is // Q6. bits += (nframes[ftj] as i64) @@ -959,7 +990,7 @@ impl RCState { // Modulate base quantizer by frame type. let mut log_q = ((log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[fti] as i64) - + DQP_Q57[fti]; + + dqp_q57(fti, ip_ratio, pb_ratio, b_ratio); // The above allocation looks only at the total rate we'll accumulate // in the next reservoir_frame_delay frames. // However, we could overflow the bit reservoir on the very next @@ -1019,14 +1050,26 @@ impl RCState { } if let Some(qi_max) = self.maybe_ac_qi_max { - let (max_log_base_q, max_log_q) = - Self::calc_flat_quantizer(qi_max, ctx.config.bit_depth, fti); + let (max_log_base_q, max_log_q) = Self::calc_flat_quantizer( + qi_max, + ctx.config.bit_depth, + fti, + ip_ratio, + pb_ratio, + b_ratio, + ); log_base_q = cmp::min(log_base_q, max_log_base_q); log_q = cmp::min(log_q, max_log_q); } if self.ac_qi_min > 0 { - let (min_log_base_q, min_log_q) = - Self::calc_flat_quantizer(self.ac_qi_min, ctx.config.bit_depth, fti); + let (min_log_base_q, min_log_q) = Self::calc_flat_quantizer( + self.ac_qi_min, + ctx.config.bit_depth, + fti, + ip_ratio, + pb_ratio, + b_ratio, + ); log_base_q = cmp::max(log_base_q, min_log_base_q); log_q = cmp::max(log_q, min_log_q); } @@ -1044,7 +1087,8 @@ impl RCState { // Computes a quantizer directly from the frame type and base quantizer index, // without consideration for rate control. 
fn calc_flat_quantizer( - base_qi: u8, bit_depth: usize, fti: usize, + base_qi: u8, bit_depth: usize, fti: usize, ip_ratio: f64, pb_ratio: f64, + b_ratio: f64, ) -> (i64, i64) { // TODO: Rename "quantizer" something that indicates it is a quantizer // index, and move it somewhere more sensible (or choose a better way to @@ -1063,7 +1107,7 @@ impl RCState { let log_base_q = (log_ac_q + log_dc_q + 1) >> 1; // Adjust the quantizer for the frame type, result is Q57: let log_q = ((log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[fti] as i64) - + DQP_Q57[fti]; + + dqp_q57(fti, ip_ratio, pb_ratio, b_ratio); (log_base_q, log_q) } diff --git a/src/rdo.rs b/src/rdo.rs index 553d6f9d75..aa0a33a4b6 100644 --- a/src/rdo.rs +++ b/src/rdo.rs @@ -455,6 +455,7 @@ pub fn distortion_scale( let coded_data = fi.coded_frame_data.as_ref().unwrap(); coded_data.distortion_scales[y * coded_data.w_in_imp_b + x] + .strength_adjusted(fi.config.advanced_flags.temporal_rdo_strength as f64) } /// # Panics @@ -504,6 +505,7 @@ pub fn spatiotemporal_scale( .sum::(); } DistortionScale(((sum + (den >> 1)) / den) as u32) + .strength_adjusted(fi.config.advanced_flags.temporal_rdo_strength as f64) } pub fn distortion_scale_for( @@ -617,6 +619,22 @@ impl DistortionScale { pub const fn mul_u64(self, dist: u64) -> u64 { (self.0 as u64 * dist + (1 << Self::SHIFT >> 1)) >> Self::SHIFT } + + #[inline] + #[cfg(feature = "devel")] + pub fn strength_adjusted(self, strength: f64) -> Self { + let diff = f64::from(self) - 1.0; // Signed offset from the neutral scale 1.0: strength 0.0 maps to 1.0, strength 1.0 maps back to the input scale. + let add = diff * strength; + DistortionScale::from((1.0 + add).max(0.0)) + } + + #[inline(always)] + #[cfg(not(feature = "devel"))] + pub fn strength_adjusted(self, _strength: f64) -> Self { + // If we aren't using a devel build, just return self + // so we do not add any performance cost. + self + } } impl std::ops::Mul for DistortionScale {