diff --git a/Config/Sample.cfg b/Config/Sample.cfg index 4e6f53eda..5dd1921f1 100644 --- a/Config/Sample.cfg +++ b/Config/Sample.cfg @@ -76,16 +76,19 @@ SearchAreaHeight : 7 # Number of search posit ConstrainedIntra : 0 # Enable the use of Constrained Intra which results in sending two PPSs (0: OFF, 1: ON) # ====================== Rate Control =============================== -RateControlMode : 0 # Rate control mode (0: OFF(CQP), 1: VBR) +RateControlMode : 0 # Rate control mode (0: OFF(CQP), 1: VBR, 2: CRF) TargetBitRate : 7000000 # Target Bit Rate (in bits per second) vbvMaxrate : 0 # VBV MaxRate (in bits per second) vbvBufsize : 0 # VBV Bufsize (in bits per second) vbvBufInit : 90 # Sets how full the VBV buffer to be - [0 - 100] hrdFlag : 0 # hrdflag (When hrdFlag is set to 1 it requires vbvMaxrate and vbvBufsize to be greater than 0)(0: disable , 1: enable) +lowLevelVbv : 0 # Enables lowLevelVBV Algorithm (When lowLevelVbv is set to 1 it requires vbvMaxrate and vbvBufsize to be greater than 0)(0: OFF, 1: ON) MaxQpAllowed : 48 # maximum allowed QP when rate control is on - [0-51] MinQpAllowed : 10 # minimum allowed QP when rate control is on - [0-51] LookAheadDistance : 17 # Enable Look Ahead [0-250] SceneChangeDetection : 1 # Enable Scene Change Detection (0: OFF, 1: ON) +ConstantRateFactor : 28 # CRF value allowed for rate control use - [0-51] + # ====================== Adaptive QP Params =============================== BitRateReduction : 1 # BitRate Reduction (only applicable when Tune is set to 0) (0= OFF, 1=ON ) diff --git a/Docs/svt-hevc_encoder_user_guide.md b/Docs/svt-hevc_encoder_user_guide.md index fde93979f..636a947d5 100644 --- a/Docs/svt-hevc_encoder_user_guide.md +++ b/Docs/svt-hevc_encoder_user_guide.md @@ -296,12 +296,14 @@ The encoder parameters present in the Sample.cfg file are listed in this table b | **SearchAreaWidth** | -search-w | [1 - 256] | Depends on input resolution | Motion vector search area width | | **SearchAreaHeight** | -search-h 
| [1 - 256] | Depends on input resolution | Motion vector search area height | | **ConstrainedIntra** | -constrd-intra | [0,1] | 0 | Allow the use of Constrained Intra, when enabled, this features yields to sending two PPSs in the HEVC Elementary streams
0 = OFF, 1 = ON | -| **RateControlMode** | -rc | [0,1] | 0 | 0 : CQP , 1 : VBR | +| **RateControlMode** | -rc | [0 - 2] | 0 | 0 : CQP , 1 : VBR , 2 : CRF | +| **ConstantRateFactor** | -crf | [0 - 51] | 28 | CRF value allowed for rate control use, only applicable when RateControlMode is set to 2 | | **TargetBitRate** | -tbr | Any Number | 7000000 | Target bitrate in bits / second. Only used when RateControlMode is set to 1 | | **vbvMaxrate** | -vbv-maxrate | Any Number | 0 | VBVMaxrate in bits / second. Only used when RateControlMode is set to 1 | | **vbvBufsize** | -vbv-bufsize | Any Number | 0 | VBV BufferSize in bits / second. Only used when RateControlMode is set to 1 | -| **vbvBufInit** | -vbv-init | [0 - 100] | 90 | Sets the initial percentage size that the VBV buffer is filled to | -| **hrdFlag** | -hrd | [0,1] | 0 | Sets the HRD (Hypothetical Reference Decoder) Flag in the encoded stream, 0 = OFF, 1 = ON When
hrdFlag is set to 1, vbvMaxrate and vbvBufsize must be greater than 0 | +| **vbvBufInit** | -vbv-init | [0 - 100] | 90 | Sets the initial fill level of the VBV buffer, as a percentage of its size | +| **hrdFlag** | -hrd | [0,1] | 0 | HRD Flag, 0 = OFF, 1 = ON. When hrdFlag is set to 1, vbvMaxrate and vbvBufsize must be greater than 0 | +| **lowLevelVbv** | -low-level-vbv | [0,1] | 0 | Enables the lowLevelVBV algorithm, 0 = OFF, 1 = ON. When lowLevelVbv is set to 1, vbvMaxrate and vbvBufsize must be greater than 0 | | **MaxQpAllowed** | -max-qp | [0 - 51] | 48 | Maximum QP value allowed for rate control use. Only used when RateControlMode is set to 1. Has to be >= MinQpAllowed | | **MinQpAllowed** | -min-qp | [0 - 50] | 10 | Minimum QP value allowed for rate control use. Only used when RateControlMode is set to 1. Has to be < MaxQpAllowed | | **LookAheadDistance** | -lad | [0 - 250] | Depending on BRC mode | When RateControlMode is set to 1 it's best to set this parameter to be equal to the Intra period value (such is the default set by the encoder). When CQP is chosen, then a (2 \* minigopsize +1) look ahead is recommended. | @@ -449,6 +451,21 @@ Similarly, in order to enable VBV and run a 2-stream 8kp50 simultaneous encode o >taskset 0xFFFFFFF0000000FFFFFFF0000000 ./SvtHevcEncApp -encMode 11 -w 3840 -h 2160 -bit-depth 10 -compressed-ten-bit-format 1 -i in.yuv -rc 1 –tbr 10000000 -vbv-maxrate 10000000 -vbv-bufsize 10000000 -fps 50 -b out3.bin -n 5000 –nb 500 & +
+Similarly, in order to enable VBV and run a 2-stream 8kp50 simultaneous encode on a Xeon Platinum 8180 system the following command lines should be used: + +#### *Running Windows\* Server 2016:* + +>start /node 0 SvtHevcEncApp.exe -encMode 12 -tune 0 -w 3840 -h 2160 -bit-depth 10 -compressed-ten-bit-format 1 -i in.yuv -rc 1 -tbr 10000000 -vbv-maxrate 10000000 -vbv-bufsize 20000000 -fps 50 -b out1.bin -n 5000 -nb 500 + +>start /node 1 SvtHevcEncApp.exe -encMode 12 -tune 0 -w 3840 -h 2160 -bit-depth 10 -compressed-ten-bit-format 1 -i in.yuv -rc 1 -tbr 10000000 -vbv-maxrate 10000000 -vbv-bufsize 20000000 -fps 50 -b out3.bin -n 5000 -nb 500 + +#### *Running Ubuntu\* 18.04:* + +>taskset 0x0000000FFFFFFF0000000FFFFFFF ./SvtHevcEncApp -encMode 12 -tune 0 -w 3840 -h 2160 -bit-depth 10 -compressed-ten-bit-format 1 -i in.yuv -rc 1 -tbr 10000000 -vbv-maxrate 10000000 -vbv-bufsize 10000000 -fps 50 -b out3.bin -n 5000 -nb 500 & + +>taskset 0xFFFFFFF0000000FFFFFFF0000000 ./SvtHevcEncApp -encMode 12 -tune 0 -w 3840 -h 2160 -bit-depth 10 -compressed-ten-bit-format 1 -i in.yuv -rc 1 -tbr 10000000 -vbv-maxrate 10000000 -vbv-bufsize 10000000 -fps 50 -b out3.bin -n 5000 -nb 500 & +
Where 0x0000000FFFFFFF0000000FFFFFFF and 0xFFFFFFF0000000FFFFFFF0000000 are masks for sockets 0 and 1 respectively on a dual 8180 system. diff --git a/Source/API/EbApi.h b/Source/API/EbApi.h index 028c69722..cdb651f07 100644 --- a/Source/API/EbApi.h +++ b/Source/API/EbApi.h @@ -422,6 +422,11 @@ typedef struct EB_H265_ENC_CONFIGURATION * * Default is 10. */ uint32_t minQpAllowed; + /* CRF value allowed for rate control use, only apllicable when rate + * control mode is set to 2. + * + * Default is 28. */ + uint32_t crf; // bitstream options @@ -545,7 +550,13 @@ typedef struct EB_H265_ENC_CONFIGURATION /* Active channel count. */ uint32_t activeChannelCount; - + /* Enables lowLevelVBV Algorithm + * + * 0 = disable. + * 1 = enable. + * + * Default is 0. */ + uint8_t lowLevelVbv; // Threads management diff --git a/Source/App/CMakeLists.txt b/Source/App/CMakeLists.txt index e39b1881d..4947c4430 100644 --- a/Source/App/CMakeLists.txt +++ b/Source/App/CMakeLists.txt @@ -38,12 +38,12 @@ target_link_libraries(SvtHevcEncApp SvtHevcEnc) if(UNIX) - target_link_libraries(SvtHevcEncApp - pthread) - if(NOT APPLE) - target_link_libraries(SvtHevcEncApp - rt) - endif() + target_link_libraries(SvtHevcEncApp + pthread m) + if(NOT APPLE) + target_link_libraries(SvtHevcEncApp + rt) + endif() endif() install(TARGETS SvtHevcEncApp RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/Source/App/EbAppConfig.c b/Source/App/EbAppConfig.c index bd637f8d9..f3d45a691 100644 --- a/Source/App/EbAppConfig.c +++ b/Source/App/EbAppConfig.c @@ -86,9 +86,11 @@ #define NALU_FILE_TOKEN "-nalu-file" #define RATE_CONTROL_ENABLE_TOKEN "-rc" #define TARGET_BIT_RATE_TOKEN "-tbr" +#define CRF_TOKEN "-crf" #define VBV_MAX_RATE_TOKEN "-vbv-maxrate" #define VBV_BUFFER_SIZE_TOKEN "-vbv-bufsize" #define VBV_BUFFER_INIT_TOKEN "-vbv-init" +#define ENABLE_LOW_LEVEL_VBV_TOKEN "-low-level-vbv" #define HRD_TOKEN "-hrd" #define MAX_QP_TOKEN "-max-qp" #define MIN_QP_TOKEN "-min-qp" @@ -192,6 +194,7 @@ static void 
SetFrameRate (const char *value, EbConfig_t * cfg->frameRate = cfg->frameRate << 16; } } + static void SetFrameRateNumerator (const char *value, EbConfig_t *cfg) {cfg->frameRateNumerator = strtoul(value, NULL, 0);}; static void SetFrameRateDenominator (const char *value, EbConfig_t *cfg) {cfg->frameRateDenominator = strtoul(value, NULL, 0);}; static void SetEncoderBitDepth (const char *value, EbConfig_t *cfg) {cfg->encoderBitDepth = strtoul(value, NULL, 0);} @@ -204,6 +207,7 @@ static void SetCfgIntraRefreshType (const char *value, EbConfig_t * static void SetHierarchicalLevels (const char *value, EbConfig_t *cfg) {cfg->hierarchicalLevels = strtol(value, NULL, 0); }; static void SetCfgPredStructure (const char *value, EbConfig_t *cfg) {cfg->predStructure = strtol(value, NULL, 0); }; static void SetCfgQp (const char *value, EbConfig_t *cfg) {cfg->qp = strtoul(value, NULL, 0);}; +static void SetCfgCrf (const char *value, EbConfig_t *cfg) {cfg->crf = strtoul(value, NULL, 0); }; static void SetCfgUseQpFile (const char *value, EbConfig_t *cfg) {cfg->useQpFile = (EB_BOOL)strtol(value, NULL, 0); }; static void SetCfgTileColumnCount (const char *value, EbConfig_t *cfg) { cfg->tileColumnCount = (EB_BOOL)strtol(value, NULL, 0); }; static void SetCfgTileRowCount (const char *value, EbConfig_t *cfg) { cfg->tileRowCount = (EB_BOOL)strtol(value, NULL, 0); }; @@ -228,6 +232,7 @@ static void SetVbvMaxrate (const char *value, EbConfig_t * static void SetVbvBufsize (const char *value, EbConfig_t *cfg) { cfg->vbvBufsize = strtoul(value, NULL, 0);}; static void SetVbvBufInit (const char *value, EbConfig_t *cfg) { cfg->vbvBufInit = strtoul(value, NULL, 0);}; static void SetHrdFlag (const char *value, EbConfig_t *cfg) { cfg->hrdFlag = strtoul(value, NULL, 0);}; +static void SetLowLevelVbv (const char *value, EbConfig_t *cfg) { cfg->lowLevelVbv = (EB_BOOL)strtol(value, NULL, 0); }; static void SetVideoUsabilityInfo (const char *value, EbConfig_t *cfg) {cfg->videoUsabilityInfo = 
strtol(value, NULL, 0);}; static void SetHighDynamicRangeInput (const char *value, EbConfig_t *cfg) {cfg->highDynamicRangeInput = strtol(value, NULL, 0);}; static void SetAccessUnitDelimiter (const char *value, EbConfig_t *cfg) {cfg->accessUnitDelimiter = strtol(value, NULL, 0);}; @@ -348,6 +353,7 @@ config_entry_t config_entry[] = { { SINGLE_INPUT, VBV_BUFFER_SIZE_TOKEN, "vbvBufsize", SetVbvBufsize }, { SINGLE_INPUT, HRD_TOKEN, "hrd", SetHrdFlag }, { SINGLE_INPUT, VBV_BUFFER_INIT_TOKEN, "vbvBufInit", SetVbvBufInit}, + { SINGLE_INPUT, ENABLE_LOW_LEVEL_VBV_TOKEN,"lowLevelVbv",SetLowLevelVbv}, // Deblock Filter @@ -368,7 +374,8 @@ config_entry_t config_entry[] = { { SINGLE_INPUT, CONSTRAINED_INTRA_ENABLE_TOKEN, "ConstrainedIntra", SetEnableConstrainedIntra }, // Rate Control - { SINGLE_INPUT, RATE_CONTROL_ENABLE_TOKEN, "RateControlMode", SetRateControlMode }, + { SINGLE_INPUT, CRF_TOKEN, "CRF", SetCfgCrf }, + { SINGLE_INPUT, RATE_CONTROL_ENABLE_TOKEN, "RateControlMode", SetRateControlMode }, { SINGLE_INPUT, TARGET_BIT_RATE_TOKEN, "TargetBitRate", SetTargetBitRate }, { SINGLE_INPUT, MAX_QP_TOKEN, "MaxQpAllowed", SetMaxQpAllowed }, { SINGLE_INPUT, MIN_QP_TOKEN, "MinQpAllowed", SetMinQpAllowed }, @@ -570,6 +577,8 @@ void EbConfigCtor(EbConfig_t *configPtr) configPtr->vbvBufsize = 0; configPtr->vbvBufInit = 90; configPtr->hrdFlag = 0; + configPtr->crf = 28; + configPtr->lowLevelVbv = 0; // Testing configPtr->testUserData = 0; diff --git a/Source/App/EbAppConfig.h b/Source/App/EbAppConfig.h index d4bbbac80..09c333431 100644 --- a/Source/App/EbAppConfig.h +++ b/Source/App/EbAppConfig.h @@ -321,6 +321,7 @@ typedef struct EbConfig_s uint32_t targetBitRate; uint32_t maxQpAllowed; uint32_t minQpAllowed; + uint32_t crf; uint32_t vbvMaxRate; uint32_t vbvBufsize; uint64_t vbvBufInit; @@ -348,6 +349,7 @@ typedef struct EbConfig_s EB_BOOL switchThreadsToRtPriority; EB_BOOL fpsInVps; uint32_t hrdFlag; + EB_BOOL lowLevelVbv; EB_BOOL unrestrictedMotionVector; 
/**************************************** diff --git a/Source/App/EbAppContext.c b/Source/App/EbAppContext.c index d9dca8632..5423c38f1 100644 --- a/Source/App/EbAppContext.c +++ b/Source/App/EbAppContext.c @@ -187,9 +187,11 @@ EB_ERRORTYPE CopyConfigurationParameters( callbackData->ebEncParameters.maxQpAllowed = config->maxQpAllowed; callbackData->ebEncParameters.minQpAllowed = config->minQpAllowed; callbackData->ebEncParameters.qp = config->qp; + callbackData->ebEncParameters.crf = config->crf; callbackData->ebEncParameters.vbvMaxrate = config->vbvMaxRate; callbackData->ebEncParameters.vbvBufsize = config->vbvBufsize; callbackData->ebEncParameters.vbvBufInit = config->vbvBufInit; + callbackData->ebEncParameters.lowLevelVbv = config->lowLevelVbv; callbackData->ebEncParameters.useQpFile = (EB_BOOL)config->useQpFile; callbackData->ebEncParameters.tileColumnCount = (EB_BOOL)config->tileColumnCount; callbackData->ebEncParameters.tileRowCount = (EB_BOOL)config->tileRowCount; diff --git a/Source/Lib/C_DEFAULT/CMakeLists.txt b/Source/Lib/C_DEFAULT/CMakeLists.txt index b8f6c2aff..ade810f0b 100644 --- a/Source/Lib/C_DEFAULT/CMakeLists.txt +++ b/Source/Lib/C_DEFAULT/CMakeLists.txt @@ -19,7 +19,8 @@ set(C_DEFAULT_SOURCE EbIntraPrediction_C.c EbMcp_C.c EbMeSadCalculation_C.c - EbPackUnPack_C.c + EbMeSatdCalculation_C.c + EbPackUnPack_C.c EbPictureOperators_C.c EbSampleAdaptiveOffset_C.c EbTransforms_C.c) @@ -38,6 +39,7 @@ add_library(HEVC_C_DEFAULT OBJECT EbIntraPrediction_C.h EbMcp_C.h EbMeSadCalculation_C.h + EbMeSatdCalculation_C.h EbPackUnPack_C.h EbPictureOperators_C.h EbSampleAdaptiveOffset_C.h diff --git a/Source/Lib/C_DEFAULT/EbMeSatdCalculation_C.c b/Source/Lib/C_DEFAULT/EbMeSatdCalculation_C.c new file mode 100644 index 000000000..513735d4f --- /dev/null +++ b/Source/Lib/C_DEFAULT/EbMeSatdCalculation_C.c @@ -0,0 +1,80 @@ +/* +* Copyright(c) 2018 Intel Corporation +* SPDX - License - Identifier: BSD - 2 - Clause - Patent +*/ + +#include "EbMeSatdCalculation_C.h" + 
+#define BITS_PER_SUM (8 * sizeof(EB_U16))
+/* abs2(): absolute value of each of the two 16-bit sums packed in 'a' (one in bits 0-15, one in bits 16-31). */
+static inline EB_U32 abs2(EB_U32 a)
+{
+    EB_U32 s = ((a >> (BITS_PER_SUM - 1)) & (((EB_U32)1 << BITS_PER_SUM) + 1)) * ((EB_U16)-1);
+    /* s is 0xFFFF in each lane whose sign bit is set; the multiplier must be the 16-bit all-ones value, a 32-bit one would spill the low-lane mask into the high lane. */
+    return (a + s) ^ s;
+}
+/* HADAMARD4(): 4-point Hadamard butterfly of (s0..s3) written to (d0..d3); declares temporaries t0..t3 in its own block. */
+#define HADAMARD4(d0, d1, d2, d3, s0, s1, s2, s3) { \
+    EB_U32 t0 = s0 + s1; \
+    EB_U32 t1 = s0 - s1; \
+    EB_U32 t2 = s2 + s3; \
+    EB_U32 t3 = s2 - s3; \
+    d0 = t0 + t2; \
+    d2 = t0 - t2; \
+    d1 = t1 + t3; \
+    d3 = t1 - t3; \
+}
+
+/*******************************************
+Calculate the SATD of one 8-wide by 4-tall block: the two 4x4 halves are Hadamard-transformed together as packed 16-bit lanes; strides are in EB_U8 elements.
+*******************************************/
+EB_U32 SatdCalculation_8x4(
+    EB_U8 *src,
+    EB_U32 srcStride,
+    EB_U8 *ref,
+    EB_U32 refStride)
+{
+    EB_U32 tmp[4][4];
+    EB_U32 a0, a1, a2, a3;
+    EB_U32 sum = 0;
+
+    for (EB_U64 i = 0; i < 4; i++, src += srcStride, ref += refStride)
+    {
+        a0 = (src[0] - ref[0]) + ((EB_U32)(src[4] - ref[4]) << BITS_PER_SUM); /* columns 0-3 in the low lane, columns 4-7 in the high lane */
+        a1 = (src[1] - ref[1]) + ((EB_U32)(src[5] - ref[5]) << BITS_PER_SUM);
+        a2 = (src[2] - ref[2]) + ((EB_U32)(src[6] - ref[6]) << BITS_PER_SUM);
+        a3 = (src[3] - ref[3]) + ((EB_U32)(src[7] - ref[7]) << BITS_PER_SUM);
+        HADAMARD4(tmp[i][0], tmp[i][1], tmp[i][2], tmp[i][3], a0, a1, a2, a3); /* horizontal pass */
+    }
+
+    for (EB_U64 i = 0; i < 4; i++)
+    {
+        HADAMARD4(a0, a1, a2, a3, tmp[0][i], tmp[1][i], tmp[2][i], tmp[3][i]); /* vertical pass */
+        sum += abs2(a0) + abs2(a1) + abs2(a2) + abs2(a3);
+    }
+
+    return (((EB_U16)sum) + (sum >> BITS_PER_SUM)) >> 1; /* add the two lanes, then halve */
+}
+
+/*******************************************
+Calculate the SATD of a 16x16 block.
+*******************************************/
+EB_U32 SatdCalculation_16x16(
+    EB_U8 *src,
+    EB_U32 srcStride,
+    EB_U8 *ref,
+    EB_U32 refStride)
+{
+    EB_U32 satd = 0;
+    /* Tile the 16x16 block into eight 8-wide by 4-tall pieces and accumulate each piece's SATD. */
+    for (EB_U64 row = 0; row < 16; row += 4)
+    {
+        for (EB_U64 col = 0; col < 16; col += 8)
+        {
+            satd += SatdCalculation_8x4(src + row * srcStride + col, srcStride,
+                ref + row * refStride + col, refStride);
+        }
+    }
+
+    return satd;
+}
diff --git a/Source/Lib/C_DEFAULT/EbMeSatdCalculation_C.h b/Source/Lib/C_DEFAULT/EbMeSatdCalculation_C.h
new file mode 100644
index 000000000..0282c2434
--- /dev/null
+++ b/Source/Lib/C_DEFAULT/EbMeSatdCalculation_C.h
@@ -0,0 +1,23 @@
+/*
+* Copyright(c) 2018 Intel Corporation
+* SPDX - License - Identifier: BSD - 2 - Clause - Patent
+*/
+
+#ifndef EbMeSatdCalculation_C_h
+#define EbMeSatdCalculation_C_h
+
+#include "EbDefinitions.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* SATD of the 16x16 block at src against the one at ref; both strides are in EB_U8 elements. */
+EB_U32 SatdCalculation_16x16(
+    EB_U8 *src,
+    EB_U32 srcStride,
+    EB_U8 *ref,
+    EB_U32 refStride);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // EbMeSatdCalculation_C_h
diff --git a/Source/Lib/Codec/CMakeLists.txt b/Source/Lib/Codec/CMakeLists.txt index ff50f8ea7..f6a307ae1 100644 --- a/Source/Lib/Codec/CMakeLists.txt +++ b/Source/Lib/Codec/CMakeLists.txt @@ -38,6 +38,7 @@ set(Codec_Source EbMotionEstimationResults.c EbPacketizationProcess.c EbPacketizationReorderQueue.c + EbPerFramePrediction.c EbPictureAnalysisProcess.c EbPictureAnalysisResults.c EbPictureBufferDesc.c @@ -154,6 +155,7 @@ add_library(SvtHevcEnc EbPacketizationProcess.h EbPacketizationReorderQueue.h EbPackUnPack.h + EbPerFramePrediction.h EbPictureAnalysisProcess.h EbPictureAnalysisResults.h EbPictureBufferDesc.h @@ -191,6 +193,7 @@ add_library(SvtHevcEnc if(UNIX) set(LIBS "-lpthread") + set(LIBS "${LIBS} -lm") # Set version number for SONAME.
set_target_properties(SvtHevcEnc PROPERTIES SOVERSION ${SVT_HEVC_SOVERSION}) diff --git a/Source/Lib/Codec/EbCodingLoop.c b/Source/Lib/Codec/EbCodingLoop.c index 14f6ae117..ebd151a56 100644 --- a/Source/Lib/Codec/EbCodingLoop.c +++ b/Source/Lib/Codec/EbCodingLoop.c @@ -3116,13 +3116,16 @@ EB_EXTERN void EncodePass( } } } - - QpmDeriveBeaAndSkipQpmFlagLcu( - sequenceControlSetPtr, - pictureControlSetPtr, - lcuPtr, - tbAddr, - contextPtr); + if (sequenceControlSetPtr->staticConfig.lowLevelVbv) { + contextPtr->skipQpmFlag = EB_TRUE; + } + else + QpmDeriveBeaAndSkipQpmFlagLcu( + sequenceControlSetPtr, + pictureControlSetPtr, + lcuPtr, + tbAddr, + contextPtr); encodeContextPtr = ((SequenceControlSet_t*)(pictureControlSetPtr->sequenceControlSetWrapperPtr->objectPtr))->encodeContextPtr; @@ -3145,11 +3148,11 @@ EB_EXTERN void EncodePass( } - EB_BOOL useDeltaQp = (EB_BOOL)(sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction); + EB_BOOL useDeltaQp = (EB_BOOL)(sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction ||( sequenceControlSetPtr->staticConfig.lowLevelVbv)); EB_BOOL singleSegment = (sequenceControlSetPtr->encDecSegmentColCountArray[pictureControlSetPtr->temporalLayerIndex] == 1) && (sequenceControlSetPtr->encDecSegmentRowCountArray[pictureControlSetPtr->temporalLayerIndex] == 1); - EB_BOOL useDeltaQpSegments = singleSegment ? 0 : (EB_BOOL)(sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction); + EB_BOOL useDeltaQpSegments = singleSegment ? 
0 : (EB_BOOL)(sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction||(sequenceControlSetPtr->staticConfig.lowLevelVbv)); if (is16bit) { EncodePassPackLcu( @@ -3197,9 +3200,13 @@ EB_EXTERN void EncodePass( cuPtr->deltaQp = 0; - cuPtr->qp = (sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction) ? contextPtr->qpmQp : pictureControlSetPtr->pictureQp; - lcuPtr->qp = (sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction) ? contextPtr->qpmQp : pictureControlSetPtr->pictureQp; - cuPtr->orgDeltaQp = cuPtr->deltaQp; + + cuPtr->qp = (sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction) && !(sequenceControlSetPtr->staticConfig.lowLevelVbv) ? + contextPtr->qpmQp : lcuPtr->qp; + lcuPtr->qp = (sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction) && !(sequenceControlSetPtr->staticConfig.lowLevelVbv) ? + contextPtr->qpmQp : lcuPtr->qp; + cuPtr->orgDeltaQp = cuPtr->deltaQp; + if (!contextPtr->skipQpmFlag && (sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction) && @@ -3682,7 +3689,8 @@ EB_EXTERN void EncodePass( } // Encode Transform Unit -INTRA- - contextPtr->forceCbfFlag = (contextPtr->skipQpmFlag) ? + + contextPtr->forceCbfFlag = (contextPtr->skipQpmFlag && !(sequenceControlSetPtr->staticConfig.lowLevelVbv)) ? EB_FALSE : lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag && ((contextPtr->cuOriginX & (63)) == 0) && (contextPtr->cuOriginY == lcuOriginY); @@ -3968,7 +3976,7 @@ EB_EXTERN void EncodePass( } //TU LOOP for MV mode + Luma CBF decision. - contextPtr->forceCbfFlag = (contextPtr->skipQpmFlag) ? + contextPtr->forceCbfFlag = (contextPtr->skipQpmFlag && !(sequenceControlSetPtr->staticConfig.vbvBufsize && sequenceControlSetPtr->staticConfig.vbvMaxrate)) ? 
EB_FALSE : lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag && ((tuOriginX & 63) == 0) && (tuOriginY == lcuOriginY); @@ -4157,7 +4165,7 @@ EB_EXTERN void EncodePass( cuPtr->transformUnitArray[contextPtr->tuItr].cbCbf2 = EB_FALSE; cuPtr->transformUnitArray[contextPtr->tuItr].crCbf2 = EB_FALSE; } else if (cuPtr->predictionUnitArray[0].mergeFlag == EB_TRUE) { - contextPtr->forceCbfFlag = (contextPtr->skipQpmFlag) ? + contextPtr->forceCbfFlag = (contextPtr->skipQpmFlag && !(sequenceControlSetPtr->staticConfig.lowLevelVbv)) ? EB_FALSE : lcuPtr->lcuEdgeInfoPtr->tileLeftEdgeFlag && ((tuOriginX & 63) == 0) && (tuOriginY == lcuOriginY); @@ -4167,7 +4175,6 @@ EB_EXTERN void EncodePass( tbAddr, lcuStatPtr->stationaryEdgeOverTimeFlag, pictureControlSetPtr->temporalLayerIndex > 0 ? lcuStatPtr->pmStationaryEdgeOverTimeFlag : lcuStatPtr->stationaryEdgeOverTimeFlag); - // Set Fast El coef shaping method contextPtr->transCoeffShapeLuma = DEFAULT_SHAPE; contextPtr->transCoeffShapeChroma = DEFAULT_SHAPE; diff --git a/Source/Lib/Codec/EbCodingUnit.c b/Source/Lib/Codec/EbCodingUnit.c index 751a41ac1..fc1387262 100644 --- a/Source/Lib/Codec/EbCodingUnit.c +++ b/Source/Lib/Codec/EbCodingUnit.c @@ -9,6 +9,7 @@ #include "EbUtility.h" #include "EbTransformUnit.h" #include "EbPictureControlSet.h" +#include "EbThreads.h" /* Tasks & Questions @@ -19,6 +20,28 @@ Tasks & Questions -Need a ReconPicture for each candidate. -I don't see a way around doing the copies in temp memory and then copying it in... 
*/
+/*
+ * RCStatRowCtor: allocates one RCStatRow_t, stores it in *rcStatRowDblPtr,
+ * records rowIndex, zeroes the bit / QP / CU counters and creates the row's
+ * update mutex.
+ *
+ *   rcStatRowDblPtr  out: receives the newly allocated row-statistics object
+ *   rowIndex         value stored in the object's rowIndex field
+ *
+ * Returns EB_ErrorNone on the visible success path.
+ * NOTE(review): EB_MALLOC and EB_CREATEMUTEX are macros defined outside this
+ * diff; presumably they deal with allocation failure themselves - confirm.
+ */
+EB_ERRORTYPE RCStatRowCtor(
+    RCStatRow_t **rcStatRowDblPtr,
+    EB_U16 rowIndex)
+{
+    EB_ERRORTYPE return_error = EB_ErrorNone;
+    RCStatRow_t *rcStatRowPtr;
+    EB_MALLOC(RCStatRow_t*, rcStatRowPtr, sizeof(RCStatRow_t), EB_N_PTR);
+    *rcStatRowDblPtr = rcStatRowPtr;
+    rcStatRowPtr->rowIndex = rowIndex;
+    rcStatRowPtr->predictedBits = 0;
+    rcStatRowPtr->encodedBits = 0;
+    rcStatRowPtr->rowQp = 0;
+    rcStatRowPtr->totalCUEncoded = 0;
+    rcStatRowPtr->lastEncodedCU = 0;
+    EB_CREATEMUTEX(EB_HANDLE, rcStatRowPtr->rowUpdateMutex, sizeof(EB_HANDLE), EB_MUTEX);
+    /* return_error is only ever assigned EB_ErrorNone in the visible code, so this
+       check can fire only if one of the macros above writes it - NOTE(review): confirm. */
+    if (return_error == EB_ErrorInsufficientResources) {
+        return EB_ErrorInsufficientResources;
+    }
+
+    return EB_ErrorNone;
+}
+
 EB_ERRORTYPE LargestCodingUnitCtor( LargestCodingUnit_t **largetCodingUnitDblPtr, EB_U8 lcuSize, @@ -64,7 +87,10 @@ EB_ERRORTYPE LargestCodingUnitCtor( largestCodingUnitPtr->originY = lcuOriginY; largestCodingUnitPtr->index = lcuIndex; - + largestCodingUnitPtr->proxytotalBits = 0; + largestCodingUnitPtr->rowInd = 0; + largestCodingUnitPtr->intraSadInterval = 0; + largestCodingUnitPtr->interSadInterval = 0; EB_MALLOC(CodingUnit_t**, largestCodingUnitPtr->codedLeafArrayPtr, sizeof(CodingUnit_t*) * CU_MAX_COUNT, EB_N_PTR); for(codedLeafIndex=0; codedLeafIndex < CU_MAX_COUNT; ++codedLeafIndex) { EB_MALLOC(CodingUnit_t*, largestCodingUnitPtr->codedLeafArrayPtr[codedLeafIndex], sizeof(CodingUnit_t) , EB_N_PTR); diff --git a/Source/Lib/Codec/EbCodingUnit.h b/Source/Lib/Codec/EbCodingUnit.h index 51fadfcda..ace8aa280 100644 --- a/Source/Lib/Codec/EbCodingUnit.h +++ b/Source/Lib/Codec/EbCodingUnit.h @@ -204,6 +204,11 @@ typedef struct LargestCodingUnit_s { //Bits only used for quantized coeffs EB_U32 quantizedCoeffsBits; EB_U32 totalBits; + EB_U32 proxytotalBits; + EB_U32 rowInd; + EB_U32 intraSadInterval; + EB_U32 interSadInterval; + EB_U8 fullLcu; // Quantized Coefficients EbPictureBufferDesc_t *quantizedCoeff; @@ -223,6 +228,23 @@ typedef struct LargestCodingUnit_s {
+/**************************************
+ * Low level vbv
+ **************************************/
+/* Statistics kept for one LCU row by the low-level (row-level) VBV rate control. */
+typedef struct RCStatRow_s
+{
+    EB_U16 rowIndex;            /* row index handed to RCStatRowCtor */
+    EB_U32 encodedBits;         /* sum of 'totalBits' of encoded LCUs */
+    EB_U32 predictedBits;       /* rebuilt by predictRowsSizeSum from predBitsPerLcu over the row's first totalCUEncoded LCUs */
+    EB_U32 rowQp;               /* set to 0 by RCStatRowCtor; row 0's value is the lower bound RowVbvRateControl uses when decreasing QP */
+    EB_U32 totalCUEncoded;      /*Tracks number of LCUs encoded in each row*/
+    EB_U32 lastEncodedCU;       /*Tracks the address of last encoded CU*/
+    EB_HANDLE rowUpdateMutex;   /* held by ResetRowStats while the counters above are cleared */
+}RCStatRow_t;
+
+
+extern EB_ERRORTYPE RCStatRowCtor(
+RCStatRow_t **rcStatRowDblPtr, EB_U16 rowIndex);
 extern EB_ERRORTYPE LargestCodingUnitCtor( diff --git a/Source/Lib/Codec/EbDefinitions.h b/Source/Lib/Codec/EbDefinitions.h index a1700a758..f2e451486 100644 --- a/Source/Lib/Codec/EbDefinitions.h +++ b/Source/Lib/Codec/EbDefinitions.h @@ -1052,6 +1052,11 @@ typedef enum EB_SEI { #define MIN_QP_VALUE 0 #define MAX_QP_VALUE 51 #define MAX_CHROMA_MAP_QP_VALUE 57 +#define BASE_FRAME_DURATION 0.04 +#define MIN_FRAME_DURATION 0.01 +#define MAX_FRAME_DURATION 1.00 +#define SAD_SATD_CONSTANT 5 + //***Transforms*** @@ -1091,6 +1096,10 @@ typedef enum EB_SEI { #define BR_SHIFT 6 #define CPB_SHIFT 4 +#define STEP_SIZE 1 +#define RC_TOL 0 +#define RC_TOL_FACTOR 0 + // INTRA restriction for global motion #define INTRA_GLOBAL_MOTION_NON_MOVING_INDEX_TH 2 #define INTRA_GLOBAL_MOTION_DARK_LCU_TH 50 diff --git a/Source/Lib/Codec/EbEncDecProcess.c b/Source/Lib/Codec/EbEncDecProcess.c index a19a44dfb..45ade1371 100644 --- a/Source/Lib/Codec/EbEncDecProcess.c +++ b/Source/Lib/Codec/EbEncDecProcess.c @@ -5,6 +5,7 @@ #include + #include "EbTransforms.h" #include "EbEncDecTasks.h" #include "EbEncDecResults.h" @@ -13,7 +14,7 @@ #include "EbSampleAdaptiveOffset.h" #include "EbErrorCodes.h" #include "EbErrorHandling.h" - +#include "EbPerFramePrediction.h" void PrecomputeCabacCost(CabacCost_t *CabacCostPtr, CabacEncodeContext_t *cabacEncodeCtxPtr); @@ -200,6 +201,7 @@ EB_ERRORTYPE EncDecContextCtor( contextPtr->saoLeftBuffer16[1] = contextPtr->saoLeftBuffer16[0] +
(MAX_LCU_SIZE + 2); } + return EB_ErrorNone; } @@ -1344,6 +1346,48 @@ static void ResetEncodePassNeighborArrays(PictureControlSet_t *pictureControlSet return; }
+//Reset Proxy Neighbor arrays in Encdec
+/* Resets the four temp* neighbor arrays (mode type, leaf depth, intra luma mode, skip flag) of tile tileIdx. */
+static void EntropyCodingResetTempNeighborArrays(PictureControlSet_t *pictureControlSetPtr, EB_U16 tileIdx)
+{
+    NeighborArrayUnitReset(pictureControlSetPtr->tempModeTypeNeighborArray[tileIdx]);
+    NeighborArrayUnitReset(pictureControlSetPtr->tempLeafDepthNeighborArray[tileIdx]);
+    NeighborArrayUnitReset(pictureControlSetPtr->tempIntraLumaModeNeighborArray[tileIdx]);
+    NeighborArrayUnitReset(pictureControlSetPtr->tempSkipFlagNeighborArray[tileIdx]);
+
+    return;
+}
+
+/*
+ * ResetTempEntropy: for every tile of the picture (tileRowCount * tileColumnCount),
+ * resets the tile's temporary entropy coder at the picture QP and slice type,
+ * sets both temp previous-coded-QP trackers to the picture QP and resets the
+ * tile's temp neighbor arrays.
+ */
+static void ResetTempEntropy(
+    PictureControlSet_t *pictureControlSetPtr,
+    SequenceControlSet_t *sequenceControlSetPtr) {
+    EB_U32 tileCnt = pictureControlSetPtr->ParentPcsPtr->tileRowCount * pictureControlSetPtr->ParentPcsPtr->tileColumnCount;
+    EB_U32 tileIdx = 0;
+    EB_U32 entropyCodingQp = pictureControlSetPtr->pictureQp;
+    for (tileIdx = 0; tileIdx < tileCnt; tileIdx++) {
+        ResetEntropyCoder(
+            sequenceControlSetPtr->encodeContextPtr,
+            pictureControlSetPtr->entropyCodingInfo[tileIdx]->tempEntropyCoderPtr,
+            entropyCodingQp,
+            pictureControlSetPtr->sliceType);
+
+        pictureControlSetPtr->tempprevCodedQp[tileIdx] = pictureControlSetPtr->pictureQp;
+        pictureControlSetPtr->tempprevQuantGroupCodedQp[tileIdx] = pictureControlSetPtr->pictureQp;
+        /* tileIdx is EB_U32 here and EB_U16 in the callee, so it is narrowed at the call. */
+        EntropyCodingResetTempNeighborArrays(pictureControlSetPtr, tileIdx);
+    }
+}
+
+/*
+ * ResetRowStats: clears totalCUEncoded, encodedBits and lastEncodedCU of every
+ * LCU row of the picture, taking each row's rowUpdateMutex around the writes.
+ * predictedBits and rowQp are not touched here.
+ * NOTE(review): the row counter is EB_U8, so the loop would not terminate for a
+ * picture with 256 or more LCU rows - confirm the maximum supported height.
+ */
+static void ResetRowStats(
+    PictureControlSet_t *pictureControlSetPtr,SequenceControlSet_t *sequenceControlSetPtr) {
+    EB_U8 lcuSize = (EB_U8)sequenceControlSetPtr->lcuSize;
+    EB_U8 lcuSizeLog2 = (EB_U8)Log2f(lcuSize);
+    /* Row count is the luma height divided by the LCU size, rounded up. */
+    for (EB_U8 row = 0; row < ((sequenceControlSetPtr->lumaHeight + lcuSize - 1) >> lcuSizeLog2); row++) {
+        EbBlockOnMutex(pictureControlSetPtr->rowStats[row]->rowUpdateMutex);
+        pictureControlSetPtr->rowStats[row]->totalCUEncoded = 0;
+        pictureControlSetPtr->rowStats[row]->encodedBits = 0;
+        pictureControlSetPtr->rowStats[row]->lastEncodedCU = 0;
+        EbReleaseMutex(pictureControlSetPtr->rowStats[row]->rowUpdateMutex);
+    }
+}
 /************************************************** * Reset Coding Loop **************************************************/ @@ -1446,7 +1490,7 @@ static void EncDecConfigureLcu( { //RC is off - if (sequenceControlSetPtr->staticConfig.rateControlMode == 0 && sequenceControlSetPtr->staticConfig.improveSharpness == 0 && sequenceControlSetPtr->staticConfig.bitRateReduction == 0) { + if ((sequenceControlSetPtr->staticConfig.rateControlMode != 1) && sequenceControlSetPtr->staticConfig.improveSharpness == 0 && sequenceControlSetPtr->staticConfig.bitRateReduction == 0) { contextPtr->qp = pictureQp; } //RC is on @@ -2614,6 +2658,138 @@ static EB_ERRORTYPE SignalDerivationEncDecKernelOq( return return_error; }
+/*
+ * predBitsPerLcu: looks up the predicted bit count of one LCU at QP qpVbv in the
+ * rate-control tables rateControlTablesArray[qpVbv].
+ *   - Returns 0 when the LCU's fullLcu flag is clear.
+ *   - I pictures: returns the intra table entry at lcuPtr->intraSadInterval.
+ *   - Other pictures: returns the entry of the picture's temporal-layer table at
+ *     lcuPtr->interSadInterval, unless that exceeds three times the intra entry,
+ *     in which case the intra entry is returned.
+ */
+EB_U32 predBitsPerLcu(PictureControlSet_t* pictureControlSetPtr, EncodeContext_t* encodeContextPtr, LargestCodingUnit_t* lcuPtr, EB_U8 qpVbv)
+{
+    EB_U32 sadBits;
+    EB_U32 intraSadBits;
+    EB_U32 interSadBits;
+    RateControlTables_t *rateControlTablesPtr;
+    EB_Bit_Number *sadBitsArrayPtr;
+    EB_Bit_Number *intraSadBitsArrayPtr;
+
+    rateControlTablesPtr = &encodeContextPtr->rateControlTablesArray[qpVbv];
+    sadBitsArrayPtr = rateControlTablesPtr->sadBitsArray[pictureControlSetPtr->temporalLayerIndex];
+    intraSadBitsArrayPtr = rateControlTablesPtr->intraSadBitsArray[0];
+
+    if (pictureControlSetPtr->sliceType == EB_I_PICTURE) {
+        if (lcuPtr->fullLcu) {
+            intraSadBits = intraSadBitsArrayPtr[lcuPtr->intraSadInterval];
+            interSadBits = 0;
+            sadBits = intraSadBits;
+        }
+        else
+            return 0;
+    }
+    else
+    {
+        if (lcuPtr->fullLcu) {
+            intraSadBits = intraSadBitsArrayPtr[lcuPtr->intraSadInterval];
+            interSadBits = sadBitsArrayPtr[lcuPtr->interSadInterval];
+            sadBits = interSadBits;
+            /* Cap the inter prediction: fall back to the intra estimate when inter is more than 3x larger. */
+            if (interSadBits > (intraSadBits * 3))
+                sadBits = intraSadBits;
+        }
+        else
+            return 0;
+    }
+    return sadBits;
+}
+
+EB_U64 predictRowsSizeSum(PictureControlSet_t* pictureControlSetPtr, SequenceControlSet_t* sequenceControlSetPtr, EB_U8 qpVbv, EB_U64 *encodedBitsSoFar) +{ + EB_U64 predictedBitsForFrame = 0; + EB_U64 predictedBitsDoneSoFar = 0; + *encodedBitsSoFar = 0; + EB_U64 framesizeEstimated = 0; + HlRateControlHistogramEntry_t *hlRateControlHistogramPtrTemp; + EB_U8 lcuSizeLog2 = (EB_U8)Log2f(sequenceControlSetPtr->lcuSize); + EB_U8 pictureWidthInLcu = (sequenceControlSetPtr->lumaWidth + sequenceControlSetPtr->lcuSize - 1) >> lcuSizeLog2; + EB_U8 pictureHeightInLcu = (sequenceControlSetPtr->lumaHeight + sequenceControlSetPtr->lcuSize - 1) >> lcuSizeLog2; + + hlRateControlHistogramPtrTemp = (sequenceControlSetPtr->encodeContextPtr->hlRateControlHistorgramQueue[pictureControlSetPtr->ParentPcsPtr->hlHistogramQueueIndex]); + for (EB_U8 row = 0; row < pictureHeightInLcu; row++) { + pictureControlSetPtr->rowStats[row]->predictedBits = 0; + for (EB_U8 col = 0; col < pictureControlSetPtr->rowStats[row]->totalCUEncoded; col++) { + pictureControlSetPtr->rowStats[row]->predictedBits += predBitsPerLcu(pictureControlSetPtr, sequenceControlSetPtr->encodeContextPtr,pictureControlSetPtr->lcuPtrArray[row*pictureWidthInLcu+col],qpVbv); + } + *encodedBitsSoFar += pictureControlSetPtr->rowStats[row]->encodedBits; + predictedBitsDoneSoFar += pictureControlSetPtr->rowStats[row]->predictedBits; + } + predictedBitsForFrame = predictBits(sequenceControlSetPtr, sequenceControlSetPtr->encodeContextPtr, hlRateControlHistogramPtrTemp, qpVbv); + framesizeEstimated = (predictedBitsForFrame - predictedBitsDoneSoFar) + (*encodedBitsSoFar); + return framesizeEstimated; +} + +EB_U8 RowVbvRateControl(PictureControlSet_t *pictureControlSetPtr, + SequenceControlSet_t *sequenceControlSetPtr, + RCStatRow_t *rowPtr, + EncodeContext_t *rcData, + EB_U8 qpVbv) +{ + /* tweak quality based on difference from predicted size */ + EB_U8 prevRowQp= qpVbv; + EB_U8 lcuSizeLog2 = (EB_U8)Log2f(sequenceControlSetPtr->lcuSize); 
+ EB_U8 pictureHeightInLcu = (sequenceControlSetPtr->lumaHeight + sequenceControlSetPtr->lcuSize - 1) >> lcuSizeLog2; + EB_U8 qpAbsoluteMax = sequenceControlSetPtr->staticConfig.maxQpAllowed; + EB_U8 qpAbsoluteMin = sequenceControlSetPtr->staticConfig.minQpAllowed; + EB_U8 qpMax = MIN(prevRowQp + 4, qpAbsoluteMax); + EB_U8 qpMin = MAX(prevRowQp - 4, qpAbsoluteMin); + + EB_U64 bufferLeftPlanned = pictureControlSetPtr->bufferFillPerFrame - pictureControlSetPtr->frameSizePlanned; + if (rowPtr->rowIndex < pictureHeightInLcu) { + //There is no tolerance limit allowed in low level RC as of now. + EB_U64 rcTol = RC_TOL; + EB_U64 encodedBitsSoFar = 0; + EB_U64 accFrameBits = predictRowsSizeSum(pictureControlSetPtr, sequenceControlSetPtr, qpVbv, &encodedBitsSoFar); + + + /* Don't increase the row QPs until a sufficient amount of the bits of + the frame have been processed, in case a flat area at the top of the + frame was measured inaccurately. */ + if (encodedBitsSoFar < (EB_U64)(0.05f * pictureControlSetPtr->frameSizePlanned)) + qpMax = qpAbsoluteMax = prevRowQp; + + if (pictureControlSetPtr->sliceType!= EB_I_PICTURE) + rcTol *= RC_TOL_FACTOR; + + if(sequenceControlSetPtr->targetBitrate <= sequenceControlSetPtr->encodeContextPtr->vbvMaxrate) + qpMin = MAX(qpMin, pictureControlSetPtr->qpNoVbv); + + //Increase the Qp when the current frame size exceeds the estimated frame size + while (qpVbv < qpMax + && (((accFrameBits > pictureControlSetPtr->frameSizePlanned + rcTol) || + (pictureControlSetPtr->bufferFillPerFrame - accFrameBits < (EB_U64)(bufferLeftPlanned * 0.5)) || + (accFrameBits > pictureControlSetPtr->frameSizePlanned && qpVbv < pictureControlSetPtr->qpNoVbv) + ))) { + qpVbv += STEP_SIZE; + encodedBitsSoFar = 0; + accFrameBits = predictRowsSizeSum(pictureControlSetPtr, sequenceControlSetPtr, qpVbv, &encodedBitsSoFar); + } + + //Decrease the Qp when the current frame size is lower than that of the estimated frame size + while (qpVbv > qpMin + && (qpVbv > 
pictureControlSetPtr->rowStats[0]->rowQp ) + && (((accFrameBits < (EB_U64)(pictureControlSetPtr->frameSizePlanned * 0.8f) && qpVbv <= prevRowQp) + || (EB_S64)accFrameBits < (EB_S64)((EB_S64)(pictureControlSetPtr->bufferFillPerFrame - (rcData->vbvBufsize + (rcData->vbvMaxrate / (sequenceControlSetPtr->staticConfig.frameRate >> 16)))) * 1.1)) + )) { + qpVbv -= STEP_SIZE; + encodedBitsSoFar = 0; + accFrameBits = predictRowsSizeSum(pictureControlSetPtr, sequenceControlSetPtr, qpVbv, &encodedBitsSoFar); + } + + /* avoid VBV underflow */ + while ((qpVbv < qpAbsoluteMax) + && (pictureControlSetPtr->bufferFillPerFrame - accFrameBits < (rcData->vbvMaxrate / (sequenceControlSetPtr->staticConfig.frameRate >> 16)))) { + qpVbv += STEP_SIZE; + accFrameBits = predictRowsSizeSum(pictureControlSetPtr, sequenceControlSetPtr, qpVbv, &encodedBitsSoFar); + } + pictureControlSetPtr->frameSizeEstimated = accFrameBits; + + + } + + return qpVbv; +} /****************************************************** * EncDec Kernel @@ -2652,6 +2828,9 @@ void* EncDecKernel(void *inputPtr) EB_U32 lcuRowIndexCount; EB_U32 tileGroupWidthInLcu; MdcLcuData_t *mdcPtr; + + //Row level vbv controls + RCStatRow_t *rowPtr; // Variables EB_BOOL enableSaoFlag = EB_TRUE; EB_BOOL is16bit; @@ -2671,6 +2850,13 @@ void* EncDecKernel(void *inputPtr) EB_U32 tileGroupIdx; EB_U32 tileGroupLcuStartX, tileGroupLcuStartY; + //Proxy entropy coding + EbPictureBufferDesc_t * tempCoeffPicturePtr; + EB_U32 tempWrittenBitsBeforeQuantizedCoeff; + EB_U32 tempWrittenBitsAfterQuantizedCoeff; + EB_U8 baseQp; + EB_U32 lcuWidth; + EB_U32 lcuHeight; for (;;) { @@ -2705,6 +2891,7 @@ void* EncDecKernel(void *inputPtr) lcuSizeLog2 = (EB_U8)Log2f(lcuSize); contextPtr->lcuSize = lcuSize; tileGroupWidthInLcu = ppcsPtr->tileGroupInfoArray[tileGroupIdx].tileGroupWidthInLcu; + contextPtr->totIntraCodedArea = 0; contextPtr->codedLcuCount = 0; @@ -2812,6 +2999,15 @@ void* EncDecKernel(void *inputPtr) contextPtr, pictureControlSetPtr, 
sequenceControlSetPtr); + + //Reset Stats required for low level vbv + if (segmentIndex == 0) { + // Reset for the tiles in the 1st row. + if (tileGroupIdx == 0) { + ResetTempEntropy(pictureControlSetPtr, sequenceControlSetPtr); + ResetRowStats(pictureControlSetPtr,sequenceControlSetPtr); + } + } contextPtr->mdContext->CabacCost = pictureControlSetPtr->cabacCost; @@ -2966,6 +3162,30 @@ void* EncDecKernel(void *inputPtr) } } + //Block level vbv tuning starts here + if (sequenceControlSetPtr->staticConfig.lowLevelVbv && sequenceControlSetPtr->staticConfig.lookAheadDistance > 0) { + EbBlockOnMutex(pictureControlSetPtr->rowStats[yLcuIndex]->rowUpdateMutex); + rowPtr = pictureControlSetPtr->rowStats[yLcuIndex]; + rowPtr->rowIndex = yLcuIndex; + lcuPtr->rowInd = yLcuIndex; + if (!yLcuIndex) + pictureControlSetPtr->rowStats[rowPtr->rowIndex]->rowQp = pictureControlSetPtr->pictureQp; + + //Assign the base qp for the LCU + if (xLcuIndex <= yLcuIndex && yLcuIndex) + lcuPtr->qp = pictureControlSetPtr->lcuPtrArray[lcuIndex - ppcsPtr->pictureWidthInLcu]->qp; + else + lcuPtr->qp = pictureControlSetPtr->rowStats[rowPtr->rowIndex]->rowQp; + + // If current block is at row diagonal checkpoint, call vbv ratecontrol. 
+ if (xLcuIndex == yLcuIndex && yLcuIndex) + { + baseQp = RowVbvRateControl(pictureControlSetPtr, sequenceControlSetPtr, rowPtr, sequenceControlSetPtr->encodeContextPtr, lcuPtr->qp); + lcuPtr->qp = CLIP3(sequenceControlSetPtr->staticConfig.minQpAllowed, sequenceControlSetPtr->staticConfig.maxQpAllowed, baseQp); + rowPtr->rowQp = lcuPtr->qp; + } + EbReleaseMutex(pictureControlSetPtr->rowStats[yLcuIndex]->rowUpdateMutex); + } // Configure the LCU EncDecConfigureLcu( // HT done @@ -2979,7 +3199,7 @@ void* EncDecKernel(void *inputPtr) // Encode Pass EncodePass( // HT done - sequenceControlSetPtr, + sequenceControlSetPtr, pictureControlSetPtr, lcuPtr, lcuIndex, @@ -2988,7 +3208,55 @@ void* EncDecKernel(void *inputPtr) lcuPtr->qp, enableSaoFlag, contextPtr); - + if (sequenceControlSetPtr->staticConfig.lowLevelVbv && sequenceControlSetPtr->staticConfig.lookAheadDistance > 0) { + /*Entropy Estimation for LCU*/ + tempCoeffPicturePtr = lcuPtr->quantizedCoeff; + tempWrittenBitsBeforeQuantizedCoeff = ((OutputBitstreamUnit_t*)EntropyCoderGetBitstreamPtr(pictureControlSetPtr->entropyCodingInfo[tileGroupIdx]->tempEntropyCoderPtr))->writtenBitsCount + + 32 - ((CabacEncodeContext_t*)pictureControlSetPtr->entropyCodingInfo[tileGroupIdx]->tempEntropyCoderPtr->cabacEncodeContextPtr)->bacEncContext.bitsRemainingNum + + (((CabacEncodeContext_t*)pictureControlSetPtr->entropyCodingInfo[tileGroupIdx]->tempEntropyCoderPtr->cabacEncodeContextPtr)->bacEncContext.tempBufferedBytesNum << 3); + EstimateLcu( + lcuPtr, + lcuOriginX, + lcuOriginY, + pictureControlSetPtr, + sequenceControlSetPtr->lcuSize, + pictureControlSetPtr->entropyCodingInfo[tileGroupIdx]->tempEntropyCoderPtr, + tempCoeffPicturePtr, + pictureControlSetPtr->tempModeTypeNeighborArray[tileGroupIdx], + pictureControlSetPtr->tempLeafDepthNeighborArray[tileGroupIdx], + pictureControlSetPtr->tempIntraLumaModeNeighborArray[tileGroupIdx], + pictureControlSetPtr->tempSkipFlagNeighborArray[tileGroupIdx], + tileGroupIdx, + 0, + 0); + + 
tempWrittenBitsAfterQuantizedCoeff = ((OutputBitstreamUnit_t*)EntropyCoderGetBitstreamPtr(pictureControlSetPtr->entropyCodingInfo[tileGroupIdx]->tempEntropyCoderPtr))->writtenBitsCount + + 32 - ((CabacEncodeContext_t*)pictureControlSetPtr->entropyCodingInfo[tileGroupIdx]->tempEntropyCoderPtr->cabacEncodeContextPtr)->bacEncContext.bitsRemainingNum + + (((CabacEncodeContext_t*)pictureControlSetPtr->entropyCodingInfo[tileGroupIdx]->tempEntropyCoderPtr->cabacEncodeContextPtr)->bacEncContext.tempBufferedBytesNum << 3); + lcuPtr->proxytotalBits = tempWrittenBitsAfterQuantizedCoeff - tempWrittenBitsBeforeQuantizedCoeff; + lcuWidth = (sequenceControlSetPtr->lumaWidth - lcuOriginX) < (EB_U16)MAX_LCU_SIZE ? (sequenceControlSetPtr->lumaWidth - lcuOriginX) : (EB_U16)MAX_LCU_SIZE; + lcuHeight = (sequenceControlSetPtr->lumaHeight - lcuOriginY) < (EB_U16)MAX_LCU_SIZE ? (sequenceControlSetPtr->lumaHeight - lcuOriginY) : (EB_U16)MAX_LCU_SIZE; + lcuPtr->fullLcu = 0; + if (pictureControlSetPtr->sliceType == EB_I_PICTURE) { + if (lcuWidth == MAX_LCU_SIZE && lcuHeight == MAX_LCU_SIZE) { + lcuPtr->intraSadInterval = pictureControlSetPtr->ParentPcsPtr->intraSadIntervalIndex[lcuIndex]; + lcuPtr->fullLcu = 1; + } + } + else { + if (lcuWidth == MAX_LCU_SIZE && lcuHeight == MAX_LCU_SIZE) { + lcuPtr->intraSadInterval = pictureControlSetPtr->ParentPcsPtr->intraSadIntervalIndex[lcuIndex]; + lcuPtr->interSadInterval = pictureControlSetPtr->ParentPcsPtr->interSadIntervalIndex[lcuIndex]; + lcuPtr->fullLcu = 1; + } + } + //Update LCU Stats for row level vbv control + EbBlockOnMutex(pictureControlSetPtr->rowStats[yLcuIndex]->rowUpdateMutex); + pictureControlSetPtr->rowStats[yLcuIndex]->encodedBits += lcuPtr->proxytotalBits; + pictureControlSetPtr->rowStats[yLcuIndex]->totalCUEncoded++; + pictureControlSetPtr->rowStats[yLcuIndex]->lastEncodedCU = lcuPtr->index; + EbReleaseMutex(pictureControlSetPtr->rowStats[yLcuIndex]->rowUpdateMutex); + } if 
(pictureControlSetPtr->ParentPcsPtr->referencePictureWrapperPtr != NULL){ ((EbReferenceObject_t*)pictureControlSetPtr->ParentPcsPtr->referencePictureWrapperPtr->objectPtr)->intraCodedAreaLCU[lcuIndex] = (EB_U8)((100 * contextPtr->intraCodedAreaLCU[lcuIndex]) / (64 * 64)); } diff --git a/Source/Lib/Codec/EbEncHandle.c b/Source/Lib/Codec/EbEncHandle.c index 5e799f019..5d6b78722 100644 --- a/Source/Lib/Codec/EbEncHandle.c +++ b/Source/Lib/Codec/EbEncHandle.c @@ -2122,7 +2122,7 @@ void LoadDefaultBufferConfigurationSettings( EB_U32 meSegH = (((sequenceControlSetPtr->maxInputLumaHeight + 32) / MAX_LCU_SIZE) < 6) ? 1 : 6; EB_U32 meSegW = (((sequenceControlSetPtr->maxInputLumaWidth + 32) / MAX_LCU_SIZE) < 10) ? 1 : 10; - EB_U16 tileColCount = sequenceControlSetPtr->staticConfig.tileColumnCount; + // EB_U16 tileColCount = sequenceControlSetPtr->staticConfig.tileColumnCount; EB_U16 tileRowCount = sequenceControlSetPtr->staticConfig.tileRowCount; EB_U32 inputPic = SetParentPcs(&sequenceControlSetPtr->staticConfig); @@ -2578,7 +2578,7 @@ void CopyApiFromApp( sequenceControlSetPtr->chromaFormatIdc = (EB_U32)(sequenceControlSetPtr->staticConfig.encoderColorFormat); sequenceControlSetPtr->encoderBitDepth = (EB_U32)(sequenceControlSetPtr->staticConfig.encoderBitDepth); sequenceControlSetPtr->enableTmvpSps = sequenceControlSetPtr->staticConfig.unrestrictedMotionVector; - + // Copying to masteringDisplayColorVolume structure sequenceControlSetPtr->masteringDisplayColorVolume.displayPrimaryX[0] = sequenceControlSetPtr->staticConfig.displayPrimaryX[0]; sequenceControlSetPtr->masteringDisplayColorVolume.displayPrimaryX[1] = sequenceControlSetPtr->staticConfig.displayPrimaryX[1]; @@ -2905,6 +2905,16 @@ static EB_ERRORTYPE VerifySettings(\ SVT_LOG("SVT [Error]: Instance %u: Invalid vbvBufInit [0 - 100]\n", channelNumber + 1); return_error = EB_ErrorBadParameter; } + + if (config->lowLevelVbv > 1) { + SVT_LOG("SVT [Error]: Instance %u : Invalid lowLevelVbv flag [0 - 1]\n", channelNumber 
+ 1); + return_error = EB_ErrorBadParameter; + } + + if (config->lowLevelVbv == 1 && ((config->vbvBufsize <= 0) || (config->vbvMaxrate <= 0))) { + SVT_LOG("SVT [Error]: Instance %u: Enabling Low level vbv requires Frame Level Vbv to be enabled", channelNumber + 1); + return_error = EB_ErrorBadParameter; + } if ( config->enableSaoFlag > 1) { SVT_LOG("SVT [Error]: Instance %u: Invalid SAO. SAO range must be [0 - 1]\n",channelNumber+1); @@ -3005,13 +3015,20 @@ static EB_ERRORTYPE VerifySettings(\ } if (config->constrainedIntra > 1) { SVT_LOG("SVT [Error]: Instance %u: The constrained intra must be [0 - 1] \n", channelNumber + 1); - return_error = EB_ErrorBadParameter; + return_error = EB_ErrorBadParameter; } - if (config->rateControlMode > 1) { - SVT_LOG("SVT [Error]: Instance %u: The rate control mode must be [0 - 1] \n", channelNumber + 1); + if (config->rateControlMode > 2) { + SVT_LOG("SVT [Error]: Instance %u: The rate control mode must be [0 - 2] \n", channelNumber + 1); + return_error = EB_ErrorBadParameter; + } + if (((config->rateControlMode == 0) || (config->rateControlMode == 1)) && (config->crf != 28)) { + SVT_LOG("SVT [Warning]: Instance %u: The crf setting with %u wouldn't take effect with rc %u\n", + channelNumber + 1, config->crf, config->rateControlMode); + } + if ((config->rateControlMode == 2) && (config->crf > 51)) { + SVT_LOG("SVT [Error]: Instance %u:The crf value must be [0-51] \n", channelNumber + 1); return_error = EB_ErrorBadParameter; } - if (config->lookAheadDistance > 250 && config->lookAheadDistance != (EB_U32)~0) { SVT_LOG("SVT [Error]: Instance %u: The lookahead distance must be [0 - 250] \n", channelNumber + 1); return_error = EB_ErrorBadParameter; @@ -3390,9 +3407,12 @@ static void PrintLibParams( SVT_LOG("\nSVT [config]: HierarchicalLevels / BaseLayerSwitchMode / PredStructure\t\t\t: %d / %d / %d ", config->hierarchicalLevels, config->baseLayerSwitchMode, config->predStructure); if (config->rateControlMode == 1) - SVT_LOG("\nSVT 
[config]: RCMode / TargetBitrate / LAD / SceneChange / QP Range [%u ~ %u]\t\t: VBR / %d / %d / %d ", config->minQpAllowed, config->maxQpAllowed, config->targetBitRate, config->lookAheadDistance, config->sceneChangeDetection); + SVT_LOG("\nSVT [config]: RCMode / TargetBitrate / LAD / SceneChange / QP Range [%u ~ %u]\t\t: VBR / %d / %d / %d ", + config->minQpAllowed, config->maxQpAllowed, config->targetBitRate, config->lookAheadDistance, config->sceneChangeDetection); + else if (config->rateControlMode == 2) + SVT_LOG("\nSVT [config]: RCMode / TargetQuality / LAD / SceneChange\t\t: CRF / %d / %d / %d ", config->crf, config->lookAheadDistance, config->sceneChangeDetection); else - SVT_LOG("\nSVT [config]: BRC Mode / QP / LookaheadDistance / SceneChange\t\t\t\t: CQP / %d / %d / %d ", config->qp, config->lookAheadDistance, config->sceneChangeDetection); + SVT_LOG("\nSVT [config]: RCMode / QP / LAD / SceneChange\t\t\t\t: CQP / %d / %d / %d ", config->qp, config->lookAheadDistance, config->sceneChangeDetection); if (config->tune <= 1) SVT_LOG("\nSVT [config]: BitRateReduction / ImproveSharpness\t\t\t\t\t: %d / %d ", config->bitRateReduction, config->improveSharpness); diff --git a/Source/Lib/Codec/EbEntropyCoding.c b/Source/Lib/Codec/EbEntropyCoding.c index a32231955..b98153ea3 100644 --- a/Source/Lib/Codec/EbEntropyCoding.c +++ b/Source/Lib/Codec/EbEntropyCoding.c @@ -5055,7 +5055,7 @@ EB_ERRORTYPE ComputeProfileTierLevelInfo( scsPtr->profileIdc = scsPtr->staticConfig.profile; - if (scsPtr->staticConfig.rateControlMode == 0){ + if (scsPtr->staticConfig.rateControlMode != 1){ // level calculation if ((lumaSampleRate <= maxLumaSampleRate[0]) && (lumaPictureSize <= maxLumaPictureSize[0]) && (lumaWidthSquare <= maxLumaPictureSize[0] * 8) && (lumaHeightSquare <= maxLumaPictureSize[0] * 8)) scsPtr->levelIdc = 30; //1*30 @@ -5472,16 +5472,15 @@ static void CodeVPS( // vps_poc_proportional_to_timing_flag WriteFlagCavlc( - bitstreamPtr, - 0); + bitstreamPtr, + 0); // 
vps_num_hrd_parameters WriteUvlc( - bitstreamPtr, - 0); + bitstreamPtr, + 0); } - // "vps_extension_flag" WriteFlagCavlc( bitstreamPtr, @@ -6240,9 +6239,9 @@ static void CodePPS( // "cu_qp_delta_enabled_flag" WriteFlagCavlc( bitstreamPtr, - scsPtr->staticConfig.improveSharpness || scsPtr->staticConfig.bitRateReduction);// pcsPtr->useDeltaQp); + scsPtr->staticConfig.improveSharpness || scsPtr->staticConfig.bitRateReduction ||(scsPtr->staticConfig.lowLevelVbv));// pcsPtr->useDeltaQp); - if (scsPtr->staticConfig.improveSharpness || scsPtr->staticConfig.bitRateReduction) { //pcsPtr->useDeltaQp) { + if (scsPtr->staticConfig.improveSharpness || scsPtr->staticConfig.bitRateReduction|| (scsPtr->staticConfig.lowLevelVbv)) { //pcsPtr->useDeltaQp) { // "diff_cu_qp_delta_depth" WriteUvlc( bitstreamPtr, @@ -7324,6 +7323,471 @@ static EB_ERRORTYPE Intra4x4EncodeCoeff( return return_error; } +EB_ERRORTYPE EstimateLcu ( + LargestCodingUnit_t *tbPtr, + EB_U32 lcuOriginX, + EB_U32 lcuOriginY, + PictureControlSet_t *pictureControlSetPtr, + EB_U32 lcuSize, + EntropyCoder_t *entropyCoderPtr, + EbPictureBufferDesc_t *coeffPtr, + NeighborArrayUnit_t *modeTypeNeighborArray, + NeighborArrayUnit_t *leafDepthNeighborArray, + NeighborArrayUnit_t *intraLumaModeNeighborArray, + NeighborArrayUnit_t *skipFlagNeighborArray, + EB_U16 tileIdx, + EB_U32 pictureOriginX, + EB_U32 pictureOriginY) +{ + EB_ERRORTYPE return_error = EB_ErrorNone; + + CabacEncodeContext_t *cabacEncodeCtxPtr = (CabacEncodeContext_t*)entropyCoderPtr->cabacEncodeContextPtr; + SequenceControlSet_t *sequenceControlSetPtr = (SequenceControlSet_t*)pictureControlSetPtr->sequenceControlSetWrapperPtr->objectPtr; + EncodeContext_t *encodeContextPtr = ((SequenceControlSet_t*)(pictureControlSetPtr->sequenceControlSetWrapperPtr->objectPtr))->encodeContextPtr; + + // CU Variables + const CodedUnitStats_t *cuStatsPtr; + CodingUnit_t *cuPtr; + EB_U32 cuIndex = 0; + EB_REFLIST refList; + EB_U32 maxRefList; + EB_U32 cuOriginX; + EB_U32 
cuOriginY; + EB_U32 cuSize; + EB_U8 cuDepth; + EB_BOOL availableCoeff; + cabacEncodeCtxPtr->colorFormat = pictureControlSetPtr->colorFormat; + + // PU Variables + PredictionUnit_t *puPtr; + EB_U32 cuQuantizedCoeffsBits; + EB_U32 log2MinCuQpDeltaSize = Log2f(MAX_LCU_SIZE) - pictureControlSetPtr->difCuDeltaQpDepth; + + tbPtr->quantizedCoeffsBits = 0; + EB_BOOL entropyDeltaQpNotCoded = EB_TRUE; + EB_BOOL deltaQpNotCoded = EB_TRUE; + EB_BOOL checkCuOutOfBound = EB_FALSE; + LcuParams_t * lcuParam = &sequenceControlSetPtr->lcuParamsArray[tbPtr->index]; + if (!(lcuParam->isCompleteLcu)) { + checkCuOutOfBound = EB_TRUE; + } + do { + EB_BOOL codeCuCond = EB_TRUE; // Code cu only if it is inside the picture + cuPtr = tbPtr->codedLeafArrayPtr[cuIndex]; + + if (checkCuOutOfBound) + codeCuCond = (EB_BOOL)lcuParam->rasterScanCuValidity[MD_SCAN_TO_RASTER_SCAN[cuIndex]]; // check if cu is inside the picture + + if (codeCuCond) { + // CU Stats + cuStatsPtr = GetCodedUnitStats(cuIndex); + cuSize = cuStatsPtr->size; + cuOriginX = lcuOriginX + cuStatsPtr->originX; + cuOriginY = lcuOriginY + cuStatsPtr->originY; + cuDepth = (EB_U8)cuStatsPtr->depth; + + // Code Split Flag + EncodeSplitFlag( + cabacEncodeCtxPtr, + cuDepth, + pictureControlSetPtr->lcuMaxDepth, + (EB_BOOL)cuPtr->splitFlag, + cuOriginX, + cuOriginY, + modeTypeNeighborArray, + leafDepthNeighborArray); + if (cuStatsPtr->sizeLog2 >= log2MinCuQpDeltaSize) { + deltaQpNotCoded = EB_TRUE; + } + if (((cuStatsPtr->originY & ((1 << log2MinCuQpDeltaSize) - 1)) == 0) && + ((cuStatsPtr->originX & ((1 << log2MinCuQpDeltaSize) - 1)) == 0)) { + deltaQpNotCoded = EB_TRUE; + } + + if (cuPtr->splitFlag == EB_FALSE) { + if (cuPtr->predictionModeFlag == INTRA_MODE && + cuPtr->predictionUnitArray->intraLumaMode == EB_INTRA_MODE_4x4) { + availableCoeff = ( + cuPtr->transformUnitArray[1].lumaCbf || + cuPtr->transformUnitArray[2].lumaCbf || + cuPtr->transformUnitArray[3].lumaCbf || + cuPtr->transformUnitArray[4].lumaCbf || + 
cuPtr->transformUnitArray[1].crCbf || + cuPtr->transformUnitArray[1].cbCbf || + cuPtr->transformUnitArray[2].crCbf || + cuPtr->transformUnitArray[2].cbCbf || + cuPtr->transformUnitArray[3].crCbf || + cuPtr->transformUnitArray[3].cbCbf || + cuPtr->transformUnitArray[4].crCbf || // 422 case will use 3rd 4x4 for the 2nd chroma + cuPtr->transformUnitArray[4].cbCbf) ? EB_TRUE : EB_FALSE; + } + else { + availableCoeff = (cuPtr->predictionModeFlag == INTER_MODE) ? (EB_BOOL)cuPtr->rootCbf : + (cuPtr->transformUnitArray[cuSize == sequenceControlSetPtr->lcuSize ? 1 : 0].lumaCbf || + cuPtr->transformUnitArray[cuSize == sequenceControlSetPtr->lcuSize ? 1 : 0].crCbf || + cuPtr->transformUnitArray[cuSize == sequenceControlSetPtr->lcuSize ? 1 : 0].crCbf2 || + cuPtr->transformUnitArray[cuSize == sequenceControlSetPtr->lcuSize ? 1 : 0].cbCbf || + cuPtr->transformUnitArray[cuSize == sequenceControlSetPtr->lcuSize ? 1 : 0].cbCbf2) ? EB_TRUE : EB_FALSE; + } + + EntropyCodingUpdateQp( + cuPtr, + availableCoeff, + cuOriginX, + cuOriginY, + cuSize, + sequenceControlSetPtr->lcuSize, + sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction || (sequenceControlSetPtr->staticConfig.lowLevelVbv) ? 
EB_TRUE : EB_FALSE, + &entropyDeltaQpNotCoded, + pictureControlSetPtr->difCuDeltaQpDepth, + &pictureControlSetPtr->tempprevCodedQp[tileIdx], + &pictureControlSetPtr->tempprevQuantGroupCodedQp[tileIdx], + tbPtr->qp, + pictureControlSetPtr, + pictureOriginX, + pictureOriginY); + + // Assign DLF QP + entropySetQpArrayBasedOnCU( + pictureControlSetPtr, + cuOriginX, + cuOriginY, + cuSize, + cuSize, + cuPtr->qp); + + // Code the skip flag + if (pictureControlSetPtr->sliceType == EB_P_PICTURE || pictureControlSetPtr->sliceType == EB_B_PICTURE) + { + EncodeSkipFlag( + cabacEncodeCtxPtr, + (EB_BOOL)cuPtr->skipFlag, + cuOriginX, + cuOriginY, + modeTypeNeighborArray, + skipFlagNeighborArray); + } + + if (cuPtr->skipFlag) + { + // Merge Index + EncodeMergeIndex( + cabacEncodeCtxPtr, + &cuPtr->predictionUnitArray[0]); + } + else + { + // Code CU pred mode (I, P, B, etc.) + // (not needed for Intra Slice) + if (pictureControlSetPtr->sliceType == EB_P_PICTURE || pictureControlSetPtr->sliceType == EB_B_PICTURE) + { + EncodePredictionMode( + cabacEncodeCtxPtr, + cuPtr); + } + + switch (cuPtr->predictionModeFlag) { + case INTRA_MODE: + if (cuPtr->predictionModeFlag == INTRA_MODE && + cuPtr->predictionUnitArray->intraLumaMode == EB_INTRA_MODE_4x4) { + // Code Partition Size + EncodeIntra4x4PartitionSize( + cabacEncodeCtxPtr, + cuPtr, + pictureControlSetPtr->lcuMaxDepth); + + // Get the PU Ptr + puPtr = cuPtr->predictionUnitArray; + + + EB_U8 partitionIndex; + + EB_U8 intraLumaLeftModeArray[4]; + EB_U8 intraLumaTopModeArray[4]; + + EB_U8 intraLumaLeftMode; + EB_U8 intraLumaTopMode; + + // Partition Loop + for (partitionIndex = 0; partitionIndex < 4; partitionIndex++) { + + EB_U8 intraLumaMode = tbPtr->intra4x4Mode[((MD_SCAN_TO_RASTER_SCAN[cuIndex] - 21) << 2) + partitionIndex]; + EB_U8 predictionModeFlag = INTRA_MODE; + + EB_U32 partitionOriginX = cuOriginX + INTRA_4x4_OFFSET_X[partitionIndex]; + EB_U32 partitionOriginY = cuOriginY + INTRA_4x4_OFFSET_Y[partitionIndex]; + + // Code 
Luma Mode for Intra First Stage + EncodeIntraLumaModeFirstStage( + cabacEncodeCtxPtr, + partitionOriginX, + partitionOriginY, + lcuSize, + &intraLumaLeftMode, + &intraLumaTopMode, + intraLumaMode, + modeTypeNeighborArray, + intraLumaModeNeighborArray); + + intraLumaLeftModeArray[partitionIndex] = intraLumaLeftMode; + intraLumaTopModeArray[partitionIndex] = intraLumaTopMode; + + NeighborArrayUnitModeWrite( + intraLumaModeNeighborArray, + (EB_U8*)&intraLumaMode, + partitionOriginX, + partitionOriginY, + MIN_PU_SIZE, + MIN_PU_SIZE, + NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); + + NeighborArrayUnitModeWrite( + modeTypeNeighborArray, + &predictionModeFlag, + partitionOriginX, + partitionOriginY, + MIN_PU_SIZE, + MIN_PU_SIZE, + NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); + + } + + for (partitionIndex = 0; partitionIndex < 4; partitionIndex++) { + EB_U8 intraLumaMode = tbPtr->intra4x4Mode[((MD_SCAN_TO_RASTER_SCAN[cuIndex] - 21) << 2) + partitionIndex]; + + // Code Luma Mode for Intra Second Stage + EncodeIntraLumaModeSecondStage( + cabacEncodeCtxPtr, + intraLumaLeftModeArray[partitionIndex], + intraLumaTopModeArray[partitionIndex], + intraLumaMode); + } + + // Code Chroma Mode for Intra + for (partitionIndex = 0; + partitionIndex < ((cabacEncodeCtxPtr->colorFormat == EB_YUV444) ? 
4 : 1); + partitionIndex++) { + EncodeIntraChromaMode(cabacEncodeCtxPtr); + } + + // Encode Transform Unit Split & CBFs + Intra4x4EncodeCoeff( + tbPtr, + cabacEncodeCtxPtr, + cuPtr, + cuStatsPtr, + coeffPtr, + &cuQuantizedCoeffsBits, + (EB_BOOL)pictureControlSetPtr->useDeltaQp, + &deltaQpNotCoded); + + tbPtr->quantizedCoeffsBits += cuQuantizedCoeffsBits; + } + else { + // Code Partition Size + EncodePartitionSize( + cabacEncodeCtxPtr, + cuPtr, + pictureControlSetPtr->lcuMaxDepth); + + EB_U8 intraLumaLeftMode; + EB_U8 intraLumaTopMode; + EB_U8 intraLumaMode; + + // Get the PU Ptr + puPtr = cuPtr->predictionUnitArray; + // Code Luma Mode for Intra First Stage + EncodeIntraLumaModeFirstStage( + cabacEncodeCtxPtr, + cuOriginX, + cuOriginY, + lcuSize, + &intraLumaLeftMode, + &intraLumaTopMode, + puPtr->intraLumaMode, + modeTypeNeighborArray, + intraLumaModeNeighborArray); + + intraLumaMode = (EB_U8)puPtr->intraLumaMode; + + NeighborArrayUnitModeWrite( + intraLumaModeNeighborArray, + (EB_U8*)&intraLumaMode, + cuOriginX, + cuOriginY, + cuSize, + cuSize, + NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); + + { + EB_U8 predictionModeFlag = (EB_U8)cuPtr->predictionModeFlag; + NeighborArrayUnitModeWrite( + modeTypeNeighborArray, + &predictionModeFlag, + cuOriginX, + cuOriginY, + cuSize, + cuSize, + NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); + } + + // Get PU Ptr + puPtr = &cuPtr->predictionUnitArray[0]; + + // Code Luma Mode for Intra Second Stage + EncodeIntraLumaModeSecondStage( + cabacEncodeCtxPtr, + intraLumaLeftMode, + intraLumaTopMode, + puPtr->intraLumaMode); + + // Code Chroma Mode for Intra + EncodeIntraChromaMode( + cabacEncodeCtxPtr); + EncodeTuSplitCoeff( + cabacEncodeCtxPtr, + cuPtr, + cuStatsPtr, + coeffPtr, + &cuQuantizedCoeffsBits, + (EB_BOOL)pictureControlSetPtr->useDeltaQp, + &deltaQpNotCoded); + + tbPtr->quantizedCoeffsBits += cuQuantizedCoeffsBits; + } + break; + + case INTER_MODE: + { + + // Code Partition Size + EncodePartitionSize( + cabacEncodeCtxPtr, + 
cuPtr, + pictureControlSetPtr->lcuMaxDepth); + + puPtr = cuPtr->predictionUnitArray; + // mv merge Flag + EncodeMergeFlag( + cabacEncodeCtxPtr, + puPtr); + + if (puPtr->mergeFlag) { + // mv merge index + EncodeMergeIndex( + cabacEncodeCtxPtr, + cuPtr->predictionUnitArray); + } + else { + // Inter Prediction Direction + if (pictureControlSetPtr->sliceType == EB_B_PICTURE) { + EncodePredictionDirection( + cabacEncodeCtxPtr, + puPtr, + cuPtr); + } + + refList = puPtr->interPredDirectionIndex == UNI_PRED_LIST_1 ? REF_LIST_1 : REF_LIST_0; + maxRefList = (EB_U32)refList + (puPtr->interPredDirectionIndex == BI_PRED ? 2 : 1); + { + EB_U32 refIndex = refList; + for (; (EB_U32)refIndex < maxRefList; ++refIndex) { + // Reference Index + refList = (EB_REFLIST)refIndex; + // Reference Index + EncodeReferencePictureIndex( + cabacEncodeCtxPtr, + refList, + pictureControlSetPtr); + + // Motion Vector Difference + EncodeMvd( + cabacEncodeCtxPtr, + puPtr, + refList); + + // Motion Vector Prediction Index + EncodeMvpIndex( + cabacEncodeCtxPtr, + puPtr, + refList); + } + } + } + } + + // Encode Transform Unit Split & CBFs + EncodeTuSplitCoeff( + cabacEncodeCtxPtr, + cuPtr, + cuStatsPtr, + coeffPtr, + &cuQuantizedCoeffsBits, + (EB_BOOL)pictureControlSetPtr->useDeltaQp, + &deltaQpNotCoded); + + tbPtr->quantizedCoeffsBits += cuQuantizedCoeffsBits; + + break; + default: + CHECK_REPORT_ERROR_NC( + encodeContextPtr->appCallbackPtr, + EB_ENC_EC_ERROR3); + break; + } + } + + // Update the Leaf Depth Neighbor Array + NeighborArrayUnitModeWrite( + leafDepthNeighborArray, + &cuDepth, + cuOriginX, + cuOriginY, + cuSize, + cuSize, + NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); + + // Update the Mode Type Neighbor Array + { + EB_U8 predictionModeFlag = (EB_U8)cuPtr->predictionModeFlag; + NeighborArrayUnitModeWrite( + modeTypeNeighborArray, + &predictionModeFlag, + cuOriginX, + cuOriginY, + cuSize, + cuSize, + NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); + } + + // Update the Skip Flag Neighbor 
Array + { + EB_U8 skipFlag = (EB_U8)cuPtr->skipFlag; + NeighborArrayUnitModeWrite( + skipFlagNeighborArray, + (EB_U8*)&skipFlag, + cuOriginX, + cuOriginY, + cuSize, + cuSize, + NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); + } + + } + if (cuPtr->splitFlag == EB_FALSE) + cuIndex += DepthOffset[cuDepth]; + else + ++cuIndex; + } + else + ++cuIndex; + + } while (cuIndex < CU_MAX_COUNT); + + + return return_error; +} + /********************************************** * Encode Lcu **********************************************/ @@ -7440,7 +7904,7 @@ EB_ERRORTYPE EncodeLcu( cuOriginY, cuSize, sequenceControlSetPtr->lcuSize, - sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction ? EB_TRUE : EB_FALSE, + sequenceControlSetPtr->staticConfig.improveSharpness || sequenceControlSetPtr->staticConfig.bitRateReduction || (sequenceControlSetPtr->staticConfig.lowLevelVbv) ? EB_TRUE : EB_FALSE, &entropyDeltaQpNotCoded, pictureControlSetPtr->difCuDeltaQpDepth, &pictureControlSetPtr->prevCodedQp[tileIdx], diff --git a/Source/Lib/Codec/EbEntropyCoding.h b/Source/Lib/Codec/EbEntropyCoding.h index d5f67839b..5ea515cde 100644 --- a/Source/Lib/Codec/EbEntropyCoding.h +++ b/Source/Lib/Codec/EbEntropyCoding.h @@ -45,6 +45,22 @@ extern EB_ERRORTYPE EncodeLcu( EB_U32 pictureOriginX, EB_U32 pictureOriginY); +extern EB_ERRORTYPE EstimateLcu( + LargestCodingUnit_t *tbPtr, + EB_U32 lcuOriginX, + EB_U32 lcuOriginY, + PictureControlSet_t *pictureControlSetPtr, + EB_U32 lcuSize, + EntropyCoder_t *entropyCoderPtr, + EbPictureBufferDesc_t *coeffPtr, + NeighborArrayUnit_t *modeTypeNeighborArray, + NeighborArrayUnit_t *leafDepthNeighborArray, + NeighborArrayUnit_t *intraLumaModeNeighborArray, + NeighborArrayUnit_t *skipFlagNeighborArray, + EB_U16 tileIdx, + EB_U32 pictureOriginX, + EB_U32 pictureOriginY); + extern EB_ERRORTYPE EncodeTileFinish( EntropyCoder_t *entropyCoderPtr); diff --git a/Source/Lib/Codec/EbEntropyCodingProcess.c 
b/Source/Lib/Codec/EbEntropyCodingProcess.c index b2371c8ca..f0a3cb747 100644 --- a/Source/Lib/Codec/EbEntropyCodingProcess.c +++ b/Source/Lib/Codec/EbEntropyCodingProcess.c @@ -115,7 +115,6 @@ static void EntropyCodingConfigureLcu( LargestCodingUnit_t *lcuPtr, PictureControlSet_t *pictureControlSetPtr) { - contextPtr->qp = pictureControlSetPtr->pictureQp; // Asuming cb and cr offset to be the same for chroma QP in both slice and pps for lambda computation @@ -426,6 +425,9 @@ void* EntropyCodingKernel(void *inputPtr) lastLcuFlagInSlice = lastLcuFlagInTile; } + if (sequenceControlSetPtr->staticConfig.lowLevelVbv) + contextPtr->qp = lcuPtr->qp; + // Configure the LCU EntropyCodingConfigureLcu( contextPtr, diff --git a/Source/Lib/Codec/EbInitialRateControlProcess.c b/Source/Lib/Codec/EbInitialRateControlProcess.c index 5ba8f4e82..49bd5e935 100644 --- a/Source/Lib/Codec/EbInitialRateControlProcess.c +++ b/Source/Lib/Codec/EbInitialRateControlProcess.c @@ -812,6 +812,7 @@ void GetHistogramQueueData( histogramQueueEntryIndex = (histogramQueueEntryIndex > HIGH_LEVEL_RATE_CONTROL_HISTOGRAM_QUEUE_MAX_DEPTH - 1) ? 
histogramQueueEntryIndex - HIGH_LEVEL_RATE_CONTROL_HISTOGRAM_QUEUE_MAX_DEPTH : histogramQueueEntryIndex; + pictureControlSetPtr->hlHistogramQueueIndex = histogramQueueEntryIndex; histogramQueueEntryPtr = encodeContextPtr->hlRateControlHistorgramQueue[histogramQueueEntryIndex]; @@ -945,7 +946,7 @@ void* InitialRateControlKernel(void *inputPtr) pictureControlSetPtr, inputResultsPtr); - if (sequenceControlSetPtr->staticConfig.rateControlMode) + if (sequenceControlSetPtr->staticConfig.rateControlMode != 0) { if (sequenceControlSetPtr->staticConfig.lookAheadDistance != 0){ @@ -1032,7 +1033,7 @@ void* InitialRateControlKernel(void *inputPtr) else pictureControlSetPtr->endOfSequenceRegion = EB_FALSE; - if (sequenceControlSetPtr->staticConfig.rateControlMode) + if (sequenceControlSetPtr->staticConfig.rateControlMode != 0) { // Determine offset from the Head Ptr for HLRC histogram queue and set the life count if (sequenceControlSetPtr->staticConfig.lookAheadDistance != 0){ diff --git a/Source/Lib/Codec/EbMotionEstimation.c b/Source/Lib/Codec/EbMotionEstimation.c index da62d1965..9751530b7 100644 --- a/Source/Lib/Codec/EbMotionEstimation.c +++ b/Source/Lib/Codec/EbMotionEstimation.c @@ -16,6 +16,7 @@ #include "EbReferenceObject.h" #include "EbAvcStyleMcp.h" #include "EbMeSadCalculation.h" +#include "EbMeSatdCalculation_C.h" #include "EbIntraPrediction.h" #include "EbLambdaRateTables.h" @@ -3743,8 +3744,7 @@ EB_ERRORTYPE MotionEstimateLcu( EB_U64 ref0Poc = 0; EB_U64 ref1Poc = 0; - - EB_U64 i; + EB_S16 hmeLevel1SearchAreaInWidth; EB_S16 hmeLevel1SearchAreaInHeight; @@ -4440,8 +4440,14 @@ EB_ERRORTYPE MotionEstimateLcu( // Compute the sum of the distortion of all 16 16x16 (best) blocks in the LCU pictureControlSetPtr->rcMEdistortion[lcuIndex] = 0; - for (i = 0; i < 16; i++) { - pictureControlSetPtr->rcMEdistortion[lcuIndex] += pictureControlSetPtr->meResults[lcuIndex][5 + i].distortionDirection[0].distortion; + pictureControlSetPtr->rcMESatdDistortion[lcuIndex] = 0; + for 
(puIndex = ME_TIER_ZERO_PU_16x16_0; puIndex <= ME_TIER_ZERO_PU_16x16_15; puIndex++) { + referenceObject = (EbPaReferenceObject_t*)pictureControlSetPtr->refPaPicPtrArray[0]->objectPtr; + refPicPtr = (EbPictureBufferDesc_t*)referenceObject->inputPaddedPicturePtr; + searchRegionIndex = (EB_S16)refPicPtr->originX + originX + ((EB_S16)refPicPtr->originY + originY) * refPicPtr->strideY; + pictureControlSetPtr->rcMESatdDistortion[lcuIndex] += SatdCalculation_16x16 + (contextPtr->lcuSrcPtr, contextPtr->lcuSrcStride, &(refPicPtr->bufferY[searchRegionIndex]), refPicPtr->strideY); + pictureControlSetPtr->rcMEdistortion[lcuIndex] += pictureControlSetPtr->meResults[lcuIndex][puIndex].distortionDirection[0].distortion; } } @@ -5112,13 +5118,11 @@ EB_ERRORTYPE OpenLoopIntraSearchLcu( (EB_U32)EB_INTRA_PLANAR); //Distortion - oisCuPtr[0].distortion = (EB_U32)NxMSadKernel_funcPtrArray[!!(ASM_TYPES & AVX2_MASK)][cuSize >> 3]( // Always SAD without weighting + oisCuPtr[0].distortion = SatdCalculation_16x16( &(inputPtr->bufferY[(inputPtr->originY + cuOriginY) * inputPtr->strideY + (inputPtr->originX + cuOriginX)]), inputPtr->strideY, &(contextPtr->meContextPtr->lcuBuffer[0]), - MAX_LCU_SIZE, - cuSize, - cuSize); + MAX_LCU_SIZE); oisCuPtr[0].intraMode = EB_INTRA_PLANAR; diff --git a/Source/Lib/Codec/EbPacketizationProcess.c b/Source/Lib/Codec/EbPacketizationProcess.c index a2652e3ec..b4ee8262d 100644 --- a/Source/Lib/Codec/EbPacketizationProcess.c +++ b/Source/Lib/Codec/EbPacketizationProcess.c @@ -711,7 +711,7 @@ void* PacketizationKernel(void *inputPtr) } } - + // Send the number of bytes per frame to RC pictureControlSetPtr->ParentPcsPtr->totalNumBits = outputStreamPtr->nFilledLen << 3; diff --git a/Source/Lib/Codec/EbPerFramePrediction.c b/Source/Lib/Codec/EbPerFramePrediction.c new file mode 100644 index 000000000..a8e99ce8a --- /dev/null +++ b/Source/Lib/Codec/EbPerFramePrediction.c @@ -0,0 +1,60 @@ +/* +* Copyright(c) 2018 Intel Corporation +* SPDX - License - Identifier: BSD - 2 
- Clause - Patent +*/ + +#include "EbPerFramePrediction.h" + +EB_U64 predictBits(SequenceControlSet_t *sequenceControlSetPtr, EncodeContext_t *encodeContextPtr, HlRateControlHistogramEntry_t *hlRateControlHistogramPtrTemp, EB_U32 qp) +{ + EB_U64 totalBits = 0; + if (hlRateControlHistogramPtrTemp->isCoded) { + // If the frame is already coded, use the actual number of bits + totalBits = hlRateControlHistogramPtrTemp->totalNumBitsCoded; + } + else { + RateControlTables_t *rateControlTablesPtr = &encodeContextPtr->rateControlTablesArray[qp]; + EB_Bit_Number *sadBitsArrayPtr = rateControlTablesPtr->sadBitsArray[hlRateControlHistogramPtrTemp->temporalLayerIndex]; + EB_Bit_Number *intraSadBitsArrayPtr = rateControlTablesPtr->intraSadBitsArray[0]; + EB_U32 predBitsRefQp = 0; + EB_U32 numOfFullLcus = 0; + EB_U32 areaInPixel = sequenceControlSetPtr->lumaWidth * sequenceControlSetPtr->lumaHeight; + + if (hlRateControlHistogramPtrTemp->sliceType == EB_I_PICTURE) { + // Loop over block in the frame and calculated the predicted bits at reg QP + EB_U32 i; + EB_U32 accum = 0; + for (i = 0; i < NUMBER_OF_INTRA_SAD_INTERVALS; ++i) + { + accum += (EB_U32)(hlRateControlHistogramPtrTemp->oisDistortionHistogram[i] * intraSadBitsArrayPtr[i]); + } + + predBitsRefQp = accum; + numOfFullLcus = hlRateControlHistogramPtrTemp->fullLcuCount; + totalBits += predBitsRefQp; + } + else { + EB_U32 i; + EB_U32 accum = 0; + EB_U32 accumIntra = 0; + for (i = 0; i < NUMBER_OF_SAD_INTERVALS; ++i) + { + accum += (EB_U32)(hlRateControlHistogramPtrTemp->meDistortionHistogram[i] * sadBitsArrayPtr[i]); + accumIntra += (EB_U32)(hlRateControlHistogramPtrTemp->oisDistortionHistogram[i] * intraSadBitsArrayPtr[i]); + + } + if (accum > accumIntra * 3) + predBitsRefQp = accumIntra; + else + predBitsRefQp = accum; + numOfFullLcus = hlRateControlHistogramPtrTemp->fullLcuCount; + totalBits += predBitsRefQp; + } + + // Scale for in complete LCSs + // predBitsRefQp is normalized based on the area because of the LCUs at 
the picture boundaries + totalBits = totalBits * (EB_U64)areaInPixel / (numOfFullLcus << 12); + } + hlRateControlHistogramPtrTemp->predBitsRefQp[qp] = totalBits; + return totalBits; +} \ No newline at end of file diff --git a/Source/Lib/Codec/EbPerFramePrediction.h b/Source/Lib/Codec/EbPerFramePrediction.h new file mode 100644 index 000000000..fdb7037ea --- /dev/null +++ b/Source/Lib/Codec/EbPerFramePrediction.h @@ -0,0 +1,22 @@ +/* +* Copyright(c) 2018 Intel Corporation +* SPDX - License - Identifier: BSD - 2 - Clause - Patent +*/ + + +#ifndef EbPerFramePrediction_h +#define EbPerFramePrediction_h + +#include "EbSequenceControlSet.h" +#ifdef __cplusplus +extern "C" { +#endif +EB_U64 predictBits( + SequenceControlSet_t *sequenceControlSetPtr, + EncodeContext_t *encodeContextPtr, + HlRateControlHistogramEntry_t *hlRateControlHistogramPtrTemp, + EB_U32 qp); +#ifdef __cplusplus +} +#endif +#endif // EbPerFramePrediction_h \ No newline at end of file diff --git a/Source/Lib/Codec/EbPictureControlSet.c b/Source/Lib/Codec/EbPictureControlSet.c index 713ec4802..258434d74 100644 --- a/Source/Lib/Codec/EbPictureControlSet.c +++ b/Source/Lib/Codec/EbPictureControlSet.c @@ -81,6 +81,7 @@ EB_ERRORTYPE PictureControlSetCtor( // LCUs EB_U16 lcuIndex; + EB_U16 rowIndex; EB_U16 lcuOriginX; EB_U16 lcuOriginY; EB_ERRORTYPE return_error = EB_ErrorNone; @@ -140,7 +141,7 @@ EB_ERRORTYPE PictureControlSetCtor( if (return_error == EB_ErrorInsufficientResources){ return EB_ErrorInsufficientResources; } - + // Cabaccost EB_MALLOC(CabacCost_t*, objectPtr->cabacCost, sizeof(CabacCost_t), EB_N_PTR); @@ -192,6 +193,16 @@ EB_ERRORTYPE PictureControlSetCtor( lcuOriginX = (lcuOriginX == pictureWidthInLcu - 1) ? 
0 : lcuOriginX + 1; } + //Row stats Array + EB_MALLOC(RCStatRow_t**, objectPtr->rowStats, sizeof(RCStatRow_t*) * pictureHeightInLcu, EB_N_PTR); + for (rowIndex = 0; rowIndex < pictureHeightInLcu; ++rowIndex) + { + return_error = RCStatRowCtor( + &(objectPtr->rowStats[rowIndex]), (EB_U16)rowIndex); + if (return_error == EB_ErrorInsufficientResources) { + return EB_ErrorInsufficientResources; + } + } //ConfigureEdges(objectPtr, maxCuSize); // Mode Decision Control config @@ -246,6 +257,12 @@ EB_ERRORTYPE PictureControlSetCtor( EB_MALLOC(NeighborArrayUnit_t**, objectPtr->intraLumaModeNeighborArray, sizeof(NeighborArrayUnit_t*) * totalTileCount, EB_N_PTR); EB_MALLOC(NeighborArrayUnit_t**, objectPtr->skipFlagNeighborArray, sizeof(NeighborArrayUnit_t*) * totalTileCount, EB_N_PTR); + // For proxy entropy + EB_MALLOC(NeighborArrayUnit_t**, objectPtr->tempModeTypeNeighborArray, sizeof(NeighborArrayUnit_t*) * totalTileCount, EB_N_PTR); + EB_MALLOC(NeighborArrayUnit_t**, objectPtr->tempLeafDepthNeighborArray, sizeof(NeighborArrayUnit_t*) * totalTileCount, EB_N_PTR); + EB_MALLOC(NeighborArrayUnit_t**, objectPtr->tempIntraLumaModeNeighborArray, sizeof(NeighborArrayUnit_t*) * totalTileCount, EB_N_PTR); + EB_MALLOC(NeighborArrayUnit_t**, objectPtr->tempSkipFlagNeighborArray, sizeof(NeighborArrayUnit_t*) * totalTileCount, EB_N_PTR); + // Mode Decision Neighbor Arrays EB_U8 depth; EB_U16 array_size = sizeof(NeighborArrayUnit_t*) * totalTileCount; @@ -605,6 +622,56 @@ EB_ERRORTYPE PictureControlSetCtor( if (return_error == EB_ErrorInsufficientResources){ return EB_ErrorInsufficientResources; } + + //Proxy entropy Neighbor Arrays + return_error = NeighborArrayUnitCtor( + &objectPtr->tempModeTypeNeighborArray[tileIdx], + MAX_PICTURE_WIDTH_SIZE, + MAX_PICTURE_HEIGHT_SIZE, + sizeof(EB_U8), + PU_NEIGHBOR_ARRAY_GRANULARITY, + PU_NEIGHBOR_ARRAY_GRANULARITY, + NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); + + if (return_error == EB_ErrorInsufficientResources) { + return 
EB_ErrorInsufficientResources; + } + return_error = NeighborArrayUnitCtor( + &objectPtr->tempLeafDepthNeighborArray[tileIdx], + MAX_PICTURE_WIDTH_SIZE, + MAX_PICTURE_HEIGHT_SIZE, + sizeof(EB_U8), + CU_NEIGHBOR_ARRAY_GRANULARITY, + CU_NEIGHBOR_ARRAY_GRANULARITY, + NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); + + if (return_error == EB_ErrorInsufficientResources) { + return EB_ErrorInsufficientResources; + } + return_error = NeighborArrayUnitCtor( + &objectPtr->tempSkipFlagNeighborArray[tileIdx], + MAX_PICTURE_WIDTH_SIZE, + MAX_PICTURE_HEIGHT_SIZE, + sizeof(EB_U8), + CU_NEIGHBOR_ARRAY_GRANULARITY, + CU_NEIGHBOR_ARRAY_GRANULARITY, + NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); + + if (return_error == EB_ErrorInsufficientResources) { + return EB_ErrorInsufficientResources; + } + + return_error = NeighborArrayUnitCtor( + &objectPtr->tempIntraLumaModeNeighborArray[tileIdx], + MAX_PICTURE_WIDTH_SIZE, + MAX_PICTURE_HEIGHT_SIZE, + sizeof(EB_U8), + PU_NEIGHBOR_ARRAY_GRANULARITY, + PU_NEIGHBOR_ARRAY_GRANULARITY, + NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK); + if (return_error == EB_ErrorInsufficientResources) { + return EB_ErrorInsufficientResources; + } } } @@ -648,14 +715,20 @@ EB_ERRORTYPE PictureControlSetCtor( // Entropy Coder return_error = EntropyCoderCtor( - &objectPtr->entropyCodingInfo[tileIdx]->entropyCoderPtr, - SEGMENT_ENTROPY_BUFFER_SIZE); + &objectPtr->entropyCodingInfo[tileIdx]->entropyCoderPtr, + SEGMENT_ENTROPY_BUFFER_SIZE); - if (return_error == EB_ErrorInsufficientResources){ + if (return_error == EB_ErrorInsufficientResources) { + return EB_ErrorInsufficientResources; + } + //Proxy Entropy Coder to be used in Encdec + return_error = EntropyCoderCtor( + &objectPtr->entropyCodingInfo[tileIdx]->tempEntropyCoderPtr, + SEGMENT_ENTROPY_BUFFER_SIZE); + if (return_error == EB_ErrorInsufficientResources) { return EB_ErrorInsufficientResources; } } - // Entropy picture level mutex EB_CREATEMUTEX(EB_HANDLE, objectPtr->entropyCodingPicMutex, sizeof(EB_HANDLE), 
EB_MUTEX); @@ -795,6 +868,7 @@ EB_ERRORTYPE PictureParentControlSetCtor( EB_U32 cuIdx; for (cuIdx = 0; cuIdx < 21; ++cuIdx){ + contigousCand[cuIdx*maxOisCand] = (OisCandidate_t) { 0 }; objectPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[cuIdx] = &contigousCand[cuIdx*maxOisCand]; } } @@ -825,9 +899,8 @@ EB_ERRORTYPE PictureParentControlSetCtor( } EB_MALLOC(EB_U32*, objectPtr->rcMEdistortion, sizeof(EB_U32) * objectPtr->lcuTotalCount, EB_N_PTR); + EB_MALLOC(EB_U32*, objectPtr->rcMESatdDistortion, sizeof(EB_U32) * objectPtr->lcuTotalCount, EB_N_PTR); - - // ME and OIS Distortion Histograms EB_MALLOC(EB_U16*, objectPtr->meDistortionHistogram, sizeof(EB_U16) * NUMBER_OF_SAD_INTERVALS, EB_N_PTR); diff --git a/Source/Lib/Codec/EbPictureControlSet.h b/Source/Lib/Codec/EbPictureControlSet.h index 08c75d427..339fb35c5 100644 --- a/Source/Lib/Codec/EbPictureControlSet.h +++ b/Source/Lib/Codec/EbPictureControlSet.h @@ -141,6 +141,7 @@ typedef struct EntropyTileInfo_s EB_BOOL entropyCodingInProgress; EB_BOOL entropyCodingPicDone; EntropyCoder_t *entropyCoderPtr; + EntropyCoder_t *tempEntropyCoderPtr; } EntropyTileInfo; typedef struct PictureControlSet_s @@ -155,10 +156,10 @@ typedef struct PictureControlSet_s // Reference Lists EbObjectWrapper_t *refPicPtrArray[MAX_NUM_OF_REF_PIC_LIST]; - EB_U8 refPicQpArray[MAX_NUM_OF_REF_PIC_LIST]; - EB_PICTURE refSliceTypeArray[MAX_NUM_OF_REF_PIC_LIST]; - + EB_PICTURE refSliceTypeArray[MAX_NUM_OF_REF_PIC_LIST]; + EB_U8 refPicTemporalLayerArray[MAX_NUM_OF_REF_PIC_LIST]; + // GOP EB_U64 pictureNumber; EB_U8 temporalLayerIndex; @@ -191,6 +192,7 @@ typedef struct PictureControlSet_s EB_U8 pictureQp; EB_U8 difCuDeltaQpDepth; EB_U8 useDeltaQp; + EB_U64 sadCost; // LCU Array EB_U8 lcuMaxDepth; @@ -210,6 +212,10 @@ typedef struct PictureControlSet_s EB_U8 prevCodedQp[EB_TILE_MAX_COUNT]; EB_U8 prevQuantGroupCodedQp[EB_TILE_MAX_COUNT]; + // Temp QP Assignment + EB_U8 tempprevCodedQp[EB_TILE_MAX_COUNT]; + EB_U8 
tempprevQuantGroupCodedQp[EB_TILE_MAX_COUNT]; + // Enc/DecQP Assignment EB_U8 encPrevCodedQp[EB_TILE_MAX_COUNT][MAX_PICTURE_HEIGHT_SIZE / MAX_LCU_SIZE]; EB_U8 encPrevQuantGroupCodedQp[EB_TILE_MAX_COUNT][MAX_PICTURE_HEIGHT_SIZE / MAX_LCU_SIZE]; @@ -257,6 +263,11 @@ typedef struct PictureControlSet_s NeighborArrayUnit_t **intraLumaModeNeighborArray; NeighborArrayUnit_t **skipFlagNeighborArray; + NeighborArrayUnit_t **tempModeTypeNeighborArray; + NeighborArrayUnit_t **tempLeafDepthNeighborArray; + NeighborArrayUnit_t **tempIntraLumaModeNeighborArray; + NeighborArrayUnit_t **tempSkipFlagNeighborArray; + EB_REFLIST colocatedPuRefList; EB_BOOL isLowDelay; @@ -286,6 +297,15 @@ typedef struct PictureControlSet_s EB_BOOL bdpPresentFlag; EB_BOOL mdPresentFlag; + //Row level vbv data + RCStatRow_t **rowStats; + EB_BOOL firstRowOfPicture; + + EB_U64 frameSizePlanned; + EB_U64 frameSizeEstimated; + EB_U64 bufferFillPerFrame; + EB_U8 qpNoVbv; + } PictureControlSet_t; @@ -417,6 +437,7 @@ typedef struct PictureParentControlSet_s EB_BOOL tablesUpdated; EB_BOOL percentageUpdated; EB_U32 targetBitRate; + EB_U32 crf; EB_BOOL minTargetRateAssigned; EB_U32 frameRate; EB_BOOL frameRateIsUpdated; @@ -424,6 +445,8 @@ typedef struct PictureParentControlSet_s EB_U16 lcuTotalCount; EB_BOOL endOfSequenceRegion; EB_BOOL sceneChangeInGop; + EB_S32 hlHistogramQueueIndex; + // used for Look ahead EB_U8 framesInSw; EB_S16 historgramLifeCount; @@ -440,6 +463,7 @@ typedef struct PictureParentControlSet_s EB_U64 averageQp; EB_U64 lastIdrPicture; + EB_U64 lastIdrPictureOrder; EB_U64 startTimeSeconds; EB_U64 startTimeuSeconds; @@ -479,6 +503,7 @@ typedef struct PictureParentControlSet_s MeCuResults_t **meResults; EB_U32 *rcMEdistortion; + EB_U32 *rcMESatdDistortion; // Motion Estimation Distortion and OIS Historgram EB_U16 *meDistortionHistogram; diff --git a/Source/Lib/Codec/EbPictureManagerProcess.c b/Source/Lib/Codec/EbPictureManagerProcess.c index ec5b86470..aa0ef4cad 100644 --- 
a/Source/Lib/Codec/EbPictureManagerProcess.c +++ b/Source/Lib/Codec/EbPictureManagerProcess.c @@ -801,9 +801,9 @@ void* PictureManagerKernel(void *inputPtr) // Rate Control - ChildPictureControlSetPtr->useDeltaQp = (EB_U8)(entrySequenceControlSetPtr->staticConfig.improveSharpness || entrySequenceControlSetPtr->staticConfig.bitRateReduction); + ChildPictureControlSetPtr->useDeltaQp = (EB_U8)(entrySequenceControlSetPtr->staticConfig.improveSharpness || entrySequenceControlSetPtr->staticConfig.bitRateReduction ||(sequenceControlSetPtr->staticConfig.lowLevelVbv)); - // Check resolution + // Check resolution if (entrySequenceControlSetPtr->inputResolution < INPUT_SIZE_1080p_RANGE) ChildPictureControlSetPtr->difCuDeltaQpDepth = 2; else @@ -815,6 +815,8 @@ void* PictureManagerKernel(void *inputPtr) EB_MEMSET(ChildPictureControlSetPtr->refPicQpArray, 0, 2 * sizeof(EB_U8)); EB_MEMSET(ChildPictureControlSetPtr->refSliceTypeArray, 0, 2 * sizeof(EB_PICTURE)); + + EB_MEMSET(ChildPictureControlSetPtr->refPicTemporalLayerArray, 0, 2 * sizeof(EB_U8)); // Configure List0 if ((entryPictureControlSetPtr->sliceType == EB_P_PICTURE) || (entryPictureControlSetPtr->sliceType == EB_B_PICTURE)) { @@ -831,6 +833,7 @@ void* PictureManagerKernel(void *inputPtr) ChildPictureControlSetPtr->refPicQpArray[REF_LIST_0] = ((EbReferenceObject_t*) referenceEntryPtr->referenceObjectPtr->objectPtr)->qp; ChildPictureControlSetPtr->refSliceTypeArray[REF_LIST_0] = ((EbReferenceObject_t*) referenceEntryPtr->referenceObjectPtr->objectPtr)->sliceType; + ChildPictureControlSetPtr->refPicTemporalLayerArray[REF_LIST_0] = ((EbReferenceObject_t*)referenceEntryPtr->referenceObjectPtr->objectPtr)->tmpLayerIdx; // Increment the Reference's liveCount by the number of tiles in the input picture EbObjectIncLiveCount( @@ -863,6 +866,7 @@ void* PictureManagerKernel(void *inputPtr) ChildPictureControlSetPtr->refPicQpArray[REF_LIST_1] = ((EbReferenceObject_t*) referenceEntryPtr->referenceObjectPtr->objectPtr)->qp; 
ChildPictureControlSetPtr->refSliceTypeArray[REF_LIST_1] = ((EbReferenceObject_t*) referenceEntryPtr->referenceObjectPtr->objectPtr)->sliceType; + ChildPictureControlSetPtr->refPicTemporalLayerArray[REF_LIST_1] = ((EbReferenceObject_t*)referenceEntryPtr->referenceObjectPtr->objectPtr)->tmpLayerIdx; diff --git a/Source/Lib/Codec/EbRateControlProcess.c b/Source/Lib/Codec/EbRateControlProcess.c index 6d0f89f40..b32111ad2 100644 --- a/Source/Lib/Codec/EbRateControlProcess.c +++ b/Source/Lib/Codec/EbRateControlProcess.c @@ -4,6 +4,7 @@ */ #include +#include #include "EbDefinitions.h" #include "EbRateControlProcess.h" @@ -16,6 +17,7 @@ #include "EbRateControlResults.h" #include "EbRateControlTasks.h" +#include "EbPerFramePrediction.h" /***************************** * Internal Typedefs @@ -344,9 +346,23 @@ EB_ERRORTYPE RateControlContextCtor( contextPtr->extraBitsGen = 0; contextPtr->maxRateAdjustDeltaQP = 0; + contextPtr->shorttermComplexSum = 0; + contextPtr->shorttermComplexCount = 0; + contextPtr->qcompress = 0.6; + return EB_ErrorNone; } +double qp2qScale(double qp) +{ + return 0.85 * pow(2.0, (qp - 12.0) / 6.0); +} + +double qScale2qp(double qScale) +{ + return 12.0 + 6.0 * (double)LOG2(qScale / 0.85); +} + static void HighLevelRcInputPictureMode2( PictureParentControlSet_t *pictureControlSetPtr, SequenceControlSet_t *sequenceControlSetPtr, @@ -718,7 +734,7 @@ static void HighLevelRcInputPictureMode2( queueEntryIndexTemp = queueEntryIndexHeadTemp; - // This is set to false, so the last frame would go inside the loop + // This is set to false, so the last frame would go inside the loop endOfSequenceFlag = EB_FALSE; while (!endOfSequenceFlag && @@ -929,6 +945,86 @@ static void HighLevelRcInputPictureMode2( EbReleaseMutex(sequenceControlSetPtr->encodeContextPtr->rateTableUpdateMutex); } +void FrameLevelRcInputPictureMode3( + PictureControlSet_t *pictureControlSetPtr, + SequenceControlSet_t *sequenceControlSetPtr, + RateControlContext_t *contextPtr, + EB_U32 
bestOisCuIndex) + { + + double q = 0; + EB_U16 lcuTotalCount = pictureControlSetPtr->lcuTotalCount; + pictureControlSetPtr->sadCost = 0; + // Calculate the sad cost from the rcMEDistortion and OISDistortion by looping over the LCUs + if (pictureControlSetPtr->sliceType == EB_I_PICTURE) + { + for (EB_U16 lcuIndex = 0; lcuIndex < lcuTotalCount; lcuIndex++) + + pictureControlSetPtr->sadCost += pictureControlSetPtr->ParentPcsPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[1][bestOisCuIndex].distortion + + pictureControlSetPtr->ParentPcsPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[2][bestOisCuIndex].distortion + + pictureControlSetPtr->ParentPcsPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[3][bestOisCuIndex].distortion + + pictureControlSetPtr->ParentPcsPtr->oisCu32Cu16Results[lcuIndex]->sortedOisCandidate[4][bestOisCuIndex].distortion; + } + else + { + for (EB_U16 lcuIndex = 0; lcuIndex < lcuTotalCount; lcuIndex++) + pictureControlSetPtr->sadCost += pictureControlSetPtr->ParentPcsPtr->rcMESatdDistortion[lcuIndex]; + } + + //pictureControlSetPtr->sadCost /= SAD_SATD_CONSTANT; + + if (pictureControlSetPtr->temporalLayerIndex > 1 && pictureControlSetPtr->sliceType != EB_I_PICTURE) + { + //Take the average of reference frame's QP and tune with Offset + double q0, q1; + EB_BOOL i0, i1; + EB_U8 d0, d1; + i0 = pictureControlSetPtr->refSliceTypeArray[0] == EB_I_PICTURE; + q0 = i0 ? pictureControlSetPtr->refPicQpArray[0] : (pictureControlSetPtr->refPicQpArray[0] - MOD_QP_OFFSET_LAYER_ARRAY[pictureControlSetPtr->ParentPcsPtr->hierarchicalLevels][pictureControlSetPtr->refPicTemporalLayerArray[0]]); + if (pictureControlSetPtr->sliceType == EB_P_PICTURE) + q = q0; + else + { + i1 = pictureControlSetPtr->refSliceTypeArray[1] == EB_I_PICTURE; + q1 = i1 ? 
pictureControlSetPtr->refPicQpArray[1] : (pictureControlSetPtr->refPicQpArray[1] - MOD_QP_OFFSET_LAYER_ARRAY[pictureControlSetPtr->ParentPcsPtr->hierarchicalLevels][pictureControlSetPtr->refPicTemporalLayerArray[1]]); + d0 = (EB_U8)ABS(((EB_S64)pictureControlSetPtr->pictureNumber - (EB_S64)pictureControlSetPtr->ParentPcsPtr->refPicPocArray[0])); + d1 = (EB_U8)ABS(((EB_S64)pictureControlSetPtr->pictureNumber - (EB_S64)pictureControlSetPtr->ParentPcsPtr->refPicPocArray[1])); + + if (i0&&i1) + q = (q0 + q1) / 2.0; + else if (i0) + q = q1; + else if (i1) + q = q0; + else + q = (q0*d1 + q1 * d0) / (d0 + d1); + } + q += MOD_QP_OFFSET_LAYER_ARRAY[pictureControlSetPtr->ParentPcsPtr->hierarchicalLevels][pictureControlSetPtr->temporalLayerIndex]; + } + else + { + double blurredComplexity; + + /*Calculate the blurred Complexity of the Frame */ + contextPtr->shorttermComplexSum *= 0.5; + contextPtr->shorttermComplexCount *= 0.5; + contextPtr->shorttermComplexSum += pictureControlSetPtr->sadCost / (contextPtr->frameDuration / BASE_FRAME_DURATION); + contextPtr->shorttermComplexCount++; + blurredComplexity = contextPtr->shorttermComplexSum / contextPtr->shorttermComplexCount; + q = pow(blurredComplexity, 1 - contextPtr->qcompress); + q /= contextPtr->rateFactorConstant; + if (pictureControlSetPtr->sliceType != EB_I_PICTURE) + q = qScale2qp(q) + MOD_QP_OFFSET_LAYER_ARRAY[pictureControlSetPtr->ParentPcsPtr->hierarchicalLevels][pictureControlSetPtr->temporalLayerIndex]; + else + q = qScale2qp(q); + } + if (pictureControlSetPtr->pictureNumber == 0) + pictureControlSetPtr->pictureQp = (EB_U8)contextPtr->crf; + else + pictureControlSetPtr->pictureQp = (EB_U8)(q + 0.5); + pictureControlSetPtr->pictureQp = (EB_U8)CLIP3((EB_U8)sequenceControlSetPtr->staticConfig.minQpAllowed,(EB_U8)sequenceControlSetPtr->staticConfig.maxQpAllowed, pictureControlSetPtr->pictureQp); +} + static void FrameLevelRcInputPictureMode2( PictureControlSet_t *pictureControlSetPtr, SequenceControlSet_t 
*sequenceControlSetPtr, @@ -2172,62 +2268,6 @@ static void HighLevelRcFeedBackPicture( } } -static EB_U64 predictBits(SequenceControlSet_t *sequenceControlSetPtr, - EncodeContext_t *encodeContextPtr, - HlRateControlHistogramEntry_t *hlRateControlHistogramPtrTemp, EB_U32 qp) -{ - EB_U64 totalBits = 0; - if (hlRateControlHistogramPtrTemp->isCoded) { - // If the frame is already coded, use the actual number of bits - totalBits = hlRateControlHistogramPtrTemp->totalNumBitsCoded; - } - else { - RateControlTables_t *rateControlTablesPtr = &encodeContextPtr->rateControlTablesArray[qp]; - EB_Bit_Number *sadBitsArrayPtr = rateControlTablesPtr->sadBitsArray[hlRateControlHistogramPtrTemp->temporalLayerIndex]; - EB_Bit_Number *intraSadBitsArrayPtr = rateControlTablesPtr->intraSadBitsArray[0]; - EB_U32 predBitsRefQp = 0; - EB_U32 numOfFullLcus = 0; - EB_U32 areaInPixel = sequenceControlSetPtr->lumaWidth * sequenceControlSetPtr->lumaHeight; - - if (hlRateControlHistogramPtrTemp->sliceType == EB_I_PICTURE) { - // Loop over block in the frame and calculated the predicted bits at reg QP - EB_U32 i; - EB_U32 accum = 0; - for (i = 0; i < NUMBER_OF_INTRA_SAD_INTERVALS; ++i) - { - accum += (EB_U32)(hlRateControlHistogramPtrTemp->oisDistortionHistogram[i] * intraSadBitsArrayPtr[i]); - } - - predBitsRefQp = accum; - numOfFullLcus = hlRateControlHistogramPtrTemp->fullLcuCount; - totalBits += predBitsRefQp; - } - else { - EB_U32 i; - EB_U32 accum = 0; - EB_U32 accumIntra = 0; - for (i = 0; i < NUMBER_OF_SAD_INTERVALS; ++i) - { - accum += (EB_U32)(hlRateControlHistogramPtrTemp->meDistortionHistogram[i] * sadBitsArrayPtr[i]); - accumIntra += (EB_U32)(hlRateControlHistogramPtrTemp->oisDistortionHistogram[i] * intraSadBitsArrayPtr[i]); - - } - if (accum > accumIntra * 3) - predBitsRefQp = accumIntra; - else - predBitsRefQp = accum; - numOfFullLcus = hlRateControlHistogramPtrTemp->fullLcuCount; - totalBits += predBitsRefQp; - } - - // Scale for in complete LCSs - // predBitsRefQp is normalized 
based on the area because of the LCUs at the picture boundries - totalBits = totalBits * (EB_U64)areaInPixel / (numOfFullLcus << 12); - } - hlRateControlHistogramPtrTemp->predBitsRefQp[qp] = totalBits; - return totalBits; -} - static EB_U8 Vbv_Buf_Calc(PictureControlSet_t *pictureControlSetPtr, SequenceControlSet_t *sequenceControlSetPtr, EncodeContext_t *encodeContextPtr) @@ -2352,6 +2392,7 @@ void* RateControlKernel(void *inputPtr) EB_U32 bestOisCuIndex = 0; RATE_CONTROL_TASKTYPES taskType; + HlRateControlHistogramEntry_t *hlRateControlHistogramPtrTemp; for (;;) { @@ -2417,10 +2458,24 @@ void* RateControlKernel(void *inputPtr) contextPtr->vbFillThreshold2 = (contextPtr->virtualBufferSize << 3) >> 3; contextPtr->baseLayerFramesAvgQp = sequenceControlSetPtr->qp; contextPtr->baseLayerIntraFramesAvgQp = sequenceControlSetPtr->qp; + + contextPtr->crf = sequenceControlSetPtr->staticConfig.crf; + + if (sequenceControlSetPtr->staticConfig.rateControlMode == 2) + { + int bFrames = pictureControlSetPtr->ParentPcsPtr->predStructure == EB_PRED_LOW_DELAY_P ? 
80 : 120; + double baseCplx = pictureControlSetPtr->lcuTotalCount * bFrames * 16; + double frameDuration = 1.0 / (sequenceControlSetPtr->frameRate >> RC_PRECISION); + contextPtr->rateFactorConstant = pow(baseCplx, 1 - contextPtr->qcompress) / qp2qScale(contextPtr->crf); + contextPtr->frameDuration = CLIP3(MIN_FRAME_DURATION, MAX_FRAME_DURATION, frameDuration); + + } + encodeContextPtr->vbvMaxrate = sequenceControlSetPtr->staticConfig.vbvMaxrate; encodeContextPtr->vbvBufsize = sequenceControlSetPtr->staticConfig.vbvBufsize; + } - if (sequenceControlSetPtr->staticConfig.rateControlMode) + if (sequenceControlSetPtr->staticConfig.rateControlMode == 1) { pictureControlSetPtr->ParentPcsPtr->intraSelectedOrgQp = 0; HighLevelRcInputPictureMode2( @@ -2429,11 +2484,9 @@ void* RateControlKernel(void *inputPtr) sequenceControlSetPtr->encodeContextPtr, contextPtr, contextPtr->highLevelRateControlPtr); - - } - - // Frame level RC - if (sequenceControlSetPtr->intraPeriodLength == -1 || sequenceControlSetPtr->staticConfig.rateControlMode == 0){ + } + // Frame level RC + if (sequenceControlSetPtr->intraPeriodLength == -1 || sequenceControlSetPtr->staticConfig.rateControlMode == 0 || sequenceControlSetPtr->staticConfig.rateControlMode == 2){ rateControlParamPtr = contextPtr->rateControlParamQueue[0]; prevGopRateControlParamPtr = contextPtr->rateControlParamQueue[0]; nextGopRateControlParamPtr = contextPtr->rateControlParamQueue[0]; @@ -2494,6 +2547,16 @@ void* RateControlKernel(void *inputPtr) pictureControlSetPtr->pictureQp = (EB_U8)CLIP3((EB_S32)sequenceControlSetPtr->staticConfig.minQpAllowed, (EB_S32)sequenceControlSetPtr->staticConfig.maxQpAllowed,pictureControlSetPtr->ParentPcsPtr->pictureQp); } + } + else if (sequenceControlSetPtr->staticConfig.rateControlMode == 2) + + { + FrameLevelRcInputPictureMode3( + pictureControlSetPtr, + sequenceControlSetPtr, + contextPtr, + bestOisCuIndex); + } else{ FrameLevelRcInputPictureMode2( @@ -2598,11 +2661,19 @@ void* 
RateControlKernel(void *inputPtr) pictureControlSetPtr->pictureQp); } } - if (encodeContextPtr->vbvMaxrate && encodeContextPtr->vbvBufsize && sequenceControlSetPtr->staticConfig.lookAheadDistance > 0) - { + if (encodeContextPtr->vbvMaxrate && encodeContextPtr->vbvBufsize && sequenceControlSetPtr->staticConfig.lookAheadDistance > 0) { EbBlockOnMutex(encodeContextPtr->bufferFillMutex); + pictureControlSetPtr->qpNoVbv = pictureControlSetPtr->pictureQp; pictureControlSetPtr->pictureQp = (EB_U8)Vbv_Buf_Calc(pictureControlSetPtr, sequenceControlSetPtr, encodeContextPtr); - + hlRateControlHistogramPtrTemp = encodeContextPtr->hlRateControlHistorgramQueue[pictureControlSetPtr->ParentPcsPtr->hlHistogramQueueIndex]; + pictureControlSetPtr->frameSizePlanned = predictBits(sequenceControlSetPtr, encodeContextPtr, hlRateControlHistogramPtrTemp, pictureControlSetPtr->pictureQp); + /* Update low level VBV Plan*/ + EB_S64 bufferfill_plan = (EB_S64)(encodeContextPtr->bufferFill); + bufferfill_plan -= pictureControlSetPtr->frameSizePlanned; + bufferfill_plan = MAX(bufferfill_plan, 0); + bufferfill_plan = (EB_S64)(bufferfill_plan + (encodeContextPtr->vbvMaxrate * (1.0 / (sequenceControlSetPtr->frameRate >> RC_PRECISION)))); + bufferfill_plan = MIN(bufferfill_plan, encodeContextPtr->vbvBufsize); + pictureControlSetPtr->bufferFillPerFrame = (EB_U64)(bufferfill_plan); EbReleaseMutex(encodeContextPtr->bufferFillMutex); } pictureControlSetPtr->ParentPcsPtr->pictureQp = pictureControlSetPtr->pictureQp; @@ -2667,7 +2738,7 @@ void* RateControlKernel(void *inputPtr) sequenceControlSetPtr = (SequenceControlSet_t*)parentPictureControlSetPtr->sequenceControlSetWrapperPtr->objectPtr; // Frame level RC - if (sequenceControlSetPtr->intraPeriodLength == -1 || sequenceControlSetPtr->staticConfig.rateControlMode == 0){ + if (sequenceControlSetPtr->intraPeriodLength == -1 || sequenceControlSetPtr->staticConfig.rateControlMode == 0 || sequenceControlSetPtr->staticConfig.rateControlMode == 2){ 
rateControlParamPtr = contextPtr->rateControlParamQueue[0]; prevGopRateControlParamPtr = contextPtr->rateControlParamQueue[0]; if (parentPictureControlSetPtr->sliceType == EB_I_PICTURE){ @@ -2707,7 +2778,7 @@ void* RateControlKernel(void *inputPtr) contextPtr->rateControlParamQueue[intervalIndexTemp - 1]; } - if (sequenceControlSetPtr->staticConfig.rateControlMode != 0){ + if (sequenceControlSetPtr->staticConfig.rateControlMode == 1){ contextPtr->previousVirtualBufferLevel = contextPtr->virtualBufferLevel; diff --git a/Source/Lib/Codec/EbRateControlProcess.h b/Source/Lib/Codec/EbRateControlProcess.h index f517e82ce..e1aa93064 100644 --- a/Source/Lib/Codec/EbRateControlProcess.h +++ b/Source/Lib/Codec/EbRateControlProcess.h @@ -247,6 +247,15 @@ typedef struct RateControlContext_s EB_U64 frameRate; + //CRF + EB_U32 crf; + double shorttermComplexSum; + double shorttermComplexCount; + double rateFactorConstant; + double qcompress; + double frameDuration; + + EB_U64 virtualBufferSize; EB_S64 virtualBufferLevelInitialValue; diff --git a/Source/Lib/Codec/EbResourceCoordinationProcess.c b/Source/Lib/Codec/EbResourceCoordinationProcess.c index a95ecc499..19ea40dda 100644 --- a/Source/Lib/Codec/EbResourceCoordinationProcess.c +++ b/Source/Lib/Codec/EbResourceCoordinationProcess.c @@ -380,7 +380,7 @@ void* ResourceCoordinationKernel(void *inputPtr) EbObjectWrapper_t *prevPictureControlSetWrapperPtr = 0; EB_U32 chromaFormat = EB_YUV420; EB_U32 subWidthCMinus1 = 1; - EB_U32 subHeightCMinus1 = 1; + // EB_U32 subHeightCMinus1 = 1; for(;;) { @@ -401,7 +401,7 @@ void* ResourceCoordinationKernel(void *inputPtr) chromaFormat = sequenceControlSetPtr->chromaFormatIdc; subWidthCMinus1 = (chromaFormat == EB_YUV444 ? 1 : 2) - 1; - subHeightCMinus1 = (chromaFormat >= EB_YUV422 ? 1 : 2) - 1; + // subHeightCMinus1 = (chromaFormat >= EB_YUV422 ? 
1 : 2) - 1; // If config changes occured since the last picture began encoding, then // prepare a new sequenceControlSetPtr containing the new changes and update the state // of the previous Active SequenceControlSet @@ -588,7 +588,7 @@ void* ResourceCoordinationKernel(void *inputPtr) // Rate Control // Set the ME Distortion and OIS Historgrams to zero - if (sequenceControlSetPtr->staticConfig.rateControlMode){ + if (sequenceControlSetPtr->staticConfig.rateControlMode != 0){ EB_MEMSET(pictureControlSetPtr->meDistortionHistogram, 0, NUMBER_OF_SAD_INTERVALS*sizeof(EB_U16)); EB_MEMSET(pictureControlSetPtr->oisDistortionHistogram, 0, NUMBER_OF_INTRA_SAD_INTERVALS*sizeof(EB_U16)); } diff --git a/Source/Lib/Codec/EbUtility.h b/Source/Lib/Codec/EbUtility.h index 00910b992..538404839 100644 --- a/Source/Lib/Codec/EbUtility.h +++ b/Source/Lib/Codec/EbUtility.h @@ -108,7 +108,7 @@ extern EB_U32 EndianSwap(EB_U32 ui); #define POW2_CHECK(x) ((x) == ((x) & (-((EB_S32)(x))))) #define ROUND_UP_MUL_8(x) ((x) + ((8 - ((x) & 0x7)) & 0x7)) #define ROUND_UP_MULT(x,mult) ((x) + (((mult) - ((x) & ((mult)-1))) & ((mult)-1))) - +#define LOG2(x) (log((double)(x)) * 1.4426950408889640513713538072172) // rounds down to the next power of two #define FLOOR_POW2(x) \ MULTI_LINE_MACRO_BEGIN \