From 38672426d53d190cc756e3792efb31da684f5611 Mon Sep 17 00:00:00 2001 From: Jens Brandenburg Date: Tue, 22 Dec 2020 11:21:20 +0100 Subject: [PATCH] merged develop v0.2.1.0 --- CMakeLists.txt | 2 +- README.md | 7 ++ changelog.txt | 13 +++- source/Lib/CommonLib/CodingStructure.cpp | 84 +++++++++++++----------- source/Lib/CommonLib/CodingStructure.h | 2 + source/Lib/CommonLib/Picture.cpp | 8 ++- source/Lib/CommonLib/TrQuant.cpp | 21 +++--- source/Lib/CommonLib/x86/QuantX86.h | 7 +- source/Lib/DecoderLib/DecLib.cpp | 12 +++- source/Lib/EncoderLib/BitAllocation.cpp | 11 ++-- source/Lib/EncoderLib/BitAllocation.h | 2 +- source/Lib/EncoderLib/EncLib.cpp | 8 --- source/Lib/EncoderLib/EncPicture.cpp | 10 ++- source/Lib/EncoderLib/EncSlice.cpp | 2 + source/Lib/EncoderLib/RateCtrl.cpp | 15 ++--- source/Lib/EncoderLib/RateCtrl.h | 2 +- source/Lib/vvenc/EncCfg.cpp | 2 +- 17 files changed, 124 insertions(+), 84 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 072a5744a..9f95c25fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ if( NOT CMAKE_VERSION VERSION_LESS 3.12.0 ) endif() # project name -project( vvenc VERSION 0.2.0.0 ) +project( vvenc VERSION 0.2.1.0 ) if( NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR ) # enable sse4.1 build for all source files for gcc and clang diff --git a/README.md b/README.md index 31e042bd0..67465c7ed 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,13 @@ The expert mode encoder (**vvencFFapp**) is based on the [VVC test model (VTM)]( vvencFFapp -c randomaccess_medium.cfg -c sequence.cfg +# Contributing + +Feel free to contribute. 
To do so: + +* Fork the most recent state of the master branch +* Apply the desired changes +* Create a pull-request to the upstream repository # License diff --git a/changelog.txt b/changelog.txt index 41b6a841c..32b62e84c 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,14 @@ +///////////////////////////////////////// +tag 0.2.1.0 + +* bugfixes: + - ISP fix: erroneously placed sanity check + +* libvvenc: + - decrease memory consumption + - harmonize 2-pass rate control and perceptual QPA + - improve rate control + ///////////////////////////////////////// tag 0.2.0.0 @@ -22,7 +33,7 @@ tag 0.2.0.0 - various memory reductions (Rom.cpp, scaling list memory) - verious optimizations (SIMD for MCTF, forward transformation, single column IF; memory accesses for DMVR) - changed MCTF algorithm to do intermediate rounding between hor/ver filtering - + * vvencapp: - new parameter --refreshsec,-rs to define the intra refresh rate in seconds depending on the given frame rate. Internally, the refresh rate in seconds is translated into the frames where the refresh is set. 
diff --git a/source/Lib/CommonLib/CodingStructure.cpp b/source/Lib/CommonLib/CodingStructure.cpp index f03924dc8..5556b8ac8 100644 --- a/source/Lib/CommonLib/CodingStructure.cpp +++ b/source/Lib/CommonLib/CodingStructure.cpp @@ -122,24 +122,10 @@ void CodingStructure::destroy() destroyCoeffs(); - for( uint32_t i = 0; i < MAX_NUM_CH; i++ ) - { - delete[] m_cuPtr[ i ]; - m_cuPtr[ i ] = nullptr; - - delete[] m_tuPtr[ i ]; - m_tuPtr[ i ] = nullptr; - } - - for( int i = 0; i < NUM_EDGE_DIR; i++ ) - { - xFree( m_lfParam[ i ] ); - m_lfParam[ i ] = nullptr; - } - delete[] m_motionBuf; m_motionBuf = nullptr; + destroyTempBuffers(); if ( m_unitCacheMutex ) m_unitCacheMutex->lock(); @@ -650,17 +636,34 @@ void CodingStructure::createInternals( const UnitArea& _unit, const bool isTopLa parent = nullptr; refCS = nullptr; - unsigned numCh = getNumberValidChannels(area.chromaFormat); + unsigned _lumaAreaScaled = g_miScaling.scale( area.lumaSize() ).area(); + m_motionBuf = new MotionInfo[_lumaAreaScaled]; - for (unsigned i = 0; i < numCh; i++) + if( isTopLayer ) { - Size allocArea = area.blocks[i].size(); - m_mapSize[i] = unitScale[i].scale(allocArea); + motionLutBuf.resize( pcv->heightInCtus ); + } + else + { + createCoeffs(); + createTempBuffers( false ); + initStructData(); + } +} - unsigned _area = unitScale[i].scale( area.blocks[i].size() ).area(); +void CodingStructure::createTempBuffers( const bool isTopLayer ) +{ + unsigned numCh = getNumberValidChannels( area.chromaFormat ); + + for( unsigned i = 0; i < numCh; i++ ) + { + Size allocArea = area.blocks[i].size(); + m_mapSize[i] = unitScale[i].scale(allocArea); - m_cuPtr[i] = _area > 0 ? new CodingUnit* [_area] : nullptr; - m_tuPtr[i] = _area > 0 ? new TransformUnit* [_area] : nullptr; + unsigned _area = unitScale[i].scale( area.blocks[i].size() ).area(); + + m_cuPtr[i] = _area > 0 ? new CodingUnit* [_area] : nullptr; + m_tuPtr[i] = _area > 0 ? 
new TransformUnit* [_area] : nullptr; } for( unsigned i = 0; i < NUM_EDGE_DIR; i++ ) @@ -668,29 +671,29 @@ void CodingStructure::createInternals( const UnitArea& _unit, const bool isTopLa m_lfParam[i] = ( isTopLayer && m_mapSize[0].area() > 0 ) ? ( LoopFilterParam* ) xMalloc( LoopFilterParam, m_mapSize[0].area() ) : nullptr; } - numCh = getNumberValidComponents(area.chromaFormat); + unsigned _maxNumDmvrMvs = ( area.lwidth() >> 3 ) * ( area.lheight() >> 3 ); + m_dmvrMvCache.resize( _maxNumDmvrMvs ); +} - for (unsigned i = 0; i < numCh; i++) +void CodingStructure::destroyTempBuffers() +{ + for( uint32_t i = 0; i < MAX_NUM_CH; i++ ) { - m_offsets[i] = 0; + delete[] m_cuPtr[i]; + m_cuPtr[i] = nullptr; + + delete[] m_tuPtr[i]; + m_tuPtr[i] = nullptr; } - if( isTopLayer ) + for( int i = 0; i < NUM_EDGE_DIR; i++ ) { - motionLutBuf.resize( pcv->heightInCtus ); + xFree( m_lfParam[i] ); + m_lfParam[i] = nullptr; } - else - { - createCoeffs(); - } - - unsigned _lumaAreaScaled = g_miScaling.scale( area.lumaSize() ).area(); - m_motionBuf = new MotionInfo[_lumaAreaScaled]; - - unsigned _maxNumDmvrMvs = ( area.lwidth() >> 3 ) * ( area.lheight() >> 3 ); - m_dmvrMvCache.resize( _maxNumDmvrMvs ); - initStructData(); + // swap the contents of the vector so that memory released + std::vector().swap( m_dmvrMvCache ); } void CodingStructure::addMiToLut(static_vector &lut, const HPMVInfo &mi) @@ -739,6 +742,11 @@ void CodingStructure::createCoeffs() m_coeffs[i] = _area > 0 ? 
( TCoeff* ) xMalloc( TCoeff, _area ) : nullptr; } + + for( unsigned i = 0; i < numComp; i++ ) + { + m_offsets[i] = 0; + } } void CodingStructure::destroyCoeffs() diff --git a/source/Lib/CommonLib/CodingStructure.h b/source/Lib/CommonLib/CodingStructure.h index a85176efc..84e63e7d6 100644 --- a/source/Lib/CommonLib/CodingStructure.h +++ b/source/Lib/CommonLib/CodingStructure.h @@ -176,6 +176,8 @@ class CodingStructure void clearCUs(); const int signalModeCons( const PartSplit split, Partitioner &partitioner, const ModeType modeTypeParent ) const; + void createTempBuffers( const bool isTopLayer ); + void destroyTempBuffers(); private: void createInternals(const UnitArea& _unit, const bool isTopLayer); diff --git a/source/Lib/CommonLib/Picture.cpp b/source/Lib/CommonLib/Picture.cpp index e15ff7faf..980b9d3af 100644 --- a/source/Lib/CommonLib/Picture.cpp +++ b/source/Lib/CommonLib/Picture.cpp @@ -187,7 +187,6 @@ void Picture::create( ChromaFormat _chromaFormat, const Size& size, unsigned _ma margin = _margin; const Area a = Area( Position(), size ); m_bufs[ PIC_RECONSTRUCTION ].create( _chromaFormat, a, _maxCUSize, _margin, MEMORY_ALIGN_DEF_SIZE ); - m_bufs[ PIC_SAO_TEMP ].create( _chromaFormat, a, _maxCUSize, 0, MEMORY_ALIGN_DEF_SIZE ); if( _decoder ) { @@ -230,11 +229,17 @@ void Picture::destroy() void Picture::createTempBuffers( unsigned _maxCUSize ) { + CHECK( !cs, "Coding structure is required a this point!" 
); + + m_bufs[PIC_SAO_TEMP].create( chromaFormat, Y(), cs->pcv->maxCUSize, 0, MEMORY_ALIGN_DEF_SIZE ); + if( cs ) cs->rebindPicBufs(); } void Picture::destroyTempBuffers() { + m_bufs[PIC_SAO_TEMP].destroy(); + if( cs ) cs->rebindPicBufs(); } @@ -262,7 +267,6 @@ void Picture::finalInit( const VPS& _vps, const SPS& sps, const PPS& pps, PicHea if( cs ) { - cs->initStructData(); CHECK( cs->sps != &sps, "picture initialization error: sps changed" ); CHECK( cs->vps != &_vps, "picture initialization error: vps changed" ); } diff --git a/source/Lib/CommonLib/TrQuant.cpp b/source/Lib/CommonLib/TrQuant.cpp index e33f3d7a6..5f21057b4 100644 --- a/source/Lib/CommonLib/TrQuant.cpp +++ b/source/Lib/CommonLib/TrQuant.cpp @@ -501,19 +501,20 @@ void TrQuant::xT( const TransformUnit& tu, const ComponentID compID, const CPelB } #endif //ENABLE_SIMD_TRAFO - const int shift_1st = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC; - const int shift_2nd = (Log2(height)) + TRANSFORM_MATRIX_SHIFT + COM16_C806_TRANS_PREC; - CHECK( shift_1st < 0, "Negative shift" ); - CHECK( shift_2nd < 0, "Negative shift" ); - if (width > 1 && height > 1) { + const int shift_1st = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC; + const int shift_2nd = (Log2(height)) + TRANSFORM_MATRIX_SHIFT + COM16_C806_TRANS_PREC; + CHECK( shift_1st < 0, "Negative shift" ); + CHECK( shift_2nd < 0, "Negative shift" ); fastFwdTrans[trTypeHor][transformWidthIndex](block, tmp, shift_1st, height, 0, skipWidth); fastFwdTrans[trTypeVer][transformHeightIndex](tmp, dstCoeff.buf, shift_2nd, width, skipWidth, skipHeight); } else if (height == 1) // 1-D horizontal transform { - fastFwdTrans[trTypeHor][transformWidthIndex](block, dstCoeff.buf, shift_1st, 1, 0, skipWidth); + const int shift = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC; + CHECK( shift < 0, "Negative shift" ); + 
fastFwdTrans[trTypeHor][transformWidthIndex](block, dstCoeff.buf, shift, 1, 0, skipWidth); } else // if (iWidth == 1) //1-D vertical transform { @@ -562,14 +563,14 @@ void TrQuant::xIT( const TransformUnit& tu, const ComponentID compID, const CCoe } } - const int shift_1st = TRANSFORM_MATRIX_SHIFT + 1 + COM16_C806_TRANS_PREC; // 1 has been added to shift_1st at the expense of shift_2nd - const int shift_2nd = ( TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1 ) - bitDepth + COM16_C806_TRANS_PREC; - CHECK( shift_1st < 0, "Negative shift" ); - CHECK( shift_2nd < 0, "Negative shift" ); TCoeff *block = m_blk; TCoeff *tmp = m_tmp; if (width > 1 && height > 1) // 2-D transform { + const int shift_1st = TRANSFORM_MATRIX_SHIFT + 1 + COM16_C806_TRANS_PREC; // 1 has been added to shift_1st at the expense of shift_2nd + const int shift_2nd = ( TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1 ) - bitDepth + COM16_C806_TRANS_PREC; + CHECK( shift_1st < 0, "Negative shift" ); + CHECK( shift_2nd < 0, "Negative shift" ); fastInvTrans[trTypeVer][transformHeightIndex](pCoeff.buf, tmp, shift_1st, width, skipWidth, skipHeight, clipMinimum, clipMaximum); fastInvTrans[trTypeHor][transformWidthIndex](tmp, block, shift_2nd, height, 0, skipWidth, clipMinimum, clipMaximum); } diff --git a/source/Lib/CommonLib/x86/QuantX86.h b/source/Lib/CommonLib/x86/QuantX86.h index 793b4b7d8..5e6e0ab6d 100644 --- a/source/Lib/CommonLib/x86/QuantX86.h +++ b/source/Lib/CommonLib/x86/QuantX86.h @@ -204,7 +204,7 @@ static void DeQuantCoreSIMD(const int maxX,const int maxY,const int scale,const { for( int y = 0; y <= maxY; y++) { - __m128i v_level = _mm_loadu_si128( ( __m128i const * )&piQCoef[y * piQCfStride] ); + __m128i v_level = maxX == 1 ? 
_mm_loadl_epi64( (__m128i const*) & piQCoef[y * piQCfStride] ) : _mm_loadu_si128( (__m128i const*) & piQCoef[y * piQCfStride] ); v_level = _mm_packs_epi32 (v_level,v_level); v_level = _mm_and_si128(v_level,vlevmask); v_level = _mm_max_epi16 (v_level, v_min); @@ -218,7 +218,10 @@ static void DeQuantCoreSIMD(const int maxX,const int maxY,const int scale,const v_level = _mm_max_epi32 (v_level, v_Tmin); v_level = _mm_min_epi32 (v_level, v_Tmax); - _mm_storeu_si128(( __m128i * )(piCoef+y*width ), v_level ); + if( maxX == 1 ) + _mm_storel_epi64( (__m128i*)(piCoef + y * width), v_level ); + else + _mm_storeu_si128( (__m128i*)(piCoef + y * width), v_level ); } } else diff --git a/source/Lib/DecoderLib/DecLib.cpp b/source/Lib/DecoderLib/DecLib.cpp index de5d2b55e..8f1e16bf5 100644 --- a/source/Lib/DecoderLib/DecLib.cpp +++ b/source/Lib/DecoderLib/DecLib.cpp @@ -156,6 +156,11 @@ bool tryDecodePicture( Picture* pcEncPic, const int expectedPoc, const std::stri { if( pic->poc == poc && (!bDecodeUntilPocFound || expectedPoc == poc ) ) { + pcEncPic->createTempBuffers( pic->cs->pcv->maxCUSize ); + pcEncPic->cs->createCoeffs(); + pcEncPic->cs->createTempBuffers( true ); + pcEncPic->cs->initStructData(); + CHECK( pcEncPic->slices.size() == 0, "at least one slice should be available" ); CHECK( expectedPoc != poc, "mismatch in POC - check encoder configuration" ); @@ -669,9 +674,10 @@ void DecLib::finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl ) m_maxDecSubPicIdx = 0; m_maxDecSliceAddrInSubPic = -1; - m_pic->destroyTempBuffers(); - m_pic->cs->destroyCoeffs(); m_pic->cs->releaseIntermediateData(); + m_pic->cs->destroyTempBuffers(); + m_pic->cs->destroyCoeffs(); + m_pic->destroyTempBuffers(); m_pic->cs->picHeader->initPicHeader(); } @@ -1007,6 +1013,8 @@ void DecLib::xActivateParameterSets( const int layerId) m_pic->createTempBuffers( m_pic->cs->pps->pcv->maxCUSize ); m_pic->cs->createCoeffs(); + m_pic->cs->createTempBuffers( true ); + m_pic->cs->initStructData(); 
m_pic->allocateNewSlice(); // make the slice-pilot a real slice, and set up the slice-pilot for the next slice diff --git a/source/Lib/EncoderLib/BitAllocation.cpp b/source/Lib/EncoderLib/BitAllocation.cpp index 13628ebc5..161562521 100644 --- a/source/Lib/EncoderLib/BitAllocation.cpp +++ b/source/Lib/EncoderLib/BitAllocation.cpp @@ -339,7 +339,7 @@ int BitAllocation::applyQPAdaptationChroma (const Slice* slice, const EncCfg* en if (pic == nullptr || encCfg == nullptr || optChromaQPOffset == nullptr || encCfg->m_usePerceptQPA > 4) return -1; - const bool isXPSNRBasedQPA = (encCfg->m_usePerceptQPA & 1) == 0 && encCfg->m_RCNumPasses != 2; + const bool isXPSNRBasedQPA = (encCfg->m_usePerceptQPA & 1) == 0 && (encCfg->m_RCRateControlMode == 0 || encCfg->m_RCNumPasses != 2); const bool isHighResolution = (encCfg->m_SourceWidth > 2048 || encCfg->m_SourceHeight > 1280) && ( encCfg->m_usePerceptQPA & 1 ) == 0; const int bitDepth = slice->sps->bitDepths[CH_L]; @@ -394,6 +394,7 @@ int BitAllocation::applyQPAdaptationChroma (const Slice* slice, const EncCfg* en } int BitAllocation::applyQPAdaptationLuma (const Slice* slice, const EncCfg* encCfg, const int savedQP, const double lambda, std::vector& ctuPumpRedQP, + const bool forceFrameWiseQPA, const uint32_t ctuStartAddr, const uint32_t ctuBoundingAddr, const bool isHDR /*= false*/) { Picture* const pic = (slice != nullptr ? 
slice->pic : nullptr); @@ -403,9 +404,9 @@ int BitAllocation::applyQPAdaptationLuma (const Slice* slice, const EncCfg* encC if (pic == nullptr || pic->cs == nullptr || encCfg == nullptr || ctuStartAddr >= ctuBoundingAddr) return -1; - const bool isXPSNRBasedQPA = (encCfg->m_usePerceptQPA & 1) == 0 && encCfg->m_RCNumPasses != 2; + const bool isXPSNRBasedQPA = (encCfg->m_usePerceptQPA & 1) == 0 && (encCfg->m_RCRateControlMode == 0 || encCfg->m_RCNumPasses != 2); const bool isHighResolution = (encCfg->m_SourceWidth > 2048 || encCfg->m_SourceHeight > 1280) && ( encCfg->m_usePerceptQPA & 1 ) == 0; - const bool useFrameWiseQPA = (encCfg->m_QP > MAX_QP_PERCEPT_QPA); + const bool useFrameWiseQPA = (encCfg->m_QP > MAX_QP_PERCEPT_QPA) || forceFrameWiseQPA; const int bitDepth = slice->sps->bitDepths[CH_L]; const int sliceQP = (savedQP < 0 ? slice->sliceQp : savedQP); const PreCalcValues& pcv = *pic->cs->pcv; @@ -588,7 +589,7 @@ int BitAllocation::applyQPAdaptationSubCtu (const Slice* slice, const EncCfg* en if (pic == nullptr || encCfg == nullptr) return -1; - const bool isXPSNRBasedQPA = (encCfg->m_usePerceptQPA & 1) == 0 && encCfg->m_RCNumPasses != 2; + const bool isXPSNRBasedQPA = (encCfg->m_usePerceptQPA & 1) == 0 && (encCfg->m_RCRateControlMode == 0 || encCfg->m_RCNumPasses != 2); const bool isHighResolution = (encCfg->m_SourceWidth > 2048 || encCfg->m_SourceHeight > 1280) && ( encCfg->m_usePerceptQPA & 1 ) == 0; const int bitDepth = slice->sps->bitDepths[CH_L]; const PosType guardSize = (isHighResolution ? 
2 : 1); @@ -664,7 +665,7 @@ double BitAllocation::getPicVisualActivity (const Slice* slice, const EncCfg* en if (pic == nullptr || encCfg == nullptr) return 0.0; - const bool isXPSNRQPA = (encCfg->m_usePerceptQPA & 1) == 0 && encCfg->m_RCNumPasses != 2; + const bool isXPSNRQPA = (encCfg->m_usePerceptQPA & 1) == 0 && (encCfg->m_RCRateControlMode == 0 || encCfg->m_RCNumPasses != 2); const bool isHighRes = ( encCfg->m_SourceWidth > 2048 || encCfg->m_SourceHeight > 1280 ) && ( encCfg->m_usePerceptQPA & 1 ) == 0; const CPelBuf picOrig = (origBuf != nullptr ? *origBuf : pic->getOrigBuf (COMP_Y)); const CPelBuf picPrv1 = (isXPSNRQPA ? pic->getOrigBufPrev (COMP_Y, false) : picOrig); diff --git a/source/Lib/EncoderLib/BitAllocation.h b/source/Lib/EncoderLib/BitAllocation.h index e382230b3..43226644c 100644 --- a/source/Lib/EncoderLib/BitAllocation.h +++ b/source/Lib/EncoderLib/BitAllocation.h @@ -65,7 +65,7 @@ namespace vvenc { std::vector& ctuPumpRedQP, int optChromaQPOffset[2], const bool isHDR = false); int applyQPAdaptationLuma (const Slice* slice, const EncCfg* encCfg, const int savedQP, const double lambda, - std::vector& ctuPumpRedQP, + std::vector& ctuPumpRedQP, const bool forceFrameWiseQPA, const uint32_t ctuStartAddr, const uint32_t ctuBoundingAddr, const bool isHDR = false); int applyQPAdaptationSubCtu (const Slice* slice, const EncCfg* encCfg, const Area& lumaArea, const bool isHDR = false); int getCtuPumpingReducingQP (const Slice* slice, const CPelBuf& origY, const Distortion uiSadBestForQPA, diff --git a/source/Lib/EncoderLib/EncLib.cpp b/source/Lib/EncoderLib/EncLib.cpp index 6a7d2152f..97f0c239b 100644 --- a/source/Lib/EncoderLib/EncLib.cpp +++ b/source/Lib/EncoderLib/EncLib.cpp @@ -362,11 +362,6 @@ void EncLib::xSetRCEncCfg( int pass ) // restore MCTF m_cBckCfg.m_MCTF = mctf; - // configure QPA in the first pass - m_cBckCfg.m_usePerceptQPA = 0; // disable QPA in the first pass - m_cBckCfg.m_sliceChromaQpOffsetPeriodicity = 0; - 
m_cBckCfg.m_usePerceptQPATempFiltISlice = 0; - std::swap( const_cast(m_cEncCfg), m_cBckCfg ); } } @@ -603,9 +598,6 @@ void EncLib::xInitPicture( Picture& pic, int picNum, const PPS& pps, const SPS& pic.vps = &vps; pic.dci = &dci; - pic.createTempBuffers( pic.cs->pps->pcv->maxCUSize ); - pic.cs->createCoeffs(); - // filter data initialization const uint32_t numberOfCtusInFrame = pic.cs->pcv->sizeInCtus; diff --git a/source/Lib/EncoderLib/EncPicture.cpp b/source/Lib/EncoderLib/EncPicture.cpp index 9ff166ace..6f7b22749 100644 --- a/source/Lib/EncoderLib/EncPicture.cpp +++ b/source/Lib/EncoderLib/EncPicture.cpp @@ -90,6 +90,11 @@ void EncPicture::encodePicture( Picture& pic, ParameterSetMap& shrdApsMap, // compress picture if ( pic.encPic ) { + pic.createTempBuffers( pic.cs->pcv->maxCUSize ); + pic.cs->createCoeffs(); + pic.cs->createTempBuffers( true ); + pic.cs->initStructData(); + xInitPicEncoder ( pic ); gopEncoder.picInitRateControl( pic.gopId, pic, pic.slices[ 0 ] ); xCompressPicture( pic ); @@ -113,9 +118,10 @@ void EncPicture::encodePicture( Picture& pic, ParameterSetMap& shrdApsMap, pic.picBlkStat.storeBlkSize( pic ); } // cleanup - pic.destroyTempBuffers(); - pic.cs->destroyCoeffs(); pic.cs->releaseIntermediateData(); + pic.cs->destroyTempBuffers(); + pic.cs->destroyCoeffs(); + pic.destroyTempBuffers(); pic.encTime.stopTimer(); diff --git a/source/Lib/EncoderLib/EncSlice.cpp b/source/Lib/EncoderLib/EncSlice.cpp index ac2ff03af..654e925b8 100644 --- a/source/Lib/EncoderLib/EncSlice.cpp +++ b/source/Lib/EncoderLib/EncSlice.cpp @@ -268,6 +268,7 @@ void EncSlice::xInitSliceLambdaQP( Slice* slice, int gopId ) } if (m_pcEncCfg->m_usePerceptQPA) { + const bool rcIsFirstPassOf2 = (m_pcEncCfg->m_RCRateControlMode == 2 ? 
m_pcEncCfg->m_RCNumPasses == 2 && !m_pcRateCtrl->rcIsFinalPass : false); uint32_t startCtuTsAddr = slice->sliceMap.ctuAddrInSlice[0]; uint32_t boundingCtuTsAddr = slice->pic->cs->pcv->sizeInCtus; @@ -275,6 +276,7 @@ void EncSlice::xInitSliceLambdaQP( Slice* slice, int gopId ) slice->pic->picInitialQP = iQP; if ((iQP = BitAllocation::applyQPAdaptationLuma (slice, m_pcEncCfg, adaptedLumaQP, dLambda, *m_CtuTaskRsrc[ 0 ]->m_encCu.getQpPtr(), + rcIsFirstPassOf2, startCtuTsAddr, boundingCtuTsAddr, m_pcEncCfg->m_usePerceptQPA > 2)) >= 0) // sets pic->ctuAdaptedQP[] & ctuQpaLambda[] { dLambda *= pow (2.0, ((double) iQP - dQP) / 3.0); // adjust lambda based on change of slice QP diff --git a/source/Lib/EncoderLib/RateCtrl.cpp b/source/Lib/EncoderLib/RateCtrl.cpp index b3369438f..43f060b08 100644 --- a/source/Lib/EncoderLib/RateCtrl.cpp +++ b/source/Lib/EncoderLib/RateCtrl.cpp @@ -304,7 +304,7 @@ void EncRCSeq::updateAfterPic ( int bits ) framesLeft--; } -void EncRCSeq::getTargetBitsFromFirstPass( int numPicCoded, int &targetBits, double &gopVsBitrateRatio, bool &isNewScene, double alpha[] ) +void EncRCSeq::getTargetBitsFromFirstPass( int numPicCoded, int &targetBits, double &gopVsBitrateRatio, double &frameVsGopRatio, bool &isNewScene, double alpha[] ) { int picCounter = 0; int numOfLevels = int( log( gopSize ) / log( 2 ) + 0.5 ) + 2; @@ -316,6 +316,7 @@ void EncRCSeq::getTargetBitsFromFirstPass( int numPicCoded, int &targetBits, dou { targetBits = it->targetBits; gopVsBitrateRatio = it->gopBitsVsBitrate; + frameVsGopRatio = it->frameInGopRatio; isNewScene = it->isNewScene; for ( int i = 0; i < numOfLevels; i++ ) { @@ -720,17 +721,11 @@ int EncRCPic::xEstPicTargetBits( EncRCSeq* encRcSeq, EncRCGOP* encRcGOP ) if ( encRcSeq->twoPass ) { double gopVsBitrateRatio = 1.0; + double frameVsGopRatio = 1.0; int tmpTargetBits = 0; double alpha[ 7 ] = { 0.0 }; - encRcSeq->getTargetBitsFromFirstPass( encRcSeq->framesCoded, tmpTargetBits, gopVsBitrateRatio, isNewScene, alpha ); - if ( 
currPicPosition == 0 || encRCSeq->framesLeft < encRcSeq->gopSize ) - { - targetBits = int( ( encRcSeq->estimatedBitUsage - encRcSeq->bitsUsed ) * gopVsBitrateRatio + tmpTargetBits ); // calculate the difference of under/overspent bits and adjust the current target bits based on the gop ratio only for the first frame in the gop - } - else - { - targetBits = tmpTargetBits; - } + encRcSeq->getTargetBitsFromFirstPass( encRcSeq->framesCoded, tmpTargetBits, gopVsBitrateRatio, frameVsGopRatio, isNewScene, alpha ); + targetBits = int( ( encRcSeq->estimatedBitUsage - encRcSeq->bitsUsed ) * gopVsBitrateRatio * frameVsGopRatio + tmpTargetBits ); // calculate the difference of under/overspent bits and adjust the current target bits based on the gop and frame ratio for every frame if ( encRcSeq->bitsUsed > 0 ) { diff --git a/source/Lib/EncoderLib/RateCtrl.h b/source/Lib/EncoderLib/RateCtrl.h index 824049253..a30ccefe3 100644 --- a/source/Lib/EncoderLib/RateCtrl.h +++ b/source/Lib/EncoderLib/RateCtrl.h @@ -122,7 +122,7 @@ namespace vvenc { void setQpInGOP( int gopId, int gopQp, int &qp ); bool isQpResetRequired( int gopId ); int getLeftAverageBits() { CHECK( !( framesLeft > 0 ), "No frames left" ); return (int)( bitsLeft / framesLeft ); } - void getTargetBitsFromFirstPass( int poc, int &targetBits, double &gopVsBitrateRatio, bool &isNewScene, double alpha[] ); + void getTargetBitsFromFirstPass( int poc, int &targetBits, double &gopVsBitrateRatio, double &frameVsGopRatio, bool &isNewScene, double alpha[] ); public: int rcMode; diff --git a/source/Lib/vvenc/EncCfg.cpp b/source/Lib/vvenc/EncCfg.cpp index 9fc919532..b3629beac 100644 --- a/source/Lib/vvenc/EncCfg.cpp +++ b/source/Lib/vvenc/EncCfg.cpp @@ -1417,7 +1417,7 @@ bool EncCfg::initCfgParameter() } /// Experimental settings - checkExperimental( m_RCRateControlMode != 0 && m_RCNumPasses == 2 && m_usePerceptQPA != 0, "2-pass rate control with perceptually optimized QP-adaptation is experimental!" 
); + // checkExperimental( experimental combination of parameters, "Description!" ); return( m_confirmFailed ); }