Skip to content

Commit

Permalink
Merge pull request #14 from fraunhoferhhi/develop_v0.2.1.0
Browse files Browse the repository at this point in the history
develop v0.2.1.0
  • Loading branch information
jbrdbg authored Dec 22, 2020
2 parents 58282d7 + 3867242 commit 77af893
Show file tree
Hide file tree
Showing 17 changed files with 124 additions and 84 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ if( NOT CMAKE_VERSION VERSION_LESS 3.12.0 )
endif()

# project name
project( vvenc VERSION 0.2.0.0 )
project( vvenc VERSION 0.2.1.0 )

if( NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR )
# enable sse4.1 build for all source files for gcc and clang
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ The expert mode encoder (**vvencFFapp**) is based on the [VVC test model (VTM)](

vvencFFapp -c randomaccess_medium.cfg -c sequence.cfg

# Contributing

Feel free to contribute. To do so:

* Fork the most recent state of the master branch
* Apply the desired changes
* Create a pull request against the upstream repository

# License

Expand Down
13 changes: 12 additions & 1 deletion changelog.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
/////////////////////////////////////////
tag 0.2.1.0

* bugfixes:
- ISP fix: erroneously placed sanity check

* libvvenc:
- decrease memory consumption
- harmonize 2-pass rate control and perceptual QPA
- improve rate control

/////////////////////////////////////////
tag 0.2.0.0

Expand All @@ -22,7 +33,7 @@ tag 0.2.0.0
- various memory reductions (Rom.cpp, scaling list memory)
- various optimizations (SIMD for MCTF, forward transformation, single column IF; memory accesses for DMVR)
- changed MCTF algorithm to do intermediate rounding between hor/ver filtering

* vvencapp:
- new parameter --refreshsec,-rs to define the intra refresh rate in seconds depending on the given frame rate.
Internally, the refresh rate in seconds is translated into the frames where the refresh is set.
Expand Down
84 changes: 46 additions & 38 deletions source/Lib/CommonLib/CodingStructure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,24 +122,10 @@ void CodingStructure::destroy()

destroyCoeffs();

for( uint32_t i = 0; i < MAX_NUM_CH; i++ )
{
delete[] m_cuPtr[ i ];
m_cuPtr[ i ] = nullptr;

delete[] m_tuPtr[ i ];
m_tuPtr[ i ] = nullptr;
}

for( int i = 0; i < NUM_EDGE_DIR; i++ )
{
xFree( m_lfParam[ i ] );
m_lfParam[ i ] = nullptr;
}

delete[] m_motionBuf;
m_motionBuf = nullptr;

destroyTempBuffers();

if ( m_unitCacheMutex ) m_unitCacheMutex->lock();

Expand Down Expand Up @@ -650,47 +636,64 @@ void CodingStructure::createInternals( const UnitArea& _unit, const bool isTopLa
parent = nullptr;
refCS = nullptr;

unsigned numCh = getNumberValidChannels(area.chromaFormat);
unsigned _lumaAreaScaled = g_miScaling.scale( area.lumaSize() ).area();
m_motionBuf = new MotionInfo[_lumaAreaScaled];

for (unsigned i = 0; i < numCh; i++)
if( isTopLayer )
{
Size allocArea = area.blocks[i].size();
m_mapSize[i] = unitScale[i].scale(allocArea);
motionLutBuf.resize( pcv->heightInCtus );
}
else
{
createCoeffs();
createTempBuffers( false );
initStructData();
}
}

unsigned _area = unitScale[i].scale( area.blocks[i].size() ).area();
void CodingStructure::createTempBuffers( const bool isTopLayer )
{
unsigned numCh = getNumberValidChannels( area.chromaFormat );

for( unsigned i = 0; i < numCh; i++ )
{
Size allocArea = area.blocks[i].size();
m_mapSize[i] = unitScale[i].scale(allocArea);

m_cuPtr[i] = _area > 0 ? new CodingUnit* [_area] : nullptr;
m_tuPtr[i] = _area > 0 ? new TransformUnit* [_area] : nullptr;
unsigned _area = unitScale[i].scale( area.blocks[i].size() ).area();

m_cuPtr[i] = _area > 0 ? new CodingUnit* [_area] : nullptr;
m_tuPtr[i] = _area > 0 ? new TransformUnit* [_area] : nullptr;
}

for( unsigned i = 0; i < NUM_EDGE_DIR; i++ )
{
m_lfParam[i] = ( isTopLayer && m_mapSize[0].area() > 0 ) ? ( LoopFilterParam* ) xMalloc( LoopFilterParam, m_mapSize[0].area() ) : nullptr;
}

numCh = getNumberValidComponents(area.chromaFormat);
unsigned _maxNumDmvrMvs = ( area.lwidth() >> 3 ) * ( area.lheight() >> 3 );
m_dmvrMvCache.resize( _maxNumDmvrMvs );
}

for (unsigned i = 0; i < numCh; i++)
void CodingStructure::destroyTempBuffers()
{
for( uint32_t i = 0; i < MAX_NUM_CH; i++ )
{
m_offsets[i] = 0;
delete[] m_cuPtr[i];
m_cuPtr[i] = nullptr;

delete[] m_tuPtr[i];
m_tuPtr[i] = nullptr;
}

if( isTopLayer )
for( int i = 0; i < NUM_EDGE_DIR; i++ )
{
motionLutBuf.resize( pcv->heightInCtus );
xFree( m_lfParam[i] );
m_lfParam[i] = nullptr;
}
else
{
createCoeffs();
}

unsigned _lumaAreaScaled = g_miScaling.scale( area.lumaSize() ).area();
m_motionBuf = new MotionInfo[_lumaAreaScaled];

unsigned _maxNumDmvrMvs = ( area.lwidth() >> 3 ) * ( area.lheight() >> 3 );
m_dmvrMvCache.resize( _maxNumDmvrMvs );

initStructData();
// swap the contents of the vector so that memory released
std::vector<Mv>().swap( m_dmvrMvCache );
}

void CodingStructure::addMiToLut(static_vector<HPMVInfo, MAX_NUM_HMVP_CANDS> &lut, const HPMVInfo &mi)
Expand Down Expand Up @@ -739,6 +742,11 @@ void CodingStructure::createCoeffs()

m_coeffs[i] = _area > 0 ? ( TCoeff* ) xMalloc( TCoeff, _area ) : nullptr;
}

for( unsigned i = 0; i < numComp; i++ )
{
m_offsets[i] = 0;
}
}

void CodingStructure::destroyCoeffs()
Expand Down
2 changes: 2 additions & 0 deletions source/Lib/CommonLib/CodingStructure.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ class CodingStructure
void clearCUs();
const int signalModeCons( const PartSplit split, Partitioner &partitioner, const ModeType modeTypeParent ) const;

void createTempBuffers( const bool isTopLayer );
void destroyTempBuffers();
private:
void createInternals(const UnitArea& _unit, const bool isTopLayer);

Expand Down
8 changes: 6 additions & 2 deletions source/Lib/CommonLib/Picture.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,6 @@ void Picture::create( ChromaFormat _chromaFormat, const Size& size, unsigned _ma
margin = _margin;
const Area a = Area( Position(), size );
m_bufs[ PIC_RECONSTRUCTION ].create( _chromaFormat, a, _maxCUSize, _margin, MEMORY_ALIGN_DEF_SIZE );
m_bufs[ PIC_SAO_TEMP ].create( _chromaFormat, a, _maxCUSize, 0, MEMORY_ALIGN_DEF_SIZE );

if( _decoder )
{
Expand Down Expand Up @@ -230,11 +229,17 @@ void Picture::destroy()

// Creates the PIC_SAO_TEMP picture buffer and then asks the coding structure
// to rebind its picture buffers.
//
// A coding structure must already be attached: the buffer is created with
// cs->pcv->maxCUSize, so cs is dereferenced unconditionally below.
//
// _maxCUSize is not read by this body; the CU size is taken from cs->pcv.
void Picture::createTempBuffers( unsigned _maxCUSize )
{
  CHECK( !cs, "Coding structure is required at this point!" );

  m_bufs[PIC_SAO_TEMP].create( chromaFormat, Y(), cs->pcv->maxCUSize, 0, MEMORY_ALIGN_DEF_SIZE );

  // NOTE(review): this guard is redundant if CHECK always aborts on failure;
  // confirm the CHECK semantics before removing it.
  if( cs ) cs->rebindPicBufs();
}

// Destroys the PIC_SAO_TEMP picture buffer; if a coding structure is attached,
// it is then asked to rebind its picture buffers.
void Picture::destroyTempBuffers()
{
  m_bufs[ PIC_SAO_TEMP ].destroy();

  if( cs )
  {
    cs->rebindPicBufs();
  }
}

Expand Down Expand Up @@ -262,7 +267,6 @@ void Picture::finalInit( const VPS& _vps, const SPS& sps, const PPS& pps, PicHea

if( cs )
{
cs->initStructData();
CHECK( cs->sps != &sps, "picture initialization error: sps changed" );
CHECK( cs->vps != &_vps, "picture initialization error: vps changed" );
}
Expand Down
21 changes: 11 additions & 10 deletions source/Lib/CommonLib/TrQuant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,19 +501,20 @@ void TrQuant::xT( const TransformUnit& tu, const ComponentID compID, const CPelB
}
#endif //ENABLE_SIMD_TRAFO

const int shift_1st = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC;
const int shift_2nd = (Log2(height)) + TRANSFORM_MATRIX_SHIFT + COM16_C806_TRANS_PREC;
CHECK( shift_1st < 0, "Negative shift" );
CHECK( shift_2nd < 0, "Negative shift" );

if (width > 1 && height > 1)
{
const int shift_1st = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC;
const int shift_2nd = (Log2(height)) + TRANSFORM_MATRIX_SHIFT + COM16_C806_TRANS_PREC;
CHECK( shift_1st < 0, "Negative shift" );
CHECK( shift_2nd < 0, "Negative shift" );
fastFwdTrans[trTypeHor][transformWidthIndex](block, tmp, shift_1st, height, 0, skipWidth);
fastFwdTrans[trTypeVer][transformHeightIndex](tmp, dstCoeff.buf, shift_2nd, width, skipWidth, skipHeight);
}
else if (height == 1) // 1-D horizontal transform
{
fastFwdTrans[trTypeHor][transformWidthIndex](block, dstCoeff.buf, shift_1st, 1, 0, skipWidth);
const int shift = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange + COM16_C806_TRANS_PREC;
CHECK( shift < 0, "Negative shift" );
fastFwdTrans[trTypeHor][transformWidthIndex](block, dstCoeff.buf, shift, 1, 0, skipWidth);
}
else // if (iWidth == 1) //1-D vertical transform
{
Expand Down Expand Up @@ -562,14 +563,14 @@ void TrQuant::xIT( const TransformUnit& tu, const ComponentID compID, const CCoe
}
}

const int shift_1st = TRANSFORM_MATRIX_SHIFT + 1 + COM16_C806_TRANS_PREC; // 1 has been added to shift_1st at the expense of shift_2nd
const int shift_2nd = ( TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1 ) - bitDepth + COM16_C806_TRANS_PREC;
CHECK( shift_1st < 0, "Negative shift" );
CHECK( shift_2nd < 0, "Negative shift" );
TCoeff *block = m_blk;
TCoeff *tmp = m_tmp;
if (width > 1 && height > 1) // 2-D transform
{
const int shift_1st = TRANSFORM_MATRIX_SHIFT + 1 + COM16_C806_TRANS_PREC; // 1 has been added to shift_1st at the expense of shift_2nd
const int shift_2nd = ( TRANSFORM_MATRIX_SHIFT + maxLog2TrDynamicRange - 1 ) - bitDepth + COM16_C806_TRANS_PREC;
CHECK( shift_1st < 0, "Negative shift" );
CHECK( shift_2nd < 0, "Negative shift" );
fastInvTrans[trTypeVer][transformHeightIndex](pCoeff.buf, tmp, shift_1st, width, skipWidth, skipHeight, clipMinimum, clipMaximum);
fastInvTrans[trTypeHor][transformWidthIndex](tmp, block, shift_2nd, height, 0, skipWidth, clipMinimum, clipMaximum);
}
Expand Down
7 changes: 5 additions & 2 deletions source/Lib/CommonLib/x86/QuantX86.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ static void DeQuantCoreSIMD(const int maxX,const int maxY,const int scale,const
{
for( int y = 0; y <= maxY; y++)
{
__m128i v_level = _mm_loadu_si128( ( __m128i const * )&piQCoef[y * piQCfStride] );
__m128i v_level = maxX == 1 ? _mm_loadl_epi64( (__m128i const*) & piQCoef[y * piQCfStride] ) : _mm_loadu_si128( (__m128i const*) & piQCoef[y * piQCfStride] );
v_level = _mm_packs_epi32 (v_level,v_level);
v_level = _mm_and_si128(v_level,vlevmask);
v_level = _mm_max_epi16 (v_level, v_min);
Expand All @@ -218,7 +218,10 @@ static void DeQuantCoreSIMD(const int maxX,const int maxY,const int scale,const

v_level = _mm_max_epi32 (v_level, v_Tmin);
v_level = _mm_min_epi32 (v_level, v_Tmax);
_mm_storeu_si128(( __m128i * )(piCoef+y*width ), v_level );
if( maxX == 1 )
_mm_storel_epi64( (__m128i*)(piCoef + y * width), v_level );
else
_mm_storeu_si128( (__m128i*)(piCoef + y * width), v_level );
}
}
else
Expand Down
12 changes: 10 additions & 2 deletions source/Lib/DecoderLib/DecLib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ bool tryDecodePicture( Picture* pcEncPic, const int expectedPoc, const std::stri
{
if( pic->poc == poc && (!bDecodeUntilPocFound || expectedPoc == poc ) )
{
pcEncPic->createTempBuffers( pic->cs->pcv->maxCUSize );
pcEncPic->cs->createCoeffs();
pcEncPic->cs->createTempBuffers( true );
pcEncPic->cs->initStructData();

CHECK( pcEncPic->slices.size() == 0, "at least one slice should be available" );

CHECK( expectedPoc != poc, "mismatch in POC - check encoder configuration" );
Expand Down Expand Up @@ -669,9 +674,10 @@ void DecLib::finishPicture(int& poc, PicList*& rpcListPic, MsgLevel msgl )
m_maxDecSubPicIdx = 0;
m_maxDecSliceAddrInSubPic = -1;

m_pic->destroyTempBuffers();
m_pic->cs->destroyCoeffs();
m_pic->cs->releaseIntermediateData();
m_pic->cs->destroyTempBuffers();
m_pic->cs->destroyCoeffs();
m_pic->destroyTempBuffers();
m_pic->cs->picHeader->initPicHeader();
}

Expand Down Expand Up @@ -1007,6 +1013,8 @@ void DecLib::xActivateParameterSets( const int layerId)

m_pic->createTempBuffers( m_pic->cs->pps->pcv->maxCUSize );
m_pic->cs->createCoeffs();
m_pic->cs->createTempBuffers( true );
m_pic->cs->initStructData();

m_pic->allocateNewSlice();
// make the slice-pilot a real slice, and set up the slice-pilot for the next slice
Expand Down
11 changes: 6 additions & 5 deletions source/Lib/EncoderLib/BitAllocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ int BitAllocation::applyQPAdaptationChroma (const Slice* slice, const EncCfg* en

if (pic == nullptr || encCfg == nullptr || optChromaQPOffset == nullptr || encCfg->m_usePerceptQPA > 4) return -1;

const bool isXPSNRBasedQPA = (encCfg->m_usePerceptQPA & 1) == 0 && encCfg->m_RCNumPasses != 2;
const bool isXPSNRBasedQPA = (encCfg->m_usePerceptQPA & 1) == 0 && (encCfg->m_RCRateControlMode == 0 || encCfg->m_RCNumPasses != 2);
const bool isHighResolution = (encCfg->m_SourceWidth > 2048 || encCfg->m_SourceHeight > 1280) && ( encCfg->m_usePerceptQPA & 1 ) == 0;
const int bitDepth = slice->sps->bitDepths[CH_L];

Expand Down Expand Up @@ -394,6 +394,7 @@ int BitAllocation::applyQPAdaptationChroma (const Slice* slice, const EncCfg* en
}

int BitAllocation::applyQPAdaptationLuma (const Slice* slice, const EncCfg* encCfg, const int savedQP, const double lambda, std::vector<int>& ctuPumpRedQP,
const bool forceFrameWiseQPA,
const uint32_t ctuStartAddr, const uint32_t ctuBoundingAddr, const bool isHDR /*= false*/)
{
Picture* const pic = (slice != nullptr ? slice->pic : nullptr);
Expand All @@ -403,9 +404,9 @@ int BitAllocation::applyQPAdaptationLuma (const Slice* slice, const EncCfg* encC

if (pic == nullptr || pic->cs == nullptr || encCfg == nullptr || ctuStartAddr >= ctuBoundingAddr) return -1;

const bool isXPSNRBasedQPA = (encCfg->m_usePerceptQPA & 1) == 0 && encCfg->m_RCNumPasses != 2;
const bool isXPSNRBasedQPA = (encCfg->m_usePerceptQPA & 1) == 0 && (encCfg->m_RCRateControlMode == 0 || encCfg->m_RCNumPasses != 2);
const bool isHighResolution = (encCfg->m_SourceWidth > 2048 || encCfg->m_SourceHeight > 1280) && ( encCfg->m_usePerceptQPA & 1 ) == 0;
const bool useFrameWiseQPA = (encCfg->m_QP > MAX_QP_PERCEPT_QPA);
const bool useFrameWiseQPA = (encCfg->m_QP > MAX_QP_PERCEPT_QPA) || forceFrameWiseQPA;
const int bitDepth = slice->sps->bitDepths[CH_L];
const int sliceQP = (savedQP < 0 ? slice->sliceQp : savedQP);
const PreCalcValues& pcv = *pic->cs->pcv;
Expand Down Expand Up @@ -588,7 +589,7 @@ int BitAllocation::applyQPAdaptationSubCtu (const Slice* slice, const EncCfg* en

if (pic == nullptr || encCfg == nullptr) return -1;

const bool isXPSNRBasedQPA = (encCfg->m_usePerceptQPA & 1) == 0 && encCfg->m_RCNumPasses != 2;
const bool isXPSNRBasedQPA = (encCfg->m_usePerceptQPA & 1) == 0 && (encCfg->m_RCRateControlMode == 0 || encCfg->m_RCNumPasses != 2);
const bool isHighResolution = (encCfg->m_SourceWidth > 2048 || encCfg->m_SourceHeight > 1280) && ( encCfg->m_usePerceptQPA & 1 ) == 0;
const int bitDepth = slice->sps->bitDepths[CH_L];
const PosType guardSize = (isHighResolution ? 2 : 1);
Expand Down Expand Up @@ -664,7 +665,7 @@ double BitAllocation::getPicVisualActivity (const Slice* slice, const EncCfg* en

if (pic == nullptr || encCfg == nullptr) return 0.0;

const bool isXPSNRQPA = (encCfg->m_usePerceptQPA & 1) == 0 && encCfg->m_RCNumPasses != 2;
const bool isXPSNRQPA = (encCfg->m_usePerceptQPA & 1) == 0 && (encCfg->m_RCRateControlMode == 0 || encCfg->m_RCNumPasses != 2);
const bool isHighRes = ( encCfg->m_SourceWidth > 2048 || encCfg->m_SourceHeight > 1280 ) && ( encCfg->m_usePerceptQPA & 1 ) == 0;
const CPelBuf picOrig = (origBuf != nullptr ? *origBuf : pic->getOrigBuf (COMP_Y));
const CPelBuf picPrv1 = (isXPSNRQPA ? pic->getOrigBufPrev (COMP_Y, false) : picOrig);
Expand Down
2 changes: 1 addition & 1 deletion source/Lib/EncoderLib/BitAllocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ namespace vvenc {
std::vector<int>& ctuPumpRedQP,
int optChromaQPOffset[2], const bool isHDR = false);
int applyQPAdaptationLuma (const Slice* slice, const EncCfg* encCfg, const int savedQP, const double lambda,
std::vector<int>& ctuPumpRedQP,
std::vector<int>& ctuPumpRedQP, const bool forceFrameWiseQPA,
const uint32_t ctuStartAddr, const uint32_t ctuBoundingAddr, const bool isHDR = false);
int applyQPAdaptationSubCtu (const Slice* slice, const EncCfg* encCfg, const Area& lumaArea, const bool isHDR = false);
int getCtuPumpingReducingQP (const Slice* slice, const CPelBuf& origY, const Distortion uiSadBestForQPA,
Expand Down
8 changes: 0 additions & 8 deletions source/Lib/EncoderLib/EncLib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,11 +362,6 @@ void EncLib::xSetRCEncCfg( int pass )
// restore MCTF
m_cBckCfg.m_MCTF = mctf;

// configure QPA in the first pass
m_cBckCfg.m_usePerceptQPA = 0; // disable QPA in the first pass
m_cBckCfg.m_sliceChromaQpOffsetPeriodicity = 0;
m_cBckCfg.m_usePerceptQPATempFiltISlice = 0;

std::swap( const_cast<EncCfg&>(m_cEncCfg), m_cBckCfg );
}
}
Expand Down Expand Up @@ -603,9 +598,6 @@ void EncLib::xInitPicture( Picture& pic, int picNum, const PPS& pps, const SPS&
pic.vps = &vps;
pic.dci = &dci;

pic.createTempBuffers( pic.cs->pps->pcv->maxCUSize );
pic.cs->createCoeffs();

// filter data initialization
const uint32_t numberOfCtusInFrame = pic.cs->pcv->sizeInCtus;

Expand Down
Loading

0 comments on commit 77af893

Please sign in to comment.