Skip to content

Commit

Permalink
Reduce memory usage
Browse files Browse the repository at this point in the history
  • Loading branch information
Андрей Евстюхин committed May 8, 2020
1 parent e9be191 commit 5d8f084
Show file tree
Hide file tree
Showing 15 changed files with 778 additions and 462 deletions.
26 changes: 23 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Recompressing "BC7Ltest.png" (gained from https://code.google.com/archive/p/nvid
Bc7Compress.exe /slow /nomask /noflip BC7Ltest.png output.ktx /debug output.png
Loaded BC7Ltest.png
Image 152x152, Texture 152x152
Compressed 1444 blocks, elapsed 39 ms, throughput 0.592 Mpx/s
Compressed 1444 blocks, elapsed 36 ms, throughput 0.641 Mpx/s
SubTexture A MSE = 0.0, PSNR = 73.986163, SSIM_4x4 = 0.99999923
SubTexture RGB wMSE = 0.0, wPSNR = 62.172358, wSSIM_4x4 = 0.99999238
Saved output.ktx
Expand All @@ -32,14 +32,34 @@ Compressing "frymire.png" (gained from https://github.com/castano/nvidia-texture
Bc7Compress.exe /nomask /noflip frymire.png frymire.ktx
Loaded frymire.png
Image 1118x1105, Texture 1120x1108
Compressed 77560 blocks, elapsed 512 ms, throughput 2.423 Mpx/s
Compressed 77560 blocks, elapsed 498 ms, throughput 2.491 Mpx/s
Exactly A
SubTexture RGB wMSE = 0.2, wPSNR = 55.181449, wSSIM_4x4 = 0.99980677
Saved frymire.ktx

Compressing "frymire.png" in development mode:

Bc7Compress.exe /draft /nomask /noflip frymire.png frymire.ktx
Loaded frymire.png
Image 1118x1105, Texture 1120x1108
Compressed 77560 blocks, elapsed 188 ms, throughput 6.600 Mpx/s
Exactly A
SubTexture RGB wMSE = 0.4, wPSNR = 52.056761, wSSIM_4x4 = 0.99952034
Saved frymire.ktx

Compressing "8192.png" (obtained from https://bitbucket.org/wolfpld/etcpak/downloads/8192.png) in development mode:

Bc7Compress.exe /draft /nomask /noflip 8192.png 8192.ktx
Loaded 8192.png
Image 8192x8192, Texture 8192x8192
Compressed 4194304 blocks, elapsed 16770 ms, throughput 4.001 Mpx/s
Exactly A
SubTexture RGB wMSE = 0.4, wPSNR = 52.364416, wSSIM_4x4 = 0.99625929
Saved 8192.ktx

## Copyright

Copyright (c) 2019 Andrew Evstyukhin
Copyright (c) 2019-2020 Andrew Evstyukhin

Licensed under the MIT License.

Expand Down
61 changes: 59 additions & 2 deletions src/Bc7Core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "Metrics.h"

#if defined(OPTION_COUNTERS)
#include "SnippetLevelsMinimum.h"
#include "SnippetLevelsBuffer.h"
#endif

Expand Down Expand Up @@ -388,10 +389,10 @@ NOTINLINED Node* radix_sort(Node* input, Node* work, size_t N) noexcept
Node* A = input;
Node* B = work;

int any_error = 0;
uint32_t any_error = 0;
for (size_t i = 0; i < N; i++)
{
any_error |= A[i].Error;
any_error |= (uint32_t)A[i].Error;
}

for (size_t shift = 0; shift < 32; shift += radix)
Expand Down Expand Up @@ -431,6 +432,60 @@ NOTINLINED Node* radix_sort(Node* input, Node* work, size_t N) noexcept
return A;
}

// Sorts N NodeShort entries in ascending order of bits 16..31 of ColorError,
// using a stable least-significant-digit radix sort with 6-bit digits.
// Bits 0..15 of ColorError never take part in the ordering; entries whose
// upper bits compare equal keep their relative input order.
//
// input - entries to sort (used as one of the two ping-pong buffers)
// work  - scratch buffer with room for at least N entries
// N     - number of entries
//
// Returns a pointer to whichever of the two buffers holds the sorted sequence;
// this is `input` when an even number of passes ran (including zero passes).
NOTINLINED NodeShort* radix_sort(NodeShort* input, NodeShort* work, size_t N) noexcept
{
    constexpr size_t digitBits = 6;
    constexpr size_t digitValues = 1 << digitBits;
    constexpr size_t digitMask = digitValues - 1;

    alignas(32) uint32_t offsets[digitValues];

    NodeShort* source = input;
    NodeShort* target = work;

    // OR of all keys: a digit that is zero here is zero in every key,
    // so the pass for that digit would not reorder anything.
    uint32_t usedBits = 0;
    for (size_t n = 0; n < N; n++)
    {
        usedBits |= source[n].ColorError;
    }

    // Digits start at bit 16; the last digit (bit 28) only has 4 significant bits.
    for (size_t shift = 16; shift < 32; shift += digitBits)
    {
        const bool digitInUse = ((usedBits >> shift) & digitMask) != 0;
        if (digitInUse)
        {
            // Histogram of the current digit.
            for (uint32_t& slot : offsets)
            {
                slot = 0;
            }

            for (size_t n = 0; n < N; n++)
            {
                const size_t key = source[n].ColorError;
                offsets[(key >> shift) & digitMask]++;
            }

            // Exclusive prefix sum: each bucket now holds its first output index.
            uint32_t running = 0;
            for (uint32_t& slot : offsets)
            {
                const uint32_t bucketSize = slot;
                slot = running;
                running += bucketSize;
            }

            // Scatter in input order, which keeps the pass stable.
            for (size_t n = 0; n < N; n++)
            {
                const NodeShort item = source[n];
                const size_t key = item.ColorError;
                const uint32_t position = offsets[(key >> shift) & digitMask]++;
                target[position] = item;
            }

            // Ping-pong the buffers for the next digit.
            NodeShort* const previous = source;
            source = target;
            target = previous;
        }
    }

    return source;
}

NOTINLINED int ComputeSubsetTable(const Area& area, const __m128i mweights, const __m128i mfix, Modulations& state, const int M) noexcept
{
int good = (1 << M) - 1;
Expand Down Expand Up @@ -1050,6 +1105,8 @@ void CompressStatistics()
Mode5::PrintCounters();
Mode7::PrintCounters();

PRINTF("[Minimum]\tFull = %i, Short = %i",
gMinimumFull.load(), gMinimumShort.load());
PRINTF("[Estimate]\tFull = %i, Short = %i",
gEstimateFull.load(), gEstimateShort.load());

Expand Down
11 changes: 11 additions & 0 deletions src/Bc7Core.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,16 @@ struct alignas(8) Node
}
};

// Compact 32-bit node that packs an error value and a color value into one word.
struct NodeShort
{
    // Packed value: the low 16 bits of the error shifted into bits 16..31,
    // OR-ed with the color value.
    uint32_t ColorError;

    // Packs `error` and `color` into ColorError.
    // Only the low 16 bits of `error` survive the shift. `color` is OR-ed in
    // unmasked, so any bits it has above bit 15 overlap the error field.
    // NOTE(review): presumably callers pass color in [0, 65535] - confirm.
    INLINED void Init(int64_t error, int color) noexcept
    {
        const uint32_t errorBits = static_cast<uint32_t>(error) << 16;
        const uint32_t colorBits = static_cast<uint32_t>(color);
        ColorError = errorBits | colorBits;
    }
};

// A,G,R,B
struct alignas(64) Modulations
{
Expand All @@ -116,6 +126,7 @@ void AreaReduceTable3(const Area& area, __m128i& mc, uint64_t& indices) noexcept
void AreaReduceTable4(__m128i& mc, uint64_t& indices) noexcept;

NOTINLINED Node* radix_sort(Node* input, Node* work, size_t N) noexcept;
NOTINLINED NodeShort* radix_sort(NodeShort* input, NodeShort* work, size_t N) noexcept;

NOTINLINED int ComputeSubsetTable(const Area& area, const __m128i mweights, const __m128i mfix, Modulations& state, const int M) noexcept;

Expand Down
20 changes: 11 additions & 9 deletions src/Bc7CoreMode0.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

namespace Mode0 {

constexpr int LevelsCapacity = 48;

#if defined(OPTION_COUNTERS)
static std::atomic_int gComputeSubsetError3, gComputeSubsetError3GR, gComputeSubsetError3GB;
#endif
Expand Down Expand Up @@ -235,24 +237,24 @@ namespace Mode0 {
class Subset final
{
public:
LevelsBuffer<48> ch1, ch2, ch3;
LevelsBuffer<LevelsCapacity> ch1, ch2, ch3;

INLINED Subset() noexcept = default;

template<int pbits>
INLINED bool InitLevels(const Area& area, const int water, const Estimation& estimation) noexcept
{
ch1.ComputeChannelLevelsReduced<4, pbits, false, gTableLevels3_Value5_U16>(area, 1, kGreen, water - estimation.ch2 - estimation.ch3);
ch1.ComputeChannelLevelsReduced<4, pbits, false, gTableDeltas3_Value5>(area, 1, kGreen, water - estimation.ch2 - estimation.ch3);
int min1 = ch1.MinErr;
if (min1 >= water)
return false;

ch2.ComputeChannelLevelsReduced<4, pbits, false, gTableLevels3_Value5_U16>(area, 2, kRed, water - min1 - estimation.ch3);
ch2.ComputeChannelLevelsReduced<4, pbits, false, gTableDeltas3_Value5>(area, 2, kRed, water - min1 - estimation.ch3);
int min2 = ch2.MinErr;
if (min1 + min2 >= water)
return false;

ch3.ComputeChannelLevelsReduced<4, pbits, false, gTableLevels3_Value5_U16>(area, 3, kBlue, water - min1 - min2);
ch3.ComputeChannelLevelsReduced<4, pbits, false, gTableDeltas3_Value5>(area, 3, kBlue, water - min1 - min2);
int min3 = ch3.MinErr;
if (min1 + min2 + min3 >= water)
return false;
Expand All @@ -272,7 +274,7 @@ namespace Mode0 {
int n2 = ch2.Count;
int n3 = ch3.Count;

int memGB[48];
int memGB[LevelsCapacity];

for (int i1 = 0; i1 < n1; i1++)
{
Expand Down Expand Up @@ -498,19 +500,19 @@ namespace Mode0 {
int error = 0;
if (error < water)
{
int level1 = LevelsMinimum::EstimateChannelLevelsReduced<5, false, gTableLevels3_Value5_U16, gTableCutLevels3_Value5_U16>(area, 1, kGreen, water - error);
int level1 = LevelsMinimum::EstimateChannelLevelsReduced<5, false, gTableDeltas3_Value5, gTableCuts3_Value5>(area, 1, kGreen, water - error);
estimation.ch1 = level1;
error += level1;

if (error < water)
{
int level2 = LevelsMinimum::EstimateChannelLevelsReduced<5, false, gTableLevels3_Value5_U16, gTableCutLevels3_Value5_U16>(area, 2, kRed, water - error);
int level2 = LevelsMinimum::EstimateChannelLevelsReduced<5, false, gTableDeltas3_Value5, gTableCuts3_Value5>(area, 2, kRed, water - error);
estimation.ch2 = level2;
error += level2;

if (error < water)
{
int level3 = LevelsMinimum::EstimateChannelLevelsReduced<5, false, gTableLevels3_Value5_U16, gTableCutLevels3_Value5_U16>(area, 3, kBlue, water - error);
int level3 = LevelsMinimum::EstimateChannelLevelsReduced<5, false, gTableDeltas3_Value5, gTableCuts3_Value5>(area, 3, kBlue, water - error);
estimation.ch3 = level3;
error += level3;

Expand All @@ -527,7 +529,7 @@ namespace Mode0 {

void CompressBlockFull(Cell& input) noexcept
{
Node order[16];
Node order[16]{};
Estimation estimations1[16];
Estimation estimations2[16];
Estimation estimations3[16];
Expand Down
20 changes: 11 additions & 9 deletions src/Bc7CoreMode1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

namespace Mode1 {

constexpr int LevelsCapacity = 48;

#if defined(OPTION_COUNTERS)
static std::atomic_int gComputeSubsetError3, gComputeSubsetError3GR, gComputeSubsetError3GB;
#endif
Expand Down Expand Up @@ -212,24 +214,24 @@ namespace Mode1 {
class Subset final
{
public:
LevelsBuffer<48> ch1, ch2, ch3;
LevelsBuffer<LevelsCapacity> ch1, ch2, ch3;

INLINED Subset() noexcept = default;

template<int pbits>
INLINED bool InitLevels(const Area& area, const int water, const Estimation& estimation) noexcept
{
ch1.ComputeChannelLevelsReduced<6, pbits, false, gTableLevels3_Value7Shared_U16>(area, 1, kGreen, water - estimation.ch2 - estimation.ch3);
ch1.ComputeChannelLevelsReduced<6, pbits, false, gTableDeltas3_Value7Shared>(area, 1, kGreen, water - estimation.ch2 - estimation.ch3);
int min1 = ch1.MinErr;
if (min1 >= water)
return false;

ch2.ComputeChannelLevelsReduced<6, pbits, false, gTableLevels3_Value7Shared_U16>(area, 2, kRed, water - min1 - estimation.ch3);
ch2.ComputeChannelLevelsReduced<6, pbits, false, gTableDeltas3_Value7Shared>(area, 2, kRed, water - min1 - estimation.ch3);
int min2 = ch2.MinErr;
if (min1 + min2 >= water)
return false;

ch3.ComputeChannelLevelsReduced<6, pbits, false, gTableLevels3_Value7Shared_U16>(area, 3, kBlue, water - min1 - min2);
ch3.ComputeChannelLevelsReduced<6, pbits, false, gTableDeltas3_Value7Shared>(area, 3, kBlue, water - min1 - min2);
int min3 = ch3.MinErr;
if (min1 + min2 + min3 >= water)
return false;
Expand All @@ -249,7 +251,7 @@ namespace Mode1 {
int n2 = ch2.Count;
int n3 = ch3.Count;

int memGB[48];
int memGB[LevelsCapacity];

for (int i1 = 0; i1 < n1; i1++)
{
Expand Down Expand Up @@ -438,19 +440,19 @@ namespace Mode1 {
int error = 0;
if (error < water)
{
int level1 = LevelsMinimum::EstimateChannelLevelsReduced<7, false, gTableLevels3_Value7Shared_U16, gTableCutLevels3_Value7Shared_U16>(area, 1, kGreen, water - error);
int level1 = LevelsMinimum::EstimateChannelLevelsReduced<7, false, gTableDeltas3_Value7Shared, gTableCuts3_Value7Shared>(area, 1, kGreen, water - error);
estimation.ch1 = level1;
error += level1;

if (error < water)
{
int level2 = LevelsMinimum::EstimateChannelLevelsReduced<7, false, gTableLevels3_Value7Shared_U16, gTableCutLevels3_Value7Shared_U16>(area, 2, kRed, water - error);
int level2 = LevelsMinimum::EstimateChannelLevelsReduced<7, false, gTableDeltas3_Value7Shared, gTableCuts3_Value7Shared>(area, 2, kRed, water - error);
estimation.ch2 = level2;
error += level2;

if (error < water)
{
int level3 = LevelsMinimum::EstimateChannelLevelsReduced<7, false, gTableLevels3_Value7Shared_U16, gTableCutLevels3_Value7Shared_U16>(area, 3, kBlue, water - error);
int level3 = LevelsMinimum::EstimateChannelLevelsReduced<7, false, gTableDeltas3_Value7Shared, gTableCuts3_Value7Shared>(area, 3, kBlue, water - error);
estimation.ch3 = level3;
error += level3;

Expand All @@ -467,7 +469,7 @@ namespace Mode1 {

void CompressBlockFull(Cell& input) noexcept
{
Node order[64];
Node order[64]{};
int lines1[64];
int lines2[64];

Expand Down
20 changes: 11 additions & 9 deletions src/Bc7CoreMode2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

namespace Mode2 {

constexpr int LevelsCapacity = 48;

#if defined(OPTION_COUNTERS)
static std::atomic_int gComputeSubsetError2, gComputeSubsetError2GR, gComputeSubsetError2GB;
#endif
Expand Down Expand Up @@ -211,23 +213,23 @@ namespace Mode2 {
class Subset final
{
public:
LevelsBuffer<48> ch1, ch2, ch3;
LevelsBuffer<LevelsCapacity> ch1, ch2, ch3;

INLINED Subset() noexcept = default;

INLINED bool InitLevels(const Area& area, const int water, const Estimation& estimation) noexcept
{
ch1.ComputeChannelLevelsReduced<5, -1, false, gTableLevels2_Value5_U16>(area, 1, kGreen, water - estimation.ch2 - estimation.ch3);
ch1.ComputeChannelLevelsReduced<5, -1, false, gTableDeltas2_Value5>(area, 1, kGreen, water - estimation.ch2 - estimation.ch3);
int min1 = ch1.MinErr;
if (min1 >= water)
return false;

ch2.ComputeChannelLevelsReduced<5, -1, false, gTableLevels2_Value5_U16>(area, 2, kRed, water - min1 - estimation.ch3);
ch2.ComputeChannelLevelsReduced<5, -1, false, gTableDeltas2_Value5>(area, 2, kRed, water - min1 - estimation.ch3);
int min2 = ch2.MinErr;
if (min1 + min2 >= water)
return false;

ch3.ComputeChannelLevelsReduced<5, -1, false, gTableLevels2_Value5_U16>(area, 3, kBlue, water - min1 - min2);
ch3.ComputeChannelLevelsReduced<5, -1, false, gTableDeltas2_Value5>(area, 3, kBlue, water - min1 - min2);
int min3 = ch3.MinErr;
if (min1 + min2 + min3 >= water)
return false;
Expand All @@ -247,7 +249,7 @@ namespace Mode2 {
int n2 = ch2.Count;
int n3 = ch3.Count;

int memGB[48];
int memGB[LevelsCapacity];

for (int i1 = 0; i1 < n1; i1++)
{
Expand Down Expand Up @@ -422,19 +424,19 @@ namespace Mode2 {
int error = 0;
if (error < water)
{
int level1 = LevelsMinimum::EstimateChannelLevelsReduced<5, false, gTableLevels2_Value5_U16, gTableCutLevels2_Value5_U16>(area, 1, kGreen, water - error);
int level1 = LevelsMinimum::EstimateChannelLevelsReduced<5, false, gTableDeltas2_Value5, gTableCuts2_Value5>(area, 1, kGreen, water - error);
estimation.ch1 = level1;
error += level1;

if (error < water)
{
int level2 = LevelsMinimum::EstimateChannelLevelsReduced<5, false, gTableLevels2_Value5_U16, gTableCutLevels2_Value5_U16>(area, 2, kRed, water - error);
int level2 = LevelsMinimum::EstimateChannelLevelsReduced<5, false, gTableDeltas2_Value5, gTableCuts2_Value5>(area, 2, kRed, water - error);
estimation.ch2 = level2;
error += level2;

if (error < water)
{
int level3 = LevelsMinimum::EstimateChannelLevelsReduced<5, false, gTableLevels2_Value5_U16, gTableCutLevels2_Value5_U16>(area, 3, kBlue, water - error);
int level3 = LevelsMinimum::EstimateChannelLevelsReduced<5, false, gTableDeltas2_Value5, gTableCuts2_Value5>(area, 3, kBlue, water - error);
estimation.ch3 = level3;
error += level3;

Expand All @@ -451,7 +453,7 @@ namespace Mode2 {

void CompressBlockFull(Cell& input) noexcept
{
Node order[64];
Node order[64]{};
Estimation estimations1[64];
Estimation estimations2[64];
Estimation estimations3[64];
Expand Down
Loading

0 comments on commit 5d8f084

Please sign in to comment.