Skip to content

Commit

Permalink
Use narrow colors in hot loops
Browse files: browse the repository at this point in the history
  • Loading branch information
Andrew Evstyukhin committed Nov 18, 2022
1 parent 9078172 commit fbb8124
Show file tree
Hide file tree
Showing 11 changed files with 49 additions and 108 deletions.
13 changes: 5 additions & 8 deletions src/Bc7CoreMode0.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ namespace Mode0 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError3++;
#endif
return ComputeSubsetError3(area, mc, gWeightsGRB, _mm_cvtsi32_si128(water));
return ComputeSubsetError3(area, _mm_packus_epi16(mc, mc), gWeightsGRB, _mm_cvtsi32_si128(water));
}

void CompressBlockFast(Cell& input) noexcept
Expand Down Expand Up @@ -306,7 +306,6 @@ namespace Mode0 {
__m128i mc = _mm_setzero_si128();
mc = _mm_insert_epi16(mc, c1, 1);
mc = _mm_insert_epi16(mc, c2, 2);
mc = _mm_cvtepu8_epi16(mc);

#if defined(OPTION_COUNTERS)
gComputeSubsetError3GR++;
Expand All @@ -330,7 +329,6 @@ namespace Mode0 {
__m128i mc = _mm_setzero_si128();
mc = _mm_insert_epi16(mc, c1, 1);
mc = _mm_insert_epi16(mc, c3, 3);
mc = _mm_cvtepu8_epi16(mc);

#if defined(OPTION_COUNTERS)
gComputeSubsetError3GB++;
Expand All @@ -345,7 +343,6 @@ namespace Mode0 {
mc = _mm_insert_epi16(mc, c1, 1);
mc = _mm_insert_epi16(mc, c2, 2);
mc = _mm_insert_epi16(mc, c3, 3);
mc = _mm_cvtepu8_epi16(mc);

#if defined(OPTION_COUNTERS)
gComputeSubsetError3++;
Expand All @@ -356,7 +353,7 @@ namespace Mode0 {
{
water = err;

best_color = mc;
best_color = _mm_cvtepu8_epi16(mc);
}
}
}
Expand Down Expand Up @@ -435,7 +432,7 @@ namespace Mode0 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError3++;
#endif
int water1 = ComputeSubsetError3(area1, mc0, gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
int water1 = ComputeSubsetError3(area1, _mm_packus_epi16(mc0, mc0), gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
if (water1)
{
Subsets subsets1;
Expand All @@ -454,7 +451,7 @@ namespace Mode0 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError3++;
#endif
int water2 = ComputeSubsetError3(area2, mc1, gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
int water2 = ComputeSubsetError3(area2, _mm_packus_epi16(mc1, mc1), gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
if (water2)
{
Subsets subsets2;
Expand All @@ -473,7 +470,7 @@ namespace Mode0 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError3++;
#endif
int water3 = ComputeSubsetError3(area3, mc2, gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
int water3 = ComputeSubsetError3(area3, _mm_packus_epi16(mc2, mc2), gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
if (water3)
{
Subsets subsets3;
Expand Down
16 changes: 7 additions & 9 deletions src/Bc7CoreMode1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,23 +150,24 @@ namespace Mode1 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError3++;
#endif
int error0 = ComputeSubsetError3(area, mc, gWeightsGRB, _mm_cvtsi32_si128(water));
int error0 = ComputeSubsetError3(area, _mm_packus_epi16(mc, mc), gWeightsGRB, _mm_cvtsi32_si128(water));
if (water > error0)
{
water = error0;
}

const __m128i mp = _mm_set1_epi16(2);
__m128i mcp = _mm_or_si128(mc, mp);

#if defined(OPTION_COUNTERS)
gComputeSubsetError3++;
#endif
int error1 = ComputeSubsetError3(area, _mm_or_si128(mc, mp), gWeightsGRB, _mm_cvtsi32_si128(water));
int error1 = ComputeSubsetError3(area, _mm_packus_epi16(mcp, mcp), gWeightsGRB, _mm_cvtsi32_si128(water));
if (water > error1)
{
water = error1;

mc = _mm_or_si128(mc, mp);
mc = mcp;
}

return water;
Expand Down Expand Up @@ -283,7 +284,6 @@ namespace Mode1 {
__m128i mc = _mm_setzero_si128();
mc = _mm_insert_epi16(mc, c1, 1);
mc = _mm_insert_epi16(mc, c2, 2);
mc = _mm_cvtepu8_epi16(mc);

#if defined(OPTION_COUNTERS)
gComputeSubsetError3GR++;
Expand All @@ -307,7 +307,6 @@ namespace Mode1 {
__m128i mc = _mm_setzero_si128();
mc = _mm_insert_epi16(mc, c1, 1);
mc = _mm_insert_epi16(mc, c3, 3);
mc = _mm_cvtepu8_epi16(mc);

#if defined(OPTION_COUNTERS)
gComputeSubsetError3GB++;
Expand All @@ -322,7 +321,6 @@ namespace Mode1 {
mc = _mm_insert_epi16(mc, c1, 1);
mc = _mm_insert_epi16(mc, c2, 2);
mc = _mm_insert_epi16(mc, c3, 3);
mc = _mm_cvtepu8_epi16(mc);

#if defined(OPTION_COUNTERS)
gComputeSubsetError3++;
Expand All @@ -333,7 +331,7 @@ namespace Mode1 {
{
water = err;

best_color = mc;
best_color = _mm_cvtepu8_epi16(mc);
}
}
}
Expand Down Expand Up @@ -395,7 +393,7 @@ namespace Mode1 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError3++;
#endif
int water1 = ComputeSubsetError3(area1, mc0, gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
int water1 = ComputeSubsetError3(area1, _mm_packus_epi16(mc0, mc0), gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
if (water1)
{
Subsets subsets1;
Expand All @@ -414,7 +412,7 @@ namespace Mode1 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError3++;
#endif
int water2 = ComputeSubsetError3(area2, mc1, gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
int water2 = ComputeSubsetError3(area2, _mm_packus_epi16(mc1, mc1), gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
if (water2)
{
Subsets subsets2;
Expand Down
13 changes: 5 additions & 8 deletions src/Bc7CoreMode2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ namespace Mode2 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError2++;
#endif
return ComputeSubsetError2(area, mc, gWeightsGRB, _mm_cvtsi32_si128(water));
return ComputeSubsetError2(area, _mm_packus_epi16(mc, mc), gWeightsGRB, _mm_cvtsi32_si128(water));
}

void CompressBlockFast(Cell& input) noexcept
Expand Down Expand Up @@ -281,7 +281,6 @@ namespace Mode2 {
__m128i mc = _mm_setzero_si128();
mc = _mm_insert_epi16(mc, c1, 1);
mc = _mm_insert_epi16(mc, c2, 2);
mc = _mm_cvtepu8_epi16(mc);

#if defined(OPTION_COUNTERS)
gComputeSubsetError2GR++;
Expand All @@ -305,7 +304,6 @@ namespace Mode2 {
__m128i mc = _mm_setzero_si128();
mc = _mm_insert_epi16(mc, c1, 1);
mc = _mm_insert_epi16(mc, c3, 3);
mc = _mm_cvtepu8_epi16(mc);

#if defined(OPTION_COUNTERS)
gComputeSubsetError2GB++;
Expand All @@ -320,7 +318,6 @@ namespace Mode2 {
mc = _mm_insert_epi16(mc, c1, 1);
mc = _mm_insert_epi16(mc, c2, 2);
mc = _mm_insert_epi16(mc, c3, 3);
mc = _mm_cvtepu8_epi16(mc);

#if defined(OPTION_COUNTERS)
gComputeSubsetError2++;
Expand All @@ -331,7 +328,7 @@ namespace Mode2 {
{
water = err;

best_color = mc;
best_color = _mm_cvtepu8_epi16(mc);
}
}
}
Expand Down Expand Up @@ -359,7 +356,7 @@ namespace Mode2 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError2++;
#endif
int water1 = ComputeSubsetError2(area1, mc0, gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
int water1 = ComputeSubsetError2(area1, _mm_packus_epi16(mc0, mc0), gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
if (water1)
{
Subset subset1;
Expand All @@ -378,7 +375,7 @@ namespace Mode2 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError2++;
#endif
int water2 = ComputeSubsetError2(area2, mc1, gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
int water2 = ComputeSubsetError2(area2, _mm_packus_epi16(mc1, mc1), gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
if (water2)
{
Subset subset2;
Expand All @@ -397,7 +394,7 @@ namespace Mode2 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError2++;
#endif
int water3 = ComputeSubsetError2(area3, mc2, gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
int water3 = ComputeSubsetError2(area3, _mm_packus_epi16(mc2, mc2), gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
if (water3)
{
Subset subset3;
Expand Down
11 changes: 4 additions & 7 deletions src/Bc7CoreMode3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ namespace Mode3 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError2++;
#endif
return ComputeSubsetError2(area, mc, gWeightsGRB, _mm_cvtsi32_si128(water));
return ComputeSubsetError2(area, _mm_packus_epi16(mc, mc), gWeightsGRB, _mm_cvtsi32_si128(water));
}

void CompressBlockFast(Cell& input) noexcept
Expand Down Expand Up @@ -266,7 +266,6 @@ namespace Mode3 {
__m128i mc = _mm_setzero_si128();
mc = _mm_insert_epi16(mc, c1, 1);
mc = _mm_insert_epi16(mc, c2, 2);
mc = _mm_cvtepu8_epi16(mc);

#if defined(OPTION_COUNTERS)
gComputeSubsetError2GR++;
Expand All @@ -290,7 +289,6 @@ namespace Mode3 {
__m128i mc = _mm_setzero_si128();
mc = _mm_insert_epi16(mc, c1, 1);
mc = _mm_insert_epi16(mc, c3, 3);
mc = _mm_cvtepu8_epi16(mc);

#if defined(OPTION_COUNTERS)
gComputeSubsetError2GB++;
Expand All @@ -305,7 +303,6 @@ namespace Mode3 {
mc = _mm_insert_epi16(mc, c1, 1);
mc = _mm_insert_epi16(mc, c2, 2);
mc = _mm_insert_epi16(mc, c3, 3);
mc = _mm_cvtepu8_epi16(mc);

#if defined(OPTION_COUNTERS)
gComputeSubsetError2++;
Expand All @@ -316,7 +313,7 @@ namespace Mode3 {
{
water = err;

best_color = mc;
best_color = _mm_cvtepu8_epi16(mc);
}
}
}
Expand Down Expand Up @@ -394,7 +391,7 @@ namespace Mode3 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError2++;
#endif
int water1 = ComputeSubsetError2(area1, mc0, gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
int water1 = ComputeSubsetError2(area1, _mm_packus_epi16(mc0, mc0), gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
if (water1)
{
Subsets subsets1;
Expand All @@ -413,7 +410,7 @@ namespace Mode3 {
#if defined(OPTION_COUNTERS)
gComputeSubsetError2++;
#endif
int water2 = ComputeSubsetError2(area2, mc1, gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
int water2 = ComputeSubsetError2(area2, _mm_packus_epi16(mc1, mc1), gWeightsGRB, _mm_cvtsi32_si128(kBlockMaximalColorError));
if (water2)
{
Subsets subsets2;
Expand Down
Loading

0 comments on commit fbb8124

Please sign in to comment.