Skip to content

Commit

Permalink
Values expanded to 14-bit (to accumulate the worst-case error)
Browse files Browse the repository at this point in the history
With 16-bit values we couldn't accumulate the worst-case error without overflowing. Also fixed a bug whereby the values6 were truncated to 8-bit, therefore mostly favouring values8. The return from encode_bc4_hq() is now scaled to the same range as before the changes.
  • Loading branch information
cwoffenden committed Sep 5, 2022
1 parent 457c74f commit c07f344
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 29 deletions.
16 changes: 8 additions & 8 deletions rgbcx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2830,9 +2830,10 @@ namespace rgbcx
else if (!trial_block.is_alpha6_block())
std::swap(trial_block.m_endpoints[0], trial_block.m_endpoints[1]);

// note: block vals are expanded to 16-bit, as is the error
uint16_t block_vals16[8];
trial_block.get_block_values(block_vals16, trial_block.m_endpoints[0], trial_block.m_endpoints[1]);
// note: block vals are expanded to 8:6 fixed point, as is the error,
// with 8:6 able to accumulate 16x the worst-case error (255.98 ^ 2)
uint16_t block_vals14[8];
trial_block.get_block_values(block_vals14, trial_block.m_endpoints[0], trial_block.m_endpoints[1]);

uint32_t trial_err = 0;
uint8_t trial_sels[16];
Expand All @@ -2842,8 +2843,7 @@ namespace rgbcx
memcpy(trial_sels, pForce_selectors, 16);

for (uint32_t i = 0; i < 16; i++) {
int val = pPixels[i * stride];
trial_err += squarei(block_vals16[pForce_selectors[i]] - ((val << 8) | val));
trial_err += squarei(block_vals14[pForce_selectors[i]] - bc4_block::expand8to14(pPixels[i * stride]));
}
}
else
Expand All @@ -2854,8 +2854,7 @@ namespace rgbcx
uint32_t best_index = 0;
for (uint32_t j = 0; j < 8; j++)
{
int val = pPixels[i * stride];
uint32_t err = squarei(block_vals16[j] - ((val << 8) | val));
uint32_t err = squarei(block_vals14[j] - bc4_block::expand8to14(pPixels[i * stride]));
if (err < best_index_err)
{
best_index_err = err;
Expand Down Expand Up @@ -2902,7 +2901,8 @@ namespace rgbcx
} // mode
error_reached_zero:

return best_err >> 8;
// scale the error back to 8-bit from 8:6 fixed point (to match what was previously returned)
return (best_err + 63) >> 12;
}

void encode_bc3(void* pDst, const uint8_t* pPixels, uint32_t flags, uint32_t total_orderings_to_try)
Expand Down
47 changes: 26 additions & 21 deletions rgbcx.h
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,11 @@ namespace rgbcx
return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1);
}

// Widens an 8-bit value to 14 bits. The input occupies the top eight bits of
// the result and its six most significant bits are replicated into the low
// six bits, so 0 maps to 0 and 255 maps to 16383 (the full 14-bit range).
static inline uint32_t expand8to14(uint32_t val) {
    const uint32_t upper = val << 6;  // original value moved to bits 13..6
    const uint32_t lower = val >> 2;  // top six bits reused as bits 5..0
    return upper | lower;
}

// Interpolated values as 8-bit (as per BC3 alpha)
static inline uint32_t get_block_values6(uint8_t* pDst, uint32_t l, uint32_t h)
{
Expand All @@ -402,19 +407,19 @@ namespace rgbcx
return 6;
}

// Interpolated values expanded to 16-bit (as per BC4/5)
// Interpolated values expanded to 14-bit (for BC4/5)
static inline uint32_t get_block_values6(uint16_t* pDst, uint32_t l, uint32_t h)
{
uint32_t l16 = (l << 8) | l;
uint32_t h16 = (h << 8) | h;
pDst[0] = static_cast<uint8_t>(l16);
pDst[1] = static_cast<uint8_t>(h16);
pDst[2] = static_cast<uint8_t>((l16 * 4 + h16 ) / 5);
pDst[3] = static_cast<uint8_t>((l16 * 3 + h16 * 2) / 5);
pDst[4] = static_cast<uint8_t>((l16 * 2 + h16 * 3) / 5);
pDst[5] = static_cast<uint8_t>((l16 + h16 * 4) / 5);
uint32_t l14 = expand8to14(l);
uint32_t h14 = expand8to14(h);
pDst[0] = static_cast<uint16_t>(l14);
pDst[1] = static_cast<uint16_t>(h14);
pDst[2] = static_cast<uint16_t>((l14 * 4 + h14 ) / 5);
pDst[3] = static_cast<uint16_t>((l14 * 3 + h14 * 2) / 5);
pDst[4] = static_cast<uint16_t>((l14 * 2 + h14 * 3) / 5);
pDst[5] = static_cast<uint16_t>((l14 + h14 * 4) / 5);
pDst[6] = 0;
pDst[7] = 65535;
pDst[7] = static_cast<uint16_t>(expand8to14(255));
return 6;
}

Expand All @@ -432,19 +437,19 @@ namespace rgbcx
return 8;
}

// Interpolated values expanded to 16-bit (as per BC4/5)
// Interpolated values expanded to 14-bit (for BC4/5)
static inline uint32_t get_block_values8(uint16_t* pDst, uint32_t l, uint32_t h)
{
uint32_t l16 = (l << 8) | l;
uint32_t h16 = (h << 8) | h;
pDst[0] = static_cast<uint16_t>(l16);
pDst[1] = static_cast<uint16_t>(h16);
pDst[2] = static_cast<uint16_t>((l16 * 6 + h16 ) / 7);
pDst[3] = static_cast<uint16_t>((l16 * 5 + h16 * 2) / 7);
pDst[4] = static_cast<uint16_t>((l16 * 4 + h16 * 3) / 7);
pDst[5] = static_cast<uint16_t>((l16 * 3 + h16 * 4) / 7);
pDst[6] = static_cast<uint16_t>((l16 * 2 + h16 * 5) / 7);
pDst[7] = static_cast<uint16_t>((l16 + h16 * 6) / 7);
uint32_t l14 = expand8to14(l);
uint32_t h14 = expand8to14(h);
pDst[0] = static_cast<uint16_t>(l14);
pDst[1] = static_cast<uint16_t>(h14);
pDst[2] = static_cast<uint16_t>((l14 * 6 + h14 ) / 7);
pDst[3] = static_cast<uint16_t>((l14 * 5 + h14 * 2) / 7);
pDst[4] = static_cast<uint16_t>((l14 * 4 + h14 * 3) / 7);
pDst[5] = static_cast<uint16_t>((l14 * 3 + h14 * 4) / 7);
pDst[6] = static_cast<uint16_t>((l14 * 2 + h14 * 5) / 7);
pDst[7] = static_cast<uint16_t>((l14 + h14 * 6) / 7);
return 8;
}

Expand Down

0 comments on commit c07f344

Please sign in to comment.