Skip to content

Commit

Permalink
xBRZ slice calculation from batch size for more even parallel processing.
Browse files Browse the repository at this point in the history

Fix texture sizes (max PSX texture size is 256*256).
  • Loading branch information
Nucleoprotein committed Feb 5, 2016
1 parent 1ded43e commit 399563a
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 53 deletions.
43 changes: 26 additions & 17 deletions gpuPeteOpenGL2Tweak/GPUPatches.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ void APIENTRY GPUPatches::Hook_glCopyTexSubImage2D(GLenum target, GLint level, G
if (target != GL_TEXTURE_2D)
return oglCopyTexSubImage2D(target, level, xoffset, yoffset, x, y, width, height);

if (width >= MAX_TEXTURE_X || height >= MAX_TEXTURE_Y)
if (width > MAX_TEXTURE_X || height > MAX_TEXTURE_Y)
{
PLUGINLOG("glCopyTexSubImage2D !!!Texture too big !!!");
return oglCopyTexSubImage2D(target, level, xoffset, yoffset, x, y, width, height);
Expand All @@ -295,14 +295,14 @@ void (APIENTRY* GPUPatches::oglTexSubImage2D)(GLenum target, GLint level, GLint
void APIENTRY GPUPatches::Hook_glTexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels)
{
//PLUGINLOG("glTexSubImage2D 0x%08X %d %d %d %d %d 0x%08X 0x%08X %p", target, level, xoffset, yoffset, width, height, format, type, pixels);
if ((target != GL_TEXTURE_2D || format != GL_RGBA || width >= MAX_TEXTURE_X || height >= MAX_TEXTURE_Y))
if ((target != GL_TEXTURE_2D || format != GL_RGBA || width > MAX_TEXTURE_X || height > MAX_TEXTURE_Y))
{
return oglTexSubImage2D(target, level, xoffset, yoffset, width, height, format, type, pixels);
}

if (format == GL_RGBA)
{
std::vector<u32> textureBuffer = s_pGPUPatches->ScaleTexture((u32*)pixels, width, height);
std::vector<u32>& textureBuffer = s_pGPUPatches->ScaleTexture((u32*)pixels, width, height);
return oglTexSubImage2D(target, level, xoffset * s_pGPUPatches->m_scale, yoffset * s_pGPUPatches->m_scale, width * s_pGPUPatches->m_scale, height * s_pGPUPatches->m_scale, format, type, textureBuffer.data());
}
return oglTexSubImage2D(target, level, xoffset, yoffset, width, height, format, type, pixels);
Expand All @@ -311,18 +311,18 @@ void APIENTRY GPUPatches::Hook_glTexSubImage2D(GLenum target, GLint level, GLint
void (APIENTRY* GPUPatches::oglTexImage2D)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void *pixels);
void APIENTRY GPUPatches::Hook_glTexImage2D(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void *pixels)
{
if (target == GL_TEXTURE_2D && format == GL_RGBA && width >= MAX_TEXTURE_X && height >= MAX_TEXTURE_Y)
if (target == GL_TEXTURE_2D && format == GL_RGBA && width > MAX_TEXTURE_X && height > MAX_TEXTURE_Y)
PLUGINLOG("ResHack: Detected render surface, size: %d x %d x 32bpp, %d MiB", width, height, (width * height * 4) / (1024 * 1024));

//PLUGINLOG("glTexImage2D 0x%08X %d %d %d 0x%08X 0x%08X %p", target, level, width, height, format, type, pixels);
if (target != GL_TEXTURE_2D || format != GL_RGBA || width >= MAX_TEXTURE_X || height >= MAX_TEXTURE_Y)
if (target != GL_TEXTURE_2D || format != GL_RGBA || width > MAX_TEXTURE_X || height > MAX_TEXTURE_Y)
{
return oglTexImage2D(target, level, internalformat, width, height, border, format, type, pixels);
}

if (format == GL_RGBA)
{
std::vector<u32> textureBuffer = s_pGPUPatches->ScaleTexture((u32*)pixels, width, height);
std::vector<u32>& textureBuffer = s_pGPUPatches->ScaleTexture((u32*)pixels, width, height);
return oglTexImage2D(target, level, internalformat, width * s_pGPUPatches->m_scale, height * s_pGPUPatches->m_scale, border, format, type, textureBuffer.data());
}
return oglTexImage2D(target, level, internalformat, width, height, border, format, type, pixels);
Expand All @@ -331,21 +331,30 @@ void APIENTRY GPUPatches::Hook_glTexImage2D(GLenum target, GLint level, GLint in
std::vector<u32> GPUPatches::ScaleTexture(const u32* source, u32 srcWidth, u32 srcHeight)
{
u32 texture_hash = XXH32(source, srcWidth * srcHeight * sizeof(u32), 0);
//PLUGINLOG("texture_hash: %llu", texture_hash);

if (m_texture_cache_size > 0 && texture_hash > 0 && !m_TextureCache[texture_hash].empty())
{
//PLUGINLOG("Cache HIT: %lu", texture_hash);
return m_TextureCache[texture_hash];
}

//PLUGINLOG("Cache MISS: %lu", texture_hash);

std::vector<u32> textureBuffer(srcWidth * m_scale * srcHeight * m_scale);

//calculate slice
int slice = (srcWidth * srcHeight) / m_batch_size;
if (slice > (int)(srcHeight / 2)) slice = srcHeight / 2;
if (slice <= 0) slice = 1;

//PLUGINLOG("slice: %lu, srcWidth: %lu, srcHeight: %lu, pixels: %lu", slice, srcWidth, srcHeight, srcWidth * srcHeight);

if ((m_fast_fbe && *locFBE) || m_force_nearest)
{
concurrency::parallel_for(0, (int)srcHeight, (int)m_slice, [&](const int& i)
concurrency::parallel_for(0, (int)srcHeight, slice, [&](const int& i)
{
xbrz::nearestNeighborScale(source, srcWidth, srcHeight, srcWidth * sizeof(u32), textureBuffer.data(),
srcWidth * m_scale, srcHeight * m_scale, srcWidth * m_scale * sizeof(u32), xbrz::SliceType::NN_SCALE_SLICE_SOURCE, i, i + m_slice);
srcWidth * m_scale, srcHeight * m_scale, srcWidth * m_scale * sizeof(u32), xbrz::SliceType::NN_SCALE_SLICE_SOURCE, i, i + slice);
}
);
}
Expand All @@ -357,17 +366,17 @@ std::vector<u32> GPUPatches::ScaleTexture(const u32* source, u32 srcWidth, u32 s
DePosterize(source, deposterizeBuffer.data(), srcWidth, srcHeight);
source = deposterizeBuffer.data();

concurrency::parallel_for(0, (int)srcHeight, (int)m_slice, [&](const int& i)
concurrency::parallel_for(0, (int)srcHeight, slice, [&](const int& i)
{
xbrz::scale(m_scale, source, textureBuffer.data(), srcWidth, srcHeight, xbrz::ColorFormat::ARGB, xbrz::ScalerCfg(), i, i + m_slice);
xbrz::scale(m_scale, source, textureBuffer.data(), srcWidth, srcHeight, xbrz::ColorFormat::ARGB, xbrz::ScalerCfg(), i, i + slice);
}
);
}
else
{
concurrency::parallel_for(0, (int)srcHeight, (int)m_slice, [&](const int& i)
concurrency::parallel_for(0, (int)srcHeight, slice, [&](const int& i)
{
xbrz::scale(m_scale, source, textureBuffer.data(), srcWidth, srcHeight, xbrz::ColorFormat::ARGB, xbrz::ScalerCfg(), i, i + m_slice);
xbrz::scale(m_scale, source, textureBuffer.data(), srcWidth, srcHeight, xbrz::ColorFormat::ARGB, xbrz::ScalerCfg(), i, i + slice);
}
);
}
Expand All @@ -381,7 +390,7 @@ std::vector<u32> GPUPatches::ScaleTexture(const u32* source, u32 srcWidth, u32 s
//PLUGINLOG("cache time size: %lu", m_TextureCacheTimestamp.size());
if (m_TextureCache.size() >= m_texture_cache_size)
{
//PLUGINLOG("removing hash %llu", m_TextureCacheTimestamp.front());
//PLUGINLOG("removing hash %lu", m_TextureCacheTimestamp.front());
m_TextureCache.erase(m_TextureCacheTimestamp.front());
m_TextureCacheTimestamp.erase(m_TextureCacheTimestamp.begin());
//PLUGINLOG("cache size: %lu", m_TextureCache.size());
Expand All @@ -391,7 +400,7 @@ std::vector<u32> GPUPatches::ScaleTexture(const u32* source, u32 srcWidth, u32 s
return std::move(textureBuffer);
}

void GPUPatches::EnableTextureScaler(u32 scale, u32 slice, bool force_nearest, bool fast_fbe, u32 texture_cache_size)
void GPUPatches::EnableTextureScaler(u32 scale, u32 batch_size, bool force_nearest, bool fast_fbe, u32 texture_cache_size)
{
if (scale <= 1)
return;
Expand All @@ -400,12 +409,12 @@ void GPUPatches::EnableTextureScaler(u32 scale, u32 slice, bool force_nearest, b
std::call_once(flag, [&]()
{
m_scale = scale;
m_slice = slice;
m_batch_size = batch_size;
m_force_nearest = force_nearest;
m_fast_fbe = fast_fbe;
m_texture_cache_size = texture_cache_size;

PLUGINLOG("%ux%s Texture Filter, slice %u", scale, m_force_nearest ? " Nearest Neighbour" : "BRZ", slice);
PLUGINLOG("%ux%s Texture Filter, BatchSize: %lu", m_scale, m_force_nearest ? " Nearest Neighbour" : "BRZ", batch_size);

MH_CreateHook(glTexImage2D, Hook_glTexImage2D, reinterpret_cast<void**>(&oglTexImage2D));
MH_EnableHook(glTexImage2D);
Expand Down
8 changes: 4 additions & 4 deletions gpuPeteOpenGL2Tweak/GPUPatches.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#pragma once

#define MAX_TEXTURE_X 512
#define MAX_TEXTURE_Y 512
#define MAX_TEXTURE_X 256
#define MAX_TEXTURE_Y 256

class GPUPatches
{
Expand All @@ -26,12 +26,12 @@ class GPUPatches
void ResHack(u32 _x, u32 _y);
void FixFullscreenAspect();
void ApplyWindowProc(HWND hWnd);
void EnableTextureScaler(u32 scale, u32 slice, bool force_nearest, bool fast_fbe, u32 texture_cache_size);
void EnableTextureScaler(u32 scale, u32 m_batch_size, bool force_nearest, bool fast_fbe, u32 texture_cache_size);

private:
GTEData m_gtedata;
u32 m_scale;
u32 m_slice;
u32 m_batch_size;

bool m_force_nearest;
bool m_fast_fbe;
Expand Down
16 changes: 8 additions & 8 deletions gpuPeteOpenGL2Tweak/Settings.def
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,21 @@ SETTING(LongValue, s32, VSyncInterval, "Options", 0, "#Enables VSync, paramater
SETTING(BoolValue, bool, HideCursor, "Options", false, "#Hides cursor in window");
SETTING(BoolValue, bool, UsePEC, "Options", false, "#Use Playstation Emulation Cheater plugin");
SETTING(BoolValue, bool, HardcoreMode, "Options", false, "#Disables PEC and emulator save states (ie. F1/F2/F3 keys)");
SETTING(BoolValue, bool, FixFullscreenAspect, "Options", false, "#Fixes fullscreen aspect ratio");
SETTING(BoolValue, bool, FixFullscreenAspect, "Options", false, "#Fixes fullscreen aspect ratio, default disabled because it's buggy");

SETTING(LongValue, s32, WindowX, "Window", -1, "#Moves window to X position");
SETTING(LongValue, s32, WindowY, "Window", -1, "#Moves window to Y position");
SETTING(BoolValue, bool, WindowOnTop, "Window", false, "#Set window on-top flag");

SETTING(LongValue, u32, MulX, "ResHack", 0, "#Internal resolution X multiplier, recommended max: 8");
SETTING(LongValue, u32, MulY, "ResHack", 0, "#Internal resolution Y multiplier, recommended max: 12");
SETTING(LongValue, u32, MulX, "ResHack", 8, "#Internal resolution X multiplier, recommended max: 8");
SETTING(LongValue, u32, MulY, "ResHack", 8, "#Internal resolution Y multiplier, recommended max: 12");

SETTING(LongValue, u32, xBRZScale, "xBRZ", 0, "#xBRZ scale, max 6");
SETTING(LongValue, u32, SliceSize, "xBRZ", 8, "#Texture slice for scaler, different slices can provide better or worse perfomance, recommended 8");
SETTING(BoolValue, bool, Deposterize, "xBRZ", false, "#Deposterize");
SETTING(BoolValue, bool, FastFBE, "xBRZ", true, "#Use nearest neighbour scaler for FBE, requires Standard setting for FBE!");
SETTING(LongValue, u32, xBRZScale, "xBRZ", 4, "#xBRZ scale, max 6");
SETTING(LongValue, u32, BatchSize, "xBRZ", 1024, "#Number of texels in batch for xBRZ scaler");
SETTING(BoolValue, bool, Deposterize, "xBRZ", true, "#Deposterize, usefull for games with heavy dithering (Silent Hill), NOTE: single threaded processing");
SETTING(BoolValue, bool, FastFBE, "xBRZ", true, "#Use nearest neighbour scaler for FBE, requires \"2: Standard\" setting for FBE!");
SETTING(BoolValue, bool, ForceNearest, "xBRZ", false, "#Forces nearest neighbour scaler");
SETTING(LongValue, u32, TextureCacheSize, "xBRZ", 128, "#Texture Cache size (in textures count!) for upscaled textures so texture is upscaled only once - fast!");
SETTING(LongValue, u32, TextureCacheSize, "xBRZ", 256, "#Texture Cache size (in textures count!) for upscaled textures so texture is upscaled only once - fast!");

SETTING(Value, std::string, Port1, "PadPlugin", "", "#Port1 pad plugin path");
SETTING(Value, std::string, Port2, "PadPlugin", "", "#Port2 pad plugin path");
2 changes: 1 addition & 1 deletion gpuPeteOpenGL2Tweak/gpuPeteOpenGL2Tweak.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ s32 Context::OnGPUinit()
m_gpupatches.GTEAccuracy();

u32 scale = clamp<u32>(m_config.GetxBRZScale(), 1, 6);
m_gpupatches.EnableTextureScaler(scale, m_config.GetSliceSize(), m_config.GetForceNearest(), m_config.GetFastFBE(), m_config.GetTextureCacheSize());
m_gpupatches.EnableTextureScaler(scale, m_config.GetBatchSize(), m_config.GetForceNearest(), m_config.GetFastFBE(), m_config.GetTextureCacheSize());

if (m_config.GetMulX() > 0 && m_config.GetMulY() > 0)
m_gpupatches.ResHack(m_config.GetMulX(), m_config.GetMulY());
Expand Down
4 changes: 2 additions & 2 deletions gpuPeteOpenGL2Tweak/gte_accuracy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void GPUaddVertex(s16 sx, s16 sy, s64 fx, s64 fy, s64 fz)
{
gteCoords[sy + 0x800][sx + 0x800].x = fx / (std::numeric_limits<u16>::max() * 1.0f);
gteCoords[sy + 0x800][sx + 0x800].y = fy / (std::numeric_limits<u16>::max() * 1.0f);
gteCoords[sy + 0x800][sx + 0x800].z = fz / (std::numeric_limits<u16>::max() * 1.0f);
//gteCoords[sy + 0x800][sx + 0x800].z = fz / (std::numeric_limits<u16>::max() * 1.0f);
}
}

Expand Down Expand Up @@ -67,7 +67,7 @@ bool getGteVertex(s16 sx, s16 sy, OGLVertexTag* vertex)
{
vertex->x = gteCoords[sy + 0x800][sx + 0x800].x;
vertex->y = gteCoords[sy + 0x800][sx + 0x800].y;
vertex->z = gteCoords[sy + 0x800][sx + 0x800].z;
//vertex->z = gteCoords[sy + 0x800][sx + 0x800].z;

return true;
}
Expand Down
48 changes: 27 additions & 21 deletions gpuPeteOpenGL2Tweak/xBRZ/xbrz.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,38 +175,44 @@ double distRGB(uint32_t pix1, uint32_t pix2)
}


#if 0
inline
double distYCbCr(uint32_t pix1, uint32_t pix2, double lumaWeight)
{
//http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
//YCbCr conversion is a matrix multiplication => take advantage of linearity by subtracting first!
const int r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2); //we may delay division by 255 to after matrix multiplication
const int g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2); //
const int b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2); //substraction for int is noticeable faster than for double!

//const double k_b = 0.0722; //ITU-R BT.709 conversion
//const double k_r = 0.2126; //
const double k_b = 0.0593; //ITU-R BT.2020 conversion
const double k_r = 0.2627; //
const double k_g = 1 - k_b - k_r;

const double scale_b = 0.5 / (1 - k_b);
const double scale_r = 0.5 / (1 - k_r);

const double y = k_r * r_diff + k_g * g_diff + k_b * b_diff; //[!], analog YCbCr!
const double c_b = scale_b * (b_diff - y);
const double c_r = scale_r * (r_diff - y);

//we skip division by 255 to have similar range like other distance functions
return std::sqrt(square(lumaWeight * y) + square(c_b) + square(c_r));
//http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
//YCbCr conversion is a matrix multiplication => take advantage of linearity by subtracting first!
const int r_diff = static_cast<int>(getRed(pix1)) - getRed(pix2); //we may delay division by 255 to after matrix multiplication
const int g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2); //
const int b_diff = static_cast<int>(getBlue(pix1)) - getBlue(pix2); //substraction for int is noticeable faster than for double!

//const double k_b = 0.0722; //ITU-R BT.709 conversion
//const double k_r = 0.2126; //
const double k_b = 0.0593; //ITU-R BT.2020 conversion
const double k_r = 0.2627; //
const double k_g = 1 - k_b - k_r;

const double scale_b = 0.5 / (1 - k_b);
const double scale_r = 0.5 / (1 - k_r);

const double y = k_r * r_diff + k_g * g_diff + k_b * b_diff; //[!], analog YCbCr!
const double c_b = scale_b * (b_diff - y);
const double c_r = scale_r * (r_diff - y);

//we skip division by 255 to have similar range like other distance functions
return std::sqrt(square(lumaWeight * y) + square(c_b) + square(c_r));
}
#endif // 0



struct DistYCbCrBuffer //30% perf boost compared to distYCbCr()!
{
public:
static double dist(uint32_t pix1, uint32_t pix2)
{
#if defined _MSC_VER && _MSC_VER < 1900
#error function scope static initialization is not yet thread-safe!
#endif
static const DistYCbCrBuffer inst;
return inst.distImpl(pix1, pix2);
}
Expand Down

0 comments on commit 399563a

Please sign in to comment.