diff --git a/examples_tests/22.RaytracedAO/CMakeLists.txt b/examples_tests/22.RaytracedAO/CMakeLists.txt
index b3e2b275ed..c8c4e1e7f7 100644
--- a/examples_tests/22.RaytracedAO/CMakeLists.txt
+++ b/examples_tests/22.RaytracedAO/CMakeLists.txt
@@ -31,6 +31,7 @@ endif()
set(EXTRA_SOURCES
../../src/nbl/ext/DebugDraw/CDraw3DLine.cpp
+ ../../src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp
Renderer.cpp
CommandLineHandler.cpp
)
diff --git a/examples_tests/22.RaytracedAO/README.md b/examples_tests/22.RaytracedAO/README.md
index 6f52403ae3..f2848fbe4b 100644
--- a/examples_tests/22.RaytracedAO/README.md
+++ b/examples_tests/22.RaytracedAO/README.md
@@ -58,7 +58,7 @@ You can switch between those sensors using `PAGE UP/DOWN` Keys defined in more d
| tonemapper | Tonemapper Settings for Denoiser | string | "ACES=0.4,0.8"
| cropOffsetX, cropOffsetY | Used to control the offset for cropping cubemap renders (instead of highQualityEdges) | int | 0 |
| cropWidth, cropHeight | Used to control the size for cropping cubemap renders (instead of highQualityEdges) | int | width-cropOffsetX, height-cropOffsetY
-| envmapRegularizationFactor | if RIS is enabled then paths will be guided towards envmap based on this regularization factor.
1.0 is based on product of envmap and bxdf
0.0 is based only on bxdf
But 1.0 is never a valid value to use.
Valid Range is [0.2, 0.8] | float | 0.0 |
+| envmapRegularizationFactor | Fractional blend between guiding paths by the BxDF alone (0.0) and by the product of the BxDF and the Environment Map (1.0).
The valid range is [0.0, 0.8] (values outside it are clamped), because guiding fully by the product produces extreme fireflies from indirect light or local lights. | float | 0.5 |
### Example of a sensor using all new properties described above.
```xml
diff --git a/examples_tests/22.RaytracedAO/Renderer.cpp b/examples_tests/22.RaytracedAO/Renderer.cpp
index a801618268..ea4adcace5 100644
--- a/examples_tests/22.RaytracedAO/Renderer.cpp
+++ b/examples_tests/22.RaytracedAO/Renderer.cpp
@@ -53,7 +53,8 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I
m_framesDispatched(0u), m_rcpPixelSize{0.f,0.f},
m_staticViewData{{0u,0u},0u,0u}, m_raytraceCommonData{core::matrix4SIMD(), vec3(),0.f,0u,0u,0u,0.f},
m_indirectDrawBuffers{nullptr},m_cullPushConstants{core::matrix4SIMD(),1.f,0u,0u,0u},m_cullWorkGroups(0u),
- m_raygenWorkGroups{0u,0u},m_visibilityBuffer(nullptr),m_colorBuffer(nullptr)
+ m_raygenWorkGroups{0u,0u},m_visibilityBuffer(nullptr),m_colorBuffer(nullptr),
+ m_envMapImportanceSampling(_driver)
{
// TODO: reimplement
m_useDenoiser = false;
@@ -129,8 +130,6 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I
bindings[7].type = asset::EDT_COMBINED_IMAGE_SAMPLER;
bindings[8].type = asset::EDT_COMBINED_IMAGE_SAMPLER;
bindings[9].type = asset::EDT_COMBINED_IMAGE_SAMPLER;
- bindings[9].count = MipCountLuminance;
-
m_commonRaytracingDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+raytracingCommonDescriptorCount);
}
@@ -789,7 +788,7 @@ void Renderer::initSceneNonAreaLights(Renderer::InitializationData& initData)
// TODO: better filter and GPU accelerated
m_finalEnvmap->regenerateMipMapLevels();
- initWarpingResources();
+ m_envMapImportanceSampling.initResources(m_finalEnvmap);
}
void Renderer::finalizeScene(Renderer::InitializationData& initData)
@@ -830,7 +829,7 @@ void Renderer::finalizeScene(Renderer::InitializationData& initData)
}
}
-core::smart_refctd_ptr Renderer::createTexture(uint32_t width, uint32_t height, E_FORMAT format, uint32_t layers)
+core::smart_refctd_ptr Renderer::createTexture(uint32_t width, uint32_t height, E_FORMAT format, uint32_t mipLevels, uint32_t layers)
{
const auto real_layers = layers ? layers:1u;
@@ -839,7 +838,7 @@ core::smart_refctd_ptr Renderer::createTexture(uint32_t width, ui
imgparams.arrayLayers = real_layers;
imgparams.flags = static_cast(0);
imgparams.format = format;
- imgparams.mipLevels = 1u;
+ imgparams.mipLevels = mipLevels;
imgparams.samples = IImage::ESCF_1_BIT;
imgparams.type = IImage::ET_2D;
@@ -852,14 +851,14 @@ core::smart_refctd_ptr Renderer::createTexture(uint32_t width, ui
viewparams.subresourceRange.baseArrayLayer = 0u;
viewparams.subresourceRange.layerCount = real_layers;
viewparams.subresourceRange.baseMipLevel = 0u;
- viewparams.subresourceRange.levelCount = 1u;
+ viewparams.subresourceRange.levelCount = mipLevels;
return m_driver->createGPUImageView(std::move(viewparams));
}
core::smart_refctd_ptr Renderer::createScreenSizedTexture(E_FORMAT format, uint32_t layers)
{
- return createTexture(m_staticViewData.imageDimensions.x, m_staticViewData.imageDimensions.y, format, layers);
+ return createTexture(m_staticViewData.imageDimensions.x, m_staticViewData.imageDimensions.y, format, 1u, layers);
}
core::smart_refctd_ptr Renderer::SampleSequence::createCPUBuffer(uint32_t quantizedDimensions, uint32_t sampleCount)
@@ -1120,7 +1119,7 @@ void Renderer::deinitSceneResources()
m_sceneBound = core::aabbox3df(FLT_MAX, FLT_MAX, FLT_MAX, -FLT_MAX, -FLT_MAX, -FLT_MAX);
m_finalEnvmap = nullptr;
- deinitWarpingResources();
+ m_envMapImportanceSampling.deinitResources();
m_staticViewData = {{0u,0u},0u,0u};
auto rr = m_rrManager->getRadeonRaysAPI();
@@ -1142,7 +1141,7 @@ void Renderer::deinitSceneResources()
void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float envMapRegularizationFactor)
{
- bool enableRIS = computeWarpMap(envMapRegularizationFactor);
+ bool enableRIS = m_envMapImportanceSampling.computeWarpMap(envMapRegularizationFactor);
m_staticViewData.imageDimensions = {width, height};
m_rcpPixelSize = { 2.f/float(m_staticViewData.imageDimensions.x),-2.f/float(m_staticViewData.imageDimensions.y) };
@@ -1162,7 +1161,7 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float e
uint32_t _maxRaysPerDispatch = 0u;
auto setRayBufferSizes = [renderPixelCount,this,&_maxRaysPerDispatch,&raygenBufferSize,&intersectionBufferSize](uint32_t sampleMultiplier) -> void
{
- m_staticViewData.samplesPerPixelPerDispatch = SAMPLING_STRATEGY_COUNT*sampleMultiplier;
+ m_staticViewData.samplesPerPixelPerDispatch = sampleMultiplier;
const size_t minimumSampleCountPerDispatch = static_cast(renderPixelCount)*getSamplesPerPixelPerDispatch();
_maxRaysPerDispatch = static_cast(minimumSampleCountPerDispatch);
@@ -1283,7 +1282,10 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float e
constexpr uint32_t MaxDescritorUpdates = 10u;
IGPUDescriptorSet::SDescriptorInfo infos[MaxDescritorUpdates];
IGPUDescriptorSet::SWriteDescriptorSet writes[MaxDescritorUpdates];
-
+
+ auto warpMap = m_envMapImportanceSampling.getWarpMapImageView();
+ auto lumaMap = m_envMapImportanceSampling.getLuminanceImageView();
+
// set up m_commonRaytracingDS
core::smart_refctd_ptr _staticViewDataBuffer;
size_t staticViewDataBufferSize=0u;
@@ -1304,24 +1306,21 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float e
}
// warpmap
{
- setImageInfo(infos+8,asset::EIL_GENERAL,core::smart_refctd_ptr(m_warpMap));
+ setImageInfo(infos+8,asset::EIL_GENERAL,core::smart_refctd_ptr(m_envMapImportanceSampling.m_warpMap));
ISampler::SParams samplerParams = { ISampler::ETC_REPEAT, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS };
infos[8].image.sampler = m_driver->createGPUSampler(samplerParams);
infos[8].image.imageLayout = EIL_SHADER_READ_ONLY_OPTIMAL;
}
- IGPUDescriptorSet::SDescriptorInfo luminanceDescriptorInfos[MipCountLuminance];
+ IGPUDescriptorSet::SDescriptorInfo luminanceDescriptorInfo = {};
// luminance mip maps
{
ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_BORDER, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, 0u, false, ECO_ALWAYS };
auto sampler = m_driver->createGPUSampler(samplerParams);
- for(uint32_t i = 0; i < MipCountLuminance; ++i)
- {
- luminanceDescriptorInfos[i].desc = m_luminanceMipMaps[i];
- luminanceDescriptorInfos[i].image.sampler = sampler;
- luminanceDescriptorInfos[i].image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL;
- }
+ luminanceDescriptorInfo.desc = m_envMapImportanceSampling.m_luminanceBaseImageView;
+ luminanceDescriptorInfo.image.sampler = sampler;
+ luminanceDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL;
}
createEmptyInteropBufferAndSetUpInfo(infos+3,m_rayBuffer[0],raygenBufferSize);
@@ -1343,13 +1342,13 @@ void Renderer::initScreenSizedResources(uint32_t width, uint32_t height, float e
EDT_COMBINED_IMAGE_SAMPLER,
});
- // Set last write which is a descriptor array
+ // Set last write
writes[9].binding = 9u;
writes[9].arrayElement = 0u;
- writes[9].count = MipCountLuminance;
+ writes[9].count = 1u;
writes[9].descriptorType = EDT_COMBINED_IMAGE_SAMPLER;
writes[9].dstSet = m_commonRaytracingDS[0].get();
- writes[9].info = luminanceDescriptorInfos;
+ writes[9].info = &luminanceDescriptorInfo;
m_driver->updateDescriptorSets(descriptorUpdateCount,writes,0u,nullptr);
// set up second DS
@@ -1954,389 +1953,6 @@ bool Renderer::traceBounce(uint32_t& raycount)
return true;
}
-void Renderer::initWarpingResources()
-{
- for(uint32_t i = 0; i < MipCountLuminance; ++i)
- {
- const uint32_t resolution = 0x1u<<(MipCountLuminance - 1 - i);
- const uint32_t width = std::max(resolution, 1u);
- const uint32_t height = std::max(resolution/2u, 1u);
- m_luminanceMipMaps[i] = createTexture(width, height, EF_R32_SFLOAT);
- assert(m_luminanceMipMaps[i]);
- }
-
- {
- const uint32_t resolution = 0x1u<<(MipCountEnvmap-1); // same size as envmap
- const uint32_t width = std::max(resolution, 1u);
- const uint32_t height = std::max(resolution/2u, 1u);
- // m_warpMap = createTexture(width, height, EF_R16G16_SFLOAT);
- m_warpMap = createTexture(width, height, EF_R32G32_SFLOAT);
- }
-
- ISampler::SParams samplerParams;
- samplerParams.TextureWrapU = samplerParams.TextureWrapV = samplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_EDGE;
- samplerParams.MinFilter = samplerParams.MaxFilter = ISampler::ETF_NEAREST;
- samplerParams.MipmapMode = ISampler::ESMM_NEAREST;
- samplerParams.AnisotropicFilter = 0u;
- samplerParams.CompareEnable = false;
- auto nearestSampler = m_driver->createGPUSampler(samplerParams);
-
- // Create DescriptorLayout
- {
- {
- constexpr auto lumaDescriptorCount = 3u;
- IGPUDescriptorSetLayout::SBinding bindings[lumaDescriptorCount];
- bindings[0].binding = 0u;
- bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER;
- bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE;
- bindings[0].count = 1u;
- bindings[0].samplers = &nearestSampler;
-
- bindings[1].binding = 1u;
- bindings[1].type = asset::EDT_STORAGE_IMAGE;
- bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE;
- bindings[1].count = 1u;
-
- bindings[2].binding = 2u;
- bindings[2].type = asset::EDT_STORAGE_IMAGE;
- bindings[2].stageFlags = ISpecializedShader::ESS_COMPUTE;
- bindings[2].count = 1u;
-
- m_lumaDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+lumaDescriptorCount);
- }
-
- {
-
- ISampler::SParams lumaSamplerParams;
- lumaSamplerParams.TextureWrapU = lumaSamplerParams.TextureWrapV = lumaSamplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_BORDER;
- lumaSamplerParams.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK;
- lumaSamplerParams.MinFilter = samplerParams.MaxFilter = ISampler::ETF_NEAREST;
- lumaSamplerParams.MipmapMode = ISampler::ESMM_NEAREST;
- lumaSamplerParams.AnisotropicFilter = 0u;
- lumaSamplerParams.CompareEnable = false;
- auto lumaSampler = m_driver->createGPUSampler(lumaSamplerParams);
-
- core::smart_refctd_ptr samplers[MipCountLuminance];
- for(uint32_t i = 0u; i < MipCountLuminance; ++i)
- samplers[i] = lumaSampler;
-
- constexpr auto warpDescriptorCount = 2u;
- IGPUDescriptorSetLayout::SBinding bindings[warpDescriptorCount];
- bindings[0].binding = 0u;
- bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER;
- bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE;
- bindings[0].count = MipCountLuminance;
- bindings[0].samplers = samplers;
-
- bindings[1].binding = 1u;
- bindings[1].type = asset::EDT_STORAGE_IMAGE;
- bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE;
- bindings[1].count = 1u;
-
- m_warpDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+warpDescriptorCount);
- }
- }
-
- {
- {
- SPushConstantRange range{ISpecializedShader::ESS_COMPUTE,0u,sizeof(LumaMipMapGenShaderData_t)};
- m_lumaPipelineLayout = m_driver->createGPUPipelineLayout(&range,&range+1u,core::smart_refctd_ptr(m_lumaDSLayout));
-
- for(uint32_t i = 0u; i < MipCountLuminance - 1; ++i)
- m_lumaDS[i] = m_driver->createGPUDescriptorSet(core::smart_refctd_ptr(m_lumaDSLayout));
-
- for(uint32_t i = 0u; i < MipCountLuminance - 1; ++i)
- {
- const uint32_t src = i;
- const uint32_t dst = i + 1;
-
- IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo = {};
- envMapDescriptorInfo.desc = m_finalEnvmap;
- envMapDescriptorInfo.image.sampler = nullptr;
- envMapDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL;
-
- IGPUDescriptorSet::SDescriptorInfo srcMipDescriptorInfo = {};
- srcMipDescriptorInfo.desc = m_luminanceMipMaps[src];
- srcMipDescriptorInfo.image.sampler = nullptr;
- srcMipDescriptorInfo.image.imageLayout = asset::EIL_GENERAL;
-
- IGPUDescriptorSet::SDescriptorInfo dstMipDescriptorInfo = {};
- dstMipDescriptorInfo.desc = m_luminanceMipMaps[dst];
- dstMipDescriptorInfo.image.sampler = nullptr;
- dstMipDescriptorInfo.image.imageLayout = asset::EIL_GENERAL;
-
- IGPUDescriptorSet::SWriteDescriptorSet writes[3u];
- writes[0].binding = 0u;
- writes[0].arrayElement = 0u;
- writes[0].count = 1u;
- writes[0].descriptorType = EDT_COMBINED_IMAGE_SAMPLER;
- writes[0].dstSet = m_lumaDS[i].get();
- writes[0].info = &envMapDescriptorInfo;
-
- writes[1].binding = 1u;
- writes[1].arrayElement = 0u;
- writes[1].count = 1u;
- writes[1].descriptorType = EDT_STORAGE_IMAGE;
- writes[1].dstSet = m_lumaDS[i].get();
- writes[1].info = &srcMipDescriptorInfo;
-
- writes[2].binding = 2u;
- writes[2].arrayElement = 0u;
- writes[2].count = 1u;
- writes[2].descriptorType = EDT_STORAGE_IMAGE;
- writes[2].dstSet = m_lumaDS[i].get();
- writes[2].info = &dstMipDescriptorInfo;
-
- m_driver->updateDescriptorSets(3u,writes,0u,nullptr);
- }
- }
-
- {
-
- SPushConstantRange range{ISpecializedShader::ESS_COMPUTE,0u,sizeof(WarpMapGenShaderData_t)};
- m_warpPipelineLayout = m_driver->createGPUPipelineLayout(&range,&range+1u,core::smart_refctd_ptr(m_warpDSLayout));
-
- m_warpDS = m_driver->createGPUDescriptorSet(core::smart_refctd_ptr(m_warpDSLayout));
-
- IGPUDescriptorSet::SDescriptorInfo luminanceDescriptorInfos[MipCountLuminance] = {};
- for(uint32_t i = 0; i < MipCountLuminance; ++i)
- {
- luminanceDescriptorInfos[i].desc = m_luminanceMipMaps[i];
- luminanceDescriptorInfos[i].image.sampler = nullptr;
- luminanceDescriptorInfos[i].image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL;
- }
-
- IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo = {};
- warpMapDescriptorInfo.desc = m_warpMap;
- warpMapDescriptorInfo.image.sampler = nullptr;
- warpMapDescriptorInfo.image.imageLayout = asset::EIL_GENERAL;
-
- IGPUDescriptorSet::SWriteDescriptorSet writes[2u];
- writes[0].binding = 0u;
- writes[0].arrayElement = 0u;
- writes[0].count = MipCountLuminance;
- writes[0].descriptorType = EDT_COMBINED_IMAGE_SAMPLER;
- writes[0].dstSet = m_warpDS.get();
- writes[0].info = luminanceDescriptorInfos;
-
- writes[1].binding = 1u;
- writes[1].arrayElement = 0u;
- writes[1].count = 1u;
- writes[1].descriptorType = EDT_STORAGE_IMAGE;
- writes[1].dstSet = m_warpDS.get();
- writes[1].info = &warpMapDescriptorInfo;
-
- m_driver->updateDescriptorSets(2u,writes,0u,nullptr);
- }
- }
-
- {
- m_lumaGPUShader = gpuSpecializedShaderFromFile(m_assetManager, m_driver, "../lumaMipMapGen.comp");
- assert(m_lumaGPUShader);
-
- m_lumaPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_lumaPipelineLayout), core::smart_refctd_ptr(m_lumaGPUShader));
- assert(m_lumaPipeline);
-
- m_warpGPUShader = gpuSpecializedShaderFromFile(m_assetManager, m_driver, "../genWarpMap.comp");
- assert(m_warpGPUShader);
-
- m_warpPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_warpPipelineLayout), core::smart_refctd_ptr(m_warpGPUShader));
- assert(m_warpPipeline);
- }
-
-}
-
-void Renderer::deinitWarpingResources()
-{
- m_lumaPipeline = nullptr;
- m_lumaGPUShader = nullptr;
- for(uint32_t i = 0u; i < MipCountLuminance - 1; ++i)
- m_lumaDS[i] = nullptr;
- m_lumaPipelineLayout = nullptr;
- m_lumaDSLayout = nullptr;
-
- for(uint32_t i = 0; i < MipCountLuminance; ++i)
- m_luminanceMipMaps[i] = nullptr;
-
- m_warpPipeline = nullptr;
- m_warpGPUShader = nullptr;
- m_warpDS = nullptr;
- m_warpPipelineLayout = nullptr;
- m_warpDSLayout = nullptr;
- m_warpMap = nullptr;
-}
-
-bool Renderer::computeWarpMap(float envMapRegularizationFactor)
-{
- bool enableRIS = false;
-
- LumaMipMapGenShaderData_t pcData = {};
- const nbl::core::vectorSIMDf lumaScales = nbl::core::vectorSIMDf(0.2126729f, 0.7151522f, 0.0721750f, 1.0f);
-
- m_driver->bindComputePipeline(m_lumaPipeline.get());
-
- // Calc Luma without Sin Factor
- {
- pcData.luminanceScales = nbl::core::vectorSIMDf(lumaScales[0] * envMapRegularizationFactor, lumaScales[1] * envMapRegularizationFactor, lumaScales[2] * envMapRegularizationFactor, (1.0f-envMapRegularizationFactor));
- pcData.calcLuma = 1;
- pcData.sinFactor = 0;
- m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[0].get(),nullptr);
-
- const uint32_t resolution = 0x1u<<(MipCountLuminance - 1);
- const uint32_t sourceMipWidth = std::max(resolution, 1u);
- const uint32_t sourceMipHeight = std::max(resolution/2u, 1u);
-
- uint32_t workGroups[2] = {
- (sourceMipWidth-1u)/LUMA_MIP_MAP_GEN_WORKGROUP_DIM+1u,
- (sourceMipHeight-1u)/LUMA_MIP_MAP_GEN_WORKGROUP_DIM+1u
- };
-
- m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData);
- m_driver->dispatch(workGroups[0],workGroups[1],1);
- COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT);
- }
-
- // Download Luma Image and caclulate Variance and new Regularization Factor
- float variance = 0.0f;
- {
- const uint32_t resolution = 0x1u<<(MipCountLuminance - 1);
- const uint32_t width = std::max(resolution, 1u);
- const uint32_t height = std::max(resolution/2u, 1u);
-
- const uint32_t colorBufferBytesize = width * height * asset::getTexelOrBlockBytesize(EF_R32_SFLOAT);
-
- auto downloadStagingArea = m_driver->getDefaultDownStreamingBuffer();
-
- constexpr uint64_t timeoutInNanoSeconds = 300000000000u;
- const auto waitPoint = std::chrono::high_resolution_clock::now()+std::chrono::nanoseconds(timeoutInNanoSeconds);
-
- uint32_t address = std::remove_pointer::type::invalid_address; // remember without initializing the address to be allocated to invalid_address you won't get an allocation!
- const uint32_t alignment = 4096u; // common page size
- auto unallocatedSize = downloadStagingArea->multi_alloc(waitPoint, 1u, &address, &colorBufferBytesize, &alignment);
- if (unallocatedSize)
- {
- os::Printer::log("Could not download the buffer from the GPU!", ELL_ERROR);
- }
-
- IImage::SBufferCopy copyRegion = {};
- copyRegion.bufferOffset = 0u;
- copyRegion.bufferRowLength = 0u;
- copyRegion.bufferImageHeight = 0u;
- //copyRegion.imageSubresource.aspectMask = wait for Vulkan;
- copyRegion.imageSubresource.mipLevel = 0u;
- copyRegion.imageSubresource.baseArrayLayer = 0u;
- copyRegion.imageSubresource.layerCount = 1u;
- copyRegion.imageOffset = { 0u,0u,0u };
- copyRegion.imageExtent = { width, height, 1u };
-
- auto luminanceGPUImage = m_luminanceMipMaps[0].get()->getCreationParameters().image.get();
- m_driver->copyImageToBuffer(luminanceGPUImage, downloadStagingArea->getBuffer(), 1, &copyRegion);
-
- auto downloadFence = m_driver->placeFence(true);
-
- auto* data = reinterpret_cast(downloadStagingArea->getBufferPointer()) + address;
-
- // wait for download fence and then invalidate the CPU cache
- {
- auto result = downloadFence->waitCPU(timeoutInNanoSeconds,true);
- if (result==E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED||result==E_DRIVER_FENCE_RETVAL::EDFR_FAIL)
- {
- os::Printer::log("Could not download the buffer from the GPU, fence not signalled!", ELL_ERROR);
- downloadStagingArea->multi_free(1u, &address, &colorBufferBytesize, nullptr);
- }
- if (downloadStagingArea->needsManualFlushOrInvalidate())
- m_driver->invalidateMappedMemoryRanges({{downloadStagingArea->getBuffer()->getBoundMemory(),address,colorBufferBytesize}});
- }
-
- float* fltData = reinterpret_cast(data);
- float avg_x2 = 0.0f;
- float avg_x = 0.0f;
- for(uint32_t i = 0; i < width * height; ++i)
- {
- const float x = fltData[i];
- const float x2 = x*x;
- const float n = float(i + 1);
- avg_x = avg_x + (x-avg_x)/(n);
- avg_x2 = avg_x2 + (x2-avg_x2)/(n);
- }
-
- variance = avg_x2 - avg_x * avg_x; // V[x] = E[X^2]-E[X]^2
- std::cout << "Final Luminance Variance = " << variance << std::endl;
- }
-
- float regularizationFactor = envMapRegularizationFactor*(1.0f-1.0f/(1.0f+variance));
- std::cout << "New Regularization Factor based on Variance = " << regularizationFactor << std::endl;
- constexpr float varianceThreshold = 0.001f;
- enableRIS = (variance >= varianceThreshold);
-
- // Calc Luma again with Sin Factor and new Regularization Factor
- {
- pcData.luminanceScales = nbl::core::vectorSIMDf(lumaScales[0] * regularizationFactor, lumaScales[1] * regularizationFactor, lumaScales[2] * regularizationFactor, (1.0f-regularizationFactor));
- pcData.calcLuma = 1;
- pcData.sinFactor = 1;
-
- m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[0].get(),nullptr);
-
- const uint32_t resolution = 0x1u<<(MipCountLuminance - 1);
- const uint32_t sourceMipWidth = std::max(resolution, 1u);
- const uint32_t sourceMipHeight = std::max(resolution/2u, 1u);
-
- uint32_t workGroups[2] = {
- (sourceMipWidth-1u)/LUMA_MIP_MAP_GEN_WORKGROUP_DIM+1u,
- (sourceMipHeight-1u)/LUMA_MIP_MAP_GEN_WORKGROUP_DIM+1u
- };
-
- m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData);
- m_driver->dispatch(workGroups[0],workGroups[1],1);
- COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT);
- }
-
- // Calc Mipmaps
- for(uint32_t s = 0; s < MipCountLuminance - 1; ++s)
- {
- m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[s].get(),nullptr);
-
- const uint32_t resolution = 0x1u<<(MipCountLuminance - 1 - s);
- const uint32_t sourceMipWidth = std::max(resolution, 1u);
- const uint32_t sourceMipHeight = std::max(resolution/2u, 1u);
-
- uint32_t workGroups[2] = {
- (sourceMipWidth-1u)/LUMA_MIP_MAP_GEN_WORKGROUP_DIM+1u,
- (sourceMipHeight-1u)/LUMA_MIP_MAP_GEN_WORKGROUP_DIM+1u
- };
-
- pcData.calcLuma = 0;
- m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData);
- m_driver->dispatch(workGroups[0],workGroups[1],1);
- COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT);
- }
-
- // Generate WarpMap
- {
- m_driver->bindComputePipeline(m_warpPipeline.get());
-
- WarpMapGenShaderData_t warpPcData = {};
- warpPcData.lumaMipCount = MipCountLuminance;
-
- m_driver->bindDescriptorSets(EPBP_COMPUTE,m_warpPipeline->getLayout(),0u,1u,&m_warpDS.get(),nullptr);
-
- const uint32_t resolution = 0x1u<<(MipCountEnvmap-1);
- const uint32_t warpMapWidth = std::max(resolution, 1u);
- const uint32_t warpMapHeight = std::max(resolution/2u, 1u);
-
- uint32_t workGroups[2] = {
- (warpMapWidth-1u)/WARP_MAP_GEN_WORKGROUP_DIM+1u,
- (warpMapHeight-1u)/WARP_MAP_GEN_WORKGROUP_DIM+1u
- };
-
- m_driver->pushConstants(m_warpPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(warpPcData),&warpPcData);
- m_driver->dispatch(workGroups[0],workGroups[1],1);
- COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT);
- }
-
- return enableRIS;
-}
-
const float Renderer::AntiAliasingSequence[Renderer::AntiAliasingSequenceLength][2] =
{
{0.229027962000000, 0.100901043000000},
diff --git a/examples_tests/22.RaytracedAO/Renderer.h b/examples_tests/22.RaytracedAO/Renderer.h
index 2c98f18b28..202c2d9f0a 100644
--- a/examples_tests/22.RaytracedAO/Renderer.h
+++ b/examples_tests/22.RaytracedAO/Renderer.h
@@ -7,6 +7,7 @@
#undef PI
#include "nbl/ext/MitsubaLoader/CMitsubaLoader.h"
+#include "nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h"
#include
@@ -23,7 +24,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
public:
#include "rasterizationCommon.h"
#include "raytraceCommon.h"
- #include "warpCommon.h"
+ #include "nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl"
#ifdef __cplusplus
#undef uint
#undef vec4
@@ -129,7 +130,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
void finalizeScene(InitializationData& initData);
//
- nbl::core::smart_refctd_ptr createTexture(uint32_t width, uint32_t height, nbl::asset::E_FORMAT format, uint32_t layers=0u);
+ nbl::core::smart_refctd_ptr createTexture(uint32_t width, uint32_t height, nbl::asset::E_FORMAT format, uint32_t mipLevels=1u, uint32_t layers=0u);
nbl::core::smart_refctd_ptr createScreenSizedTexture(nbl::asset::E_FORMAT format, uint32_t layers=0u);
//
@@ -246,31 +247,8 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac
nbl::core::smart_refctd_ptr blendEnvPipeline;
nbl::core::smart_refctd_ptr blendEnvDescriptorSet;
nbl::core::smart_refctd_ptr blendEnvMeshBuffer;
-
- // Shader and Resources for Generating Luminance MipMaps from EnvMap
- static constexpr uint32_t MipCountLuminance = MipCountEnvmap;
- nbl::core::smart_refctd_ptr m_luminanceMipMaps[MipCountLuminance];
- uint32_t m_lumaWorkGroups[2];
- nbl::core::smart_refctd_ptr m_lumaDSLayout;
- nbl::core::smart_refctd_ptr m_lumaDS[MipCountLuminance - 1];
- nbl::core::smart_refctd_ptr m_lumaPipelineLayout;
- nbl::core::smart_refctd_ptr m_lumaGPUShader;
- nbl::core::smart_refctd_ptr m_lumaPipeline;
-
- // Shader and Resources for EnvironmentalMap Sample Warping
- nbl::core::smart_refctd_ptr m_warpMap; // Warps Sample based on EnvMap Luminance
-
- nbl::core::smart_refctd_ptr m_warpDSLayout;
- nbl::core::smart_refctd_ptr m_warpDS;
- nbl::core::smart_refctd_ptr m_warpPipelineLayout;
- nbl::core::smart_refctd_ptr m_warpGPUShader;
- nbl::core::smart_refctd_ptr m_warpPipeline;
- void initWarpingResources();
- void deinitWarpingResources();
-
- // returns if RIS should be enabled based on variance calculations
- bool computeWarpMap(float envMapRegularizationFactor);
+ nbl::ext::EnvmapImportanceSampling::EnvmapImportanceSampling m_envMapImportanceSampling;
std::future compileShadersFuture;
};
diff --git a/examples_tests/22.RaytracedAO/main.cpp b/examples_tests/22.RaytracedAO/main.cpp
index 6f2cc89522..1f17d9fb4d 100644
--- a/examples_tests/22.RaytracedAO/main.cpp
+++ b/examples_tests/22.RaytracedAO/main.cpp
@@ -414,7 +414,7 @@ int main(int argc, char** argv)
mainSensorData.denoiserInfo.bloomIntensity = film.denoiserBloomIntensity;
mainSensorData.denoiserInfo.tonemapperArgs = std::string(film.denoiserTonemapperArgs);
mainSensorData.fileFormat = film.fileFormat;
- mainSensorData.envmapRegFactor = core::clamp(film.envmapRegularizationFactor, 0.2f, 0.8f);
+ mainSensorData.envmapRegFactor = core::clamp(film.envmapRegularizationFactor, 0.0f, 0.8f);
mainSensorData.outputFilePath = std::filesystem::path(film.outputFilePath);
if(!isFileExtensionCompatibleWithFormat(mainSensorData.outputFilePath.extension().string(), mainSensorData.fileFormat))
{
@@ -621,7 +621,11 @@ int main(int argc, char** argv)
{
mainSensorData.width = film.cropWidth;
mainSensorData.height = film.cropHeight;
- assert(film.cropOffsetX == 0 && film.cropOffsetY == 0);
+
+ if(film.cropOffsetX != 0 || film.cropOffsetY != 0)
+ {
+ std::cout << "[WARN] CropOffsets are non-zero. cropping is not supported for non cubemap renders." << std::endl;
+ }
mainSensorData.staticCamera = smgr->addCameraSceneNode(nullptr);
auto& staticCamera = mainSensorData.staticCamera;
diff --git a/examples_tests/22.RaytracedAO/raytraceCommon.glsl b/examples_tests/22.RaytracedAO/raytraceCommon.glsl
index b20161f7b9..cb6f33b95e 100644
--- a/examples_tests/22.RaytracedAO/raytraceCommon.glsl
+++ b/examples_tests/22.RaytracedAO/raytraceCommon.glsl
@@ -2,14 +2,10 @@
#define _RAYTRACE_COMMON_GLSL_INCLUDED_
#include "virtualGeometry.glsl"
-#include "warpCommon.h"
#include
#include
-// #define ONLY_BXDF_SAMPLING
-// #define ONLY_ENV_SAMPLING
-
layout(push_constant, row_major) uniform PushConstants
{
RaytraceShaderCommonData_t cummon;
@@ -50,7 +46,7 @@ layout(set = 2, binding = 6, r32ui) restrict uniform uimage2DArray normalAOV;
// environment emitter
layout(set = 2, binding = 7) uniform sampler2D envMap;
layout(set = 2, binding = 8) uniform sampler2D warpMap;
-layout(set = 2, binding = 9) uniform sampler2D luminance[MAX_LUMINANCE_LEVELS];
+layout(set = 2, binding = 9) uniform sampler2D luminance;
void clear_raycount()
{
@@ -260,25 +256,16 @@ vec3 load_normal_and_prefetch_textures(
return geomNormal;
}
-vec3 nbl_glsl_unormSphericalToCartesian(in vec2 uv, out float sinTheta)
-{
- vec3 dir;
- nbl_glsl_sincos((uv.x-0.5)*2.f*nbl_glsl_PI,dir.y,dir.x);
- nbl_glsl_sincos(uv.y*nbl_glsl_PI,sinTheta,dir.z);
- dir.xy *= sinTheta;
- return dir;
-}
-
// return regularized pdf of sample
float Envmap_regularized_deferred_pdf(in vec3 rayDirection)
{
- const ivec2 luminanceMapSize = textureSize(luminance[0], 0);
- uint lastLuminanceMip = uint(log2(luminanceMapSize.x)); // TODO: later turn into push constant
- const vec2 envmapUV = nbl_glsl_sampling_generateUVCoordFromDirection(rayDirection);
+ const ivec2 luminanceMapSize = textureSize(luminance, 0);
+ int lastLuminanceMip = int(log2(luminanceMapSize.x)); // TODO: later turn into push constant
+ const vec2 envmapUV = nbl_glsl_sampling_envmap_generateUVCoordFromDirection(rayDirection);
float sinTheta = length(rayDirection.zx);
- float sumLum = texelFetch(luminance[lastLuminanceMip], ivec2(0), 0).r;
- float lum = textureLod(luminance[0], envmapUV, 0).r;
+ float sumLum = texelFetch(luminance, ivec2(0), lastLuminanceMip).r;
+ float lum = textureLod(luminance, envmapUV, 0).r;
float bigfactor = float(luminanceMapSize.x*luminanceMapSize.y)/sumLum;
return bigfactor*(lum/(sinTheta*2.0f*nbl_glsl_PI*nbl_glsl_PI));
}
@@ -309,7 +296,7 @@ void Envmap_generateRegularizedSample_and_pdf(out float pdf, out nbl_glsl_LightS
const vec2 uv = yDiff*interpolant.y+yVals[0];
float sinTheta;
- const vec3 L = nbl_glsl_unormSphericalToCartesian(uv, sinTheta);
+ const vec3 L = nbl_glsl_sampling_envmap_generateDirectionFromUVCoord(uv, sinTheta);
lightSample = nbl_glsl_createLightSample(L, interaction);
const float detInterpolJacobian = determinant(mat2(
@@ -322,36 +309,41 @@ void Envmap_generateRegularizedSample_and_pdf(out float pdf, out nbl_glsl_LightS
}
#include
-mat2x3 rand6d(in uvec3 scramble_key, in int _sample, int depth)
+mat2x3 rand6d(in uvec3 scramble_keys[2], in int _sample, int depth)
{
mat2x3 retVal;
// decrement depth because first vertex is rasterized and picked with a different sample sequence
--depth;
//
int offset = int(_sample)*SAMPLE_SEQUENCE_STRIDE+depth;
- int eachStrategyStride = SAMPLE_SEQUENCE_STRIDE/2; // get this from cpp side?
+ int eachStrategyStride = SAMPLE_SEQUENCE_STRIDE/SAMPLING_STRATEGY_COUNT; // distance in the sequence between the two samples fetched below
const nbl_glsl_sampling_quantized3D quant1 = texelFetch(quantizedSampleSequence, offset).xy;
const nbl_glsl_sampling_quantized3D quant2 = texelFetch(quantizedSampleSequence, offset + eachStrategyStride).xy;
- retVal[0] = nbl_glsl_sampling_decodeSample3Dimensions(quant1,scramble_key);
- retVal[1] = nbl_glsl_sampling_decodeSample3Dimensions(quant2,scramble_key);
+ retVal[0] = nbl_glsl_sampling_decodeSample3Dimensions(quant1,scramble_keys[0]); // first 3D sample is decoded with the first scramble key
+ retVal[1] = nbl_glsl_sampling_decodeSample3Dimensions(quant2,scramble_keys[1]); // second 3D sample is decoded with its own key instead of reusing the first
return retVal;
}
nbl_glsl_MC_quot_pdf_aov_t gen_sample_ray(
out vec3 direction,
- in uvec3 scramble_key,
+ in uvec3 scramble_keys[2],
in uint sampleID, in uint depth,
in nbl_glsl_MC_precomputed_t precomp,
in nbl_glsl_MC_instr_stream_t gcs,
in nbl_glsl_MC_instr_stream_t rnps
)
{
- mat2x3 rand = rand6d(scramble_key,int(sampleID),int(depth));
+ mat2x3 rand = rand6d(scramble_keys,int(sampleID),int(depth));
// (1) BXDF Sample and Weight
nbl_glsl_LightSample bxdfSample;
nbl_glsl_MC_quot_pdf_aov_t bxdfCosThroughput = nbl_glsl_MC_runGenerateAndRemainderStream(precomp,gcs,rnps,rand[0],bxdfSample);
+
+ nbl_glsl_LightSample outSample;
+ nbl_glsl_MC_quot_pdf_aov_t result;
+
+#ifndef ONLY_BXDF_SAMPLING
float bxdfWeight = 0;
float p_bxdf_bxdf = bxdfCosThroughput.pdf; // BxDF PDF evaluated with BxDF sample (returned from
@@ -407,10 +399,7 @@ nbl_glsl_MC_quot_pdf_aov_t gen_sample_ray(
}
const float bxdfChoiceProb = w_bxdf/w_sum;
-#endif
-
- nbl_glsl_LightSample outSample;
- nbl_glsl_MC_quot_pdf_aov_t result;
+#endif // ifdef TRADE_REGISTERS_FOR_IEEE754_ACCURACY
float rcpChoiceProb;
float w_star_over_p_env = w_sum;
@@ -429,13 +418,11 @@ nbl_glsl_MC_quot_pdf_aov_t gen_sample_ray(
result.quotient *= w_star_over_p_env;
result.pdf /= w_star_over_p_env;
+#endif // ifndef ONLY_BXDF_SAMPLING
#ifdef ONLY_BXDF_SAMPLING
outSample = bxdfSample;
result = bxdfCosThroughput;
-#elif defined(ONLY_ENV_SAMPLING)
- outSample = envmapSample;
- result = envmapSampleThroughput;
#endif
// russian roulette
@@ -479,12 +466,16 @@ void generate_next_rays(
vec3 nextThroughput[MAX_RAYS_GENERATED];
float nextAoVThroughputScale[MAX_RAYS_GENERATED];
{
- const uvec3 scramble_key = uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state));
+ const uvec3 scramble_keys[2] = {
+ uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state)),
+ uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state))
+ };
+
for (uint i=0u; inormalization.finalize();
storeToImage(core::rational<>(inv_cvg_num,inv_cvg_den),axis,outOffsetLayer);
+ }
};
// filter in X-axis
filterAxis(IImage::ET_1D,kernelX);
diff --git a/include/nbl/asset/filters/CSwizzleAndConvertImageFilter.h b/include/nbl/asset/filters/CSwizzleAndConvertImageFilter.h
index cc0251a7fb..479640f7c3 100644
--- a/include/nbl/asset/filters/CSwizzleAndConvertImageFilter.h
+++ b/include/nbl/asset/filters/CSwizzleAndConvertImageFilter.h
@@ -56,9 +56,9 @@ class CSwizzleAndConvertImageFilterBase : public CSwizzleableAndDitherableFilter
static inline void normalizationPrepass(E_FORMAT rInFormat, const ExecutionPolicy& policy, state_type* state, const core::vectorSIMDu32& blockDims)
{
if constexpr (!std::is_void_v)
- {
+ {
assert(kInFormat==EF_UNKNOWN || rInFormat==EF_UNKNOWN);
- state->normalization.initialize();
+ state->normalization.initialize();
auto perOutputRegion = [policy,&blockDims,&state,rInFormat](const CMatchedSizeInOutImageFilterCommon::CommonExecuteData& commonExecuteData, CBasicImageFilterCommon::clip_region_functor_t& clip) -> bool
{
auto normalizePrepass = [&commonExecuteData,&blockDims,&state,rInFormat](uint32_t readBlockArrayOffset, core::vectorSIMDu32 readBlockPos)
@@ -84,6 +84,7 @@ class CSwizzleAndConvertImageFilterBase : public CSwizzleableAndDitherableFilter
return true;
};
CMatchedSizeInOutImageFilterCommon::commonExecute(state,perOutputRegion);
+ state->normalization.finalize();
}
}
};
diff --git a/include/nbl/asset/filters/NormalizationStates.h b/include/nbl/asset/filters/NormalizationStates.h
index 3b5be06b62..badcdccbb8 100644
--- a/include/nbl/asset/filters/NormalizationStates.h
+++ b/include/nbl/asset/filters/NormalizationStates.h
@@ -183,7 +183,7 @@ class CDerivativeMapNormalizationState : public impl::CDerivativeMapNormalizatio
static_assert(std::is_floating_point_v, "Integer encode types not supported yet!");
if constexpr (isotropic)
{
- const float isotropicMax = core::max(core::max(maxAbsPerChannel[0],maxAbsPerChannel[1]),core::max(maxAbsPerChannel[2],maxAbsPerChannel[3]));
+ float isotropicMax = core::max(core::max(maxAbsPerChannel[0].load(),maxAbsPerChannel[1].load()),core::max(maxAbsPerChannel[2].load(),maxAbsPerChannel[3].load()));
for (auto i=0u; i<4u; i++)
maxAbsPerChannel[i] = isotropicMax;
}
diff --git a/examples_tests/22.RaytracedAO/lumaMipMapGen.comp b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp
similarity index 83%
rename from examples_tests/22.RaytracedAO/lumaMipMapGen.comp
rename to include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp
index 3f09179614..cbe3571f5d 100644
--- a/examples_tests/22.RaytracedAO/lumaMipMapGen.comp
+++ b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp
@@ -1,6 +1,7 @@
-#version 430 core
+#ifndef _NBL_GLSL_EXT_ENVMAP_SAMPLING_GEN_LUMA_INCLUDED_
+#define _NBL_GLSL_EXT_ENVMAP_SAMPLING_GEN_LUMA_INCLUDED_
-#include "warpCommon.h"
+#include
#include
layout(local_size_x = LUMA_MIP_MAP_GEN_WORKGROUP_DIM, local_size_y = LUMA_MIP_MAP_GEN_WORKGROUP_DIM) in;
@@ -35,8 +36,8 @@ void main()
if(pc.data.calcLuma > 0)
{
float sinTheta = sin(nbl_glsl_PI * (float(pixelCoord.y + 0.5f) / envMapSize.y));
- vec4 envMapSample = texelFetch(envMap, pixelCoord, 0);
- float luma = dot(pc.data.luminanceScales, envMapSample);
+ vec3 envMapSample = texelFetch(envMap, pixelCoord, 0).rgb;
+ float luma = dot(pc.data.luminanceScales, vec4(envMapSample, 1.0f));
if(pc.data.sinFactor > 0)
luma *= sinTheta;
imageStore(srcLuminance, pixelCoord, vec4(luma));
@@ -54,4 +55,6 @@ void main()
}
}
}
-}
\ No newline at end of file
+}
+
+#endif
\ No newline at end of file
diff --git a/examples_tests/22.RaytracedAO/genWarpMap.comp b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp
similarity index 84%
rename from examples_tests/22.RaytracedAO/genWarpMap.comp
rename to include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp
index 12c685f43e..6bd7a7c5ea 100644
--- a/examples_tests/22.RaytracedAO/genWarpMap.comp
+++ b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp
@@ -1,11 +1,12 @@
-#version 430 core
+#ifndef _NBL_GLSL_EXT_ENVMAP_SAMPLING_GEN_WARPMAP_INCLUDED_
+#define _NBL_GLSL_EXT_ENVMAP_SAMPLING_GEN_WARPMAP_INCLUDED_
-#include "warpCommon.h"
+#include
#include
layout(local_size_x = WARP_MAP_GEN_WORKGROUP_DIM, local_size_y = WARP_MAP_GEN_WORKGROUP_DIM) in;
-layout(set = 0, binding = 0) uniform sampler2D luminance[MAX_LUMINANCE_LEVELS];
+layout(set = 0, binding = 0) uniform sampler2D luminance;
layout(set = 0, binding = 1, rg32f) uniform image2D warpMap;
layout(push_constant) uniform PushConstants
@@ -30,13 +31,13 @@ void main()
// (skip 0 which is 1x1 and useless in warping)
for(int i = int(pc.data.lumaMipCount - 1); i >= 1; --i)
{
- ivec2 luminanceMipSize = textureSize(luminance[i], 0).xy;
+ ivec2 luminanceMipSize = textureSize(luminance, i).xy;
vec4 values = vec4(0);
- values[0] = texelFetch(luminance[i], p + ivec2(0, 1), 0).r;
- values[1] = texelFetch(luminance[i], p + ivec2(1, 1), 0).r;
- values[2] = texelFetch(luminance[i], p + ivec2(1, 0), 0).r;
- values[3] = texelFetch(luminance[i], p + ivec2(0, 0), 0).r;
+ values[0] = texelFetch(luminance, p + ivec2(0, 1), i).r;
+ values[1] = texelFetch(luminance, p + ivec2(1, 1), i).r;
+ values[2] = texelFetch(luminance, p + ivec2(1, 0), i).r;
+ values[3] = texelFetch(luminance, p + ivec2(0, 0), i).r;
float wy_0 = 0.0f;
float wy_1 = 0.0f;
@@ -128,4 +129,6 @@ void main()
*/
}
-}
\ No newline at end of file
+}
+
+#endif
\ No newline at end of file
diff --git a/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl
new file mode 100644
index 0000000000..3e3b1c89ef
--- /dev/null
+++ b/include/nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl
@@ -0,0 +1,37 @@
+#ifndef _NBL_GLSL_EXT_ENVMAP_SAMPLING_PARAMETERS_STRUCT_INCLUDED_
+#define _NBL_GLSL_EXT_ENVMAP_SAMPLING_PARAMETERS_STRUCT_INCLUDED_
+
+#ifdef __cplusplus
+ #define uint uint32_t
+ struct uvec2 // host-side stand-in for the GLSL type of the same name (only declared when compiled as C++)
+ {
+ uint x,y;
+ };
+ struct vec2 // host-side stand-in: two floats
+ {
+ float x,y;
+ };
+ struct vec3 // host-side stand-in: three floats. NOTE(review): unlike the macros here, these structs cannot be #undef'd by the including header
+ {
+ float x,y,z;
+ };
+ #define vec4 nbl::core::vectorSIMDf
+ #define mat4 nbl::core::matrix4SIMD
+ #define mat4x3 nbl::core::matrix3x4SIMD
+#endif
+
+struct LumaMipMapGenShaderData_t // push-constant payload of the luma / mip-map generation compute shader
+{
+ vec4 luminanceScales; // dotted with vec4(envmap.rgb, 1.0) to produce the stored luma value
+ uint calcLuma; // >0: compute luma from the envmap texel; the host sets 0 for the mip-map passes
+ uint sinFactor; // >0: additionally multiply the luma by sin(theta) of the texel row
+ vec2 padding; // unused filler
+};
+
+struct WarpMapGenShaderData_t // push-constant payload of the warp map generation compute shader
+{
+ uint lumaMipCount; // number of luminance mip levels; the shader's loop starts at lumaMipCount-1 and walks down to 1
+ vec3 padding; // unused filler. NOTE(review): a GLSL vec3 is 16-byte aligned under std430 while the C++ stand-in is three packed floats, so the two sizes may differ -- confirm
+};
+
+#endif
diff --git a/include/nbl/builtin/glsl/sampling/envmap.glsl b/include/nbl/builtin/glsl/sampling/envmap.glsl
index 8aea359be7..6f68673761 100644
--- a/include/nbl/builtin/glsl/sampling/envmap.glsl
+++ b/include/nbl/builtin/glsl/sampling/envmap.glsl
@@ -3,7 +3,7 @@
#include
-vec2 nbl_glsl_sampling_generateUVCoordFromDirection(vec3 v)
+vec2 nbl_glsl_sampling_envmap_generateUVCoordFromDirection(vec3 v)
{
vec2 uv = vec2(atan(v.z, v.x), acos(v.y));
uv.x *= nbl_glsl_RECIPROCAL_PI*0.5;
@@ -12,4 +12,13 @@ vec2 nbl_glsl_sampling_generateUVCoordFromDirection(vec3 v)
return uv;
}
+vec3 nbl_glsl_sampling_envmap_generateDirectionFromUVCoord(in vec2 uv, out float sinTheta) // inverse of nbl_glsl_sampling_envmap_generateUVCoordFromDirection; also outputs sin of the polar angle
+{
+ vec3 dir;
+ nbl_glsl_sincos((uv.x-0.5)*2.f*nbl_glsl_PI,dir.z,dir.x); // azimuth lies in the XZ plane, matching atan(v.z,v.x) in the forward mapping
+ nbl_glsl_sincos(uv.y*nbl_glsl_PI,sinTheta,dir.y); // polar angle is measured from +Y, matching acos(v.y) in the forward mapping
+ dir.xz *= sinTheta;
+ return dir;
+}
+
#endif
diff --git a/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h
new file mode 100644
index 0000000000..24c09998c8
--- /dev/null
+++ b/include/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h
@@ -0,0 +1,84 @@
+// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#ifndef _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_
+#define _NBL_EXT_ENVMAP_IMPORTANCE_SAMPLING_INCLUDED_
+
+#include "nabla.h"
+#include "nbl/video/IGPUShader.h"
+#include "nbl/asset/ICPUShader.h"
+
+namespace nbl
+{
+namespace ext
+{
+namespace EnvmapImportanceSampling
+{
+
+#include "nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl"
+#undef uint
+#undef vec4
+#undef mat4
+#undef mat4x3
+
+// Owns the GPU resources and compute pipelines that turn an environment map view into a luminance mip chain and a sample-warping map.
+class EnvmapImportanceSampling final : public core::IReferenceCounted
+{
+ public:
+ EnvmapImportanceSampling(nbl::video::IVideoDriver* _driver); // only stores the driver pointer
+ ~EnvmapImportanceSampling() {}
+ // Capacity of the fixed-size per-mip member arrays declared below (m_luminanceMipMaps, m_lumaDS)
+ static constexpr uint32_t MaxMipCountLuminance = 13u;
+ static constexpr uint32_t DefaultLumaMipMapGenWorkgroupDimension = 16u;
+ static constexpr uint32_t DefaultWarpMapGenWorkgroupDimension = 16u;
+ // Creates the images, views, descriptor sets and both compute pipelines for the given envmap view
+ void initResources(
+ nbl::core::smart_refctd_ptr envmap,
+ uint32_t lumaMipMapGenWorkgroupDimension = DefaultLumaMipMapGenWorkgroupDimension,
+ uint32_t warpMapGenWorkgroupDimension = DefaultWarpMapGenWorkgroupDimension);
+ // Releases the pipelines, descriptor sets, per-mip views and warp map created by initResources()
+ void deinitResources();
+
+ // returns if RIS should be enabled based on variance calculations
+ bool computeWarpMap(float envMapRegularizationFactor);
+
+ nbl::core::smart_refctd_ptr getLuminanceImageView() { return m_luminanceBaseImageView; }
+ nbl::core::smart_refctd_ptr getWarpMapImageView() { return m_warpMap; }
+ // NOTE(review): the two members below are public data even though the accessors above exist -- confirm intended
+ nbl::core::smart_refctd_ptr m_luminanceBaseImageView;
+ nbl::core::smart_refctd_ptr m_warpMap; // Warps Sample based on EnvMap Luminance
+ private:
+ // Allocates a device-local 2D image and returns a view over all of its mips and layers
+ nbl::core::smart_refctd_ptr createTexture(uint32_t width, uint32_t height, nbl::asset::E_FORMAT format, uint32_t mipLevels = 1u, uint32_t layers = 1u);
+
+ uint32_t m_mipCountEnvmap;
+ uint32_t m_mipCountLuminance;
+ uint32_t m_lumaMipMapGenWorkgroupDimension;
+ uint32_t m_warpMapGenWorkgroupDimension;
+ // Luminance mip chain generation: one view per mip level and one descriptor set per source->destination level pair
+ nbl::core::smart_refctd_ptr m_luminanceMipMaps[MaxMipCountLuminance];
+ uint32_t m_lumaWorkGroups[2]; // NOTE(review): not referenced by the accompanying .cpp -- possibly a leftover
+ nbl::core::smart_refctd_ptr m_lumaDSLayout;
+ nbl::core::smart_refctd_ptr m_lumaDS[MaxMipCountLuminance - 1];
+ nbl::core::smart_refctd_ptr m_lumaPipelineLayout;
+ nbl::core::smart_refctd_ptr m_lumaGPUShader;
+ nbl::core::smart_refctd_ptr m_lumaPipeline;
+
+ // Shader and Resources for EnvironmentalMap Sample Warping
+
+ nbl::core::smart_refctd_ptr m_warpDSLayout;
+ nbl::core::smart_refctd_ptr m_warpDS;
+ nbl::core::smart_refctd_ptr m_warpPipelineLayout;
+ nbl::core::smart_refctd_ptr m_warpGPUShader;
+ nbl::core::smart_refctd_ptr m_warpPipeline;
+
+ nbl::video::IVideoDriver* m_driver; // raw pointer; the constructor copies it without grabbing a reference
+};
+
+
+}
+}
+}
+
+#endif
diff --git a/include/nbl/ext/MitsubaLoader/CElementFilm.h b/include/nbl/ext/MitsubaLoader/CElementFilm.h
index f28d893f20..2a1ff38d14 100644
--- a/include/nbl/ext/MitsubaLoader/CElementFilm.h
+++ b/include/nbl/ext/MitsubaLoader/CElementFilm.h
@@ -141,7 +141,7 @@ class CElementFilm : public IElement
float denoiserBloomIntensity = 0.0f;
_NBL_STATIC_INLINE_CONSTEXPR size_t MaxTonemapperArgsLen = 128;
char denoiserTonemapperArgs[MaxTonemapperArgsLen+1] = {0};
- float envmapRegularizationFactor = 0.0f; // 1.0f means based envmap luminance, 0.0f means uniform
+ float envmapRegularizationFactor = 0.5f; // 1.0f means based envmap luminance, 0.0f means uniform
};
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index be6a25dd42..b204878a51 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -143,6 +143,9 @@ set(nbl_resources_to_embed
"nbl/builtin/glsl/workgroup/vote.glsl"
# ext shouldn't be built into the engine, but there's no harm including some non-dynamic GLSL source to make life easier
#"nbl/builtin/glsl/ext/.glsl"
+ "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp"
+ "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp"
+ "nbl/builtin/glsl/ext/EnvmapImportanceSampling/parameters.glsl"
"nbl/builtin/glsl/ext/FFT/default_compute_fft.comp"
"nbl/builtin/glsl/ext/FFT/fft.glsl"
"nbl/builtin/glsl/ext/FFT/parameters_struct.glsl"
diff --git a/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp b/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp
new file mode 100644
index 0000000000..bf2fa7dc45
--- /dev/null
+++ b/src/nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.cpp
@@ -0,0 +1,493 @@
+// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+
+#include "nbl/ext/EnvmapImportanceSampling/EnvmapImportanceSampling.h"
+
+#include
+
+using namespace nbl;
+using namespace nbl::asset;
+using namespace nbl::video;
+using namespace ext::EnvmapImportanceSampling;
+
+EnvmapImportanceSampling::EnvmapImportanceSampling(IVideoDriver* _driver) : m_driver(_driver)
+{ // only records the driver; every GPU object is created later by initResources()
+}
+
+static void getEnvmapResolutionFromMipLevel(uint32_t level, uint32_t& outWidth, uint32_t& outHeight) // file-local: extent of the 2:1 map whose width is 2^level
+{
+ const uint32_t resolution = 0x1u<<(level); // callers pass (mip count - 1), so the largest level maps to the base extent
+ outWidth = std::max(resolution, 1u);
+ outHeight = std::max(resolution/2u, 1u); // half the width, but never less than one texel
+}
+
+// Creates a device-local 2D image of the given extent/format and returns a view spanning all of its mips and layers.
+core::smart_refctd_ptr EnvmapImportanceSampling::createTexture(uint32_t width, uint32_t height, E_FORMAT format, uint32_t mipLevels, uint32_t layers)
+{
+ const auto real_layers = layers ? layers:1u; // layers==0 still allocates one layer, but selects a plain 2D view below
+ IGPUImage::SCreationParams imgparams;
+ imgparams.extent = {width, height, 1u};
+ imgparams.arrayLayers = real_layers;
+ imgparams.flags = static_cast(0);
+ imgparams.format = format;
+ imgparams.mipLevels = mipLevels;
+ imgparams.samples = IImage::ESCF_1_BIT;
+ imgparams.type = IImage::ET_2D;
+ // NOTE(review): any non-zero `layers` (including the declared default of 1) yields an ET_2D_ARRAY view -- confirm that suits the sampler2D/image2D shader bindings
+ IGPUImageView::SCreationParams viewparams;
+ viewparams.flags = static_cast(0);
+ viewparams.format = format;
+ viewparams.image = m_driver->createDeviceLocalGPUImageOnDedMem(std::move(imgparams));
+ viewparams.viewType = layers ? IGPUImageView::ET_2D_ARRAY:IGPUImageView::ET_2D;
+ viewparams.subresourceRange.aspectMask = static_cast(0);
+ viewparams.subresourceRange.baseArrayLayer = 0u;
+ viewparams.subresourceRange.layerCount = real_layers;
+ viewparams.subresourceRange.baseMipLevel = 0u;
+ viewparams.subresourceRange.levelCount = mipLevels;
+
+ return m_driver->createGPUImageView(std::move(viewparams));
+}
+
+// Creates everything computeWarpMap() dispatches against: the luminance image (one view over the whole
+// mip chain plus a single-level view per mip), the warp map, the descriptor set layouts and sets, and
+// the two compute pipelines, whose GLSL is a generated preamble that #includes the builtin ext shaders.
+// The workgroup dimensions are stored because computeWarpMap() derives its dispatch sizes from them.
+void EnvmapImportanceSampling::initResources(core::smart_refctd_ptr envmap, uint32_t lumaMipMapGenWorkgroupDimension, uint32_t warpMapGenWorkgroupDimension)
+{
+ const uint32_t MipCountEnvMap = envmap->getCreationParameters().subresourceRange.levelCount;
+ const uint32_t MipCountLuminance = MipCountEnvMap;
+ assert(MipCountLuminance>0u && MipCountLuminance<=MaxMipCountLuminance); // m_luminanceMipMaps, m_lumaDS and `samplers` are fixed-size arrays
+ m_lumaMipMapGenWorkgroupDimension = lumaMipMapGenWorkgroupDimension;
+ m_warpMapGenWorkgroupDimension = warpMapGenWorkgroupDimension;
+ m_mipCountLuminance = MipCountLuminance;
+ m_mipCountEnvmap = MipCountEnvMap;
+
+ {
+ uint32_t width, height = 0u;
+ getEnvmapResolutionFromMipLevel(MipCountLuminance - 1, width, height);
+ m_luminanceBaseImageView = createTexture(width, height, EF_R32_SFLOAT, MipCountLuminance);
+ assert(m_luminanceBaseImageView);
+
+ m_luminanceMipMaps[0] = m_luminanceBaseImageView; // level 0 is addressed through the full-chain view
+ for(uint32_t i = 1; i < MipCountLuminance; ++i)
+ {
+ IGPUImageView::SCreationParams viewCreateParams = m_luminanceBaseImageView->getCreationParameters();
+ viewCreateParams.subresourceRange.baseMipLevel = i;
+ viewCreateParams.subresourceRange.levelCount = 1u;
+
+ m_luminanceMipMaps[i] = m_driver->createGPUImageView(std::move(viewCreateParams));
+ }
+ }
+
+ {
+ uint32_t width, height = 0u;
+ getEnvmapResolutionFromMipLevel(m_mipCountEnvmap - 1, width, height);
+ m_warpMap = createTexture(width, height, EF_R32G32_SFLOAT); // same extent as luminance level 0, two floats per texel
+ }
+
+ ISampler::SParams samplerParams;
+ samplerParams.TextureWrapU = samplerParams.TextureWrapV = samplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_EDGE;
+ samplerParams.MinFilter = samplerParams.MaxFilter = ISampler::ETF_NEAREST;
+ samplerParams.MipmapMode = ISampler::ESMM_NEAREST;
+ samplerParams.AnisotropicFilter = 0u;
+ samplerParams.CompareEnable = false;
+ auto nearestSampler = m_driver->createGPUSampler(samplerParams);
+
+ // Create DescriptorLayout
+ {
+ { // luma generation: binding 0 = envmap sampler, bindings 1 and 2 = source and destination luminance levels
+ constexpr auto lumaDescriptorCount = 3u;
+ IGPUDescriptorSetLayout::SBinding bindings[lumaDescriptorCount];
+ bindings[0].binding = 0u;
+ bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER;
+ bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE;
+ bindings[0].count = 1u;
+ bindings[0].samplers = &nearestSampler;
+
+ bindings[1].binding = 1u;
+ bindings[1].type = asset::EDT_STORAGE_IMAGE;
+ bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE;
+ bindings[1].count = 1u;
+
+ bindings[2].binding = 2u;
+ bindings[2].type = asset::EDT_STORAGE_IMAGE;
+ bindings[2].stageFlags = ISpecializedShader::ESS_COMPUTE;
+ bindings[2].count = 1u;
+
+ m_lumaDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+lumaDescriptorCount);
+ }
+
+ { // warp map generation: binding 0 = luminance sampler, binding 1 = warp map storage image
+ ISampler::SParams lumaSamplerParams;
+ lumaSamplerParams.TextureWrapU = lumaSamplerParams.TextureWrapV = lumaSamplerParams.TextureWrapW = ISampler::ETC_CLAMP_TO_BORDER;
+ lumaSamplerParams.BorderColor = ISampler::ETBC_FLOAT_OPAQUE_BLACK;
+ lumaSamplerParams.MinFilter = lumaSamplerParams.MaxFilter = ISampler::ETF_NEAREST;
+ lumaSamplerParams.MipmapMode = ISampler::ESMM_NEAREST;
+ lumaSamplerParams.AnisotropicFilter = 0u;
+ lumaSamplerParams.CompareEnable = false;
+ auto lumaSampler = m_driver->createGPUSampler(lumaSamplerParams);
+
+ core::smart_refctd_ptr samplers[MaxMipCountLuminance];
+ for(uint32_t i = 0u; i < MipCountLuminance; ++i)
+ samplers[i] = lumaSampler;
+ constexpr auto warpDescriptorCount = 2u;
+ IGPUDescriptorSetLayout::SBinding bindings[warpDescriptorCount];
+ bindings[0].binding = 0u;
+ bindings[0].type = asset::EDT_COMBINED_IMAGE_SAMPLER;
+ bindings[0].stageFlags = ISpecializedShader::ESS_COMPUTE;
+ bindings[0].count = MipCountLuminance; // NOTE(review): gen_warpmap.comp declares a single sampler2D and only element 0 is written below -- confirm this should not be 1
+ bindings[0].samplers = samplers;
+
+ bindings[1].binding = 1u;
+ bindings[1].type = asset::EDT_STORAGE_IMAGE;
+ bindings[1].stageFlags = ISpecializedShader::ESS_COMPUTE;
+ bindings[1].count = 1u;
+
+ m_warpDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+warpDescriptorCount);
+ }
+ }
+
+ {
+ {
+ SPushConstantRange range{ISpecializedShader::ESS_COMPUTE,0u,sizeof(LumaMipMapGenShaderData_t)};
+ m_lumaPipelineLayout = m_driver->createGPUPipelineLayout(&range,&range+1u,core::smart_refctd_ptr(m_lumaDSLayout));
+
+ for(uint32_t i = 0u; i < MipCountLuminance - 1; ++i) // one descriptor set per source->destination mip pair
+ m_lumaDS[i] = m_driver->createGPUDescriptorSet(core::smart_refctd_ptr(m_lumaDSLayout));
+
+ for(uint32_t i = 0u; i < MipCountLuminance - 1; ++i)
+ {
+ const uint32_t src = i;
+ const uint32_t dst = i + 1;
+
+ IGPUDescriptorSet::SDescriptorInfo envMapDescriptorInfo = {};
+ envMapDescriptorInfo.desc = envmap;
+ envMapDescriptorInfo.image.sampler = nullptr;
+ envMapDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL;
+
+ IGPUDescriptorSet::SDescriptorInfo srcMipDescriptorInfo = {};
+ srcMipDescriptorInfo.desc = m_luminanceMipMaps[src];
+ srcMipDescriptorInfo.image.sampler = nullptr;
+ srcMipDescriptorInfo.image.imageLayout = asset::EIL_GENERAL;
+
+ IGPUDescriptorSet::SDescriptorInfo dstMipDescriptorInfo = {};
+ dstMipDescriptorInfo.desc = m_luminanceMipMaps[dst];
+ dstMipDescriptorInfo.image.sampler = nullptr;
+ dstMipDescriptorInfo.image.imageLayout = asset::EIL_GENERAL;
+
+ IGPUDescriptorSet::SWriteDescriptorSet writes[3u];
+ writes[0].binding = 0u;
+ writes[0].arrayElement = 0u;
+ writes[0].count = 1u;
+ writes[0].descriptorType = EDT_COMBINED_IMAGE_SAMPLER;
+ writes[0].dstSet = m_lumaDS[i].get();
+ writes[0].info = &envMapDescriptorInfo;
+
+ writes[1].binding = 1u;
+ writes[1].arrayElement = 0u;
+ writes[1].count = 1u;
+ writes[1].descriptorType = EDT_STORAGE_IMAGE;
+ writes[1].dstSet = m_lumaDS[i].get();
+ writes[1].info = &srcMipDescriptorInfo;
+
+ writes[2].binding = 2u;
+ writes[2].arrayElement = 0u;
+ writes[2].count = 1u;
+ writes[2].descriptorType = EDT_STORAGE_IMAGE;
+ writes[2].dstSet = m_lumaDS[i].get();
+ writes[2].info = &dstMipDescriptorInfo;
+
+ m_driver->updateDescriptorSets(3u,writes,0u,nullptr);
+ }
+ }
+
+ {
+ SPushConstantRange range{ISpecializedShader::ESS_COMPUTE,0u,sizeof(WarpMapGenShaderData_t)};
+ m_warpPipelineLayout = m_driver->createGPUPipelineLayout(&range,&range+1u,core::smart_refctd_ptr(m_warpDSLayout));
+
+ m_warpDS = m_driver->createGPUDescriptorSet(core::smart_refctd_ptr(m_warpDSLayout));
+
+ IGPUDescriptorSet::SDescriptorInfo luminanceDescriptorInfo = {};
+ luminanceDescriptorInfo.desc = m_luminanceBaseImageView;
+ luminanceDescriptorInfo.image.sampler = nullptr;
+ luminanceDescriptorInfo.image.imageLayout = asset::EIL_SHADER_READ_ONLY_OPTIMAL;
+
+ IGPUDescriptorSet::SDescriptorInfo warpMapDescriptorInfo = {};
+ warpMapDescriptorInfo.desc = m_warpMap;
+ warpMapDescriptorInfo.image.sampler = nullptr;
+ warpMapDescriptorInfo.image.imageLayout = asset::EIL_GENERAL;
+
+ IGPUDescriptorSet::SWriteDescriptorSet writes[2u];
+ writes[0].binding = 0u;
+ writes[0].arrayElement = 0u;
+ writes[0].count = 1u;
+ writes[0].descriptorType = EDT_COMBINED_IMAGE_SAMPLER;
+ writes[0].dstSet = m_warpDS.get();
+ writes[0].info = &luminanceDescriptorInfo;
+
+ writes[1].binding = 1u;
+ writes[1].arrayElement = 0u;
+ writes[1].count = 1u;
+ writes[1].descriptorType = EDT_STORAGE_IMAGE;
+ writes[1].dstSet = m_warpDS.get();
+ writes[1].info = &warpMapDescriptorInfo;
+
+ m_driver->updateDescriptorSets(2u,writes,0u,nullptr);
+ }
+ }
+
+ {
+ const char* sourceFmt = // shared preamble; the final %s selects which builtin compute shader gets #included
+R"===(#version 430 core
+
+#define LUMA_MIP_MAP_GEN_WORKGROUP_DIM %u
+#define WARP_MAP_GEN_WORKGROUP_DIM %u
+#define MAX_LUMINANCE_LEVELS %u
+
+#include "%s"
+
+)===";
+
+ {
+ const size_t extraSize = 3u*8u+128u; // headroom for the three formatted integers plus the include path
+ auto lumaShader = core::make_smart_refctd_ptr(strlen(sourceFmt)+extraSize+1u);
+ snprintf(
+ reinterpret_cast(lumaShader->getPointer()),lumaShader->getSize(), sourceFmt,
+ lumaMipMapGenWorkgroupDimension,
+ warpMapGenWorkgroupDimension,
+ MipCountLuminance,
+ "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_luma_mipmap.comp"
+ );
+
+ auto cpuSpecializedShader = core::make_smart_refctd_ptr(
+ core::make_smart_refctd_ptr(std::move(lumaShader),ICPUShader::buffer_contains_glsl),
+ ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE}
+ );
+
+ auto gpuShader = m_driver->createGPUShader(nbl::core::smart_refctd_ptr(cpuSpecializedShader->getUnspecialized()));
+
+ m_lumaGPUShader = m_driver->createGPUSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo());
+ assert(m_lumaGPUShader);
+ }
+
+ m_lumaPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_lumaPipelineLayout), core::smart_refctd_ptr(m_lumaGPUShader));
+ assert(m_lumaPipeline);
+
+ {
+ const size_t extraSize = 3u*8u+128u;
+ auto warpGenShader = core::make_smart_refctd_ptr(strlen(sourceFmt)+extraSize+1u);
+ snprintf(
+ reinterpret_cast(warpGenShader->getPointer()),warpGenShader->getSize(), sourceFmt,
+ lumaMipMapGenWorkgroupDimension,
+ warpMapGenWorkgroupDimension,
+ MipCountLuminance,
+ "nbl/builtin/glsl/ext/EnvmapImportanceSampling/gen_warpmap.comp"
+ );
+
+ auto cpuSpecializedShader = core::make_smart_refctd_ptr(
+ core::make_smart_refctd_ptr(std::move(warpGenShader),ICPUShader::buffer_contains_glsl),
+ ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE}
+ );
+
+ auto gpuShader = m_driver->createGPUShader(nbl::core::smart_refctd_ptr(cpuSpecializedShader->getUnspecialized()));
+
+ m_warpGPUShader = m_driver->createGPUSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo());
+ assert(m_warpGPUShader);
+ }
+
+ m_warpPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_warpPipelineLayout), core::smart_refctd_ptr(m_warpGPUShader));
+ assert(m_warpPipeline);
+ }
+}
+
+// Drops the references held in the members populated by initResources(), including the full-mip luminance view.
+void EnvmapImportanceSampling::deinitResources()
+{
+ m_lumaPipeline = nullptr;
+ m_lumaGPUShader = nullptr;
+ for(uint32_t i = 0u; i < MaxMipCountLuminance - 1; ++i)
+ m_lumaDS[i] = nullptr;
+ m_lumaPipelineLayout = nullptr;
+ m_lumaDSLayout = nullptr;
+ for(uint32_t i = 0; i < MaxMipCountLuminance; ++i)
+ m_luminanceMipMaps[i] = nullptr;
+ m_luminanceBaseImageView = nullptr; // separate reference to the same view as m_luminanceMipMaps[0]; must be dropped too or the image stays alive
+ m_warpPipeline = nullptr;
+ m_warpGPUShader = nullptr;
+ m_warpDS = nullptr;
+ m_warpPipelineLayout = nullptr;
+ m_warpDSLayout = nullptr;
+ m_warpMap = nullptr;
+}
+
+// Runs the luminance, mip-map and warp map compute passes set up by initResources().
+// The first luminance pass (no sin(theta) weighting) is read back to measure its variance; that variance
+// rescales the regularization factor and decides the result. Returns false if RIS is not worthwhile or the readback failed.
+bool EnvmapImportanceSampling::computeWarpMap(float envMapRegularizationFactor)
+{
+ bool enableRIS = false;
+
+ LumaMipMapGenShaderData_t pcData = {};
+ const nbl::core::vectorSIMDf lumaScales = nbl::core::vectorSIMDf(0.2126729f, 0.7151522f, 0.0721750f, 1.0f);
+
+ m_driver->bindComputePipeline(m_lumaPipeline.get());
+
+ // Calc Luma without Sin Factor
+ {
+ pcData.luminanceScales = nbl::core::vectorSIMDf(lumaScales[0] * envMapRegularizationFactor, lumaScales[1] * envMapRegularizationFactor, lumaScales[2] * envMapRegularizationFactor, (1.0f-envMapRegularizationFactor));
+ pcData.calcLuma = 1;
+ pcData.sinFactor = 0;
+ m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[0].get(),nullptr);
+
+ uint32_t sourceMipWidth, sourceMipHeight = 0u;
+ getEnvmapResolutionFromMipLevel(m_mipCountLuminance - 1, sourceMipWidth, sourceMipHeight);
+
+ uint32_t workGroups[2] = {
+ (sourceMipWidth-1u)/m_lumaMipMapGenWorkgroupDimension+1u,
+ (sourceMipHeight-1u)/m_lumaMipMapGenWorkgroupDimension+1u
+ };
+
+ m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData);
+ m_driver->dispatch(workGroups[0],workGroups[1],1);
+ COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT);
+ }
+
+ // Download Luma Image and calculate Variance and new Regularization Factor
+ float variance = 0.0f;
+ {
+ uint32_t width, height = 0u;
+ getEnvmapResolutionFromMipLevel(m_mipCountLuminance - 1, width, height);
+
+ const uint32_t colorBufferBytesize = width * height * asset::getTexelOrBlockBytesize(EF_R32_SFLOAT);
+ auto downloadStagingArea = m_driver->getDefaultDownStreamingBuffer();
+ constexpr uint64_t timeoutInNanoSeconds = 300000000000u;
+ const auto waitPoint = std::chrono::high_resolution_clock::now()+std::chrono::nanoseconds(timeoutInNanoSeconds);
+
+ uint32_t address = std::remove_pointer::type::invalid_address; // remember without initializing the address to be allocated to invalid_address you won't get an allocation!
+ const uint32_t alignment = 4096u; // common page size
+ auto unallocatedSize = downloadStagingArea->multi_alloc(waitPoint, 1u, &address, &colorBufferBytesize, &alignment);
+ if (unallocatedSize)
+ {
+ os::Printer::log("Could not download the buffer from the GPU!", ELL_ERROR);
+ return false; // nothing was allocated, so there is no valid staging range to copy into or read from
+ }
+
+ IImage::SBufferCopy copyRegion = {};
+ copyRegion.bufferOffset = address;
+ copyRegion.bufferRowLength = 0u;
+ copyRegion.bufferImageHeight = 0u;
+ //copyRegion.imageSubresource.aspectMask = wait for Vulkan;
+ copyRegion.imageSubresource.mipLevel = 0u;
+ copyRegion.imageSubresource.baseArrayLayer = 0u;
+ copyRegion.imageSubresource.layerCount = 1u;
+ copyRegion.imageOffset = { 0u,0u,0u };
+ copyRegion.imageExtent = { width, height, 1u };
+
+ auto luminanceGPUImage = m_luminanceMipMaps[0].get()->getCreationParameters().image.get();
+ m_driver->copyImageToBuffer(luminanceGPUImage, downloadStagingArea->getBuffer(), 1, &copyRegion);
+ auto downloadFence = m_driver->placeFence(true);
+ auto* data = reinterpret_cast(downloadStagingArea->getBufferPointer()) + address;
+
+ // wait for download fence and then invalidate the CPU cache
+ {
+ auto result = downloadFence->waitCPU(timeoutInNanoSeconds,true);
+ if (result==E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED||result==E_DRIVER_FENCE_RETVAL::EDFR_FAIL)
+ {
+ os::Printer::log("Could not download the buffer from the GPU, fence not signalled!", ELL_ERROR);
+ downloadStagingArea->multi_free(1u, &address, &colorBufferBytesize, nullptr);
+ return false; // the range was just freed: reading it or freeing it a second time below would be invalid
+ }
+ if (downloadStagingArea->needsManualFlushOrInvalidate())
+ m_driver->invalidateMappedMemoryRanges({{downloadStagingArea->getBuffer()->getBoundMemory(),address,colorBufferBytesize}});
+ }
+
+ float* fltData = reinterpret_cast(data);
+ float avg_x2 = 0.0f;
+ float avg_x = 0.0f;
+ for(uint32_t i = 0; i < width * height; ++i)
+ {
+ const float x = fltData[i];
+ const float x2 = x*x;
+ const float n = float(i + 1);
+ avg_x = avg_x + (x-avg_x)/(n);
+ avg_x2 = avg_x2 + (x2-avg_x2)/(n);
+ }
+
+ variance = std::max(avg_x2 - avg_x * avg_x, 0.0f); // V[x] = E[X^2]-E[X]^2, clamped because float rounding can push it just below zero
+ std::cout << "Final Luminance Variance = " << variance << std::endl;
+
+ downloadStagingArea->multi_free(1u, &address, &colorBufferBytesize, nullptr);
+ }
+
+ float regularizationFactor = envMapRegularizationFactor*(1.0f-1.0f/(1.0f+variance));
+ std::cout << "New Regularization Factor based on Variance = " << regularizationFactor << std::endl;
+ constexpr float varianceThreshold = 0.001f;
+ enableRIS = (variance >= varianceThreshold);
+
+ // Calc Luma again with Sin Factor and new Regularization Factor
+ {
+ pcData.luminanceScales = nbl::core::vectorSIMDf(lumaScales[0] * regularizationFactor, lumaScales[1] * regularizationFactor, lumaScales[2] * regularizationFactor, (1.0f-regularizationFactor));
+ pcData.calcLuma = 1;
+ pcData.sinFactor = 1;
+
+ m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[0].get(),nullptr);
+
+ uint32_t sourceMipWidth, sourceMipHeight = 0u;
+ getEnvmapResolutionFromMipLevel(m_mipCountLuminance - 1, sourceMipWidth, sourceMipHeight);
+
+ uint32_t workGroups[2] = {
+ (sourceMipWidth-1u)/m_lumaMipMapGenWorkgroupDimension+1u,
+ (sourceMipHeight-1u)/m_lumaMipMapGenWorkgroupDimension+1u
+ };
+
+ m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData);
+ m_driver->dispatch(workGroups[0],workGroups[1],1);
+ COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT|GL_SHADER_STORAGE_BARRIER_BIT);
+ }
+
+ // Calc Mipmaps
+ for(uint32_t s = 0; s < m_mipCountLuminance - 1; ++s)
+ {
+ m_driver->bindDescriptorSets(EPBP_COMPUTE,m_lumaPipeline->getLayout(),0u,1u,&m_lumaDS[s].get(),nullptr);
+
+ uint32_t sourceMipWidth, sourceMipHeight = 0u;
+ getEnvmapResolutionFromMipLevel(m_mipCountLuminance - 1 - s, sourceMipWidth, sourceMipHeight);
+
+ uint32_t workGroups[2] = {
+ (sourceMipWidth-1u)/m_lumaMipMapGenWorkgroupDimension+1u,
+ (sourceMipHeight-1u)/m_lumaMipMapGenWorkgroupDimension+1u
+ };
+
+ pcData.calcLuma = 0;
+ m_driver->pushConstants(m_lumaPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(pcData),&pcData);
+ m_driver->dispatch(workGroups[0],workGroups[1],1);
+ COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT);
+ }
+
+ // Generate WarpMap
+ {
+ m_driver->bindComputePipeline(m_warpPipeline.get());
+
+ WarpMapGenShaderData_t warpPcData = {};
+ warpPcData.lumaMipCount = m_mipCountLuminance;
+
+ m_driver->bindDescriptorSets(EPBP_COMPUTE,m_warpPipeline->getLayout(),0u,1u,&m_warpDS.get(),nullptr);
+
+ uint32_t warpMapWidth, warpMapHeight = 0u;
+ getEnvmapResolutionFromMipLevel(m_mipCountEnvmap - 1, warpMapWidth, warpMapHeight);
+
+ uint32_t workGroups[2] = {
+ (warpMapWidth-1u)/m_warpMapGenWorkgroupDimension+1u,
+ (warpMapHeight-1u)/m_warpMapGenWorkgroupDimension+1u
+ };
+
+ m_driver->pushConstants(m_warpPipeline->getLayout(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(warpPcData),&warpPcData);
+ m_driver->dispatch(workGroups[0],workGroups[1],1);
+ COpenGLExtensionHandler::pGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT|GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_TEXTURE_UPDATE_BARRIER_BIT);
+ }
+
+ return enableRIS;
+}
+
+