Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Main minimal stage #64

Merged
merged 9 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions omm-sdk/include/omm.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#include <stddef.h>

#define OMM_VERSION_MAJOR 1
#define OMM_VERSION_MINOR 5
#define OMM_VERSION_BUILD 1
#define OMM_VERSION_MINOR 6
#define OMM_VERSION_BUILD 0

#define OMM_MAX_TRANSIENT_POOL_BUFFERS 8

Expand Down Expand Up @@ -420,8 +420,13 @@ typedef struct ommCpuBakeInputDesc
const ommFormat* formats;
// Determines how to promote mixed states
ommUnknownStatePromotion unknownStatePromotion;
// Determines the state of unresolvable/degenerate triangles (nan/inf or zero-area UV-triangles)
ommSpecialIndex degenTriState;
// Determines the state of unresolvable(nan/inf UV-triangles) and disabled triangles. Note that degenerate triangles (points/lines) will be resolved correctly.
union
{
OMM_DEPRECATED_MSG("unresolvedTriState has been deprecated, please use unresolvedTriState instead")
ommSpecialIndex degenTriState;
ommSpecialIndex unresolvedTriState;
};
// Micro triangle count is 4^N, where N is the subdivision level.
// maxSubdivisionLevel level must be in range [0, 12].
// When dynamicSubdivisionScale is enabled maxSubdivisionLevel is the max subdivision level allowed.
Expand Down Expand Up @@ -466,7 +471,7 @@ inline ommCpuBakeInputDesc ommCpuBakeInputDescDefault()
v.format = ommFormat_OC1_4_State;
v.formats = NULL;
v.unknownStatePromotion = ommUnknownStatePromotion_ForceOpaque;
v.degenTriState = ommSpecialIndex_FullyUnknownOpaque;
v.unresolvedTriState = ommSpecialIndex_FullyUnknownOpaque;
v.maxSubdivisionLevel = 8;
v.subdivisionLevels = NULL;
v.maxWorkloadSize = 0xFFFFFFFFFFFFFFFF;
Expand Down
9 changes: 7 additions & 2 deletions omm-sdk/include/omm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -316,8 +316,13 @@ namespace omm
const Format* formats = nullptr;
// Determines how to promote mixed states
UnknownStatePromotion unknownStatePromotion = UnknownStatePromotion::ForceOpaque;
// Determines the state of unresolvable/degenerate triangles (nan/inf or zero-area UV-triangles)
SpecialIndex degenTriState = SpecialIndex::FullyUnknownOpaque;
// Determines the state of unresolvable (nan/inf UV-triangles) and disabled triangles. Note that degenerate triangles (points/lines) will be resolved correctly.
// degenTriState is the deprecated legacy name; both members share storage, and the default (FullyUnknownOpaque) is declared on unresolvedTriState.
union
{
OMM_DEPRECATED_MSG("degenTriState has been deprecated, please use unresolvedTriState instead")
omm::SpecialIndex degenTriState;
omm::SpecialIndex unresolvedTriState = SpecialIndex::FullyUnknownOpaque;
};
// Micro triangle count is 4^N, where N is the subdivision level.
// maxSubdivisionLevel level must be in range [0, 12].
// When dynamicSubdivisionScale is enabled maxSubdivisionLevel is the max subdivision level allowed.
Expand Down
134 changes: 101 additions & 33 deletions omm-sdk/src/bake_cpu_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ namespace Cpu
DisableLevelLineIntersection = 1u << 8,
DisableFineClassification = 1u << 9,
EnableNearDuplicateDetectionBruteForce = 1u << 10,
EnableEdgeHeuristic = 1u << 11,
};

constexpr void ValidateInternalBakeFlags()
Expand All @@ -70,7 +71,8 @@ namespace Cpu
enableAABBTesting(((uint32_t)flags& (uint32_t)BakeFlagsInternal::EnableAABBTesting) == (uint32_t)BakeFlagsInternal::EnableAABBTesting),
disableRemovePoorQualityOMM(((uint32_t)flags& (uint32_t)BakeFlagsInternal::DisableRemovePoorQualityOMM) == (uint32_t)BakeFlagsInternal::DisableRemovePoorQualityOMM),
disableLevelLineIntersection(((uint32_t)flags& (uint32_t)BakeFlagsInternal::DisableLevelLineIntersection) == (uint32_t)BakeFlagsInternal::DisableLevelLineIntersection),
disableFineClassification(((uint32_t)flags& (uint32_t)BakeFlagsInternal::DisableFineClassification) == (uint32_t)BakeFlagsInternal::DisableFineClassification)
disableFineClassification(((uint32_t)flags& (uint32_t)BakeFlagsInternal::DisableFineClassification) == (uint32_t)BakeFlagsInternal::DisableFineClassification),
enableEdgeHeuristic(((uint32_t)flags& (uint32_t)BakeFlagsInternal::EnableEdgeHeuristic) == (uint32_t)BakeFlagsInternal::EnableEdgeHeuristic)
{ }
const bool enableInternalThreads;
const bool disableSpecialIndices;
Expand All @@ -82,6 +84,7 @@ namespace Cpu
const bool disableRemovePoorQualityOMM;
const bool disableLevelLineIntersection;
const bool disableFineClassification;
const bool enableEdgeHeuristic;
};

BakerImpl::~BakerImpl()
Expand Down Expand Up @@ -411,7 +414,7 @@ namespace Cpu
return GetArea2D(uvTri.p0, uvTri.p1, uvTri.p2);
};

static const uint32_t CalculateSuitableSubdivisionLevel(const ommCpuBakeInputDesc& desc, const Triangle& uvTri, uint2 texSize)
static const uint32_t ComputeAreaHeuristic(const ommCpuBakeInputDesc& desc, const Triangle& uvTri, uint2 texSize)
{
auto GetNextPow2 = [](uint v)->uint
{
Expand Down Expand Up @@ -442,7 +445,6 @@ namespace Cpu
// Solves the following eqn:
// targetPixelArea / (4^N) = pixelUvArea

// Questionable heuristic... micro-triangle should cover 8x8 pixel region?
const float targetPixelArea = desc.dynamicSubdivisionScale * desc.dynamicSubdivisionScale;
const uint ratio = uint(pixelUvArea / targetPixelArea);
const uint ratioNextPow2 = GetNextPow2(ratio);
Expand All @@ -453,19 +455,38 @@ namespace Cpu
return std::min<uint>(SubdivisionLevel, desc.maxSubdivisionLevel);
}

static bool IsDegenerate(const Triangle& t)
static const uint32_t ComputeEdgeHeuristic(const ommCpuBakeInputDesc& desc, const Triangle& uvTri, uint2 texSize)
{
const bool anyNan = glm::any(glm::isnan(t.p0)) || glm::any(glm::isnan(t.p1)) || glm::any(glm::isnan(t.p2));
const bool anyInf = glm::any(glm::isinf(t.p0)) || glm::any(glm::isinf(t.p1)) || glm::any(glm::isinf(t.p2));

const float3 N = glm::cross(float3(t.p2 - t.p0, 0), float3(t.p1 - t.p0, 0));
const float N2 = N.z * N.z;
const bool bIsZeroArea = N2 < 1e-9;

return anyNan || anyInf || bIsZeroArea;
// Adapted from 3.1.1 https://fileadmin.cs.lth.se/graphics/research/papers/2024/succinct_opacity_micromaps/paper-author-version.pdf
const float2 ve0 = (float2)texSize * (uvTri.p1 - uvTri.p0);
const float2 ve1 = (float2)texSize * (uvTri.p2 - uvTri.p0);
const float2 ve2 = (float2)texSize * (uvTri.p2 - uvTri.p1);

const float le0 = glm::dot(ve0, ve0);
const float le1 = glm::dot(ve1, ve1);
const float le2 = glm::dot(ve2, ve2);

const float eMax = std::max({ le0, le1, le2 });

const float n = eMax < 1e-6 ? 0 : std::log2(eMax) / 2.f - std::log2(desc.dynamicSubdivisionScale);

const int SubdivisionLevel = (int)std::ceil(n);
return std::clamp<int>(SubdivisionLevel, 0, desc.maxSubdivisionLevel);
}

static const uint32_t CalculateSuitableSubdivisionLevel(const ommCpuBakeInputDesc& desc, const Options& options, const Triangle& uvTri, uint2 texSize)
{
if (uvTri.GetIsDegenerate() || options.enableEdgeHeuristic)
{
return ComputeEdgeHeuristic(desc, uvTri, texSize);
}
else
{
return ComputeAreaHeuristic(desc, uvTri, texSize);
}
}

static int32_t GetSubdivisionLevelForPrimitive(const ommCpuBakeInputDesc& desc, uint32_t i, const Triangle& uvTri, uint2 texSize)
static int32_t GetSubdivisionLevelForPrimitive(const ommCpuBakeInputDesc& desc, const Options& options, uint32_t i, const Triangle& uvTri, uint2 texSize)
{
if (desc.subdivisionLevels && desc.subdivisionLevels[i] <= 12)
{
Expand All @@ -477,14 +498,29 @@ namespace Cpu

if (enableDynamicSubdivisionLevel)
{
return CalculateSuitableSubdivisionLevel(desc, uvTri, texSize);
return CalculateSuitableSubdivisionLevel(desc, options, uvTri, texSize);
}
else
{
return desc.maxSubdivisionLevel;
}
}

// Tells whether a UV triangle must be excluded from baking.
//   options    - internal bake options; disableLevelLineIntersection is consulted here.
//   uvTriangle - triangle in UV space.
// Returns true when the triangle reports itself invalid, or when it is degenerate
// while level line intersection is disabled.
static bool GetIsInvalid(const Options& options, const Triangle& uvTriangle)
{
    // we only support degen triangles in level line intersection mode.
    const bool unsupportedDegenerate = !uvTriangle.GetIsInvalid()
        && options.disableLevelLineIntersection
        && uvTriangle.GetIsDegenerate();

    return unsupportedDegenerate || uvTriangle.GetIsInvalid();
}

namespace impl
{
static ommResult SetupWorkItems(
Expand All @@ -506,7 +542,7 @@ namespace Cpu
{
const uint32_t texCoordStrideInBytes = desc.texCoordStrideInBytes == 0 ? GetTexCoordFormatSize(desc.texCoordFormat) : desc.texCoordStrideInBytes;

uint32_t numDegenTri = 0;
uint32_t numDisabledTri = 0;

for (int32_t i = 0; i < triangleCount; ++i)
{
Expand All @@ -515,14 +551,13 @@ namespace Cpu

const Triangle uvTri = FetchUVTriangle(desc.texCoords, texCoordStrideInBytes, desc.texCoordFormat, triangleIndices);

const int32_t subdivisionLevel = GetSubdivisionLevelForPrimitive(desc, i, uvTri, texture->GetSize(0 /*always based on mip 0*/));
const int32_t subdivisionLevel = GetSubdivisionLevelForPrimitive(desc, options, i, uvTri, texture->GetSize(0 /*always based on mip 0*/));

const bool bIsDisabled = subdivisionLevel == kDisabledPrimitive;
const bool bIsDegenerate = IsDegenerate(uvTri);

if (bIsDisabled || bIsDegenerate)

if (bIsDisabled || GetIsInvalid(options, uvTri))
{
numDegenTri++;
numDisabledTri++;
continue; // These indices will be set to special index unknown later.
}

Expand Down Expand Up @@ -556,11 +591,11 @@ namespace Cpu
}
}

if (options.enableValidation && numDegenTri != 0)
if (options.enableValidation && numDisabledTri != 0)
{
const char* specialIndex = ToString(desc.degenTriState);
log.Infof("[Info] - The workload consists of %d degenerate triangles, these will be classified as Fully Unknown Opaque (this behaviour can be changed by degenTriState).",
numDegenTri, specialIndex);
const char* specialIndex = ToString(desc.unresolvedTriState);
log.Infof("[Info] - The workload consists of %d unclassifiable triangles, these will be classified as unresolvedTriState = %s.",
numDisabledTri, specialIndex);
}
}
return ommResult_SUCCESS;
Expand Down Expand Up @@ -714,7 +749,13 @@ namespace Cpu
return ommResult_SUCCESS;
}

template<ommCpuTextureFormat eFormat, TilingMode eTilingMode, ommTextureAddressMode eTextureAddressMode, ommTextureFilterMode eFilterMode>
// Selects which class of work item a ResampleFine instantiation processes.
enum TriangleClass
{
    Normal = 0,     // work items whose UV triangle is not degenerate
    Degenerate = 1  // work items whose UV triangle is degenerate
};

template<ommCpuTextureFormat eFormat, TilingMode eTilingMode, ommTextureAddressMode eTextureAddressMode, ommTextureFilterMode eFilterMode, TriangleClass eTriangleClass>
static ommResult ResampleFine(const ommCpuBakeInputDesc& desc, const Logger& log, const Options& options, vector<OmmWorkItem>& vmWorkItems)
{
if (options.enableAABBTesting && !options.disableLevelLineIntersection)
Expand All @@ -738,6 +779,17 @@ namespace Cpu
{
// Subdivide the input triangle in to smaller triangles. They will be "bird-curve" ordered.
OmmWorkItem& workItem = vmWorkItems[workItemIt];
const bool isDegenerate = workItem.uvTri.GetIsDegenerate();

if (eTriangleClass == TriangleClass::Normal && isDegenerate)
{
continue;
}

if (eTriangleClass == TriangleClass::Degenerate && !isDegenerate)
{
continue;
}

const uint32_t numMicroTriangles = omm::bird::GetNumMicroTriangles(workItem.subdivisionLevel);

Expand Down Expand Up @@ -779,8 +831,18 @@ namespace Cpu
else
vmCoverage.numBelowAlpha++;

auto kernel = &LevelLineIntersectionKernel::run<eFormat, eTextureAddressMode, eTilingMode>;
RasterizeConservativeSerialWithOffsetCoverage(subTri, rasterSize, pixelOffset, kernel, &params);

if constexpr (eTriangleClass == TriangleClass::Normal)
{
auto kernel = &LevelLineIntersectionKernel::run<eFormat, eTextureAddressMode, eTilingMode, false /*degenerate*/>;
RasterizeConservativeSerialWithOffsetCoverage(subTri, rasterSize, pixelOffset, kernel, &params);
}
else
{
auto kernel = &LevelLineIntersectionKernel::run<eFormat, eTextureAddressMode, eTilingMode, true /*degenerate*/>;
Line l(subTri.aabb_s, subTri.aabb_e);
RasterizeConservativeLineWithOffset(l, rasterSize, pixelOffset, kernel, &params);
}

OMM_ASSERT(vmCoverage.numAboveAlpha != 0 || vmCoverage.numBelowAlpha != 0);
const ommOpacityState state = GetStateFromCoverage(desc.format, desc.unknownStatePromotion, desc.alphaCutoffGreater, desc.alphaCutoffLessEqual, vmCoverage);
Expand Down Expand Up @@ -867,7 +929,7 @@ namespace Cpu

params.vmState = &vmCoverage;

auto kernel = [](int2 pixel, float3* bc, void* ctx)
auto kernel = [](int2 pixel, void* ctx)
{
KernelParams* p = (KernelParams*)ctx;

Expand Down Expand Up @@ -1513,7 +1575,7 @@ namespace Cpu
// Set special indices...
{
res.ommIndexBuffer.resize(triangleCount);
std::fill(res.ommIndexBuffer.begin(), res.ommIndexBuffer.end(), (int32_t)desc.degenTriState);
std::fill(res.ommIndexBuffer.begin(), res.ommIndexBuffer.end(), (int32_t)desc.unresolvedTriState);
for (const OmmWorkItem& vm : vmWorkItems)
{
for (uint32_t primitiveIndex : vm.primitiveIndices)
Expand Down Expand Up @@ -1564,8 +1626,12 @@ namespace Cpu
return impl::ResampleCoarse<eFormat, eTilingMode, eTextureAddressMode, eFilterMode>(desc, log, options, vmWorkItems);
};

auto impl__ResampleFine = [](const ommCpuBakeInputDesc& desc, const Logger& log, const Options& options, vector<OmmWorkItem>& vmWorkItems) {
return impl::ResampleFine<eFormat, eTilingMode, eTextureAddressMode, eFilterMode>(desc, log, options, vmWorkItems);
// Fine resampling pass instantiated for TriangleClass::Normal; forwards all arguments to impl::ResampleFine and returns its ommResult.
auto impl__ResampleFineNormal = [](const ommCpuBakeInputDesc& desc, const Logger& log, const Options& options, vector<OmmWorkItem>& vmWorkItems) {
return impl::ResampleFine<eFormat, eTilingMode, eTextureAddressMode, eFilterMode, impl::TriangleClass::Normal>(desc, log, options, vmWorkItems);
};

// Fine resampling pass instantiated for TriangleClass::Degenerate; forwards all arguments to impl::ResampleFine and returns its ommResult.
auto impl__ResampleFineDegen = [](const ommCpuBakeInputDesc& desc, const Logger& log, const Options& options, vector<OmmWorkItem>& vmWorkItems) {
return impl::ResampleFine<eFormat, eTilingMode, eTextureAddressMode, eFilterMode, impl::TriangleClass::Degenerate>(desc, log, options, vmWorkItems);
};

{
Expand All @@ -1577,7 +1643,9 @@ namespace Cpu

RETURN_STATUS_IF_FAILED(impl__ResampleCoarse(desc, m_log, options, vmWorkItems));

RETURN_STATUS_IF_FAILED(impl__ResampleFine(desc, m_log, options, vmWorkItems));
RETURN_STATUS_IF_FAILED(impl__ResampleFineNormal(desc, m_log, options, vmWorkItems));

RETURN_STATUS_IF_FAILED(impl__ResampleFineDegen(desc, m_log, options, vmWorkItems));

RETURN_STATUS_IF_FAILED(impl::PromoteToSpecialIndices(desc, options, vmWorkItems));

Expand All @@ -1604,4 +1672,4 @@ namespace Cpu
}

} // namespace Cpu
} // namespace omm
} // namespace omm
Loading
Loading