diff --git a/strahl-lib/src/denoise-pass-shader.ts b/strahl-lib/src/denoise-pass-shader.ts
new file mode 100644
index 0000000..1773e7d
--- /dev/null
+++ b/strahl-lib/src/denoise-pass-shader.ts
@@ -0,0 +1,25 @@
+import denoisePassShader from "./denoise-pass-shader.wgsl?raw";
+
+/** Values substituted into the WGSL source before it is handed to the GPU. */
+type Params = {
+  imageWidth: number;
+  imageHeight: number;
+  maxWorkgroupDimension: number;
+  maxBvhStackDepth: number;
+};
+
+/**
+ * Maps every parameter to the placeholder name used in the shader source,
+ * where it is written as `${name}`. Typing the map as a `Record` over
+ * `keyof Params` makes a missing or misspelled key a compile error.
+ */
+const PARAM_PLACEHOLDER_MAP: Record<keyof Params, string> = {
+  imageWidth: "imageWidth",
+  imageHeight: "imageHeight",
+  maxWorkgroupDimension: "maxWorkgroupDimension",
+  maxBvhStackDepth: "maxBvhStackDepth",
+};
+
+/**
+ * Builds the denoise-pass shader source.
+ *
+ * @param params - Values to substitute for the `${...}` placeholders.
+ * @returns The WGSL source with every placeholder occurrence replaced.
+ */
+export default function build(params: Params): string {
+  const placeholders = Object.entries(PARAM_PLACEHOLDER_MAP) as [
+    keyof Params,
+    string,
+  ][];
+  return placeholders.reduce(
+    (source, [key, placeholder]) =>
+      source.replaceAll(`\${${placeholder}}`, `${params[key]}`),
+    denoisePassShader,
+  );
+}
diff --git a/strahl-lib/src/denoise-pass-shader.wgsl b/strahl-lib/src/denoise-pass-shader.wgsl
new file mode 100644
index 0000000..7ecb4d9
--- /dev/null
+++ b/strahl-lib/src/denoise-pass-shader.wgsl
@@ -0,0 +1,1312 @@
+// NOTE(review): the `<...>` template arguments of this file were lost in
+// extraction. Where the way a value is used fixes the type they have been
+// restored (f32 vectors/matrices, `ptr<function, u32>` for the RNG seed);
+// the `function` address space is an assumption - confirm against the entry
+// point.
+alias Color = vec3f;
+
+// Surface parameters of one material, indexed through MaterialDefinition.
+struct Material {
+  baseWeight: f32,
+  baseColor: Color,
+  // todo: switch order of baseDiffuseRoughness and baseMetalness
+  baseDiffuseRoughness: f32,
+  baseMetalness: f32,
+  specularWeight: f32,
+  specularColor: Color,
+  specularRoughness: f32,
+  specularAnisotropy: f32,
+  specularRotation: f32,
+  specularIor: f32,
+  coatWeight: f32,
+  coatColor: Color,
+  coatRoughness: f32,
+  coatRoughnessAnisotropy: f32,
+  coatIor: f32,
+  coatDarkening: f32,
+  emissionLuminance: f32,
+  emissionColor: Color,
+  thinFilmThickness: f32,
+  thinFilmIOR: f32,
+}
+
+// Per-dispatch settings shared by all invocations.
+struct UniformData {
+  invProjectionMatrix: mat4x4<f32>,
+  cameraWorldMatrix: mat4x4<f32>,
+  invModelMatrix: mat4x4<f32>,
+  seedOffset: u32,
+  priorSamples: u32,
+  samplesPerPixel: u32,
+  sunDirection: vec3f,
+  skyPower: f32,
+  skyColor: Color,
+  sunPower: f32,
+  // Read as degrees: every use multiplies it by PI/180.
+  sunAngularSize: f32,
+  sunColor: Color,
+  clearColor: Color,
+  // bool is not supported in uniform
+  enableClearColor: i32,
+  maxRayDepth: i32,
+  objectDefinitionLength: i32,
+  // 0 -> normal
+  // 1 -> albedo
+  mode: i32,
+}
+
+// Use due to 16 bytes alignment of vec3
+struct IndicesPackage {
+  x: i32,
+  y: i32,
+  z: i32,
+}
+
+// CODE#BUFFER-BINDINGS
+// NOTE(review): the address space / access mode after `var` and the element
+// and texel types below were stripped together with the other `<...>` spans
+// and cannot be recovered from this file alone, so the declarations are kept
+// exactly as found. Usage elsewhere in the shader suggests: `positions`
+// elements are indexed [0] (position) and [1] (normal); `indices` elements
+// expose .x/.y/.z; `bounds` elements are indexed [0]/[1] and swizzled .xyz;
+// `contents` elements expose .x/.y as u32; `indirectIndices` holds u32;
+// `objectDefinitions` holds ObjectDefinition; `materials` holds Material.
+// Restore them from the pipeline's bind group layout.
+@group(0) @binding(0) var positions: array>;
+// todo: Check when i16 is supported
+@group(0) @binding(1) var indices: array;
+
+@group(0) @binding(2) var bounds: array>;
+@group(0) @binding(3) var contents: array;
+
+@group(0) @binding(5) var indirectIndices: array;
+
+@group(0) @binding(6) var objectDefinitions: array;
+
+@group(0) @binding(7) var materials: array;
+
+@group(1) @binding(0) var texture: texture_storage_2d;
+
+@group(1) @binding(1) var readTexture: texture_storage_2d;
+
+@group(1) @binding(2) var uniformData: UniformData;
+
+@group(1) @binding(3) var hdrColor: array;
+
+const MINIMUM_FLOAT_EPSILON = 1e-8;
+const FLT_EPSILON = 1.1920929e-7;
+const PI = 3.1415926535897932;
+const PI_INVERSE = 1.0 / PI;
+
+struct Ray {
+  origin: vec3<f32>,
+  direction: vec3<f32>,
+};
+
+// Range [start, start + count) of vertex-index offsets sharing one material.
+struct ObjectDefinition {
+  start: u32,
+  count: u32,
+  material: MaterialDefinition,
+}
+
+struct MaterialDefinition {
+  index: u32,
+}
+
+struct HitRecord {
+  point: vec3<f32>,
+  normal: vec3<f32>,
+  t: f32,
+  frontFace: bool,
+  material: MaterialDefinition,
+}
+
+// Triangle stored as one corner (Q) plus the two edge vectors leaving it.
+struct Triangle {
+  Q: vec3<f32>,
+  u: vec3<f32>,
+  v: vec3<f32>,
+  material: MaterialDefinition,
+  normal0: vec3<f32>,
+  normal1: vec3<f32>,
+  normal2: vec3<f32>,
+}
+
+struct Interval {
+  min: f32,
+  max: f32,
+}
+
+struct BinaryBvhNodeInfo {
+  // Upper 16 bits: 0xffff marks a leaf.
+  // Lower 16 bits: triangle count (leaf) or split axis (inner node).
+  x: u32,
+  // Triangle offset (leaf) or index of the right child (inner node).
+  y: u32,
+}
+
+// True when every component of v is smaller in magnitude than
+// MINIMUM_FLOAT_EPSILON. (Previously used `any`, which also reported vectors
+// with a single small component as zero.)
+fn nearZero(v: vec3f) -> bool {
+  let epsilon = vec3f(MINIMUM_FLOAT_EPSILON);
+  return all(abs(v) < epsilon);
+}
+
+fn sqr(x: f32) -> f32 {
+  return x * x;
+}
+
+// Largest component of v.
+fn maxVec3(v: vec3f) -> f32 {
+  return max(v.x, max(v.y, v.z));
+}
+
+// Anisotropic GGX normal distribution for half vector H (local frame, z up).
+// Unlike ggxNDFEval this clamps alpha into [DENOM_TOLERANCE, 1].
+fn ggxNDF(H: vec3f, alpha: vec2<f32>) -> f32 {
+  let safeAlpha = clamp(alpha, vec2f(DENOM_TOLERANCE, DENOM_TOLERANCE), vec2f(1.0, 1.0));
+  let Ddenom = PI * safeAlpha.x * safeAlpha.y * sqr(sqr(H.x / safeAlpha.x) + sqr(H.y / safeAlpha.y) + sqr(H.z));
+  return 1.0 / max(Ddenom, DENOM_TOLERANCE);
+}
+
+// GGX NDF sampling routine, as described in
+// "Sampling Visible GGX Normals with Spherical Caps", Dupuy et al., HPG 2023.
+// NB, this assumes wiL is in the +z hemisphere, and returns a sampled micronormal in that hemisphere.
+// Draws two random numbers from seed.
+fn ggxNDFSample(wiL: vec3f, alpha: vec2<f32>, seed: ptr<function, u32>) -> vec3f {
+  let Xi = vec2f(randomF32(seed), randomF32(seed));
+
+  // Stretch the view direction into the unit-roughness configuration.
+  let V = normalize(vec3f(wiL.xy * alpha, wiL.z));
+
+  // Sample a point on the spherical cap.
+  let phi = 2.0 * PI * Xi.x;
+  let z = (1.0 - Xi.y) * (1.0 + V.z) - V.z;
+  let sinTheta = sqrt(clamp(1.0 - z * z, 0.0, 1.0));
+  let c = vec3f(sinTheta * cos(phi), sinTheta * sin(phi), z);
+
+  // Half vector in the stretched frame, then unstretch.
+  let H = c + V;
+  return normalize(vec3f(H.xy * alpha, H.z));
+}
+
+// Anisotropic GGX normal distribution for micronormal m; alpha is only
+// bounded from below.
+fn ggxNDFEval(m: vec3f, alpha: vec2f) -> f32 {
+  let ax = max(alpha.x, DENOM_TOLERANCE);
+  let ay = max(alpha.y, DENOM_TOLERANCE);
+  let Ddenom = PI * ax * ay * sqr(sqr(m.x / ax) + sqr(m.y / ay) + sqr(m.z));
+  return 1.0 / max(Ddenom, DENOM_TOLERANCE);
+}
+
+// Smith Lambda term for direction w; 0 for grazing directions to avoid the
+// division by w.z.
+fn ggxLambda(w: vec3f, alpha: vec2f) -> f32 {
+  if (abs(w.z) < FLT_EPSILON) {
+    return 0.0;
+  }
+  return (-1.0 + sqrt(1.0 + (sqr(alpha.x * w.x) + sqr(alpha.y * w.y)) / sqr(w.z))) / 2.0;
+}
+
+// Smith masking term for a single direction.
+fn ggxG1(w: vec3f, alpha: vec2f) -> f32 {
+  return 1.0 / (1.0 + ggxLambda(w, alpha));
+}
+
+// Height-correlated Smith masking-shadowing term for a direction pair.
+fn ggxG2(woL: vec3f, wiL: vec3f, alpha: vec2f) -> f32 {
+  return 1.0 / (1.0 + ggxLambda(woL, alpha) + ggxLambda(wiL, alpha));
+}
+
+struct BsdfResponse {
+  response: vec3f,
+  throughput: vec3f,
+  thickness: f32,
+  ior: f32,
+}
+
+struct FresnelData {
+  model: i32,
+  ior: vec3f,
+  extinction: vec3f,
+  F0: vec3f,
+  F90: vec3f,
+  exponent: f32,
+  thinFilmThickness: f32,
+  thinFilmIOR: f32,
+  refraction: bool,
+}
+
+const FRESNEL_MODEL_SCHLICK = 2;
+
+// FresnelData configured for the Schlick model; all other fields are zeroed.
+fn initFresnelSchlick(F0: vec3f, F90: vec3f, exponent: f32) -> FresnelData {
+  return FresnelData(
+    FRESNEL_MODEL_SCHLICK,
+    vec3f(0.0),
+    vec3f(0.0),
+    F0,
+    F90,
+    exponent,
+    0.0,
+    0.0,
+    false
+  );
+}
+
+// Generalized Schlick Fresnel: blends F0 -> F90 by (1 - cosTheta)^exponent.
+fn fresnelSchlick(cosTheta: f32, F0: vec3f, F90: vec3f, exponent: f32) -> vec3f {
+  let x = clamp(1.0 - cosTheta, 0.0, 1.0);
+  return mix(F0, F90, pow(x, exponent));
+}
+
+// Classic Schlick Fresnel with F90 = 1 and exponent 5.
+fn fresnelSchlickV2(F0: vec3f, mu: f32) -> vec3f {
+  // Clamp the base: mu can exceed 1 by rounding and pow() is not defined for
+  // a negative base.
+  let oneMinusMu = clamp(1.0 - mu, 0.0, 1.0);
+  return F0 + pow(oneMinusMu, 5.0) * (vec3f(1.0) - F0);
+}
+
+// Evaluates the Fresnel model selected in fd; unknown models yield 0.
+fn computeFresnel(cosTheta: f32, fd: FresnelData) -> vec3f {
+  // todo: implement other models (dielectric, conductor, airy)
+  if (fd.model == FRESNEL_MODEL_SCHLICK) {
+    return fresnelSchlick(cosTheta, fd.F0, fd.F90, fd.exponent);
+  }
+
+  return vec3f(0.0);
+}
+
+// Schlick Fresnel with an "F82 tint" correction term: a mu * (1 - mu)^6 lobe
+// scaled so that at mu = 1/7 the result equals f82Tint times the Schlick
+// value.
+fn fresnelF82Tint(mu: f32, F0: vec3f, f82Tint: vec3f) -> vec3f {
+  let muBar = 1.0 / 7.0;
+  let denom = muBar * pow(1.0 - muBar, 6.0);
+  let fSchlickBar = fresnelSchlickV2(F0, muBar);
+  let fSchlick = fresnelSchlickV2(F0, mu);
+  // Same base clamp as in fresnelSchlickV2.
+  let oneMinusMu = clamp(1.0 - mu, 0.0, 1.0);
+  return fSchlick - mu * pow(oneMinusMu, 6.0) * (vec3f(1.0) - f82Tint) * fSchlickBar / denom;
+}
+
+
+// Point on the ray at parameter t.
+fn rayAt(ray: Ray, t: f32) -> vec3<f32> {
+  return ray.origin + t * ray.direction;
+}
+
+fn lengthSquared(v: vec3<f32>) -> f32 {
+  return dot(v, v);
+}
+
+// CODE#RNG
+// See https://github.com/imneme/pcg-c/blob/83252d9c23df9c82ecb42210afed61a7b42402d7/include/pcg_variants.h#L283
+const PCG_INC = 2891336453u;
+// See https://github.com/imneme/pcg-c/blob/83252d9c23df9c82ecb42210afed61a7b42402d7/include/pcg_variants.h#L278
+const PCG_MULTIPLIER = 747796405u;
+
+// https://www.pcg-random.org/download.html#id1
+// See https://github.com/imneme/pcg-c/blob/83252d9c23df9c82ecb42210afed61a7b42402d7/include/pcg_variants.h#L1533
+// One PCG step: advances the LCG state derived from seed and returns the
+// permuted output word.
+fn randomU32(seed: u32) -> u32 {
+  let state = seed * PCG_MULTIPLIER + PCG_INC;
+  let word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
+  return (word >> 22u) ^ word;
+}
+
+const range = 1.0 / f32(0xffffffffu);
+
+// Generate a random float in the range [0, 1).
+// Advances *seed with randomU32 and maps the new state to a float.
+// NOTE(review): f32(0xffffffffu) rounds up to 2^32, so the largest states map
+// to exactly 1.0 and a state of 0 wraps to the largest value - the upper
+// bound appears to be inclusive.
+//
+// NOTE(review): the `<...>` template arguments of this file were lost in
+// extraction. They are restored here from usage; the `function` address space
+// on `ptr` parameters follows from call sites that pass `&localVar` and is
+// assumed for `seed` - confirm against the entry point.
+fn randomF32(seed: ptr<function, u32>) -> f32 {
+  *seed = randomU32(*seed);
+  return f32(*seed - 1u) * range;
+}
+
+const TRIANGLE_EPSILON = 1.0e-6;
+
+// CODE#TRIANGLE-INTERSECTION
+// Möller–Trumbore intersection algorithm without culling
+// On a hit inside rayT writes t, point, the barycentrically interpolated
+// normal and the material into *hitRecord and returns true. frontFace is not
+// written.
+fn triangleHit(triangle: Triangle, ray: Ray, rayT: Interval, hitRecord: ptr<function, HitRecord>) -> bool {
+  let edge1 = triangle.u;
+  let edge2 = triangle.v;
+  let pvec = cross(ray.direction, edge2);
+  let det = dot(edge1, pvec);
+  // No hit if ray is parallel to the triangle (ray lies in plane of triangle)
+  if (det > -TRIANGLE_EPSILON && det < TRIANGLE_EPSILON) {
+    return false;
+  }
+  let invDet = 1.0 / det;
+  let tvec = ray.origin - triangle.Q;
+  let u = dot(tvec, pvec) * invDet;
+
+  if (u < 0.0 || u > 1.0) {
+    return false;
+  }
+
+  let qvec = cross(tvec, edge1);
+  let v = dot(ray.direction, qvec) * invDet;
+
+  if (v < 0.0 || u + v > 1.0) {
+    return false;
+  }
+
+  let t = dot(edge2, qvec) * invDet;
+
+  // check if the intersection point is within the ray's interval
+  if (t < rayT.min || t > rayT.max) {
+    return false;
+  }
+
+  (*hitRecord).t = t;
+  (*hitRecord).point = rayAt(ray, t);
+  (*hitRecord).normal = normalize(triangle.normal0 * (1.0 - u - v) + triangle.normal1 * u + triangle.normal2 * v);
+
+  (*hitRecord).material = triangle.material;
+
+  return true;
+}
+
+// CODE#BVH-TESTS
+// Based on https://github.com/gkjohnson/three-mesh-bvh/blob/master/src/gpu/glsl/bvh_ray_functions.glsl.js
+// Slab test of ray against an axis-aligned box. Writes the entry distance
+// (clamped to >= 0) to *dist and returns whether the box is hit.
+fn intersectsBounds(ray: Ray, boundsMin: vec3f, boundsMax: vec3f, dist: ptr<function, f32>) -> bool {
+  let invDir = vec3f(1.0) / ray.direction;
+
+  let tMinPlane = invDir * (boundsMin - ray.origin);
+  let tMaxPlane = invDir * (boundsMax - ray.origin);
+
+  let tMinHit = min(tMaxPlane, tMinPlane);
+  let tMaxHit = max(tMaxPlane, tMinPlane);
+
+  // Largest of the three entry distances.
+  var t = max(tMinHit.xx, tMinHit.yz);
+  let t0 = max(t.x, t.y);
+
+  // Smallest of the three exit distances.
+  t = min(tMaxHit.xx, tMaxHit.yz);
+  let t1 = min(t.x, t.y);
+
+  (*dist) = max(t0, 0.0);
+
+  return t1 >= (*dist);
+}
+
+// Slab test against the bounding box stored for BVH node currNodeIndex.
+fn intersectsBVHNodeBounds(ray: Ray, currNodeIndex: u32, dist: ptr<function, f32>) -> bool {
+  // 2 x x,y,z + unused alpha
+  let boundaries = bounds[currNodeIndex];
+  let boundsMin = boundaries[0];
+  let boundsMax = boundaries[1];
+  return intersectsBounds(ray, boundsMin.xyz, boundsMax.xyz, dist);
+}
+
+// Material of the object definition whose [start, start + count) range
+// contains vIndexOffset; falls back to the first definition when none does.
+fn findMaterialDefinition(vIndexOffset: u32) -> MaterialDefinition {
+  for (var j = 0; j < uniformData.objectDefinitionLength; j++) {
+    let objectDefinition = objectDefinitions[j];
+    if (objectDefinition.start <= vIndexOffset && objectDefinition.start + objectDefinition.count > vIndexOffset) {
+      return objectDefinition.material;
+    }
+  }
+  return objectDefinitions[0].material;
+}
+
+// Tests the `count` triangles referenced by indirectIndices[offset...] and
+// keeps the closest hit in *hitRecord. (*hitRecord).t must hold the current
+// closest distance on entry; it is used as the upper bound of the search.
+fn intersectTriangles(offset: u32, count: u32, ray: Ray, rayT: Interval, hitRecord: ptr<function, HitRecord>) -> bool {
+  var found = false;
+  var localDist = (*hitRecord).t;
+  let end = offset + count;
+
+  for (var i = offset; i < end; i += 1) {
+    let indAccess = indirectIndices[i];
+    let indicesPackage = indices[indAccess];
+
+    // Each positions entry holds the vertex position at [0] and its normal
+    // at [1].
+    let v1 = positions[indicesPackage.x];
+    let v2 = positions[indicesPackage.y];
+    let v3 = positions[indicesPackage.z];
+
+    let Q = v1[0];
+    let u = v2[0] - Q;
+    let v = v3[0] - Q;
+
+    // The material is filled in only after a hit so the object-definition
+    // search is not run for triangles the ray misses.
+    let triangle = Triangle(Q, u, v, MaterialDefinition(), v1[1], v2[1], v3[1]);
+
+    var tmpRecord: HitRecord;
+    // The interval is capped at localDist, so any reported hit is at least
+    // as close as the best one so far.
+    if (triangleHit(triangle, ray, Interval(rayT.min, localDist), &tmpRecord)) {
+      tmpRecord.material = findMaterialDefinition(indAccess * 3);
+      (*hitRecord) = tmpRecord;
+
+      localDist = tmpRecord.t;
+      found = true;
+    }
+  }
+  return found;
+}
+
+// Closest-hit query against the whole scene through the BVH. Returns true and
+// fills *hitRecord when something is hit inside rayT. (*hitRecord).t must be
+// initialised to the maximum distance by the caller.
+fn hittableListHit(ray: Ray, rayT: Interval, hitRecord: ptr<function, HitRecord>) -> bool {
+  var hitAnything = false;
+  var closestSoFar = rayT.max;
+
+  // Inspired by https://github.com/gkjohnson/three-mesh-bvh/blob/master/src/gpu/glsl/bvh_ray_functions.glsl.js
+
+  // BVH Intersection Detection
+  // NOTE(review): pushes are not bounds-checked; once sPtr reaches the stack
+  // capacity the loop condition stops the traversal and nodes still pending
+  // are skipped - confirm maxBvhStackDepth exceeds the BVH depth.
+  var sPtr = 0;
+  var stack = array<u32, ${maxBvhStackDepth}>();
+  stack[sPtr] = 0u;
+
+  while (sPtr > -1 && sPtr < ${maxBvhStackDepth}) {
+    let currNodeIndex = stack[sPtr];
+    sPtr -= 1;
+
+    var boundsHitDistance: f32;
+
+    // Skip nodes the ray misses or that start behind the closest hit.
+    if (!intersectsBVHNodeBounds(ray, currNodeIndex, &boundsHitDistance) || boundsHitDistance > closestSoFar) {
+      continue;
+    }
+
+    let boundsInfo = contents[currNodeIndex];
+    let boundsInfoX = boundsInfo.x;
+    let boundsInfoY = boundsInfo.y;
+
+    // CODE#BVH-NODE-ACCESS
+    let isLeaf = (boundsInfoX & 0xffff0000u) == 0xffff0000u;
+
+    if (isLeaf) {
+      let count = boundsInfoX & 0x0000ffffu;
+      let offset = boundsInfoY;
+
+      let found2 = intersectTriangles(
+        offset,
+        count,
+        ray,
+        rayT,
+        hitRecord
+      );
+      if (found2) {
+        closestSoFar = (*hitRecord).t;
+      }
+
+      hitAnything = hitAnything || found2;
+    } else {
+      // Left node is always the next node
+      let leftIndex = currNodeIndex + 1u;
+      let splitAxis = boundsInfoX & 0x0000ffffu;
+      let rightIndex = boundsInfoY;
+
+      // Visit the child on the near side of the split first: it is pushed
+      // last so it is popped first.
+      let leftToRight = ray.direction[splitAxis] > 0.0;
+      let c1 = select(rightIndex, leftIndex, leftToRight);
+      let c2 = select(leftIndex, rightIndex, leftToRight);
+
+      sPtr += 1;
+      stack[sPtr] = c2;
+      sPtr += 1;
+      stack[sPtr] = c1;
+    }
+  }
+
+  return hitAnything;
+}
+
+struct BouncingInfo {
+  attenuation: Color,
+  emission: Color,
+}
+
+// Local shading frame: normal, tangent and bitangent in world space.
+struct Basis {
+  nW: vec3f,
+  tW: vec3f,
+  bW: vec3f,
+  baryCoords: vec3f,
+}
+
+const DENOM_TOLERANCE = 1.0e-10;
+const RADIANCE_EPSILON = 1.0e-12;
+
+// normalize() that cannot divide by zero.
+fn safeNormalize(v: vec3f) -> vec3f {
+  let len = length(v);
+  return v / max(len, DENOM_TOLERANCE);
+}
+
+// A unit vector perpendicular to N.
+fn normalToTangent(N: vec3f) -> vec3f {
+  var T: vec3f;
+  if (abs(N.z) < abs(N.x)) {
+    T = vec3f(N.z, 0.0, -N.x);
+  } else {
+    T = vec3f(0.0, N.z, -N.y);
+  }
+  return safeNormalize(T);
+}
+
+// Builds a frame around normal nWI with an arbitrary tangent.
+fn makeBasis(nWI: vec3f) -> Basis {
+  let nW = safeNormalize(nWI);
+  let tW = normalToTangent(nW);
+  // Use the normalised normal so the bitangent is unit length even when nWI
+  // is not (matches makeBasisFull).
+  let bW = cross(nW, tW);
+  return Basis(nW, tW, bW, vec3f(0.0));
+}
+
+// Builds a frame from a given normal and tangent.
+fn makeBasisFull(nW: vec3f, tW: vec3f, baryCoords: vec3f) -> Basis {
+  let nWo = safeNormalize(nW);
+  let tWo = safeNormalize(tW);
+  let bWo = cross(nWo, tWo);
+  return Basis(nWo, tWo, bWo, baryCoords);
+}
+
+// World -> local frame (x: tangent, y: bitangent, z: normal).
+fn worldToLocal(vWorld: vec3f, basis: Basis) -> vec3f {
+  return vec3f(dot(vWorld, basis.tW), dot(vWorld, basis.bW), dot(vWorld, basis.nW));
+}
+
+// Local frame -> world.
+fn localToWorld(vLocal: vec3f, basis: Basis) -> vec3f {
+  return basis.tW * vLocal.x + basis.bW * vLocal.y + basis.nW * vLocal.z;
+}
+
+// Rotation about the local z axis and its inverse.
+struct LocalFrameRotation {
+  M: mat2x2<f32>,
+  Minv: mat2x2<f32>,
+}
+
+// Rotation matrices for angle (radians); exact identity for 0 and 2*PI.
+fn getLocalFrameRotation(angle: f32) -> LocalFrameRotation {
+  if (angle == 0.0 || angle == 2.0 * PI) {
+    let identity = mat2x2(1.0, 0.0, 0.0, 1.0);
+    return LocalFrameRotation(identity, identity);
+  }
+  let cosRot = cos(angle);
+  let sinRot = sin(angle);
+  let M = mat2x2(cosRot, sinRot, -sinRot, cosRot);
+  let Minv = mat2x2(cosRot, -sinRot, sinRot, cosRot);
+  return LocalFrameRotation(M, Minv);
+}
+
+fn localToRotated(vLocal: vec3f, rotation: LocalFrameRotation) -> vec3f {
+  let xyRot = rotation.M * vLocal.xy;
+  return vec3f(xyRot.x, xyRot.y, vLocal.z);
+}
+
+fn rotatedToLocal(vRotated: vec3f, rotation: LocalFrameRotation) -> vec3f {
+  let xyLocal = rotation.Minv * vRotated.xy;
+  return vec3f(xyLocal.x, xyLocal.y, vRotated.z);
+}
+
+// Per-lobe tables, indexed by the ID_*_BRDF / ID_*_BTDF constants.
+// NOTE(review): element types follow from how the tables are written and
+// read; the element count is taken to be NUM_LOBES because every loop over
+// them runs to NUM_LOBES.
+struct LobeWeights {
+  m: array<vec3f, NUM_LOBES>,
+}
+
+struct LobeAlbedos {
+  m: array<vec3f, NUM_LOBES>,
+}
+
+struct LobeProbs {
+  m: array<f32, NUM_LOBES>,
+}
+
+struct LobePDFs {
+  m: array<f32, NUM_LOBES>,
+}
+
+struct LobeData {
+  weights: LobeWeights,
+  albedos: LobeAlbedos,
+  probs: LobeProbs,
+}
+
+// todo: implement
+fn placeholderBrdfAlbedo() -> Color {
+  return Color(0.0, 0.0, 0.0);
+}
+
+// GGX roughness (alphaX, alphaY) from specular roughness and anisotropy,
+// floored at 1e-4. The result can exceed 1 for rough anisotropic materials.
+fn specularNDFRoughness(material: Material) -> vec2f {
+  let rsqr = material.specularRoughness * material.specularRoughness;
+  let specularAnisotropyInv = 1.0 - material.specularAnisotropy;
+  let alphaX = rsqr * sqrt(2.0 / (1.0 + (specularAnisotropyInv * specularAnisotropyInv)));
+  let alphaY = specularAnisotropyInv * alphaX;
+
+  let minAlpha = 1.0e-4;
+  return vec2f(max(alphaX, minAlpha), max(alphaY, minAlpha));
+}
+
+// Evaluates the metal microfacet BRDF for a direction pair in the local
+// frame and writes the pdf of sampling woutputL to *pdfWoutputL.
+fn metalBrdfEvaluate(pW: vec3f, basis: Basis, winputL: vec3f, woutputL: vec3f, material: Material, pdfWoutputL: ptr<function, f32>) -> vec3f {
+  if (winputL.z < DENOM_TOLERANCE || woutputL.z < DENOM_TOLERANCE) {
+    (*pdfWoutputL) = PDF_EPSILON;
+    return vec3f(0.0);
+  }
+
+  let rotation = getLocalFrameRotation(2.0 * PI * material.specularRotation);
+  let winputR = localToRotated(winputL, rotation);
+  let woutputR = localToRotated(woutputL, rotation);
+
+  let alpha = specularNDFRoughness(material);
+
+  // Half vector of the pair.
+  let mR = normalize(winputR + woutputR);
+
+  let D = ggxNDFEval(mR, alpha);
+  let DV = D * ggxG1(winputR, alpha) * max(0.0, dot(winputR, mR)) / max(DENOM_TOLERANCE, winputR.z);
+
+  // Jacobian of the half-vector -> reflected-direction mapping.
+  let dwhDwo = 1.0 / max(abs(4.0 * dot(winputR, mR)), DENOM_TOLERANCE);
+  (*pdfWoutputL) = max(PDF_EPSILON, DV * dwhDwo);
+
+  // todo: thin film workflow
+  let F = fresnelF82Tint(abs(dot(winputR, mR)), material.baseWeight * material.baseColor, material.specularWeight * material.specularColor);
+
+  let G2 = ggxG2(winputR, woutputR, alpha);
+
+  // Microfacet BRDF: F * D * G / (4 |n.wo| |n.wi|). The denominator was
+  // previously multiplied instead of divided, unlike metalBrdfSample.
+  return F * D * G2 / max(4.0 * abs(woutputL.z) * abs(winputL.z), DENOM_TOLERANCE);
+}
+
+// Samples an outgoing direction from the metal lobe. Writes the direction to
+// *woutputL and its pdf to *pdfWoutputL and returns the BRDF value.
+fn metalBrdfSample(pW: vec3f, basis: Basis, winputL: vec3f, material: Material, seed: ptr<function, u32>, woutputL: ptr<function, vec3f>, pdfWoutputL: ptr<function, f32>) -> vec3f {
+  if (winputL.z < DENOM_TOLERANCE) {
+    (*pdfWoutputL) = PDF_EPSILON;
+    return vec3f(0.0);
+  }
+
+  let alpha = specularNDFRoughness(material);
+
+  let rotation = getLocalFrameRotation(2.0 * PI * material.specularRotation);
+  let winputR = localToRotated(winputL, rotation);
+
+  let mR = ggxNDFSample(winputR, alpha, seed);
+
+  // Mirror the input direction about the sampled micronormal.
+  let woutputR = -winputR + 2.0 * dot(winputR, mR) * mR;
+  if (winputR.z * woutputR.z < FLT_EPSILON) {
+    // Reflected below the surface: report the same floor pdf as the other
+    // rejection path instead of leaving the output unset.
+    (*pdfWoutputL) = PDF_EPSILON;
+    return vec3f(0.0);
+  }
+  (*woutputL) = rotatedToLocal(woutputR, rotation);
+
+  // Same NDF as metalBrdfEvaluate so sampled and evaluated values agree
+  // (ggxNDF additionally clamps alpha to <= 1).
+  let D = ggxNDFEval(mR, alpha);
+  let DV = D * ggxG1(winputR, alpha) * max(0.0, dot(winputR, mR)) / max(DENOM_TOLERANCE, winputR.z); // todo: should latter max term use abs for .z?
+
+  let dwhDwo = 1.0 / max(abs(4.0 * dot(winputR, mR)), DENOM_TOLERANCE);
+  (*pdfWoutputL) = max(PDF_EPSILON, DV * dwhDwo);
+
+  // todo: implement thin film workflow
+  let F = fresnelF82Tint(abs(dot(winputR, mR)), material.baseWeight * material.baseColor, material.specularWeight * material.specularColor);
+
+  let G2 = ggxG2(winputR, woutputR, alpha);
+
+  return F * D * G2 / max(4.0 * abs((*woutputL).z) * abs(winputL.z), DENOM_TOLERANCE);
+}
+
+// Monte-Carlo estimate of the metal lobe's directional albedo.
+fn metalBrdfAlbedo(material: Material, pW: vec3f, basis: Basis, winputL: vec3f, seed: ptr<function, u32>) -> Color {
+  if (winputL.z < DENOM_TOLERANCE) {
+    return vec3f(0.0);
+  }
+
+  let numSamples = 1;
+  var albedo = vec3f(0.0);
+  // NOTE(review): the loop header and the start of its body were lost in
+  // extraction; they are reconstructed here after specularBrdfAlbedo, which
+  // has the same shape - confirm against the original.
+  for (var n = 0; n < numSamples; n += 1) {
+    var woutputL: vec3f;
+    var pdfWoutputL: f32;
+    let f = metalBrdfSample(pW, basis, winputL, material, seed, &woutputL, &pdfWoutputL);
+    if (length(f) > RADIANCE_EPSILON) {
+      albedo += f * abs(woutputL.z) / max(PDF_EPSILON, pdfWoutputL);
+    }
+  }
+
+  albedo /= f32(numSamples);
+  return albedo;
+}
+
+// Albedo of the diffuse lobe: the weighted base colour, 0 below the surface.
+fn diffuseBrdfAlbedo(material: Material, pW: vec3f, basis: Basis, winputL: vec3f, seed: ptr<function, u32>) -> vec3f {
+  if (winputL.z < DENOM_TOLERANCE) {
+    return vec3f(0.0);
+  }
+  return material.baseWeight * material.baseColor;
+}
+
+// https://github.com/AcademySoftwareFoundation/MaterialX/blob/main/libraries/pbrlib/genglsl/lib/mx_microfacet_diffuse.glsl
+// Rough diffuse reflectance for view V and light L (local frame), including
+// the NdotL factor.
+fn fujiiMaterialX(albedo: vec3f, roughness: f32, V: vec3f, L: vec3f) -> vec3f {
+  let NdotV = V.z;
+  let NdotL = L.z;
+  let s = dot(L, V) - NdotV * NdotL;
+  let stinv = select(0.0, s / max(NdotL, NdotV), s > 0.0f);
+  let sigma2 = sqr(roughness);
+  let A = 1.0 - 0.5 * (sigma2 / (sigma2 + 0.33));
+  let B = 0.45 * sigma2 / (sigma2 + 0.09);
+  return albedo * NdotL / PI * (A + B * stinv);
+}
+
+// Diffuse BRDF value, i.e. fujiiMaterialX with its NdotL factor divided out.
+// NOTE(review): both callers in this file pass (winputL, woutputL), the
+// reverse of the parameter names. The remaining terms are symmetric in the
+// two directions, so only the FLT_EPSILON clamp at grazing angles is
+// affected.
+fn diffuseBrdfEvalImplementation(woutputL: vec3f, winputL: vec3f, material: Material) -> vec3f {
+  let albedo = material.baseWeight * material.baseColor;
+  let V = winputL;
+  let L = woutputL;
+  let NdotL = max(FLT_EPSILON, abs(L.z));
+
+  return fujiiMaterialX(albedo, material.baseDiffuseRoughness, V, L) / NdotL;
+}
+
+// Evaluates the diffuse lobe and writes the cosine-weighted pdf of woutputL.
+// *pdfWoutputL is left untouched when either direction is below the surface.
+fn diffuseBrdfEvaluate(material: Material, pW: vec3f, basis: Basis, winputL: vec3f, woutputL: vec3f, pdfWoutputL: ptr<function, f32>) -> vec3f {
+  if (winputL.z < DENOM_TOLERANCE || woutputL.z < DENOM_TOLERANCE) {
+    return vec3f(0.0);
+  }
+  (*pdfWoutputL) = pdfHemisphereCosineWeighted(woutputL);
+  return diffuseBrdfEvalImplementation(winputL, woutputL, material);
+}
+
+// Samples a cosine-weighted direction for the diffuse lobe. Outputs are left
+// untouched when winputL is below the surface.
+fn diffuseBrdfSample(material: Material, pW: vec3f, basis: Basis, winputL: vec3f, woutputL: ptr<function, vec3f>, pdfWoutputL: ptr<function, f32>, seed: ptr<function, u32>) -> vec3f {
+  if (winputL.z < DENOM_TOLERANCE) {
+    return vec3f(0.0);
+  }
+  (*woutputL) = sampleHemisphereCosineWeighted(pdfWoutputL, seed);
+  return diffuseBrdfEvalImplementation(winputL, *woutputL, material);
+}
+
+// Amplitude reflection coefficients (rs, rp) of a dielectric interface for
+// incidence cosine mui and relative IOR etaTi; (1, 1) on total internal
+// reflection.
+fn fresnelDielectricPolarizations(mui: f32, etaTi: f32) -> vec2f {
+  let mut2 = sqr(etaTi) - (1.0 - sqr(mui));
+  if (mut2 <= 0.0) {
+    return vec2f(1.0);
+  }
+
+  let mut1 = sqrt(mut2) / etaTi;
+  let rs = (mui - etaTi * mut1) / (mui + etaTi * mut1);
+  let rp = (mut1 - etaTi * mui) / (mut1 + etaTi * mui);
+  return vec2f(rs, rp);
+}
+
+// Unpolarised reflectance: mean of the squared amplitude coefficients.
+fn fresnelDielectricReflectance(mui: f32, etaTi: f32) -> f32 {
+  let r = fresnelDielectricPolarizations(mui, etaTi);
+  return 0.5 * dot(r, r);
+}
+
+// Samples the dielectric specular reflection lobe. Writes the direction to
+// *woutputL and its pdf to *pdfWoutputL and returns the tinted BRDF value.
+fn specularBrdfSample(material: Material, pW: vec3f, basis: Basis, winputL: vec3f, seed: ptr<function, u32>, woutputL: ptr<function, vec3f>, pdfWoutputL: ptr<function, f32>) -> vec3f {
+  let externalReflection = winputL.z > 0.0;
+
+  let etaIe = specularIorRatio(material);
+  let etaTiRefl = select(1.0 / etaIe, etaIe, externalReflection);
+  // Index-matched interface: nothing is reflected.
+  if (abs(etaTiRefl - 1.0) < IOR_EPSILON) {
+    // (*pdfWoutputL) = PDF_EPSILON; // todo: reset?
+    return vec3f(0.0);
+  }
+
+  let tint = material.specularColor;
+
+  let alpha = specularNDFRoughness(material);
+
+  let rotation = getLocalFrameRotation(2.0 * PI * material.specularRotation);
+  let winputR = localToRotated(winputL, rotation);
+
+  // ggxNDFSample expects a +z direction; for internal reflection sample the
+  // mirrored direction and flip the micronormal back.
+  var mR: vec3f;
+  if (winputR.z > 0.0) {
+    mR = ggxNDFSample(winputR, alpha, seed);
+  } else {
+    var winputRReflected = winputR;
+    winputRReflected.z = -winputRReflected.z;
+    mR = ggxNDFSample(winputRReflected, alpha, seed);
+    mR.z = -mR.z;
+  }
+
+  let woutputR = -winputR + 2.0 * dot(winputR, mR) * mR;
+  if (winputR.z * woutputR.z < 0.0) {
+    (*pdfWoutputL) = 1.0;
+    return vec3f(0.0);
+  }
+
+  (*woutputL) = rotatedToLocal(woutputR, rotation);
+
+  let D = ggxNDFEval(mR, alpha);
+  let DV = D * ggxG1(winputR, alpha) * abs(dot(winputR, mR)) / max(DENOM_TOLERANCE, abs(winputR.z));
+
+  let dwhDwo = 1.0 / max(abs(4.0 * dot(winputR, mR)), DENOM_TOLERANCE);
+  (*pdfWoutputL) = DV * dwhDwo;
+
+  let G2 = ggxG2(winputR, woutputR, alpha);
+
+  // todo: coat workflow
+  let F = vec3f(fresnelDielectricReflectance(abs(dot(winputR, mR)), etaTiRefl));
+
+  let f = F * D * G2 / max(4.0 * abs((*woutputL).z) * abs(winputL.z), DENOM_TOLERANCE);
+
+  return f * tint;
+}
+
+// Evaluates the dielectric specular reflection lobe for a direction pair and
+// writes the sampling pdf. *pdfWoutputL is left untouched on the early
+// returns.
+fn specularBrdfEvaluate(material: Material, pW: vec3f, basis: Basis, winputL: vec3f, woutputL: vec3f, pdfWoutputL: ptr<function, f32>) -> vec3f {
+  let transmitted = woutputL.z * winputL.z < 0.0;
+  if (transmitted) {
+    // (*pdfWoutputL) = PDF_EPSILON; todo: reset?
+    return vec3f(0.0);
+  }
+
+  let externalReflection = winputL.z > 0.0;
+
+  let etaIe = specularIorRatio(material);
+  let etaTiRefl = select(1.0 / etaIe, etaIe, externalReflection);
+  if (abs(etaTiRefl - 1.0) < IOR_EPSILON) {
+    return vec3f(0.0);
+  }
+
+  let tint = material.specularColor;
+
+  let alpha = specularNDFRoughness(material);
+
+  let rotation = getLocalFrameRotation(2.0 * PI * material.specularRotation);
+  let winputR = localToRotated(winputL, rotation);
+  let woutputR = localToRotated(woutputL, rotation);
+
+  let mR = normalize(woutputR + winputR);
+
+  // Reject micronormals facing away from either direction.
+  if (dot(mR, winputR) * winputR.z < 0.0 || dot(mR, woutputR) * woutputR.z < 0.0) {
+    return vec3f(0.0);
+  }
+
+  let D = ggxNDFEval(mR, alpha);
+  let DV = D * ggxG1(winputR, alpha) * max(0.0, dot(winputR, mR)) / max(DENOM_TOLERANCE, winputR.z);
+
+  let dwhDwo = 1.0 / max(abs(4.0 * dot(winputR, mR)), DENOM_TOLERANCE);
+  (*pdfWoutputL) = DV * dwhDwo;
+
+  let G2 = ggxG2(winputR, woutputR, alpha);
+
+  // todo: coat workflow
+  let F = vec3f(fresnelDielectricReflectance(abs(dot(winputR, mR)), etaTiRefl));
+
+  let f = F * D * G2 / max(4.0 * abs(woutputL.z) * abs(winputL.z), DENOM_TOLERANCE);
+  return f * tint;
+}
+
+// Specular IOR relative to the medium above it: ambient (IOR 1) blended
+// towards the coat IOR by the coat weight.
+fn etaS(material: Material) -> f32 {
+  const ambientIor = 1.0;
+  let coatIorAverage = mix(ambientIor, material.coatIor, material.coatWeight);
+  return material.specularIor / coatIorAverage;
+}
+
+// Fresnel reflectance at normal incidence for the ratio returned by etaS.
+fn fresnelReflNormalIncidence(material: Material) -> f32 {
+  let eta = etaS(material);
+  return sqr((eta - 1.0) / (eta + 1.0));
+}
+
+// IOR ratio modified so that the normal-incidence reflectance is scaled by
+// the specular weight.
+fn specularIorRatio(material: Material) -> f32 {
+  let Fs = fresnelReflNormalIncidence(material);
+  let xiS = clamp(material.specularWeight, 0.0, 1.0 / max(Fs, DENOM_TOLERANCE));
+  let eta = etaS(material);
+  let temp = min(1.0, sign(eta - 1.0) * sqrt(xiS * Fs));
+  return (1.0 + temp) / max(1.0 - temp, DENOM_TOLERANCE);
+}
+
+// Monte-Carlo estimate of the specular lobe's directional albedo.
+fn specularBrdfAlbedo(material: Material, pW: vec3f, basis: Basis, winputL: vec3f, seed: ptr<function, u32>) -> vec3f {
+  let etaIe = specularIorRatio(material);
+  if (abs(etaIe - 1.0) < IOR_EPSILON) {
+    return vec3f(0.0);
+  }
+
+  const samples = 1;
+  var albedo = vec3f(0.0);
+  for (var n = 0; n < samples; n += 1) {
+    var woutputL: vec3f;
+    var pdfWoutputL: f32;
+    let f = specularBrdfSample(material, pW, basis, winputL, seed, &woutputL, &pdfWoutputL);
+    if (length(f) > RADIANCE_EPSILON) {
+      albedo += f * abs(woutputL.z) / max(DENOM_TOLERANCE, pdfWoutputL);
+    }
+  }
+  albedo /= f32(samples);
+
+  return albedo;
+}
+
+struct WeightsAndAlbedo {
+  weights: LobeWeights,
+  albedos: LobeAlbedos,
+}
+
+// Computes the weight and albedo of every lobe for the given material and
+// input direction. Fuzz, transmission and subsurface are not implemented yet
+// and are fixed at 0.
+fn openPbrLobeWeights(pW: vec3f, basis: Basis, winputL: vec3f, material: Material, seed: ptr<function, u32>) -> WeightsAndAlbedo {
+  // todo: fuzz weight F - move to material definition fuzzWeight
+  let C = material.coatWeight;
+  let M = material.baseMetalness;
+  let T = 0.0; // todo: move to material definition transmissionWeight
+  let S = 0.0; // todo: move to material definition subsurfaceWeight
+
+  let coated = C > 0.0;
+  let metallic = M > 0.0;
+  let fullyMetallic = M == 1.0;
+  let transmissive = T > 0.0;
+  let fullyTransmissive = T == 1.0;
+  let subsurfaced = S > 0.0;
+  let fullySubsurfaced = S == 1.0;
+
+  var albedos = LobeAlbedos();
+  albedos.m[ID_COAT_BRDF] = select(vec3f(0.0), placeholderBrdfAlbedo(), coated);
+  albedos.m[ID_META_BRDF] = select(vec3f(0.0), metalBrdfAlbedo(material, pW, basis, winputL, seed), metallic);
+  albedos.m[ID_SPEC_BRDF] = select(vec3f(0.0), specularBrdfAlbedo(material, pW, basis, winputL, seed), !fullyMetallic);
+  albedos.m[ID_SPEC_BTDF] = select(vec3f(0.0), placeholderBrdfAlbedo(), !fullyMetallic && transmissive);
+  albedos.m[ID_DIFF_BRDF] = select(vec3f(0.0), diffuseBrdfAlbedo(material, pW, basis, winputL, seed), !fullyMetallic && !fullyTransmissive && !fullySubsurfaced);
+  albedos.m[ID_SSSC_BTDF] = select(vec3f(0.0), placeholderBrdfAlbedo(), !fullyMetallic && !fullyTransmissive && subsurfaced);
+
+  var weights = LobeWeights();
+
+  weights.m[ID_FUZZ_BRDF] = vec3f(0.0); // todo: check
+
+  let wCoatedBase = vec3f(1.0); // todo: check
+
+  weights.m[ID_COAT_BRDF] = wCoatedBase * C;
+
+  // todo: implement coat workflow
+  let baseDarkening = vec3f(1.0); // todo: check
+  let materialCoatColor = vec3f(1.0); // todo: move to material definition (coat_color)
+  let wBaseSubstrate = wCoatedBase * mix(vec3f(1.0), baseDarkening * materialCoatColor * (vec3f(1.0) - albedos.m[ID_COAT_BRDF]), C);
+
+  weights.m[ID_META_BRDF] = wBaseSubstrate * M;
+
+  let wDielectricBase = wBaseSubstrate * vec3f(max(0.0, 1.0 - M));
+
+  weights.m[ID_SPEC_BRDF] = wDielectricBase;
+
+  weights.m[ID_SPEC_BTDF] = wDielectricBase * T;
+
+  let wOpaqueDielectricBase = wDielectricBase * (1.0 - T);
+
+  weights.m[ID_SSSC_BTDF] = wOpaqueDielectricBase * S;
+
+  // The diffuse base only receives what the specular layer did not reflect.
+  weights.m[ID_DIFF_BRDF] = wOpaqueDielectricBase * (1.0 - S) * (vec3f(1.0) - albedos.m[ID_SPEC_BRDF]);
+
+  return WeightsAndAlbedo(
+    weights,
+    albedos
+  );
+}
+
+
+// Lobe selection probabilities: |weight * albedo| per lobe, normalised to
+// sum to 1 (all 0 when every lobe is black).
+fn openPbrLobeProbabilities(weights: LobeWeights, albedos: LobeAlbedos) -> LobeProbs {
+  var probs = LobeProbs();
+  var Wtotal = 0.0;
+  for (var lobeId = 0; lobeId < NUM_LOBES; lobeId += 1) {
+    probs.m[lobeId] = length(weights.m[lobeId] * albedos.m[lobeId]);
+    Wtotal += probs.m[lobeId];
+  }
+  Wtotal = max(DENOM_TOLERANCE, Wtotal);
+  for (var lobeId = 0; lobeId < NUM_LOBES; lobeId += 1) {
+    probs.m[lobeId] /= Wtotal;
+  }
+  return probs;
+}
+
+// Weights, albedos and selection probabilities for one shading point.
+fn openPbrPrepare(pW: vec3f, basis: Basis, winputL: vec3f, material: Material, seed: ptr<function, u32>) -> LobeData {
+  let weightsAndAlbedo = openPbrLobeWeights(pW, basis, winputL, material, seed);
+  let probs = openPbrLobeProbabilities(weightsAndAlbedo.weights, weightsAndAlbedo.albedos);
+
+  return LobeData(
+    weightsAndAlbedo.weights,
+    weightsAndAlbedo.albedos,
+    probs,
+  );
+}
+
+const PDF_EPSILON = 1.0e-6;
+const IOR_EPSILON = 1.0e-5;
+const RAY_OFFSET = 1.0e-4;
+
+// Pdf of a cosine-weighted hemisphere sample, floored for grazing and
+// below-surface directions.
+fn pdfHemisphereCosineWeighted(wiL: vec3f) -> f32 {
+  if (wiL.z <= PDF_EPSILON) {
+    return PDF_EPSILON / PI;
+  }
+  return wiL.z / PI;
+}
+
+// Cosine-weighted direction in the +z hemisphere; writes its pdf to *pdf.
+// Draws two random numbers from seed.
+fn sampleHemisphereCosineWeighted(pdf: ptr<function, f32>, seed: ptr<function, u32>) -> vec3f {
+  let r = sqrt(randomF32(seed));
+  let theta = 2.0 * PI * randomF32(seed);
+  let x = r * cos(theta);
+  let y = r * sin(theta);
+  let z = sqrt(max(0.0, 1.0 - x * x - y * y));
+  (*pdf) = max(PDF_EPSILON, abs(z) / PI);
+  return vec3f(x, y, z);
+}
+
+// Pdf with which skySample produces the direction (cosine-weighted).
+fn skyPdf(woutputL: vec3f, woutputWs: vec3f) -> f32 {
+  return pdfHemisphereCosineWeighted(woutputL);
+}
+
+// Pdf with which sunSample produces the direction: uniform over the sun's
+// cone, 0 outside of it.
+fn sunPdf(woutputL: vec3f, woutputW: vec3f) -> f32 {
+  let thetaMax = uniformData.sunAngularSize * PI / 180.0;
+  if (dot(woutputW, uniformData.sunDirection) < cos(thetaMax)) {
+    return 0.0;
+  }
+  let solidAngle = 2.0 * PI * (1.0 - cos(thetaMax));
+  return 1.0 / solidAngle;
+}
+
+// Sun radiance magnitude times the solid angle of its cone.
+fn sunTotalPower() -> f32 {
+  let thetaMax = uniformData.sunAngularSize * PI / 180.0;
+  let solidAngle = 2.0 * PI * (1.0 - cos(thetaMax));
+  return length(uniformData.sunPower * uniformData.sunColor) * solidAngle;
+}
+
+// Sky radiance magnitude times the solid angle of the hemisphere.
+fn skyTotalPower() -> f32 {
+  return length(uniformData.skyPower * uniformData.skyColor) * 2.0 * PI;
+}
+
+// Sun radiance arriving from woutputW; 0 outside the sun's cone.
+fn sunRadiance(woutputW: vec3f) -> vec3f {
+  let thetaMax = uniformData.sunAngularSize * PI / 180.0;
+  if (dot(woutputW, uniformData.sunDirection) < cos(thetaMax)) {
+    return vec3f(0.0);
+  }
+  return uniformData.sunPower * uniformData.sunColor;
+}
+
+fn skyRadiance() -> vec3f {
+  return uniformData.skyPower * uniformData.skyColor;
+}
+
+// Combined pdf of choosing direction shadowW by light sampling: sun and sky
+// pdfs mixed by their share of the total power.
+fn lightPdf(shadowW: vec3f, basis: Basis) -> f32 {
+  let shadowL = worldToLocal(shadowW, basis);
+  let pdfSky = skyPdf(shadowL, shadowW);
+  let pdfSun = sunPdf(shadowL, shadowW);
+  let wSun = sunTotalPower();
+  let wSky = skyTotalPower();
+  // Guard the division: with both lights switched off the ratio was 0/0.
+  let pSun = wSun / max(DENOM_TOLERANCE, wSun + wSky);
+  let pSky = max(0.0, 1.0 - pSun);
+
+  return pSun * pdfSun + pSky * pdfSky;
+}
+
+// Power heuristic (exponent 2) for multiple importance sampling.
+fn powerHeuristic(a: f32, b: f32) -> f32 {
+  let a2 = sqr(a);
+  return a2 / max(DENOM_TOLERANCE, a2 + sqr(b));
+}
+
+// Stand-in for lobes whose sampling is not implemented yet.
+fn brdfSamplePlaceholder() -> vec3f {
+  return vec3f(0.0);
+}
+
+// Stand-in for lobes whose evaluation is not implemented yet.
+fn brdfEvaluatePlaceholder() -> vec3f {
+  return vec3f(0.0);
+}
+
+// NOTE(review): the `<...>` template arguments of this file were lost in
+// extraction. They are restored below from usage; the `function` address
+// space on `ptr` parameters follows from call sites that pass `&localVar`
+// and is assumed for `seed` - confirm against the entry point.
+//
+// Sums the weighted BSDF value of every active lobe except skipLobeId for a
+// direction pair, and stores each evaluated lobe's pdf in (*pdfs).m. Pass -1
+// as skipLobeId to evaluate all lobes.
+fn openpbrBsdfEvaluateLobes(pW: vec3f, basis: Basis, material: Material, winputL: vec3f, woutputL: vec3f, skipLobeId: i32, lobeData: LobeData, pdfs: ptr<function, LobePDFs>) -> vec3f {
+  var f = vec3f(0.0);
+  if (skipLobeId != ID_FUZZ_BRDF && lobeData.probs.m[ID_FUZZ_BRDF] > 0.0) {
+    f += vec3f(0.0);
+  }
+  if (skipLobeId != ID_COAT_BRDF && lobeData.probs.m[ID_COAT_BRDF] > 0.0) {
+    f += lobeData.weights.m[ID_COAT_BRDF] * brdfEvaluatePlaceholder();
+  }
+  if (skipLobeId != ID_META_BRDF && lobeData.probs.m[ID_META_BRDF] > 0.0) {
+    // Weighted like every other lobe; the weight was missing here although
+    // sampleBsdf applies it to the sampled metal lobe.
+    f += lobeData.weights.m[ID_META_BRDF] * metalBrdfEvaluate(pW, basis, winputL, woutputL, material, &(*pdfs).m[ID_META_BRDF]);
+  }
+  if (skipLobeId != ID_SPEC_BRDF && lobeData.probs.m[ID_SPEC_BRDF] > 0.0) {
+    f += lobeData.weights.m[ID_SPEC_BRDF] * specularBrdfEvaluate(material, pW, basis, winputL, woutputL, &(*pdfs).m[ID_SPEC_BRDF]);
+  }
+  if (skipLobeId != ID_DIFF_BRDF && lobeData.probs.m[ID_DIFF_BRDF] > 0.0) {
+    f += lobeData.weights.m[ID_DIFF_BRDF] * diffuseBrdfEvaluate(material, pW, basis, winputL, woutputL, &(*pdfs).m[ID_DIFF_BRDF]);
+  }
+
+  let evalSpecBtdf = skipLobeId != ID_SPEC_BTDF && lobeData.probs.m[ID_SPEC_BTDF] > 0.0;
+  let evalSsscBtdf = skipLobeId != ID_SSSC_BTDF && lobeData.probs.m[ID_SSSC_BTDF] > 0.0;
+  let evalTransmission = evalSpecBtdf || evalSsscBtdf;
+  if (evalTransmission) {
+    // todo: implement
+  }
+
+  return f;
+}
+
+// Pdf of the full BSDF: per-lobe pdfs mixed by the lobe selection
+// probabilities.
+fn openpbrBsdfTotalPdf(pdfs: LobePDFs, lobeData: LobeData) -> f32 {
+  var pdfWoutputL = 0.0;
+  for (var lobeId = 0; lobeId < NUM_LOBES; lobeId += 1) {
+    pdfWoutputL += lobeData.probs.m[lobeId] * pdfs.m[lobeId];
+  }
+  return pdfWoutputL;
+}
+
+// Lobe indices into the LobeWeights / LobeAlbedos / LobeProbs / LobePDFs
+// tables.
+const ID_FUZZ_BRDF = 0;
+const ID_COAT_BRDF = 1;
+const ID_META_BRDF = 2;
+const ID_SPEC_BRDF = 3;
+const ID_SPEC_BTDF = 4;
+const ID_DIFF_BRDF = 5;
+const ID_SSSC_BTDF = 6;
+const NUM_LOBES = 7;
+
+// Picks one lobe according to lobeData.probs, samples a direction from it and
+// returns the value of the full BSDF for that direction. Writes the direction
+// to *woutputL and the pdf of the full BSDF to *pdfWoutputL.
+fn sampleBsdf(pW: vec3f, basis: Basis, winputL: vec3f, lobeData: LobeData, material: Material, woutputL: ptr<function, vec3f>, pdfWoutputL: ptr<function, f32>, seed: ptr<function, u32>) -> vec3f {
+  let X = randomF32(seed);
+  var CDF = 0.0;
+
+  for (var lobeId = 0; lobeId < NUM_LOBES; lobeId += 1) {
+    CDF += lobeData.probs.m[lobeId];
+    if (X < CDF) {
+      // Sample the chosen lobe. Placeholder lobes leave pdfLobe and
+      // *woutputL at their current values.
+      var pdfLobe: f32;
+      var fLobe: vec3f;
+      if (lobeId == ID_FUZZ_BRDF) { fLobe = brdfSamplePlaceholder(); }
+      else if (lobeId == ID_COAT_BRDF) { fLobe = brdfSamplePlaceholder(); }
+      else if (lobeId == ID_META_BRDF) {
+        fLobe = metalBrdfSample(pW, basis, winputL, material, seed, woutputL, &pdfLobe);
+      }
+      else if (lobeId == ID_SPEC_BRDF) {
+        fLobe = specularBrdfSample(material, pW, basis, winputL, seed, woutputL, &pdfLobe);
+      }
+      else if (lobeId == ID_SPEC_BTDF) { fLobe = brdfSamplePlaceholder(); }
+      else if (lobeId == ID_SSSC_BTDF) { fLobe = brdfSamplePlaceholder(); }
+      else if (lobeId == ID_DIFF_BRDF) {
+        fLobe = diffuseBrdfSample(material, pW, basis, winputL, woutputL, &pdfLobe, seed);
+      }
+      else { break; }
+
+      // Add the contribution of all other lobes for the sampled direction.
+      var pdfs: LobePDFs;
+      var f = openpbrBsdfEvaluateLobes(pW, basis, material, winputL, *woutputL, lobeId, lobeData, &pdfs);
+      f += lobeData.weights.m[lobeId] * fLobe;
+
+      pdfs.m[lobeId] = pdfLobe;
+      (*pdfWoutputL) = openpbrBsdfTotalPdf(pdfs, lobeData);
+
+      // todo: volume - directions transmitted into the surface
+      // ((*woutputL).z * winputL.z < 0 with (*woutputL).z < 0) currently get
+      // no special treatment.
+      return f;
+    }
+  }
+
+  // No lobe selected (all probabilities 0, or X at the very top of the
+  // range).
+  (*pdfWoutputL) = 1.0;
+  return vec3f(0.0);
+}
+
+// Value and pdf of the full BSDF for a given direction pair.
+fn evaluateBsdf(pW: vec3f, basis: Basis, winputL: vec3f, woutputL: vec3f, lobeData: LobeData, material: Material, pdfWoutputL: ptr<function, f32>) -> vec3f {
+  var pdfs: LobePDFs;
+  let f = openpbrBsdfEvaluateLobes(pW, basis, material, winputL, woutputL, -1, lobeData, &pdfs);
+  (*pdfWoutputL) = openpbrBsdfTotalPdf(pdfs, lobeData);
+
+  return f;
+}
+
+// Emitted radiance of the material.
+fn evaluateEdf(material: Material) -> vec3f {
+  return material.emissionColor * material.emissionLuminance;
+}
+
+// Samples a direction inside the sun's cone (expressed in sunBasis). Writes
+// it in world space to *woutputW, in the shading frame to *woutputL, and its
+// pdf to *pdfDir. Returns the sun radiance.
+fn sunSample(basis: Basis, sunBasis: Basis, woutputL: ptr<function, vec3f>, woutputW: ptr<function, vec3f>, pdfDir: ptr<function, f32>, seed: ptr<function, u32>) -> vec3f {
+  let thetaMax = uniformData.sunAngularSize * PI / 180.0;
+  let theta = thetaMax * sqrt(randomF32(seed));
+  let cosTheta = cos(theta);
+  let sinTheta = sqrt(max(0.0, 1.0 - cosTheta * cosTheta));
+  let phi = 2.0 * PI * randomF32(seed);
+  let x = sinTheta * cos(phi);
+  let y = sinTheta * sin(phi);
+  let z = cosTheta;
+  let solidAngle = 2.0 * PI * (1.0 - cos(thetaMax));
+  *pdfDir = 1.0 / solidAngle;
+  *woutputW = localToWorld(vec3f(x, y, z), sunBasis);
+  *woutputL = worldToLocal(*woutputW, basis);
+  return uniformData.sunPower * uniformData.sunColor;
+}
+
+// Samples a cosine-weighted sky direction around the shading normal. Writes
+// the direction (local and world) and its pdf; returns the sky radiance.
+fn skySample(basis: Basis, woutputL: ptr<function, vec3f>, woutputW: ptr<function, vec3f>, pdfDir: ptr<function, f32>, seed: ptr<function, u32>) -> vec3f {
+  *woutputL = sampleHemisphereCosineWeighted(pdfDir, seed);
+  *woutputW = localToWorld(*woutputL, basis);
+  return skyRadiance();
+}
+
+// Samples the sun or the sky (chosen by their share of the total power) and
+// returns the unoccluded radiance arriving from the sampled direction.
+// Writes the direction to *shadowL / *shadowW and the combined pdf to
+// *lightPdf.
+fn getDirectLighting(pW: vec3f, basis: Basis, sunBasis: Basis, shadowL: ptr<function, vec3f>, shadowW: ptr<function, vec3f>, lightPdf: ptr<function, f32>, seed: ptr<function, u32>) -> vec3f {
+  var Li: vec3f;
+
+  let wSun = sunTotalPower();
+  let wSky = skyTotalPower();
+  // Guard the division: with both lights switched off the ratio was 0/0.
+  let pSun = wSun / max(DENOM_TOLERANCE, wSun + wSky);
+  let pSky = max(0.0, 1.0 - pSun);
+  var pdfSun: f32;
+  var pdfSky: f32;
+  let r = randomF32(seed);
+  if (r < pSun) {
+    Li = sunSample(basis, sunBasis, shadowL, shadowW, &pdfSun, seed);
+    Li += skyRadiance();
+    pdfSky = skyPdf(*shadowL, *shadowW);
+  } else {
+    Li = skySample(basis, shadowL, shadowW, &pdfSky, seed);
+    Li += sunRadiance(*shadowW);
+    pdfSun = sunPdf(*shadowL, *shadowW);
+  }
+  *lightPdf = pSun * pdfSun + pSky * pdfSky;
+
+  // Light from below the surface or too dim to matter: skip the shadow ray.
+  if ((*shadowL).z < 0.0) {
+    return vec3f(0.0);
+  }
+  if (maxVec3(Li) < RADIANCE_EPSILON) {
+    return vec3f(0.0);
+  }
+
+  let occluded = isOccluded(Ray(pW, *shadowW), TRIANGLE_MAX_DISTANCE_THRESHOLD);
+  let visibility = select(1.0, 0.0, occluded);
+
+  return visibility * Li;
+}
+
+// True when the ray hits any geometry closer than maxDistance.
+fn isOccluded(ray: Ray, maxDistance: f32) -> bool {
+  var hitRecord = HitRecord();
+  hitRecord.t = maxDistance;
+  return hittableListHit(ray, Interval(TRIANGLE_MIN_DISTANCE_THRESHOLD, maxDistance), &hitRecord);
+}
+
+const TRIANGLE_MIN_DISTANCE_THRESHOLD = 0.0005;
+const TRIANGLE_MAX_DISTANCE_THRESHOLD = 10e37f;
+
+// Value written for one camera ray: depending on uniformData.mode either the
+// surface normal (0) or the weighted base colour (any other value) of the
+// first hit. seed is currently unused.
+fn getRayOutput(cameraRay: Ray, seed: ptr<function, u32>) -> vec4f {
+  var hitRecord: HitRecord;
+
+  // todo: handle setting t nicely
+  hitRecord.t = TRIANGLE_MAX_DISTANCE_THRESHOLD;
+  let hit = hittableListHit(cameraRay, Interval(TRIANGLE_MIN_DISTANCE_THRESHOLD, TRIANGLE_MAX_DISTANCE_THRESHOLD), &hitRecord);
+
+  // todo: consider normal handling
+
+  if (!hit) {
+    // todo: handle this more appropriately
+    return vec4f(0.5, 1.0, 0.5, 1.0);
+  }
+
+  let material = materials[hitRecord.material.index];
+
+  // Surface Normal
+  let NsW = hitRecord.normal;
+
+  if (uniformData.mode == 0) {
+    return vec4f(-NsW.x, NsW.y, -NsW.z, 1.0);
+  }
+  return vec4f(material.baseColor * material.baseWeight, 1.0);
+}
+
+// Stores pixelColor at texel (x, y) of the output texture.
+fn writeColor(pixelColor: vec4f, x: i32, y: i32) {
+  textureStore(texture, vec2<i32>(x, y), pixelColor);
+}
+
+// Maps a uniform xi in [0, 1] to a triangle-filter offset in [-1, 1].
+fn sampleTriangleFilter(xi: f32) -> f32 {
+  return select(1.0 - sqrt(2.0 - 2.0 * xi), sqrt(2.0 * xi) - 1.0, xi < 0.5);
+}
+
+// CODE#VIEWPROJECTION
+// Builds the camera ray through NDC coordinate coord, with its origin moved
+// onto the near plane. The direction is not normalised. seed is currently
+// unused.
+fn ndcToCameraRay(coord: vec2f, cameraWorld: mat4x4<f32>, invProjectionMatrix: mat4x4<f32>, seed: ptr<function, u32>) -> Ray {
+  let lookDirection = cameraWorld * vec4f(0.0, 0.0, -1.0, 0.0);
+  let nearVector = invProjectionMatrix * vec4f(0.0, 0.0, -1.0, 1.0);
+  let near = abs(nearVector.z / nearVector.w);
+
+  var origin = cameraWorld * vec4f(0.0, 0.0, 0.0, 1.0);
+
+  var direction = invProjectionMatrix * vec4f(coord.x, -coord.y, 0.5, 1.0);
+  direction /= direction.w;
+  direction = cameraWorld * direction - origin;
+
+  origin += vec4f(direction.xyz * near / dot(direction, lookDirection), 0.0);
+
+  return Ray(
+    origin.xyz,
+    direction.xyz
+  );
+}
+
+// Sub-pixel offset in [-0.5, 0.5]^2 drawn from a triangle filter.
+fn getPixelJitter(seed: ptr<function, u32>) -> vec2f {
+  let jitterX = 0.5 * sampleTriangleFilter(randomF32(seed));
+  let jitterY = 0.5 * sampleTriangleFilter(randomF32(seed));
+  return vec2f(jitterX, jitterY);
+}
+
+@compute
+@workgroup_size(${maxWorkgroupDimension}, ${maxWorkgroupDimension}, 1) +fn computeMain(@builtin(global_invocation_id) globalId: vec3) { + var seed = globalId.x + globalId.y * ${imageWidth}; + seed ^= uniformData.seedOffset; + + let pixelOrigin = vec2f(f32(globalId.x), f32(globalId.y)); + + let pixel = pixelOrigin; + let ndc = -1.0 + 2.0*pixel / vec2(${imageWidth}, ${imageHeight}); + + var ray = ndcToCameraRay(ndc, uniformData.invModelMatrix * uniformData.cameraWorldMatrix, uniformData.invProjectionMatrix, &seed); + ray.direction = normalize(ray.direction); + + let output = getRayOutput(ray, &seed); + + //writeColor(output, i32(globalId.x), i32(globalId.y)); + //writeColor(vec4f(1.0), i32(globalId.x), i32(globalId.y)); + hdrColor[i32(globalId.x) + i32(globalId.y) * ${imageWidth}] = output; +} diff --git a/strahl-lib/src/full-oidn-pass.ts b/strahl-lib/src/full-oidn-pass.ts new file mode 100644 index 0000000..fa7f326 --- /dev/null +++ b/strahl-lib/src/full-oidn-pass.ts @@ -0,0 +1,350 @@ +// Source: https://github.com/pissang/oidn-web +export const fullScreenQuadVertexShaderWGSL = /*wgsl */ ` +@vertex +fn main( + @builtin(vertex_index) vertexIndex: u32 +) -> @builtin(position) vec4f { + const pos = array( + vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), + vec2(-1.0, 1.0), vec2(1.0, -1.0), vec2(1.0, 1.0), + ); + + return vec4(pos[vertexIndex], 0.0, 1.0); +} +`; + +function isTextureParamsEqual( + params: WGPUFullQuadPassOutput, + other: WGPUFullQuadPassOutput, +) { + return params.format === other.format; +} + +export interface Uniform { + label: string; + type: string; + data: Float32Array | Int32Array | Uint32Array; +} + +export interface WGPUFullQuadPassOutput { + format: GPUTextureFormat; +} +export class WGPUFullQuadPass { + private _label; + + private _device; + private _outputTextures: Record< + string, + { + texture: GPUTexture; + params: WGPUFullQuadPassOutput; + } + > = {}; + + private _pipeline!: GPURenderPipeline; + private _bindGroups: GPUBindGroup[] 
= []; + private _needsUpdatePipeline = true; + /** + * When render to canvas + */ + private _renderToScreen?: { + screenTexture: GPUTexture; + presentationFormat: GPUTextureFormat; + }; + private _inputs: string[] = []; + private _outputs: string[] = []; + private _uniforms: Uniform[] = []; + private _uniformBuffers: Record = {}; + + private _width = 10; + private _height = 10; + + private _fsCode = ""; + private _fsMain; + private _fsDefine; + + constructor( + label: string, + device: GPUDevice, + opts: { + inputs: I[]; + outputs: O[]; + fsMain: string; + fsDefine?: string; + uniforms: Uniform[]; + }, + ) { + this._label = label; + this._device = device; + this._fsMain = opts.fsMain; + this._fsDefine = opts.fsDefine; + this._inputs = opts.inputs; + this._outputs = opts.outputs; + this._uniforms = opts.uniforms; + + opts.uniforms.forEach((uniform) => { + this._uniformBuffers[uniform.label] = device.createBuffer({ + label: this._label, + size: uniform.data.byteLength, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, + }); + this._device.queue.writeBuffer( + this._uniformBuffers[uniform.label], + 0, + uniform.data, + ); + }); + } + + setSize(width: number, height: number) { + width = Math.ceil(width); + height = Math.ceil(height); + const sizeChanged = width !== this._width || height !== this._height; + this._width = width; + this._height = height; + if (sizeChanged) { + this._resizeOutputTextures(); + this._needsUpdatePipeline = true; + } + } + + setOutputParams(outputParams: Record) { + this._renderToScreen = undefined; + this._updateOutputTextures(outputParams); + this._needsUpdatePipeline = true; + } + + setRenderToScreen( + screenTexture: GPUTexture, + presentationFormat: GPUTextureFormat, + ) { + this._renderToScreen = { + screenTexture, + presentationFormat, + }; + } + + setUniform(label: string, data: Float32Array | Int32Array | Uint32Array) { + const buffer = this._uniformBuffers[label]; + this._device.queue.writeBuffer(buffer, 0, data); + } + + 
getOutputTexture(name: O) { + return this._outputTextures[name].texture; + } + + dispose() { + Object.keys(this._uniformBuffers).forEach((key) => { + (this._uniformBuffers as any)[key].destroy(); + }); + Object.keys(this._outputTextures).forEach((key) => { + (this._outputTextures as any)[key].texture.destroy(); + }); + } + + private _createTexture(params: WGPUFullQuadPassOutput) { + return this._device.createTexture({ + label: this._label, + size: { + width: this._width, + height: this._height, + depthOrArrayLayers: 1, + }, + format: params.format, + usage: + GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.TEXTURE_BINDING, + }); + } + + private _resizeOutputTextures() { + const outputTextures = this._outputTextures; + for (const key in outputTextures) { + const { texture, params } = outputTextures[key]; + texture.destroy(); + outputTextures[key].texture = this._createTexture(params); + } + } + + private _updateOutputTextures( + outputParams: Record, + ) { + const outputTextures = this._outputTextures; + for (const key in outputParams) { + const params = outputParams[key]; + if ( + !isTextureParamsEqual( + params, + outputTextures[key]?.params || ({} as WGPUFullQuadPassOutput), + ) + ) { + outputTextures[key]?.texture.destroy(); + const texture = this._createTexture(params); + outputTextures[key] = { + texture, + params, + }; + } + } + } + + private _updatePipeline() { + if (!this._needsUpdatePipeline) { + return; + } + this._needsUpdatePipeline = false; + const device = this._device; + const fsCode = this._getFullFs(); + if (fsCode === this._fsCode) { + return; + } + const { screenTexture, presentationFormat } = this._renderToScreen || {}; + this._fsCode = fsCode; + this._pipeline = device.createRenderPipeline({ + label: this._label, + layout: "auto", + vertex: { + module: device.createShaderModule({ + label: this._label, + code: fullScreenQuadVertexShaderWGSL, + }), + entryPoint: "main", + }, + fragment: { + module: device.createShaderModule({ + label: 
this._label, + code: fsCode, + }), + entryPoint: "main", + targets: screenTexture + ? [ + { + format: presentationFormat!, + }, + ] + : this._outputs.map((key) => ({ + format: this._outputTextures[key].params.format, + })), + }, + primitive: { + topology: "triangle-list", + }, + }); + this._updateBindGroups(); + } + + private _getFullFs() { + const inputs = this._inputs; + const hasInputs = inputs.length > 0; + const fs = ` +${inputs + .sort() + .map( + (textureName, idx) => + `@group(0) @binding(${idx}) var ${textureName}: texture_2d;`, + ) + .join("\n")} +${this._uniforms + .map( + (uniform, idx) => + `@group(${hasInputs ? 1 : 0}) @binding(${idx}) var ${ + uniform.label + }: ${uniform.type};`, + ) + .join("\n")} + +struct FSOutput { +${this._outputs + .map((name, idx) => `@location(${idx}) ${name}: vec4f,`) + .join("\n")} +} +${this._fsDefine ?? ""} +@fragment +fn main( + @builtin(position) coord: vec4f +) -> FSOutput { + var uv = vec2i(floor(coord.xy)); + var output: FSOutput; +${this._fsMain} + return output; +} +`; + + return fs; + } + + private _updateBindGroups() { + const bindGroups: GPUBindGroup[] = []; + const device = this._device; + + //TODO + const uniformBindGroupIndex = this._inputs.length > 0 ? 1 : 0; + if (this._uniforms.length > 0) { + bindGroups[uniformBindGroupIndex] = device.createBindGroup({ + label: this._label, + layout: this._pipeline.getBindGroupLayout(uniformBindGroupIndex), + entries: this._uniforms.map( + (uniform, idx) => + ({ + binding: idx, + resource: { + buffer: this._uniformBuffers[uniform.label], + }, + }) satisfies GPUBindGroupEntry, + ), + }); + } + + this._bindGroups = bindGroups; + } + + createPass( + commandEncoder: GPUCommandEncoder, + inputTextures: Record, + ) { + this._updatePipeline(); + + // TODO createBindGrou every time? 
+ if (this._inputs.length > 0) { + this._bindGroups[0] = this._device.createBindGroup({ + label: this._label, + layout: this._pipeline.getBindGroupLayout(0), + entries: this._inputs.map((textureName, idx) => ({ + binding: idx, + // TODO + resource: inputTextures[textureName as I].createView(), + })), + }); + } + // Begin the render pass + const renderPass = commandEncoder.beginRenderPass({ + colorAttachments: this._renderToScreen + ? [ + { + view: this._renderToScreen.screenTexture.createView(), + clearValue: { r: 0, g: 0, b: 0, a: 0 }, + storeOp: "store" as GPUStoreOp, + loadOp: "clear" as GPULoadOp, + }, + ] + : this._outputs.map( + (textureName) => + ({ + view: this._outputTextures[textureName].texture.createView(), + clearValue: { r: 0, g: 0, b: 0, a: 0 }, + loadOp: "clear" as GPULoadOp, + storeOp: "store" as GPUStoreOp, + }) satisfies GPURenderPassColorAttachment, + ), + }); + + // Draw a full quad + renderPass.setPipeline(this._pipeline); + // Bind groups + this._bindGroups.forEach((bindGroup, idx) => { + renderPass.setBindGroup(idx, bindGroup); + }); + renderPass.draw(6, 1, 0, 0); + // End the render pass + renderPass.end(); + } +} diff --git a/strahl-lib/src/path-tracer.ts b/strahl-lib/src/path-tracer.ts index 0c961c4..d5811e7 100644 --- a/strahl-lib/src/path-tracer.ts +++ b/strahl-lib/src/path-tracer.ts @@ -1,5 +1,7 @@ import buildTracerShader from "./tracer-shader"; import buildRenderShader from "./render-shader"; +import buildDenoisePassShader from "./denoise-pass-shader.ts"; +import buildTextureConverterPassShader from "./texture-converter-pass-shader.ts"; import { logGroup } from "./benchmark/cpu-performance-logger.ts"; import { OpenPBRMaterial } from "./openpbr-material"; import { @@ -29,6 +31,7 @@ import { buildAbortEventHub } from "./util/abort-event-hub.ts"; import { Group } from "three"; import { prepareGeometry } from "./prepare-geometry.ts"; import { initUNetFromURL, UNet } from "oidn-web"; +import { WGPUFullQuadPass } from 
"./full-oidn-pass.ts"; /** * Configuration options for the path tracer. @@ -58,10 +61,30 @@ export type PathTracerOptions = { }; async function denoise( - unet: UNet, - data: ArrayBuffer, + { device, adapterInfo }: { device: GPUDevice; adapterInfo: GPUAdapterInfo }, + { + colorBuffer, + albedoBuffer, + normalBuffer, + }: { + colorBuffer: GPUBuffer; + albedoBuffer: GPUBuffer; + normalBuffer: GPUBuffer; + }, size: { width: number; height: number }, ) { + const TZA_URL = "./oidn-weights/rt_hdr_alb_nrm.tza"; + const unet = await initUNetFromURL( + TZA_URL, + { + device, + adapterInfo, + }, + { + aux: true, + hdr: true, + }, + ); const outputCanvas = document.createElement("canvas"); outputCanvas.width = size.width; outputCanvas.height = size.height; @@ -70,26 +93,28 @@ async function denoise( outputCanvas.style.cssText = "position:absolute; right: 0; bottom: 0; z-index: 1; pointer-events: none;"; - var imgData = new ImageData(size.width, size.height); - const clampedData = new Uint8ClampedArray(data); - for (let y = 0; y < size.height; y++) { - for (let x = 0; x < size.width; x++) { - // Consider flipping the image - const sourceIndex = ((size.height - 1 - y) * size.width + x) * 4; - const targetIndex = (y * size.width + x) * 4; - - imgData.data[targetIndex] = clampedData[sourceIndex]; - imgData.data[targetIndex + 1] = clampedData[sourceIndex + 1]; - imgData.data[targetIndex + 2] = clampedData[sourceIndex + 2]; - imgData.data[targetIndex + 3] = clampedData[sourceIndex + 3]; - } - } + console.log(size); return new Promise((resolve, reject) => { unet.tileExecute({ - color: imgData, - done() {}, - progress: (_, tileData, tile) => { + color: { + data: colorBuffer, + width: size.width, + height: size.height, + }, + albedo: { + data: albedoBuffer, + width: size.width, + height: size.height, + }, + normal: { + data: normalBuffer, + width: size.width, + height: size.height, + }, + done(finalBuffer, tileData, tile) { + console.log(finalBuffer); + resolve(finalBuffer); if 
(!tileData) { reject("No tile data"); return; @@ -97,6 +122,7 @@ async function denoise( outputCtx?.putImageData(tileData, tile.x, tile.y); resolve(tileData); }, + progress: (finalBuffer, tileData, tile) => {}, }); }); } @@ -771,43 +797,50 @@ async function runPathTracer( } } - const pass = encoder.beginRenderPass({ - colorAttachments: [ - { - view: context.getCurrentTexture().createView(), - loadOp: "clear", - clearValue: { r: 0, g: 0, b: 0.2, a: 1 }, - storeOp: "store", - }, - ], - }); + const executeRenderPass = ( + texture: GPUTexture, + encoder: GPUCommandEncoder, + ) => { + const pass = encoder.beginRenderPass({ + colorAttachments: [ + { + view: context.getCurrentTexture().createView(), + loadOp: "clear", + clearValue: { r: 0, g: 0, b: 0.2, a: 1 }, + storeOp: "store", + }, + ], + }); - pass.setPipeline(renderPipeline); + pass.setPipeline(renderPipeline); - const renderBindGroup = device.createBindGroup({ - label: "Texture sampler bind group", - layout: renderBindGroupLayout, - entries: [ - { - binding: 0, - resource: sampler, - }, - { - binding: 1, - resource: texture.createView(), - }, - ], - }); + const renderBindGroup = device.createBindGroup({ + label: "Texture sampler bind group", + layout: renderBindGroupLayout, + entries: [ + { + binding: 0, + resource: sampler, + }, + { + binding: 1, + resource: texture.createView(), + }, + ], + }); - pass.setBindGroup(0, renderBindGroup); - const RENDER_TEXTURE_VERTEX_COUNT = 6; - pass.draw(RENDER_TEXTURE_VERTEX_COUNT); + pass.setBindGroup(0, renderBindGroup); + const RENDER_TEXTURE_VERTEX_COUNT = 6; + pass.draw(RENDER_TEXTURE_VERTEX_COUNT); - pass.end(); + pass.end(); - const commandBuffer = encoder.finish(); + const commandBuffer = encoder.finish(); + + device.queue.submit([commandBuffer]); + }; - device.queue.submit([commandBuffer]); + executeRenderPass(texture, encoder); if ( !isNil(timestampQueryData) && @@ -846,30 +879,370 @@ async function runPathTracer( state = "halted"; if (enableDenoise) { - const 
readbackBuffer = device.createBuffer({ - usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ, - size: 4 * width * height, + const dynamicComputeBindGroupLayout = device.createBindGroupLayout({ + label: "Dynamic denoise pass compute bind group layout", + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + storageTexture: { + format: "rgba8unorm" /*, access: "write-only"*/, + }, + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + storageTexture: { format: "rgba8unorm", access: "read-only" }, + }, + { + binding: 2, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: "uniform", + }, + }, + { + binding: 3, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: "storage", + }, + }, + ], }); - const encoder = device.createCommandEncoder(); + const denoisePassShaderCode = buildDenoisePassShader({ + imageWidth: width, + imageHeight: height, + maxWorkgroupDimension, + maxBvhStackDepth: maxBvhDepth, + }); + + const denoisePassDefinitions = makeShaderDataDefinitions( + denoisePassShaderCode, + ); + const { size: bytesForUniform } = + denoisePassDefinitions.uniforms.uniformData; + const uniformData = makeStructuredView( + denoisePassDefinitions.uniforms.uniformData, + new ArrayBuffer(bytesForUniform), + ); + + const buildDenoisePassUniformBuffer = (mode: 0 | 1) => { + const uniformBuffer = device.createBuffer({ + label: "Denoise pass uniform data buffer", + size: bytesForUniform, + usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST, + // mappedAtCreation: true, + }); + + uniformData.set({ + invProjectionMatrix: invProjectionMatrix.elements, + cameraWorldMatrix: matrixWorld.elements, + invModelMatrix: sceneMatrixWorld.clone().invert().elements, + seedOffset: Math.random() * Number.MAX_SAFE_INTEGER, + priorSamples: currentSample, + samplesPerPixel: samplesPerIteration, + sunDirection, + skyPower: environmentLightConfiguration.sky.power, + skyColor: environmentLightConfiguration.sky.color, + sunPower: Math.pow(10, 
environmentLightConfiguration.sun.power), + sunAngularSize: environmentLightConfiguration.sun.angularSize, + sunColor: environmentLightConfiguration.sun.color, + clearColor: clearColor === false ? [0, 0, 0] : clearColor, + enableClearColor: clearColor === false ? 0 : 1, + maxRayDepth, + objectDefinitionLength: modelGroups.length, + mode, + }); + // todo: consider buffer writing + device.queue.writeBuffer(uniformBuffer, 0, uniformData.arrayBuffer); + + return uniformBuffer; + }; + + const normalUniformBuffer = buildDenoisePassUniformBuffer(0); + const albedoUniformBuffer = buildDenoisePassUniformBuffer(1); + + const computePipelineLayout = device.createPipelineLayout({ + label: "Dynamic denoise pass compute pipeline layout", + bindGroupLayouts: [ + computeBindGroupLayout, + dynamicComputeBindGroupLayout, + ], + }); + + const float32ArrayImageSize = width * height * 4; + + const normalImageBuffer = device.createBuffer({ + label: "Normal image buffer", + size: Float32Array.BYTES_PER_ELEMENT * float32ArrayImageSize, + usage: + GPUBufferUsage.STORAGE | + // todo: check flags + GPUBufferUsage.COPY_SRC | + GPUBufferUsage.COPY_DST, + // mappedAtCreation: true, + }); + + const albedoImageBuffer = device.createBuffer({ + label: "Albedo image buffer", + size: Float32Array.BYTES_PER_ELEMENT * float32ArrayImageSize, + usage: + GPUBufferUsage.STORAGE | + // todo: check flags + GPUBufferUsage.COPY_SRC | + GPUBufferUsage.COPY_DST, + // mappedAtCreation: true, + }); + + const computeShaderModule = device.createShaderModule({ + label: "Denoise Pass Compute Shader", + code: denoisePassShaderCode, + }); + + const computePipeline = device.createComputePipeline({ + label: "Denoise Pass Compute pipeline", + layout: computePipelineLayout, + compute: { + module: computeShaderModule, + entryPoint: "computeMain", + }, + }); + + const executeDenoisePass = ( + imageBuffer: GPUBuffer, + uniformBuffer: GPUBuffer, + ) => { + const dynamicComputeBindGroup = device.createBindGroup({ + label: 
"Dynamic denoise pass compute bind group", + layout: dynamicComputeBindGroupLayout, + entries: [ + { + binding: 0, + resource: writeTexture.createView(), + }, + { + binding: 1, + resource: readTexture.createView(), + }, + { + binding: 2, + resource: { + buffer: uniformBuffer, + }, + }, + { + binding: 3, + resource: { + buffer: imageBuffer, + }, + }, + ], + }); + const encoder = device.createCommandEncoder(); + + const computePass = encoder.beginComputePass(); + computePass.setBindGroup(0, computeBindGroup); + computePass.setBindGroup(1, dynamicComputeBindGroup); + + computePass.setPipeline(computePipeline); + + const dispatchX = Math.ceil(width / maxWorkgroupDimension); + const dispatchY = Math.ceil(height / maxWorkgroupDimension); + computePass.dispatchWorkgroups(dispatchX, dispatchY); + + computePass.end(); + + device.queue.submit([encoder.finish()]); + }; + + executeDenoisePass(normalImageBuffer, normalUniformBuffer); + executeDenoisePass(albedoImageBuffer, albedoUniformBuffer); + + const textureBuffer = device.createBuffer({ + label: "Texture buffer", + usage: + GPUBufferUsage.COPY_DST | + GPUBufferUsage.COPY_SRC | + GPUBufferUsage.STORAGE, + size: Float32Array.BYTES_PER_ELEMENT * 4 * width * height, + }); + + // fixme: use manual texture-converter-pass-shader + { + const textureConverterPassShaderCode = + buildTextureConverterPassShader({ + imageWidth: width, + imageHeight: height, + maxWorkgroupDimension, + }); + + const computeShaderModule = device.createShaderModule({ + label: "Texture Converter Pass Compute Shader", + code: textureConverterPassShaderCode, + }); + + const dynamicComputeBindGroupLayout = device.createBindGroupLayout({ + label: "Texture Converter pass compute bind group layout", + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + storageTexture: { + format: "rgba8unorm", + access: "read-only", + }, + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { + type: "storage", + }, + }, + ], + }); + + const 
computePipelineLayout = device.createPipelineLayout({ + label: "Dynamic texture converter pass compute pipeline layout", + bindGroupLayouts: [dynamicComputeBindGroupLayout], + }); + + const computePipeline = device.createComputePipeline({ + label: "Texture Converter Pass Compute pipeline", + layout: computePipelineLayout, + compute: { + module: computeShaderModule, + entryPoint: "computeMain", + }, + }); + + const dynamicComputeBindGroup = device.createBindGroup({ + label: "Texture converter pass compute bind group", + layout: dynamicComputeBindGroupLayout, + entries: [ + { + binding: 0, + resource: writeTexture.createView(), + }, + { + binding: 1, + resource: { + buffer: textureBuffer, + }, + }, + ], + }); + + const encoder = device.createCommandEncoder(); + + const computePass = encoder.beginComputePass(); + computePass.setBindGroup(0, dynamicComputeBindGroup); + + computePass.setPipeline(computePipeline); + + const dispatchX = Math.ceil(width / maxWorkgroupDimension); + const dispatchY = Math.ceil(height / maxWorkgroupDimension); + console.log("dispatch", dispatchX, dispatchY); + computePass.dispatchWorkgroups(dispatchX, dispatchY); + + computePass.end(); + + device.queue.submit([encoder.finish()]); + } + + /*const encoder = device.createCommandEncoder(); encoder.copyTextureToBuffer( - { texture: readTexture }, - { buffer: readbackBuffer, bytesPerRow: width * 4 }, + { texture: writeTexture }, + { buffer: textureBuffer, bytesPerRow: width * 4 }, [width, height], ); - device.queue.submit([encoder.finish()]); - - await readbackBuffer.mapAsync(GPUMapMode.READ, 0, 4 * width * height); - const data = readbackBuffer.getMappedRange(0, 4 * width * height); - const uint8Array = new Uint8Array(data); + device.queue.submit([encoder.finish()]);*/ + const outputBuffer = await denoise( + { device, adapterInfo: adapter.info }, + { + colorBuffer: textureBuffer, + albedoBuffer: albedoImageBuffer, + normalBuffer: normalImageBuffer, + }, + { + width, + height, + }, + ); - const 
TZA_URL = "./oidn-weights/rt_ldr.tza"; - const unet = await initUNetFromURL(TZA_URL); - await denoise(unet, uint8Array, { - width, - height, + /*const textureBuffer = device.createBuffer({ + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC, + size: Float32Array.BYTES_PER_ELEMENT * 4 * width * height, + mappedAtCreation: true, }); - readbackBuffer.unmap(); + + const textureMapped = textureBuffer.getMappedRange(); + const textureData = new Float32Array(textureMapped); + console.log(tileData, textureData); + textureData.set(tileData.data, 0); + + textureBuffer.unmap();*/ + + function createDisplayPass(device: GPUDevice) { + const displayPass = new WGPUFullQuadPass("display", device, { + inputs: ["colorTex"], + outputs: ["color"], + uniforms: [], + fsMain: /*wgsl*/ ` + let color = textureLoad(colorTex, uv, 0); + let v = color.rgb; + output.color = vec4( + color.rgb, + 1.0 + ); + `, + }); + return displayPass; + } + + { + const textureFinal = device.createTexture({ + size: [width, height], + format: "rgba32float", + usage: + GPUTextureUsage.TEXTURE_BINDING | + GPUTextureUsage.COPY_DST | + GPUTextureUsage.COPY_SRC, + }); + const encoder = device.createCommandEncoder(); + encoder.copyBufferToTexture( + // fixme: this is used to debug the different buffers + { buffer: outputBuffer.data, bytesPerRow: width * 4 * 4 }, + //{ buffer: normalImageBuffer, bytesPerRow: width * 4 * 4 }, + //{ buffer: textureBuffer, bytesPerRow: width * 4 * 4 }, + { texture: textureFinal }, + [width, height], + ); + device.queue.submit([encoder.finish()]); + + /*const encoder2 = device.createCommandEncoder(); + executeRenderPass(textureFinal, encoder2);*/ + const commandEncoder = device.createCommandEncoder(); + const presentationFormat = navigator.gpu.getPreferredCanvasFormat(); + context.configure({ + device, + format: presentationFormat, + alphaMode: "premultiplied", + }); + const displayPass = createDisplayPass(device); + displayPass.setRenderToScreen( + context.getCurrentTexture(), + 
presentationFormat, + ); + displayPass.createPass(commandEncoder, { + colorTex: textureFinal, + }); + device.queue.submit([commandEncoder.finish()]); + } + + textureBuffer.unmap(); } finishedSampling?.({ diff --git a/strahl-lib/src/texture-converter-pass-shader.ts b/strahl-lib/src/texture-converter-pass-shader.ts new file mode 100644 index 0000000..dc03a12 --- /dev/null +++ b/strahl-lib/src/texture-converter-pass-shader.ts @@ -0,0 +1,23 @@ +import denoisePassShader from "./texture-converter-pass-shader.wgsl?raw"; + +type Params = { + imageWidth: number; + imageHeight: number; + maxWorkgroupDimension: number; +}; + +const PARAM_PLACEHOLDER_MAP: Record = { + imageWidth: "imageWidth", + imageHeight: "imageHeight", + maxWorkgroupDimension: "maxWorkgroupDimension", +}; + +export default function build(params: Params) { + const placeholders = Object.entries(PARAM_PLACEHOLDER_MAP) as [ + keyof Params, + string, + ][]; + return placeholders.reduce((aggregate, [key, value]) => { + return aggregate.replaceAll(`\${${value}}`, `${params[key]}`); + }, denoisePassShader); +} diff --git a/strahl-lib/src/texture-converter-pass-shader.wgsl b/strahl-lib/src/texture-converter-pass-shader.wgsl new file mode 100644 index 0000000..e8dc80a --- /dev/null +++ b/strahl-lib/src/texture-converter-pass-shader.wgsl @@ -0,0 +1,11 @@ +@group(0) @binding(0) var readTexture: texture_storage_2d; +// float32 rgba +@group(0) @binding(1) var targetBuffer: array; + +@compute +@workgroup_size(${maxWorkgroupDimension}, ${maxWorkgroupDimension}, 1) +fn computeMain(@builtin(global_invocation_id) globalId: vec3) { + let position = vec2(i32(globalId.x), i32(globalId.y)); + let previousColor = textureLoad(readTexture, position); + targetBuffer[position.x + position.y * ${imageWidth}] = previousColor; +}