-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpath_tracing.cpp
363 lines (326 loc) · 14.6 KB
/
path_tracing.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
#include <luisa/dsl/sugar.h>
#include <luisa/luisa-compute.h>
#include <luisa/backends/ext/dx_hdr_ext.hpp>
#define TINYOBJLOADER_IMPLEMENTATION
#include <tiny_obj_loader.h>
#include <stb/stb_image_write.h>
#include <cornell_box.h>
#include <string>
#include <iostream>
using namespace luisa;
using namespace luisa::compute;
using namespace std::literals::string_literals;
struct Onb {
float3 tangent;
float3 binormal;
float3 normal;
};
LUISA_STRUCT(Onb, tangent, binormal, normal) {
[[nodiscard]] Float3 to_world(Expr<float3> v) const noexcept {
return v.x * tangent + v.y * binormal + v.z * normal;
}
};
int main(int argc, char *argv[]) {
if (argc <= 1) {
LUISA_INFO(
"Usage: {} <backend> <spp>\nbackend: cuda, dx, cpu, metal\nspp: preferably an integer multiple of 64",
argv[0]
);
exit(1);
}
log_level_verbose();
Context context { argv[0] };
Device device = context.create_device(argv[1]);
int samples_per_pixel = std::stoi(argv[2]);
// load the cornell box scene
tinyobj::ObjReaderConfig obj_reader_config;
obj_reader_config.triangulate = true;
obj_reader_config.triangulation_method = "simple";
obj_reader_config.vertex_color = false;
obj_reader_config.mtl_search_path = ""s;
tinyobj::ObjReader obj_reader;
if (!obj_reader.ParseFromString(cornell_box_string, "", obj_reader_config)) {
luisa::string_view error_message = "unknown error.";
if (auto &&e = obj_reader.Error(); !e.empty()) { error_message = e; }
LUISA_ERROR_WITH_LOCATION("Failed to load OBJ file: {}", error_message);
}
if (auto &&e = obj_reader.Warning(); !e.empty()) {
LUISA_WARNING_WITH_LOCATION("{}", e);
}
auto &&p = obj_reader.GetAttrib().vertices;
luisa::vector<float3> vertices;
vertices.reserve(p.size() / 3u);
for (uint i = 0u; i < p.size(); i += 3u) {
vertices.emplace_back(
make_float3(
p[i + 0u], p[i + 1u], p[i + 2u]
)
);
}
LUISA_INFO(
"Loaded mesh with {} shape(s) and {} vertices.",
obj_reader.GetShapes().size(), vertices.size()
);
BindlessArray heap = device.create_bindless_array();
Stream stream = device.create_stream(StreamTag::GRAPHICS);
Buffer<float3> vertex_buffer = device.create_buffer<float3>(vertices.size());
stream << vertex_buffer.copy_from(vertices.data())
<< synchronize();
luisa::vector<Mesh> meshes;
luisa::vector<Buffer<Triangle>> triangle_buffers;
for (auto &&shape : obj_reader.GetShapes()) {
uint index = static_cast<uint>(meshes.size());
std::vector<tinyobj::index_t> const &t = shape.mesh.indices;
uint triangle_count = t.size() / 3u;
LUISA_INFO(
"Processing shape '{}' at index {} with {} triangle(s).",
shape.name, index, triangle_count
);
luisa::vector<uint> indices;
indices.reserve(t.size());
for (tinyobj::index_t i : t) { indices.emplace_back(i.vertex_index); }
Buffer<Triangle> &triangle_buffer = triangle_buffers.emplace_back(
device.create_buffer<Triangle>(triangle_count)
);
Mesh &mesh = meshes.emplace_back(device.create_mesh(vertex_buffer, triangle_buffer));
heap.emplace_on_update(index, triangle_buffer);
stream << triangle_buffer.copy_from(indices.data())
<< mesh.build()
<< synchronize();
}
Accel accel = device.create_accel({});
for (Mesh &m : meshes) {
accel.emplace_back(m, make_float4x4(1.0f));
}
stream << heap.update()
<< accel.build()
<< synchronize();
Constant materials{
make_float3(0.725f, 0.710f, 0.680f),// floor
make_float3(0.725f, 0.710f, 0.680f),// ceiling
make_float3(0.725f, 0.710f, 0.680f),// back wall
make_float3(0.140f, 0.450f, 0.091f),// right wall
make_float3(0.630f, 0.065f, 0.050f),// left wall
make_float3(0.725f, 0.710f, 0.680f),// short box
make_float3(0.725f, 0.710f, 0.680f),// tall box
make_float3(0.000f, 0.000f, 0.000f),// light
};
Callable linear_to_srgb = [&](Var<float3> x) noexcept {
return saturate(
select(
1.055f * pow(x, 1.0f / 2.4f) - 0.055f,
12.92f * x,
x <= 0.00031308f
)
);
};
Callable tea = [](UInt v0, UInt v1) noexcept {
set_name("tea");
UInt s0 = def(0u);
for (uint n = 0u; n < 4u; n++) {
s0 += 0x9e3779b9u;
v0 += ((v1 << 4) + 0xa341316cu) ^ (v1 + s0) ^ ((v1 >> 5u) + 0xc8013ea4u);
v1 += ((v0 << 4) + 0xad90777du) ^ (v0 + s0) ^ ((v0 >> 5u) + 0x7e95761eu);
}
return v0;
};
Kernel2D make_sampler_kernel = [&](ImageUInt seed_image) noexcept {
set_name("make_sampler_kernel");
UInt2 p = dispatch_id().xy();
UInt state = tea(p.x, p.y);
seed_image.write(p, make_uint4(state));
};
Callable lcg = [](UInt &state) noexcept {
set_name("lcg");
constexpr uint lcg_a = 1664525u;
constexpr uint lcg_c = 1013904223u;
state = lcg_a * state + lcg_c;
return cast<float>(state & 0x00ffffffu) *
(1.0f / static_cast<float>(0x01000000u));
};
Callable make_onb = [](const Float3 &normal) noexcept {
set_name("make_onb");
Float3 binormal = normalize(
ite(
abs(normal.x) > abs(normal.z),
make_float3(-normal.y, normal.x, 0.0f),
make_float3(0.0f, -normal.z, normal.y)
)
);
Float3 tangent = normalize(cross(binormal, normal));
return def<Onb>(tangent, binormal, normal);
};
Callable generate_ray = [](Float2 p) noexcept {
set_name("generate_ray");
static constexpr float fov = radians(27.8f);
static constexpr float3 origin = make_float3(-0.01f, 0.995f, 5.0f);
Float3 pixel = origin + make_float3(p * tan(0.5f * fov), -1.0f);
Float3 direction = normalize(pixel - origin);
return make_ray(origin, direction);
};
Callable cosine_sample_hemisphere = [](Float2 u) noexcept {
set_name("cosine_sample_hemisphere");
Float r = sqrt(u.x);
Float phi = 2.0f * constants::pi * u.y;
return make_float3(r * cos(phi), r * sin(phi), sqrt(1.0f - u.x));
};
Callable balanced_heuristic = [](Float pdf_a, Float pdf_b) noexcept {
set_name("balanced_heuristic");
return pdf_a / max(pdf_a + pdf_b, 1e-4f);
};
auto spp_per_dispatch =
device.backend_name() == "metal" || device.backend_name() == "cpu" || device.backend_name() == "fallback"
? 1u
: 64u;
// auto spp_per_dispatch = 1u;
Kernel2D raytracing_kernel = [&](
ImageFloat image,
ImageUInt seed_image,
AccelVar accel,
UInt2 resolution
) noexcept {
set_name("raytracing_kernel");
set_block_size(16u, 16u, 1u);
UInt2 coord = dispatch_id().xy();
Float frame_size = min(resolution.x, resolution.y).cast<float>();
UInt state = seed_image.read(coord).x;
Float rx = lcg(state);
Float ry = lcg(state);
Float2 pixel = (make_float2(coord) + make_float2(rx, ry)) / frame_size * 2.0f - 1.0f;
Float3 radiance = def(make_float3(0.0f));
$for (i, spp_per_dispatch) {
Var<Ray> ray = generate_ray(pixel * make_float2(1.0f, -1.0f));
Float3 beta = def(make_float3(1.0f));
Float pdf_bsdf = def(0.0f);
constexpr float3 light_position = make_float3(-0.24f, 1.98f, 0.16f);
constexpr float3 light_u = make_float3(-0.24f, 1.98f, -0.22f) - light_position;
constexpr float3 light_v = make_float3(0.23f, 1.98f, 0.16f) - light_position;
constexpr float3 light_emission = make_float3(17.0f, 12.0f, 4.0f);
Float light_area = length(cross(light_u, light_v));
Float3 light_normal = normalize(cross(light_u, light_v));
$for (depth, 10u) {
// trace
Var<TriangleHit> hit = accel.intersect(ray, {});
reorder_shader_execution();
$if (hit->miss()) { $break; };
Var<Triangle> triangle = heap->buffer<Triangle>(hit.inst).read(hit.prim);
Float3 p0 = vertex_buffer->read(triangle.i0);
Float3 p1 = vertex_buffer->read(triangle.i1);
Float3 p2 = vertex_buffer->read(triangle.i2);
Float3 p = triangle_interpolate(hit.bary, p0, p1, p2);
Float3 n = normalize(cross(p1 - p0, p2 - p0));
Float cos_wo = dot(-ray->direction(), n);
$if (cos_wo < 1e-4f) { $break; };
// hit light
$if (hit.inst == static_cast<uint>(meshes.size() - 1u)) {
$if (depth == 0u) {
radiance += light_emission;
} $else {
Float pdf_light = length_squared(p - ray->origin()) / (light_area * cos_wo);
Float mis_weight = balanced_heuristic(pdf_bsdf, pdf_light);
radiance += mis_weight * beta * light_emission;
};
$break;
};
// sample light
Float ux_light = lcg(state);
Float uy_light = lcg(state);
Float3 p_light = light_position + ux_light * light_u + uy_light * light_v;
Float3 pp = offset_ray_origin(p, n);
Float3 pp_light = offset_ray_origin(p_light, light_normal);
Float d_light = distance(pp, pp_light);
Float3 wi_light = normalize(pp_light - pp);
Var<Ray> shadow_ray = make_ray(
offset_ray_origin(pp, n),
wi_light,
0.0f,
d_light
);
Bool occluded = accel.intersect_any(shadow_ray, {});
Float cos_wi_light = dot(wi_light, n);
Float cos_light = -dot(light_normal, wi_light);
Float3 albedo = materials.read(hit.inst);
$if (!occluded & cos_wi_light > 1e-4f & cos_light > 1e-4f) {
Float pdf_light = (d_light * d_light) / (light_area * cos_light);
Float pdf_bsdf = cos_wi_light * inv_pi;
Float mis_weight = balanced_heuristic(pdf_light, pdf_bsdf);
Float3 bsdf = albedo * inv_pi * cos_wi_light;
radiance += beta * bsdf * mis_weight * light_emission / max(pdf_light, 1e-4f);
};
// sample BSDF
Var<Onb> onb = make_onb(n);
Float ux = lcg(state);
Float uy = lcg(state);
Float3 wi_local = cosine_sample_hemisphere(make_float2(ux, uy));
Float cos_wi = abs(wi_local.z);
Float3 new_direction = onb->to_world(wi_local);
ray = make_ray(pp, new_direction);
pdf_bsdf = cos_wi * inv_pi;
beta *= albedo;// * cos_wi * inv_pi / pdf_bsdf => * 1.f
// rr
Float l = dot(make_float3(0.212671f, 0.715160f, 0.072169f), beta);
$if (l == 0.0f) { $break; };
Float q = max(l, 0.05f);
Float r = lcg(state);
$if (r >= q) { $break; };
beta *= 1.0f / q;
};
};
radiance /= static_cast<float>(spp_per_dispatch);
seed_image.write(coord, make_uint4(state));
$if (any(dsl::isnan(radiance))) { radiance = make_float3(0.0f); };
image.write(dispatch_id().xy(), make_float4(clamp(radiance, 0.0f, 30.0f), 1.0f));
};
Kernel2D accumulate_kernel = [&](ImageFloat accum_image, ImageFloat curr_image) noexcept {
set_name("accumulate_kernel");
UInt2 p = dispatch_id().xy();
Float4 accum = accum_image.read(p);
Float3 curr = curr_image.read(p).xyz();
accum_image.write(p, accum + make_float4(curr, 1.f));
};
Kernel2D clear_kernel = [](ImageFloat image) noexcept {
set_name("clear_kernel");
image.write(dispatch_id().xy(), make_float4(0.f));
};
Kernel2D hdr2ldr_kernel = [&](ImageFloat hdr_image, ImageFloat ldr_image, Float scale) noexcept {
set_name("hdr2ldr_kernel");
UInt2 coord = dispatch_id().xy();
Float4 hdr = hdr_image.read(coord);
Float3 ldr = linear_to_srgb(clamp(hdr.xyz() / hdr.w * scale, 0.f, 1.f));
ldr_image.write(coord, make_float4(ldr, 1.f));
};
ShaderOption o{.enable_debug_info = false};
auto clear_shader = device.compile(clear_kernel, o);
auto hdr2ldr_shader = device.compile(hdr2ldr_kernel, o);
auto accumulate_shader = device.compile(accumulate_kernel, o);
auto raytracing_shader = device.compile(raytracing_kernel, ShaderOption{.name = "path_tracing"});
auto make_sampler_shader = device.compile(make_sampler_kernel, o);
static constexpr uint2 resolution = make_uint2(1024u);
Image<float> framebuffer = device.create_image<float>(PixelStorage::HALF4, resolution);
Image<float> accum_image = device.create_image<float>(PixelStorage::FLOAT4, resolution);
luisa::vector<std::array<std::uint8_t, 4u>> host_image(resolution.x * resolution.y);
// luisa::vector<std::uint8_t> host_image(resolution.x * resolution.y * 4u); // is also ok
Image<uint> seed_image = device.create_image<uint>(PixelStorage::INT1, resolution);
stream << clear_shader(accum_image).dispatch(resolution)
<< make_sampler_shader(seed_image).dispatch(resolution)
<< synchronize();
Clock clk;
Image<float> ldr_image = device.create_image<float>(PixelStorage::BYTE4, resolution);
for (std::size_t sample_index = 0; sample_index < samples_per_pixel / spp_per_dispatch; ++sample_index) {
stream << raytracing_shader(framebuffer, seed_image, accel, resolution).dispatch(resolution)
<< accumulate_shader(accum_image, framebuffer).dispatch(resolution)
<< hdr2ldr_shader(accum_image, ldr_image, 2.0f).dispatch(resolution)
<< [sample_index, spp_per_dispatch, samples_per_pixel, &clk] () {
LUISA_INFO(
"Samples: {} / {} ({:.1f}s)",
(sample_index + 1u) * spp_per_dispatch,
samples_per_pixel,
clk.toc() * 1e-3
);
};
}
stream << ldr_image.copy_to(host_image.data())
<< synchronize();
stbi_write_png("test_path_tracing.png", resolution.x, resolution.y, 4, host_image.data(), 0);
return 0;
}