Skip to content

Commit 1f3692f

Browse files
bors[bot]goddessfreyafkaamsiglreith
committed
2071: Remapping descriptor sets in the gl backend r=kvark a=ZeGentzy I'll rebase off master when I'm done. Uniforms in gl only have a bindings field, not a set one. This means that for shaders that use multiple sets to work, we must change where we are binding them. See page 14 for what I mean: https://www.khronos.org/assets/uploads/developers/library/2016-vulkan-devday-uk/4-Using-spir-v-with-spirv-cross.pdf PR checklist: - [ ] `make` succeeds (on *nix) - [ ] `make reftests` succeeds - [ ] tested examples with the following backends: 2149: [dx11] add memory flush/invalidate & image/buffer copies r=kvark a=fkaa Main changes are adding a more robust implementation of `Memory`, adding flush/invalidate and adding image/buffer copies. Implements `Memory` like the following (for `HOST_VISIBLE` memory): ``` 0.........................size +----------------------------+ | Memory | +----------------------------+ A..B C.....D E...F 1 fixed-size `STAGING` buffer which gets used for reading back from resources (and should be used to copy from/to on flush/invalidate): (0..size, ComPtr<Buffer>) 1 `Vec<u8>` which covers the whole memory range (0..size). This is the pointer we hand out to users. flush/invalidate moves the affected regions into our `STAGING` buffer to get read/uploaded. *N* Resources: (A..B, ComPtr<Resource>), (C..D, ComPtr<Resource>), (E..F, ComPtr<Resource>), ``` Implements copying between images and buffers. Image<->Image copies are mostly handled by `CopySubresourceRegion` but some formats, while the same size, can't be copied with this method: > Cannot invoke CopySubresourceRegion when the Formats of each Resource are not the same or at least castable to each other, unless one format is compressed (DXGI_FORMAT_R9G9B9E5_SHAREDEXP, or DXGI_FORMAT_BC[1,2,3,4,5]_* ) and the source format is similar to the dest according to: BC[1|4] ~= R16G16B16A16|R32G32, BC[2|3|5] ~= R32G32B32A32, R9G9B9E5_SHAREDEXP ~= R32. 
[ RESOURCE_MANIPULATION ERROR #281: ] These have to be done through compute shaders instead. Image->Buffer & Buffer->Image copies also have to be done through compute shaders, as `CopySubresourceRegion` can only copy between resources of the same type (Image<->Image, Buffer<->Buffer). The following formats have Buffer->Image and Image->Buffer copies implemented with these changes: * `R8` * `Rg8` * `R16` * `Rg16` * `R32` About 400 tests pass and an equal number fail in `dEQP-VK.api.copy_and_blit.core.image_to_image.all_formats.color` (mostly because `CopySubresourceRegion` fails to copy between some formats as mentioned above) Fixes #issue PR checklist: - [ ] `make` succeeds (on *nix) - [ ] `make reftests` succeeds - [ ] tested examples with the following backends: 2154: hal: Improve buffer documentation and cleanup error handling r=kvark a=msiglreith Fixes #issue PR checklist: - [ ] `make` succeeds (on *nix) - [ ] `make reftests` succeeds - [ ] tested examples with the following backends: Co-authored-by: Hal Gentz <[email protected]> Co-authored-by: Felix Kaaman <[email protected]> Co-authored-by: msiglreith <[email protected]>
4 parents b56bac2 + d7e0676 + d2a313f + 8c8281c commit 1f3692f

File tree

18 files changed

+1331
-373
lines changed

18 files changed

+1331
-373
lines changed

src/backend/dx11/shaders/copy.hlsl

+193-8
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,200 @@
1-
cbuffer BufferImageCopy : register(b0) {
2-
uint2 BufferSize;
3-
uint2 ImageOffset;
1+
// Constants for a buffer copy, packed into a single uint4.
// NOTE(review): no kernel visible in this file reads SrcDst, so the meaning
// of its components is not established here — confirm against the host code.
struct BufferCopy {
2+
uint4 SrcDst;
43
};
54

6-
StructuredBuffer<uint> CopySrc : register(t0);
7-
RWTexture2D<uint> CopyDst : register(u0);
5+
// Constants for an image copy: one uint4 for the source and one for the
// destination.
// NOTE(review): no kernel visible in this file reads Src or Dst; component
// meanings are presumably offsets — confirm against the host code.
struct ImageCopy {
6+
uint4 Src;
7+
uint4 Dst;
8+
};
9+
10+
// Constants shared by the buffer<->image copy kernels in this file.
struct BufferImageCopy {
11+
// x=offset, yz=size
// As used by the kernels in this file: BufferVars.x is the base index into
// the 32-bit buffer view and BufferVars.y is multiplied by the thread's y id
// to step between rows. BufferVars.z/.w are not read by the visible kernels.
12+
uint4 BufferVars;
13+
// .xy is added to the thread's xy id to obtain the texel coordinate.
uint4 ImageOffset;
14+
// NOTE(review): not read by any kernel visible here — confirm intended use.
uint4 ImageExtent;
15+
};
16+
17+
// Single constant buffer (slot b0) carrying the parameters for every copy
// kind. The kernels visible in this file only read BufferImageCopies.
cbuffer CopyConstants : register(b0) {
18+
BufferCopy BufferCopies;
19+
ImageCopy ImageCopies;
20+
BufferImageCopy BufferImageCopies;
21+
};
22+
23+
// Destination texel for a thread: the thread's xy id shifted by the copy's
// image offset.
uint2 GetImageDst(uint3 dispatch_thread_id)
{
    const uint2 image_origin = BufferImageCopies.ImageOffset.xy;
    return image_origin + dispatch_thread_id.xy;
}
27+
28+
// Source texel for a thread: the thread's xy id shifted by the copy's image
// offset.
uint2 GetImageSrc(uint3 dispatch_thread_id)
{
    const uint2 image_origin = BufferImageCopies.ImageOffset.xy;
    return image_origin + dispatch_thread_id.xy;
}
32+
33+
// Destination index into the 32-bit buffer view for a thread: base offset
// (BufferVars.x) plus the column, plus the row scaled by BufferVars.y.
uint GetBufferDst(uint3 dispatch_thread_id)
{
    const uint base = BufferImageCopies.BufferVars.x;
    const uint row_start = dispatch_thread_id.y * BufferImageCopies.BufferVars.y;
    return base + dispatch_thread_id.x + row_start;
}
37+
38+
// Source index into the 32-bit buffer view for a thread: base offset
// (BufferVars.x) plus the column, plus the row scaled by BufferVars.y.
uint GetBufferSrc(uint3 dispatch_thread_id)
{
    const uint base = BufferImageCopies.BufferVars.x;
    const uint row_start = dispatch_thread_id.y * BufferImageCopies.BufferVars.y;
    return base + dispatch_thread_id.x + row_start;
}
42+
43+
// Packs four components into one 32-bit word, x in the lowest byte and w in
// the highest. Each component is clamped (not masked) to 0xff first, so
// out-of-range values saturate at 255.
uint Uint4ToUint(uint4 data)
{
    const uint4 bytes = min(data, uint4(0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff));

    return bytes.x | (bytes.y << 8) | (bytes.z << 16) | (bytes.w << 24);
}
57+
58+
// Splits a 32-bit word into its four bytes, most significant byte first:
// x = bits 31..24, y = bits 23..16, z = bits 15..8, w = bits 7..0.
// Note this is the reverse of the component order Uint4ToUint packs.
uint4 UintToUint4(uint data)
{
    const uint byte3 = (data >> 24) & 0xff;
    const uint byte2 = (data >> 16) & 0xff;
    const uint byte1 = (data >> 8) & 0xff;
    const uint byte0 = data & 0xff;

    return uint4(byte3, byte2, byte1, byte0);
}
62+
63+
// Splits a 32-bit word into two 16-bit halves, high half first:
// x = bits 31..16, y = bits 15..0.
uint2 UintToUint2(uint data)
{
    const uint high_half = (data >> 16) & 0x0000ffff;
    const uint low_half = data & 0x0000ffff;

    return uint2(high_half, low_half);
}
67+
68+
// Packs two components into one 32-bit word, x in the low 16 bits and y in
// the high 16 bits. Each component is clamped (not masked) to 0xffff first.
uint Uint2ToUint(uint2 data)
{
    const uint2 halves = min(data, uint2(0x0000ffff, 0x0000ffff));

    return halves.x | (halves.y << 16);
}
78+
79+
// Buffers are always R32-aligned, so both views expose the buffer as 32-bit
// uint elements: BufferCopySrc is the read-only view (slot t0) and
// BufferCopyDst is the writable view (slot u0).
80+
StructuredBuffer<uint> BufferCopySrc : register(t0);
81+
RWBuffer<uint> BufferCopyDst: register(u0);
82+
83+
// R32: single-channel uint views of the image, declared on the same t0/u0
// slots as the buffer views.
// NOTE(review): presumably the host binds only the views a given kernel
// uses — confirm.
84+
Texture2D<uint> ImageCopySrcR32 : register(t0);
85+
RWTexture2D<uint> ImageCopyDstR32 : register(u0);
86+
87+
// TODO: correct, but slow
88+
// Buffer -> R32 image: each thread copies one 32-bit element from the source
// buffer to the texel selected by its xy id.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    const uint texel = BufferCopySrc[GetBufferSrc(dispatch_thread_id)];

    ImageCopyDstR32[GetImageDst(dispatch_thread_id)] = texel;
}
95+
96+
// R32 image -> buffer: each thread copies the texel selected by its xy id
// into one 32-bit element of the destination buffer.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    const uint texel = ImageCopySrcR32[GetImageSrc(dispatch_thread_id)];

    BufferCopyDst[GetBufferDst(dispatch_thread_id)] = texel;
}
103+
104+
// R16G16: two-channel uint views of the image (one texel per 32-bit buffer
// element), declared on slots t0/u0.
105+
Texture2D<uint2> ImageCopySrcR16G16 : register(t0);
106+
RWTexture2D<uint2> ImageCopyDstR16G16 : register(u0);
8107

9108
// TODO: correct, but slow
10109
[numthreads(1, 1, 1)]
11-
void cs_copy_buffer_image_2d(uint3 dispatch_thread_id : SV_DispatchThreadID) {
12-
uint2 idx = ImageOffset + dispatch_thread_id.xy;
110+
// Buffer -> R16G16 image: each thread copies one 32-bit element to one texel.
//
// The buffer word holds the texel in memory order, so on the little-endian
// layout D3D uses the first channel (R) is the LOW 16 bits and the second (G)
// the HIGH 16 bits. UintToUint2 returns (high, low), hence the .yx swizzle;
// without it R and G end up exchanged in the image.
void cs_copy_buffer_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint2 dst_idx = GetImageDst(dispatch_thread_id);
    uint src_idx = GetBufferSrc(dispatch_thread_id);

    ImageCopyDstR16G16[dst_idx] = UintToUint2(BufferCopySrc[src_idx]).yx;
}

// R16G16 image -> buffer: inverse of the kernel above. Each thread packs one
// texel into one 32-bit element.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r16g16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint dst_idx = GetBufferDst(dispatch_thread_id);
    uint2 src_idx = GetImageSrc(dispatch_thread_id);

    // Uint2ToUint stores .x in the low half and .y in the high half, so the
    // texel is passed in (R, G) order to keep R at the lower address.
    BufferCopyDst[dst_idx] = Uint2ToUint(ImageCopySrcR16G16[src_idx]);
}
124+
125+
// R16: single-channel uint views of the image, declared on slots t0/u0.
// The R16 kernels move two texels per 32-bit buffer element.
126+
Texture2D<uint> ImageCopySrcR16 : register(t0);
127+
RWTexture2D<uint> ImageCopyDstR16 : register(u0);
128+
129+
// Buffer -> R16 image: each thread unpacks one 32-bit element into two
// horizontally adjacent texels (x = 2 * thread x and the one after it).
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // The row term is halved because two texels share one 32-bit element.
    const uint row_start = (dispatch_thread_id.y * BufferImageCopies.BufferVars.y) / 2;
    const uint src_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + row_start;

    // UintToUint2 yields (high half, low half).
    const uint2 halves = UintToUint2(BufferCopySrc[src_idx]);

    const uint3 first_texel = uint3(2, 1, 0) * dispatch_thread_id;
    const uint3 second_texel = first_texel + uint3(1, 0, 0);

    // The low half is the texel at the lower address, so it goes first.
    ImageCopyDstR16[GetImageDst(first_texel)] = halves.y;
    ImageCopyDstR16[GetImageDst(second_texel)] = halves.x;
}
138+
139+
// R16 image -> buffer: each thread packs two horizontally adjacent texels
// (x = 2 * thread x and the one after it) into one 32-bit element.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // The row term is halved because two texels share one 32-bit element.
    const uint row_start = (dispatch_thread_id.y * BufferImageCopies.BufferVars.y) / 2;
    const uint dst_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + row_start;

    const uint3 first_coord = uint3(2, 1, 0) * dispatch_thread_id;
    const uint3 second_coord = first_coord + uint3(1, 0, 0);

    const uint first_texel = ImageCopySrcR16[GetImageSrc(first_coord)];
    const uint second_texel = ImageCopySrcR16[GetImageSrc(second_coord)];

    // Uint2ToUint puts .x in the low half: first texel low, second texel high.
    BufferCopyDst[dst_idx] = Uint2ToUint(uint2(first_texel, second_texel));
}
149+
150+
// R8G8: two-channel uint views of the image, declared on slots t0/u0.
// The R8G8 kernels move two texels per 32-bit buffer element.
151+
Texture2D<uint2> ImageCopySrcR8G8 : register(t0);
152+
RWTexture2D<uint2> ImageCopyDstR8G8 : register(u0);
153+
154+
// Buffer -> R8G8 image: each thread unpacks one 32-bit element into two
// horizontally adjacent texels (x = 2 * thread x and the one after it).
//
// In memory order the word holds R0, G0, R1, G1 from the lowest byte up.
// UintToUint4 returns the bytes most-significant first (x = byte 3 ...
// w = byte 0), so texel 0 is .wz and texel 1 is .yx. Using .xy/.zw instead
// would swap both the two texels and the channels inside each texel.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // The row term is halved because two texels share one 32-bit element.
    uint src_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2;

    uint4 data = UintToUint4(BufferCopySrc[src_idx]);

    ImageCopyDstR8G8[GetImageDst(uint3(2, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))] = data.wz;
    ImageCopyDstR8G8[GetImageDst(uint3(2, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))] = data.yx;
}

// R8G8 image -> buffer: inverse of the kernel above. Each thread packs two
// horizontally adjacent texels into one 32-bit element.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r8g8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // The row term is halved because two texels share one 32-bit element.
    uint dst_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2;

    uint2 first = ImageCopySrcR8G8[GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))];
    uint2 second = ImageCopySrcR8G8[GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))];

    // Uint4ToUint stores .x in the lowest byte, so this yields R0, G0, R1, G1
    // in ascending byte order.
    uint data = Uint4ToUint(uint4(first.x, first.y, second.x, second.y));

    BufferCopyDst[dst_idx] = data;
}
174+
175+
// R8: single-channel uint views of the image, declared on slots t0/u0.
// The R8 kernels move four texels per 32-bit buffer element.
176+
Texture2D<uint> ImageCopySrcR8 : register(t0);
177+
RWTexture2D<uint> ImageCopyDstR8 : register(u0);
178+
179+
// Buffer -> R8 image: each thread unpacks one 32-bit element into four
// horizontally adjacent texels starting at x = 4 * thread x.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // The row term is quartered because four texels share one 32-bit element.
    const uint row_start = (dispatch_thread_id.y * BufferImageCopies.BufferVars.y) / 4;
    const uint src_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + row_start;

    // UintToUint4 yields the bytes most-significant first (w is the lowest).
    const uint4 bytes = UintToUint4(BufferCopySrc[src_idx]);

    const uint3 first_texel = uint3(4, 1, 0) * dispatch_thread_id;

    // The lowest byte is the texel at the lowest address, so walk w -> x.
    ImageCopyDstR8[GetImageDst(first_texel + uint3(0, 0, 0))] = bytes.w;
    ImageCopyDstR8[GetImageDst(first_texel + uint3(1, 0, 0))] = bytes.z;
    ImageCopyDstR8[GetImageDst(first_texel + uint3(2, 0, 0))] = bytes.y;
    ImageCopyDstR8[GetImageDst(first_texel + uint3(3, 0, 0))] = bytes.x;
}
189+
190+
// R8 image -> buffer: each thread reads four horizontally adjacent texels
// starting at x = 4 * thread x and packs them into one 32-bit element.
[numthreads(1, 1, 1)]
191+
void cs_copy_image2d_r8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
192+
// The row term is divided by 4 because four texels share one 32-bit element.
uint dst_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 4;
193+
194+
// src_1..src_4 are the texels at x offsets 0..3 from the thread's first texel.
uint src_1 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))];
195+
uint src_2 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))];
196+
uint src_3 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(2, 0, 0))];
197+
uint src_4 = ImageCopySrcR8[GetImageSrc(uint3(4, 1, 0) * dispatch_thread_id + uint3(3, 0, 0))];
13198

14-
CopyDst[idx] = CopySrc[BufferSize.x + idx.x + idx.y * BufferSize.y];
199+
// Uint4ToUint stores its x component in the lowest byte, so src_1 lands in
// bits 7..0 and src_4 in bits 31..24; each value is clamped to 0xff first.
BufferCopyDst[dst_idx] = Uint4ToUint(uint4(src_1, src_2, src_3, src_4));
15200
}

src/backend/dx11/src/conv.rs

+34-12
Original file line numberDiff line numberDiff line change
@@ -24,62 +24,84 @@ pub fn map_index_type(ty: IndexType) -> DXGI_FORMAT {
2424
}
2525
}
2626

27-
pub fn typeless_format(format: DXGI_FORMAT) -> Option<DXGI_FORMAT> {
27+
pub fn typeless_format(format: DXGI_FORMAT) -> Option<(DXGI_FORMAT, DXGI_FORMAT)> {
2828
match format {
2929
DXGI_FORMAT_R8G8B8A8_UNORM |
3030
DXGI_FORMAT_R8G8B8A8_SNORM |
3131
DXGI_FORMAT_R8G8B8A8_UINT |
3232
DXGI_FORMAT_R8G8B8A8_SINT |
33-
DXGI_FORMAT_R8G8B8A8_UNORM_SRGB => Some(DXGI_FORMAT_R8G8B8A8_TYPELESS),
33+
DXGI_FORMAT_R8G8B8A8_UNORM_SRGB => Some((DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UINT)),
3434

3535
// ?`
3636
DXGI_FORMAT_B8G8R8A8_UNORM |
37-
DXGI_FORMAT_B8G8R8A8_UNORM_SRGB => Some(DXGI_FORMAT_B8G8R8A8_TYPELESS),
37+
DXGI_FORMAT_B8G8R8A8_UNORM_SRGB => Some((DXGI_FORMAT_B8G8R8A8_TYPELESS, DXGI_FORMAT_R32_UINT)),
3838

3939
DXGI_FORMAT_R8_UNORM |
4040
DXGI_FORMAT_R8_SNORM |
4141
DXGI_FORMAT_R8_UINT |
42-
DXGI_FORMAT_R8_SINT => Some(DXGI_FORMAT_R8_TYPELESS),
42+
DXGI_FORMAT_R8_SINT => Some((DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UINT)),
4343

4444
DXGI_FORMAT_R8G8_UNORM |
4545
DXGI_FORMAT_R8G8_SNORM |
4646
DXGI_FORMAT_R8G8_UINT |
47-
DXGI_FORMAT_R8G8_SINT => Some(DXGI_FORMAT_R8G8_TYPELESS),
47+
DXGI_FORMAT_R8G8_SINT => Some((DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UINT)),
4848

4949
DXGI_FORMAT_R16_UNORM |
5050
DXGI_FORMAT_R16_SNORM |
5151
DXGI_FORMAT_R16_UINT |
5252
DXGI_FORMAT_R16_SINT |
53-
DXGI_FORMAT_R16_FLOAT => Some(DXGI_FORMAT_R16_TYPELESS),
53+
DXGI_FORMAT_R16_FLOAT => Some((DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UINT)),
5454

5555
DXGI_FORMAT_R16G16_UNORM |
5656
DXGI_FORMAT_R16G16_SNORM |
5757
DXGI_FORMAT_R16G16_UINT |
5858
DXGI_FORMAT_R16G16_SINT |
59-
DXGI_FORMAT_R16G16_FLOAT => Some(DXGI_FORMAT_R16G16_TYPELESS),
59+
DXGI_FORMAT_R16G16_FLOAT => Some((DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UINT)),
6060

6161
DXGI_FORMAT_R16G16B16A16_UNORM |
6262
DXGI_FORMAT_R16G16B16A16_SNORM |
6363
DXGI_FORMAT_R16G16B16A16_UINT |
6464
DXGI_FORMAT_R16G16B16A16_SINT |
65-
DXGI_FORMAT_R16G16B16A16_FLOAT => Some(DXGI_FORMAT_R16G16B16A16_TYPELESS),
65+
DXGI_FORMAT_R16G16B16A16_FLOAT => Some((DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UINT)),
6666

6767
DXGI_FORMAT_D32_FLOAT |
6868
DXGI_FORMAT_R32_UINT |
6969
DXGI_FORMAT_R32_SINT |
70-
DXGI_FORMAT_R32_FLOAT => Some(DXGI_FORMAT_R32_TYPELESS),
70+
DXGI_FORMAT_R32_FLOAT => Some((DXGI_FORMAT_R32_TYPELESS, DXGI_FORMAT_R32_UINT)),
7171

7272
DXGI_FORMAT_R32G32_UINT |
7373
DXGI_FORMAT_R32G32_SINT |
74-
DXGI_FORMAT_R32G32_FLOAT => Some(DXGI_FORMAT_R32G32_TYPELESS),
74+
DXGI_FORMAT_R32G32_FLOAT => Some((DXGI_FORMAT_R32G32_TYPELESS, DXGI_FORMAT_R32G32_UINT)),
7575

7676
DXGI_FORMAT_R32G32B32_UINT |
7777
DXGI_FORMAT_R32G32B32_SINT |
78-
DXGI_FORMAT_R32G32B32_FLOAT => Some(DXGI_FORMAT_R32G32B32_TYPELESS),
78+
DXGI_FORMAT_R32G32B32_FLOAT => Some((DXGI_FORMAT_R32G32B32_TYPELESS, DXGI_FORMAT_R32G32B32_UINT)),
7979

8080
DXGI_FORMAT_R32G32B32A32_UINT |
8181
DXGI_FORMAT_R32G32B32A32_SINT |
82-
DXGI_FORMAT_R32G32B32A32_FLOAT => Some(DXGI_FORMAT_R32G32B32A32_TYPELESS),
82+
DXGI_FORMAT_R32G32B32A32_FLOAT => Some((DXGI_FORMAT_R32G32B32A32_TYPELESS, DXGI_FORMAT_R32G32B32A32_UINT)),
83+
84+
DXGI_FORMAT_BC1_UNORM |
85+
DXGI_FORMAT_BC1_UNORM_SRGB => Some((DXGI_FORMAT_BC1_TYPELESS, DXGI_FORMAT_R32_UINT)),
86+
87+
DXGI_FORMAT_BC2_UNORM |
88+
DXGI_FORMAT_BC2_UNORM_SRGB => Some((DXGI_FORMAT_BC2_TYPELESS, DXGI_FORMAT_R32_UINT)),
89+
90+
DXGI_FORMAT_BC3_UNORM |
91+
DXGI_FORMAT_BC3_UNORM_SRGB => Some((DXGI_FORMAT_BC3_TYPELESS, DXGI_FORMAT_R32_UINT)),
92+
93+
DXGI_FORMAT_BC4_UNORM |
94+
DXGI_FORMAT_BC4_SNORM => Some((DXGI_FORMAT_BC4_TYPELESS, DXGI_FORMAT_R32_UINT)),
95+
96+
DXGI_FORMAT_BC5_UNORM |
97+
DXGI_FORMAT_BC5_SNORM => Some((DXGI_FORMAT_BC5_TYPELESS, DXGI_FORMAT_R32_UINT)),
98+
99+
DXGI_FORMAT_BC6H_UF16 |
100+
DXGI_FORMAT_BC6H_SF16 => Some((DXGI_FORMAT_BC6H_TYPELESS, DXGI_FORMAT_R32_UINT)),
101+
102+
// TODO: srgb craziness
103+
DXGI_FORMAT_BC7_UNORM |
104+
DXGI_FORMAT_BC7_UNORM_SRGB => Some((DXGI_FORMAT_BC7_TYPELESS, DXGI_FORMAT_BC7_UNORM)),
83105

84106
/*R5g6b5Unorm => DXGI_FORMAT_B5G6R5_UNORM,
85107
R5g5b5a1Unorm => DXGI_FORMAT_B5G5R5A1_UNORM,

0 commit comments

Comments
 (0)