|
1 |
| -cbuffer BufferImageCopy : register(b0) { |
2 |
| - uint2 BufferSize; |
3 |
| - uint2 ImageOffset; |
// Constants for buffer-to-buffer copies.
// NOTE(review): no entry point visible in this file reads SrcDst; the packing of
// its four components is defined by the host side -- confirm there.
struct BufferCopy
{
    uint4 SrcDst;
};
|
5 | 4 |
|
6 |
| -StructuredBuffer<uint> CopySrc : register(t0); |
7 |
| -RWTexture2D<uint> CopyDst : register(u0); |
// Constants for image-to-image copies.
// NOTE(review): not read by any entry point visible here; presumably Src/Dst hold
// the source and destination offsets -- confirm against the host code.
struct ImageCopy
{
    uint4 Src;
    uint4 Dst;
};
| 9 | + |
// Constants for copies between a buffer and a 2D image.
struct BufferImageCopy
{
    // x = offset, yz = size.
    // The helpers below add .x as the base element index and multiply the
    // thread's y coordinate by .y to step from one row to the next.
    uint4 BufferVars;
    // .xy is added to the thread id to form the texel coordinate.
    uint4 ImageOffset;
    // Not read by the code visible in this file.
    uint4 ImageExtent;
};
| 16 | + |
// Single constant buffer (slot b0) shared by every copy entry point.
// Member order determines the buffer layout and must stay as declared.
cbuffer CopyConstants : register(b0)
{
    BufferCopy      BufferCopies;
    ImageCopy       ImageCopies;
    BufferImageCopy BufferImageCopies;
};
| 22 | + |
// Destination texel coordinate for a thread: the configured image offset
// shifted by the thread's xy position (z is ignored).
uint2 GetImageDst(uint3 dispatch_thread_id)
{
    const uint2 origin = BufferImageCopies.ImageOffset.xy;
    return origin + dispatch_thread_id.xy;
}
| 27 | + |
// Source texel coordinate for a thread: the configured image offset
// shifted by the thread's xy position (z is ignored).
uint2 GetImageSrc(uint3 dispatch_thread_id)
{
    const uint2 origin = BufferImageCopies.ImageOffset.xy;
    return origin + dispatch_thread_id.xy;
}
| 32 | + |
// Destination element index in the buffer for a thread:
// base offset + column + row * BufferVars.y.
uint GetBufferDst(uint3 dispatch_thread_id)
{
    const uint base_offset = BufferImageCopies.BufferVars.x;
    const uint row_start = dispatch_thread_id.y * BufferImageCopies.BufferVars.y;
    return base_offset + dispatch_thread_id.x + row_start;
}
| 37 | + |
// Source element index in the buffer for a thread:
// base offset + column + row * BufferVars.y.
uint GetBufferSrc(uint3 dispatch_thread_id)
{
    const uint base_offset = BufferImageCopies.BufferVars.x;
    const uint row_start = dispatch_thread_id.y * BufferImageCopies.BufferVars.y;
    return base_offset + dispatch_thread_id.x + row_start;
}
| 42 | + |
// Packs four values into one 32-bit word, one byte each.
// Every component is first clamped to 0xff; x lands in bits 0-7, y in bits 8-15,
// z in bits 16-23 and w in bits 24-31.
uint Uint4ToUint(uint4 data)
{
    const uint4 bytes = min(data, uint4(0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff));

    return bytes.x | (bytes.y << 8) | (bytes.z << 16) | (bytes.w << 24);
}
| 57 | + |
// Splits a 32-bit word into its four bytes, most significant first:
// x = bits 24-31, y = bits 16-23, z = bits 8-15, w = bits 0-7.
// Note this is the reverse component order of Uint4ToUint.
uint4 UintToUint4(uint data)
{
    const uint byte3 = data >> 24;
    const uint byte2 = (data >> 16) & 0xff;
    const uint byte1 = (data >> 8) & 0xff;
    const uint byte0 = data & 0xff;

    return uint4(byte3, byte2, byte1, byte0);
}
| 62 | + |
// Splits a 32-bit word into two 16-bit halves, most significant first:
// x = bits 16-31, y = bits 0-15.
uint2 UintToUint2(uint data)
{
    const uint high_half = data >> 16;
    const uint low_half = data & 0x0000ffff;

    return uint2(high_half, low_half);
}
| 67 | + |
// Packs two values into one 32-bit word, 16 bits each.
// Both components are first clamped to 0xffff; x lands in bits 0-15, y in bits 16-31.
uint Uint2ToUint(uint2 data)
{
    const uint2 halves = min(data, uint2(0x0000ffff, 0x0000ffff));

    return halves.x | (halves.y << 16);
}
| 78 | + |
// Buffer side of every copy. Buffers are always R32-aligned, so they are
// accessed as arrays of 32-bit words.
// Source is bound at t0 and destination at u0, the same slots the image
// resources below use.
StructuredBuffer<uint> BufferCopySrc : register(t0);
RWBuffer<uint>         BufferCopyDst : register(u0);
| 82 | + |
// R32 images: one 32-bit channel per texel.
Texture2D<uint>   ImageCopySrcR32 : register(t0);
RWTexture2D<uint> ImageCopyDstR32 : register(u0);
| 86 | + |
// Buffer -> R32 image. Each thread copies one 32-bit word to one texel.
// TODO: correct, but slow
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r32(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    const uint word = BufferCopySrc[GetBufferSrc(dispatch_thread_id)];

    ImageCopyDstR32[GetImageDst(dispatch_thread_id)] = word;
}
| 95 | + |
// R32 image -> buffer. Each thread copies one texel to one 32-bit word.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r32_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    const uint texel = ImageCopySrcR32[GetImageSrc(dispatch_thread_id)];

    BufferCopyDst[GetBufferDst(dispatch_thread_id)] = texel;
}
| 103 | + |
// R16G16 images: two 16-bit channels per texel, i.e. one 32-bit word per texel.
Texture2D<uint2>   ImageCopySrcR16G16 : register(t0);
RWTexture2D<uint2> ImageCopyDstR16G16 : register(u0);
8 | 107 |
|
// Buffer -> R16G16 image. Each thread unpacks one 32-bit word into one
// two-channel texel.
// TODO: correct, but slow
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r16g16(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint2 dst_idx = GetImageDst(dispatch_thread_id);
    uint src_idx = GetBufferSrc(dispatch_thread_id);

    uint packed = BufferCopySrc[src_idx];

    // R is the first channel in memory, so it occupies the low 16 bits of the
    // little-endian word and G the high 16 bits. This matches the R16 and R8
    // kernels, which also treat the low bits as the first element. (Previously
    // the high half was written to R.)
    ImageCopyDstR16G16[dst_idx] = uint2(packed & 0x0000ffff, packed >> 16);
}
| 116 | + |
// R16G16 image -> buffer. Each thread packs one two-channel texel into one
// 32-bit word.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r16g16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    uint dst_idx = GetBufferDst(dispatch_thread_id);
    uint2 src_idx = GetImageSrc(dispatch_thread_id);

    // Uint2ToUint places .x in the low 16 bits and .y in the high 16 bits.
    // R is the first channel in memory, so the texel is passed unswizzled:
    // R -> low half, G -> high half. (Previously a .yx swizzle packed G into
    // the low half, unlike the R16 and R8 kernels.)
    BufferCopyDst[dst_idx] = Uint2ToUint(ImageCopySrcR16G16[src_idx]);
}
| 124 | + |
// R16 images: one 16-bit channel per texel, so each 32-bit buffer word
// carries two texels.
Texture2D<uint>   ImageCopySrcR16 : register(t0);
RWTexture2D<uint> ImageCopyDstR16 : register(u0);
| 128 | + |
// Buffer -> R16 image. Each thread reads one 32-bit word and writes the two
// horizontally adjacent texels it contains.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r16(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    // Two texels per word, so the row term of the word index is halved.
    const uint row_start = (dispatch_thread_id.y * BufferImageCopies.BufferVars.y) / 2;
    const uint word_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + row_start;

    // halves.x = high 16 bits, halves.y = low 16 bits.
    const uint2 halves = UintToUint2(BufferCopySrc[word_idx]);

    // Thread x addresses a pair of texels: columns 2x and 2x + 1.
    const uint3 first_texel = uint3(2, 1, 0) * dispatch_thread_id;

    ImageCopyDstR16[GetImageDst(first_texel)] = halves.y;
    ImageCopyDstR16[GetImageDst(first_texel + uint3(1, 0, 0))] = halves.x;
}
| 138 | + |
// R16 image -> buffer. Each thread reads two horizontally adjacent texels and
// packs them into one 32-bit word.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r16_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    // Thread x addresses a pair of texels: columns 2x and 2x + 1.
    const uint3 first_texel = uint3(2, 1, 0) * dispatch_thread_id;

    const uint texel0 = ImageCopySrcR16[GetImageSrc(first_texel)];
    const uint texel1 = ImageCopySrcR16[GetImageSrc(first_texel + uint3(1, 0, 0))];

    // Two texels per word, so the row term of the word index is halved.
    const uint row_start = (dispatch_thread_id.y * BufferImageCopies.BufferVars.y) / 2;
    const uint word_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + row_start;

    // Uint2ToUint puts the first texel in the low 16 bits, the second in the high 16.
    BufferCopyDst[word_idx] = Uint2ToUint(uint2(texel0, texel1));
}
| 149 | + |
// R8G8 images: two 8-bit channels per texel, so each 32-bit buffer word
// carries two texels.
Texture2D<uint2>   ImageCopySrcR8G8 : register(t0);
RWTexture2D<uint2> ImageCopyDstR8G8 : register(u0);
| 153 | + |
// Buffer -> R8G8 image. Each thread reads one 32-bit word and writes the two
// horizontally adjacent two-channel texels it contains.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r8g8(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Two texels per word, so the row term of the word index is halved.
    uint src_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2;

    uint packed = BufferCopySrc[src_idx];

    // Byte order within the little-endian word follows memory order:
    //   byte 0 = texel 0 R, byte 1 = texel 0 G, byte 2 = texel 1 R, byte 3 = texel 1 G.
    // This matches the R8 kernel, where the lowest byte is the first texel.
    // (Previously texel 0 was taken from bytes 3 and 2, reversing both texel
    // and channel order.)
    uint2 texel0 = uint2(packed & 0xff, (packed >> 8) & 0xff);
    uint2 texel1 = uint2((packed >> 16) & 0xff, packed >> 24);

    // Thread x addresses a pair of texels: columns 2x and 2x + 1.
    ImageCopyDstR8G8[GetImageDst(uint3(2, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))] = texel0;
    ImageCopyDstR8G8[GetImageDst(uint3(2, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))] = texel1;
}
| 163 | + |
// R8G8 image -> buffer. Each thread reads two horizontally adjacent
// two-channel texels and packs them into one 32-bit word.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r8g8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID) {
    // Two texels per word, so the row term of the word index is halved.
    uint dst_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + dispatch_thread_id.y * BufferImageCopies.BufferVars.y / 2;

    // Thread x addresses a pair of texels: columns 2x and 2x + 1.
    uint2 texel0 = ImageCopySrcR8G8[GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id + uint3(0, 0, 0))];
    uint2 texel1 = ImageCopySrcR8G8[GetImageSrc(uint3(2, 1, 0) * dispatch_thread_id + uint3(1, 0, 0))];

    // Uint4ToUint places x..w in bytes 0..3, so this yields the memory order
    // R0, G0, R1, G1, matching the R8 kernel where the first texel is the
    // lowest byte. (Previously the word was packed as G1, R1, G0, R0.)
    uint data = Uint4ToUint(uint4(texel0.x, texel0.y, texel1.x, texel1.y));

    BufferCopyDst[dst_idx] = data;
}
| 174 | + |
// R8 images: one 8-bit channel per texel, so each 32-bit buffer word
// carries four texels.
Texture2D<uint>   ImageCopySrcR8 : register(t0);
RWTexture2D<uint> ImageCopyDstR8 : register(u0);
| 178 | + |
// Buffer -> R8 image. Each thread reads one 32-bit word and writes the four
// horizontally adjacent texels it contains.
[numthreads(1, 1, 1)]
void cs_copy_buffer_image2d_r8(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    // Four texels per word, so the row term of the word index is quartered.
    const uint row_start = (dispatch_thread_id.y * BufferImageCopies.BufferVars.y) / 4;
    const uint word_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + row_start;

    // UintToUint4 returns the most significant byte in .x and the least in .w.
    const uint4 bytes = UintToUint4(BufferCopySrc[word_idx]);

    // Thread x addresses four texels: columns 4x .. 4x + 3.
    const uint3 first_texel = uint3(4, 1, 0) * dispatch_thread_id;

    // The lowest byte goes to the first texel.
    ImageCopyDstR8[GetImageDst(first_texel)] = bytes.w;
    ImageCopyDstR8[GetImageDst(first_texel + uint3(1, 0, 0))] = bytes.z;
    ImageCopyDstR8[GetImageDst(first_texel + uint3(2, 0, 0))] = bytes.y;
    ImageCopyDstR8[GetImageDst(first_texel + uint3(3, 0, 0))] = bytes.x;
}
| 189 | + |
// R8 image -> buffer. Each thread reads four horizontally adjacent texels and
// packs them into one 32-bit word.
[numthreads(1, 1, 1)]
void cs_copy_image2d_r8_buffer(uint3 dispatch_thread_id : SV_DispatchThreadID)
{
    // Thread x addresses four texels: columns 4x .. 4x + 3.
    const uint3 first_texel = uint3(4, 1, 0) * dispatch_thread_id;

    const uint texel0 = ImageCopySrcR8[GetImageSrc(first_texel)];
    const uint texel1 = ImageCopySrcR8[GetImageSrc(first_texel + uint3(1, 0, 0))];
    const uint texel2 = ImageCopySrcR8[GetImageSrc(first_texel + uint3(2, 0, 0))];
    const uint texel3 = ImageCopySrcR8[GetImageSrc(first_texel + uint3(3, 0, 0))];

    // Four texels per word, so the row term of the word index is quartered.
    const uint row_start = (dispatch_thread_id.y * BufferImageCopies.BufferVars.y) / 4;
    const uint word_idx = BufferImageCopies.BufferVars.x + dispatch_thread_id.x + row_start;

    // Uint4ToUint puts the first texel in the lowest byte.
    BufferCopyDst[word_idx] = Uint4ToUint(uint4(texel0, texel1, texel2, texel3));
}
|
0 commit comments