Argument Buffers (#24)

* Stuff

* More arg buffer stuff

* Fixes

* Rebase

* Pass storage buffers to inline functions

* Fix binding

* Fix typo + Fix a couple shaders

* Enforce ids

* Dispose

* Mark used buffers as resident

* Update depth clear shader

* Fix non-contiguous struct defs

* Update ChangeBufferStride

* Fix StorageBuffer assignments

* Fix odyssey crash

* Retain buffer bindings

* Pad Std140

* Set texture data with safe buffers

* Clone buffers

* Always declare vert in

* Stop clears from breaking OpenGL games

* Fix depth clear

* Use invariant position

* Horribly inefficient texture & sampler arg buffers

* Fix missing struct access

* Minimise rebinds as much as possible

* Build arg buffers on staging buffer
This commit is contained in:
Isaac Marovitz
2024-06-25 14:25:31 +01:00
committed by Evan Husted
parent a1ab7fe6a2
commit dae0f3cded
20 changed files with 721 additions and 402 deletions

View File

@@ -7,14 +7,154 @@ struct CopyVertexOut {
float2 uv;
};
// Four floats used as texture coordinates for the blit quad.
// vertexMain reads data[vid & 1] for uv.x and data[2 + (vid >> 1)] for uv.y.
struct TexCoords {
float data[4];
};
// Table of pointers to constant-address-space data for this shader.
// vertexMain receives it by reference at [[buffer(20)]].
struct ConstantBuffers {
// Points at the texture-coordinate block that vertexMain indexes.
constant TexCoords* texCoord;
};
// Texture/sampler table that fragmentMain receives by reference at [[buffer(22)]].
//
// The struct has 128 members: the texture is the first, the sampler is the
// 65th, and every other member is a ulong placeholder (padding_1..padding_63
// and padding_65..padding_127). The placeholder numbering skips 64, which is
// the position the sampler occupies.
//
// NOTE(review): the member order presumably has to match what the host encodes
// into this buffer (64 texture slots followed by 64 sampler slots?) — confirm
// against the host-side code before reordering, merging or removing members.
struct Textures
{
// Member 0: the texture sampled by fragmentMain.
texture2d<float, access::sample> texture;
// Members 1..63: placeholders between the texture and the sampler.
ulong padding_1;
ulong padding_2;
ulong padding_3;
ulong padding_4;
ulong padding_5;
ulong padding_6;
ulong padding_7;
ulong padding_8;
ulong padding_9;
ulong padding_10;
ulong padding_11;
ulong padding_12;
ulong padding_13;
ulong padding_14;
ulong padding_15;
ulong padding_16;
ulong padding_17;
ulong padding_18;
ulong padding_19;
ulong padding_20;
ulong padding_21;
ulong padding_22;
ulong padding_23;
ulong padding_24;
ulong padding_25;
ulong padding_26;
ulong padding_27;
ulong padding_28;
ulong padding_29;
ulong padding_30;
ulong padding_31;
ulong padding_32;
ulong padding_33;
ulong padding_34;
ulong padding_35;
ulong padding_36;
ulong padding_37;
ulong padding_38;
ulong padding_39;
ulong padding_40;
ulong padding_41;
ulong padding_42;
ulong padding_43;
ulong padding_44;
ulong padding_45;
ulong padding_46;
ulong padding_47;
ulong padding_48;
ulong padding_49;
ulong padding_50;
ulong padding_51;
ulong padding_52;
ulong padding_53;
ulong padding_54;
ulong padding_55;
ulong padding_56;
ulong padding_57;
ulong padding_58;
ulong padding_59;
ulong padding_60;
ulong padding_61;
ulong padding_62;
ulong padding_63;
// Member 64: the sampler passed to texture.sample() in fragmentMain.
sampler sampler;
// Members 65..127: placeholders after the sampler.
ulong padding_65;
ulong padding_66;
ulong padding_67;
ulong padding_68;
ulong padding_69;
ulong padding_70;
ulong padding_71;
ulong padding_72;
ulong padding_73;
ulong padding_74;
ulong padding_75;
ulong padding_76;
ulong padding_77;
ulong padding_78;
ulong padding_79;
ulong padding_80;
ulong padding_81;
ulong padding_82;
ulong padding_83;
ulong padding_84;
ulong padding_85;
ulong padding_86;
ulong padding_87;
ulong padding_88;
ulong padding_89;
ulong padding_90;
ulong padding_91;
ulong padding_92;
ulong padding_93;
ulong padding_94;
ulong padding_95;
ulong padding_96;
ulong padding_97;
ulong padding_98;
ulong padding_99;
ulong padding_100;
ulong padding_101;
ulong padding_102;
ulong padding_103;
ulong padding_104;
ulong padding_105;
ulong padding_106;
ulong padding_107;
ulong padding_108;
ulong padding_109;
ulong padding_110;
ulong padding_111;
ulong padding_112;
ulong padding_113;
ulong padding_114;
ulong padding_115;
ulong padding_116;
ulong padding_117;
ulong padding_118;
ulong padding_119;
ulong padding_120;
ulong padding_121;
ulong padding_122;
ulong padding_123;
ulong padding_124;
ulong padding_125;
ulong padding_126;
ulong padding_127;
};
vertex CopyVertexOut vertexMain(uint vid [[vertex_id]],
const device float* texCoord [[buffer(0)]]) {
constant ConstantBuffers &constant_buffers [[buffer(20)]]) {
CopyVertexOut out;
int low = vid & 1;
int high = vid >> 1;
out.uv.x = texCoord[low];
out.uv.y = texCoord[2 + high];
out.uv.x = constant_buffers.texCoord->data[low];
out.uv.y = constant_buffers.texCoord->data[2 + high];
out.position.x = (float(low) - 0.5f) * 2.0f;
out.position.y = (float(high) - 0.5f) * 2.0f;
out.position.z = 0.0f;
@@ -24,7 +164,6 @@ vertex CopyVertexOut vertexMain(uint vid [[vertex_id]],
}
fragment float4 fragmentMain(CopyVertexOut in [[stage_in]],
texture2d<float, access::sample> texture [[texture(0)]],
sampler sampler [[sampler(0)]]) {
return texture.sample(sampler, in.uv);
constant Textures &textures [[buffer(22)]]) {
return textures.texture.sample(textures.sampler, in.uv);
}

View File

@@ -2,19 +2,40 @@
using namespace metal;
kernel void kernelMain(constant int4& stride_arguments [[buffer(0)]],
device uint8_t* in_data [[buffer(1)]],
device uint8_t* out_data [[buffer(2)]],
// Parameters of the stride-change copy, packed into a single int4.
// kernelMain reads them as: x = source stride, y = target stride,
// z = buffer size, w = source offset.
struct StrideArguments {
int4 data;
};
// Byte view of the copy source.
// The array is declared with one element, but kernelMain indexes it with a
// running source offset, so it is used as if it had no fixed length.
struct InData {
uint8_t data[1];
};
// Byte view of the copy destination.
// The array is declared with one element, but kernelMain indexes it with a
// running destination offset, so it is used as if it had no fixed length.
struct OutData {
uint8_t data[1];
};
// Table of pointers to constant-address-space data for the stride kernel.
// kernelMain receives it by reference at [[buffer(20)]].
struct ConstantBuffers {
// Points at the int4 of stride/size/offset parameters read by kernelMain.
constant StrideArguments* stride_arguments;
};
// Table of pointers to device-address-space buffers for the stride kernel.
// kernelMain receives it by reference at [[buffer(21)]].
struct StorageBuffers {
// Unused by the kernel; occupies the first slot of the table.
// NOTE(review): presumably keeps in_data/out_data at slots 1 and 2 so they
// line up with the host-side encoding — confirm before removing.
ulong padding;
// Source bytes read by kernelMain.
device InData* in_data;
// Destination bytes written by kernelMain.
device OutData* out_data;
};
kernel void kernelMain(constant ConstantBuffers &constant_buffers [[buffer(20)]],
device StorageBuffers &storage_buffers [[buffer(21)]],
uint3 thread_position_in_grid [[thread_position_in_grid]],
uint3 threads_per_threadgroup [[threads_per_threadgroup]],
uint3 threadgroups_per_grid [[threads_per_grid]])
{
// Determine what slice of the stride copies this invocation will perform.
int sourceStride = stride_arguments.x;
int targetStride = stride_arguments.y;
int bufferSize = stride_arguments.z;
int sourceOffset = stride_arguments.w;
int sourceStride = constant_buffers.stride_arguments->data.x;
int targetStride = constant_buffers.stride_arguments->data.y;
int bufferSize = constant_buffers.stride_arguments->data.z;
int sourceOffset = constant_buffers.stride_arguments->data.w;
int strideRemainder = targetStride - sourceStride;
int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x);
@@ -42,11 +63,11 @@ kernel void kernelMain(constant int4& stride_arguments [[buffer(0)]],
// Perform the copies for this region
for (int i = 0; i < copyCount; i++) {
for (int j = 0; j < sourceStride; j++) {
out_data[dstOffset++] = in_data[srcOffset++];
storage_buffers.out_data->data[dstOffset++] = storage_buffers.in_data->data[srcOffset++];
}
for (int j = 0; j < strideRemainder; j++) {
out_data[dstOffset++] = uint8_t(0);
storage_buffers.out_data->data[dstOffset++] = uint8_t(0);
}
}
}

View File

@@ -6,6 +6,14 @@ struct VertexOut {
float4 position [[position]];
};
// Wraps the float4 clear value; fragmentMain uses data to initialise the
// FragmentOut it returns.
struct ClearColor {
float4 data;
};
// Table of pointers to constant-address-space data for the colour-clear shader.
// fragmentMain receives it by reference at [[buffer(20)]].
struct ConstantBuffers {
// Points at the clear value read by fragmentMain.
constant ClearColor* clear_color;
};
vertex VertexOut vertexMain(ushort vid [[vertex_id]]) {
int low = vid & 1;
int high = vid >> 1;
@@ -25,6 +33,6 @@ struct FragmentOut {
};
fragment FragmentOut fragmentMain(VertexOut in [[stage_in]],
constant float4& clear_color [[buffer(0)]]) {
return {clear_color};
constant ConstantBuffers &constant_buffers [[buffer(20)]]) {
return {constant_buffers.clear_color->data};
}

View File

@@ -11,6 +11,14 @@ struct FragmentOut {
uint stencil [[stencil]];
};
// Wraps the single float that fragmentMain writes to out.depth.
struct ClearDepth {
float data;
};
// Table of pointers to constant-address-space data for the depth-clear shader.
// fragmentMain receives it by reference at [[buffer(20)]].
struct ConstantBuffers {
// Points at the depth value read by fragmentMain.
constant ClearDepth* clear_depth;
};
vertex VertexOut vertexMain(ushort vid [[vertex_id]]) {
int low = vid & 1;
int high = vid >> 1;
@@ -26,10 +34,10 @@ vertex VertexOut vertexMain(ushort vid [[vertex_id]]) {
}
fragment FragmentOut fragmentMain(VertexOut in [[stage_in]],
constant float& clear_depth [[buffer(0)]]) {
constant ConstantBuffers &constant_buffers [[buffer(20)]]) {
FragmentOut out;
out.depth = clear_depth;
out.depth = constant_buffers.clear_depth->data;
// out.stencil = stencil_clear;
return out;