@@ -12,6 +12,45 @@ using namespace gpu; // createContext, createTensor, createKernel,
1212
1313#define VOCAB_SIZE 50257
1414
15+ // Brace initializer for a WGPURequiredLimits value: requests maxStorageBufferBindingSize = 1 GiB (1073741824) and maxBufferSize = 2 GiB (0x80000000). For the reference limit values see https://github.com/google/dawn/blob/a8fbe981a86cb59536e2de423d2013a82d9b54a0/src/dawn/native/Limits.cpp
16+ #define LIMITS_BUFFER_SIZE_1GB { \
17+ .limits = { \
18+ .maxTextureDimension1D =8192 , \
19+ .maxTextureDimension2D =8192 , \
20+ .maxTextureDimension3D =2048 , \
21+ .maxTextureArrayLayers =256 , \
22+ .maxBindGroups =4 , \
23+ .maxBindGroupsPlusVertexBuffers =24 , \
24+ .maxBindingsPerBindGroup =1000 , \
25+ .maxDynamicUniformBuffersPerPipelineLayout =8 , \
26+ .maxDynamicStorageBuffersPerPipelineLayout =4 , \
27+ .maxSampledTexturesPerShaderStage =16 , \
28+ .maxSamplersPerShaderStage =16 , \
29+ .maxStorageBuffersPerShaderStage =8 , \
30+ .maxStorageTexturesPerShaderStage =4 , \
31+ .maxUniformBuffersPerShaderStage =12 , \
32+ .maxUniformBufferBindingSize =65536 , \
33+ .maxStorageBufferBindingSize =1073741824 , \
34+ .minUniformBufferOffsetAlignment =256 , \
35+ .minStorageBufferOffsetAlignment =256 , \
36+ .maxVertexBuffers =8 , \
37+ .maxBufferSize =0x80000000 , \
38+ .maxVertexAttributes =16 , \
39+ .maxVertexBufferArrayStride =2048 , \
40+ .maxInterStageShaderComponents =64 , \
41+ .maxInterStageShaderVariables =16 , \
42+ .maxColorAttachments =8 , \
43+ .maxColorAttachmentBytesPerSample =32 , \
44+ .maxComputeWorkgroupStorageSize =16384 , \
45+ .maxComputeInvocationsPerWorkgroup =256 , \
46+ .maxComputeWorkgroupSizeX =256 , \
47+ .maxComputeWorkgroupSizeY =256 , \
48+ .maxComputeWorkgroupSizeZ =64 , \
49+ .maxComputeWorkgroupsPerDimension =65535 \
50+ }, \
51+ .nextInChain = nullptr \
52+ }
53+
1554void ENCODER_FORWARD_GPU (float * out,
1655 int * inp, float * wte, float * wpe,
1756 int B, int T, int C){
@@ -25,7 +64,10 @@ void ENCODER_FORWARD_GPU(float* out,
2564 uint32_t C;
2665 };
2766 setLogLevel (kError );
28- Context ctx = createContext ();
67+ WGPURequiredLimits requiredLimits = LIMITS_BUFFER_SIZE_1GB;
68+ Context ctx = createContext ({},{},{
69+ .requiredLimits = &requiredLimits
70+ });
2971 Tensor input = createTensor (ctx, Shape{b * t}, ki32, inp);
3072 Tensor wte_t = createTensor (ctx, Shape{v, c}, kf32, wte);
3173 Tensor wpe_t = createTensor (ctx, Shape{t, c}, kf32, wpe);
@@ -59,7 +101,10 @@ void ENCODER_BACKWARD_GPU(float* dwte, float* dwpe,
59101 uint32_t C;
60102 };
61103 setLogLevel (kError );
62- Context ctx = createContext ();
104+ WGPURequiredLimits requiredLimits = LIMITS_BUFFER_SIZE_1GB;
105+ Context ctx = createContext ({},{},{
106+ .requiredLimits = &requiredLimits
107+ });
63108 Tensor dwte_t = createTensor (ctx, Shape{v, c}, kf32, dwte);
64109 Tensor dwpe_t = createTensor (ctx, Shape{t, c}, kf32, dwpe);
65110 Tensor dout_t = createTensor (ctx, Shape{b * t * c}, kf32, dout);
@@ -171,44 +216,7 @@ void MATMUL_FORWARD_GPU(float* out,
171216 unsigned long c = static_cast <unsigned long >(C);
172217 unsigned long oc = static_cast <unsigned long >(OC);
173218 setLogLevel (kError );
174- // See https://github.com/google/dawn/blob/a8fbe981a86cb59536e2de423d2013a82d9b54a0/src/dawn/native/Limits.cpp
175- WGPURequiredLimits requiredLimits = {
176- .limits = {
177- .maxTextureDimension1D =8192 ,
178- .maxTextureDimension2D =8192 ,
179- .maxTextureDimension3D =2048 ,
180- .maxTextureArrayLayers =256 ,
181- .maxBindGroups =4 ,
182- .maxBindGroupsPlusVertexBuffers =24 ,
183- .maxBindingsPerBindGroup =1000 ,
184- .maxDynamicUniformBuffersPerPipelineLayout =8 ,
185- .maxDynamicStorageBuffersPerPipelineLayout =4 ,
186- .maxSampledTexturesPerShaderStage =16 ,
187- .maxSamplersPerShaderStage =16 ,
188- .maxStorageBuffersPerShaderStage =8 ,
189- .maxStorageTexturesPerShaderStage =4 ,
190- .maxUniformBuffersPerShaderStage =12 ,
191- .maxUniformBufferBindingSize =65536 ,
192- .maxStorageBufferBindingSize =1073741824 ,
193- .minUniformBufferOffsetAlignment =256 ,
194- .minStorageBufferOffsetAlignment =256 ,
195- .maxVertexBuffers =8 ,
196- .maxBufferSize =0x80000000 ,
197- .maxVertexAttributes =16 ,
198- .maxVertexBufferArrayStride =2048 ,
199- .maxInterStageShaderComponents =64 ,
200- .maxInterStageShaderVariables =16 ,
201- .maxColorAttachments =8 ,
202- .maxColorAttachmentBytesPerSample =32 ,
203- .maxComputeWorkgroupStorageSize =16384 ,
204- .maxComputeInvocationsPerWorkgroup =256 ,
205- .maxComputeWorkgroupSizeX =256 ,
206- .maxComputeWorkgroupSizeY =256 ,
207- .maxComputeWorkgroupSizeZ =64 ,
208- .maxComputeWorkgroupsPerDimension =65535
209- },
210- .nextInChain = nullptr
211- };
219+ WGPURequiredLimits requiredLimits = LIMITS_BUFFER_SIZE_1GB;
212220 Context ctx = createContext ({},{},{
213221 .requiredLimits = &requiredLimits
214222 });
@@ -249,7 +257,10 @@ void MATMUL_BACKWARD_GPU(float* dinp, float* dweight, float* dbias,
249257 unsigned long c = static_cast <unsigned long >(C);
250258 unsigned long oc = static_cast <unsigned long >(OC);
251259 setLogLevel (kError );
252- Context ctx = createContext ();
260+ WGPURequiredLimits requiredLimits = LIMITS_BUFFER_SIZE_1GB;
261+ Context ctx = createContext ({},{},{
262+ .requiredLimits = &requiredLimits
263+ });
253264 Tensor dinp_t = createTensor (ctx, Shape{b * t * c}, kf32, dinp);
254265 Tensor dweight_t = createTensor (ctx, Shape{oc * c}, kf32, dweight);
255266 Tensor dbias_t = createTensor (ctx, Shape{oc}, kf32, dbias);
0 commit comments