Skip to content

Commit f6a8f2e

Browse files
committed
debug: add per-step tracing in gatherQ8 fallback
1 parent 52e0350 commit f6a8f2e

1 file changed

Lines changed: 10 additions & 0 deletions

File tree

compute/gpu_engine.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3436,18 +3436,25 @@ func (e *GPUEngine[T]) gatherQ8(
34363436
}
34373437

34383438
// Upload indices as int32 to GPU.
3439+
trace := debugGPU || os.Getenv("UPLOAD_TRACE") == "1"
34393440
idx32 := make([]int32, N)
34403441
for i, id := range idxData {
34413442
idx32[i] = int32(id)
34423443
}
34433444
idxBytes := N * 4
34443445
devIdx, err := e.pool.Alloc(e.deviceID, idxBytes)
34453446
if err != nil {
3447+
if trace {
3448+
fmt.Fprintf(os.Stderr, "[GATHER_Q8] pool.Alloc(idx %d) FAILED: %v — CPU fallback\n", idxBytes, err)
3449+
}
34463450
return e.cpu.Gather(context.Background(), params, indices, output)
34473451
}
34483452
defer e.pool.Free(e.deviceID, devIdx, idxBytes)
34493453

34503454
if err := e.runtime.Memcpy(devIdx, unsafe.Pointer(&idx32[0]), idxBytes, gpuapi.MemcpyHostToDevice); err != nil {
3455+
if trace {
3456+
fmt.Fprintf(os.Stderr, "[GATHER_Q8] Memcpy(idx H2D %d bytes) FAILED: %v — CPU fallback\n", idxBytes, err)
3457+
}
34513458
return e.cpu.Gather(context.Background(), params, indices, output)
34523459
}
34533460

@@ -3456,6 +3463,9 @@ func (e *GPUEngine[T]) gatherQ8(
34563463
outBytes := outElems * f32Size
34573464
devOut, err := e.pool.Alloc(e.deviceID, outBytes)
34583465
if err != nil {
3466+
if trace {
3467+
fmt.Fprintf(os.Stderr, "[GATHER_Q8] pool.Alloc(out %d) FAILED: %v — CPU fallback\n", outBytes, err)
3468+
}
34593469
return e.cpu.Gather(context.Background(), params, indices, output)
34603470
}
34613471

0 commit comments

Comments
 (0)