Skip to content

Commit dc63c8f

Browse files
committed
chore: remove debug tracing from gatherQ8 and UploadWeights
1 parent 2e83a27 commit dc63c8f

1 file changed

Lines changed: 0 additions & 29 deletions

File tree

compute/gpu_engine.go

Lines changed: 0 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -569,16 +569,6 @@ func (e *GPUEngine[T]) UploadWeights(tensors []*tensor.TensorNumeric[float32]) e
569569
"device", fmt.Sprintf("%d", e.deviceID),
570570
"method", method)
571571
}
572-
// Check for sticky CUDA errors after UploadWeights.
573-
if debugGPU || os.Getenv("UPLOAD_TRACE") == "1" {
574-
if e.stream != nil {
575-
if syncErr := e.stream.Synchronize(); syncErr != nil {
576-
fmt.Fprintf(os.Stderr, "[UPLOAD] CUDA sync error after UploadWeights: %v\n", syncErr)
577-
} else {
578-
fmt.Fprintf(os.Stderr, "[UPLOAD] CUDA context clean after UploadWeights\n")
579-
}
580-
}
581-
}
582572
return nil
583573
}
584574

@@ -3421,9 +3411,6 @@ func (e *GPUEngine[T]) gatherQ8(
34213411
devQ8 unsafe.Pointer,
34223412
) error {
34233413
e.setDevice()
3424-
if debugGPU || os.Getenv("UPLOAD_TRACE") == "1" {
3425-
fmt.Fprintf(os.Stderr, "[GATHER_Q8] called: V=%d D=%d devQ8=%p\n", params.Shape()[0], params.Shape()[1], devQ8)
3426-
}
34273414

34283415
pShape := params.Shape()
34293416
V := pShape[0]
@@ -3436,25 +3423,18 @@ func (e *GPUEngine[T]) gatherQ8(
34363423
}
34373424

34383425
// Upload indices as int32 to GPU.
3439-
trace := debugGPU || os.Getenv("UPLOAD_TRACE") == "1"
34403426
idx32 := make([]int32, N)
34413427
for i, id := range idxData {
34423428
idx32[i] = int32(id)
34433429
}
34443430
idxBytes := N * 4
34453431
devIdx, err := e.pool.Alloc(e.deviceID, idxBytes)
34463432
if err != nil {
3447-
if trace {
3448-
fmt.Fprintf(os.Stderr, "[GATHER_Q8] pool.Alloc(idx %d) FAILED: %v — CPU fallback\n", idxBytes, err)
3449-
}
34503433
return e.cpu.Gather(context.Background(), params, indices, output)
34513434
}
34523435
defer e.pool.Free(e.deviceID, devIdx, idxBytes)
34533436

34543437
if err := e.runtime.Memcpy(devIdx, unsafe.Pointer(&idx32[0]), idxBytes, gpuapi.MemcpyHostToDevice); err != nil {
3455-
if trace {
3456-
fmt.Fprintf(os.Stderr, "[GATHER_Q8] Memcpy(idx H2D %d bytes) FAILED: %v — CPU fallback\n", idxBytes, err)
3457-
}
34583438
return e.cpu.Gather(context.Background(), params, indices, output)
34593439
}
34603440

@@ -3463,23 +3443,14 @@ func (e *GPUEngine[T]) gatherQ8(
34633443
outBytes := outElems * f32Size
34643444
devOut, err := e.pool.Alloc(e.deviceID, outBytes)
34653445
if err != nil {
3466-
if trace {
3467-
fmt.Fprintf(os.Stderr, "[GATHER_Q8] pool.Alloc(out %d) FAILED: %v — CPU fallback\n", outBytes, err)
3468-
}
34693446
return e.cpu.Gather(context.Background(), params, indices, output)
34703447
}
34713448

34723449
// Launch Q8 gather kernel.
34733450
if err := e.kernels.GatherQ8F32(devQ8, devIdx, devOut, N, D, V, e.stream); err != nil {
3474-
if debugGPU || os.Getenv("UPLOAD_TRACE") == "1" {
3475-
fmt.Fprintf(os.Stderr, "[GATHER_Q8] kernel FAILED: %v — fallback to CPU\n", err)
3476-
}
34773451
e.pool.Free(e.deviceID, devOut, outBytes)
34783452
return e.cpu.Gather(context.Background(), params, indices, output)
34793453
}
3480-
if debugGPU || os.Getenv("UPLOAD_TRACE") == "1" {
3481-
fmt.Fprintf(os.Stderr, "[GATHER_Q8] kernel OK, devOut=%p elems=%d\n", devOut, outElems)
3482-
}
34833454

34843455
// Write result into output tensor as GPUStorage (pool-backed).
34853456
gs, err := tensor.NewGPUStorageFromPool[float32](devOut, outElems, e.pool, e.deviceID)

0 commit comments

Comments
 (0)