Skip to content

Commit 5c7ec7a

Browse files
committed
fix(compute): Q5_0 GEMV byte-wise loads for ARM64 alignment
1 parent 2e91650 commit 5c7ec7a

1 file changed

Lines changed: 1 addition & 2 deletions

File tree

compute/gpu_engine.go

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -484,8 +484,7 @@ func (e *GPUEngine[T]) UploadWeights(tensors []*tensor.TensorNumeric[float32]) e
484 484
if _, ok := any(t.GetStorage()).(*tensor.Q8Storage); ok {
485 485
continue
486 486
}
487-
// Skip Q4Storage — already uploaded as raw Q4 bytes by the Q4 handler
488-
// above (line ~272). Q4 GEMV reads quantized data directly (0.5 bytes/weight).
487+
// Skip Q4_0: already uploaded as raw Q4 bytes by the Q4 handler above.
489 488
if _, ok := any(t.GetStorage()).(*tensor.Q4Storage); ok {
490 489
continue
491 490
}

0 commit comments

Comments
 (0)