fix(compute): Q5_0 GEMV byte-wise loads for ARM64 alignment

dndungu · dndungu · commit 5c7ec7a7eeba · 2026-03-31T21:15:24.000-07:00
diff --git a/compute/gpu_engine.go b/compute/gpu_engine.go
@@ -484,8 +484,7 @@ func (e *GPUEngine[T]) UploadWeights(tensors []*tensor.TensorNumeric[float32]) e
 		if _, ok := any(t.GetStorage()).(*tensor.Q8Storage); ok {
 			continue
 		}
-		// Skip Q4Storage — already uploaded as raw Q4 bytes by the Q4 handler
-		// above (line ~272). Q4 GEMV reads quantized data directly (0.5 bytes/weight).
+		// Skip Q4_0: already uploaded as raw Q4 bytes by the Q4 handler above.
 		if _, ok := any(t.GetStorage()).(*tensor.Q4Storage); ok {
 			continue
 		}

Original file line number	Diff line number	Diff line change
`@@ -484,8 +484,7 @@ func (e *GPUEngine[T]) UploadWeights(tensors []*tensor.TensorNumeric[float32]) e`
`484`	`484`	`if _, ok := any(t.GetStorage()).(*tensor.Q8Storage); ok {`
`485`	`485`	`continue`
`486`	`486`	`}`
`487`		`- // Skip Q4Storage — already uploaded as raw Q4 bytes by the Q4 handler`
`488`		`- // above (line ~272). Q4 GEMV reads quantized data directly (0.5 bytes/weight).`
	`487`	`+ // Skip Q4_0: already uploaded as raw Q4 bytes by the Q4 handler above.`
`489`	`488`	`if _, ok := any(t.GetStorage()).(*tensor.Q4Storage); ok {`
`490`	`489`	`continue`
`491`	`490`	`}`