Skip to content

Commit 2e83a27

Browse files
committed
build(cuda): add missing .cu files to Makefile SRCS
Add dequant_q5_0.cu, dequant_q5k.cu, dequant_q6k.cu, gather_q8.cu, flash_decode.cu, megakernel_ops.cu, ternary_gemv.cu to SRCS. These were defined but not compiled into libkernels.so.
1 parent 5f19e54 commit 2e83a27

1 file changed

Lines changed: 1 addition & 1 deletion

File tree

internal/cuda/kernels/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ ifeq ($(CUDA_ARCH),sm_121)
1111
NVCC_FLAGS += -DFLASH_BLOCK_SIZE=64
1212
endif
1313

14-
SRCS = counter.cu dequant_q4k.cu elementwise.cu elementwise_fp16.cu flash_attention.cu flash_attention2.cu fp4_gemv.cu fp8_gemm.cu fp8_ops.cu fused_add_rmsnorm.cu fused_norm_add.cu fused_qk_norm_rope.cu fused_repeat_interleave.cu fused_rope.cu fused_softmax_vmul.cu fused_swiglu.cu gemm_int8.cu gemm_int4.cu gemm_q4.cu gemm_q8.cu gemv_q4k.cu gemv_q4k_sm121.cu gemv_q5k.cu gemv_q5_0.cu gemv_q6k.cu gemv_warp.cu offset_memcpy.cu paged_attention.cu ragged_attention.cu rope_select.cu scaled_softmax.cu selective_scan.cu sgemv_m1.cu transpose.cu gather.cu rmsnorm.cu argmax.cu
14+
SRCS = counter.cu dequant_q4k.cu dequant_q5_0.cu dequant_q5k.cu dequant_q6k.cu elementwise.cu elementwise_fp16.cu flash_attention.cu flash_attention2.cu flash_decode.cu fp4_gemv.cu fp8_gemm.cu fp8_ops.cu fused_add_rmsnorm.cu fused_norm_add.cu fused_qk_norm_rope.cu fused_repeat_interleave.cu fused_rope.cu fused_softmax_vmul.cu fused_swiglu.cu gather.cu gather_q8.cu gemm_int8.cu gemm_int4.cu gemm_q4.cu gemm_q8.cu gemv_q4k.cu gemv_q4k_sm121.cu gemv_q5k.cu gemv_q5_0.cu gemv_q6k.cu gemv_warp.cu megakernel_ops.cu offset_memcpy.cu paged_attention.cu ragged_attention.cu rope_select.cu scaled_softmax.cu selective_scan.cu sgemv_m1.cu ternary_gemv.cu transpose.cu rmsnorm.cu argmax.cu
1515
OBJS = $(SRCS:.cu=.o)
1616
PIC_OBJS = $(SRCS:.cu=.pic.o)
1717
LIB = libkernels.a

0 commit comments

Comments
 (0)