@@ -202,8 +202,6 @@ static inline int clock_gettime(int id, struct timespec* ts) {
202202// Section 1: Types and Specs (from tq_types.h, tq_spec.h)
203203// ============================================================================
204204
205-
206-
207205/* Cross-language static assert: works in both C11 and C++11/17 */
208206#ifdef __cplusplus
209207#define TQ_STATIC_ASSERT(cond, msg) static_assert(cond, msg)
@@ -219,8 +217,6 @@ static inline int clock_gettime(int id, struct timespec* ts) {
219217#define TQ_PI_2 1.5707963267948966f
220218#endif
221219
222-
223-
224220/* ============================================================
225221 * Constants
226222 * ============================================================ */
@@ -398,8 +394,6 @@ typedef struct {
398394 int enable_recompression;/* Tier 1 → Tier 2 re-compression */
399395} tq_progressive_config_t;
400396
401-
402-
403397/* TurboQuant KV cache block: RHT + Lloyd-Max codebook + QJL residual
404398 * 3-bit variant: 2-bit codebook (4 levels) + 1-bit QJL sign hash
405399 * Block covers TQ_BK elements (128).
@@ -469,12 +463,6 @@ TQ_CHECK_SIZE(block_tq_turbo_kv_4b, 8 + TQ_BK * 3 / 8 + TQ_BK / 8);
469463TQ_CHECK_SIZE(block_tq_turbo_kv_1b, 8 + TQ_BK / 8);
470464TQ_CHECK_SIZE(block_tq_turbo_kv_2b, 8 + TQ_BK / 8 + TQ_BK / 8);
471465
472-
473-
474-
475-
476-
477-
478466/* Format specification — version-aware, ONNX-inspired */
479467
480468#define TQ_SPEC_VERSION 1
@@ -500,18 +488,10 @@ typedef struct {
500488 uint8_t flags; /* TQ_FLAG_* bitmask */
501489} tq_format_spec_t;
502490
503-
504-
505-
506-
507491// ============================================================================
508492// Section 2: Engine Types (from tq_engine.h)
509493// ============================================================================
510494
511-
512-
513-
514-
515495/* ============================================================
516496 * Model configuration
517497 * ============================================================ */
@@ -1123,9 +1103,6 @@ void tq_tp_run(void* (*fn)(void*), void** args, int n_tasks);
11231103/* Max threads supported by thread pool */
11241104#define TQ_TP_MAX 16
11251105
1126-
1127-
1128-
11291106// ============================================================================
11301107// Section 3: GGUF Types (from tq_gguf.h)
11311108// ============================================================================
@@ -1143,10 +1120,6 @@ void tq_tp_run(void* (*fn)(void*), void** args, int n_tasks);
11431120 * directly into TurboQuant inference engine.
11441121 */
11451122
1146-
1147-
1148-
1149-
11501123/* ============================================================
11511124 * GGUF format constants
11521125 * ============================================================ */
@@ -1462,24 +1435,17 @@ int tq_metal_moe_forward(
14621435 const int* up_types, /* per-expert up quant types, NULL = use weight_type */
14631436 const int* down_types); /* per-expert down quant types, NULL = use weight_type */
14641437
1465-
1466-
1467-
14681438// ============================================================================
14691439// Section 4: Internal API (from turboquant.h)
14701440// ============================================================================
14711441
1472-
14731442/**
14741443 * TurboQuant.cpp — Cross-platform KV cache compression library
14751444 *
14761445 * Public C API — single header include for all functionality.
14771446 * Zero external dependencies (libc/libm only).
14781447 */
14791448
1480-
1481-
1482-
14831449/* ============================================================
14841450 * Version
14851451 * ============================================================ */
@@ -1753,15 +1719,10 @@ void tq_progressive_free(tq_progressive_t* p);
17531719
17541720tq_progressive_config_t tq_progressive_default_config(void);
17551721
1756-
1757-
1758-
1759-
17601722// ============================================================================
17611723// Section 5: quant_ctx struct definition
17621724// ============================================================================
17631725
1764-
17651726struct quant_ctx {
17661727 tq_model_t* model;
17671728 tq_state_t* state;
@@ -1788,7 +1749,6 @@ struct quant_ctx {
17881749 * - Random signs decorrelate channels across different blocks
17891750 */
17901751
1791-
17921752#ifdef __ARM_NEON
17931753#include <arm_neon.h>
17941754#endif
@@ -1902,7 +1862,6 @@ void tq_rht_inverse(float* data, int n, uint32_t seed) {
19021862 */
19031863/* Generic reference — no compiler-specific pragmas */
19041864
1905-
19061865/* ---------- FP16 helpers ---------- */
19071866
19081867static uint16_t uni_fp32_to_fp16(float v) {
@@ -2285,7 +2244,6 @@ void tq_uniform_3b_attention_ref(const float* query, const void* kv,
22852244// Section 8: Type Traits (from tq_traits.c)
22862245// ============================================================================
22872246
2288-
22892247/* Stub implementations for excluded quantization types (polar, qjl, turbo, mixed) */
22902248static void tq_stub_quantize(const float* src, void* dst, int n) {
22912249 (void)src; (void)dst; (void)n;
@@ -2583,7 +2541,6 @@ tq_type tq_type_from_name(const char* name) {
25832541 * No external dependencies — libc/libm only.
25842542 */
25852543
2586-
25872544#ifdef __ARM_NEON
25882545#include <arm_neon.h>
25892546#endif
@@ -2617,7 +2574,6 @@ static struct {
26172574
26182575static int g_n_threads = 1;
26192576
2620-
26212577static void* tp_worker(void* arg) {
26222578 int id = (int)(intptr_t)arg;
26232579 int my_gen = 0;
@@ -4388,8 +4344,6 @@ void tq_matmul_1bit(float* out, const float* x,
43884344 * SPDX-License-Identifier: MIT
43894345 */
43904346
4391-
4392-
43934347#ifdef _WIN32
43944348#else
43954349#endif
@@ -5098,8 +5052,6 @@ const tq_gguf_tensor_t* tq_gguf_find_tensor(const tq_gguf_ctx_t* ctx, const char
50985052 * Pure C11, no external dependencies.
50995053 */
51005054
5101-
5102-
51035055#if defined(__ARM_NEON) || defined(__ARM_NEON__)
51045056#include <arm_neon.h>
51055057#define TQ_HAS_NEON 1
@@ -7174,7 +7126,6 @@ void tq_metal_batch_end_if_available(void) {
71747126 * Also supports the legacy llama2.c binary tokenizer format as fallback.
71757127 */
71767128
7177-
71787129/* Global for qsort comparator (vocab index sorting) */
71797130static char** g_vocab_for_sort;
71807131static int cmp_vocab_idx(const void* a, const void* b) {
@@ -8519,7 +8470,6 @@ const char* tq_decode(const tq_tokenizer_t* tok, int prev_token, int token) {
85198470 * Supports hybrid architectures (e.g., Qwen3.5 DeltaNet + self_attn).
85208471 */
85218472
8522-
85238473#ifdef _WIN32
85248474#else
85258475#endif
@@ -12934,7 +12884,6 @@ void tq_quantize_weights_1bit(tq_model_t* model) {
1293412884 * -> residual add
1293512885 */
1293612886
12937-
1293812887/* Unified Q2/1-bit matmul dispatch.
1293912888 * When model->use_1bit_weights, Q2 fields contain sign bits + norms,
1294012889 * dispatched to tq_matmul_1bit (FP32 input required).
@@ -15194,7 +15143,6 @@ float* tq_forward(tq_model_t* model, tq_state_t* s, int token, int pos) {
1519415143 }
1519515144 }
1519615145
15197-
1519815146 /* Increment profile token count if profiling is active */
1519915147 if (s->profile_kv) {
1520015148 s->profile_kv_count++;
@@ -15245,7 +15193,6 @@ float* tq_forward(tq_model_t* model, tq_state_t* s, int token, int pos) {
1524515193 * - Full generation loop with streaming callback
1524615194 */
1524715195
15248-
1524915196/* ============================================================
1525015197 * Argmax sampling: return token with highest logit
1525115198 * ============================================================ */
@@ -15673,7 +15620,6 @@ int tq_generate(tq_model_t* model, tq_tokenizer_t* tokenizer,
1567315620 return generated;
1567415621}
1567515622
15676-
1567715623// ============================================================================
1567815624
1567915625// ============================================================================
0 commit comments