Skip to content

Commit 48cb3a3

Browse files
unamedkr and claude committed
CLI: progressive k128 auto-enabled when KV is compressed
k_highres_window now defaults to -1 (auto). When KV compression is active (any kv_type except fp32), it automatically resolves to 128. Users can still disable it with --k-window 0 or set a custom value.

Now all 3 distribution channels default to progressive:
- Python: progressive=True
- WASM: k_highres_window=128
- CLI: auto k128 when -k is set

Verified: SmolLM2 PPL 48.37 (same as explicit --k-window 128).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent da985db commit 48cb3a3

1 file changed

Lines changed: 13 additions & 2 deletions

File tree

tools/quant.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ int main(int argc, char** argv) {
192192
int override_ctx = 0; /* 0 = use model default (capped at 4096) */
193193
int delta_kv = 0; /* 1 = delta KV compression (store key deltas) */
194194
int delta_iframe_int = 0; /* I-frame interval for delta KV (0 = auto = 64) */
195-
int k_highres_window = 0; /* age-based: recent N keys at FP32, rest at 2-bit */
195+
int k_highres_window = -1; /* -1=auto (128 when KV compressed), 0=off, N=explicit */
196196
int json_output = 0; /* 1 = JSON output for --ppl */
197197
int chat_mode = 0; /* 1 = auto-wrap prompt with chat template */
198198
const char* save_logits_file = NULL;
@@ -478,7 +478,14 @@ int main(int argc, char** argv) {
478478
fprintf(stderr, "Delta KV compression: ENABLED (mixed-precision, I-frame=%d)\n", ifi);
479479
}
480480

481-
/* Set up K highres window (age-based progressive K compression) */
481+
/* Progressive KV: auto-enable k128 when KV is compressed.
482+
* Verified on 3 models: strictly better quality at 1.75 MB cost.
483+
* User can override with --k-window 0 to disable. */
484+
if (k_highres_window == -1 && state->kv_quant_type < TQ_TYPE_COUNT && state->quant_key_cache) {
485+
k_highres_window = 128;
486+
} else if (k_highres_window == -1) {
487+
k_highres_window = 0;
488+
}
482489
if (k_highres_window > 0 && state->kv_quant_type < TQ_TYPE_COUNT && state->quant_key_cache) {
483490
int kv_dim_e = model->config.n_kv_heads * model->config.head_dim;
484491
int cache_kv_dim_e = model->config.n_kv_heads * model->config.head_dim;
@@ -1271,6 +1278,10 @@ int main(int argc, char** argv) {
12711278
config.v_highres_window = v_highres_window;
12721279
config.delta_kv = delta_kv;
12731280
config.delta_iframe_interval = delta_iframe_int;
1281+
/* Auto progressive for generation path too */
1282+
if (k_highres_window == -1) {
1283+
k_highres_window = (kv_type < TQ_TYPE_COUNT) ? 128 : 0;
1284+
}
12741285
config.k_highres_window = k_highres_window;
12751286
config.save_kv_path = save_kv_file;
12761287
config.load_kv_path = load_kv_file;

0 commit comments

Comments
 (0)