Skip to content

Commit 69f2994

Browse files
unamedkr and claude committed
Add --version flag and --json PPL output
--version: prints "quant.cpp v0.2.0" with repo URL.
--json: outputs PPL results as machine-parseable JSON to stdout (human-readable output still goes to stderr).

Example: ./quant model.gguf --ppl text.txt -k uniform_4b --json
Output: {"model": "...", "perplexity": 14.58, "kv_type": "uniform_4b", ...}

Fixes #4 and #8

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 80a8e87 commit 69f2994

1 file changed

Lines changed: 35 additions & 0 deletions

File tree

tools/quant.c

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,14 @@ static tq_type parse_kv_type(const char* s) {
6767
return TQ_TYPE_UNIFORM_4B;
6868
}
6969

70+
/* Release version string reported by print_version(). */
#define QUANT_VERSION "0.2.0"

/*
 * Print the version banner to stdout: the program name with QUANT_VERSION,
 * a one-line tagline, and the project URL, each on its own line.
 */
static void print_version(void) {
    /* Constant lines that follow the version line, emitted verbatim. */
    static const char* const banner_tail[] = {
        "Embeddable LLM inference in pure C\n",
        "https://github.com/quantumaikr/quant.cpp\n",
    };
    const size_t n_tail = sizeof banner_tail / sizeof banner_tail[0];

    printf("quant.cpp v%s\n", QUANT_VERSION);
    for (size_t idx = 0; idx < n_tail; idx++) {
        fputs(banner_tail[idx], stdout);
    }
}
77+
7078
static void print_usage(const char* prog) {
7179
fprintf(stderr, "quant — Minimal C inference engine. Zero dependencies.\n");
7280
fprintf(stderr, "Usage: %s <model.safetensors> [options]\n\n", prog);
@@ -97,6 +105,8 @@ static void print_usage(const char* prog) {
97105
fprintf(stderr, " --ctx <N> Override max context length (default: 4096)\n");
98106
fprintf(stderr, " --delta, -D Enable delta KV compression (store key deltas)\n");
99107
fprintf(stderr, " --k-window <N> Age-based K: recent N tokens FP32, rest quantized\n");
108+
fprintf(stderr, " --version Print version and exit\n");
109+
fprintf(stderr, " --json JSON output for --ppl (machine-parseable)\n");
100110
}
101111

102112
int main(int argc, char** argv) {
@@ -130,6 +140,7 @@ int main(int argc, char** argv) {
130140
int delta_kv = 0; /* 1 = delta KV compression (store key deltas) */
131141
int delta_iframe_int = 0; /* I-frame interval for delta KV (0 = auto = 64) */
132142
int k_highres_window = 0; /* age-based: recent N keys at FP32, rest at 2-bit */
143+
int json_output = 0; /* 1 = JSON output for --ppl */
133144

134145
for (int i = 1; i < argc; i++) {
135146
if (argv[i][0] != '-') {
@@ -214,6 +225,11 @@ int main(int argc, char** argv) {
214225
delta_iframe_int = atoi(argv[++i]);
215226
} else if (strcmp(argv[i], "--k-window") == 0 && i + 1 < argc) {
216227
k_highres_window = atoi(argv[++i]);
228+
} else if (strcmp(argv[i], "--version") == 0) {
229+
print_version();
230+
return 0;
231+
} else if (strcmp(argv[i], "--json") == 0) {
232+
json_output = 1;
217233
} else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
218234
print_usage(argv[0]);
219235
return 0;
@@ -452,6 +468,25 @@ int main(int argc, char** argv) {
452468
/* Machine-parseable */
453469
fprintf(stderr, "PPL_CSV:%d,%.6f,%.4f\n", n_eval, avg_nll, perplexity);
454470

471+
/* JSON output (--json flag) */
472+
if (json_output) {
473+
const char* kv_name = kv_type < TQ_TYPE_COUNT ? tq_type_name(kv_type) : "fp32";
474+
const char* v_name = value_quant_bits == 4 ? "q4" : (value_quant_bits == 2 ? "q2" : "fp16");
475+
printf("{\n");
476+
printf(" \"model\": \"%s\",\n", model_path);
477+
printf(" \"benchmark\": \"%s\",\n", ppl_file);
478+
printf(" \"tokens\": %d,\n", n_tokens);
479+
printf(" \"tokens_evaluated\": %d,\n", n_eval);
480+
printf(" \"kv_type\": \"%s\",\n", kv_name);
481+
printf(" \"v_quant\": \"%s\",\n", v_name);
482+
printf(" \"delta_kv\": %s,\n", delta_kv ? "true" : "false");
483+
printf(" \"perplexity\": %.4f,\n", perplexity);
484+
printf(" \"avg_nll\": %.6f,\n", avg_nll);
485+
printf(" \"elapsed_s\": %.2f,\n", ppl_elapsed);
486+
printf(" \"tok_per_s\": %.1f\n", (double)n_eval / ppl_elapsed);
487+
printf("}\n");
488+
}
489+
455490
tq_free_state(state);
456491
free(tokens);
457492
tq_free_tokenizer(tok);

0 commit comments

Comments
 (0)