Skip to content

Commit f360b90

Browse files
unamedkr and claude committed
feat(server): --template flag + Phi-4 auto-detection (#86)
Unified server improvements:
- New --template flag: chatml (default), phi3, gemma
  ./quant-server-unified model.gguf --template phi3
- Auto-detect Phi-4 models (same template as Phi-3)
- Filename detection covers: Phi-3/3.5/4, Gemma 2/3/4

Refs #86

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 393bdd5 commit f360b90

1 file changed

Lines changed: 33 additions & 13 deletions

File tree

tools/quant_server_unified.c

Lines changed: 33 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -532,9 +532,10 @@ int main(int argc, char** argv) {
532532
"quant-server-unified — OpenAI-compatible server (quant.h unified build)\n\n"
533533
"Usage: %s <model.gguf> [options]\n\n"
534534
"Options:\n"
535-
" -p <port> Listen port (default: 8080)\n"
536-
" -j <threads> Threads per inference (default: 4)\n"
537-
" --help Show this help\n\n"
535+
" -p <port> Listen port (default: 8080)\n"
536+
" -j <threads> Threads per inference (default: 4)\n"
537+
" --template T Chat template: chatml (default), phi3, gemma\n"
538+
" --help Show this help\n\n"
538539
"Example:\n"
539540
" %s model.gguf -p 8080 -j 8\n"
540541
" curl http://localhost:8080/v1/chat/completions \\\n"
@@ -584,18 +585,37 @@ int main(int argc, char** argv) {
584585
return 1;
585586
}
586587

587-
/* Detect model architecture for chat template selection.
588-
* Check model filename for architecture hints. */
589-
int template_type = TMPL_CHATML; /* default */
588+
/* Detect chat template from filename or --template flag.
589+
* Supports: chatml (default), phi3, gemma.
590+
* #86: auto-detection covers Phi-3/3.5/4, Gemma 2/3/4. */
591+
int template_type = TMPL_CHATML;
590592
const char* bn = strrchr(model_path, '/');
591593
bn = bn ? bn + 1 : model_path;
592-
if (strstr(bn, "hi-3") || strstr(bn, "hi3") || strstr(bn, "Hi-3") || strstr(bn, "Hi3") ||
593-
strstr(bn, "phi-3") || strstr(bn, "phi3") || strstr(bn, "Phi-3") || strstr(bn, "Phi3")) {
594-
template_type = TMPL_PHI3;
595-
fprintf(stderr, "Detected Phi-3 model — using Phi-3 chat template\n");
596-
} else if (strstr(bn, "gemma") || strstr(bn, "Gemma")) {
597-
template_type = TMPL_GEMMA;
598-
fprintf(stderr, "Detected Gemma model — using Gemma chat template\n");
594+
595+
/* Check --template CLI override first */
596+
for (int i = 2; i < argc; i++) {
597+
if (strcmp(argv[i], "--template") == 0 && i + 1 < argc) {
598+
const char* t = argv[++i];
599+
if (strcmp(t, "phi3") == 0) template_type = TMPL_PHI3;
600+
else if (strcmp(t, "gemma") == 0) template_type = TMPL_GEMMA;
601+
else if (strcmp(t, "chatml") == 0) template_type = TMPL_CHATML;
602+
fprintf(stderr, "Chat template: %s (--template override)\n", t);
603+
}
604+
}
605+
606+
/* Auto-detect from filename if no override */
607+
if (template_type == TMPL_CHATML) {
608+
/* Phi family: Phi-3, Phi-3.5, Phi-4 all use <|user|>...<|end|> */
609+
if (strstr(bn, "phi-3") || strstr(bn, "phi3") || strstr(bn, "Phi-3") || strstr(bn, "Phi3") ||
610+
strstr(bn, "phi-4") || strstr(bn, "phi4") || strstr(bn, "Phi-4") || strstr(bn, "Phi4")) {
611+
template_type = TMPL_PHI3;
612+
fprintf(stderr, "Detected Phi model — using Phi chat template\n");
613+
}
614+
/* Gemma family */
615+
else if (strstr(bn, "gemma") || strstr(bn, "Gemma")) {
616+
template_type = TMPL_GEMMA;
617+
fprintf(stderr, "Detected Gemma model — using Gemma chat template\n");
618+
}
599619
}
600620
int has_fused_qkv = (template_type == TMPL_PHI3) ? 1 : 0;
601621

0 commit comments

Comments
 (0)