|
| 1 | +#!/usr/bin/env bash |
| 2 | +# test_long_seq.sh — autoregressive stress test. |
| 3 | +# |
| 4 | +# Why: PPL (teacher-forced) can be fine while T=0 generation collapses |
| 5 | +# after a few hundred tokens — a failure mode KV compression bugs |
| 6 | +# typically produce. This test generates 500 tokens at T=0 and rejects |
| 7 | +# runs where printable chars fall below 80% of the output (indicating |
| 8 | +# repetition-trap garbage, NaN-spew, or token-ID soup). |
| 9 | +# |
| 10 | +# Complements test_models.sh (which tests first 10 tokens coherence). |
| 11 | + |
# Treat references to unset variables as errors.
set -u

# Directory holding the .gguf model files (first CLI argument, default "models").
MODELS_DIR="${1:-models}"
# Inference binary under test; override with the QUANT_BIN environment variable.
QUANT_BIN="${QUANT_BIN:-./build/quant}"
# Number of tokens requested from every model run.
N_TOKENS=500
# Result counters, incremented by run_long.
PASS=0
FAIL=0
SKIP=0

# Check the binary first so that no temp file is created when we cannot run.
if [[ ! -x "$QUANT_BIN" ]]; then
  echo "ERROR: $QUANT_BIN not built." >&2
  exit 1
fi

# Scratch file that receives each model's stdout. Abort if it cannot be
# created: an empty TMP would make every later redirect and byte count fail.
TMP=$(mktemp) || {
  echo "ERROR: could not create temp file." >&2
  exit 1
}
# Remove the scratch file on every exit path.
trap 'rm -f -- "$TMP"' EXIT
| 26 | + |
#######################################
# Generate N_TOKENS tokens at temperature 0 from one model and grade the
# output by the share of printable bytes (>= 80% passes).
# Globals:
#   MODELS_DIR, QUANT_BIN, N_TOKENS, TMP (read)
#   PASS, FAIL, SKIP (incremented)
# Arguments:
#   $1 - model file name, relative to MODELS_DIR
#   $2 - prompt text, passed to the binary via -p
#   $3 - optional extra flag(s) for the binary (e.g. --chat); default: none
#   $4 - optional whitespace-separated VAR=value list handed to env(1);
#        default: TQ_NO_METAL=1
# Outputs:
#   One [PASS] / [FAIL] / [SKIP] line on stdout.
# Returns:
#   Always 0; the verdict is recorded in the counters.
#######################################
run_long() {
  local model="$1"
  local prompt="$2"
  local chat_flag="${3:-}"
  local extra_env="${4:-TQ_NO_METAL=1}"
  local -a env_args=() chat_args=()
  local rc=0 total printable ratio preview verdict

  if [[ ! -f "$MODELS_DIR/$model" ]]; then
    printf " %-50s [SKIP] not found\n" "$model"
    SKIP=$((SKIP + 1))
    return 0
  fi

  # Split on whitespace into arrays: same word splitting as an unquoted
  # expansion, but without pathname globbing.
  read -r -a env_args <<<"$extra_env"
  read -r -a chat_args <<<"$chat_flag"

  # stderr is discarded so that TMP holds only what the binary wrote to stdout.
  # The ${arr[@]+...} form keeps empty arrays safe under `set -u` on old bash.
  env ${env_args[@]+"${env_args[@]}"} "$QUANT_BIN" "$MODELS_DIR/$model" \
    ${chat_args[@]+"${chat_args[@]}"} \
    -p "$prompt" -n "$N_TOKENS" -T 0 >"$TMP" 2>/dev/null || rc=$?

  # Arithmetic expansion strips the padding some wc implementations emit.
  total=$(($(wc -c <"$TMP")))

  # A run that died part-way must not pass just because the text it managed
  # to print looked fine.
  if ((rc != 0)); then
    printf " %-50s [FAIL] exit status %d (%d bytes)\n" "$model" "$rc" "$total"
    FAIL=$((FAIL + 1))
    return 0
  fi

  if ((total < 100)); then
    printf " %-50s [FAIL] too short (%d bytes)\n" "$model" "$total"
    FAIL=$((FAIL + 1))
    return 0
  fi

  # Printable = ASCII printable + whitespace + any byte >= 0x80 (approximates
  # valid UTF-8 multibyte text). LC_ALL=C forces byte-wise processing so that
  # invalid sequences are counted instead of making tr abort.
  printable=$(($(LC_ALL=C tr -cd '[:print:][:space:]\200-\377' <"$TMP" | wc -c)))

  # Integer percentage.
  ratio=$((printable * 100 / total))

  # Preview: first 60 bytes with newlines turned into single spaces.
  preview=$(LC_ALL=C tr '\n' ' ' <"$TMP" | LC_ALL=C tr -s ' ' | LC_ALL=C cut -c1-60)

  if ((ratio >= 80)); then
    verdict=PASS
    PASS=$((PASS + 1))
  else
    verdict=FAIL
    FAIL=$((FAIL + 1))
  fi
  printf " %-50s [%s] %d%% printable, %d bytes | '%s...'\n" \
    "$model" "$verdict" "$ratio" "$total" "$preview"
  return 0
}
| 69 | + |
# Banner.
printf '%s\n' \
  "=== quant.cpp Long-Sequence Stress Test (N=$N_TOKENS, T=0) ===" \
  "Models dir: $MODELS_DIR" \
  ""

# Short story continuation prompts — must sustain coherent generation.
# Prompts used by more than one model are named once and reused.
story_prompt="Once upon a time in a small village by the sea, there lived a young woman named Elena who"
essay_prompt="Here is a short essay on the importance of clear writing:"

run_long "Llama-3.2-1B-Instruct-Q8_0.gguf" "$story_prompt"
run_long "Llama-3.2-3B-Instruct-Q8_0.gguf" "$story_prompt"
run_long "Phi-3.5-mini-instruct-Q8_0.gguf" "$essay_prompt"
run_long "Phi-3.5-mini-instruct-Q4_K_M.gguf" "$essay_prompt"
# This model is invoked with the --chat flag.
run_long "Qwen3.5-4B-Q4_K_M.gguf" \
  "Write a short story about a robot who learns to paint" "--chat"
run_long "gemma-4-e2b-it-Q8_0.gguf" \
  "Write a short paragraph about the solar system:"

# Totals accumulated by the run_long calls above.
printf '%s\n' \
  "" \
  "--- Summary ---" \
  " PASS: $PASS" \
  " FAIL: $FAIL" \
  " SKIP: $SKIP"

# Exit non-zero if any model failed; skipped models do not affect the status.
if ((FAIL > 0)); then
  exit 1
fi
exit 0
0 commit comments