Skip to content

Commit 8cb5e1d

Browse files
unamedkrclaude
andcommitted
Add WASM browser demo: LLM inference in the browser
- wasm/quant_wasm.c: Emscripten entry point using the quant.h single header
- wasm/index.html: Dark-themed chat UI with drag-and-drop GGUF loading
- wasm/build.sh: One-command build (emcc → quant.js 64KB + quant.wasm 192KB)
- Streaming token output, KV compression active by default
- Zero server dependency: everything runs client-side in the browser

Usage: cd wasm && bash build.sh && python3 -m http.server 8080

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 926e7c3 commit 8cb5e1d

6 files changed

Lines changed: 498 additions & 1 deletion

File tree

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,15 @@ Tested extensively: 2-bit delta, sub-block scaling, multi-hash, error feedback,
285285

286286
**Can it run in the browser (WASM)?**
287287

288-
The code is pure C11 with no platform-specific dependencies in the core path. Emscripten compilation is supported. A browser demo with a small model is on the roadmap.
288+
Yes! The `wasm/` directory contains a complete browser demo:
289+
290+
```bash
291+
cd wasm && bash build.sh # Build with Emscripten
292+
python3 -m http.server 8080 # Serve locally
293+
# Open http://localhost:8080, drag & drop a GGUF model
294+
```
295+
296+
The WASM build is 192KB (quant.wasm) + 64KB (quant.js). Drop a small GGUF model (e.g., SmolLM2-135M, 270MB) and chat directly in the browser. Everything runs client-side — nothing is uploaded.
289297

290298
---
291299

wasm/build.sh

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/bin/bash
# Build the quant.cpp WASM demo (quant.js + quant.wasm) with Emscripten.
# Requires: Emscripten SDK (emcc)
#
# Usage: cd wasm && bash build.sh
# Then: python3 -m http.server 8080
# Open: http://localhost:8080

set -e

# Resolve paths from the script location so the build works from any cwd.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"

echo "=== Building quant.cpp WASM ==="

# Check emcc. Diagnostics go to stderr so they are not mistaken for build output.
if ! command -v emcc &>/dev/null; then
    echo "Error: emcc not found. Install Emscripten:" >&2
    echo " brew install emscripten" >&2
    # The emsdk tool lives inside the cloned directory, and emsdk_env.sh must be
    # sourced afterwards to put emcc on PATH for the current shell.
    echo " # or: git clone https://github.com/emscripten-core/emsdk && cd emsdk && ./emsdk install latest && ./emsdk activate latest && source ./emsdk_env.sh" >&2
    exit 1
fi

echo "emcc version: $(emcc --version | head -1)"

# Build
# - The project root is on the include path (-I) for quant_wasm.c.
# - The exported functions/runtime methods are the ones index.html calls on Module.
# - The heap starts at 256MB and may grow up to 4GB; index.html copies the whole
#   model file into the in-memory filesystem (FORCE_FILESYSTEM) before loading it.
emcc "$SCRIPT_DIR/quant_wasm.c" \
    -I"$PROJECT_DIR" \
    -o "$SCRIPT_DIR/quant.js" \
    -O2 \
    -s WASM=1 \
    -s ALLOW_MEMORY_GROWTH=1 \
    -s MAXIMUM_MEMORY=4GB \
    -s INITIAL_MEMORY=256MB \
    -s EXPORTED_FUNCTIONS='["_main","_wasm_load_model","_wasm_generate","_wasm_model_info","_wasm_is_ready","_malloc","_free"]' \
    -s EXPORTED_RUNTIME_METHODS='["UTF8ToString","allocateUTF8","FS"]' \
    -s FORCE_FILESYSTEM=1 \
    -s MODULARIZE=0 \
    -s ENVIRONMENT=web \
    -s NO_EXIT_RUNTIME=1 \
    -s ASSERTIONS=0 \
    -s STACK_SIZE=1MB \
    -lm \
    -DNDEBUG \
    -D__EMSCRIPTEN__ \
    -Wno-gnu-zero-variadic-macro-arguments \
    -Wno-dollar-in-identifier-extension

echo ""
echo "=== Build complete ==="
echo "Files: quant.js ($(du -h "$SCRIPT_DIR/quant.js" | cut -f1)), quant.wasm ($(du -h "$SCRIPT_DIR/quant.wasm" | cut -f1))"
echo ""
echo "To serve locally:"
echo " cd $SCRIPT_DIR && python3 -m http.server 8080"
echo " Open http://localhost:8080"
echo ""
echo "For HTTPS (required for SharedArrayBuffer):"
echo " npx serve -s $SCRIPT_DIR --ssl-cert cert.pem --ssl-key key.pem"

wasm/index.html

Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6+
<title>quant.cpp — LLM in Your Browser</title>
7+
<style>
8+
* { margin: 0; padding: 0; box-sizing: border-box; }
9+
body {
10+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
11+
background: #0a0a0a; color: #e0e0e0;
12+
min-height: 100vh; display: flex; flex-direction: column;
13+
}
14+
.header {
15+
padding: 20px 24px; border-bottom: 1px solid #222;
16+
display: flex; align-items: center; gap: 16px;
17+
}
18+
.header h1 { font-size: 20px; font-weight: 600; }
19+
.header h1 span { color: #6ee7b7; }
20+
.header .badge {
21+
font-size: 11px; padding: 2px 8px; border-radius: 12px;
22+
background: #1a3a2a; color: #6ee7b7; font-weight: 500;
23+
}
24+
.header .github {
25+
margin-left: auto; color: #888; text-decoration: none; font-size: 13px;
26+
}
27+
.header .github:hover { color: #6ee7b7; }
28+
29+
.main { flex: 1; display: flex; flex-direction: column; max-width: 800px; width: 100%; margin: 0 auto; padding: 24px; }
30+
31+
/* Drop zone */
32+
.dropzone {
33+
border: 2px dashed #333; border-radius: 12px; padding: 48px;
34+
text-align: center; cursor: pointer; transition: all 0.2s;
35+
margin-bottom: 24px;
36+
}
37+
.dropzone:hover, .dropzone.drag-over { border-color: #6ee7b7; background: #0d1f17; }
38+
.dropzone h2 { font-size: 18px; margin-bottom: 8px; }
39+
.dropzone p { color: #666; font-size: 13px; }
40+
.dropzone.loaded { border-color: #2a5a3a; background: #0d1a14; padding: 16px; }
41+
.dropzone.loaded h2 { font-size: 14px; color: #6ee7b7; }
42+
43+
/* Chat */
44+
.chat { flex: 1; overflow-y: auto; margin-bottom: 16px; }
45+
.message { padding: 12px 16px; margin-bottom: 8px; border-radius: 8px; font-size: 14px; line-height: 1.6; }
46+
.message.user { background: #1a1a2e; border: 1px solid #2a2a4e; }
47+
.message.assistant { background: #111; border: 1px solid #222; }
48+
.message.assistant .cursor { animation: blink 1s step-end infinite; }
49+
@keyframes blink { 50% { opacity: 0; } }
50+
.message.system { color: #666; font-size: 12px; text-align: center; }
51+
.message code { background: #1a1a1a; padding: 1px 4px; border-radius: 3px; font-size: 13px; }
52+
.message pre { background: #1a1a1a; padding: 12px; border-radius: 6px; overflow-x: auto; margin: 8px 0; }
53+
.message pre code { background: none; padding: 0; }
54+
.message strong { color: #6ee7b7; }
55+
56+
/* Input */
57+
.input-row {
58+
display: flex; gap: 8px;
59+
}
60+
.input-row input {
61+
flex: 1; padding: 12px 16px; background: #111; border: 1px solid #333;
62+
border-radius: 8px; color: #e0e0e0; font-size: 14px; outline: none;
63+
}
64+
.input-row input:focus { border-color: #6ee7b7; }
65+
.input-row input:disabled { opacity: 0.4; }
66+
.input-row button {
67+
padding: 12px 24px; background: #6ee7b7; color: #000; border: none;
68+
border-radius: 8px; font-weight: 600; font-size: 14px; cursor: pointer;
69+
}
70+
.input-row button:hover { background: #5cd4a4; }
71+
.input-row button:disabled { opacity: 0.3; cursor: not-allowed; }
72+
73+
/* Stats */
74+
.stats {
75+
display: flex; gap: 16px; padding: 12px 0; font-size: 12px; color: #555;
76+
border-top: 1px solid #1a1a1a; margin-top: 8px;
77+
}
78+
.stats span { display: flex; align-items: center; gap: 4px; }
79+
80+
/* Loading */
81+
.loading { display: none; align-items: center; gap: 8px; padding: 16px; color: #6ee7b7; }
82+
.loading.active { display: flex; }
83+
.spinner { width: 16px; height: 16px; border: 2px solid #333; border-top-color: #6ee7b7; border-radius: 50%; animation: spin 0.8s linear infinite; }
84+
@keyframes spin { to { transform: rotate(360deg); } }
85+
</style>
86+
</head>
87+
<body>
88+
89+
<div class="header">
90+
<h1>quant<span>.cpp</span></h1>
91+
<span class="badge">WASM</span>
92+
<span class="badge" id="kvBadge" style="display:none">7x Context</span>
93+
<a class="github" href="https://github.com/quantumaikr/quant.cpp" target="_blank">GitHub ↗</a>
94+
</div>
95+
96+
<div class="main">
97+
<div class="dropzone" id="dropzone" onclick="document.getElementById('fileInput').click()">
98+
<h2>Drop a GGUF model here</h2>
99+
<p>Or click to browse. Recommended: SmolLM2-135M-Instruct (270MB) for browser use.</p>
100+
<p style="margin-top:8px; color:#444">Runs entirely in your browser. Nothing uploaded to any server.</p>
101+
<input type="file" id="fileInput" accept=".gguf" style="display:none">
102+
</div>
103+
104+
<div class="loading" id="loading">
105+
<div class="spinner"></div>
106+
<span id="loadingText">Loading...</span>
107+
</div>
108+
109+
<div class="chat" id="chat"></div>
110+
111+
<div class="input-row">
112+
<input type="text" id="prompt" placeholder="Ask anything..." disabled
113+
onkeydown="if(event.key==='Enter') generate()">
114+
<button id="sendBtn" onclick="generate()" disabled>Send</button>
115+
</div>
116+
117+
<div class="stats">
118+
<span id="statTokens"></span>
119+
<span id="statSpeed"></span>
120+
<span id="statMemory"></span>
121+
</div>
122+
</div>
123+
124+
<script>
125+
// ---- Shared page state, read and updated by loadModel() and generate() ----
let modelLoaded = false;
let generating = false;

// ---- Model selection: drag-and-drop target plus the hidden file picker ----
const dropzone = document.getElementById('dropzone');
const fileInput = document.getElementById('fileInput');

// Highlight the drop zone while a drag is over it. Every drag event has its
// default browser handling cancelled via preventDefault().
for (const type of ['dragenter', 'dragover']) {
  dropzone.addEventListener(type, (event) => {
    event.preventDefault();
    dropzone.classList.add('drag-over');
  });
}

// Clear the highlight when the drag leaves or the file is dropped.
for (const type of ['dragleave', 'drop']) {
  dropzone.addEventListener(type, (event) => {
    event.preventDefault();
    dropzone.classList.remove('drag-over');
  });
}

// Dropped files: only the first file is considered, and only when its name
// ends in ".gguf"; anything else is ignored.
dropzone.addEventListener('drop', (event) => {
  const dropped = event.dataTransfer.files[0];
  if (!dropped) return;
  if (dropped.name.endsWith('.gguf')) loadModel(dropped);
});

// Files chosen through the picker are loaded without a name check.
fileInput.addEventListener('change', (event) => {
  const picked = event.target.files[0];
  if (picked) loadModel(picked);
});
149+
150+
/**
 * Reveal the loading indicator and set the status text shown next to it.
 *
 * @param {string} msg - Status line to display.
 */
function showLoading(msg) {
  const indicator = document.getElementById('loading');
  indicator.classList.add('active');

  const label = document.getElementById('loadingText');
  label.textContent = msg;
}
154+
/** Hide the loading indicator by dropping its `active` class. */
function hideLoading() {
  const indicator = document.getElementById('loading');
  indicator.classList.remove('active');
}
157+
158+
/**
 * Append a message bubble to the chat log and scroll the log to the bottom.
 *
 * @param {string} role - Added as a CSS class after "message"; the callers in
 *   this file pass 'user', 'assistant' or 'system'.
 * @param {string} text - Message body; rendered through formatText().
 * @returns {HTMLDivElement} The new bubble, so callers can update it later.
 */
function addMessage(role, text) {
  const bubble = document.createElement('div');
  bubble.className = 'message ' + role;
  bubble.innerHTML = formatText(text);

  const log = document.getElementById('chat');
  log.appendChild(bubble);
  log.scrollTop = log.scrollHeight;

  return bubble;
}
167+
168+
/**
 * Convert a plain-text message into HTML that is safe to assign to innerHTML.
 *
 * The input is HTML-escaped first, so markup contained in model output, user
 * prompts, file names or error messages is displayed literally instead of
 * being interpreted by the browser. A small Markdown subset is then applied:
 * fenced ```blocks```, inline `code`, and **bold**.
 *
 * Callers that need to insert real markup (for example a cursor element) must
 * add it to the element themselves; anything passed through here is escaped.
 *
 * @param {string} text - Raw message text; non-strings are coerced with String().
 * @returns {string} Escaped HTML with the supported Markdown rendered.
 */
function formatText(text) {
  const HTML_ESCAPES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  // Escape before applying Markdown so the only tags in the result are the
  // ones generated below.
  const escaped = String(text).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  return escaped
    // Fenced block; the optional language tag after the fence is discarded.
    .replace(/```(\w*)\n([\s\S]*?)```/g, '<pre><code>$2</code></pre>')
    .replace(/`([^`]+)`/g, '<code>$1</code>')
    .replace(/\*\*([^*]+)\*\*/g, '<strong>$1</strong>');
}
175+
176+
/**
 * Load a user-selected GGUF file into the WASM runtime and unlock the chat UI.
 *
 * The file is read into memory, written to the Emscripten in-memory filesystem
 * as /model.gguf, and passed to the exported C function _wasm_load_model. A
 * return code of 0 is treated as success. Failures are reported in the chat as
 * system messages; nothing is thrown to the caller.
 *
 * @param {File} file - Model file from the drop zone or the file picker.
 * @returns {Promise<void>} Settles once loading has succeeded or failed.
 */
async function loadModel(file) {
  const sizeMb = (file.size / 1024 / 1024).toFixed(0);
  showLoading(`Loading ${file.name} (${sizeMb} MB)...`);
  addMessage('system', `Loading ${file.name}...`);

  try {
    const buffer = await file.arrayBuffer();

    // Write to WASM filesystem
    Module.FS.writeFile('/model.gguf', new Uint8Array(buffer));

    showLoading('Initializing model...');

    // Call C function to load.
    // NOTE(review): the path buffer from allocateUTF8 is never freed (a few
    // bytes per load). Free it with Module._free once it is confirmed that
    // _wasm_load_model does not keep the pointer.
    const rc = Module._wasm_load_model(Module.allocateUTF8('/model.gguf'));

    if (rc === 0) {
      modelLoaded = true;
      dropzone.classList.add('loaded');

      // Rebuild the drop zone from DOM nodes instead of assigning innerHTML:
      //  - the hidden file input must stay inside the drop zone, otherwise the
      //    zone's click handler fails on a missing element after the first load;
      //  - the file name is user-controlled and must not be parsed as HTML.
      const picker = document.getElementById('fileInput');
      const heading = document.createElement('h2');
      heading.textContent = `✓ ${file.name} (${sizeMb} MB)`;
      dropzone.textContent = '';
      dropzone.appendChild(heading);
      if (picker) dropzone.appendChild(picker);

      document.getElementById('kvBadge').style.display = '';
      const promptInput = document.getElementById('prompt');
      promptInput.disabled = false;
      document.getElementById('sendBtn').disabled = false;
      promptInput.focus();
      addMessage('system', 'Model loaded! KV compression active (7x longer context).');
    } else {
      addMessage('system', 'Failed to load model. Try a smaller GGUF file.');
    }
  } catch (e) {
    addMessage('system', `Error: ${e.message}`);
  } finally {
    // Always clear the spinner, whichever path was taken.
    hideLoading();
  }
}
209+
210+
/**
 * Send the current prompt to the model and stream the reply into the chat.
 *
 * Does nothing when no model is loaded, a generation is already running, or
 * the prompt is empty. Otherwise it installs the Module.onToken / onDone /
 * onStatus callbacks and calls the exported _wasm_generate with temperature
 * 0.7 and a 256-token limit. The UI lock (`generating`, Send button) is always
 * released and the prompt buffer is always freed, even if the WASM call throws.
 *
 * @returns {Promise<void>}
 */
async function generate() {
  if (!modelLoaded || generating) return;
  const input = document.getElementById('prompt');
  const text = input.value.trim();
  if (!text) return;

  const sendBtn = document.getElementById('sendBtn');
  const chat = document.getElementById('chat');
  const CURSOR_HTML = '<span class="cursor">▌</span>';

  input.value = '';
  generating = true;
  sendBtn.disabled = true;

  addMessage('user', text);
  // The cursor is real markup, so it is set on the element directly instead of
  // being routed through addMessage()/formatText() as if it were message text.
  const assistantDiv = addMessage('assistant', '');
  assistantDiv.innerHTML = CURSOR_HTML;
  let output = '';

  // Set callbacks
  Module.onToken = (token) => {
    output += token;
    assistantDiv.innerHTML = formatText(output) + CURSOR_HTML;
    chat.scrollTop = chat.scrollHeight;
  };
  Module.onDone = (nTokens, elapsedMs) => {
    assistantDiv.innerHTML = formatText(output);
    // A zero elapsed time would otherwise render as "Infinity" or "NaN".
    const tps = elapsedMs > 0 ? (nTokens / (elapsedMs / 1000)).toFixed(1) : 'n/a';
    document.getElementById('statTokens').textContent = `${nTokens} tokens`;
    document.getElementById('statSpeed').textContent = `${tps} tok/s`;
    generating = false;
    sendBtn.disabled = false;
    input.focus();
  };
  Module.onStatus = (msg) => {
    addMessage('system', msg);
  };

  // Call generation
  let promptPtr = 0;
  try {
    promptPtr = Module.allocateUTF8(text);
    Module._wasm_generate(promptPtr, 0.7, 256);

    if (!output) {
      assistantDiv.innerHTML = '<em style="color:#666">No output generated</em>';
    }
  } catch (err) {
    // Show the failure in place of the reply instead of leaving a dangling cursor.
    const reason = err && err.message ? err.message : String(err);
    console.error(err);
    assistantDiv.textContent = `Error: ${reason}`;
  } finally {
    // Unlock the UI first so that a failing _free cannot leave it stuck.
    generating = false;
    sendBtn.disabled = false;
    if (promptPtr) Module._free(promptPtr);
  }
}
254+
</script>
255+
256+
<!-- Emscripten-generated JS will be loaded here -->
257+
<script>
258+
// Configuration object for the Emscripten runtime; declared before quant.js is
// loaded. Kept as `var` so it is a true global (build.sh uses MODULARIZE=0).
var Module = {
  // Streaming hooks; generate() installs the real handlers before each run.
  onToken: null,
  onDone: null,
  onStatus: null,

  // Route the runtime's stdout/stderr text to the browser console.
  print(text) {
    console.log(text);
  },
  printErr(text) {
    console.warn(text);
  },

  // Called once the runtime is initialised: announce readiness in the chat.
  onRuntimeInitialized() {
    console.log('quant.cpp WASM ready');
    addMessage('system', 'Runtime ready. Drop a GGUF model file to begin.');
  },
};
269+
</script>
270+
<script src="quant.js"></script>
271+
272+
</body>
273+
</html>

wasm/quant.js

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

wasm/quant.wasm

189 KB
Binary file not shown.

0 commit comments

Comments
 (0)