/* Dropzone states — `loaded` is toggled on the dropzone element itself */
.dropzone.loaded { border-color: #2a5a3a; background: #0d1a14; padding: 16px; }
.dropzone.loaded h2 { font-size: 14px; color: #6ee7b7; }

/* Model selector */
.model-cards {
  display: flex; gap: 12px; margin-bottom: 16px; justify-content: center; flex-wrap: wrap;
}
.model-card {
  padding: 14px 20px; border: 1px solid #333; border-radius: 10px;
  cursor: pointer; transition: all 0.2s; text-align: left; min-width: 220px;
  background: #111;
}
.model-card:hover { border-color: #6ee7b7; background: #0d1f17; }
/* Matches <div class="model-card recommended"> — compound, not descendant */
.model-card.recommended { border-color: #059669; }
.model-card .name { font-weight: 600; font-size: 14px; margin-bottom: 4px; }
.model-card .meta { font-size: 12px; color: #888; }
.model-card .tag {
  display: inline-block; font-size: 10px; padding: 1px 6px; border-radius: 6px;
  background: #1a3a2a; color: #6ee7b7; margin-top: 6px;
}
/* Matches <span class="tag blue"> */
.model-card .tag.blue { background: #1a2a3a; color: #7bb8f0; }

/* Chat */
.chat { flex: 1; overflow-y: auto; margin-bottom: 16px; }
/* pre-wrap preserves model newlines while tokens stream in via textContent */
.message { padding: 12px 16px; margin-bottom: 8px; border-radius: 8px; font-size: 14px; line-height: 1.6; white-space: pre-wrap; word-wrap: break-word; }
/* JS sets className = `message ${role}` — both classes on one element */
.message.user { background: #1a1a2e; border: 1px solid #2a2a4e; }
.message.assistant { background: #111; border: 1px solid #222; }
.message.assistant .cursor { animation: blink 1s step-end infinite; }
@keyframes blink { 50% { opacity: 0; } }
.message.system { color: #666; font-size: 12px; text-align: center; white-space: normal; }
.message code { background: #1a1a1a; padding: 1px 4px; border-radius: 3px; font-size: 13px; }
.message pre { background: #1a1a1a; padding: 12px; border-radius: 6px; overflow-x: auto; margin: 8px 0; }
.message pre code { background: none; padding: 0; }
<!-- Top bar: title, WASM badge, optional KV-cache badge (shown from JS), repo link -->
<div class="header">
  <h1>quant<span>.cpp</span></h1>
  <span class="badge">WASM</span>
  <span class="badge" id="kvBadge" style="display:none">3x Context</span>
  <a class="github" href="https://github.com/quantumaikr/quant.cpp" target="_blank">GitHub ↗</a>
</div>

<div class="main">
  <!-- Dropzone doubles as the model picker until a model is loaded -->
  <div class="dropzone" id="dropzone">
    <h2>LLM in Your Browser</h2>
    <p style="margin-bottom:16px; color:#6ee7b7; font-size:15px">No install. No API key. No server. Just click.</p>

    <div class="model-cards" id="modelCards">
      <div class="model-card recommended" onclick="loadDemoModel('qwen3-0.6b')">
        <div class="name">Qwen3 0.6B</div>
        <div class="meta">~378 MB download · Q4_K_M</div>
        <span class="tag">Recommended</span>
        <div class="meta" style="margin-top:4px">Fast, multilingual, good for demo</div>
      </div>
      <div class="model-card" onclick="loadDemoModel('llama-3.2-1b')">
        <div class="name">Llama 3.2 1B</div>
        <div class="meta">~770 MB download · Q4_K_M</div>
        <span class="tag blue">Higher quality</span>
        <div class="meta" style="margin-top:4px">Better reasoning, longer wait</div>
      </div>
    </div>

    <p style="color:#555; font-size:13px">Or <a href="#" onclick="document.getElementById('fileInput').click(); return false" style="color:#6ee7b7">drop your own GGUF</a> file.</p>
    <p style="margin-top:8px; color:#333; font-size:12px">Runs entirely in your browser. Nothing uploaded.</p>
    <input type="file" id="fileInput" accept=".gguf" style="display:none">
  </div>

@@ -135,15 +165,36 @@ <h2>LLM in Your Browser — 189 KB</h2>
// ---- Generation state ----
let modelLoaded = false;
let generating = false;

// ---- Model registry ----
// Each entry: download URL, display name, approximate download size,
// IndexedDB cache key, and the model-specific chat template that wraps
// the raw user prompt before generation.
const MODELS = {
  'qwen3-0.6b': {
    url: 'https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q4_K_M.gguf',
    name: 'Qwen3-0.6B Q4_K_M',
    size: '~378 MB',
    cacheKey: 'qwen3-0.6b-q4km',
    // ChatML format (Qwen family). No whitespace before <|im_end|>.
    chatTemplate: (text) => `<|im_start|>user\n${text}<|im_end|>\n<|im_start|>assistant\n`,
  },
  'llama-3.2-1b': {
    url: 'https://huggingface.co/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/resolve/main/llama-3.2-1b-instruct-q4_k_m.gguf',
    name: 'Llama-3.2-1B-Instruct Q4_K_M',
    size: '~770 MB',
    cacheKey: 'llama-3.2-1b-q4km',
    // Llama 3 instruct header format. No whitespace before <|eot_id|>.
    chatTemplate: (text) => `<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n${text}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n`,
  },
};
// Registry key of the currently selected demo model, or null when the
// user loaded a custom GGUF (generic template is used in that case).
let activeModelId = null;
186+
// ---- IndexedDB model cache ----
const DB_NAME = 'quantcpp_cache';
const DB_STORE = 'models';

/**
 * Open (or create) the model-cache database.
 * Schema version 2; the upgrade handler is idempotent, so upgrading
 * from any earlier version only creates the store when it is missing.
 * @returns {Promise<IDBDatabase>} resolves with the open database,
 *   rejects with the underlying IndexedDB error.
 */
function openDB() {
  return new Promise((resolve, reject) => {
    const req = indexedDB.open(DB_NAME, 2);
    req.onupgradeneeded = () => {
      if (!req.result.objectStoreNames.contains(DB_STORE)) {
        req.result.createObjectStore(DB_STORE);
      }
    };
    req.onsuccess = () => resolve(req.result);
    req.onerror = () => reject(req.error);
  });
}
@@ -199,27 +250,28 @@ <h2>LLM in Your Browser — 189 KB</h2>
199250}
200251
201252// Demo model — cache-first, download only if not in IndexedDB
202- async function loadDemoModel ( ) {
203- const url = 'https://huggingface.co/Felladrin/gguf-Q8_0-SmolLM2-135M-Instruct/resolve/main/smollm2-135m-instruct-q8_0.gguf' ;
204- const btn = document . getElementById ( 'demoBtn' ) ;
205- btn . disabled = true ;
253+ async function loadDemoModel ( modelId ) {
254+ const model = MODELS [ modelId ] ;
255+ if ( ! model ) return ;
256+
257+ activeModelId = modelId ;
258+ const cards = document . querySelectorAll ( '.model-card' ) ;
259+ cards . forEach ( c => c . style . pointerEvents = 'none' ) ;
206260
207261 try {
208262 // 1. Try cache first
209263 showLoading ( 'Checking local cache...' ) ;
210- const cached = await getCachedModel ( DEMO_KEY ) ;
264+ const cached = await getCachedModel ( model . cacheKey ) ;
211265 if ( cached ) {
212- btn . textContent = 'Loading from cache...' ;
213- showLoading ( 'Loading cached model...' ) ;
214- loadModelFromBytes ( new Uint8Array ( cached ) , 'smollm2-135m (cached)' ) ;
266+ showLoading ( `Loading cached ${ model . name } ...` ) ;
267+ loadModelFromBytes ( new Uint8Array ( cached ) , `${ model . name } (cached)` ) ;
215268 return ;
216269 }
217270
218271 // 2. Download from HuggingFace
219- btn . textContent = 'Downloading...' ;
220- showLoading ( 'Downloading SmolLM2-135M (~135 MB)...' ) ;
272+ showLoading ( `Downloading ${ model . name } (${ model . size } )...` ) ;
221273
222- const response = await fetch ( url ) ;
274+ const response = await fetch ( model . url ) ;
223275 if ( ! response . ok ) throw new Error ( `HTTP ${ response . status } ` ) ;
224276
225277 const total = parseInt ( response . headers . get ( 'content-length' ) || '0' ) ;
@@ -237,7 +289,7 @@ <h2>LLM in Your Browser — 189 KB</h2>
237289 const mb = ( received / 1048576 ) . toFixed ( 0 ) ;
238290 const totalMb = ( total / 1048576 ) . toFixed ( 0 ) ;
239291 document . getElementById ( 'loadingText' ) . textContent =
240- `Downloading... ${ pct } % (${ mb } /${ totalMb } MB)` ;
292+ `Downloading ${ model . name } ... ${ pct } % (${ mb } /${ totalMb } MB)` ;
241293 }
242294 }
243295
@@ -247,26 +299,33 @@ <h2>LLM in Your Browser — 189 KB</h2>
247299
248300 // 3. Cache for next time
249301 showLoading ( 'Caching model for instant reload...' ) ;
250- await cacheModel ( DEMO_KEY , arrayBuffer ) . catch ( ( ) => { } ) ;
302+ await cacheModel ( model . cacheKey , arrayBuffer ) . catch ( ( ) => { } ) ;
251303
252304 showLoading ( 'Loading model into WASM...' ) ;
253- loadModelFromBytes ( data , 'smollm2-135m-instruct-q8_0.gguf' ) ;
305+ loadModelFromBytes ( data , model . name ) ;
254306 } catch ( err ) {
255307 hideLoading ( ) ;
256- btn . disabled = false ;
257- btn . textContent = '▶ Try with SmolLM2-135M (~135 MB download)' ;
308+ cards . forEach ( c => c . style . pointerEvents = '' ) ;
309+ activeModelId = null ;
258310 alert ( 'Download failed: ' + err . message + '\n\nTry dropping a local GGUF file instead.' ) ;
259311 }
260312}
261313
// Auto-detect cached models on page load and mark their cards for instant load.
window.addEventListener('load', async () => {
  try {
    for (const [id, model] of Object.entries(MODELS)) {
      const cached = await getCachedModel(model.cacheKey);
      if (!cached) continue;
      // Locate the card by its onclick attribute (contains the registry id)
      // instead of fuzzy-matching the display-name text.
      const card = document.querySelector(`.model-card[onclick*="${id}"]`);
      const meta = card?.querySelector('.meta');
      if (meta) {
        meta.textContent = 'Cached — instant load';
        meta.style.color = '#6ee7b7';
      }
    }
  } catch (e) {
    // Best-effort: cache probing must never block page startup.
  }
});
@@ -275,7 +334,11 @@ <h2>LLM in Your Browser — 189 KB</h2>
275334 const chat = document . getElementById ( 'chat' ) ;
276335 const div = document . createElement ( 'div' ) ;
277336 div . className = `message ${ role } ` ;
278- div . innerHTML = formatText ( text ) ;
337+ if ( role === 'assistant' ) {
338+ div . textContent = '' ;
339+ } else {
340+ div . innerHTML = formatText ( text ) ;
341+ }
279342 chat . appendChild ( div ) ;
280343 chat . scrollTop = chat . scrollHeight ;
281344 return div ;
@@ -290,7 +353,6 @@ <h2>LLM in Your Browser — 189 KB</h2>
290353}
291354
292355function loadModelFromBytes ( bytes , name ) {
293- // Shared model loading from Uint8Array (used by both file drop and demo download)
294356 try {
295357 Module . FS . writeFile ( '/model.gguf' , bytes ) ;
296358 showLoading ( 'Initializing model...' ) ;
@@ -318,6 +380,7 @@ <h2>LLM in Your Browser — 189 KB</h2>
318380async function loadModel ( file ) {
319381 showLoading ( `Loading ${ file . name } (${ ( file . size / 1024 / 1024 ) . toFixed ( 0 ) } MB)...` ) ;
320382 addMessage ( 'system' , `Loading ${ file . name } ...` ) ;
383+ activeModelId = null ; // custom model — use generic template
321384 try {
322385 const buffer = await file . arrayBuffer ( ) ;
323386 const bytes = new Uint8Array ( buffer ) ;
@@ -328,6 +391,14 @@ <h2>LLM in Your Browser — 189 KB</h2>
328391 hideLoading ( ) ;
329392}
330393
/**
 * Wrap a raw user prompt in the chat template of the active demo model.
 * Falls back to generic ChatML when the user loaded a custom GGUF
 * (activeModelId is null in that case).
 * @param {string} text - Raw user input.
 * @returns {string} Fully templated prompt ready for the tokenizer.
 */
function getChatPrompt(text) {
  const model = activeModelId ? MODELS[activeModelId] : null;
  if (model) {
    return model.chatTemplate(text);
  }
  // Generic ChatML fallback for custom GGUF. No whitespace before <|im_end|>.
  return `<|im_start|>user\n${text}<|im_end|>\n<|im_start|>assistant\n`;
}
401+
331402async function generate ( ) {
332403 if ( ! modelLoaded || generating ) return ;
333404 const input = document . getElementById ( 'prompt' ) ;
@@ -337,45 +408,65 @@ <h2>LLM in Your Browser — 189 KB</h2>
337408 input . value = '' ;
338409 generating = true ;
339410 document . getElementById ( 'sendBtn' ) . disabled = true ;
411+ input . disabled = true ;
340412
341413 addMessage ( 'user' , text ) ;
342- const assistantDiv = addMessage ( 'assistant' , '<span class="cursor">▌</span> ' ) ;
414+ const assistantDiv = addMessage ( 'assistant' , '' ) ;
343415 let output = '' ;
416+ let tokenCount = 0 ;
417+ const startTime = performance . now ( ) ;
344418
345- // Set callbacks
419+ // Set streaming token callback
346420 Module . onToken = ( token ) => {
347421 output += token ;
348- assistantDiv . innerHTML = formatText ( output ) + '<span class="cursor">▌</span>' ;
349- document . getElementById ( 'chat' ) . scrollTop = document . getElementById ( 'chat' ) . scrollHeight ;
422+ tokenCount ++ ;
423+ // Update the assistant message with raw text + blinking cursor
424+ assistantDiv . textContent = output ;
425+ const cursor = document . createElement ( 'span' ) ;
426+ cursor . className = 'cursor' ;
427+ cursor . textContent = '▌' ;
428+ assistantDiv . appendChild ( cursor ) ;
429+ // Auto-scroll
430+ const chat = document . getElementById ( 'chat' ) ;
431+ chat . scrollTop = chat . scrollHeight ;
432+ // Live stats
433+ const elapsed = ( performance . now ( ) - startTime ) / 1000 ;
434+ if ( elapsed > 0.1 ) {
435+ document . getElementById ( 'statTokens' ) . textContent = `${ tokenCount } tokens` ;
436+ document . getElementById ( 'statSpeed' ) . textContent = `${ ( tokenCount / elapsed ) . toFixed ( 1 ) } tok/s` ;
437+ }
350438 } ;
439+
351440 Module . onDone = ( nTokens , elapsedMs ) => {
441+ // Final render with markdown formatting
352442 assistantDiv . innerHTML = formatText ( output ) ;
353- const tps = ( nTokens / ( elapsedMs / 1000 ) ) . toFixed ( 1 ) ;
443+ const tps = nTokens > 0 ? ( nTokens / ( elapsedMs / 1000 ) ) . toFixed ( 1 ) : '0' ;
354444 document . getElementById ( 'statTokens' ) . textContent = `${ nTokens } tokens` ;
355445 document . getElementById ( 'statSpeed' ) . textContent = `${ tps } tok/s` ;
356446 generating = false ;
357447 document . getElementById ( 'sendBtn' ) . disabled = false ;
358- document . getElementById ( 'prompt' ) . focus ( ) ;
359- } ;
360- Module . onStatus = ( msg ) => {
361- addMessage ( 'system' , msg ) ;
448+ input . disabled = false ;
449+ input . focus ( ) ;
362450 } ;
363451
364- // Wrap with ChatML template (instruct models need this to generate)
365- const chatPrompt = `<|im_start|>user\n${ text } <|im_end|>\n<|im_start|>assistant\n` ;
452+ const chatPrompt = getChatPrompt ( text ) ;
366453
367- // Run generation asynchronously so the UI doesn't freeze
368- setTimeout ( ( ) => {
369- const promptPtr = Module . allocateUTF8 ( chatPrompt ) ;
454+ // Use ASYNCIFY: _wasm_generate_async yields to browser between tokens
455+ const promptPtr = Module . allocateUTF8 ( chatPrompt ) ;
456+ try {
457+ await Module . _wasm_generate_async ( promptPtr , 0.7 , 256 ) ;
458+ } catch ( e ) {
459+ // Fallback for non-ASYNCIFY builds
370460 Module . _wasm_generate ( promptPtr , 0.7 , 256 ) ;
371- Module . _free ( promptPtr ) ;
461+ }
462+ Module . _free ( promptPtr ) ;
372463
373- if ( ! output ) {
374- assistantDiv . innerHTML = '<em style="color:#666">No output generated. Try a longer prompt.</em>' ;
375- }
376- generating = false ;
377- document . getElementById ( 'sendBtn' ) . disabled = false ;
378- } , 50 ) ; // yield to browser for one frame to show the spinner
464+ if ( ! output ) {
465+ assistantDiv . innerHTML = '<em style="color:#666">No output generated. Try a longer prompt.</em>' ;
466+ }
467+ generating = false ;
468+ document . getElementById ( 'sendBtn' ) . disabled = false ;
469+ input . disabled = false ;
379470}
380471</ script >
381472
@@ -389,7 +480,7 @@ <h2>LLM in Your Browser — 189 KB</h2>
389480 printErr : function ( text ) { console . warn ( text ) ; } ,
390481 onRuntimeInitialized : function ( ) {
391482 console . log ( 'quant.cpp WASM ready' ) ;
392- addMessage ( 'system' , 'Runtime ready. Drop a GGUF model file to begin .' ) ;
483+ addMessage ( 'system' , 'Runtime ready. Choose a model or drop your own GGUF file .' ) ;
393484 }
394485} ;
395486</ script >
0 commit comments