@@ -356,20 +356,37 @@ <h2>LLM in Your Browser</h2>
356356}
357357
// Load a GGUF model from raw bytes: write the file into the Emscripten
// virtual FS, ask the WASM runtime to initialise it, and update the UI.
//   bytes — Uint8Array containing the model file
//   name  — display name shown in the dropzone / chat log
function loadModelFromBytes(bytes, name) {
  try {
    Module.FS.writeFile('/model.gguf', bytes);
    showLoading('Initializing model...');
    // allocateUTF8 malloc's into the WASM heap — the pointer must be freed
    // or every model load leaks it. (The original never freed it.)
    const pathPtr = Module.allocateUTF8('/model.gguf');
    let rc;
    try {
      rc = Module._wasm_load_model(pathPtr);
    } finally {
      Module._free(pathPtr);
    }
    if (rc === 0) {
      modelLoaded = true;
      const sizeMB = (bytes.length / 1048576).toFixed(0);
      const dropzone = document.getElementById('dropzone');
      dropzone.classList.add('loaded');
      dropzone.innerHTML = `<h2>✓ ${name} (${sizeMB} MB)</h2>
        <p style="color:#6ee7b7">KV compression active — 3x longer context</p>`;
      document.getElementById('kvBadge').style.display = '';
      document.getElementById('prompt').disabled = false;
      document.getElementById('sendBtn').disabled = false;
      document.getElementById('prompt').focus();
      addMessage('system', `Model loaded! ${name} (${sizeMB} MB). Ask anything.`);
    } else {
      addMessage('system', 'Failed to load model.');
    }
  } catch (e) {
    addMessage('system', `Error: ${e.message}`);
  }
  hideLoading(); // always dismiss the overlay, success or failure
}
364382
// Read a user-supplied GGUF File object and hand its bytes to
// loadModelFromBytes. Custom files use the generic chat template.
async function loadModel(file) {
  const sizeMB = (file.size / 1024 / 1024).toFixed(0);
  showLoading(`Loading ${file.name} (${sizeMB} MB)...`);
  addMessage('system', `Loading ${file.name}...`);
  activeModelId = null; // custom model — use the generic chat template
  try {
    const buffer = await file.arrayBuffer();
    loadModelFromBytes(new Uint8Array(buffer), file.name);
  } catch (e) {
    addMessage('system', `Error: ${e.message}`);
    // Bug fix: the original left the loading overlay visible forever if
    // reading the file failed — loadModelFromBytes (which hides it) never ran.
    hideLoading();
  }
}
@@ -380,85 +397,11 @@ <h2>LLM in Your Browser</h2>
380397 if ( activeModelId && MODELS [ activeModelId ] ) {
381398 return MODELS [ activeModelId ] . chatTemplate ( text ) ;
382399 }
383- // Generic ChatML fallback for custom GGUF
384400 return `<|im_start|>user\n${ text } <|im_end|>\n<|im_start|>assistant\n` ;
385401}
386402
387- // ---- Web Worker inference engine (no ASYNCIFY overhead) ----
388- let worker = null ;
389- let pendingAssistantDiv = null ;
390- let pendingOutput = '' ;
391- let pendingTokenCount = 0 ;
392- let pendingStartTime = 0 ;
393-
394- function initWorker ( ) {
395- worker = new Worker ( 'inference-worker.js' ) ;
396- worker . onmessage = function ( e ) {
397- const msg = e . data ;
398-
399- if ( msg . type === 'ready' ) {
400- addMessage ( 'system' , 'Runtime ready. Choose a model or drop your own GGUF file.' ) ;
401- }
402- else if ( msg . type === 'status' ) {
403- if ( msg . msg === 'thinking' && pendingAssistantDiv ) {
404- pendingAssistantDiv . innerHTML = '<span class="thinking"><span class="spinner" style="display:inline-block;width:12px;height:12px;vertical-align:middle;margin-right:6px"></span>Thinking...</span>' ;
405- document . getElementById ( 'statTokens' ) . textContent = 'Processing prompt...' ;
406- document . getElementById ( 'statSpeed' ) . textContent = '' ;
407- } else {
408- addMessage ( 'system' , msg . msg ) ;
409- }
410- }
411- else if ( msg . type === 'loaded' ) {
412- modelLoaded = true ;
413- const dropzone = document . getElementById ( 'dropzone' ) ;
414- dropzone . classList . add ( 'loaded' ) ;
415- dropzone . innerHTML = `<h2>✓ ${ msg . name } (${ ( msg . size / 1048576 ) . toFixed ( 0 ) } MB)</h2>
416- <p style="color:#6ee7b7">KV compression active — 3x longer context</p>` ;
417- document . getElementById ( 'kvBadge' ) . style . display = '' ;
418- document . getElementById ( 'prompt' ) . disabled = false ;
419- document . getElementById ( 'sendBtn' ) . disabled = false ;
420- document . getElementById ( 'prompt' ) . focus ( ) ;
421- hideLoading ( ) ;
422- }
423- else if ( msg . type === 'token' && pendingAssistantDiv ) {
424- pendingOutput += msg . text ;
425- pendingTokenCount ++ ;
426- pendingAssistantDiv . textContent = pendingOutput ;
427- const cursor = document . createElement ( 'span' ) ;
428- cursor . className = 'cursor' ;
429- cursor . textContent = '▌' ;
430- pendingAssistantDiv . appendChild ( cursor ) ;
431- const chat = document . getElementById ( 'chat' ) ;
432- chat . scrollTop = chat . scrollHeight ;
433- const elapsed = ( performance . now ( ) - pendingStartTime ) / 1000 ;
434- if ( elapsed > 0.1 ) {
435- document . getElementById ( 'statTokens' ) . textContent = `${ pendingTokenCount } tokens` ;
436- document . getElementById ( 'statSpeed' ) . textContent = `${ ( pendingTokenCount / elapsed ) . toFixed ( 1 ) } tok/s` ;
437- }
438- }
439- else if ( msg . type === 'done' ) {
440- if ( pendingAssistantDiv ) {
441- if ( pendingOutput ) {
442- pendingAssistantDiv . innerHTML = formatText ( pendingOutput ) ;
443- } else {
444- pendingAssistantDiv . innerHTML = '<em style="color:#666">No output generated. Try a longer prompt.</em>' ;
445- }
446- const elapsed = ( performance . now ( ) - pendingStartTime ) / 1000 ;
447- const tps = pendingTokenCount > 0 ? ( pendingTokenCount / elapsed ) . toFixed ( 1 ) : '0' ;
448- document . getElementById ( 'statTokens' ) . textContent = `${ pendingTokenCount } tokens` ;
449- document . getElementById ( 'statSpeed' ) . textContent = `${ tps } tok/s` ;
450- }
451- generating = false ;
452- document . getElementById ( 'sendBtn' ) . disabled = false ;
453- document . getElementById ( 'prompt' ) . disabled = false ;
454- document . getElementById ( 'prompt' ) . focus ( ) ;
455- pendingAssistantDiv = null ;
456- }
457- } ;
458- }
459-
// One chat turn: read the prompt, stream tokens into a new assistant bubble
// via Module.onToken, and finalise stats in Module.onDone.
// NOTE(review): part of this function's body (old lines 465-468 / new
// 408-411) is hidden between the diff hunks below — presumably it clears the
// input, sets `generating = true` and disables the send button; confirm
// against the full file.
460- function generate ( ) {
461- if ( ! modelLoaded || generating || ! worker ) return ;
403+ async function generate ( ) {
404+ if ( ! modelLoaded || generating ) return ;
462405 const input = document . getElementById ( 'prompt' ) ;
463406 const text = input . value . trim ( ) ;
464407 if ( ! text ) return ;
@@ -469,19 +412,70 @@ <h2>LLM in Your Browser</h2>
469412 input . disabled = true ;
470413
471414 addMessage ( 'user' , text ) ;
472- pendingAssistantDiv = addMessage ( 'assistant' , '' ) ;
473- pendingAssistantDiv . innerHTML = '<span class="thinking"><span class="spinner" style="display:inline-block;width:12px;height:12px;vertical-align:middle;margin-right:6px"></span>Thinking...</span>' ;
474- pendingOutput = '' ;
475- pendingTokenCount = 0 ;
476- pendingStartTime = performance . now ( ) ;
// Placeholder bubble shown until the first token arrives.
415+ const assistantDiv = addMessage ( 'assistant' , '' ) ;
416+ assistantDiv . innerHTML = '<span class="thinking"><span class="spinner" style="display:inline-block;width:12px;height:12px;vertical-align:middle;margin-right:6px"></span>Thinking...</span>' ;
417+ let output = '' ;
418+ let tokenCount = 0 ;
419+ const startTime = performance . now ( ) ;
420+ document . getElementById ( 'statTokens' ) . textContent = 'Processing prompt...' ;
421+ document . getElementById ( 'statSpeed' ) . textContent = '' ;
422+
// Streaming callback: called from the WASM side for every generated token.
// Renders plain text plus a blinking cursor and live tok/s stats.
// NOTE(review): onToken/onDone are never reset after the turn — a stale
// closure could fire against an old bubble if generation overlaps; consider
// clearing them when done.
423+ Module . onToken = ( token ) => {
424+ output += token ;
425+ tokenCount ++ ;
426+ assistantDiv . textContent = output ;
427+ const cursor = document . createElement ( 'span' ) ;
428+ cursor . className = 'cursor' ;
429+ cursor . textContent = '▌' ;
430+ assistantDiv . appendChild ( cursor ) ;
431+ document . getElementById ( 'chat' ) . scrollTop = document . getElementById ( 'chat' ) . scrollHeight ;
432+ const elapsed = ( performance . now ( ) - startTime ) / 1000 ;
// Avoid a misleading tok/s spike during the first ~100 ms.
433+ if ( elapsed > 0.1 ) {
434+ document . getElementById ( 'statTokens' ) . textContent = `${ tokenCount } tokens` ;
435+ document . getElementById ( 'statSpeed' ) . textContent = `${ ( tokenCount / elapsed ) . toFixed ( 1 ) } tok/s` ;
436+ }
437+ } ;
438+
// Completion callback: swap streamed text for formatted HTML, publish final
// stats, and re-enable the input controls.
439+ Module . onDone = ( nTokens , elapsedMs ) => {
440+ assistantDiv . innerHTML = formatText ( output ) ;
441+ const tps = nTokens > 0 ? ( nTokens / ( elapsedMs / 1000 ) ) . toFixed ( 1 ) : '0' ;
442+ document . getElementById ( 'statTokens' ) . textContent = `${ nTokens } tokens` ;
443+ document . getElementById ( 'statSpeed' ) . textContent = `${ tps } tok/s` ;
444+ generating = false ;
445+ document . getElementById ( 'sendBtn' ) . disabled = false ;
446+ input . disabled = false ;
447+ input . focus ( ) ;
448+ } ;
477449
478450 const chatPrompt = getChatPrompt ( text ) ;
479- worker . postMessage ( { type : 'generate' , prompt : chatPrompt , temperature : 0.7 , maxTokens : 256 } ) ;
// NOTE(review): promptPtr is only freed on the happy path — if the sync
// fallback below throws, the pointer leaks; a try/finally around both calls
// would be safer. Also, the catch swallows the async error entirely before
// retrying synchronously — consider logging it.
451+ const promptPtr = Module . allocateUTF8 ( chatPrompt ) ;
452+ try {
453+ await Module . _wasm_generate_async ( promptPtr , 0.7 , 256 ) ;
454+ } catch ( e ) {
455+ Module . _wasm_generate ( promptPtr , 0.7 , 256 ) ;
456+ }
457+ Module . _free ( promptPtr ) ;
458+
// Fallback UI state reset in case onDone never fired (duplicates the reset
// in Module.onDone, minus the focus() call).
459+ if ( ! output ) {
460+ assistantDiv . innerHTML = '<em style="color:#666">No output generated. Try a longer prompt.</em>' ;
461+ }
462+ generating = false ;
463+ document . getElementById ( 'sendBtn' ) . disabled = false ;
464+ input . disabled = false ;
480465}
</script>
481467
482- // Initialize worker on page load
483- initWorker ( ) ;
<script>
// Emscripten Module configuration. This object must be a global literally
// named `Module` and must exist before quant.js is loaded, which is why it
// uses `var` at top level rather than const/let.
var Module = {
  // Callback slots filled in per chat turn by generate().
  onToken: null,
  onDone: null,
  onStatus: null,
  // Route WASM stdout/stderr to the browser console.
  print(text) { console.log(text); },
  printErr(text) { console.warn(text); },
  // Fired once the WASM runtime has finished initialising.
  onRuntimeInitialized() {
    addMessage('system', 'Runtime ready. Choose a model or drop your own GGUF file.');
  },
};
</script>
<script src="quant.js"></script>
485479
</body>
</html>
0 commit comments