|
| 1 | +/** |
| 2 | + * @quantcpp/wasm — ESM entry point |
| 3 | + * |
| 4 | + * Single-header C LLM inference engine in your browser. |
| 5 | + * |
| 6 | + * Usage: |
| 7 | + * |
| 8 | + * import { Quant } from '@quantcpp/wasm'; |
| 9 | + * |
| 10 | + * const q = await Quant.create({ |
| 11 | + * modelUrl: '/models/SmolLM2-135M-Instruct-Q8_0.gguf', |
| 12 | + * kvType: 'uniform_4b', |
| 13 | + * vQuant: 'q4', |
| 14 | + * onStatus: (msg) => console.log('[status]', msg), |
| 15 | + * }); |
| 16 | + * |
| 17 | + * await q.generate('Hello, my name is', { |
| 18 | + * maxTokens: 64, |
| 19 | + * temperature: 0.7, |
| 20 | + * onToken: (text) => console.log(text), |
| 21 | + * }); |
| 22 | + * |
| 23 | + * q.free(); |
| 24 | + */ |
| 25 | + |
/**
 * Cached promise for the Emscripten module. It is shared by every caller so
 * the glue script is injected at most once per successful load; it is reset
 * to `null` when a load attempt fails so that a later call can retry.
 */
let _modulePromise = null;

/**
 * Inject the Emscripten glue script into the page and resolve with the global
 * `Module` object once its runtime is initialised.
 *
 * The first call wins: while a load is pending or has succeeded, later calls
 * return the same promise and their `scriptUrl` argument is ignored.
 *
 * @param {string} scriptUrl - URL of the Emscripten-generated glue script.
 * @returns {Promise<object>} Resolves with the global `Module`. Rejects when
 *   there is no `window`, when the script fails to load, or when the script
 *   loads without defining `Module`.
 */
function loadEmscriptenModule(scriptUrl) {
  if (_modulePromise) return _modulePromise;

  const pending = new Promise((resolve, reject) => {
    if (typeof window === 'undefined') {
      reject(new Error('Node.js loader not implemented yet — use the browser build for now'));
      return;
    }
    const script = document.createElement('script');
    script.src = scriptUrl;
    script.onload = () => {
      // Emscripten modularize=0 attaches Module to globalThis
      if (typeof Module === 'undefined') {
        reject(new Error('quant.js loaded but Module is undefined'));
        return;
      }
      // If the runtime already ran before this handler fired, the
      // onRuntimeInitialized hook would never be invoked and the promise
      // would hang, so resolve right away.
      // NOTE(review): relies on the glue exposing `Module.calledRun`; confirm
      // for the Emscripten version used to build quant.js.
      if (Module.calledRun) {
        resolve(Module);
        return;
      }
      Module.onRuntimeInitialized = () => resolve(Module);
    };
    script.onerror = () => reject(new Error(`Failed to load ${scriptUrl}`));
    document.head.appendChild(script);
  });

  // Do not cache failures: a transient network error (or a wrong URL) must not
  // poison every subsequent call for the lifetime of the page.
  const cached = pending.catch((err) => {
    if (_modulePromise === cached) _modulePromise = null;
    throw err;
  });
  _modulePromise = cached;
  return cached;
}
| 50 | + |
/** Path inside the Emscripten virtual filesystem where the model is staged. */
const MODEL_PATH = '/model.gguf';

/**
 * Thin JavaScript wrapper around the quant.cpp WASM module.
 *
 * An instance holds a reference to the Emscripten `Module` and tracks whether
 * a model has been loaded through it.
 */
export class Quant {
  /**
   * @param {object} module - Initialised Emscripten `Module` exposing `FS` and `ccall`.
   */
  constructor(module) {
    this._m = module;
    this._loaded = false;
  }

  /**
   * Create a Quant instance, optionally loading a model.
   * @param {object} opts
   * @param {string} [opts.scriptUrl='./quant.js']
   * @param {string} [opts.modelUrl] - URL to a .gguf model file
   * @param {string} [opts.kvType='uniform_4b'] - one of fp32, uniform_4b, turbo_kv_3b, ...
   *   NOTE: accepted for API compatibility but not currently read by this wrapper.
   * @param {string} [opts.vQuant='fp16'] - one of fp16, q4, q2
   *   NOTE: accepted for API compatibility but not currently read by this wrapper.
   * @param {function} [opts.onStatus] - status callback, installed as `Module.onStatus`
   * @returns {Promise<Quant>}
   */
  static async create(opts = {}) {
    const scriptUrl = opts.scriptUrl || './quant.js';
    const module = await loadEmscriptenModule(scriptUrl);

    if (opts.onStatus) module.onStatus = opts.onStatus;

    const q = new Quant(module);

    if (opts.modelUrl) {
      await q.loadModel(opts.modelUrl);
    }

    return q;
  }

  /**
   * Load a GGUF model from a URL into the WASM filesystem.
   *
   * The response body is written to the virtual filesystem and then handed to
   * `wasm_load_model`. If loading fails the staged file is removed again so
   * the model bytes do not stay resident.
   *
   * @param {string} url - URL of the .gguf file.
   * @returns {Promise<void>}
   * @throws {Error} When the HTTP request fails or `wasm_load_model` returns non-zero.
   */
  async loadModel(url) {
    const resp = await fetch(url);
    if (!resp.ok) throw new Error(`Failed to fetch model: ${resp.status} ${resp.statusText}`);
    const buf = new Uint8Array(await resp.arrayBuffer());
    this._m.FS.writeFile(MODEL_PATH, buf);

    let ret;
    try {
      ret = this._m.ccall('wasm_load_model', 'number', ['string'], [MODEL_PATH]);
    } catch (err) {
      this._removeModelFile();
      throw err;
    }
    if (ret !== 0) {
      this._removeModelFile();
      throw new Error(`wasm_load_model failed (rc=${ret})`);
    }
    this._loaded = true;
  }

  /**
   * Generate text from a prompt.
   *
   * The per-call callbacks are installed on the module only for the duration
   * of this call; whatever `onToken` / `onDone` hooks the module had before
   * are restored when generation completes or `wasm_generate` throws. This
   * prevents a callback from an earlier call receiving tokens of a later one.
   *
   * @param {string} prompt
   * @param {object} opts
   * @param {number} [opts.maxTokens=128]
   * @param {number} [opts.temperature=0.7]
   * @param {function} [opts.onToken] - called per token with (text) string
   * @param {function} [opts.onDone] - called with (nTokens, elapsedMs)
   * @returns {Promise<{nTokens: number, elapsedMs: number}>} Resolves when the
   *   module invokes its `onDone` hook; rejects if `wasm_generate` throws.
   * @throws {Error} Synchronously, when no model has been loaded.
   */
  generate(prompt, opts = {}) {
    if (!this._loaded) throw new Error('No model loaded — call loadModel() first or pass modelUrl to create()');

    const maxTokens = opts.maxTokens ?? 128;
    const temperature = opts.temperature ?? 0.7;
    const m = this._m;
    const previousOnToken = m.onToken;
    const previousOnDone = m.onDone;
    const restoreHooks = () => {
      m.onToken = previousOnToken;
      m.onDone = previousOnDone;
    };

    return new Promise((resolve, reject) => {
      if (opts.onToken) m.onToken = opts.onToken;
      m.onDone = (nTokens, elapsedMs) => {
        restoreHooks();
        if (opts.onDone) opts.onDone(nTokens, elapsedMs);
        resolve({ nTokens, elapsedMs });
      };
      try {
        // NOTE(review): the return code of wasm_generate is not inspected;
        // completion is signalled solely through the onDone hook.
        m.ccall('wasm_generate', 'number', ['string', 'number', 'number'], [prompt, maxTokens, temperature]);
      } catch (err) {
        restoreHooks();
        reject(err);
      }
    });
  }

  /**
   * Free model resources. Call when done.
   *
   * Calls `wasm_free_model` and removes the staged model file from the
   * virtual filesystem. Does nothing when no model is loaded.
   */
  free() {
    if (!this._loaded) return;
    this._m.ccall('wasm_free_model', null, [], []);
    this._loaded = false;
    this._removeModelFile();
  }

  /**
   * @returns {boolean} Whether a model has been loaded and not yet freed.
   */
  isReady() {
    return this._loaded;
  }

  /**
   * Delete the staged model file from the virtual filesystem if it exists.
   * @private
   */
  _removeModelFile() {
    const { FS } = this._m;
    if (FS.analyzePath(MODEL_PATH).exists) FS.unlink(MODEL_PATH);
  }
}

export default Quant;
0 commit comments