diff --git a/BLOG.md b/BLOG.md index 9232a20..38c4f20 100644 --- a/BLOG.md +++ b/BLOG.md @@ -8,7 +8,7 @@ By leveraging the raw power of the **Snapdragon X Elite** and its high-performan ## šŸš€ The Vision: AI at the Edge -Most AI-integrated IDEs rely on heavy cloud APIs. This introduces latency, subscription costs, and—most importantly—privacy concerns. **Nova IDE** flips the script. It uses **MediaPipe GenAI** and **WebGPU** to run Large Language Models (LLMs) locally. +Most AI-integrated IDEs rely on heavy cloud APIs. This introduces latency, subscription costs, and—most importantly—privacy concerns. **Nova IDE** flips the script. It uses **LiteRT LM** and **WebGPU** to run Large Language Models (LLMs) locally. When you run Nova IDE on a machine powered by the **Snapdragon X Elite**, you're not just running a web app; you're utilizing one of the most efficient NPU/GPU architectures ever designed for portable computing. @@ -29,7 +29,7 @@ graph TD end subgraph AI_Runtime ["AI Execution Layer"] - MP["MediaPipe GenAI (WASM)"] + MP["LiteRT LM (WASM)"] WG["WebGPU API"] end @@ -47,8 +47,8 @@ graph TD MP -->|Streaming Tokens| UI ``` -### 1. The Inference Engine (MediaPipe + WebGPU) -At the heart of Nova IDE is the MediaPipe GenAI runtime. Unlike traditional JavaScript which runs on the CPU, Nova IDE uses **WebGPU** to talk directly to the **Qualcomm Adreno GPU**. +### 1. The Inference Engine (LiteRT LM + WebGPU) +At the heart of Nova IDE is the LiteRT LM runtime. Unlike traditional JavaScript which runs on the CPU, Nova IDE uses **WebGPU** to talk directly to the **Qualcomm Adreno GPU**. - **WebGPU** allows for massively parallel tensor operations required by transformers. - On the Snapdragon X Elite, the Adreno GPU provides the floating-point performance needed to generate tokens at lightning speed, rivaling cloud-based solutions. @@ -74,9 +74,9 @@ The Snapdragon X Elite is a breakthrough for web-based AI. While Intel and AMD h When you click **"⚔ Load Local AI Model"** in Nova IDE, the following sequence occurs: -1. **WASM Initialization**: The IDE loads the MediaPipe GenAI WebAssembly runtime. +1. **WASM Initialization**: The IDE loads the LiteRT LM WebAssembly runtime. 2. **GPU Adapter Request**: The browser requests a WebGPU adapter. On an X Elite machine, this identifies the **Qualcomm Adreno GPU**. -3. **Model Loading**: The model (in `.task` format) is fetched into a `SharedArrayBuffer`. +3. **Model Loading**: The model (in `.litertlm` format) is fetched into a `SharedArrayBuffer`. 4. **GPU Compilation**: The model's computation graph is compiled into GPU-specific kernels. 5. **Streaming Inference**: When you type a prompt, the tokens are generated on the Adreno GPU and streamed back to the UI in real-time. diff --git a/README.md b/README.md index 45b7984..d99872e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ⚔ Nova IDE -Nova IDE is a lightweight, browser-based coding environment designed for the future of **on-device AI**. It leverages WebGPU and MediaPipe to run high-performance Large Language Models (LLMs) entirely within your browser—no cloud, no APIs, and total privacy. +Nova IDE is a lightweight, browser-based coding environment designed for the future of **on-device AI**. It leverages WebGPU and LiteRT LM to run high-performance Large Language Models (LLMs) entirely within your browser—no cloud, no APIs, and total privacy. ![Nova IDE Screenshot](src/assets/hero.png) @@ -60,7 +60,7 @@ Nova IDE is a lightweight, browser-based coding environment designed for the fut - **Core**: JavaScript (ESM), HTML5, CSS3 - **Editor**: [CodeMirror 6](https://codemirror.net/) -- **AI Engine**: [MediaPipe GenAI](https://ai.google.dev/edge/mediapipe/solutions/genai/llm_inference) +- **AI Engine**: [LiteRT LM](https://ai.google.dev/edge/litert-lm/js) - **Bundler**: [Vite](https://vitejs.dev/) - **Storage**: IndexedDB (Virtual File System) diff --git a/package-lock.json b/package-lock.json index d89dfb7..a367c52 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,11 +1,11 @@ { - "name": "local-ide-litertjs", + "name": "nova-ide", "version": "0.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "local-ide-litertjs", + "name": "nova-ide", "version": "0.0.0", "dependencies": { "@codemirror/autocomplete": "^6.20.2", @@ -15,7 +15,7 @@ "@codemirror/lang-python": "^6.2.1", "@codemirror/lint": "^6.9.6", "@codemirror/theme-one-dark": "^6.1.3", - "@mediapipe/tasks-genai": "^0.10.27", + "@litert-lm/core": "^0.12.1", "codemirror": "^6.0.2" }, "devDependencies": { @@ -174,29 +174,6 @@ "w3c-keyname": "^2.2.4" } }, - "node_modules/@emnapi/core": { - "version": "1.10.0", - "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.10.0.tgz", - "integrity": "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw==", - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "@emnapi/wasi-threads": "1.2.1", - "tslib": "^2.4.0" - } - }, - "node_modules/@emnapi/runtime": { - "version": "1.10.0", - "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.10.0.tgz", - "integrity": "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==", - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "tslib": "^2.4.0" - } - }, "node_modules/@emnapi/wasi-threads": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/@emnapi/wasi-threads/-/wasi-threads-1.2.1.tgz", @@ -276,18 +253,27 @@ "@lezer/lr": "^1.0.0" } }, + "node_modules/@litert-lm/core": { + "version": "0.12.1", + "resolved": "https://registry.npmjs.org/@litert-lm/core/-/core-0.12.1.tgz", + "integrity": "sha512-lLF+Hvg52TIN0ekHQ+RLpjyDt3pPuEqMFvE/6xYCvcnsJ3R03dz3GmVJ3mGgumPZ7gxc+iqLUF3Be5v5L1K1Xw==", + "license": "Apache-2.0", + "dependencies": { + "@litertjs/wasm-utils": "^2.0.0" + } + }, + "node_modules/@litertjs/wasm-utils": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/@litertjs/wasm-utils/-/wasm-utils-2.5.0.tgz", + "integrity": "sha512-zhMAqJRJ3ROi48flZxYx+K2MiMllJVuH7oeumpSIfQMBeOb6JyLV/7ltLbY6E+nERUAfNwzIBqjslWAeXcO6iQ==", + "license": "Apache-2.0" + }, "node_modules/@marijn/find-cluster-break": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/@marijn/find-cluster-break/-/find-cluster-break-1.0.2.tgz", "integrity": "sha512-l0h88YhZFyKdXIFNfSWpyjStDjGHwZ/U7iobcK1cQQD8sejsONdQtTVU+1wVN1PBw40PiiHB1vA5S7VTfQiP9g==", "license": "MIT" }, - "node_modules/@mediapipe/tasks-genai": { - "version": "0.10.27", - "resolved": "https://registry.npmjs.org/@mediapipe/tasks-genai/-/tasks-genai-0.10.27.tgz", - "integrity": "sha512-cv69CPPAtEDBUs6dGZft2S+sBqde1XvEMST367siSyxrhffdWtm4uQIsfdedAbhJ33BwAjuMnAdxDrO9WrzIAQ==", - "license": "Apache-2.0" - }, "node_modules/@napi-rs/wasm-runtime": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-1.1.4.tgz", @@ -949,6 +935,7 @@ "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, diff --git a/package.json b/package.json index dc9c9dd..61e02b8 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,7 @@ "@codemirror/lang-python": "^6.2.1", "@codemirror/lint": "^6.9.6", "@codemirror/theme-one-dark": "^6.1.3", - "@mediapipe/tasks-genai": "^0.10.27", + "@litert-lm/core": "^0.12.1", "codemirror": "^6.0.2" } } diff --git a/packages/web-agent-core/README.md b/packages/web-agent-core/README.md index 0d35c6c..cda5a2b 100644 --- a/packages/web-agent-core/README.md +++ b/packages/web-agent-core/README.md @@ -2,7 +2,7 @@ A purely in-browser, WebGPU-accelerated, zero-dependency autonomous ReAct (Reason+Act) Agent Framework. -Built for local-first, privacy-respecting AI applications using MediaPipe GenAI and lightweight models like Gemma. +Built for local-first, privacy-respecting AI applications using LiteRT LM and lightweight models like Gemma. ## Features - **Zero Backend**: Runs entirely in the browser using WebGPU. @@ -41,7 +41,7 @@ const myTools = [ // Your LLM object just needs a \`generateRaw(history, onToken)\` method const aiEngine = { generateRaw: async (prompt, onToken) => { - // Call MediaPipe or Transformers.js here + // Call LiteRT LM, Transformers.js, or another local model runtime here // stream tokens to onToken(token) } }; diff --git a/packages/web-agent-core/package.json b/packages/web-agent-core/package.json index 1137a15..00d82f7 100644 --- a/packages/web-agent-core/package.json +++ b/packages/web-agent-core/package.json @@ -15,7 +15,7 @@ "agent", "llm", "webgpu", - "mediapipe", + "litert-lm", "react", "autonomous", "browser", @@ -26,4 +26,4 @@ "publishConfig": { "registry": "https://npm.pkg.github.com" } -} \ No newline at end of file +} diff --git a/packages/web-agent-core/website/docs/intro.md b/packages/web-agent-core/website/docs/intro.md index 9589fb6..dd17a23 100644 --- a/packages/web-agent-core/website/docs/intro.md +++ b/packages/web-agent-core/website/docs/intro.md @@ -6,7 +6,7 @@ sidebar_position: 1 Welcome to **Nova Web Agent Core**! -This package provides a purely in-browser, WebGPU-accelerated, zero-dependency autonomous ReAct (Reason+Act) Agent Framework. It is built for local-first, privacy-respecting AI applications using MediaPipe GenAI and lightweight models like Gemma. +This package provides a purely in-browser, WebGPU-accelerated, zero-dependency autonomous ReAct (Reason+Act) Agent Framework. It is built for local-first, privacy-respecting AI applications using LiteRT LM and lightweight models like Gemma. ## Why this framework? @@ -80,7 +80,7 @@ Your LLM wrapper only needs one method: `generateRaw(prompt, onTokenCallback)`. ```javascript const aiEngine = { generateRaw: async (prompt, onToken) => { - // Call MediaPipe, Transformers.js, or even a cloud API here + // Call LiteRT LM, Transformers.js, or even a cloud API here // stream tokens to onToken(token) } }; diff --git a/packages/web-agent-core/website/src/components/HomepageFeatures/index.js b/packages/web-agent-core/website/src/components/HomepageFeatures/index.js index 13e00da..4457077 100644 --- a/packages/web-agent-core/website/src/components/HomepageFeatures/index.js +++ b/packages/web-agent-core/website/src/components/HomepageFeatures/index.js @@ -17,7 +17,7 @@ const FeatureList = [ icon: '🧠', description: ( <> - Powered by the cutting-edge WebGPU backend and MediaPipe, execute your agent's reasoning loops completely client-side without API keys. + Powered by the cutting-edge WebGPU backend and LiteRT LM, execute your agent's reasoning loops completely client-side without API keys. ), }, diff --git a/src/ai-engine.js b/src/ai-engine.js index 01df7ad..d3eaca0 100644 --- a/src/ai-engine.js +++ b/src/ai-engine.js @@ -1,6 +1,6 @@ /** - * ai-engine.js — Local AI Inference wrapper - * Uses @mediapipe/tasks-genai for on-device WebGPU inference. + * ai-engine.js - Local AI inference wrapper. + * Uses LiteRT LM for on-device WebGPU inference. */ const MODEL_STATUS = { @@ -12,13 +12,26 @@ const MODEL_STATUS = { GENERATING: 'generating', }; +const SYSTEM_PROMPT = 'You are an expert coding assistant inside an IDE. Provide concise, correct code and explanations. Use markdown code blocks for code snippets.'; +const MIN_MODEL_BYTES = 1024 * 1024; +const MODEL_DOWNLOAD_MEMORY_LIMIT_BYTES = 1024 * 1024 * 1024; +const LITERT_LM_CONFIG = { + maxTokens: 8192, + samplerParams: { + k: 40, + temperature: 0.7, + seed: 42, + }, +}; + class AIEngine { constructor() { - this.llmInference = null; + this.engine = null; + this.conversation = null; this.status = MODEL_STATUS.IDLE; this.statusMessage = 'No model loaded'; this.listeners = new Set(); - this.modelName = 'Local AI Engine'; + this.modelName = 'LiteRT LM'; this._opfsRoot = null; } @@ -29,19 +42,23 @@ class AIEngine { return this._opfsRoot; } - /** Check if a model exists in OPFS cache */ + /** Check if a model exists in OPFS cache. */ async getCachedModel(filename) { try { const root = await this._getOpfs(); const fileHandle = await root.getFileHandle(filename); const file = await fileHandle.getFile(); + if (file.size < MIN_MODEL_BYTES) { + await root.removeEntry(filename); + return null; + } return file; } catch (e) { return null; } } - /** Save a buffer to OPFS cache */ + /** Save a buffer to OPFS cache. */ async saveToCache(filename, buffer) { try { const root = await this._getOpfs(); @@ -55,7 +72,54 @@ class AIEngine { } } - /** Delete a model from OPFS cache */ + /** Stream a response body directly into OPFS without buffering the model in RAM. */ + async saveResponseToCache(filename, response, onProgress) { + const root = await this._getOpfs(); + const fileHandle = await root.getFileHandle(filename, { create: true }); + const writable = await fileHandle.createWritable(); + const reader = response.body.getReader(); + const total = parseInt(response.headers.get('content-length') || '0'); + let received = 0; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) break; + + await writable.write(value); + received += value.length; + + if (onProgress) onProgress(received, total); + this._setStatus( + MODEL_STATUS.DOWNLOADING, + total + ? `Downloading... ${(received / 1048576).toFixed(0)} / ${(total / 1048576).toFixed(0)} MB` + : `Downloading... ${(received / 1048576).toFixed(0)} MB` + ); + } + } catch (err) { + await writable.abort(); + await this.deleteFromCache(filename); + throw err; + } + + await writable.close(); + const file = await fileHandle.getFile(); + const contentEncoding = (response.headers.get('content-encoding') || 'identity').toLowerCase(); + const canValidateLength = total > 0 && (contentEncoding === 'identity' || contentEncoding === ''); + + if (file.size < MIN_MODEL_BYTES || (canValidateLength && file.size !== total)) { + await this.deleteFromCache(filename); + throw new Error( + canValidateLength + ? `Downloaded model is incomplete (${file.size} of ${total} bytes). Please retry.` + : 'Downloaded model is incomplete. Please retry.' + ); + } + return file; + } + + /** Delete a model from OPFS cache. */ async deleteFromCache(filename) { try { const root = await this._getOpfs(); @@ -63,7 +127,7 @@ class AIEngine { } catch (e) {} } - /** Subscribe to status changes */ + /** Subscribe to status changes. */ onStatusChange(fn) { this.listeners.add(fn); return () => this.listeners.delete(fn); @@ -84,59 +148,70 @@ class AIEngine { this._emit(); } + async _deleteConversation() { + if (this.conversation) { + await this.conversation.delete(); + this.conversation = null; + } + } + + async _deleteEngine() { + await this._deleteConversation(); + if (this.engine) { + await this.engine.delete(); + this.engine = null; + } + } + + async _createConversation() { + if (!this.engine) { + throw new Error('Model not loaded'); + } + + return this.engine.createConversation({ + sessionConfig: { + samplerParams: LITERT_LM_CONFIG.samplerParams, + maxOutputTokens: LITERT_LM_CONFIG.maxTokens, + }, + preface: { + messages: [ + { role: 'system', content: SYSTEM_PROMPT }, + ], + }, + }); + } + /** - * Load model from a URL or user-uploaded file. + * Load model from a URL, OPFS File, or user-uploaded file. * @param {string|File} modelSource - URL string or File object */ async loadModel(modelSource) { try { - this._setStatus(MODEL_STATUS.LOADING, 'Importing AI Engine…'); - - // Dynamic import so initial page load is fast - const genai = await import('@mediapipe/tasks-genai'); - const { FilesetResolver, LlmInference } = genai; - - this._setStatus(MODEL_STATUS.LOADING, 'Loading AI runtime…'); + this._setStatus(MODEL_STATUS.LOADING, 'Importing LiteRT LM...'); - // Initialize the WASM fileset resolver (required by MediaPipe) - const genaiFileset = await FilesetResolver.forGenAiTasks( - 'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai@latest/wasm' - ); - - this._setStatus(MODEL_STATUS.LOADING, 'Initializing LLM engine…'); + const { Engine } = await import('@litert-lm/core'); - let modelAssetPath = null; - let modelAssetBuffer = null; + await this._deleteEngine(); - if (typeof modelSource === 'string') { - modelAssetPath = modelSource; - this._setStatus(MODEL_STATUS.DOWNLOADING, 'Downloading model…'); - } else if (modelSource instanceof File) { - this._setStatus(MODEL_STATUS.LOADING, 'Reading model file…'); - const buf = await modelSource.arrayBuffer(); - modelAssetBuffer = new Uint8Array(buf); + let model = modelSource; + if (modelSource instanceof File) { + this._setStatus(MODEL_STATUS.LOADING, 'Reading LiteRT LM model file...'); + model = modelSource.stream(); + } else if (typeof modelSource === 'string') { + this._setStatus(MODEL_STATUS.DOWNLOADING, 'Preparing model download...'); } - this._setStatus(MODEL_STATUS.LOADING, 'Compiling model for WebGPU…'); + this._setStatus(MODEL_STATUS.LOADING, 'Initializing LiteRT LM engine...'); - const options = { - baseOptions: {}, - maxTokens: 8192, - topK: 40, - temperature: 0.7, - randomSeed: 42, - }; + this.engine = await Engine.create({ + model, + mainExecutorSettings: { + maxNumTokens: LITERT_LM_CONFIG.maxTokens, + }, + }); - if (modelAssetPath) { - options.baseOptions.modelAssetPath = modelAssetPath; - } - if (modelAssetBuffer) { - options.baseOptions.modelAssetBuffer = modelAssetBuffer; - } - - // FilesetResolver must be passed as the first argument - this.llmInference = await LlmInference.createFromOptions(genaiFileset, options); - this._setStatus(MODEL_STATUS.READY, 'Model ready — on-device inference active'); + this.conversation = await this._createConversation(); + this._setStatus(MODEL_STATUS.READY, 'Model ready - LiteRT LM on-device inference active'); } catch (err) { console.error('[AIEngine] Load error:', err); this._setStatus(MODEL_STATUS.ERROR, `Failed: ${err.message}`); @@ -144,41 +219,62 @@ class AIEngine { } } + _chunkText(chunk) { + if (!chunk) return ''; + if (typeof chunk.content === 'string') return chunk.content; + if (!Array.isArray(chunk.content)) return ''; + + return chunk.content + .filter(item => item && item.type === 'text' && typeof item.text === 'string') + .map(item => item.text) + .join(''); + } + + async _streamMessage(conversation, message, onToken) { + let fullResponse = ''; + const stream = conversation.sendMessageStreaming(message); + + for await (const chunk of stream) { + const text = this._chunkText(chunk); + if (!text) continue; + fullResponse += text; + if (onToken) onToken(fullResponse); + } + + return fullResponse; + } + + _messagesFromGemmaTranscript(rawPrompt) { + const turnRegex = /(user|model)\n([\s\S]*?)(?:|$)/g; + const messages = []; + let match; + + while ((match = turnRegex.exec(rawPrompt)) !== null) { + const role = match[1] === 'model' ? 'assistant' : 'user'; + const content = match[2].trim(); + if (!content) continue; + messages.push({ role, content }); + } + + return messages.length ? messages : rawPrompt; + } + /** - * Generate a response (streaming). + * Generate a response in the main chat conversation. * @param {string} prompt - * @param {(partial: string) => void} onToken - called with each partial result + * @param {(partial: string) => void} onToken - called with accumulated text * @returns {Promise} full response */ async generate(prompt, onToken) { - if (!this.llmInference) { + if (!this.conversation) { throw new Error('Model not loaded'); } - this._setStatus(MODEL_STATUS.GENERATING, 'Generating…'); + this._setStatus(MODEL_STATUS.GENERATING, 'Generating...'); try { - // Format as instruction-tuned prompt - const formattedPrompt = this._formatPrompt(prompt); - - let fullResponse = ''; - - // Use streaming API - const response = await this.llmInference.generateResponse( - formattedPrompt, - (partialResult, done) => { - fullResponse = partialResult; - if (onToken) onToken(partialResult); - } - ); - - // If streaming callback didn't fire, use direct result - if (!fullResponse && response) { - fullResponse = response; - if (onToken) onToken(response); - } - - this._setStatus(MODEL_STATUS.READY, 'Model ready — on-device inference active'); + const fullResponse = await this._streamMessage(this.conversation, prompt, onToken); + this._setStatus(MODEL_STATUS.READY, 'Model ready - LiteRT LM on-device inference active'); return fullResponse; } catch (err) { console.error('[AIEngine] Generation error:', err); @@ -188,46 +284,33 @@ class AIEngine { } /** - * Generate a response using raw formatted string (for multi-turn/agents). + * Generate a response using a raw formatted string for the agent loop. */ async generateRaw(rawPrompt, onToken) { - if (!this.llmInference) throw new Error('Model not loaded'); - this._setStatus(MODEL_STATUS.GENERATING, 'Agent thinking…'); + if (!this.engine) { + throw new Error('Model not loaded'); + } + + this._setStatus(MODEL_STATUS.GENERATING, 'Agent thinking...'); + let conversation = null; try { - let fullResponse = ''; - const response = await this.llmInference.generateResponse( - rawPrompt, - (partialResult, done) => { - fullResponse = partialResult; - if (onToken) onToken(partialResult); - } - ); - if (!fullResponse && response) { - fullResponse = response; - if (onToken) onToken(response); - } - this._setStatus(MODEL_STATUS.READY, 'Model ready — on-device inference active'); + conversation = await this._createConversation(); + const message = this._messagesFromGemmaTranscript(rawPrompt); + const fullResponse = await this._streamMessage(conversation, message, onToken); + this._setStatus(MODEL_STATUS.READY, 'Model ready - LiteRT LM on-device inference active'); return fullResponse; } catch (err) { console.error('[AIEngine] Raw generation error:', err); this._setStatus(MODEL_STATUS.READY, 'Generation completed with errors'); throw err; + } finally { + if (conversation) { + await conversation.delete(); + } } } - /** - * Format prompt using instruction template. - */ - _formatPrompt(userMessage) { - return `user -You are an expert coding assistant inside an IDE. Provide concise, correct code and explanations. Use markdown code blocks for code snippets. - -${userMessage} -model -`; - } - get isReady() { return this.status === MODEL_STATUS.READY; } @@ -246,22 +329,38 @@ ${userMessage} try { const filename = url.split('/').pop(); - // Check cache first if persist is requested if (persist) { const cached = await this.getCachedModel(filename); if (cached) { - this._setStatus(MODEL_STATUS.LOADING, 'Loading from local disk…'); - await this.loadModel(cached); - return; + this._setStatus(MODEL_STATUS.LOADING, 'Loading from local disk...'); + try { + await this.loadModel(cached); + return; + } catch (err) { + await this.deleteFromCache(filename); + console.warn('[AIEngine] Removed corrupt cached model:', err); + this._setStatus(MODEL_STATUS.DOWNLOADING, 'Cached model was corrupt. Downloading again...'); + } } } - this._setStatus(MODEL_STATUS.DOWNLOADING, 'Connecting…'); + this._setStatus(MODEL_STATUS.DOWNLOADING, 'Connecting...'); const response = await fetch(url); if (!response.ok) throw new Error(`HTTP ${response.status} ${response.statusText}`); const total = parseInt(response.headers.get('content-length') || '0'); + + if (persist) { + const file = await this.saveResponseToCache(filename, response, onProgress); + await this.loadModel(file); + return; + } + + if (total > MODEL_DOWNLOAD_MEMORY_LIMIT_BYTES) { + throw new Error('This model is too large for memory-only loading. Enable "Save to local disk (OPFS)" and try again.'); + } + const reader = response.body.getReader(); const chunks = []; let received = 0; @@ -269,18 +368,23 @@ ${userMessage} while (true) { const { done, value } = await reader.read(); if (done) break; + + if (received + value.length > MODEL_DOWNLOAD_MEMORY_LIMIT_BYTES) { + await reader.cancel(); + throw new Error('This model is too large for memory-only loading. Enable "Save to local disk (OPFS)" and try again.'); + } + chunks.push(value); received += value.length; if (onProgress) onProgress(received, total); this._setStatus( MODEL_STATUS.DOWNLOADING, total - ? `Downloading… ${(received / 1048576).toFixed(0)} / ${(total / 1048576).toFixed(0)} MB` - : `Downloading… ${(received / 1048576).toFixed(0)} MB` + ? `Downloading... ${(received / 1048576).toFixed(0)} / ${(total / 1048576).toFixed(0)} MB` + : `Downloading... ${(received / 1048576).toFixed(0)} MB` ); } - // Concatenate chunks into a single buffer const buffer = new Uint8Array(received); let offset = 0; for (const chunk of chunks) { @@ -290,10 +394,6 @@ ${userMessage} const file = new File([buffer], filename); - if (persist) { - await this.saveToCache(filename, buffer); - } - await this.loadModel(file); } catch (err) { console.error('[AIEngine] Download error:', err); @@ -302,12 +402,16 @@ ${userMessage} } } + async disposeAsync() { + await this._deleteEngine(); + this._setStatus(MODEL_STATUS.IDLE, 'Engine disposed'); + } + dispose() { - if (this.llmInference) { - this.llmInference.close(); - this.llmInference = null; - } this._setStatus(MODEL_STATUS.IDLE, 'Engine disposed'); + this._deleteEngine().catch(err => { + console.error('[AIEngine] Dispose error:', err); + }); } } diff --git a/src/chat.js b/src/chat.js index ef83e52..923c35b 100644 --- a/src/chat.js +++ b/src/chat.js @@ -10,17 +10,17 @@ import { vfs } from './file-system.js'; const MODELS = [ { id: 'gemma4-e2b', - label: 'Gemma 4 E2B', + label: 'Gemma 4 E2B LiteRT LM', size: '2 GB', badge: 'ā˜… Recommended', - url: 'https://huggingface.co/litert-community/gemma-4-E2B-it-litert-lm/resolve/main/gemma-4-E2B-it-web.task', + url: 'https://huggingface.co/litert-community/gemma-4-E2B-it-litert-lm/resolve/main/gemma-4-E2B-it-web.litertlm', }, { id: 'gemma4-e4b', - label: 'Gemma 4 E4B', + label: 'Gemma 4 E4B LiteRT LM', size: '3 GB', badge: 'šŸ”„ Best', - url: 'https://huggingface.co/litert-community/gemma-4-E4B-it-litert-lm/resolve/main/gemma-4-E4B-it-web.task', + url: 'https://huggingface.co/litert-community/gemma-4-E4B-it-litert-lm/resolve/main/gemma-4-E4B-it-web.litertlm', }, ]; @@ -94,7 +94,7 @@ export class ChatPanel {
- +
diff --git a/vite.config.js b/vite.config.js index 96b6a13..374b87e 100644 --- a/vite.config.js +++ b/vite.config.js @@ -1,7 +1,7 @@ import { defineConfig } from 'vite'; export default defineConfig({ - // Allow serving the large .task model file from public/ + // Allow serving large LiteRT LM model files from public/ server: { headers: { 'Cross-Origin-Embedder-Policy': 'require-corp', @@ -12,9 +12,9 @@ export default defineConfig({ timeout: 60000, }, }, - // Exclude model files from being processed by Vite's pipeline - assetsInclude: ['**/*.task'], + // Exclude model files from being processed by Vite's pipeline. + assetsInclude: ['**/*.litertlm'], optimizeDeps: { - exclude: ['@mediapipe/tasks-genai'], + exclude: ['@litert-lm/core'], }, });