Skip to content

Commit

Permalink
Ollama performance mode option (#2014)
Browse files Browse the repository at this point in the history
* ollama performance mode option

* Change ENV prop
Move perf setting to advanced

---------

Co-authored-by: timothycarambat <[email protected]>
  • Loading branch information
shatfield4 and timothycarambat authored Aug 2, 2024
1 parent 8cfe855 commit 7273c89
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 8 deletions.
60 changes: 52 additions & 8 deletions frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ import React, { useEffect, useState } from "react";
import System from "@/models/system";
import PreLoader from "@/components/Preloader";
import { OLLAMA_COMMON_URLS } from "@/utils/constants";
import { CaretDown, CaretUp } from "@phosphor-icons/react";
import { CaretDown, CaretUp, Info } from "@phosphor-icons/react";
import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
import { Tooltip } from "react-tooltip";

export default function OllamaLLMOptions({ settings }) {
const {
Expand All @@ -18,15 +19,13 @@ export default function OllamaLLMOptions({ settings }) {
initialBasePath: settings?.OllamaLLMBasePath,
ENDPOINTS: OLLAMA_COMMON_URLS,
});

const [performanceMode, setPerformanceMode] = useState(
settings?.OllamaLLMPerformanceMode || "base"
);
const [maxTokens, setMaxTokens] = useState(
settings?.OllamaLLMTokenLimit || 4096
);

const handleMaxTokensChange = (e) => {
setMaxTokens(Number(e.target.value));
};

return (
<div className="w-full flex flex-col gap-y-7">
<div className="w-full flex items-start gap-[36px] mt-1.5">
Expand All @@ -46,7 +45,7 @@ export default function OllamaLLMOptions({ settings }) {
defaultChecked="4096"
min={1}
value={maxTokens}
onChange={handleMaxTokensChange}
onChange={(e) => setMaxTokens(Number(e.target.value))}
onScroll={(e) => e.target.blur()}
required={true}
autoComplete="off"
Expand All @@ -64,7 +63,7 @@ export default function OllamaLLMOptions({ settings }) {
}}
className="text-white hover:text-white/70 flex items-center text-sm"
>
{showAdvancedControls ? "Hide" : "Show"} Manual Endpoint Input
{showAdvancedControls ? "Hide" : "Show"} advanced settings
{showAdvancedControls ? (
<CaretUp size={14} className="ml-1" />
) : (
Expand Down Expand Up @@ -134,12 +133,57 @@ export default function OllamaLLMOptions({ settings }) {
className="underline text-blue-300"
href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
target="_blank"
rel="noreferrer"
>
{" "}
Learn more &rarr;
</a>
</p>
</div>

<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold mb-2 flex items-center">
Performance Mode
<Info
size={16}
className="ml-2 text-white"
data-tooltip-id="performance-mode-tooltip"
/>
</label>
<select
name="OllamaLLMPerformanceMode"
required={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
value={performanceMode}
onChange={(e) => setPerformanceMode(e.target.value)}
>
<option value="base">Base (Default)</option>
<option value="maximum">Maximum</option>
</select>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Choose the performance mode for the Ollama model.
</p>
<Tooltip
id="performance-mode-tooltip"
place="bottom"
className="tooltip !text-xs max-w-xs"
>
<p className="text-red-500">
<strong>Note:</strong> Only change this setting if you
understand its implications on performance and resource usage.
</p>
<br />
<p>
<strong>Base:</strong> Ollama automatically limits the context
to 2048 tokens, reducing VRAM usage. Suitable for most users.
</p>
<br />
<p>
<strong>Maximum:</strong> Uses the full context window (up to
Max Tokens). May increase VRAM usage significantly.
</p>
</Tooltip>
</div>
</div>
</div>
</div>
Expand Down
1 change: 1 addition & 0 deletions server/models/systemSettings.js
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ const SystemSettings = {
OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF,
OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,
OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
OllamaLLMPerformanceMode: process.env.OLLAMA_PERFORMANCE_MODE ?? "base",

// TogetherAI Keys
TogetherAiApiKey: !!process.env.TOGETHER_AI_API_KEY,
Expand Down
5 changes: 5 additions & 0 deletions server/utils/AiProviders/ollama/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class OllamaAILLM {

this.basePath = process.env.OLLAMA_BASE_PATH;
this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
this.performanceMode = process.env.OLLAMA_PERFORMANCE_MODE || "base";
this.keepAlive = process.env.OLLAMA_KEEP_ALIVE_TIMEOUT
? Number(process.env.OLLAMA_KEEP_ALIVE_TIMEOUT)
: 300; // Default 5-minute timeout for Ollama model loading.
Expand All @@ -33,6 +34,10 @@ class OllamaAILLM {
model: this.model,
keepAlive: this.keepAlive,
useMLock: true,
      // There are currently only two performance settings, so if it's not "base" - it's max context.
...(this.performanceMode === "base"
? {}
: { numCtx: this.promptWindowLimit() }),
temperature,
});
}
Expand Down
4 changes: 4 additions & 0 deletions server/utils/helpers/updateENV.js
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ const KEY_MAPPING = {
envKey: "OLLAMA_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
OllamaLLMPerformanceMode: {
envKey: "OLLAMA_PERFORMANCE_MODE",
checks: [],
},
OllamaLLMKeepAliveSeconds: {
envKey: "OLLAMA_KEEP_ALIVE_TIMEOUT",
checks: [isInteger],
Expand Down

0 comments on commit 7273c89

Please sign in to comment.