Skip to content

Commit

Permalink
Ollama performance mode option (#2014)
Browse files Browse the repository at this point in the history
* ollama performance mode option

* Change ENV prop
Move perf setting to advanced

---------

Co-authored-by: timothycarambat <[email protected]>
  • Loading branch information
shatfield4 and timothycarambat authored Aug 2, 2024
1 parent 8cfe855 commit 7273c89
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 8 deletions.
60 changes: 52 additions & 8 deletions frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ import React, { useEffect, useState } from "react";
import System from "@/models/system";
import PreLoader from "@/components/Preloader";
import { OLLAMA_COMMON_URLS } from "@/utils/constants";
import { CaretDown, CaretUp } from "@phosphor-icons/react";
import { CaretDown, CaretUp, Info } from "@phosphor-icons/react";
import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
import { Tooltip } from "react-tooltip";

export default function OllamaLLMOptions({ settings }) {
const {
Expand All @@ -18,15 +19,13 @@ export default function OllamaLLMOptions({ settings }) {
initialBasePath: settings?.OllamaLLMBasePath,
ENDPOINTS: OLLAMA_COMMON_URLS,
});

const [performanceMode, setPerformanceMode] = useState(
settings?.OllamaLLMPerformanceMode || "base"
);
const [maxTokens, setMaxTokens] = useState(
settings?.OllamaLLMTokenLimit || 4096
);

const handleMaxTokensChange = (e) => {
setMaxTokens(Number(e.target.value));
};

return (
<div className="w-full flex flex-col gap-y-7">
<div className="w-full flex items-start gap-[36px] mt-1.5">
Expand All @@ -46,7 +45,7 @@ export default function OllamaLLMOptions({ settings }) {
defaultChecked="4096"
min={1}
value={maxTokens}
onChange={handleMaxTokensChange}
onChange={(e) => setMaxTokens(Number(e.target.value))}
onScroll={(e) => e.target.blur()}
required={true}
autoComplete="off"
Expand All @@ -64,7 +63,7 @@ export default function OllamaLLMOptions({ settings }) {
}}
className="text-white hover:text-white/70 flex items-center text-sm"
>
{showAdvancedControls ? "Hide" : "Show"} Manual Endpoint Input
{showAdvancedControls ? "Hide" : "Show"} advanced settings
{showAdvancedControls ? (
<CaretUp size={14} className="ml-1" />
) : (
Expand Down Expand Up @@ -134,12 +133,57 @@ export default function OllamaLLMOptions({ settings }) {
className="underline text-blue-300"
href="https://github.com/ollama/ollama/blob/main/docs/faq.md#how-do-i-keep-a-model-loaded-in-memory-or-make-it-unload-immediately"
target="_blank"
rel="noreferrer"
>
{" "}
Learn more &rarr;
</a>
</p>
</div>

<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold mb-2 flex items-center">
Performance Mode
<Info
size={16}
className="ml-2 text-white"
data-tooltip-id="performance-mode-tooltip"
/>
</label>
<select
name="OllamaLLMPerformanceMode"
required={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
value={performanceMode}
onChange={(e) => setPerformanceMode(e.target.value)}
>
<option value="base">Base (Default)</option>
<option value="maximum">Maximum</option>
</select>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Choose the performance mode for the Ollama model.
</p>
<Tooltip
id="performance-mode-tooltip"
place="bottom"
className="tooltip !text-xs max-w-xs"
>
<p className="text-red-500">
<strong>Note:</strong> Only change this setting if you
understand its implications on performance and resource usage.
</p>
<br />
<p>
<strong>Base:</strong> Ollama automatically limits the context
to 2048 tokens, reducing VRAM usage. Suitable for most users.
</p>
<br />
<p>
<strong>Maximum:</strong> Uses the full context window (up to
Max Tokens). May increase VRAM usage significantly.
</p>
</Tooltip>
</div>
</div>
</div>
</div>
Expand Down
1 change: 1 addition & 0 deletions server/models/systemSettings.js
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,7 @@ const SystemSettings = {
OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF,
OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT,
OllamaLLMKeepAliveSeconds: process.env.OLLAMA_KEEP_ALIVE_TIMEOUT ?? 300,
OllamaLLMPerformanceMode: process.env.OLLAMA_PERFORMANCE_MODE ?? "base",

// TogetherAI Keys
TogetherAiApiKey: !!process.env.TOGETHER_AI_API_KEY,
Expand Down
5 changes: 5 additions & 0 deletions server/utils/AiProviders/ollama/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ class OllamaAILLM {

this.basePath = process.env.OLLAMA_BASE_PATH;
this.model = modelPreference || process.env.OLLAMA_MODEL_PREF;
this.performanceMode = process.env.OLLAMA_PERFORMANCE_MODE || "base";
this.keepAlive = process.env.OLLAMA_KEEP_ALIVE_TIMEOUT
? Number(process.env.OLLAMA_KEEP_ALIVE_TIMEOUT)
: 300; // Default 5-minute timeout for Ollama model loading.
Expand All @@ -33,6 +34,10 @@ class OllamaAILLM {
model: this.model,
keepAlive: this.keepAlive,
useMLock: true,
      // There are currently only two performance settings, so if it's not "base" - it's max context.
...(this.performanceMode === "base"
? {}
: { numCtx: this.promptWindowLimit() }),
temperature,
});
}
Expand Down
4 changes: 4 additions & 0 deletions server/utils/helpers/updateENV.js
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ const KEY_MAPPING = {
envKey: "OLLAMA_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
OllamaLLMPerformanceMode: {
envKey: "OLLAMA_PERFORMANCE_MODE",
checks: [],
},
OllamaLLMKeepAliveSeconds: {
envKey: "OLLAMA_KEEP_ALIVE_TIMEOUT",
checks: [isInteger],
Expand Down

0 comments on commit 7273c89

Please sign in to comment.