Skip to content

Commit 14ca511

Browse files
OVEP Stateful: Improve accuracy on NPU for sequence lengths >= 2048 (#684)
1 parent 651f535 commit 14ca511

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

onnxruntime/core/providers/openvino/ov_stateful_patch_utils.cc

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -292,6 +292,13 @@ void UpdateNPUConfig(ov::AnyMap& config, const KVAxesPosition& kv_pos, const KVD
292292
RenameKey(config, "PREFILL_HINT", "NPUW_LLM_PREFILL_HINT");
293293
RenameKey(config, "GENERATE_CONFIG", "NPUW_LLM_GENERATE_CONFIG");
294294
RenameKey(config, "GENERATE_HINT", "NPUW_LLM_GENERATE_HINT");
295+
296+
const size_t npuw_context_len_threshold = 2048;
297+
if ((kv_desc.max_prompt_len + kv_desc.min_response_len) >= npuw_context_len_threshold) {
298+
// This improves accuracy when the combined context length (max prompt length + min response length) is 2048 tokens or more.
299+
config["++NPUW_LLM_PREFILL_CONFIG"] = ov::AnyMap{{"NPUW_DEVICES", "NPU,CPU"}, {"NPUW_ONLINE_AVOID", "P:SinCos/NPU"}};
300+
config["++NPUW_LLM_GENERATE_CONFIG"] = ov::AnyMap{{"NPUW_DEVICES", "NPU,CPU"}, {"NPUW_ONLINE_AVOID", "P:SinCos/NPU"}};
301+
}
295302
}
296303

297304
std::optional<ov::Any> PopOptionNew(ov::AnyMap& config, const std::string& option_name) {

0 commit comments

Comments
 (0)