
feat: add support for vLLM response format in reranking logic and up… #5954


Merged · 4 commits · Jun 16, 2025
14 changes: 11 additions & 3 deletions core/llm/index.ts
@@ -1068,9 +1068,17 @@ export abstract class BaseLLM implements ILLM {
documents: chunks.map((chunk) => chunk.content),
});

// Put them in the order they were given
const sortedResults = results.data.sort((a, b) => a.index - b.index);
return sortedResults.map((result) => result.relevance_score);
// Standard OpenAI format
if (results.data && Array.isArray(results.data)) {
return results.data
.sort((a, b) => a.index - b.index)
.map((result) => result.relevance_score);
}

throw new Error(
`Unexpected rerank response format from ${this.providerName}. ` +
`Expected 'data' array but got: ${JSON.stringify(Object.keys(results))}`,
);
}

throw new Error(
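For reference, the `data` branch added above expects the standard OpenAI-compatible rerank payload. A minimal sketch of that shape and of the reordering step, with illustrative values (only `data`, `index`, and `relevance_score` are actually read by the code):

// Illustrative OpenAI-style rerank response; the values are made up.
const response = {
  data: [
    { index: 1, relevance_score: 0.12 },
    { index: 0, relevance_score: 0.87 },
  ],
};
// Sorting by index restores the original chunk order before the
// scores are returned:
const scores = response.data
  .sort((a, b) => a.index - b.index)
  .map((r) => r.relevance_score);
// scores === [0.87, 0.12]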
42 changes: 41 additions & 1 deletion core/llm/llms/Vllm.ts
@@ -1,7 +1,25 @@
import { LLMOptions } from "../../index.js";
import { Chunk, LLMOptions } from "../../index.js";

import OpenAI from "./OpenAI.js";

// vLLM-specific rerank response types
interface VllmRerankItem {
index: number;
document: {
text: string;
};
relevance_score: number;
}

interface VllmRerankResponse {
id: string;
model: string;
usage: {
total_tokens: number;
};
results: VllmRerankItem[];
}

class Vllm extends OpenAI {
static providerName = "vllm";
constructor(options: LLMOptions) {
@@ -16,6 +34,28 @@ class Vllm extends OpenAI {
return false;
}

async rerank(query: string, chunks: Chunk[]): Promise<number[]> {
if (this.useOpenAIAdapterFor.includes("rerank") && this.openaiAdapter) {
const results = (await this.openaiAdapter.rerank({
model: this.model,
query,
documents: chunks.map((chunk) => chunk.content),
})) as unknown as VllmRerankResponse;

// vLLM uses 'results' array instead of 'data'
if (results.results && Array.isArray(results.results)) {
const sortedResults = results.results.sort((a, b) => a.index - b.index);
return sortedResults.map((result) => result.relevance_score);
}

throw new Error(
`vLLM rerank response missing 'results' array. Got: ${JSON.stringify(Object.keys(results))}`,
);
}

throw new Error("vLLM rerank requires OpenAI adapter");
}

private _setupCompletionOptions() {
this.fetch(this._getEndpoint("models"), {
method: "GET",
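A hedged usage sketch of the new method: the model name, `apiBase`, and the stand-in chunks below are placeholder assumptions for illustration, not values from this PR.

// Hypothetical caller; option names follow the LLMOptions usage above.
const reranker = new Vllm({
  model: "BAAI/bge-reranker-base", // placeholder reranker model
  apiBase: "http://localhost:8000/v1", // placeholder vLLM server URL
});
// Minimal stand-ins for real Chunk objects from Continue's retrieval:
const chunks = [
  { content: "vLLM serves LLMs" },
  { content: "Continue is an IDE extension" },
] as unknown as Chunk[];
const scores = await reranker.rerank("how does vllm rerank?", chunks);
// scores[i] is the relevance_score for chunks[i], in the order given.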
4 changes: 2 additions & 2 deletions docs/docs/customize/model-providers/more/vllm.mdx
@@ -1,5 +1,3 @@
import TabItem from "@theme/TabItem";
import Tabs from "@theme/Tabs";

# vLLM

@@ -100,6 +98,8 @@ We recommend configuring **Nomic Embed Text** as your embeddings model.

## Reranking model

Continue automatically handles vLLM's response format (which uses `results` instead of `data`).

[Click here](../../model-roles/reranking.mdx) to see a list of reranking model providers.

The Continue implementation uses [OpenAI](../top-level/openai.mdx) under the hood. [View the source](https://github.com/continuedev/continue/blob/main/core/llm/llms/Vllm.ts).
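To make the format difference concrete, a minimal sketch (illustrative values only) of the two payload shapes this doc sentence contrasts; both normalize to the same score array in input order:

// OpenAI-style shape, handled in core/llm/index.ts:
const openaiStyle = { data: [{ index: 0, relevance_score: 0.9 }] };
// vLLM shape, handled in core/llm/llms/Vllm.ts:
const vllmStyle = {
  results: [{ index: 0, document: { text: "chunk" }, relevance_score: 0.9 }],
};
const fromOpenAI = openaiStyle.data.map((r) => r.relevance_score); // [0.9]
const fromVllm = vllmStyle.results.map((r) => r.relevance_score); // [0.9]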