
Commit 324f252

🐛 fix for upstream compatibility - use LLM.embed() instead for embeddings (#188)
- `LLM.embed()` is the preferred way of returning embeddings.
- `engine_core.step()` now returns a `tuple`.

Signed-off-by: Prashant Gupta <[email protected]>
1 parent 9eb633e commit 324f252
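
For reference, a minimal sketch (not part of this commit) of the preferred embedding call; the model name and prompt are placeholders, and it assumes a vLLM version where `LLM.embed()` and the `task="embed"` constructor argument are available:

```python
from vllm import LLM

# Placeholder model; any embedding model supported by vLLM works here.
llm = LLM(model="sentence-transformers/all-MiniLM-L6-v2", task="embed")

# LLM.embed() replaces the older LLM.encode() call for embedding models.
outputs = llm.embed(["Hello, world!"])

for output in outputs:
    # Each output carries the embedding vector for its prompt.
    print(len(output.outputs.embedding))
```

The backward-compatible handling of the `engine_core.step()` tuple return is shown directly in the test diff below.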

File tree

2 files changed: +7 −2 lines


tests/e2e/test_spyre_cb.py

Lines changed: 6 additions & 1 deletion
@@ -473,4 +473,9 @@ def test_scheduler_cb_steps_tkv(model: str, backend: str,
             f"Step {step}, finished request output"

         # Perform next step
-        request_outputs = engine_core.step().outputs
+        step_output = engine_core.step()
+        # backward compatibility
+        if isinstance(step_output, tuple):
+            request_outputs = step_output[0].outputs
+        else:
+            request_outputs = step_output.outputs

tests/spyre_util.py

Lines changed: 1 addition & 1 deletion
@@ -408,7 +408,7 @@ def spyre_vllm_embeddings(model: str, prompts: list[str],
                           block_size=block_size,
                           tensor_parallel_size=tensor_parallel_size)

-    vllm_outputs = vllm_model.encode(prompts)
+    vllm_outputs = vllm_model.embed(prompts)

     results = []
     for req_output in vllm_outputs:
