
Ms/hitl demo #637

Closed · wants to merge 6 commits
3 changes: 3 additions & 0 deletions python/llama-index-server/examples/hitl/README.md
@@ -0,0 +1,3 @@
# Human in the Loop

This example shows how to use LlamaIndexServer with a human in the loop: before executing a CLI command, the workflow pauses and asks the user for confirmation.
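
To try it locally (assuming the example's dependencies are installed and `OPENAI_API_KEY` is set), start the server from this directory with `uv run fastapi dev` and open the chat UI in your browser.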
@@ -0,0 +1,84 @@
import { useChatUI } from "@llamaindex/chat-ui";
import { JSONValue } from "ai";
import { FC, useState } from "react";
import { z } from "zod";
import { Button } from "../../../../../packages/server/next/app/components/ui/button";
import { Card, CardContent, CardFooter } from "../../../../../packages/server/next/app/components/ui/card";

const HumanEventSchema = z.object({
type: z.literal("human"),
data: z.object({
prefix: z.string(),
}),
});

type HumanEvent = z.infer<typeof HumanEventSchema>;

export const HumanResponse: FC<{
events: JSONValue[];
}> = ({ events }) => {
const { append } = useChatUI();
const [confirmedValue, setConfirmedValue] = useState<boolean | null>(null);

  const humanEvent = events.find(
    (e): e is HumanEvent => HumanEventSchema.safeParse(e).success,
  );

if (!humanEvent) return null;

const handleConfirm = () => {
append({
content: "Yes",
role: "user",
annotations: [
{
type: "human_response",
data: {
response: "yes",
},
},
],
});
setConfirmedValue(true);
};

const handleCancel = () => {
append({
content: "No",
role: "user",
annotations: [
{
type: "human_response",
data: {
response: "no",
},
},
],
});
setConfirmedValue(false);
};

return (
<Card className="my-4">
<CardContent className="pt-6">
<p className="text-sm text-gray-700">{humanEvent.data.prefix}</p>
</CardContent>
<CardFooter className="flex justify-end gap-2">
{confirmedValue === null ? (
<>
<Button onClick={handleConfirm}>Yes</Button>
<Button onClick={handleCancel}>No</Button>
</>
) : confirmedValue ? (
<p className="text-sm text-gray-700">Yes</p>
) : (
<p className="text-sm text-gray-700">No</p>
)}
</CardFooter>
</Card>
);
};
90 changes: 90 additions & 0 deletions python/llama-index-server/examples/hitl/custom_workflow.py
@@ -0,0 +1,90 @@
import platform
import subprocess
from typing import Any

from llama_index.core.prompts import PromptTemplate
from llama_index.core.settings import Settings
from llama_index.core.workflow import (
Context,
Event,
HumanResponseEvent,
InputRequiredEvent,
StartEvent,
StopEvent,
Workflow,
step,
)


class CLIExecutionEvent(Event):
command: str


class CLIWorkflow(Workflow):
"""
    A workflow that can execute command-line tools after asking the user for confirmation.
"""

default_prompt = PromptTemplate(
template="""
You are a helpful assistant who can write CLI commands to execute using {cli_language}.
Your task is to analyze the user's request and write a CLI command to execute.

## User Request
{user_request}

Don't be verbose, only respond with the CLI command without any other text.
"""
)

def __init__(self, **kwargs: Any) -> None:
        # HITL workflows should disable the timeout; otherwise waiting for the human response raises a timeout error.
kwargs["timeout"] = None
super().__init__(**kwargs)

@step
async def start(self, ctx: Context, ev: StartEvent) -> InputRequiredEvent:
user_msg = ev.user_msg
if user_msg is None:
raise ValueError("Missing user_msg in StartEvent")
await ctx.set("user_msg", user_msg)
# Get current operating system and CLI language
os_name = platform.system()
if os_name == "Linux" or os_name == "Darwin":
cli_language = "bash"
else:
cli_language = "cmd"
prompt = self.default_prompt.format(
user_request=user_msg, cli_language=cli_language
)
llm = Settings.llm
if llm is None:
raise ValueError("Missing LLM in Settings")
response = await llm.acomplete(prompt, formatted=True)
command = response.text.strip()
if command == "":
raise ValueError("Couldn't generate a command")
await ctx.set("command", command)
return InputRequiredEvent( # type: ignore
prefix=f"Do you wanna execute command: `{command}`?",
command=command,
)

@step
async def handle_human_response(
self, ctx: Context, ev: HumanResponseEvent
) -> StopEvent | CLIExecutionEvent:
if ev.response.lower().strip() == "yes":
return CLIExecutionEvent(
command=await ctx.get("command"),
)
else:
return StopEvent(result=None)

@step
async def cli_execution(self, ctx: Context, ev: CLIExecutionEvent) -> StopEvent:
command = ev.command or ""
if command == "":
raise ValueError("Missing command in CLIExecutionEvent")
res = subprocess.run(command, shell=True, capture_output=True, text=True)
return StopEvent(result=res.stdout or res.stderr)
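
For reference, a minimal sketch of driving `CLIWorkflow` directly from a script instead of through the server, following the standard LlamaIndex workflow streaming pattern; the file name and model choice are assumptions, and `OPENAI_API_KEY` must be set:

```python
# run_cli_workflow.py - hypothetical standalone driver, not part of this PR.
import asyncio

from llama_index.core.settings import Settings
from llama_index.core.workflow import HumanResponseEvent, InputRequiredEvent
from llama_index.llms.openai import OpenAI

from custom_workflow import CLIWorkflow


async def main() -> None:
    Settings.llm = OpenAI(model="gpt-4.1-mini")
    workflow = CLIWorkflow()
    handler = workflow.run(user_msg="list all files in the current directory")

    # Stream events; when the workflow asks for confirmation, answer from stdin.
    async for event in handler.stream_events():
        if isinstance(event, InputRequiredEvent):
            answer = input(f"{event.prefix} (yes/no): ")
            handler.ctx.send_event(HumanResponseEvent(response=answer))

    print(await handler)


if __name__ == "__main__":
    asyncio.run(main())
```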
34 changes: 34 additions & 0 deletions python/llama-index-server/examples/hitl/gen_ui.py
@@ -0,0 +1,34 @@
import asyncio
from typing import Literal

from pydantic import BaseModel, Field

from llama_index.llms.openai import OpenAI
from llama_index.server.gen_ui import generate_event_component


class InputRequiredEvent(BaseModel):
"""InputRequiredEvent is sent when LLM needs to ask for input from the human. Should showed as a small box in the UI (not a dialog)"""

prefix: str = Field(
description="The prefix and description of the input that is required."
)


class HumanInputEvent(BaseModel):
"""
Event for asking for input from the human.
"""

type: Literal["human"]
data: InputRequiredEvent


if __name__ == "__main__":
code = asyncio.run(
generate_event_component(
event_cls=InputRequiredEvent,
llm=OpenAI(model="gpt-4.1"),
)
)
print(code)
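
The script only prints the generated TSX. A possible follow-up, assuming the server loads components from the `components` directory configured in `main.py` and that the file name `human_response.tsx` matches the `event_component` value used there (both assumptions, not verified against this PR):

```python
# save_component.py - hypothetical helper, not part of this PR.
import asyncio
from pathlib import Path

from llama_index.llms.openai import OpenAI
from llama_index.server.gen_ui import generate_event_component

from gen_ui import InputRequiredEvent

if __name__ == "__main__":
    code = asyncio.run(
        generate_event_component(
            event_cls=InputRequiredEvent,
            llm=OpenAI(model="gpt-4.1"),
        )
    )
    # Write where UIConfig(component_dir="components") will look for it;
    # the file name "human_response.tsx" is an assumed convention.
    output_path = Path("components") / "human_response.tsx"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(code)
    print(f"Wrote generated component to {output_path}")
```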
81 changes: 81 additions & 0 deletions python/llama-index-server/examples/hitl/main.py
@@ -0,0 +1,81 @@
import subprocess
import uuid

from fastapi import FastAPI
from pydantic import Field

# Uncomment this to use the custom workflow
# from custom_workflow import CLIWorkflow
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.workflow import Context, HumanResponseEvent, InputRequiredEvent
from llama_index.llms.openai import OpenAI
from llama_index.server import LlamaIndexServer, UIConfig


class CLIInputRequiredEvent(InputRequiredEvent):
    """CLIInputRequiredEvent is sent when the agent needs the user's permission to execute a CLI command. Render this event by showing the command and a boolean button to execute it or not."""

    # TODO: this needs a to_response method that sends the event in the right format;
    # we don't want that method to be defined here.

event_component: str = (
"human_response" # used to find the right component to render the event
)
command: str = Field(description="The command to execute.")


class CLIHumanResponseEvent(HumanResponseEvent):
execute: bool = Field(
description="True if the human wants to execute the command, False otherwise."
)
command: str = Field(description="The command to execute.")


async def cli_executor(ctx: Context, command: str) -> str:
"""
This tool carefully waits for user confirmation before executing a command.
"""
confirmation = await ctx.wait_for_event(
CLIHumanResponseEvent,
waiter_id=str(
uuid.uuid4()
), # ideally not needed, should default to something reasonable
waiter_event=CLIInputRequiredEvent( # type: ignore
command=command,
),
)
if confirmation.execute:
return subprocess.check_output(command, shell=True).decode("utf-8")
else:
return "Command execution cancelled."


def create_workflow() -> AgentWorkflow:
# Uncomment this to use the custom workflow
# return CLIWorkflow()
return AgentWorkflow.from_tools_or_functions(
tools_or_functions=[cli_executor],
llm=OpenAI(model="gpt-4.1-mini"),
system_prompt="""
        You are a helpful assistant that helps the user execute commands.
        You can execute commands using the cli_executor tool; you don't need to ask the user for confirmation before calling the tool.
""",
)


def create_app() -> FastAPI:
app = LlamaIndexServer(
workflow_factory=create_workflow,
ui_config=UIConfig(
app_title="CLI Assistant",
starter_questions=[
"List all files in the current directory",
"Fetch changes from the remote repository",
],
component_dir="components",
),
)
return app


# Run command: `uv run fastapi dev`
app = create_app()
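
To make the payload format concrete, here is a hedged sketch of a client call that answers a pending confirmation; the `/api/chat` path, the port, and the exact body shape are assumptions inferred from the frontend component above and the `models.py` changes below:

```python
# Hypothetical client call, not part of this PR; the endpoint path is assumed.
import httpx

payload = {
    "id": "demo-session-1",  # may only contain letters, digits, "_" and "-"
    "messages": [
        {"role": "user", "content": "List all files in the current directory"},
        {"role": "assistant", "content": "Do you want to execute the command: `ls -la`?"},
        {
            "role": "user",
            "content": "Yes",
            # The annotation format read by ChatAPIMessage.human_response below.
            "annotations": [{"type": "human_response", "data": {"response": "yes"}}],
        },
    ],
}

response = httpx.post("http://localhost:8000/api/chat", json=payload, timeout=None)
print(response.text)
```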
19 changes: 19 additions & 0 deletions python/llama-index-server/llama_index/server/api/models.py
@@ -1,5 +1,6 @@
import logging
import os
import re
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Union

@@ -28,8 +29,20 @@ class ChatAPIMessage(BaseModel):
def to_llamaindex_message(self) -> ChatMessage:
return ChatMessage(role=self.role, content=self.content)

@property
def human_response(self) -> Optional[str]:
if self.annotations:
for annotation in self.annotations:
if (
isinstance(annotation, dict)
and annotation.get("type") == "human_response"
):
return annotation.get("data", {}).get("response", None)
return None


class ChatRequest(BaseModel):
id: str # provided by FE
messages: List[ChatAPIMessage]
data: Optional[Any] = None
config: Optional[ChatConfig] = ChatConfig()
@@ -40,6 +53,12 @@ def validate_messages(cls, v: List[ChatAPIMessage]) -> List[ChatAPIMessage]:
raise ValueError("Last message must be from user")
return v

@field_validator("id")
def validate_id(cls, v: str) -> str:
if re.search(r"[^a-zA-Z0-9_-]", v):
raise ValueError("ID contains special characters")
return v


class AgentRunEventType(Enum):
TEXT = "text"