
feat: Support citation for agentic template #642


Merged
merged 29 commits on May 28, 2025
Changes from all commits
Commits
29 commits
ec06a12
feat: add citation processing and prompts for query engine
leehuwuj May 22, 2025
ff9acac
add example and improve tool description
leehuwuj May 23, 2025
a46d655
better cond
leehuwuj May 23, 2025
ad0c2f3
Improve citation instructions in prompts and enhance logging in query…
leehuwuj May 26, 2025
0960b14
Merge remote-tracking branch 'origin/main' into lee/citation-agentic
leehuwuj May 26, 2025
491dd8e
Enhance citation instructions in prompts, improve error handling in s…
leehuwuj May 26, 2025
22ca4ba
Update @llamaindex/chat-ui to version 0.4.6 in package.json and pnpm-…
leehuwuj May 27, 2025
eedffb0
Enhance system prompt and tool description for improved clarity on kn…
leehuwuj May 27, 2025
13a1454
introduce preconfigured agent for citation answering
leehuwuj May 27, 2025
8799669
Refactor workflow creation to utilize query tool with citation suppor…
leehuwuj May 27, 2025
fd82563
Implement AgentCallTool for event handling in chat router; enhance to…
leehuwuj May 27, 2025
7a463d0
better display llamacloud file name
leehuwuj May 27, 2025
f6f5f23
Merge remote-tracking branch 'origin/main' into lee/citation-agentic
leehuwuj May 27, 2025
1c83fa2
Remove citation agent implementation and update dependencies in pypro…
leehuwuj May 27, 2025
7a33a58
update createllama and add changesets
leehuwuj May 27, 2025
3a2be5c
Refactor citation handling in query tools
leehuwuj May 28, 2025
641a2be
Refactor SourceNodesFromToolCall initialization to use optional tool_…
leehuwuj May 28, 2025
46dec12
Refactor query engine and citation handling; enable citation in workf…
leehuwuj May 28, 2025
b95dcc7
refactor llamacloud file
leehuwuj May 28, 2025
b73680f
Refactor SourceNodesFromToolCall to remove deprecated tool_name param…
leehuwuj May 28, 2025
31576d9
fix mypy
leehuwuj May 28, 2025
356bbb3
add test for local python package
leehuwuj May 28, 2025
e6e2f78
remove tool name constraint
leehuwuj May 28, 2025
f9f3437
add missing working-directory
leehuwuj May 28, 2025
0ad7581
Update e2e workflow to build server package and set SERVER_PACKAGE_PA…
leehuwuj May 28, 2025
c3ad902
Update dependency handling for llama-index-server template in CI; rem…
leehuwuj May 28, 2025
c32b7f3
config hatch to fix script
leehuwuj May 28, 2025
f7c4ed3
fix mkdir windows
leehuwuj May 28, 2025
8764d93
fix wrong build command
leehuwuj May 28, 2025
5 changes: 5 additions & 0 deletions .changeset/few-news-marry.md
@@ -0,0 +1,5 @@
---
"@create-llama/llama-index-server": patch
---

Show agent widget in UI when making a tool call
5 changes: 5 additions & 0 deletions .changeset/rich-nights-hug.md
@@ -0,0 +1,5 @@
---
"@create-llama/llama-index-server": patch
---

Support citation for query engine tool
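
For orientation, here is a minimal sketch of what this changeset enables, assembled from the template diffs later in this PR. It is a sketch under assumptions, not the shipped template verbatim: it presumes a create-llama project where `uv run generate` has already built the index and where `app.index.get_index()` returns it.

    from app.index import get_index  # template helper; assumed to return the built index
    from llama_index.core.agent.workflow import AgentWorkflow
    from llama_index.core.settings import Settings
    from llama_index.server.tools.index import get_query_engine_tool
    from llama_index.server.tools.index.citation import (
        CITATION_SYSTEM_PROMPT,
        enable_citation,
    )

    index = get_index()
    # Wrap the stock query tool so its responses carry citation metadata,
    # then teach the agent how to cite via the bundled system prompt.
    query_tool = enable_citation(get_query_engine_tool(index=index))
    workflow = AgentWorkflow.from_tools_or_functions(
        tools_or_functions=[query_tool],
        llm=Settings.llm,
        system_prompt="You are a helpful assistant.\n" + CITATION_SYSTEM_PROMPT,
    )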
5 changes: 5 additions & 0 deletions .changeset/small-insects-hug.md
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Support citation for agentic template (Python)
5 changes: 5 additions & 0 deletions .changeset/stale-things-beg.md
@@ -0,0 +1,5 @@
---
"@llamaindex/server": patch
---

Bump version: @llamaindex/chat-ui@0.4.6
10 changes: 10 additions & 0 deletions .github/workflows/e2e.yml
@@ -64,6 +64,15 @@ jobs:
run: pnpm run pack-install
working-directory: packages/create-llama

- name: Build and store server package
run: |
pnpm run build
wheel_file=$(ls dist/*.whl | head -n 1)
mkdir -p "${{ runner.temp }}"
cp "$wheel_file" "${{ runner.temp }}/"
echo "SERVER_PACKAGE_PATH=${{ runner.temp }}/$(basename "$wheel_file")" >> $GITHUB_ENV
working-directory: python/llama-index-server

- name: Run Playwright tests for Python
run: pnpm run e2e:python
env:
@@ -74,6 +83,7 @@
TEMPLATE_TYPE: ${{ matrix.template-types }}
PYTHONIOENCODING: utf-8
PYTHONLEGACYWINDOWSSTDIO: utf-8
SERVER_PACKAGE_PATH: ${{ env.SERVER_PACKAGE_PATH }}
working-directory: packages/create-llama

- uses: actions/upload-artifact@v4
14 changes: 14 additions & 0 deletions packages/create-llama/helpers/python.ts
@@ -5,6 +5,7 @@ import { parse, stringify } from "smol-toml";
import terminalLink from "terminal-link";
import { isUvAvailable, tryUvSync } from "./uv";

import { isCI } from "ci-info";
import { assetRelocator, copy } from "./copy";
import { templatesDir } from "./dir";
import { Tool } from "./tools";
@@ -278,6 +279,19 @@ const getAdditionalDependencies = (
}
}

// If the template type is llamaindexserver, we're running in CI, and
// SERVER_PACKAGE_PATH is set, install the locally built llama-index-server
// wheel instead of the published package
if (
templateType === "llamaindexserver" &&
isCI &&
process.env.SERVER_PACKAGE_PATH
) {
dependencies.push({
name: "llama-index-server",
version: `@file://${process.env.SERVER_PACKAGE_PATH}`,
});
}

return dependencies;
};

@@ -3,9 +3,12 @@
from app.index import get_index
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.settings import Settings
from llama_index.llms.openai import OpenAI
from llama_index.server.api.models import ChatRequest
from llama_index.server.tools.index import get_query_engine_tool
from llama_index.server.tools.index.citation import (
CITATION_SYSTEM_PROMPT,
enable_citation,
)


def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow:
@@ -14,9 +17,16 @@ def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow
raise RuntimeError(
"Index not found! Please run `uv run generate` to index the data first."
)
query_tool = get_query_engine_tool(index=index)
# Create a query tool with citations enabled
query_tool = enable_citation(get_query_engine_tool(index=index))

# Define the system prompt for the agent
# Append the citation system prompt to the system prompt
system_prompt = """You are a helpful assistant"""
system_prompt += CITATION_SYSTEM_PROMPT

return AgentWorkflow.from_tools_or_functions(
tools_or_functions=[query_tool],
llm=Settings.llm or OpenAI(model="gpt-4o-mini"),
system_prompt="You are a helpful assistant.",
llm=Settings.llm,
system_prompt=system_prompt,
)
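
A quick way to smoke-test the updated template from a script; this is a hedged sketch, with the question string purely illustrative and `app.workflow` assumed to be the template module patched above:

    import asyncio

    from app.workflow import create_workflow

    async def main() -> None:
        workflow = create_workflow()
        # With citation enabled, the final answer should reference the
        # retrieved source chunks rather than answering from memory alone.
        result = await workflow.run(user_msg="What does the document say about pricing?")
        print(result)

    asyncio.run(main())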
@@ -8,5 +8,5 @@
def init_settings():
if os.getenv("OPENAI_API_KEY") is None:
raise RuntimeError("OPENAI_API_KEY is missing in environment variables")
Settings.llm = OpenAI(model="gpt-4o-mini")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
Settings.llm = OpenAI(model="gpt-4.1")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")
@@ -46,6 +46,9 @@ disable_error_code = [ "return-value", "assignment" ]
module = "app.*"
ignore_missing_imports = false

[tool.hatch.metadata]
allow-direct-references = true

[build-system]
requires = [ "hatchling>=1.24" ]
build-backend = "hatchling.build"
2 changes: 1 addition & 1 deletion packages/server/package.json
@@ -59,7 +59,7 @@
"@babel/traverse": "^7.27.0",
"@babel/types": "^7.27.0",
"@hookform/resolvers": "^5.0.1",
"@llamaindex/chat-ui": "0.4.5",
"@llamaindex/chat-ui": "0.4.6",
"@radix-ui/react-accordion": "^1.2.3",
"@radix-ui/react-alert-dialog": "^1.1.7",
"@radix-ui/react-aspect-ratio": "^1.1.3",
10 changes: 5 additions & 5 deletions pnpm-lock.yaml


113 changes: 113 additions & 0 deletions python/llama-index-server/examples/llamacloud/main.py
@@ -0,0 +1,113 @@
import os
from typing import List, Optional

from fastapi import FastAPI
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.query_engine.retriever_query_engine import RetrieverQueryEngine
from llama_index.core.settings import Settings
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.llms.openai import OpenAI
from llama_index.server import LlamaIndexServer, UIConfig
from llama_index.server.api.models import ChatRequest
from llama_index.server.services.llamacloud import LlamaCloudIndex, get_index
from llama_index.server.tools.index.citation import (
CITATION_SYSTEM_PROMPT,
enable_citation,
)

# Please set the following environment variables to use LlamaCloud
if os.getenv("LLAMA_CLOUD_API_KEY") is None:
raise ValueError("LLAMA_CLOUD_API_KEY is not set")
if os.getenv("LLAMA_CLOUD_PROJECT_NAME") is None:
raise ValueError("LLAMA_CLOUD_PROJECT_NAME is not set")
if os.getenv("LLAMA_CLOUD_INDEX_NAME") is None:
raise ValueError("LLAMA_CLOUD_INDEX_NAME is not set")

Settings.llm = OpenAI(model="gpt-4.1")


def get_tools(index: LlamaCloudIndex) -> List[QueryEngineTool]:
"""
Get the tools for the given index.
"""

chunk_retriever = index.as_retriever(
retrieval_mode="chunks",
rerank_top_n=15,
dense_similarity_top_k=1,
)
doc_retriever = index.as_retriever(
retrieval_mode="files_via_content",
files_top_k=1,
)

# You can either create the query engines with CitationSynthesizer and NodeCitationProcessor,
# or use the enable_citation function to enable citations for the query engine.
chunk_engine = RetrieverQueryEngine.from_args(
retriever=chunk_retriever,
llm=Settings.llm,
)
doc_engine = RetrieverQueryEngine.from_args(
retriever=doc_retriever,
llm=Settings.llm,
)

chunk_tool = QueryEngineTool(
query_engine=chunk_engine,
metadata=ToolMetadata(
name="chunk_query_engine",
description=(
"Get answer from specific chunk of a given document. Best used for lower-level questions that require specific information from a given document."
"Do NOT use if the answer can be found in the entire document. Use the file_query_engine instead for that purpose"
),
),
)
doc_tool = QueryEngineTool(
query_engine=doc_engine,
metadata=ToolMetadata(
name="file_query_engine",
description=(
"Get answer from entire document as context. Best used for higher-level summarization questions."
"Do NOT use if the answer can be found in a specific chunk of a given document. Use the chunk_query_engine instead for that purpose"
),
),
)

return [enable_citation(chunk_tool), enable_citation(doc_tool)]


def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow:
index = get_index(chat_request=chat_request)
if index is None:
raise RuntimeError("Index not found!")

# Append the citation system prompt to the system prompt
system_prompt = """
You are a helpful assistant that has access to a knowledge base.
"""
system_prompt += CITATION_SYSTEM_PROMPT
return AgentWorkflow.from_tools_or_functions(
tools_or_functions=get_tools(index),
system_prompt=system_prompt,
)


def create_app() -> FastAPI:
app = LlamaIndexServer(
workflow_factory=create_workflow,
env="dev",
suggest_next_questions=False,
ui_config=UIConfig(
llamacloud_index_selector=True, # to select different indexes in the UI
),
)
return app


app = create_app()


if __name__ == "__main__":
import uvicorn

uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
@@ -1,3 +1,4 @@
from llama_index.server.api.callbacks.agent_call_tool import AgentCallTool
from llama_index.server.api.callbacks.base import EventCallback
from llama_index.server.api.callbacks.llamacloud import LlamaCloudFileDownload
from llama_index.server.api.callbacks.source_nodes import SourceNodesFromToolCall
@@ -10,4 +11,5 @@
"SourceNodesFromToolCall",
"SuggestNextQuestions",
"LlamaCloudFileDownload",
"AgentCallTool",
]
@@ -0,0 +1,26 @@
import logging
from typing import Any

from llama_index.core.agent.workflow.workflow_events import ToolCall, ToolCallResult
from llama_index.server.api.callbacks.base import EventCallback
from llama_index.server.api.models import AgentRunEvent

logger = logging.getLogger("uvicorn")


class AgentCallTool(EventCallback):
"""
Adapter that converts tool call events to agent run events.
"""

async def run(self, event: Any) -> Any:
if isinstance(event, ToolCall) and not isinstance(event, ToolCallResult):
return AgentRunEvent(
name="Agent",
msg=f"Calling tool: {event.tool_name} with: {event.tool_kwargs}",
)
return event

@classmethod
def from_default(cls, *args: Any, **kwargs: Any) -> "AgentCallTool":
return cls()
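
To see what the new callback emits, one can feed it a hand-built event; all field values below are fabricated for illustration:

    import asyncio

    from llama_index.core.agent.workflow.workflow_events import ToolCall
    from llama_index.server.api.callbacks import AgentCallTool

    async def demo() -> None:
        callback = AgentCallTool.from_default()
        # A plain ToolCall is rewritten into a UI-facing AgentRunEvent ...
        tool_call = ToolCall(
            tool_name="query_index",
            tool_kwargs={"input": "pricing"},
            tool_id="call-1",
        )
        transformed = await callback.run(tool_call)
        print(transformed.msg)  # Calling tool: query_index with: {'input': 'pricing'}
        # ... while any other event (including ToolCallResult) passes through unchanged.

    asyncio.run(demo())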
@@ -1,31 +1,51 @@
from typing import Any
import logging
from typing import Any, List, Optional

from llama_index.core.agent.workflow.workflow_events import ToolCallResult
from llama_index.core.schema import NodeWithScore
from llama_index.server.api.callbacks.base import EventCallback
from llama_index.server.api.models import SourceNodesEvent

logger = logging.getLogger(__name__)


class SourceNodesFromToolCall(EventCallback):
"""
Extract source nodes from the query tool output.

Args:
query_tool_name: The name of the tool that queries the index.
default is "query_index"
"""

def __init__(self, query_tool_name: str = "query_index"):
self.query_tool_name = query_tool_name

def transform_tool_call_result(self, event: ToolCallResult) -> SourceNodesEvent:
source_nodes = event.tool_output.raw_output.source_nodes
return SourceNodesEvent(nodes=source_nodes)
def __init__(self, tool_name: Optional[str] = None):
# backward compatibility
if tool_name is not None:
logger.warning(
"tool_name has been deprecated. It's now detected by the tool output."
)

def _get_source_nodes(self, event: ToolCallResult) -> Optional[List[NodeWithScore]]:
# Skip error results
if event.tool_output.is_error:
return None
# Check whether the tool output carries source nodes
raw_output = event.tool_output.raw_output
if hasattr(raw_output, "source_nodes"):
source_nodes = raw_output.source_nodes
# Verify if source_nodes is List[NodeWithScore]
if isinstance(source_nodes, list) and all(
isinstance(node, NodeWithScore) for node in source_nodes
):
return source_nodes
else:
return None
else:
return None

async def run(self, event: Any) -> Any:
events = [event]
if isinstance(event, ToolCallResult):
if event.tool_name == self.query_tool_name:
return event, self.transform_tool_call_result(event)
return event
source_nodes = self._get_source_nodes(event)
if source_nodes is not None:
events.append(SourceNodesEvent(nodes=source_nodes))
return events

@classmethod
def from_default(cls, *args: Any, **kwargs: Any) -> "SourceNodesFromToolCall":
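
And a similar fabricated check for the refactored source-node extraction; here SimpleNamespace stands in for a query-engine Response object that exposes source_nodes, and every value is assumed for illustration:

    import asyncio
    from types import SimpleNamespace

    from llama_index.core.agent.workflow.workflow_events import ToolCallResult
    from llama_index.core.schema import NodeWithScore, TextNode
    from llama_index.core.tools import ToolOutput
    from llama_index.server.api.callbacks import SourceNodesFromToolCall

    async def demo() -> None:
        nodes = [NodeWithScore(node=TextNode(text="cited chunk"), score=0.42)]
        result = ToolCallResult(
            tool_name="query_index",  # any tool name works now that the name constraint is gone
            tool_kwargs={"input": "question"},
            tool_id="call-1",
            tool_output=ToolOutput(
                content="answer",
                tool_name="query_index",
                raw_input={},
                raw_output=SimpleNamespace(source_nodes=nodes),
                is_error=False,
            ),
            return_direct=False,
        )
        # A non-error output with a list of NodeWithScore yields an extra SourceNodesEvent.
        events = await SourceNodesFromToolCall().run(result)
        print([type(e).__name__ for e in events])  # ['ToolCallResult', 'SourceNodesEvent']

    asyncio.run(demo())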