Merged. Changes from all commits (21 commits).
4 changes: 0 additions & 4 deletions internal/agent/agent_tool.go
@@ -3,7 +3,6 @@ package agent
 import (
     "context"
     _ "embed"
-    "encoding/json"
     "errors"
     "fmt"
 
@@ -43,9 +42,6 @@ func (c *coordinator) agentTool(ctx context.Context) (fantasy.AgentTool, error)
         AgentToolName,
         string(agentToolDescription),
         func(ctx context.Context, params AgentParams, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
-            if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
-                return fantasy.NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
-            }
             if params.Prompt == "" {
                 return fantasy.NewTextErrorResponse("prompt is required"), nil
             }
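The deletion above works because the handler already receives its parameters as the typed params argument; presumably fantasy.NewAgentTool decodes call.Input into that value before invoking the handler, which is what made the manual json.Unmarshal redundant. A minimal sketch of that decode-then-dispatch pattern, with a hypothetical helper name (not fantasy's actual API):

// Hypothetical sketch only; decodeThenHandle is not part of fantasy's API.
// Assumes imports: context, encoding/json, fmt, charm.land/fantasy.
func decodeThenHandle[P any](
    handle func(ctx context.Context, params P, call fantasy.ToolCall) (fantasy.ToolResponse, error),
) func(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
    return func(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
        var params P
        // Decode the raw tool-call input into the typed params value,
        // mirroring what the removed json.Unmarshal did by hand.
        if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
            return fantasy.NewTextErrorResponse(fmt.Sprintf("error parsing parameters: %s", err)), nil
        }
        return handle(ctx, params, call)
    }
}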
217 changes: 217 additions & 0 deletions internal/agent/agentic_fetch_tool.go
@@ -0,0 +1,217 @@
package agent

import (
    "context"
    _ "embed"
    "errors"
    "fmt"
    "net/http"
    "os"
    "time"

    "charm.land/fantasy"

    "github.com/charmbracelet/crush/internal/agent/prompt"
    "github.com/charmbracelet/crush/internal/agent/tools"
    "github.com/charmbracelet/crush/internal/permission"
)

//go:embed templates/agentic_fetch.md
var agenticFetchToolDescription []byte

// agenticFetchValidationResult holds the validated parameters from the tool call context.
type agenticFetchValidationResult struct {
    SessionID      string
    AgentMessageID string
}

// validateAgenticFetchParams validates the tool call parameters and extracts required context values.
func validateAgenticFetchParams(ctx context.Context, params tools.AgenticFetchParams) (agenticFetchValidationResult, error) {
    if params.URL == "" {
        return agenticFetchValidationResult{}, errors.New("url is required")
    }

    if params.Prompt == "" {
        return agenticFetchValidationResult{}, errors.New("prompt is required")
    }

    sessionID := tools.GetSessionFromContext(ctx)
    if sessionID == "" {
        return agenticFetchValidationResult{}, errors.New("session id missing from context")
    }

    agentMessageID := tools.GetMessageFromContext(ctx)
    if agentMessageID == "" {
        return agenticFetchValidationResult{}, errors.New("agent message id missing from context")
    }

    return agenticFetchValidationResult{
        SessionID:      sessionID,
        AgentMessageID: agentMessageID,
    }, nil
}

//go:embed templates/agentic_fetch_prompt.md.tpl
var agenticFetchPromptTmpl []byte

func (c *coordinator) agenticFetchTool(_ context.Context, client *http.Client) (fantasy.AgentTool, error) {
    if client == nil {
        client = &http.Client{
            Timeout: 30 * time.Second,
            Transport: &http.Transport{
                MaxIdleConns:        100,
                MaxIdleConnsPerHost: 10,
                IdleConnTimeout:     90 * time.Second,
            },
        }
    }

    return fantasy.NewAgentTool(
        tools.AgenticFetchToolName,
        string(agenticFetchToolDescription),
        func(ctx context.Context, params tools.AgenticFetchParams, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
[Review thread on this handler]

Contributor: There's a lot of data validation logic in this lambda; it looks important to make sure we're getting these right and in order. Can we move this into a concrete, defined function and break it up into individual steps? Either consolidate all of the validation to run at once as a method on an agent-specific config type, or break it into individual functions.

Member (author): Let me know if this looks better now.

Contributor: Yes, I think it's better. I still think the lambda size overall is massive, but at least there isn't so much in-place nesting going on.
            validationResult, err := validateAgenticFetchParams(ctx, params)
            if err != nil {
                return fantasy.NewTextErrorResponse(err.Error()), nil
            }

            p := c.permissions.Request(
                permission.CreatePermissionRequest{
                    SessionID:   validationResult.SessionID,
                    Path:        c.cfg.WorkingDir(),
                    ToolCallID:  call.ID,
                    ToolName:    tools.AgenticFetchToolName,
                    Action:      "fetch",
                    Description: fmt.Sprintf("Fetch and analyze content from URL: %s", params.URL),
                    Params:      tools.AgenticFetchPermissionsParams(params),
                },
            )

            if !p {
                return fantasy.ToolResponse{}, permission.ErrorPermissionDenied
            }

            content, err := tools.FetchURLAndConvert(ctx, client, params.URL)
            if err != nil {
                return fantasy.NewTextErrorResponse(fmt.Sprintf("Failed to fetch URL: %s", err)), nil
            }

            tmpDir, err := os.MkdirTemp(c.cfg.Options.DataDirectory, "crush-fetch-*")
            if err != nil {
                return fantasy.NewTextErrorResponse(fmt.Sprintf("Failed to create temporary directory: %s", err)), nil
            }
            defer os.RemoveAll(tmpDir)

            hasLargeContent := len(content) > tools.LargeContentThreshold
            var fullPrompt string

            if hasLargeContent {
                tempFile, err := os.CreateTemp(tmpDir, "page-*.md")
                if err != nil {
                    return fantasy.NewTextErrorResponse(fmt.Sprintf("Failed to create temporary file: %s", err)), nil
                }
                tempFilePath := tempFile.Name()

                if _, err := tempFile.WriteString(content); err != nil {
                    tempFile.Close()
                    return fantasy.NewTextErrorResponse(fmt.Sprintf("Failed to write content to file: %s", err)), nil
                }
                tempFile.Close()

                fullPrompt = fmt.Sprintf("%s\n\nThe web page from %s has been saved to: %s\n\nUse the view and grep tools to analyze this file and extract the requested information.", params.Prompt, params.URL, tempFilePath)
            } else {
                fullPrompt = fmt.Sprintf("%s\n\nWeb page URL: %s\n\n<webpage_content>\n%s\n</webpage_content>", params.Prompt, params.URL, content)
            }

            promptOpts := []prompt.Option{
                prompt.WithWorkingDir(tmpDir),
            }

            promptTemplate, err := prompt.NewPrompt("agentic_fetch", string(agenticFetchPromptTmpl), promptOpts...)
            if err != nil {
                return fantasy.ToolResponse{}, fmt.Errorf("error creating prompt: %s", err)
            }

            _, small, err := c.buildAgentModels(ctx)
            if err != nil {
                return fantasy.ToolResponse{}, fmt.Errorf("error building models: %s", err)
            }

            systemPrompt, err := promptTemplate.Build(ctx, small.Model.Provider(), small.Model.Model(), *c.cfg)
            if err != nil {
                return fantasy.ToolResponse{}, fmt.Errorf("error building system prompt: %s", err)
            }

            smallProviderCfg, ok := c.cfg.Providers.Get(small.ModelCfg.Provider)
            if !ok {
                return fantasy.ToolResponse{}, errors.New("small model provider not configured")
            }

            webFetchTool := tools.NewWebFetchTool(tmpDir, client)
            fetchTools := []fantasy.AgentTool{
                webFetchTool,
                tools.NewGlobTool(tmpDir),
                tools.NewGrepTool(tmpDir),
                tools.NewViewTool(c.lspClients, c.permissions, tmpDir),
            }

            agent := NewSessionAgent(SessionAgentOptions{
                LargeModel:           small, // Use small model for both (fetch doesn't need large).
                SmallModel:           small,
                SystemPromptPrefix:   smallProviderCfg.SystemPromptPrefix,
                SystemPrompt:         systemPrompt,
                DisableAutoSummarize: c.cfg.Options.DisableAutoSummarize,
                IsYolo:               c.permissions.SkipRequests(),
                Sessions:             c.sessions,
                Messages:             c.messages,
                Tools:                fetchTools,
            })

            agentToolSessionID := c.sessions.CreateAgentToolSessionID(validationResult.AgentMessageID, call.ID)
            session, err := c.sessions.CreateTaskSession(ctx, agentToolSessionID, validationResult.SessionID, "Fetch Analysis")
            if err != nil {
                return fantasy.ToolResponse{}, fmt.Errorf("error creating session: %s", err)
            }

            c.permissions.AutoApproveSession(session.ID)

            // Use small model for web content analysis (faster and cheaper).
            maxTokens := small.CatwalkCfg.DefaultMaxTokens
            if small.ModelCfg.MaxTokens != 0 {
                maxTokens = small.ModelCfg.MaxTokens
            }

            result, err := agent.Run(ctx, SessionAgentCall{
                SessionID:        session.ID,
                Prompt:           fullPrompt,
                MaxOutputTokens:  maxTokens,
                ProviderOptions:  getProviderOptions(small, smallProviderCfg),
                Temperature:      small.ModelCfg.Temperature,
                TopP:             small.ModelCfg.TopP,
                TopK:             small.ModelCfg.TopK,
                FrequencyPenalty: small.ModelCfg.FrequencyPenalty,
                PresencePenalty:  small.ModelCfg.PresencePenalty,
            })
            if err != nil {
                return fantasy.NewTextErrorResponse("error generating response"), nil
            }

            updatedSession, err := c.sessions.Get(ctx, session.ID)
            if err != nil {
                return fantasy.ToolResponse{}, fmt.Errorf("error getting session: %s", err)
            }
            parentSession, err := c.sessions.Get(ctx, validationResult.SessionID)
            if err != nil {
                return fantasy.ToolResponse{}, fmt.Errorf("error getting parent session: %s", err)
            }

            parentSession.Cost += updatedSession.Cost

            _, err = c.sessions.Save(ctx, parentSession)
            if err != nil {
                return fantasy.ToolResponse{}, fmt.Errorf("error saving parent session: %s", err)
            }

            return fantasy.NewTextResponse(result.Response.Content.Text()), nil
        }), nil
}
9 changes: 8 additions & 1 deletion internal/agent/coordinator.go
@@ -319,6 +319,14 @@ func (c *coordinator) buildTools(ctx context.Context, agent config.Agent) ([]fan
         allTools = append(allTools, agentTool)
     }
 
+    if slices.Contains(agent.AllowedTools, tools.AgenticFetchToolName) {
+        agenticFetchTool, err := c.agenticFetchTool(ctx, nil)
+        if err != nil {
+            return nil, err
+        }
+        allTools = append(allTools, agenticFetchTool)
+    }
+
     allTools = append(allTools,
         tools.NewBashTool(c.permissions, c.cfg.WorkingDir(), c.cfg.Options.Attribution),
         tools.NewDownloadTool(c.permissions, c.cfg.WorkingDir(), nil),

@@ -654,7 +662,6 @@ func (c *coordinator) buildProvider(providerCfg config.ProviderConfig, model con
         }
     }
 
-    // TODO: make sure we have
     apiKey, _ := c.cfg.Resolve(providerCfg.APIKey)
     baseURL, _ := c.cfg.Resolve(providerCfg.BaseURL)
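For context, the new gate above keys off the agent's allow-list. A hedged illustration (the config.Agent type and AllowedTools field come from the diff; the literal value is hypothetical):

// Illustrative only: an agent config whose AllowedTools contains the new
// tool name will have agentic_fetch registered by buildTools.
agentCfg := config.Agent{
    AllowedTools: []string{tools.AgenticFetchToolName},
}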
51 changes: 51 additions & 0 deletions internal/agent/templates/agentic_fetch.md
@@ -0,0 +1,51 @@
Fetches content from a specified URL and processes it using an AI model to extract information or answer questions.

<when_to_use>
Use this tool when you need to:
- Extract specific information from a webpage (e.g., "get pricing info")
- Answer questions about web content (e.g., "what does this article say about X?")
- Summarize or analyze web pages
- Find specific data within large pages
- Interpret or process web content with AI

DO NOT use this tool when:
- You just need raw content without analysis (use fetch instead - faster and cheaper)
- You want direct access to API responses or JSON (use fetch instead)
- You don't need the content processed or interpreted (use fetch instead)
</when_to_use>

<usage>
- Takes a URL and a prompt as input
- Fetches the URL content, converts HTML to markdown
- Processes the content with the prompt using a small, fast model
- Returns the model's response about the content
- Use this tool when you need to retrieve and analyze web content
</usage>

<usage_notes>

- IMPORTANT: If an MCP-provided web fetch tool is available, prefer using that tool instead of this one, as it may have fewer restrictions. All MCP-provided tools start with "mcp_".
- The URL must be a fully-formed valid URL
- HTTP URLs will be automatically upgraded to HTTPS (see the sketch after this list)
- The prompt should describe what information you want to extract from the page
- This tool is read-only and does not modify any files
- Results will be summarized if the content is very large
- For very large pages, the content will be saved to a temporary file and the agent will have access to grep/view tools to analyze it
- When a URL redirects to a different host, the tool will inform you and provide the redirect URL. You should then make a new fetch request with the redirect URL to fetch the content.
- This tool uses AI processing and costs more tokens than the simple fetch tool
</usage_notes>
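A minimal sketch of the scheme upgrade mentioned above, assuming the real logic lives inside the fetch tooling (illustrative, not the actual implementation):

// Illustrative only: upgrade http:// URLs to https:// before fetching.
// Uses net/url; the real behavior is implemented by the fetch tooling.
func upgradeScheme(raw string) (string, error) {
    u, err := url.Parse(raw)
    if err != nil {
        return "", err
    }
    if u.Scheme == "http" {
        u.Scheme = "https"
    }
    return u.String(), nil
}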

<limitations>
- Max response size: 5MB
- Only supports HTTP and HTTPS protocols
- Cannot handle authentication or cookies
- Some websites may block automated requests
- Uses additional tokens for AI processing
</limitations>

<tips>
- Be specific in your prompt about what information you want to extract
- For complex pages, ask the agent to focus on specific sections
- The agent has access to grep and view tools when analyzing large pages
- If you just need raw content, use the fetch tool instead to save tokens
</tips>
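As a quick illustration of the two inputs this description covers, a call maps onto tools.AgenticFetchParams roughly like so (hypothetical values):

params := tools.AgenticFetchParams{
    URL:    "https://example.com/docs/changelog",
    Prompt: "What breaking changes are listed for the latest release?",
}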
45 changes: 45 additions & 0 deletions internal/agent/templates/agentic_fetch_prompt.md.tpl
@@ -0,0 +1,45 @@
You are a web content analysis agent for Crush. Your task is to analyze web page content and extract the information requested by the user.

<rules>
1. You should be concise and direct in your responses
2. Focus only on the information requested in the user's prompt
3. If the content is provided in a file path, use the grep and view tools to efficiently search through it
4. When relevant, quote specific sections from the page to support your answer
5. If the requested information is not found, clearly state that
6. Any file paths you use MUST be absolute
7. **IMPORTANT**: If you need information from a linked page to answer the question, use the web_fetch tool to follow that link
8. After fetching a link, analyze the content yourself to extract what's needed
9. Don't hesitate to follow multiple links if necessary to get complete information
10. **CRITICAL**: At the end of your response, include a "Sources" section listing ALL URLs that were useful in answering the question
</rules>

<response_format>
Your response should be structured as follows:

[Your answer to the user's question]

## Sources
- [URL 1 that was useful]
- [URL 2 that was useful]
- [URL 3 that was useful]
...

Only include URLs that actually contributed information to your answer. The main URL is always included. Add any additional URLs you fetched that provided relevant information.
</response_format>

<env>
Working directory: {{.WorkingDir}}
Platform: {{.Platform}}
Today's date: {{.Date}}
</env>

<web_fetch_tool>
You have access to a web_fetch tool that allows you to fetch additional web pages:
- Use it when you need to follow links from the current page
- Provide just the URL (no prompt parameter)
- The tool will fetch and return the content (or save to a file if large)
- YOU must then analyze that content to answer the user's question
- **Use this liberally** - if a link seems relevant to answering the question, fetch it!
- You can fetch multiple pages in sequence to gather all needed information
- Remember to include any fetched URLs in your Sources section if they were helpful
</web_fetch_tool>