From 1080c516261cd03176776f95d410a3c9623a8e7a Mon Sep 17 00:00:00 2001 From: umerkay Date: Mon, 4 May 2026 16:06:34 +0500 Subject: [PATCH 1/3] feat: olostep as alternative to tavily --- .env.example | 2 + CHANGELOG.md | 4 ++ README.md | 3 +- atlas | 28 +++++++- src/beever_atlas/agents/tools/__init__.py | 2 +- .../agents/tools/external_tools.py | 64 ++++++++++++++++++- src/beever_atlas/infra/config.py | 2 + tests/test_config.py | 2 + tests/test_setup_script.sh | 15 +++-- 9 files changed, 109 insertions(+), 13 deletions(-) diff --git a/.env.example b/.env.example index b8d51174..f28dc14c 100644 --- a/.env.example +++ b/.env.example @@ -97,6 +97,8 @@ OLLAMA_API_BASE=http://localhost:11434 # --- 2.4 External web search (Tavily) --------------------- TAVILY_API_KEY= +OLOSTEP_API_KEY= # Optional: Olostep web search (alternative to Tavily) +WEB_SEARCH_PROVIDER=tavily # Options: tavily, olostep # --- 2.5 Chat history DB (blank = reuse MONGODB_URI) ------ BEEVER_CHAT_HISTORY_DB= diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f6b869e..96dc2860 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Optional Olostep web search provider for QA external search, selectable via + `WEB_SEARCH_PROVIDER=olostep` and `OLOSTEP_API_KEY`. + ## [0.1.2] - 2026-04-30 ### Added diff --git a/README.md b/README.md index 005eeb8d..0d136f8c 100644 --- a/README.md +++ b/README.md @@ -161,6 +161,7 @@ Optional (skip unless you know you need them): | Key | What it enables | |---|---| | `TAVILY_API_KEY` | External web search when QA retrieval confidence is low — [tavily.com](https://tavily.com/) | +| `OLOSTEP_API_KEY` | Olostep web search (alternative to Tavily). Set `WEB_SEARCH_PROVIDER=olostep` in your `.env` — [olostep.com/dashboard](https://www.olostep.com/dashboard) | | Slack / Discord / Teams bot tokens | **Configured via the web UI after setup**, not `.env` — the bot stores platform credentials encrypted in MongoDB | > **Tip:** Keep the two required keys handy before you start. Option 1 prompts for them interactively; Options 2 and 3 need them pasted into `.env`. @@ -182,7 +183,7 @@ Optional (skip unless you know you need them): The `atlas` installer walks you through a guided 4-step checklist: 1. **Required LLM keys** — prompts for `GOOGLE_API_KEY` (Gemini) and `JINA_API_KEY` (embeddings); press Enter to skip either. -2. **Optional integrations** — Tavily web search, Ollama, MCP server for Claude Code / Cursor. +2. **Optional integrations** — Tavily or Olostep web search, Ollama, MCP server for Claude Code / Cursor. 3. **Graph backend** — Neo4j (default) or skip. 4. **Auth tokens** — keep dev defaults or rotate now. diff --git a/atlas b/atlas index 3f7d4600..6941f1c3 100755 --- a/atlas +++ b/atlas @@ -9,7 +9,7 @@ # What it does (in order): # Prerequisites → .env bootstrap → # Step 1/4 Required LLM keys (Google Gemini, Jina) -# Step 2/4 Optional integrations (Tavily, Ollama, MCP server) +# Step 2/4 Optional integrations (Tavily/Olostep, Ollama, MCP server) # Step 3/4 Graph backend (neo4j or none) # Step 4/4 Auth tokens (keep dev defaults or rotate) # Auto-gen secrets → docker compose up -d @@ -331,6 +331,30 @@ if [ "$NON_INTERACTIVE" != "true" ]; then # Tavily prompt_external_key "TAVILY_API_KEY" "Tavily (external web search in QA)" "https://tavily.com/" + # Olostep + prompt_external_key "OLOSTEP_API_KEY" "Olostep (external web search in QA)" "https://www.olostep.com/dashboard" + + # Web search provider (only if at least one key is set) + tavily_key=$(grep -E "^TAVILY_API_KEY=" .env | head -n 1 | cut -d'=' -f2-) + olostep_key=$(grep -E "^OLOSTEP_API_KEY=" .env | head -n 1 | cut -d'=' -f2-) + if [ -n "$tavily_key" ] || [ -n "$olostep_key" ]; then + current_provider=$(grep -E "^WEB_SEARCH_PROVIDER=" .env | head -n 1 | cut -d'=' -f2-) + ask "Web search provider (tavily/olostep)" + if [ -n "$current_provider" ]; then + hint "Current: ${current_provider} (Enter = keep)" + else + hint "Press Enter for default (tavily)" + fi + printf " ${C_DIM}WEB_SEARCH_PROVIDER${C_RESET} " + provider="" + read -r provider || true + if [ -z "$provider" ]; then + provider="${current_provider:-tavily}" + fi + replace_env_value "WEB_SEARCH_PROVIDER" "$provider" + ok "WEB_SEARCH_PROVIDER=${provider}" + fi + # Ollama (local LLM) if confirm "N" "Enable Ollama (run a local LLM instead of Gemini)?"; then replace_env_value "OLLAMA_ENABLED" "true" @@ -421,7 +445,7 @@ fi # "Finalizing secrets" so auto-gen logic sees the correct filled state. # --------------------------------------------------------------------- if [ "$NON_INTERACTIVE" = "true" ]; then - for _preseed_key in GOOGLE_API_KEY JINA_API_KEY TAVILY_API_KEY; do + for _preseed_key in GOOGLE_API_KEY JINA_API_KEY TAVILY_API_KEY OLOSTEP_API_KEY WEB_SEARCH_PROVIDER; do _preseed_val="${!_preseed_key:-}" 2>/dev/null || _preseed_val="" if [ -n "$_preseed_val" ]; then replace_env_value "$_preseed_key" "$_preseed_val" diff --git a/src/beever_atlas/agents/tools/__init__.py b/src/beever_atlas/agents/tools/__init__.py index df953cf8..25181fc0 100644 --- a/src/beever_atlas/agents/tools/__init__.py +++ b/src/beever_atlas/agents/tools/__init__.py @@ -98,7 +98,7 @@ { "name": "search_external_knowledge", "category": "external", - "description": "Search external web knowledge via the Tavily API.", + "description": "Search external web knowledge via the configured provider (Tavily or Olostep).", }, # Orchestration tools — available in deep mode only. Surfaced here so the # Tools panel can disable them per request via AskRequest.disabled_tools. diff --git a/src/beever_atlas/agents/tools/external_tools.py b/src/beever_atlas/agents/tools/external_tools.py index b50af7a3..90b6b60e 100644 --- a/src/beever_atlas/agents/tools/external_tools.py +++ b/src/beever_atlas/agents/tools/external_tools.py @@ -1,4 +1,4 @@ -"""External knowledge tool: Tavily web search.""" +"""External knowledge tool: web search.""" from __future__ import annotations @@ -14,10 +14,10 @@ @cite_tool_output(kind="web_result") async def search_external_knowledge(query: str, mode: str = "general") -> dict: - """Search external web knowledge via Tavily API. + """Search external web knowledge via Tavily or Olostep. Cost: ~$0.01. Target latency: ~1s. - Requires TAVILY_API_KEY environment variable. + Requires TAVILY_API_KEY or OLOSTEP_API_KEY environment variable. Args: query: Search query. @@ -33,6 +33,31 @@ async def search_external_knowledge(query: str, mode: str = "general") -> dict: from beever_atlas.infra.config import get_settings settings = get_settings() + provider = (settings.web_search_provider or "tavily").strip().lower() + if provider == "olostep": + api_key = settings.olostep_api_key + if not api_key: + return { + "error": "olostep_unavailable", + "message": "OLOSTEP_API_KEY is not configured. External search unavailable.", + "results": [], + "source": "external", + } + + results = await asyncio.to_thread( + search_with_olostep, + query, + api_key, + 5, + ) + + return { + "answer": "", + "results": results, + "source": "external_olostep", + "mode": mode, + } + api_key = settings.tavily_api_key if not api_key: return { @@ -91,3 +116,36 @@ async def search_external_knowledge(query: str, mode: str = "general") -> dict: "results": [], "source": "external", } + + +def search_with_olostep(query: str, api_key: str, max_results: int = 5) -> list[dict]: + """Search the web using Olostep /searches endpoint. + + Returns a list of result dicts with 'title', 'url', 'content', 'text', + and 'score' keys to match the shape expected by callers. + """ + import httpx + + response = httpx.post( + "https://api.olostep.com/v1/searches", + headers={ + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + }, + json={"query": query}, + timeout=30, + ) + response.raise_for_status() + data = response.json() + links = data.get("result", {}).get("links", [])[:max_results] + + return [ + { + "title": link.get("title", ""), + "url": link.get("url", ""), + "content": link.get("description", "")[:500], + "text": link.get("description", "")[:500], + "score": 0.0, + } + for link in links + ] diff --git a/src/beever_atlas/infra/config.py b/src/beever_atlas/infra/config.py index 4e2d7bc5..fd0f94d1 100644 --- a/src/beever_atlas/infra/config.py +++ b/src/beever_atlas/infra/config.py @@ -97,6 +97,8 @@ class Settings(BaseSettings): # External services jina_api_key: str = Field(default="") tavily_api_key: str = Field(default="") + olostep_api_key: str = Field(default="") + web_search_provider: str = Field(default="tavily") # LLM model tiers (ADK pipeline) llm_fast_model: str = Field(default="gemini-2.5-flash") diff --git a/tests/test_config.py b/tests/test_config.py index a670968e..4f59c34f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -34,6 +34,8 @@ def test_all_api_key_fields_exist(self): assert hasattr(settings, "google_api_key") assert hasattr(settings, "jina_api_key") assert hasattr(settings, "tavily_api_key") + assert hasattr(settings, "olostep_api_key") + assert hasattr(settings, "web_search_provider") # TestLiteLLMConfig removed — beever_atlas.infra.litellm_config replaced by beever_atlas.llm diff --git a/tests/test_setup_script.sh b/tests/test_setup_script.sh index ee376f75..94105fc0 100755 --- a/tests/test_setup_script.sh +++ b/tests/test_setup_script.sh @@ -270,17 +270,20 @@ mk_workspace "$WS" # 2. JINA_API_KEY (skip) # 3. Configure integrations? y # 4. TAVILY_API_KEY my-tavily-key -# 5. Ollama? y -# 6. MCP server? y -# 7. Graph backend [1 = default neo4j] -# 8. Rotate auth tokens? y -printf '\n\ny\nmy-tavily-key\ny\ny\n\ny\n' | ( +# 5. OLOSTEP_API_KEY (skip) +# 6. WEB_SEARCH_PROVIDER (default tavily) +# 7. Ollama? y +# 8. MCP server? y +# 9. Graph backend [1 = default neo4j] +# 10. Rotate auth tokens? y +printf '\n\ny\nmy-tavily-key\n\n\ny\ny\n\ny\n' | ( cd "$WS" PATH="${WS}/stubs:${MINBIN}" bash ./atlas ) > "$WS/stdout" 2> "$WS/stderr" status=$? assert "exited with status 0" "[ $status -eq 0 ]" assert "TAVILY_API_KEY was written" "grep -qE '^TAVILY_API_KEY=my-tavily-key$' '$WS/.env'" +assert "WEB_SEARCH_PROVIDER defaulted" "grep -qE '^WEB_SEARCH_PROVIDER=tavily$' '$WS/.env'" assert "OLLAMA_ENABLED=true" "grep -qE '^OLLAMA_ENABLED=true$' '$WS/.env'" assert "BEEVER_MCP_ENABLED=true" "grep -qE '^BEEVER_MCP_ENABLED=true$' '$WS/.env'" assert "BEEVER_MCP_API_KEYS auto-generated" "grep -qE '^BEEVER_MCP_API_KEYS=mcp-' '$WS/.env'" @@ -369,7 +372,7 @@ mk_workspace "$WS" ( cd "$WS" # Ensure neither key is in the environment - env -u GOOGLE_API_KEY -u JINA_API_KEY -u TAVILY_API_KEY \ + env -u GOOGLE_API_KEY -u JINA_API_KEY -u TAVILY_API_KEY -u OLOSTEP_API_KEY -u WEB_SEARCH_PROVIDER \ ATLAS_HEALTH_POLL_TIMEOUT=0 PATH="${WS}/stubs:${MINBIN}" bash ./atlas --non-interactive ) > "$WS/stdout" 2> "$WS/stderr" status=$? From 25a8b4cb4fa8e8aa20355d174b20484574ea40f1 Mon Sep 17 00:00:00 2001 From: umerkay Date: Thu, 7 May 2026 11:38:38 +0500 Subject: [PATCH 2/3] Fix requested changes --- .env.example | 6 ++++-- src/beever_atlas/agents/tools/external_tools.py | 8 ++++++-- src/beever_atlas/infra/config.py | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.env.example b/.env.example index f28dc14c..a530f5ab 100644 --- a/.env.example +++ b/.env.example @@ -97,8 +97,10 @@ OLLAMA_API_BASE=http://localhost:11434 # --- 2.4 External web search (Tavily) --------------------- TAVILY_API_KEY= -OLOSTEP_API_KEY= # Optional: Olostep web search (alternative to Tavily) -WEB_SEARCH_PROVIDER=tavily # Options: tavily, olostep +# Optional: Olostep web search (alternative to Tavily) +OLOSTEP_API_KEY= +# Options: tavily, olostep +WEB_SEARCH_PROVIDER=tavily # --- 2.5 Chat history DB (blank = reuse MONGODB_URI) ------ BEEVER_CHAT_HISTORY_DB= diff --git a/src/beever_atlas/agents/tools/external_tools.py b/src/beever_atlas/agents/tools/external_tools.py index 90b6b60e..84bf738a 100644 --- a/src/beever_atlas/agents/tools/external_tools.py +++ b/src/beever_atlas/agents/tools/external_tools.py @@ -33,7 +33,8 @@ async def search_external_knowledge(query: str, mode: str = "general") -> dict: from beever_atlas.infra.config import get_settings settings = get_settings() - provider = (settings.web_search_provider or "tavily").strip().lower() + provider = settings.web_search_provider + logger.info("web_search.provider=%s mode=%s", provider, mode) if provider == "olostep": api_key = settings.olostep_api_key if not api_key: @@ -137,7 +138,10 @@ def search_with_olostep(query: str, api_key: str, max_results: int = 5) -> list[ ) response.raise_for_status() data = response.json() - links = data.get("result", {}).get("links", [])[:max_results] + links = data.get("result", {}).get("links", []) + if not links: + logger.warning("olostep response missing links: %s", list(data.keys())) + links = links[:max_results] return [ { diff --git a/src/beever_atlas/infra/config.py b/src/beever_atlas/infra/config.py index fd0f94d1..5a4c1405 100644 --- a/src/beever_atlas/infra/config.py +++ b/src/beever_atlas/infra/config.py @@ -98,7 +98,7 @@ class Settings(BaseSettings): jina_api_key: str = Field(default="") tavily_api_key: str = Field(default="") olostep_api_key: str = Field(default="") - web_search_provider: str = Field(default="tavily") + web_search_provider: Literal["tavily", "olostep"] = Field(default="tavily") # LLM model tiers (ADK pipeline) llm_fast_model: str = Field(default="gemini-2.5-flash") From 78d623838d4503d5239be5c5eae38a4b36ddfe96 Mon Sep 17 00:00:00 2001 From: umerkay Date: Fri, 15 May 2026 14:57:49 +0500 Subject: [PATCH 3/3] olostep fixes --- atlas | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/atlas b/atlas index 13a52a81..a95073ec 100755 --- a/atlas +++ b/atlas @@ -11,7 +11,7 @@ # Step 1/5 Embedding model (pick provider, then its key) # Step 2/5 Agent LLM provider (pick provider; assign per-agent later in AI Setup) # Step 3/5 Graph backend (neo4j or none) -# Step 4/5 Optional integrations (Tavily, MCP server) +# Step 4/5 Optional integrations (Tavily/Olostep, MCP server) # Step 5/5 Auth tokens (keep dev defaults or rotate) # # For CI/Docker: pass BEEVER_LLM_API_KEY (single-provider shortcut) or @@ -790,17 +790,24 @@ fi # --------------------------------------------------------------------- if [ "$NON_INTERACTIVE" != "true" ]; then step "4/5" "Optional integrations" - hint "External web search (Tavily) and MCP for Claude Code / Cursor." + hint "External web search (Tavily or Olostep) and MCP for Claude Code / Cursor." opt_parts=() if confirm "N" "Configure optional integrations now?"; then # Tavily prompt_external_key "TAVILY_API_KEY" "Tavily (external web search in QA)" "https://tavily.com/" + # Olostep + prompt_external_key "OLOSTEP_API_KEY" "Olostep (external web search in QA)" "https://www.olostep.com/dashboard" if grep -qE '^TAVILY_API_KEY=.+' .env 2>/dev/null; then opt_parts+=("Tavily ✓") else opt_parts+=("Tavily skipped") fi + if grep -qE '^OLOSTEP_API_KEY=.+' .env 2>/dev/null; then + opt_parts+=("Olostep ✓") + else + opt_parts+=("Olostep skipped") + fi # Ollama is now a first-class agent-provider choice in Step 2 — no # separate prompt here. (If you picked Ollama in Step 2 it's already @@ -827,7 +834,7 @@ if [ "$NON_INTERACTIVE" != "true" ]; then _atlas_summary+=("${C_GREEN}✓${C_RESET} Optional ${opt_parts[*]}") else ok "skipped optional integrations" - _atlas_summary+=("${C_DIM}—${C_RESET} Optional skipped (Tavily / MCP)") + _atlas_summary+=("${C_DIM}—${C_RESET} Optional skipped (Tavily / Olostep / MCP)") fi fi @@ -874,7 +881,7 @@ fi # "Finalizing secrets" so auto-gen logic sees the correct filled state. # --------------------------------------------------------------------- if [ "$NON_INTERACTIVE" = "true" ]; then - for _preseed_key in GOOGLE_API_KEY JINA_API_KEY TAVILY_API_KEY \ + for _preseed_key in GOOGLE_API_KEY JINA_API_KEY TAVILY_API_KEY OLOSTEP_API_KEY WEB_SEARCH_PROVIDER \ EMBEDDING_PROVIDER EMBEDDING_MODEL EMBEDDING_DIMENSIONS \ EMBEDDING_API_KEY OPENAI_API_KEY COHERE_API_KEY \ VOYAGE_API_KEY MISTRAL_API_KEY \