Skip to content

Commit

Permalink
Add llm evaluations (#204)
Browse files Browse the repository at this point in the history
This PR has mostly 3 changes:
- Improvement of the prompt to prevent the LLM from mentioning
`kubectl logs` and `kubectl logs --previous` separately
- Introduction of a test suite for investigations, including the ability
to mock the DB access
- Integration with braintrust.dev, although that code is currently
commented out because I hit the freemium limits and need to talk to them.


It's a big PR and most of it is the refactor of the existing mock
mechanism to work well with the DAL.
  • Loading branch information
nherment authored Nov 25, 2024
1 parent 3383b2f commit 57951e1
Show file tree
Hide file tree
Showing 153 changed files with 3,622 additions and 1,901 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,5 @@ cython_debug/
playwright.png
.deepeval*
pyrightconfig.json

*.AUTOGENERATED
7 changes: 4 additions & 3 deletions holmes/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pydash.arrays import concat
from rich.console import Console


from holmes.core.runbooks import RunbookManager
from holmes.core.supabase_dal import SupabaseDal
from holmes.core.tool_calling_llm import (IssueInvestigator, ToolCallingLLM,
Expand Down Expand Up @@ -102,7 +103,7 @@ def load_from_env(cls):
kwargs[field_name] = val
return cls(**kwargs)

def _create_tool_executor(
def create_tool_executor(
self, console: Console, allowed_toolsets: ToolsetPattern, dal:Optional[SupabaseDal]
) -> ToolExecutor:
all_toolsets = load_builtin_toolsets(dal=dal)
Expand Down Expand Up @@ -146,7 +147,7 @@ def _create_tool_executor(
def create_toolcalling_llm(
self, console: Console, allowed_toolsets: ToolsetPattern, dal:Optional[SupabaseDal] = None
) -> ToolCallingLLM:
tool_executor = self._create_tool_executor(console, allowed_toolsets, dal)
tool_executor = self.create_tool_executor(console, allowed_toolsets, dal)
return ToolCallingLLM(
tool_executor,
self.max_steps,
Expand All @@ -164,7 +165,7 @@ def create_issue_investigator(
all_runbooks.extend(load_runbooks_from_file(runbook_path))

runbook_manager = RunbookManager(all_runbooks)
tool_executor = self._create_tool_executor(console, allowed_toolsets, dal)
tool_executor = self.create_tool_executor(console, allowed_toolsets, dal)
return IssueInvestigator(
tool_executor,
runbook_manager,
Expand Down
47 changes: 47 additions & 0 deletions holmes/core/investigation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@

from rich.console import Console
from holmes.common.env_vars import ALLOWED_TOOLSETS, HOLMES_POST_PROCESSING_PROMPT
from holmes.config import Config
from holmes.core.issue import Issue
from holmes.core.models import InvestigateRequest, InvestigationResult
from holmes.core.supabase_dal import SupabaseDal
from holmes.utils.robusta import load_robusta_api_key


def investigate_issues(investigate_request: InvestigateRequest, dal: SupabaseDal, config: Config, console:Console):
    """Investigate the issue described by ``investigate_request`` and return the result.

    Steps, in order:
      1. Call ``load_robusta_api_key`` with the given ``dal`` and ``config``.
      2. Ask the DAL for stored issue data (keyed by the request context's
         ``robusta_issue_id``) and for resource instructions (type ``"alert"``,
         keyed by the request context's ``issue_type``).
      3. Build an ``Issue`` from the request, attaching the stored issue data
         under ``extra_context`` when any was found.
      4. Run the issue investigator created by ``config`` and wrap its output
         in an ``InvestigationResult``.

    Args:
        investigate_request: The incoming request; its ``context``, ``title``,
            ``source``, ``source_instance_id`` and ``prompt_template`` are read.
        dal: Data-access layer used to fetch issue data and instructions.
        config: Used to create the issue investigator.
        console: Console forwarded to the investigator.

    Returns:
        An ``InvestigationResult`` carrying the analysis, the tool calls
        (an empty list when the investigation reports none) and the
        instructions reported by the investigation.
    """
    load_robusta_api_key(dal=dal, config=config)

    # Stored data for the issue, if the DAL returns any.
    issue_data = dal.get_issue_data(
        investigate_request.context.get("robusta_issue_id")
    )
    instructions_for_resource = dal.get_resource_instructions(
        "alert", investigate_request.context.get("issue_type")
    )

    # The raw payload is the dumped request, optionally enriched with the
    # stored issue data.
    payload = investigate_request.model_dump()
    if issue_data:
        payload["extra_context"] = issue_data

    investigator = config.create_issue_investigator(
        console, allowed_toolsets=ALLOWED_TOOLSETS, dal=dal
    )

    # Fall back to an empty id when no stored issue data was found.
    if issue_data:
        issue_id = issue_data["id"]
    else:
        issue_id = ""

    issue_to_investigate = Issue(
        id=issue_id,
        name=investigate_request.title,
        source_type=investigate_request.source,
        source_instance_id=investigate_request.source_instance_id,
        raw=payload,
    )

    outcome = investigator.investigate(
        issue_to_investigate,
        prompt=investigate_request.prompt_template,
        console=console,
        post_processing_prompt=HOLMES_POST_PROCESSING_PROMPT,
        instructions=instructions_for_resource,
    )

    recorded_tool_calls = outcome.tool_calls or []
    return InvestigationResult(
        analysis=outcome.result,
        tool_calls=recorded_tool_calls,
        instructions=outcome.instructions,
    )
12 changes: 6 additions & 6 deletions holmes/core/supabase_dal.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
import os
import threading
from typing import Dict, Optional, List
from typing import Dict, Optional, List, Tuple
from uuid import uuid4

import yaml
Expand Down Expand Up @@ -31,7 +31,6 @@
class RobustaConfig(BaseModel):
sinks_config: List[Dict[str, Dict]]


class RobustaToken(BaseModel):
store_url: str
api_key: str
Expand Down Expand Up @@ -127,10 +126,11 @@ def sign_in(self) -> str:
self.client.postgrest.auth(res.session.access_token)
return res.user.id

def get_issue_data(self, issue_id: str) -> Optional[Dict]:
def get_issue_data(self, issue_id: Optional[str]) -> Optional[Dict]:
# TODO this could be done in a single atomic SELECT, but there is no
# foreign key relation between Issues and Evidence.

if not issue_id:
return None
if not self.enabled: # store not initialized
return None
issue_data = None
Expand All @@ -145,7 +145,7 @@ def get_issue_data(self, issue_id: str) -> Optional[Dict]:
if len(issue_response.data):
issue_data = issue_response.data[0]

except: # e.g. invalid id format
except Exception: # e.g. invalid id format
logging.exception("Supabase error while retrieving issue data")
return None
if not issue_data:
Expand Down Expand Up @@ -205,7 +205,7 @@ def create_session_token(self) -> str:
).execute()
return token

def get_ai_credentials(self) -> (str, str):
def get_ai_credentials(self) -> Tuple[str, str]:
with self.lock:
session_token = self.token_cache.get("session_token")
if not session_token:
Expand Down
2 changes: 1 addition & 1 deletion holmes/core/tool_calling_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import json
import logging
import textwrap
import os
from typing import List, Optional, Dict
from holmes.utils.tags import format_tags_in_string, parse_messages_tags
from holmes.plugins.prompts import load_and_render_prompt
Expand Down Expand Up @@ -333,6 +332,7 @@ def investigate(
post_processing_prompt: Optional[str] = None,
) -> LLMResult:
runbooks = self.runbook_manager.get_instructions_for_issue(issue)

if instructions != None and instructions.instructions:
runbooks.extend(instructions.instructions)

Expand Down
5 changes: 3 additions & 2 deletions holmes/plugins/prompts/_general_instructions.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ If investigating Kubernetes problems:
* run as many kubectl commands as you need to gather more information, then respond.
* if possible, do so repeatedly on different Kubernetes objects.
* for example, for deployments first run kubectl on the deployment then a replicaset inside it, then a pod inside that.
* when investigating a pod that crashed or application errors, always run kubectl_describe and fetch logs with both kubectl_previous_logs and kubectl_logs so that you see current logs and any logs from before a crash.
* use both kubectl_previous_logs and kubectl_logs when reading logs. Treat the output of both as a single unified logs stream
* when investigating a pod that crashed or application errors, always run kubectl_describe and fetch the logs
* do not give an answer like "The pod is pending" as that doesn't state why the pod is pending and how to fix it.
* do not give an answer like "Pod's node affinity/selector doesn't match any available nodes" because that doesn't include data on WHICH label doesn't match
* if investigating an issue on many pods, there is no need to check more than 3 individual pods in the same deployment. pick up to a representative 3 from each deployment if relevant
* if the user says something isn't working, ALWAYS:
** use kubectl_describe on the owner workload + individual pods and look for any transient issues they might have been referring to
** check the application aspects with kubectl_logs + kubectl_previous_logs and other relevant tools
** check the application aspects through the logs (kubectl_logs and kubectl_previous_logs) and other relevant tools
** look for misconfigured ingresses/services etc

Special cases and how to reply:
Expand Down
Loading

0 comments on commit 57951e1

Please sign in to comment.