feat: cleanup structured output code
nherment committed Jan 20, 2025
1 parent 9acaa1f commit c3de44f
Showing 8 changed files with 60 additions and 176 deletions.
4 changes: 2 additions & 2 deletions holmes/core/investigation.py
@@ -8,7 +8,7 @@
from holmes.utils.robusta import load_robusta_api_key


def investigate_issues(investigate_request: InvestigateRequest, dal: SupabaseDal, config: Config, console:Console):
def investigate_issues(investigate_request: InvestigateRequest, dal: SupabaseDal, config: Config, console:Console) -> InvestigationResult:
    load_robusta_api_key(dal=dal, config=config)
    context = dal.get_issue_data(
        investigate_request.context.get("robusta_issue_id")
@@ -42,9 +42,9 @@ def investigate_issues(investigate_request: InvestigateRequest, dal: SupabaseDal
        instructions=resource_instructions,
        global_instructions=global_instructions
    )

    return InvestigationResult(
        analysis=investigation.result,
        sections=investigation.sections,
        tool_calls=investigation.tool_calls or [],
        instructions=investigation.instructions,
    )
43 changes: 43 additions & 0 deletions holmes/core/investigation_output_format.py
@@ -0,0 +1,43 @@
from typing import Any

schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "required": [
        "Alert Explanation",
        "Investigation",
        "Conclusions and Possible Root causes",
        "Next Steps"
    ],
    "properties": {
        "Alert Explanation": {
            "type": ["string", "null"],
            "description": "1-2 sentences explaining the alert itself - note don't say \"The alert indicates a warning event related to a Kubernetes pod doing blah\" rather just say \"The pod XYZ did blah\" because that is what the user actually cares about"
        },
        "Investigation": {
            "type": ["string", "null"],
            "description": "what you checked and found"
        },
        "Conclusions and Possible Root causes": {
            "type": ["string", "null"],
            "description": "what conclusions can you reach based on the data you found? what are possible root causes (if you have enough conviction to say) or what uncertainty remains"
        },
        "Next Steps": {
            "type": ["string", "null"],
            "description": "what you would do next to troubleshoot this issue, any commands that could be run to fix it, or other ways to solve it (prefer giving precise bash commands when possible)"
        }
    },
    "additionalProperties": False
}

ExpectedInvestigationOutputFormat = { "type": "json_schema", "json_schema": { "name": "InvestigationResult", "schema": schema, "strict": True} }

def combine_sections(sections: Any) -> str:
    if isinstance(sections, dict):
        content = ''
        for section_title, section_content in sections.items():
            if section_content:
                # content = content + f'\n# {" ".join(section_title.split("_")).title()}\n{section_content}'
                content = content + f'\n# {section_title}\n{section_content}\n'
        return content
    return f"{sections}"
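For context, a minimal sketch of how combine_sections flattens a sections dict into markdown. The import path is real (this new file); the sample data below is hypothetical:

from holmes.core.investigation_output_format import combine_sections

# Hypothetical sections dict, shaped like an LLM response under the schema above.
sections = {
    "Alert Explanation": "The pod `kafka-consumer` was OOMKilled.",
    "Investigation": "Checked pod events and the container's memory limits.",
    "Conclusions and Possible Root causes": None,  # falsy sections are skipped
    "Next Steps": "Raise the memory limit and redeploy.",
}

print(combine_sections(sections))
# Output (empty sections omitted):
# # Alert Explanation
# The pod `kafka-consumer` was OOMKilled.
#
# # Investigation
# Checked pod events and the container's memory limits.
#
# # Next Steps
# Raise the memory limit and redeploy.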
1 change: 1 addition & 0 deletions holmes/core/models.py
@@ -6,6 +6,7 @@

class InvestigationResult(BaseModel):
    analysis: Optional[str] = None
    sections: Optional[Dict[str, str]] = None
    tool_calls: List[ToolCallResult] = []
    instructions: List[str] = []

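For illustration, a hypothetical instance exercising the new sections field (field values are made up; tool_calls and instructions fall back to their defaults):

result = InvestigationResult(
    analysis="# Alert Explanation\nThe pod XYZ was OOMKilled.",
    sections={"Alert Explanation": "The pod XYZ was OOMKilled."},
)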
2 changes: 0 additions & 2 deletions holmes/core/structured_output.py
@@ -23,8 +23,6 @@ class StructuredResponse(LLMResult):

def generate_structured_output(llm_result:LLMResult) -> StructuredResponse:

    print("")

    return StructuredResponse(
        **llm_result.model_dump(),
        sections=[],
134 changes: 5 additions & 129 deletions holmes/core/tool_calling_llm.py
@@ -8,6 +8,7 @@
from typing import List, Optional
from holmes.core.llm import LLM
from holmes.plugins.prompts import load_and_render_prompt
from holmes.core.investigation_output_format import ExpectedInvestigationOutputFormat, combine_sections
from openai import BadRequestError
from openai._types import NOT_GIVEN
from openai.types.chat.chat_completion_message_tool_call import (
@@ -61,44 +62,6 @@ class ResourceInstructions(BaseModel):
    instructions: List[str] = []
    documents: List[ResourceInstructionDocument] = []

class ExpectedOutputFormat(BaseModel):
    alert_explanation: Union[str, None]
    investigation: Union[str, None]
    conclusions_and_possible_root_causes: Union[str, None]
    next_steps: Union[str, None]

schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "required": [
        "Alert Explanation",
        "Investigation",
        "Conclusions and Possible Root causes",
        "Next Steps"
    ],
    "properties": {
        "Alert Explanation": {
            "type": ["string", "null"],
            "description": "1-2 sentences explaining the alert itself - note don't say \"The alert indicates a warning event related to a Kubernetes pod doing blah\" rather just say \"The pod XYZ did blah\" because that is what the user actually cares about"
        },
        "Investigation": {
            "type": ["string", "null"],
            "description": "what you checked and found"
        },
        "Conclusions and Possible Root causes": {
            "type": ["string", "null"],
            "description": "what conclusions can you reach based on the data you found? what are possible root causes (if you have enough conviction to say) or what uncertainty remains"
        },
        "Next Steps": {
            "type": ["string", "null"],
            "description": "what you would do next to troubleshoot this issue, any commands that could be run to fix it, or other ways to solve it (prefer giving precise bash commands when possible)"
        }
    },
    "additionalProperties": False
}

response_format = { "type": "json_schema", "json_schema": schema , "strict": False }

class ToolCallingLLM:

    llm: LLM
@@ -156,7 +119,7 @@ def call(
    logging.warning("Token limit exceeded. Truncating tool responses.")
    messages = self.truncate_messages_to_fit_context(
        messages, max_context_size, maximum_output_token
    )

    logging.debug(f"sending messages={messages}\n\ntools={tools}")
    try:
@@ -199,7 +162,7 @@
        pass
    if not isinstance(text_response, str):
        sections = text_response
        text_response = stringify_sections(sections)
        text_response = combine_sections(sections)

    if not tools_to_call:
        # For chatty models post process and summarize the result
@@ -275,7 +238,7 @@ def _invoke_tool(

    tool_response = tool.invoke(tool_params)

    return ToolCallResult(
        tool_call_id=tool_call_id,
        tool_name=tool_name,
        description=tool.get_parameterized_one_liner(tool_params),
@@ -426,93 +389,6 @@ def investigate(
    )
    logging.debug("Rendered user prompt:\n%s", textwrap.indent(user_prompt, " "))

    res = self.prompt_call(system_prompt, user_prompt, post_processing_prompt, response_format=ExpectedOutputFormat)
    print(res)
    print("******")
    res = self.prompt_call(system_prompt, user_prompt, post_processing_prompt, response_format=ExpectedInvestigationOutputFormat)
    res.instructions = runbooks
    generate_structured_output(res, llm=self.llm)
    return res

## ## ## ## ## ## START CUSTOM STRUCTURED RESPONSE

class StructuredSection(BaseModel):
    title: str
    content: Union[str, None]
    contains_meaningful_information: bool

class StructuredResponse(BaseModel):
    sections: List[StructuredSection]

# class StructuredLLMResult(LLMResult):
# sections: List[StructuredSection]


EXPECTED_SECTIONS = [
    "investigation steps",
    "conclusions and possible root causes",
    "related logs",
    "alert explanation",
    "next steps"
]

PROMPT = f"""
Your job as a LLM is to take the unstructured output from another LLM
and structure it into sections. Keep the original wording and do not
add any information that is not already there.
Return a JSON with the section title, its content and . Each section content should
be markdown formatted text. If you consider a section as empty, set
its corresponding value to null.
For example:
[{{
"title": "investigation steps",
"text": "The pod `kafka-consumer` is in a `Failed` state with the container terminated for an unknown reason and an exit code of 255.",
"contains_meaningful_information": true
}}, {{
"title": "conclusions and possible root causes",
"text": "...",
"contains_meaningful_information": true
}}, {{
"title": "next steps",
"text": null,
"contains_meaningful_information": false
}}, ...]
The section titles are [{", ".join(EXPECTED_SECTIONS)}]
"""

def stringify_sections(sections: Any) -> str:
    if isinstance(sections, dict):
        content = ''
        for section_title, section_content in sections.items():
            content = content + f'\n# {" ".join(section_title.split("_")).title()}\n{section_content}'
        return content
    return f"{sections}"

def generate_structured_output(llm_result:LLMResult, llm:LLM) -> LLMResult:
    if not llm_result.result:
        return LLMResult(
            **llm_result.model_dump()
        )

    messages = [
        {"role": "system", "content": PROMPT},
        {"role": "user", "content": llm_result.result},
    ]

    r = llm.completion(
        model_override="gpt-4o-mini",
        messages=messages,
        temperature=0.00000001,
        response_format=StructuredResponse,
        drop_params=True,
    )
    # r_json = r.to_json()
    # result = StructuredLLMResult.model_validate_json(r.choices[0].message.content)
    print(r)
    llm_result.sections = {}
    return llm_result

## ## ## ## ## ## END CUSTOM STRUCTURED RESPONSE
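Net effect of this file's changes: the experimental two-pass flow above (free-text answer, then a second gpt-4o-mini call to re-slice it into sections) is removed in favor of a single call that requests structured output through the JSON schema. A minimal sketch of that pattern, assuming an OpenAI-compatible client; the client setup, model name, and message content are illustrative, not HolmesGPT's actual call sites:

import json
from openai import OpenAI
from holmes.core.investigation_output_format import (
    ExpectedInvestigationOutputFormat,
    combine_sections,
)

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# One completion call; the json_schema response format replaces the
# second restructuring pass.
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Investigate alert XYZ"}],
    response_format=ExpectedInvestigationOutputFormat,
)

sections = json.loads(response.choices[0].message.content)
text = combine_sections(sections)  # flattened markdown for plain-text consumers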
41 changes: 0 additions & 41 deletions holmes/plugins/prompts/generic_investigation copy.jinja2

This file was deleted.

2 changes: 1 addition & 1 deletion holmes/plugins/prompts/generic_investigation.jinja2
@@ -32,7 +32,7 @@ Style Guide:
* But only quote relevant numbers or metrics that are available. Do not guess.
* Remove unnecessary words

Give your answer in a JSON format with the following sections. The content of each section should be formatted with markdown:
Give your answer in a JSON format with the following sections. You can skip a section if it's not relevant to the investigation. The content of each section should be formatted with markdown:

- Alert Explanation: <1-2 sentences explaining the alert itself - note don't say "The alert indicates a warning event related to a Kubernetes pod doing blah" rather just say "The pod XYZ did blah" because that is what the user actually cares about>
- Investigation: <what you checked and found>
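(The remaining section bullets are truncated in this diff view.) For illustration, a hypothetical answer matching this JSON format, shown as a Python dict; the content is made up:

example_answer = {
    "Alert Explanation": "The pod `payment-api` exceeded its 512Mi memory limit and was OOMKilled.",
    "Investigation": "Pod events show repeated OOMKilled terminations; memory usage climbed before each restart.",
    "Conclusions and Possible Root causes": "Likely a memory leak or an undersized limit.",
    "Next Steps": "Raise the limit: `kubectl -n prod set resources deploy/payment-api --limits=memory=1Gi`",
}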
9 changes: 8 additions & 1 deletion tests/llm/test_investigate.py
@@ -33,7 +33,7 @@ def __init__(self, test_case:InvestigateTestCase):
        self._test_case = test_case

    def create_tool_executor(
        self, console: Console, allowed_toolsets: ToolsetPattern, dal:Optional[SupabaseDal]
        self, console: Console, dal:Optional[SupabaseDal]
    ) -> ToolExecutor:

        mock = MockToolsets(generate_mocks=self._test_case.generate_mocks, test_case_folder=self._test_case.folder)
@@ -127,6 +127,13 @@ def test_investigate(experiment_name, test_case):
    print(f"** OUTPUT **\n{output}")
    print(f"** SCORES **\n{scores}")

    assert result.sections
    assert len(result.sections) >= 4
    assert result.sections.get("Alert Explanation")
    assert result.sections.get("Investigation")
    assert result.sections.get("Conclusions and Possible Root causes")
    assert result.sections.get("Next Steps")

    if scores.get("faithfulness"):
        assert scores.get("faithfulness") >= test_case.evaluation.faithfulness

