Use gdb instead of lldb (#1077)

maoyixie · DonggeLiu · web-flow · commit 83bd92992656 · 2025-06-10T15:29:01.000+10:00
This PR aims to fix the wrong artifact path in lldb and uses gdb instead of lldb. Example issue: https://llm-exp.oss-fuzz.com/Result-reports/ofg-pr/2025-06-02-814-my4-comparison/sample/output-xs-fxloadmodulesrejected/06.html#:~:text=factor.%0A%3C/reason%3E%0A%3Cbash%3E-,ls%20%2Dl%20/artifact/crash%2Dda39a3ee5e6b4b0d3255bfef95601890afd80709,-%3C/bash%3E --------- Co-authored-by: Dongge Liu <donggeliu@google.com>
diff --git a/agent/crash_analyzer.py b/agent/crash_analyzer.py
@@ -30,7 +30,7 @@
 from results import AnalysisResult, CrashResult, Result, RunResult
 from tool.base_tool import BaseTool
 from tool.container_tool import ProjectContainerTool
-from tool.lldb_tool import LLDBTool
+from tool.gdb_tool import GDBTool
 
 MAX_ROUND = 100
 
@@ -66,31 +66,39 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
                  trial=self.trial)
     return prompt_builder.CrashAnalyzerTemplateBuilder(self.llm).build([])
 
-  def _format_lldb_execution_result(
+  def _format_gdb_execution_result(
       self,
-      lldb_command: str,
+      gdb_command: str,
       process: sp.CompletedProcess,
       previous_prompt: Optional[Prompt] = None) -> str:
-    """Formats a prompt based on lldb execution result."""
+    """Formats a prompt based on gdb execution result."""
     if previous_prompt:
       previous_prompt_text = previous_prompt.get()
     else:
       previous_prompt_text = ''
-    stdout = self.llm.truncate_prompt(process.stdout,
+
+    raw_lines = process.stdout.strip().splitlines()
+    if raw_lines and raw_lines[-1].strip().startswith("(gdb)"):
+      raw_lines.pop()
+    if raw_lines:
+      raw_lines[0] = f'(gdb) {raw_lines[0].strip()}'
+    processed_stdout = '\n'.join(raw_lines)
+
+    stdout = self.llm.truncate_prompt(processed_stdout,
                                       previous_prompt_text).strip()
     stderr = self.llm.truncate_prompt(process.stderr,
                                       stdout + previous_prompt_text).strip()
-    return (f'<lldb command>\n{lldb_command.strip()}\n</lldb command>\n'
-            f'<lldb output>\n{stdout}\n</lldb output>\n'
+    return (f'<gdb command>\n{gdb_command.strip()}\n</gdb command>\n'
+            f'<gdb output>\n{stdout}\n</gdb output>\n'
             f'<stderr>\n{stderr}\n</stderr>\n')
 
-  def _container_handle_lldb_command(self, response: str, tool: LLDBTool,
-                                     prompt: Prompt) -> Prompt:
-    """Handles the command from LLM with lldb tool."""
+  def _container_handle_gdb_command(self, response: str, tool: GDBTool,
+                                    prompt: Prompt) -> Prompt:
+    """Handles the command from LLM with gdb tool."""
     prompt_text = ''
-    for command in self._parse_tags(response, 'lldb'):
+    for command in self._parse_tags(response, 'gdb'):
       process = tool.execute_in_screen(command)
-      prompt_text += self._format_lldb_execution_result(
+      prompt_text += self._format_gdb_execution_result(
           command, process, previous_prompt=prompt) + '\n'
       prompt.append(prompt_text)
     return prompt
@@ -103,9 +111,9 @@ def _container_handle_conclusion(self, cur_round: int, response: str,
                 trial=self.trial)
 
     conclusion = self._parse_tag(response, 'conclusion')
-    if conclusion == 'Crash is caused by bug in fuzz driver.':
+    if conclusion == 'False':
       crash_result.true_bug = False
-    elif conclusion == 'Crash is caused by bug in project.':
+    elif conclusion == 'True':
       crash_result.true_bug = True
     else:
       logger.error('***** Failed to match conclusion in %02d rounds *****',
@@ -127,11 +135,10 @@ def _container_tool_reaction(self, cur_round: int, response: str,
                                                crash_result)
     prompt = prompt_builder.CrashAnalyzerTemplateBuilder(self.llm,
                                                          None).build([])
-    if self._parse_tag(response, 'lldb'):
-      return self._container_handle_lldb_command(response, self.analyze_tool,
-                                                 prompt)
+    if self._parse_tag(response, 'gdb'):
+      return self._container_handle_gdb_command(response, self.gdb_tool, prompt)
     if self._parse_tag(response, 'bash'):
-      return self._container_handle_bash_command(response, self.check_tool,
+      return self._container_handle_bash_command(response, self.bash_tool,
                                                  prompt)
     return None
 
@@ -152,7 +159,7 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
     generated_target_name = os.path.basename(benchmark.target_path)
     sample_id = os.path.splitext(generated_target_name)[0]
     generated_oss_fuzz_project = (
-        f'{benchmark.id}-{sample_id}-lldb-{self.trial:02d}')
+        f'{benchmark.id}-{sample_id}-gdb-{self.trial:02d}')
     generated_oss_fuzz_project = oss_fuzz_checkout.rectify_docker_tag(
         generated_oss_fuzz_project)
 
@@ -169,25 +176,35 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
     else:
       build_script_path = ''
 
-    evaluator_lib.Evaluator.create_ossfuzz_project_with_lldb(
+    evaluator_lib.Evaluator.create_ossfuzz_project_with_gdb(
         benchmark, generated_oss_fuzz_project, fuzz_target_path, last_result,
         build_script_path, last_result.artifact_path)
 
-    self.analyze_tool = LLDBTool(benchmark,
-                                 result=last_result,
-                                 name='lldb',
-                                 project_name=generated_oss_fuzz_project)
-    self.analyze_tool.execute('compile > /dev/null')
-    # Launch LLDB and load fuzz target binary
-    self.analyze_tool.execute(f'screen -dmS lldb_session -L '
-                              f'-Logfile /tmp/lldb_log.txt '
-                              f'lldb /out/{last_result.benchmark.target_name}')
-    self.check_tool = ProjectContainerTool(
+    self.gdb_tool = GDBTool(benchmark,
+                            result=last_result,
+                            name='gdb',
+                            project_name=generated_oss_fuzz_project)
+    #TODO(dongge): Use a dedicated debugger image, which has the binary and
+    #source code.
+    self.gdb_tool.execute(
+        'apt update && '
+        'apt install -y software-properties-common && '
+        'add-apt-repository -y ppa:ubuntu-toolchain-r/test && '
+        'apt update && '
+        'apt install -y gdb screen')
+    self.gdb_tool.execute('export CFLAGS="$CFLAGS -g -O0"')
+    self.gdb_tool.execute('export CXXFLAGS="$CXXFLAGS -g -O0"')
+    self.gdb_tool.execute('compile > /dev/null')
+    # Launch GDB and load fuzz target binary
+    self.gdb_tool.execute(f'screen -dmS gdb_session -L '
+                          f'-Logfile /tmp/gdb_log.txt '
+                          f'gdb /out/{last_result.benchmark.target_name}')
+    self.bash_tool = ProjectContainerTool(
         benchmark, name='check', project_name=generated_oss_fuzz_project)
-    self.check_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null')
+    self.bash_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null')
     prompt = self._initial_prompt(result_history)
-    prompt.add_problem(self.analyze_tool.tutorial())
-    prompt.add_problem(self.check_tool.tutorial())
+    prompt.add_problem(self.gdb_tool.tutorial())
+    prompt.add_problem(self.bash_tool.tutorial())
     crash_result = CrashResult(benchmark=benchmark,
                                trial=last_result.trial,
                                work_dirs=last_result.work_dirs,
@@ -208,9 +225,9 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
     finally:
       # Cleanup: stop the container
       logger.debug('Stopping the crash analyze container %s',
-                   self.analyze_tool.container_id,
+                   self.gdb_tool.container_id,
                    trial=self.trial)
-      self.analyze_tool.terminate()
+      self.gdb_tool.terminate()
 
     analysis_result = AnalysisResult(
         author=self,
diff --git a/experiment/evaluator.py b/experiment/evaluator.py
@@ -303,12 +303,12 @@ def create_ossfuzz_project(benchmark: Benchmark,
     return name
 
   @staticmethod
-  def create_ossfuzz_project_with_lldb(benchmark: Benchmark,
-                                       name: str,
-                                       target_file: str,
-                                       run_result: results.RunResult,
-                                       build_script_path: str = '',
-                                       artifact_path: str = '') -> str:
+  def create_ossfuzz_project_with_gdb(benchmark: Benchmark,
+                                      name: str,
+                                      target_file: str,
+                                      run_result: results.RunResult,
+                                      build_script_path: str = '',
+                                      artifact_path: str = '') -> str:
     """Creates an OSS-Fuzz project with the generated target and new dockerfile.
     The new project will replicate an existing project |name| but replace its
     fuzz target and build script with the new |target_file| and
@@ -322,15 +322,15 @@ def create_ossfuzz_project_with_lldb(benchmark: Benchmark,
         artifact_path,
         os.path.join(generated_project_path, os.path.basename(artifact_path)))
     # Add additional statement in dockerfile to copy testcase,
-    # enable -g, install lldb and screen
+    # enable -g, install gdb and screen
     with open(os.path.join(generated_project_path, 'Dockerfile'), 'a') as f:
       f.write(
           '\nRUN mkdir -p /artifact\n'
           f'\nCOPY {os.path.basename(run_result.artifact_path)} /artifact/\n'
           '\nENV CFLAGS="${CFLAGS} -g -O0"\n'
           '\nENV CXXFLAGS="${CXXFLAGS} -g -O0"\n'
           '\nRUN apt-get update\n'
-          '\nRUN apt-get install -y lldb\n'
+          '\nRUN apt-get install -y gdb\n'
           '\nRUN apt-get install -y screen\n')
 
     return name
diff --git a/llm_toolkit/output_parser.py b/llm_toolkit/output_parser.py
@@ -103,9 +103,9 @@ def parse_triage(triage_path: str) -> tuple[str, str]:
   solution = triage.split('</solution>')[0]
   lines = solution.splitlines()
   for line in lines:
-    if "Crash is caused by bug in fuzz driver" in line:
+    if "False" in line:
       return (TriageResult.DRIVER, '\n'.join(lines))
-    if "Crash is caused by bug in project" in line:
+    if "True" in line:
       return (TriageResult.PROJECT, '\n'.join(lines))
 
   return (TriageResult.NOT_APPLICABLE, '\n'.join(lines))
diff --git a/prompts/agent/crash_analyzer-priming.txt b/prompts/agent/crash_analyzer-priming.txt
@@ -1,5 +1,5 @@
-Given the following crash report, fuzz driver code and relevant project function code, analyze the cause of the crash using LLDB tool step by step.
-First, make a conclusion, only answer “Crash is caused by bug in fuzz driver” or “Crash is caused by bug in project”. Second, offer succinct and to-the-point analyses and suggestions.
+Given the following crash report, fuzz driver code and relevant project function code, analyze the cause of the crash using GDB tool step by step.
+First, make a conclusion, ONLY ANSWER "False" if the crash is caused by bug in fuzz driver OR ONLY ANSWER "True" if the crash is caused by bug in project. Second, offer succinct and to-the-point analyses and suggestions.
 
 Below is crash report:
 <log>
@@ -16,4 +16,4 @@ Below is relevant project function code:
 {PROJECT_FUNCTION_CODE}
 </code>
 
-To help analyze the root cause behind the runtime crash, you can leverage LLDB tool and BASH tool to obtain information.
+To help analyze the root cause behind the runtime crash, you can leverage GDB tool and BASH tool to obtain information.
diff --git a/prompts/template_xml/triager_priming.txt b/prompts/template_xml/triager_priming.txt
@@ -1,3 +1,3 @@
 Given the following crash report, fuzz driver code and relevant project function code, analyze the cause of the crash.
 
-First, make a conclusion, only answer “Crash is caused by bug in fuzz driver” or “Crash is caused by bug in project”. Second, offer succinct and to-the-point analyses and suggestions.
+First, make a conclusion, ONLY ANSWER "False" if the crash is caused by bug in fuzz driver OR ONLY ANSWER "True" if the crash is caused by bug in project. Second, offer succinct and to-the-point analyses and suggestions.
diff --git a/prompts/tool/gdb_tool.txt b/prompts/tool/gdb_tool.txt
@@ -0,0 +1,42 @@
+<tool>
+**GDB tool Guide**
+You can leverage GDB by interactively sending me a GDB command, and I will provide you with the output of the command. The path of the fuzz driver binary is '/out/{TARGET_NAME}'. The testcase that triggers the runtime crash is stored at '{AFTIFACT_PATH}'.
+
+<interaction protocols>
+1. I have executed 'gdb /out/{TARGET_NAME}'. You are now in GDB session, NOT in shell session. DO NOT run 'gdb /out/{TARGET_NAME}' again! DO NOT run shell commands!
+2. Strictly ONE GDB command at a time!
+3. Each message you send should first explain the reason why you want to run the command wrapped by <reason></reason>, then provide the command to run wrapped in <gdb></gdb> in this format:
+<reason>
+Reasons here.
+</reason>
+<gdb>
+One gdb command here.
+</gdb>
+4. Each response I send will repeat the command you sent wrapped in <gdb command></gdb command> for you to double-check, followed by the command standard output wrapped in <gdb output></gdb output> and stderr wrapped in <stderr></stderr> in this format:
+<gdb command>
+The command I executed, copied from the command you sent.
+</gdb command>
+<gdb output>
+The standard output of the command.
+</gdb output>
+<stderr>
+The standard error of the command.
+</stderr>
+5. The final goal is to answer questions about runtime crash, executed fuzz driver and project under test: a) ‘False’ (if the crash is caused by bug in fuzz driver) or ‘True’ (if the crash is caused by bug in project)? b) If the crash is caused by bug in fuzz driver, provide analyses, and are there any suggestions for modifying the fuzz driver? c) If the crash is caused by bug in project, provide analyses, and are there any suggestions for patching the project?
+6. If you have a conclusion on above questions, output the conclusion wrapped by <conclusion></conclusion> followed by the analysis and suggestion wrapped in <analysis and suggestion></analysis and suggestion>:
+<conclusion>
+‘False’ or ‘True’
+</conclusion>
+<analysis and suggestion>
+Analysis and suggestion
+</analysis and suggestion>
+</interaction protocols>
+
+<general rules>
+1. DO NOT wrap code snippets with ```, using the XML-style tags above will suffice.
+2. DO NOT Compile or Run Code!
+3. Strictly ONE GDB command at a time!
+4. DO NOT run 'gdb /out/{TARGET_NAME}' again!
+5. DO NOT run shell commands!
+</general rules>
+</tool>
diff --git a/prompts/tool/lldb_tool.txt b/prompts/tool/lldb_tool.txt
diff --git a/pyproject.toml b/pyproject.toml
@@ -66,7 +66,7 @@ py-modules = [
   "tool.base_tool",
   "tool.container_tool",
   "tool.gbucket_tool",
-  "tool.lldb_tool",
+  "tool.gdb_tool",
   "tool.bash_tool",
   "tool.fuzz_introspector_tool",
   "experiment.fuzz_target_error",
diff --git a/tool/gdb_tool.py b/tool/gdb_tool.py
@@ -0,0 +1,53 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""A tool for LLM agents to interact within a GDB."""
+import logging
+import subprocess as sp
+import time
+
+from experiment.benchmark import Benchmark
+from results import RunResult
+from tool.container_tool import ProjectContainerTool
+
+logger = logging.getLogger(__name__)
+
+
+class GDBTool(ProjectContainerTool):
+  """A tool for LLM agents to interact within a GDB."""
+
+  def __init__(self,
+               benchmark: Benchmark,
+               result: RunResult,
+               name: str = '',
+               project_name: str = '') -> None:
+    super().__init__(benchmark, name, project_name)
+    self.result = result
+
+  def tutorial(self) -> str:
+    """Constructs a tool guide tutorial for LLM agents."""
+    return self._get_tutorial_file_content('gdb_tool.txt')\
+      .replace('{AFTIFACT_PATH}', self.result.artifact_path)\
+      .replace('{TARGET_NAME}', self.benchmark.target_name)
+
+  def execute_in_screen(self, gdb_command: str) -> sp.CompletedProcess:
+    """Sends a command to the gdb_session screen and returns GDB output."""
+    self.execute('screen -S gdb_session -X logfile flush 0')
+    self.execute('truncate -s 0 /tmp/gdb_log.txt')
+
+    safe_cmd = gdb_command.replace('"', '\\"') + '\r'
+    self.execute(f'screen -S gdb_session -X stuff "{safe_cmd}"')
+
+    time.sleep(1.0)
+    self.execute('screen -S gdb_session -X logfile flush 0')
+    return self.execute('cat /tmp/gdb_log.txt')
diff --git a/tool/lldb_tool.py b/tool/lldb_tool.py

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,3 @@`
`1`	`1`	`Given the following crash report, fuzz driver code and relevant project function code, analyze the cause of the crash.`
`2`	`2`
`3`		`-First, make a conclusion, only answer “Crash is caused by bug in fuzz driver” or “Crash is caused by bug in project”. Second, offer succinct and to-the-point analyses and suggestions.`
	`3`	`+First, make a conclusion, ONLY ANSWER "False" if the crash is caused by bug in fuzz driver OR ONLY ANSWER "True" if the crash is caused by bug in project. Second, offer succinct and to-the-point analyses and suggestions.`