Skip to content

Commit 83bd929

Browse files
maoyixieDonggeLiu
andauthored
1 parent 97c85f2 commit 83bd929

File tree

10 files changed

+162
-155
lines changed

10 files changed

+162
-155
lines changed

agent/crash_analyzer.py

Lines changed: 52 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from results import AnalysisResult, CrashResult, Result, RunResult
3131
from tool.base_tool import BaseTool
3232
from tool.container_tool import ProjectContainerTool
33-
from tool.lldb_tool import LLDBTool
33+
from tool.gdb_tool import GDBTool
3434

3535
MAX_ROUND = 100
3636

@@ -66,31 +66,39 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
6666
trial=self.trial)
6767
return prompt_builder.CrashAnalyzerTemplateBuilder(self.llm).build([])
6868

69-
def _format_lldb_execution_result(
69+
def _format_gdb_execution_result(
7070
self,
71-
lldb_command: str,
71+
gdb_command: str,
7272
process: sp.CompletedProcess,
7373
previous_prompt: Optional[Prompt] = None) -> str:
74-
"""Formats a prompt based on lldb execution result."""
74+
"""Formats a prompt based on gdb execution result."""
7575
if previous_prompt:
7676
previous_prompt_text = previous_prompt.get()
7777
else:
7878
previous_prompt_text = ''
79-
stdout = self.llm.truncate_prompt(process.stdout,
79+
80+
raw_lines = process.stdout.strip().splitlines()
81+
if raw_lines and raw_lines[-1].strip().startswith("(gdb)"):
82+
raw_lines.pop()
83+
if raw_lines:
84+
raw_lines[0] = f'(gdb) {raw_lines[0].strip()}'
85+
processed_stdout = '\n'.join(raw_lines)
86+
87+
stdout = self.llm.truncate_prompt(processed_stdout,
8088
previous_prompt_text).strip()
8189
stderr = self.llm.truncate_prompt(process.stderr,
8290
stdout + previous_prompt_text).strip()
83-
return (f'<lldb command>\n{lldb_command.strip()}\n</lldb command>\n'
84-
f'<lldb output>\n{stdout}\n</lldb output>\n'
91+
return (f'<gdb command>\n{gdb_command.strip()}\n</gdb command>\n'
92+
f'<gdb output>\n{stdout}\n</gdb output>\n'
8593
f'<stderr>\n{stderr}\n</stderr>\n')
8694

87-
def _container_handle_lldb_command(self, response: str, tool: LLDBTool,
88-
prompt: Prompt) -> Prompt:
89-
"""Handles the command from LLM with lldb tool."""
95+
def _container_handle_gdb_command(self, response: str, tool: GDBTool,
96+
prompt: Prompt) -> Prompt:
97+
"""Handles the command from LLM with gdb tool."""
9098
prompt_text = ''
91-
for command in self._parse_tags(response, 'lldb'):
99+
for command in self._parse_tags(response, 'gdb'):
92100
process = tool.execute_in_screen(command)
93-
prompt_text += self._format_lldb_execution_result(
101+
prompt_text += self._format_gdb_execution_result(
94102
command, process, previous_prompt=prompt) + '\n'
95103
prompt.append(prompt_text)
96104
return prompt
@@ -103,9 +111,9 @@ def _container_handle_conclusion(self, cur_round: int, response: str,
103111
trial=self.trial)
104112

105113
conclusion = self._parse_tag(response, 'conclusion')
106-
if conclusion == 'Crash is caused by bug in fuzz driver.':
114+
if conclusion == 'False':
107115
crash_result.true_bug = False
108-
elif conclusion == 'Crash is caused by bug in project.':
116+
elif conclusion == 'True':
109117
crash_result.true_bug = True
110118
else:
111119
logger.error('***** Failed to match conclusion in %02d rounds *****',
@@ -127,11 +135,10 @@ def _container_tool_reaction(self, cur_round: int, response: str,
127135
crash_result)
128136
prompt = prompt_builder.CrashAnalyzerTemplateBuilder(self.llm,
129137
None).build([])
130-
if self._parse_tag(response, 'lldb'):
131-
return self._container_handle_lldb_command(response, self.analyze_tool,
132-
prompt)
138+
if self._parse_tag(response, 'gdb'):
139+
return self._container_handle_gdb_command(response, self.gdb_tool, prompt)
133140
if self._parse_tag(response, 'bash'):
134-
return self._container_handle_bash_command(response, self.check_tool,
141+
return self._container_handle_bash_command(response, self.bash_tool,
135142
prompt)
136143
return None
137144

@@ -152,7 +159,7 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
152159
generated_target_name = os.path.basename(benchmark.target_path)
153160
sample_id = os.path.splitext(generated_target_name)[0]
154161
generated_oss_fuzz_project = (
155-
f'{benchmark.id}-{sample_id}-lldb-{self.trial:02d}')
162+
f'{benchmark.id}-{sample_id}-gdb-{self.trial:02d}')
156163
generated_oss_fuzz_project = oss_fuzz_checkout.rectify_docker_tag(
157164
generated_oss_fuzz_project)
158165

@@ -169,25 +176,35 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
169176
else:
170177
build_script_path = ''
171178

172-
evaluator_lib.Evaluator.create_ossfuzz_project_with_lldb(
179+
evaluator_lib.Evaluator.create_ossfuzz_project_with_gdb(
173180
benchmark, generated_oss_fuzz_project, fuzz_target_path, last_result,
174181
build_script_path, last_result.artifact_path)
175182

176-
self.analyze_tool = LLDBTool(benchmark,
177-
result=last_result,
178-
name='lldb',
179-
project_name=generated_oss_fuzz_project)
180-
self.analyze_tool.execute('compile > /dev/null')
181-
# Launch LLDB and load fuzz target binary
182-
self.analyze_tool.execute(f'screen -dmS lldb_session -L '
183-
f'-Logfile /tmp/lldb_log.txt '
184-
f'lldb /out/{last_result.benchmark.target_name}')
185-
self.check_tool = ProjectContainerTool(
183+
self.gdb_tool = GDBTool(benchmark,
184+
result=last_result,
185+
name='gdb',
186+
project_name=generated_oss_fuzz_project)
187+
#TODO(dongge): Use a dedicated debugger image, which has the binary and
188+
#source code.
189+
self.gdb_tool.execute(
190+
'apt update && '
191+
'apt install -y software-properties-common && '
192+
'add-apt-repository -y ppa:ubuntu-toolchain-r/test && '
193+
'apt update && '
194+
'apt install -y gdb screen')
195+
self.gdb_tool.execute('export CFLAGS="$CFLAGS -g -O0"')
196+
self.gdb_tool.execute('export CXXFLAGS="$CXXFLAGS -g -O0"')
197+
self.gdb_tool.execute('compile > /dev/null')
198+
# Launch GDB and load fuzz target binary
199+
self.gdb_tool.execute(f'screen -dmS gdb_session -L '
200+
f'-Logfile /tmp/gdb_log.txt '
201+
f'gdb /out/{last_result.benchmark.target_name}')
202+
self.bash_tool = ProjectContainerTool(
186203
benchmark, name='check', project_name=generated_oss_fuzz_project)
187-
self.check_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null')
204+
self.bash_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null')
188205
prompt = self._initial_prompt(result_history)
189-
prompt.add_problem(self.analyze_tool.tutorial())
190-
prompt.add_problem(self.check_tool.tutorial())
206+
prompt.add_problem(self.gdb_tool.tutorial())
207+
prompt.add_problem(self.bash_tool.tutorial())
191208
crash_result = CrashResult(benchmark=benchmark,
192209
trial=last_result.trial,
193210
work_dirs=last_result.work_dirs,
@@ -208,9 +225,9 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
208225
finally:
209226
# Cleanup: stop the container
210227
logger.debug('Stopping the crash analyze container %s',
211-
self.analyze_tool.container_id,
228+
self.gdb_tool.container_id,
212229
trial=self.trial)
213-
self.analyze_tool.terminate()
230+
self.gdb_tool.terminate()
214231

215232
analysis_result = AnalysisResult(
216233
author=self,

experiment/evaluator.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -303,12 +303,12 @@ def create_ossfuzz_project(benchmark: Benchmark,
303303
return name
304304

305305
@staticmethod
306-
def create_ossfuzz_project_with_lldb(benchmark: Benchmark,
307-
name: str,
308-
target_file: str,
309-
run_result: results.RunResult,
310-
build_script_path: str = '',
311-
artifact_path: str = '') -> str:
306+
def create_ossfuzz_project_with_gdb(benchmark: Benchmark,
307+
name: str,
308+
target_file: str,
309+
run_result: results.RunResult,
310+
build_script_path: str = '',
311+
artifact_path: str = '') -> str:
312312
"""Creates an OSS-Fuzz project with the generated target and new dockerfile.
313313
The new project will replicate an existing project |name| but replace its
314314
fuzz target and build script with the new |target_file| and
@@ -322,15 +322,15 @@ def create_ossfuzz_project_with_lldb(benchmark: Benchmark,
322322
artifact_path,
323323
os.path.join(generated_project_path, os.path.basename(artifact_path)))
324324
# Add additional statement in dockerfile to copy testcase,
325-
# enable -g, install lldb and screen
325+
# enable -g, install gdb and screen
326326
with open(os.path.join(generated_project_path, 'Dockerfile'), 'a') as f:
327327
f.write(
328328
'\nRUN mkdir -p /artifact\n'
329329
f'\nCOPY {os.path.basename(run_result.artifact_path)} /artifact/\n'
330330
'\nENV CFLAGS="${CFLAGS} -g -O0"\n'
331331
'\nENV CXXFLAGS="${CXXFLAGS} -g -O0"\n'
332332
'\nRUN apt-get update\n'
333-
'\nRUN apt-get install -y lldb\n'
333+
'\nRUN apt-get install -y gdb\n'
334334
'\nRUN apt-get install -y screen\n')
335335

336336
return name

llm_toolkit/output_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,9 @@ def parse_triage(triage_path: str) -> tuple[str, str]:
103103
solution = triage.split('</solution>')[0]
104104
lines = solution.splitlines()
105105
for line in lines:
106-
if "Crash is caused by bug in fuzz driver" in line:
106+
if "False" in line:
107107
return (TriageResult.DRIVER, '\n'.join(lines))
108-
if "Crash is caused by bug in project" in line:
108+
if "True" in line:
109109
return (TriageResult.PROJECT, '\n'.join(lines))
110110

111111
return (TriageResult.NOT_APPLICABLE, '\n'.join(lines))

prompts/agent/crash_analyzer-priming.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
Given the following crash report, fuzz driver code and relevant project function code, analyze the cause of the crash using LLDB tool step by step.
2-
First, make a conclusion, only answer “Crash is caused by bug in fuzz driver” or “Crash is caused by bug in project. Second, offer succinct and to-the-point analyses and suggestions.
1+
Given the following crash report, fuzz driver code and relevant project function code, analyze the cause of the crash using GDB tool step by step.
2+
First, make a conclusion, ONLY ANSWER "False" if the crash is caused by bug in fuzz driver OR ONLY ANSWER "True" if the crash is caused by bug in project. Second, offer succinct and to-the-point analyses and suggestions.
33

44
Below is crash report:
55
<log>
@@ -16,4 +16,4 @@ Below is relevant project function code:
1616
{PROJECT_FUNCTION_CODE}
1717
</code>
1818

19-
To help analyze the root cause behind the runtime crash, you can leverage LLDB tool and BASH tool to obtain information.
19+
To help analyze the root cause behind the runtime crash, you can leverage GDB tool and BASH tool to obtain information.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
Given the following crash report, fuzz driver code and relevant project function code, analyze the cause of the crash.
22

3-
First, make a conclusion, only answer “Crash is caused by bug in fuzz driver” or “Crash is caused by bug in project. Second, offer succinct and to-the-point analyses and suggestions.
3+
First, make a conclusion, ONLY ANSWER "False" if the crash is caused by bug in fuzz driver OR ONLY ANSWER "True" if the crash is caused by bug in project. Second, offer succinct and to-the-point analyses and suggestions.

prompts/tool/gdb_tool.txt

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<tool>
2+
**GDB tool Guide**
3+
You can leverage GDB by interactively sending me a GDB command, and I will provide you with the output of the command. The path of fuzz driver binary is '/out/{TARGET_NAME}'. The testcase that triggers runtime crash is stored at '{AFTIFACT_PATH}'.
4+
5+
<interaction protocols>
6+
1. I have executed 'gdb /out/{TARGET_NAME}'. You are now in GDB session, NOT in shell session. DO NOT run 'gdb /out/{TARGET_NAME}' again! DO NOT run shell commands!
7+
2. Strictly ONE GDB command at a time!
8+
3. Each message you send should first explain the reason why you want to run the command wrapped by <reason></reason>, then provide the command to run wrapped in <gdb></gdb> in this format:
9+
<reason>
10+
Reasons here.
11+
</reason>
12+
<gdb>
13+
One gdb command here.
14+
</gdb>
15+
4. Each response I send will repeat the command you sent wrapped in <gdb command></gdb command> for you to double-check, followed by the command standard output wrapped in <gdb output></gdb output> and stderr wrapped in <stderr></stderr> in this format:
16+
<gdb command>
17+
The command I executed, copied from the command you sent.
18+
</gdb command>
19+
<gdb output>
20+
The standard output of the command.
21+
</gdb output>
22+
<stderr>
23+
The standard error of the command.
24+
</stderr>
25+
5. The final goal is to answer questions about runtime crash, executed fuzz driver and project under test: a) ‘False’ (if the crash is caused by bug in fuzz driver) or ‘True’ (if the crash is caused by bug in project)? b) If the crash is caused by bug in fuzz driver, provide analyses, and are there any suggestions for modifying the fuzz driver? c) If the crash is caused by bug in project, provide analyses, and are there any suggestions for patching the project?
26+
6. If you have a conclusion on above questions, output the conclusion wrapped by <conclusion></conclusion> followed by the analysis and suggestion wrapped in <analysis and suggestion></analysis and suggestion>:
27+
<conclusion>
28+
‘False’ or ‘True’
29+
</conclusion>
30+
<analysis and suggestion>
31+
Analysis and suggestion
32+
</analysis and suggestion>
33+
</interaction protocols>
34+
35+
<general rules>
36+
1. DO NOT wrap code snippets with ```, using the XML-style tags above will suffice.
37+
2. DO NOT Compile or Run Code!
38+
3. Strictly ONE GDB command at a time!
39+
4. DO NOT run 'gdb /out/{TARGET_NAME}' again!
40+
5. DO NOT run shell commands!
41+
</general rules>
42+
</tool>

prompts/tool/lldb_tool.txt

Lines changed: 0 additions & 42 deletions
This file was deleted.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ py-modules = [
6666
"tool.base_tool",
6767
"tool.container_tool",
6868
"tool.gbucket_tool",
69-
"tool.lldb_tool",
69+
"tool.gdb_tool",
7070
"tool.bash_tool",
7171
"tool.fuzz_introspector_tool",
7272
"experiment.fuzz_target_error",

tool/gdb_tool.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""A tool for LLM agents to interact within a GDB."""
15+
import logging
16+
import subprocess as sp
17+
import time
18+
19+
from experiment.benchmark import Benchmark
20+
from results import RunResult
21+
from tool.container_tool import ProjectContainerTool
22+
23+
logger = logging.getLogger(__name__)
24+
25+
26+
class GDBTool(ProjectContainerTool):
  """A tool for LLM agents to drive a GDB session running inside a container.

  Commands are not run by spawning GDB per call. They are typed into an
  already-running `screen` session named `gdb_session`, and the output is read
  back from that session's log file `/tmp/gdb_log.txt`. Starting the session
  is the caller's responsibility.
  """

  def __init__(self,
               benchmark: Benchmark,
               result: RunResult,
               name: str = '',
               project_name: str = '') -> None:
    """Initializes the tool.

    Args:
      benchmark: Benchmark under analysis; forwarded to the base class.
      result: Run result whose `artifact_path` is advertised in the tutorial.
      name: Tool name; forwarded to the base class.
      project_name: Project name; forwarded to the base class.
    """
    super().__init__(benchmark, name, project_name)
    self.result = result

  def tutorial(self) -> str:
    """Constructs a tool guide tutorial for LLM agents.

    Returns:
      The content of `gdb_tool.txt` with the artifact path and target name
      placeholders substituted.
    """
    template = self._get_tutorial_file_content('gdb_tool.txt')
    # NOTE: '{AFTIFACT_PATH}' is spelled this way in the template file; the
    # token must match it exactly for the substitution to happen.
    template = template.replace('{AFTIFACT_PATH}', self.result.artifact_path)
    return template.replace('{TARGET_NAME}', self.benchmark.target_name)

  def execute_in_screen(self, gdb_command: str) -> sp.CompletedProcess:
    """Sends a command to the gdb_session screen and returns GDB output.

    Args:
      gdb_command: The GDB command to type into the session, without the
        trailing carriage return.

    Returns:
      The completed process of `cat /tmp/gdb_log.txt`; its stdout holds
      whatever the session logged since the log was truncated by this call.
    """
    # Local import keeps this block self-contained.
    import shlex

    # Flush pending log output, then empty the log so that only output
    # produced by this command is returned.
    self.execute('screen -S gdb_session -X logfile flush 0')
    self.execute('truncate -s 0 /tmp/gdb_log.txt')

    # Quote the whole payload for the shell. Wrapping it in double quotes and
    # escaping only '"' let the shell expand '$...', backticks and '$(...)',
    # which blanked GDB variables such as '$pc' and allowed command
    # substitution to run in the container.
    # NOTE(review): screen may apply its own escape handling to the stuffed
    # string (e.g. '\\', '^', '$'); confirm against the screen manual.
    stuffed = shlex.quote(gdb_command + '\r')
    self.execute(f'screen -S gdb_session -X stuff {stuffed}')

    # Give GDB time to run the command before collecting the log.
    time.sleep(1.0)
    self.execute('screen -S gdb_session -X logfile flush 0')
    return self.execute('cat /tmp/gdb_log.txt')

0 commit comments

Comments
 (0)