Skip to content
81 changes: 48 additions & 33 deletions agent/enhancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@
"""An LLM agent to improve a fuzz target's runtime performance.
Use it as a usual module locally, or as script in cloud builds.
"""
import os
import logger
from agent.prototyper import Prototyper
from llm_toolkit.prompt_builder import (CoverageEnhancerTemplateBuilder,
EnhancerTemplateBuilder,
JvmFixingBuilder)
from agent.jvm_coverage_enhancer import JvmCoverageEnhancer
from llm_toolkit.prompt_builder import (
CoverageEnhancerTemplateBuilder,
EnhancerTemplateBuilder
)
from llm_toolkit.prompts import Prompt, TextPrompt
from results import AnalysisResult, BuildResult, Result

Expand Down Expand Up @@ -48,37 +51,49 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
trial=self.trial)
return Prompt()

# Delegate JVM-specific logic to JvmCoverageEnhancer
if benchmark.language == 'jvm':
# TODO: Do this in a separate agent for JVM coverage.
builder = JvmFixingBuilder(self.llm, benchmark,
last_result.run_result.fuzz_target_source, [])
prompt = builder.build([], None, None)
else:
# TODO(dongge): Refine this logic.
if last_result.semantic_result:
error_desc, errors = last_result.semantic_result.get_error_info()
builder = EnhancerTemplateBuilder(self.llm, benchmark,
last_build_result, error_desc, errors)
elif last_result.coverage_result:
builder = CoverageEnhancerTemplateBuilder(
return JvmCoverageEnhancer(
self.llm,
benchmark,
last_build_result,
coverage_result=last_result.coverage_result)
else:
logger.error(
'Last result does not contain either semantic result or '
'coverage result',
trial=self.trial)
# TODO(dongge): Give some default initial prompt.
prompt = TextPrompt(
'Last result does not contain either semantic result or '
'coverage result')
return prompt
prompt = builder.build(example_pair=[],
tool_guides=self.inspect_tool.tutorial(),
project_dir=self.inspect_tool.project_dir)
# TODO: A different file name/dir.
prompt.save(self.args.work_dirs.prompt)
last_result,
last_build,
self.args
).initial_prompt()

return prompt
#TODO(dongge): Refine this logic.
if last_result.semantic_result:
error_desc, errors = last_result.semantic_result.get_error_info()
builder = EnhancerTemplateBuilder(
self.llm,
benchmark,
last_build,
error_desc,
errors
)
elif last_result.coverage_result:
builder = CoverageEnhancerTemplateBuilder(
self.llm,
benchmark,
last_build,
coverage_result=last_result.coverage_result
)
else:
logger.error(
'Last result does not contain either semantic result or coverage result',
trial=self.trial
)
# TODO(dongge): Give some default initial prompt.
return TextPrompt(
'Last result does not contain either semantic result or coverage result'
)

prompt = builder.build(
example_pair=[],
tool_guides=self.inspect_tool.tutorial(),
project_dir=self.inspect_tool.project_dir
)
# Save to a dedicated enhancer prompt file
prompt_path = os.path.join(self.args.work_dirs.prompt, 'enhancer_initial.txt')
prompt.save(prompt_path)
return prompt
52 changes: 52 additions & 0 deletions agent/jvm_coverage_enhancer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import logger
from agent.prototyper import Prototyper
from llm_toolkit.prompt_builder import JvmFixingBuilder
from llm_toolkit.prompts import Prompt
from results import AnalysisResult, BuildResult


class JvmCoverageEnhancer(Prototyper):
  """Agent that builds the initial LLM prompt for JVM coverage work.

  Thin wrapper around JvmFixingBuilder: it takes the fuzz target source
  recorded by a prior AnalysisResult and turns it into a JVM-focused
  fixing prompt, persisting the prompt text for later inspection.
  """

  def __init__(self, llm, benchmark, analysis_result: AnalysisResult,
               build_result: BuildResult, args):
    """Stores the prior analysis/build results used to seed the prompt."""
    super().__init__(llm, benchmark, args=args)
    # Result of the last analysis; its run_result carries the current
    # fuzz target source that the prompt asks the LLM to improve.
    self.analysis = analysis_result
    # Prior build result, kept for callers; not read by initial_prompt().
    self.build = build_result

  def initial_prompt(self) -> Prompt:
    """Constructs initial JVM-focused prompt."""
    fixing_builder = JvmFixingBuilder(
        self.llm,
        self.benchmark,
        self.analysis.run_result.fuzz_target_source,
        [],
    )
    prompt = fixing_builder.build(example_pair=[],
                                  tool_guides=None,
                                  project_dir=None)

    # Persist the prompt alongside the other prompt artifacts.
    # NOTE(review): assumes args.work_dirs.prompt is a directory — the
    # pre-existing enhancers passed it straight to prompt.save() as if it
    # were a file path; confirm against experiment.workdir.WorkDirs.
    prompt.save(os.path.join(self.args.work_dirs.prompt, 'jvm_initial.txt'))
    return prompt
141 changes: 82 additions & 59 deletions agent/one_prompt_enhancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,76 +17,99 @@
import logger
from agent.one_prompt_prototyper import OnePromptPrototyper
from experiment.workdir import WorkDirs
from llm_toolkit.prompt_builder import DefaultTemplateBuilder, JvmFixingBuilder
from llm_toolkit.prompt_builder import DefaultTemplateBuilder
from llm_toolkit.prompts import Prompt
from results import AnalysisResult, BuildResult, Result
from jvm_coverage_enhancer import JvmCoverageEnhancer


class OnePromptEnhancer(OnePromptPrototyper):
"""The Agent to generate a simple but valid fuzz target from scratch."""
"""The Agent to generate a simple but valid fuzz target from scratch."""

def _initial_prompt(self, results: list[Result]) -> Prompt:
"""Constructs initial prompt of the agent."""
last_result = results[-1]
benchmark = last_result.benchmark
def _initial_prompt(self, results: list[Result]) -> Prompt:
"""Constructs initial prompt of the agent."""
last_result = results[-1]
benchmark = last_result.benchmark

if not isinstance(last_result, AnalysisResult):
logger.error('The last result in Enhancer is not AnalysisResult: %s',
results,
trial=self.trial)
return Prompt()
if not isinstance(last_result, AnalysisResult):
logger.error(
'The last result in Enhancer is not AnalysisResult: %s',
results,
trial=self.trial
)
return Prompt()

if benchmark.language == 'jvm':
# TODO: Do this in a separate agent for JVM coverage.
builder = JvmFixingBuilder(self.llm, benchmark,
last_result.run_result.fuzz_target_source, [])
prompt = builder.build([], None, None)
else:
# TODO(dongge): Refine this logic.
builder = DefaultTemplateBuilder(self.llm)
if last_result.semantic_result:
error_desc, errors = last_result.semantic_result.get_error_info()
prompt = builder.build_fixer_prompt(benchmark,
last_result.fuzz_target_source,
error_desc,
errors,
context='',
instruction='')
else:
prompt = builder.build_fixer_prompt(
benchmark=benchmark,
raw_code=last_result.fuzz_target_source,
error_desc='',
errors=[],
coverage_result=last_result.coverage_result,
context='',
instruction='')
# TODO: A different file name/dir.
prompt.save(self.args.work_dirs.prompt)
# For JVM benchmarks, delegate to the dedicated coverage enhancer
if benchmark.language == 'jvm':
jvm_agent = JvmCoverageEnhancer(
llm=self.llm,
benchmark=benchmark,
analysis_result=last_result,
build_result=None,
args=self.args,
)
prompt = jvm_agent.initial_prompt()
else:
builder = DefaultTemplateBuilder(self.llm)

return prompt
# If there were semantic errors, build a fixer prompt
# TODO(dongge): Refine this logic.
if last_result.semantic_result:
error_desc, errors = last_result.semantic_result.get_error_info()
prompt = builder.build_fixer_prompt(
benchmark=benchmark,
raw_code=last_result.fuzz_target_source,
error_desc=error_desc,
errors=errors,
context='',
instruction='',
)
else:
# Build a default fixer prompt based on coverage feedback
prompt = builder.build_fixer_prompt(
benchmark=benchmark,
raw_code=last_result.fuzz_target_source,
error_desc='',
errors=[],
coverage_result=last_result.coverage_result,
context='',
instruction='',
)

def execute(self, result_history: list[Result]) -> BuildResult:
"""Executes the agent based on previous result."""
last_result = result_history[-1]
logger.info('Executing One Prompt Enhancer', trial=last_result.trial)
# Use keep to avoid deleting files, such as benchmark.yaml
WorkDirs(self.args.work_dirs.base, keep=True)
# TODO: A different file name/dir.
prompt.save(self.args.work_dirs.prompt)

prompt = self._initial_prompt(result_history)
cur_round = 1
build_result = BuildResult(benchmark=last_result.benchmark,
trial=last_result.trial,
work_dirs=last_result.work_dirs,
author=self,
chat_history={self.name: prompt.gettext()})
return prompt

while prompt and cur_round <= self.max_round:
self._generate_fuzz_target(prompt, result_history, build_result,
cur_round)
def execute(self, result_history: list[Result]) -> BuildResult:
"""Executes the agent based on previous result."""
last_result = result_history[-1]
logger.info('Executing One Prompt Enhancer', trial=last_result.trial)

self._validate_fuzz_target(cur_round, build_result)
prompt = self._advice_fuzz_target(build_result, cur_round)
cur_round += 1
# Use keep to avoid deleting files, such as benchmark.yaml
WorkDirs(self.args.work_dirs.base, keep=True)

prompt = self._initial_prompt(result_history)
cur_round = 1
build_result = BuildResult(
benchmark=last_result.benchmark,
trial=last_result.trial,
work_dirs=last_result.work_dirs,
author=self,
chat_history={self.name: prompt.gettext()},
)

while prompt and cur_round <= self.max_round:
self._generate_fuzz_target(
prompt,
result_history,
build_result,
cur_round,
)

self._validate_fuzz_target(cur_round, build_result)
prompt = self._advice_fuzz_target(build_result, cur_round)
cur_round += 1

return build_result

return build_result
Loading