From 1b092217c67515dd556d2ae2b9985fccf3d55e2e Mon Sep 17 00:00:00 2001 From: iany0 <820447623@qq.com> Date: Thu, 14 Aug 2025 03:18:36 +1000 Subject: [PATCH 01/11] Integrate RAG-based long-term memory for build error handling into prototyper --- agent/prototyper.py | 22 +- helper/error_classifier.py | 51 +++++ helper/error_patterns.yaml | 201 ++++++++++++++++++ llm_toolkit/prompt_builder.py | 43 ++++ prompts/agent/prototyper-error-classifier.txt | 36 ++++ 5 files changed, 352 insertions(+), 1 deletion(-) create mode 100644 helper/error_classifier.py create mode 100644 helper/error_patterns.yaml create mode 100644 prompts/agent/prototyper-error-classifier.txt diff --git a/agent/prototyper.py b/agent/prototyper.py index 70fb4ad754..5ab44afa9f 100644 --- a/agent/prototyper.py +++ b/agent/prototyper.py @@ -21,6 +21,7 @@ from datetime import timedelta from typing import Optional +from helper.error_classifier import BuildErrorClassifier import logger from agent.base_agent import BaseAgent from data_prep import project_targets @@ -367,6 +368,25 @@ def _generate_prompt_from_build_result( # Preference 7: New fuzz target + both `build.sh`s cannot compile. No need # to mention the default build.sh. 
# return build_result + error_classifier = BuildErrorClassifier("helper/error_patterns.yaml") + classification = error_classifier.classify(compile_log) + logger.debug("=== Compilation Log Start ===\n%s\n=== Compilation Log End ===", compile_log, trial=build_result.trial) + + if classification: + logger.info("RAG match: identified build error type %s", classification["type"], trial=build_result.trial) + builder = prompt_builder.PrototyperErrorClassifierTemplateBuilder( + model=self.llm, + benchmark=build_result.benchmark, + build_result=build_result, + compile_log=compile_log, + error_classifier=error_classifier, + initial=prompt.get() + ) + prompt = builder.build(project_dir=self.inspect_tool.project_dir) + return build_result, prompt + + # Fallback: uncategorized error + logger.warning("RAG match: classification failed, no error type matched", trial=build_result.trial) builder = prompt_builder.PrototyperFixerTemplateBuilder( model=self.llm, benchmark=build_result.benchmark, @@ -374,7 +394,7 @@ def _generate_prompt_from_build_result( compile_log=compile_log, initial=prompt.get()) prompt = builder.build(example_pair=[], - project_dir=self.inspect_tool.project_dir) + project_dir=self.inspect_tool.project_dir) return build_result, prompt def _container_handle_conclusion(self, cur_round: int, response: str, diff --git a/helper/error_classifier.py b/helper/error_classifier.py new file mode 100644 index 0000000000..a6d40c43d6 --- /dev/null +++ b/helper/error_classifier.py @@ -0,0 +1,51 @@ +import re +import yaml + +class BuildErrorClassifier: + def __init__(self, error_db_path: str): + with open(error_db_path, 'r') as f: + self.error_db = yaml.safe_load(f) + + def classify(self, compile_log: str) -> dict | None: + for error_type, data in self.error_db.items(): + for pattern in data.get("patterns", []): + if re.search(pattern, compile_log, re.IGNORECASE): + return { + "type": error_type, + "good": data.get("good", []), + "bad": data.get("bad", []), + } + return None + + 
def _find_first_error_msg(self, compile_log: str) -> str | None: + match = re.search(r"(.*?)", compile_log, re.DOTALL) + if match: + compile_log = match.group(1).strip() + else: + return None + + lines = compile_log.splitlines() + for i, line in enumerate(lines): + if any(kw in line.lower() for kw in ('error:', 'fatal error', 'undefined reference')): + return '\n'.join(lines[i:]) + return None + + def trim_and_classify_err_msg(self, compile_log:str) -> dict | None: + compile_log = self._find_first_error_msg(compile_log) + if not compile_log: + return None + for error_type, data in self.error_db.items(): + for pattern in data.get("patterns", []): + try: + match = re.search(pattern, compile_log, re.IGNORECASE) + except Exception: + print(f"Error with pattern: {pattern}") + continue + if match: + return { + "type": error_type, + "trimmed_msg": compile_log.strip()} + return { + "type": "unknown", + "trimmed_msg": compile_log.strip()} + diff --git a/helper/error_patterns.yaml b/helper/error_patterns.yaml new file mode 100644 index 0000000000..46ba2d6918 --- /dev/null +++ b/helper/error_patterns.yaml @@ -0,0 +1,201 @@ +INCLUDE ERROR: + patterns: + - "fatal error: .*: No such file or directory" + - "error: no such file or directory: '.*'" + - ": .*: No such file or directory" + - "fatal error: '.*' file not found" + - "cannot find include file" + - "header file not found" + - "file not found" + - "include.*No such file" + - "error: include file '.*' not found" + - "'[A-Za-z0-9_]+\\.h': No such file or directory" + - "fatal error: cannot open file '.*'" + - "This header is only to be used internally" + - "forward declaration of '.*'" + - "cannot open source file" + good: + - "Suggest including the standard header file for a missing type or function." + - "Suggest including a project-specific header file using quotes and the correct relative path." + - "Leverage umbrella headers that might provide necessary include paths for other project headers." 
+ - "Verify the actual location of header files and use the correct path in include directives." + - "Remove unnecessary includes of internal headers that are not exposed by the public API." + - "Fallback to forward declarations or typedefs if the specific types are not directly accessible." + - "If a header file is not found, suggest adding an include search path to the build script using `-I`." + - "Distinguish between system headers (angle brackets) and local/project headers (quotes)." + - "If a standard library header is included with angle brackets in a project header, and the project provides its own version, consider replacing the angle brackets with quotes and a relative path." + - "If unsure about a header's location, suggest trying both angle brackets and quotes." + bad: + - "Not verifying if a header file exists in the standard include paths or if it requires a specific include path setting in the build script." + - "Using absolute paths when relative paths would be more appropriate and portable." + - "Assuming that including a high-level header automatically resolves all dependencies without verifying its contents." + - "Insisting on including internal headers that are not meant for direct inclusion." + - "Not recognizing that the compiler's include search path might differ from the project's root directory." + - "Failing to account for the project's directory structure and include search paths." + - "Incorrectly assuming the location of a header file." + - "Not considering that a required header might be indirectly included through another header, leading to redundant includes." + - "Suggesting incorrect header files or paths." + - "Not considering that an undefined reference might stem from a missing library link rather than a missing header." 
+ +SYNTACTIC ERROR: + patterns: + - 'error: expected .*' + - 'expected .* before' + - 'error: stray '' .* '' in program' + - 'syntax error' + - 'parse error' + - 'missing ''\;''' + - 'implicit conversion changes signedness' + - 'error: assigning to '' .* '' from incompatible type '' .* ''' + - 'error: no member named '' .* '' in '' .* ''' + - 'no matching function for call to' + - 'candidate function not viable' + - 'requires \d+ arguments, but \d+ were provided' + - 'incompatible pointer to .* conversion' + - 'no type named .* in namespace .*' + - 'is a private member of' + - 'expected declaration or statement at end of input' + - 'unterminated string literal' + - 'expected identifier or ''\('' before .*' + - 'expected unqualified-id' + - 'use of undeclared identifier '' .* ''' + - 'error: expected parameter declarator' + - 'error: expected ''\)''' + - 'invalid conversion from' + - 'incomplete type .* named in nested name specifier' + - 'unknown type name .*' + - 'redefinition of .*' + - 'no member named .* in .*' + - 'non-void function does not return a value' + - 'forward declaration of .*' + - 'candidate constructor .* not viable' + - 'call to undeclared function' + - 'ISO C99 and later do not support implicit function declarations' + - 'error: krb5\.h included before k5-int\.h' + - 'This header is only to be used internally to libarchive\.' + - 'class member cannot be redeclared' + - 'expected member name or ''\;'' after declaration specifiers' + good: + - "Include necessary header files for built-in types like `bool` (`stdbool.h`)." + - "Remove code that accesses non-existent members of structs, simplifying logic if necessary." + - "Replace undefined constants or macros with valid alternatives if available, or remove them if their usage is incorrect." + - "Use code search tools to locate the definition of undefined symbols and include the correct header." + - "Correctly identify and fix HTML/encoding issues with operators like `>=` or `<=`." 
+ - "Define functions before their usage in struct initializers." + - "If a class needs to be instantiated, ensure it's done correctly before using its methods." + - "Correctly identify the issue of using a class name as a function name." + - "Inspect similar working examples to understand the correct usage pattern." + - "Remove redundant definitions of functions or variables if a 'redefinition' error occurs." + bad: + - "Incorrectly assuming the existence of members in structs based on similar data structures." + - "Failing to recognize that certain constants or macros might be tied to specific types or contexts." + - "Relying on guesses about the project's structure to pick a header file." + - "Including incorrect headers based on partial matches or similar symbol names." + - "Ignoring that some types may be defined in-project but not exposed via public headers." + - "Guessing header locations without verifying." + - "Leaving related undefined types/symbols unresolved after a fix." + - "Not proposing a direct replacement for an incorrect function call once identified." + - "Suggesting adding include guards via build script (instead of fixing headers)." + - "Using a class name as a function name." + - "Ignoring namespace impacts." + +UNDEFINED REFERENCE ERROR: + patterns: + - "undefined reference to `.*'" + - "undefined reference to '.*'" + - "symbol not found" + - "unresolved external symbol" + - "linker error: symbol undefined" + - "error: undefined reference to `.*`" + - "error: use of undeclared identifier '.*'" + good: + - "Suggest including the standard header files that contain the declarations of the undefined functions." + - "Correctly identify the missing header based on the undefined function name." + - "Explain the missing declaration and its purpose clearly." + - "Suggest adding `extern \"C\"` to a fuzz target when C linkage is required." + - "If the symbol is in an external library, ensure it is linked in the build script." 
+ - "Use `nm` to inspect symbols in objects/libraries." + - "Replicate linking approach from a similar working target." + - "If explicit linking fails, use build-system variables (e.g., `$LIB_FUZZING_ENGINE`) for the fuzzing engine." + - "Add include paths for headers containing the undefined symbols." + - "Remove unnecessary internal headers not exposed by the public API." + bad: + - "Linking random libraries without confirming the symbol actually lives there." + - "Adding prototypes without including headers or linking the defining libraries." + - "Changing compiler/linker flags when only source/build.sh edits are allowed." + - "Misreading a missing entry-point linker error as a source-code issue." + - "Blaming flags or libraries when the root cause is build misconfiguration." + - "Dismissing relevant diagnostics (e.g., debug info parse errors) without investigation." + - "Assuming declarations are in different headers when the linker indicates a missing definition." + +LINKER ERROR: + patterns: + - 'multiple definition of' + - 'ld: duplicate symbol' + - 'conflicting types for' + - 'linking failed' + - 'ld returned 1 exit status' + - 'collect2: error: ld returned .* exit status' + - 'linker command failed with exit code' + - 'cannot find -l.*' + - 'no such file or directory: ''\$LIB_FUZZING_ENGINE''' + - 'relocation overflowed' + - 'error: relocation truncated to fit' + - 'first defined here' + - 'no such file or directory: .*\.a' + - '/usr/bin/ld: cannot find -ljsoncpp' + good: + - "After building a library, verify the location of the built library file." + - "Ensure the linker knows where to find libraries using `-L` and link with `-l`." + - "Link dependent libraries as well when needed." + - "If dynamic linking fails, try static variants if available." + - "Inspect the build system config (e.g., CMakeLists.txt, Makefile) to see how libraries are linked." + - "Place the C++ standard library and system libs after other libs in the link line." 
+ - "Use verbose linker flags (e.g., `-v`) for more detail." + bad: + - "Suggesting `.c/.cpp` inclusion directly (causing multiple definitions)." + - "Confusing linker errors with compile-time errors." + - "Failing to provide concrete library path/linking advice (`-L`, `-l`)." + - "Misclassifying multiple-definition errors as undefined references." + - "Suggesting irrelevant text changes when the issue is link-time." + +BUILD_CONFIGURATION_ERROR: + patterns: + - 'make: \*\*\* No rule to make target .*' + - 'CMake Error:.*' + - 'CMake was unable to find a build program' + - 'CMAKE_(C|CXX)_COMPILER not set' + - 'ninja: error: loading ''build\.ninja'': No such file or directory' + - 'The source directory .* does not appear to contain CMakeLists\.txt' + - 'Policy CMP\d+ is not set' + - 'The OLD behavior for policy CMP\d+ will be removed' + - 'This warning is for project developers\. *Use -Wno-dev to suppress it' + - '/src/build\.sh: line \d+: syntax error' + - 'sed: -e expression #1, char \d+: (Invalid content of \{\}|extra characters after command)' + - 'sed: can''t read.*No such file or directory' + - '/src/build\.sh: line \d+: unbound variable' + - 'unbound variable' + - 'configure: error: .* not found' + - 'configure: WARNING: .*' + - 'autoreconf: ''configure\.(ac|in)'' is required' + - 'Could not find a package configuration file provided by .*' + - 'CMake Error at CMakeLists\.txt:\d+ \(find_package\)' + - 'By not providing "Find.*\.cmake" in CMAKE_MODULE_PATH.*' + - 'debconf: delaying package configuration, since apt-utils is not installed' + - '/src/build\.sh: line \d+: fuzzer/CMakeLists\.txt: No such file or directory' + - 'WARNING: png library not available - no png\.h' + - '\./aom_configure: No such file or directory' + - 'DWARF error: invalid or unhandled FORM value.*' + - 'clang\+\+: error: no such file or directory: ''\$LIB_FUZZING_ENGINE''' + good: + - "If CMake is used, check invocation and arguments." 
+ - "Verify the correct generator (Ninja/Unix Makefiles) and presence of CMakeLists.txt." + - "Ensure the right source files are listed and targets are hooked up." + - "Make sure required packages are discoverable (set `_DIR` or CMAKE_PREFIX_PATH)." + - "Use verbose CMake output to diagnose configuration." + - "Check for missing dependencies and environment variables required by the build." + bad: + - "Proposing linker/compile flag changes when only source and a limited build script are allowed." + - "Removing configure/bootstrap pieces without understanding their role." + - "Copying sources around to satisfy a missing-target error instead of fixing the build script." + - "Misclassifying build-system errors as include or linker issues." diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 3319444453..b3b9d48b25 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -21,6 +21,7 @@ from abc import abstractmethod from typing import Any, Optional, Tuple +from helper.error_classifier import BuildErrorClassifier import jinja2 from data_prep import introspector, project_targets @@ -671,6 +672,48 @@ def build(self, return self._prompt +class PrototyperErrorClassifierTemplateBuilder(PrototyperTemplateBuilder): + def __init__(self, model, benchmark, build_result, compile_log, error_classifier: BuildErrorClassifier, initial=None): + super().__init__(model, benchmark, DEFAULT_TEMPLATE_DIR, initial) + self.build_result = build_result + self.compile_log = compile_log + self.error_classifier = error_classifier + self.priming_template_file = self._find_template(self.agent_templare_dir, + 'prototyper-error-classifier.txt') + + def build(self, project_dir='') -> prompts.Prompt: + classification = self.error_classifier.classify(self.compile_log) + + error_type = classification["type"] if classification else "UNKNOWN" + function_signature = self.benchmark.function_signature + fuzz_target_source = 
self.build_result.fuzz_target_source + binary_path = os.path.join('/out', self.benchmark.target_name) + + # Handle build script formatting + if self.build_result.build_script_source: + build_text = (f'\n{self.build_result.build_script_source}\n') + else: + build_text = 'Build script reuses `/src/build.bk.sh`.' + + # Format tips section + if classification: + good_lines = '\n'.join(f"- {line}" for line in classification["good"]) + bad_lines = '\n'.join(f"- {line}" for line in classification["bad"]) + tips = f"What to do (✓):\n{good_lines}\n\nWhat to avoid (✗):\n{bad_lines}" + else: + tips = "No specific suggestions found. Analyze the error log and apply best practices." + + # Load and format template + prompt = self._get_template(self.priming_template_file) + prompt = prompt.replace('{FUZZ_TARGET_SOURCE}', fuzz_target_source) + prompt = prompt.replace('{BUILD_TEXT}', build_text) + prompt = prompt.replace('{COMPILE_LOG}', self.compile_log) + prompt = prompt.replace('{FUNCTION_SIGNATURE}', function_signature) + prompt = prompt.replace('{PROJECT_DIR}', project_dir) + prompt = prompt.replace('{TIPS}', tips) + + self._prompt.append(prompt) + return self._prompt class CoverageAnalyzerTemplateBuilder(PrototyperTemplateBuilder): """Builder specifically targeted C (and excluding C++).""" diff --git a/prompts/agent/prototyper-error-classifier.txt b/prompts/agent/prototyper-error-classifier.txt new file mode 100644 index 0000000000..8b91af90ba --- /dev/null +++ b/prompts/agent/prototyper-error-classifier.txt @@ -0,0 +1,36 @@ +Failed to build fuzz target. Here is the fuzz target, build script, and compilation output: + + +{FUZZ_TARGET_SOURCE} + +{BUILD_TEXT} + +{COMPILE_LOG} + + +You are a careful, verification-first build fixer. +Your job is to identify the earliest blocking error and produce the minimal, correct changes to make the target compile. + +Rules (follow strictly): +1) Ground everything in evidence. Do NOT guess. 
Confirm every assumption (paths, headers, libraries, symbols) with Bash commands before changing code or the build script. +2) Respect the build system. Do not hand-tune environment/global flags outside the provided build script. Work through the build files (CMake/Make/…). +3) Fix the current stage only. If it’s a compile error, don’t propose link-only fixes, and vice versa. +4) Prefer minimal, surgical edits. Do not delete unrelated code. Keep the fuzz target structure intact and use the fuzzed input (`const uint8_t* data, size_t size`) to exercise the target. +5) Use existing working patterns. If similar files/targets show a working approach, mirror that configuration rather than inventing new flows. +6) Interpret file-not-found precisely: + - If it’s a header, consider include correctness and include paths. + - If it’s a source/object/archive (`.c/.cc/.o/.a`), treat it as a build path/configuration issue, not a header/include issue. +7) Explain briefly why your fix works (one or two sentences max). No long essays. + +YOU MUST first analyze the error messages with the fuzz target and the build script carefully to identify the root cause. +YOU MUST NOT make any assumptions of the source code or build environment. Always confirm assumptions with source code evidence, obtained via Bash commands. + +Once you are absolutely certain of the error root cause: +- Provide the FULL SOURCE CODE of the corrected fuzz target. +- If `/src/build.bk.sh` is insufficient, also provide the FULL SOURCE CODE of the updated build script. + +TIPS (binding; follow ✓ and avoid ✗): +{TIPS} + +Focus on writing a compilable fuzz target that calls the function-under-test {FUNCTION_SIGNATURE}. +Coverage and bug finding are NOT priorities now; successful compilation and correctness are. 
From 5a90aad7747e51786cd288107fb699d935749d01 Mon Sep 17 00:00:00 2001 From: iany0 <820447623@qq.com> Date: Fri, 15 Aug 2025 15:45:30 +1000 Subject: [PATCH 02/11] updated tips --- helper/error_patterns.yaml | 171 ++++++++++++++++++++++--------------- 1 file changed, 102 insertions(+), 69 deletions(-) diff --git a/helper/error_patterns.yaml b/helper/error_patterns.yaml index 46ba2d6918..ecc12491cd 100644 --- a/helper/error_patterns.yaml +++ b/helper/error_patterns.yaml @@ -15,27 +15,26 @@ INCLUDE ERROR: - "forward declaration of '.*'" - "cannot open source file" good: - - "Suggest including the standard header file for a missing type or function." - - "Suggest including a project-specific header file using quotes and the correct relative path." + - "Suggest the correct `#include` directive with the accurate path, considering relative paths from the fuzz target's location." + - "Suggest adding the necessary include directory to the compiler's search path (e.g., `-I/path/to/include`)." - "Leverage umbrella headers that might provide necessary include paths for other project headers." - - "Verify the actual location of header files and use the correct path in include directives." - "Remove unnecessary includes of internal headers that are not exposed by the public API." - - "Fallback to forward declarations or typedefs if the specific types are not directly accessible." + - "If the symbol is from a standard library, suggest the correct standard header." + - "If a symbol belongs to a specific library, ensure the library is linked in the build script." - "If a header file is not found, suggest adding an include search path to the build script using `-I`." - - "Distinguish between system headers (angle brackets) and local/project headers (quotes)." - - "If a standard library header is included with angle brackets in a project header, and the project provides its own version, consider replacing the angle brackets with quotes and a relative path." 
- - "If unsure about a header's location, suggest trying both angle brackets and quotes." + - "Suggest using include-what-you-use for better include management." + - "If multiple headers could provide the symbol, prioritize the most specific or commonly used one." bad: - "Not verifying if a header file exists in the standard include paths or if it requires a specific include path setting in the build script." - "Using absolute paths when relative paths would be more appropriate and portable." - - "Assuming that including a high-level header automatically resolves all dependencies without verifying its contents." + - "Adding unnecessary or irrelevant includes." + - "Removing include paths without ensuring the header is accessible." - "Insisting on including internal headers that are not meant for direct inclusion." + - "Assuming header file existence based on naming conventions." - "Not recognizing that the compiler's include search path might differ from the project's root directory." - - "Failing to account for the project's directory structure and include search paths." - - "Incorrectly assuming the location of a header file." - - "Not considering that a required header might be indirectly included through another header, leading to redundant includes." - - "Suggesting incorrect header files or paths." - - "Not considering that an undefined reference might stem from a missing library link rather than a missing header." + - "Copying headers to the fuzz target directory as a workaround." + - "Including source files (`.c`, `.cpp`) directly, leading to potential linker errors." + - "Not considering that the required header might be indirectly included through another header, leading to redundant includes." SYNTACTIC ERROR: patterns: @@ -76,28 +75,23 @@ SYNTACTIC ERROR: - 'class member cannot be redeclared' - 'expected member name or ''\;'' after declaration specifiers' good: - - "Include necessary header files for built-in types like `bool` (`stdbool.h`)." 
- "Remove code that accesses non-existent members of structs, simplifying logic if necessary." + - "Provide correct function prototypes if missing, including `extern \"C\"` if needed." + - "If the error involves a macro, explain its correct usage with concrete, minimal, working examples." + - "Ensure proper usage of parentheses, braces, and semicolons." + - "Check for mismatched types or incorrect function arguments." - "Replace undefined constants or macros with valid alternatives if available, or remove them if their usage is incorrect." - - "Use code search tools to locate the definition of undefined symbols and include the correct header." - - "Correctly identify and fix HTML/encoding issues with operators like `>=` or `<=`." - - "Define functions before their usage in struct initializers." - - "If a class needs to be instantiated, ensure it's done correctly before using its methods." - - "Correctly identify the issue of using a class name as a function name." - - "Inspect similar working examples to understand the correct usage pattern." + - "Use code search tools to locate the definition of undefined symbols within the project's source code and include the correct header file." + - "If the error involves templates or generics, verify correct instantiation." - "Remove redundant definitions of functions or variables if a 'redefinition' error occurs." bad: + - "Renaming functions or variables without understanding their purpose." + - "Making assumptions about type definitions or function signatures." - "Incorrectly assuming the existence of members in structs based on similar data structures." - - "Failing to recognize that certain constants or macros might be tied to specific types or contexts." - - "Relying on guesses about the project's structure to pick a header file." - - "Including incorrect headers based on partial matches or similar symbol names." - - "Ignoring that some types may be defined in-project but not exposed via public headers." 
- - "Guessing header locations without verifying." - - "Leaving related undefined types/symbols unresolved after a fix." - - "Not proposing a direct replacement for an incorrect function call once identified." - - "Suggesting adding include guards via build script (instead of fixing headers)." - - "Using a class name as a function name." - - "Ignoring namespace impacts." + - "Failing to recognize that certain constants or macros might be tied to specific data types or contexts and cannot be used interchangeably." + - "Relying solely on assumptions or incomplete knowledge of the project's structure to guess the header file." + - "Including incorrect header files based on partial matches or similar symbol names." + - "Not recognizing that certain types might be defined within the project but not exposed through the main public header files." UNDEFINED REFERENCE ERROR: patterns: @@ -109,24 +103,24 @@ UNDEFINED REFERENCE ERROR: - "error: undefined reference to `.*`" - "error: use of undeclared identifier '.*'" good: - - "Suggest including the standard header files that contain the declarations of the undefined functions." - - "Correctly identify the missing header based on the undefined function name." - - "Explain the missing declaration and its purpose clearly." - - "Suggest adding `extern \"C\"` to a fuzz target when C linkage is required." - - "If the symbol is in an external library, ensure it is linked in the build script." - - "Use `nm` to inspect symbols in objects/libraries." - - "Replicate linking approach from a similar working target." - - "If explicit linking fails, use build-system variables (e.g., `$LIB_FUZZING_ENGINE`) for the fuzzing engine." - - "Add include paths for headers containing the undefined symbols." - - "Remove unnecessary internal headers not exposed by the public API." + - "Correctly identify the undefined symbol and suggest including the header file containing its declaration." 
+ - "If the symbol is in a library, verify that the library is linked correctly and in the proper order." + - "Suggest adding the `extern \"C\"` linkage specifier to a fuzz target function if it's missing and the linker cannot find the function." + - "Verify function signature compatibility between declaration and definition." + - "If the symbol is a class member, ensure the class is fully defined and accessible." + - "If the symbol is in a namespace, ensure the namespace is correctly used." + - "If the existing fuzz target works correctly, try to replicate its linking approach." + - "If the undefined reference is to a function in an external library, ensure the library is linked correctly in the build script." + - "If explicit linking fails, revert to using build system variables (like `$LIB_FUZZING_ENGINE`) for linking the fuzzing engine library." bad: - - "Linking random libraries without confirming the symbol actually lives there." - - "Adding prototypes without including headers or linking the defining libraries." - - "Changing compiler/linker flags when only source/build.sh edits are allowed." - - "Misreading a missing entry-point linker error as a source-code issue." - - "Blaming flags or libraries when the root cause is build misconfiguration." - - "Dismissing relevant diagnostics (e.g., debug info parse errors) without investigation." - - "Assuming declarations are in different headers when the linker indicates a missing definition." + - "Forward declaring a function without providing its definition." + - "Suggesting alternative functions without ensuring they have the required functionality." + - "Providing an incorrect definition for the undefined symbol." + - "Confusing free functions with member functions or static methods." + - "Not considering the possibility of missing library linkages." + - "Suggesting adding function prototypes without including the necessary header files or linking the libraries where the functions are defined." 
+ - "Suggesting changes to compiler or linker flags when restricted to modifying only the source code or build script." + - "Failing to recognize that the undefined reference to the fuzz target entry point is a fundamental requirement and cannot be removed." LINKER ERROR: patterns: @@ -145,19 +139,20 @@ LINKER ERROR: - 'no such file or directory: .*\.a' - '/usr/bin/ld: cannot find -ljsoncpp' good: - - "After building a library, verify the location of the built library file." - - "Ensure the linker knows where to find libraries using `-L` and link with `-l`." - - "Link dependent libraries as well when needed." - - "If dynamic linking fails, try static variants if available." - - "Inspect the build system config (e.g., CMakeLists.txt, Makefile) to see how libraries are linked." - - "Place the C++ standard library and system libs after other libs in the link line." - - "Use verbose linker flags (e.g., `-v`) for more detail." + - "Check for inconsistencies in symbol declarations and definitions across different files." + - "Ensure that the linker knows where to find the built library using `-L`." + - "Link the library using `-l` followed by the library name." + - "If the library has dependencies, ensure those are also built and linked." + - "If dynamic linking fails, try relying on statically linked libraries if they are available." + - "Inspect the build system configuration (CMakeLists.txt, Makefile.am) to understand how libraries are linked." + - "Combine wildcard and explicit linking to ensure all necessary libraries are included." + - "Place the C++ standard library and other system libraries after other libraries in the linker command." bad: - - "Suggesting `.c/.cpp` inclusion directly (causing multiple definitions)." - - "Confusing linker errors with compile-time errors." - - "Failing to provide concrete library path/linking advice (`-L`, `-l`)." - - "Misclassifying multiple-definition errors as undefined references." 
- - "Suggesting irrelevant text changes when the issue is link-time." + - "Suggesting including `.c/.cpp` files directly, leading to multiple definition errors." + - "Misunderstanding the one definition rule." + - "Incorrectly stating that the linker is unable to find specific libraries when the error message doesn't mention those libraries." + - "Assuming the linker needs dynamic versions of libraries when static versions are already linked." + - "Not providing a concrete solution for missing library paths, such as using `-L`." BUILD_CONFIGURATION_ERROR: patterns: @@ -188,14 +183,52 @@ BUILD_CONFIGURATION_ERROR: - 'DWARF error: invalid or unhandled FORM value.*' - 'clang\+\+: error: no such file or directory: ''\$LIB_FUZZING_ENGINE''' good: - - "If CMake is used, check invocation and arguments." - - "Verify the correct generator (Ninja/Unix Makefiles) and presence of CMakeLists.txt." - - "Ensure the right source files are listed and targets are hooked up." - - "Make sure required packages are discoverable (set `_DIR` or CMAKE_PREFIX_PATH)." - - "Use verbose CMake output to diagnose configuration." - - "Check for missing dependencies and environment variables required by the build." + - "If CMake is being used, check how it is invoked and whether the correct arguments are being passed." + - "Verify that the correct source files are specified in the CMakeLists.txt file." + - "Check for missing or incorrect CMake commands or options." + - "If using a `find_package` command, ensure that the package's config file is accessible or that the `*_DIR` variable is set correctly." + - "If a variable is unbound, initialize it with the correct value or ensure it's set before use." + - "Ensure that environment variables are passed correctly to CMake or other build tools." bad: - - "Proposing linker/compile flag changes when only source and a limited build script are allowed." - - "Removing configure/bootstrap pieces without understanding their role." 
- - "Copying sources around to satisfy a missing-target error instead of fixing the build script." - - "Misclassifying build-system errors as include or linker issues." + - "Suggesting modifications to CMake commands or flags when the LLM is restricted to modifying only the source code or a limited build script." + - "Removing the sourcing of configure.sh without understanding its purpose." + - "Suggesting copying the fuzz target source file to the current directory as a solution to a 'no such file or directory' error." + - "Suggesting setting CFLAGS and CXXFLAGS before the configure step, potentially interfering with the configure process." + - "Incorrectly assuming the location of necessary files." + +CORRUPTED CODE ERROR: + patterns: + - "invalid preprocessing directive" + - "unexpected token" + - "unrecognized input" + - "junk after number" + - "unclosed comment" + - "unterminated comment" + - "unexpected end of file" + - "illegal start of expression" + - "missing terminating ' character" + - "missing terminating \" character" + - "segmentation fault" + - "corrupted" + good: + - "If the code is severely corrupted, suggest reverting to a previous working version." + - "If parts are salvageable, suggest specific changes to fix corrupted sections while preserving logic." + - "Identify and explain the reasons for the corruption (e.g., incorrect merge, bad generation)." + - "Prioritize preserving the original intent and structure of the code." + - "Test suggested changes to ensure they restore functionality." + - "If the corruption is due to missing code, try to generate the missing parts based on context." + - "If the code is partially generated, ensure consistency and correctness of generated parts." + - "Check for logical errors or inconsistencies introduced by the corruption." + - "If the corruption is related to data structures, verify their correctness and consistency." 
+ - "If the cause is unclear, suggest debugging techniques to identify the corrupted sections." + bad: + - "Replacing the entire code with generic placeholder code." + - "Introducing new errors or making the code even more corrupted." + - "Not attempting to understand or preserve the original logic." + - "Failing to test changes or verify they improve the situation." + - "Providing inadequate reasoning that doesn't justify changes." + - "Making assumptions about the intended functionality of the corrupted code." + - "Not recognizing common corruption patterns (e.g., incorrect indentation, missing braces)." + - "Applying generic fixes without understanding the specific corruption." + - "Not considering the context of the corrupted code within the larger project." + - "Ignoring or dismissing the corruption without attempting a fix." \ No newline at end of file From 98415f6ccb9853c6eefdabfce688e4dc922c10c8 Mon Sep 17 00:00:00 2001 From: iany0 <820447623@qq.com> Date: Thu, 4 Sep 2025 14:31:56 +1000 Subject: [PATCH 03/11] enable RAG via --rag-classifier flag --- agent/prototyper.py | 60 ++++++++++++++++++++++++++++-------------- run_all_experiments.py | 4 +++ 2 files changed, 44 insertions(+), 20 deletions(-) diff --git a/agent/prototyper.py b/agent/prototyper.py index 5ab44afa9f..d9a9217f88 100644 --- a/agent/prototyper.py +++ b/agent/prototyper.py @@ -368,34 +368,54 @@ def _generate_prompt_from_build_result( # Preference 7: New fuzz target + both `build.sh`s cannot compile. No need # to mention the default build.sh. 
# return build_result - error_classifier = BuildErrorClassifier("helper/error_patterns.yaml") - classification = error_classifier.classify(compile_log) - logger.debug("=== Compilation Log Start ===\n%s\n=== Compilation Log End ===", compile_log, trial=build_result.trial) - - if classification: - logger.info("RAG match: identified build error type %s", classification["type"], trial=build_result.trial) - builder = prompt_builder.PrototyperErrorClassifierTemplateBuilder( + rag_enabled = False + try: + rag_enabled = bool(getattr(self, 'args', None)) and bool(getattr(self.args, 'rag_classifier', False)) + except Exception: + rag_enabled = False + if rag_enabled: + # Use RAG-based classifier to build a targeted prompt. + error_classifier = BuildErrorClassifier("helper/error_patterns.yaml") + classification = error_classifier.classify(compile_log) + logger.debug("=== Compilation Log Start ===\n%s\n=== Compilation Log End ===", compile_log, trial=build_result.trial) + + if classification: + logger.info("RAG match: identified build error type %s", classification["type"], trial=build_result.trial) + builder = prompt_builder.PrototyperErrorClassifierTemplateBuilder( + model=self.llm, + benchmark=build_result.benchmark, + build_result=build_result, + compile_log=compile_log, + error_classifier=error_classifier, + initial=prompt.get() + ) + prompt = builder.build(project_dir=self.inspect_tool.project_dir) + return build_result, prompt + + # If RAG could not classify, fall back to generic fixer template. 
+ logger.warning("RAG match: classification failed, no error type matched", trial=build_result.trial) + builder = prompt_builder.PrototyperFixerTemplateBuilder( model=self.llm, benchmark=build_result.benchmark, build_result=build_result, compile_log=compile_log, - error_classifier=error_classifier, initial=prompt.get() ) - prompt = builder.build(project_dir=self.inspect_tool.project_dir) + prompt = builder.build(example_pair=[], project_dir=self.inspect_tool.project_dir) return build_result, prompt - # Fallback: uncategorized error - logger.warning("RAG match: classification failed, no error type matched", trial=build_result.trial) - builder = prompt_builder.PrototyperFixerTemplateBuilder( - model=self.llm, - benchmark=build_result.benchmark, - build_result=build_result, - compile_log=compile_log, - initial=prompt.get()) - prompt = builder.build(example_pair=[], - project_dir=self.inspect_tool.project_dir) - return build_result, prompt + else: + # RAG disabled -> always use the generic fixer template. 
+ logger.info("RAG classifier disabled (no --rag-classifier flag); using FixerTemplateBuilder.", trial=build_result.trial) + builder = prompt_builder.PrototyperFixerTemplateBuilder( + model=self.llm, + benchmark=build_result.benchmark, + build_result=build_result, + compile_log=compile_log, + initial=prompt.get() + ) + prompt = builder.build(example_pair=[], project_dir=self.inspect_tool.project_dir) + return build_result, prompt def _container_handle_conclusion(self, cur_round: int, response: str, build_result: BuildResult, diff --git a/run_all_experiments.py b/run_all_experiments.py index 5568d4f55f..f4c2a374e5 100755 --- a/run_all_experiments.py +++ b/run_all_experiments.py @@ -251,6 +251,10 @@ def parse_args() -> argparse.Namespace: action='store_true', default=False, help='Enables agent enhancement.') + parser.add_argument('-rc', + '--rag-classifier', + action='store_true', + help='Enable the RAG-based build error classifier (off by default).') parser.add_argument('--custom-pipeline', type=str, default='') parser.add_argument('-mr', '--max-round', From 69fada59acdb8123e3245354b72ecefd007aec5b Mon Sep 17 00:00:00 2001 From: Wentao Gao <89055397+wenta0g@users.noreply.github.com> Date: Mon, 15 Sep 2025 18:21:44 +1000 Subject: [PATCH 04/11] update memory for build script related error --- helper/error_patterns.yaml | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/helper/error_patterns.yaml b/helper/error_patterns.yaml index ecc12491cd..b68b160f21 100644 --- a/helper/error_patterns.yaml +++ b/helper/error_patterns.yaml @@ -183,18 +183,21 @@ BUILD_CONFIGURATION_ERROR: - 'DWARF error: invalid or unhandled FORM value.*' - 'clang\+\+: error: no such file or directory: ''\$LIB_FUZZING_ENGINE''' good: - - "If CMake is being used, check how it is invoked and whether the correct arguments are being passed." - - "Verify that the correct source files are specified in the CMakeLists.txt file." 
- - "Check for missing or incorrect CMake commands or options." - - "If using a `find_package` command, ensure that the package's config file is accessible or that the `*_DIR` variable is set correctly." - - "If a variable is unbound, initialize it with the correct value or ensure it's set before use." - - "Ensure that environment variables are passed correctly to CMake or other build tools." + - "Inspect build system config files (Makefile, CMakeLists.txt, meson.build, configure.ac)." + - "Verify build rules and target dependencies are correctly defined and ordered." + - "Check for missing dependencies/incorrect paths; install exact packages (e.g., libssl-dev, zlib1g-dev)." + - "Clean build dir and rebuild (e.g., rm -rf build && mkdir build)." + - "Set compiler/linker flags via build system (CMAKE_C_FLAGS, target_link_libraries)." + - "Check environment variables (PKG_CONFIG_PATH, CMAKE_PREFIX_PATH, LD_LIBRARY_PATH)." + - "Leverage official project scripts (oss-fuzz.sh, build.sh) when available." bad: - "Suggesting modifications to CMake commands or flags when the LLM is restricted to modifying only the source code or a limited build script." - "Removing the sourcing of configure.sh without understanding its purpose." - "Suggesting copying the fuzz target source file to the current directory as a solution to a 'no such file or directory' error." - "Suggesting setting CFLAGS and CXXFLAGS before the configure step, potentially interfering with the configure process." - - "Incorrectly assuming the location of necessary files." + - "Incorrectly assuming the location of necessary files, the fuzz target is modified in place from the original fuzz target." + - "Using repeated or incorrect sed commands on build files, causing accumulated or broken changes." + CORRUPTED CODE ERROR: patterns: @@ -231,4 +234,4 @@ CORRUPTED CODE ERROR: - "Not recognizing common corruption patterns (e.g., incorrect indentation, missing braces)." 
- "Applying generic fixes without understanding the specific corruption." - "Not considering the context of the corrupted code within the larger project." - - "Ignoring or dismissing the corruption without attempting a fix." \ No newline at end of file + - "Ignoring or dismissing the corruption without attempting a fix." From d211c5a3b1ec99bc1efcab5867cc1d137a296b5c Mon Sep 17 00:00:00 2001 From: Wentao Gao <89055397+wenta0g@users.noreply.github.com> Date: Mon, 15 Sep 2025 18:34:15 +1000 Subject: [PATCH 05/11] update memory for build script related error --- helper/error_patterns.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/helper/error_patterns.yaml b/helper/error_patterns.yaml index b68b160f21..6d6e6d1d2f 100644 --- a/helper/error_patterns.yaml +++ b/helper/error_patterns.yaml @@ -197,6 +197,8 @@ BUILD_CONFIGURATION_ERROR: - "Suggesting setting CFLAGS and CXXFLAGS before the configure step, potentially interfering with the configure process." - "Incorrectly assuming the location of necessary files, the fuzz target is modified in place from the original fuzz target." - "Using repeated or incorrect sed commands on build files, causing accumulated or broken changes." + - "Overcomplicating build processes when official project script commands suffice." + - "Hardcoding invalid include paths, flags, or library names." 
CORRUPTED CODE ERROR: From 0624d656d91a092b740938ee5a7502764cae9352 Mon Sep 17 00:00:00 2001 From: iany0 <820447623@qq.com> Date: Tue, 16 Sep 2025 17:55:47 +1000 Subject: [PATCH 06/11] refined template --- helper/error_patterns.yaml | 5 ++++- prompts/agent/prototyper-error-classifier.txt | 8 ++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/helper/error_patterns.yaml b/helper/error_patterns.yaml index 6d6e6d1d2f..16effc7454 100644 --- a/helper/error_patterns.yaml +++ b/helper/error_patterns.yaml @@ -154,7 +154,7 @@ LINKER ERROR: - "Assuming the linker needs dynamic versions of libraries when static versions are already linked." - "Not providing a concrete solution for missing library paths, such as using `-L`." -BUILD_CONFIGURATION_ERROR: +BUILD CONFIGURATION ERROR: patterns: - 'make: \*\*\* No rule to make target .*' - 'CMake Error:.*' @@ -182,6 +182,9 @@ BUILD_CONFIGURATION_ERROR: - '\./aom_configure: No such file or directory' - 'DWARF error: invalid or unhandled FORM value.*' - 'clang\+\+: error: no such file or directory: ''\$LIB_FUZZING_ENGINE''' + - 'configure: error: .* (required|dependency) .* (not found|not available|missing)' + - 'configure: error: .* but required .* (library|package).* not available' + - 'configure: error: .* requires .* (but it is not installed|not found|not available)' good: - "Inspect build system config files (Makefile, CMakeLists.txt, meson.build, configure.ac)." - "Verify build rules and target dependencies are correctly defined and ordered." diff --git a/prompts/agent/prototyper-error-classifier.txt b/prompts/agent/prototyper-error-classifier.txt index 8b91af90ba..4999e24617 100644 --- a/prompts/agent/prototyper-error-classifier.txt +++ b/prompts/agent/prototyper-error-classifier.txt @@ -11,6 +11,9 @@ Failed to build fuzz target. Here is the fuzz target, build script, and compilat You are a careful, verification-first build fixer. 
Your job is to identify the earliest blocking error and produce the minimal, correct changes to make the target compile. +PINNED (read before doing anything) +- TIPS are **binding**. You must apply relevant ✓ items from **TIPS (binding)** below and avoid ✗ items. + Rules (follow strictly): 1) Ground everything in evidence. Do NOT guess. Confirm every assumption (paths, headers, libraries, symbols) with Bash commands before changing code or the build script. 2) Respect the build system. Do not hand-tune environment/global flags outside the provided build script. Work through the build files (CMake/Make/…). @@ -34,3 +37,8 @@ TIPS (binding; follow ✓ and avoid ✗): Focus on writing a compilable fuzz target that calls the function-under-test {FUNCTION_SIGNATURE}. Coverage and bug finding are NOT priorities now; successful compilation and correctness are. + +Process (do this in order): +A) Diagnose the **earliest blocking error** from the log. +B) Run only minimal verification commands to confirm the cause (e.g., `ls`, `grep`, `pkg-config --cflags --libs`, `cmake --version`, etc.). Show exact command outputs. +C) Propose the **smallest** change to either the fuzz target or the provided build script that resolves that error. 
\ No newline at end of file From 70c556eeb7a686acb82b5c35f5c25a6f9b624922 Mon Sep 17 00:00:00 2001 From: iany0 <820447623@qq.com> Date: Tue, 16 Sep 2025 18:57:36 +1000 Subject: [PATCH 07/11] changed conflicting arg --- run_all_experiments.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_all_experiments.py b/run_all_experiments.py index f4c2a374e5..36a971c92d 100755 --- a/run_all_experiments.py +++ b/run_all_experiments.py @@ -251,7 +251,7 @@ def parse_args() -> argparse.Namespace: action='store_true', default=False, help='Enables agent enhancement.') - parser.add_argument('-rc', + parser.add_argument('-rag', '--rag-classifier', action='store_true', help='Enable the RAG-based build error classifier (off by default).') From f3fe27ebbb94520b40524561c425327b98da53f6 Mon Sep 17 00:00:00 2001 From: iany0 <820447623@qq.com> Date: Tue, 16 Sep 2025 20:38:20 +1000 Subject: [PATCH 08/11] rag default true --- run_all_experiments.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/run_all_experiments.py b/run_all_experiments.py index 36a971c92d..fc827c72b6 100755 --- a/run_all_experiments.py +++ b/run_all_experiments.py @@ -254,7 +254,8 @@ def parse_args() -> argparse.Namespace: parser.add_argument('-rag', '--rag-classifier', action='store_true', - help='Enable the RAG-based build error classifier (off by default).') + default=True, + help='Enable the RAG-based build error classifier (default: on).') parser.add_argument('--custom-pipeline', type=str, default='') parser.add_argument('-mr', '--max-round', @@ -531,6 +532,9 @@ def main(): args = parse_args() _setup_logging(args.log_level, is_cloud=args.cloud_experiment_name != '') + logger.info('[dbg] agent=%s rag_classifier=%s model=%s bench_yaml=%s bench_dir=%s', + args.agent, args.rag_classifier, args.model, + getattr(args, 'benchmark_yaml', ''), getattr(args, 'benchmarks_directory', '')) logger.info('Starting experiments on PR branch') # Capture time at start From 
20f80e414807ced8f4d0446f74e1d97c7604f36f Mon Sep 17 00:00:00 2001 From: iany0 <820447623@qq.com> Date: Fri, 19 Sep 2025 14:13:47 +1000 Subject: [PATCH 09/11] rollback rag flag --- ci/k8s/pr-exp.yaml | 2 +- run_all_experiments.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/k8s/pr-exp.yaml b/ci/k8s/pr-exp.yaml index e14ec68ea1..46fd09b808 100644 --- a/ci/k8s/pr-exp.yaml +++ b/ci/k8s/pr-exp.yaml @@ -48,7 +48,7 @@ spec: name: results-volume env: - name: LLM_NUM_EXP - value: '40' + value: '20' - name: LLM_NUM_EVA value: '10' - name: VERTEX_AI_LOCATIONS diff --git a/run_all_experiments.py b/run_all_experiments.py index fc827c72b6..eef060baf8 100755 --- a/run_all_experiments.py +++ b/run_all_experiments.py @@ -254,8 +254,8 @@ def parse_args() -> argparse.Namespace: parser.add_argument('-rag', '--rag-classifier', action='store_true', - default=True, - help='Enable the RAG-based build error classifier (default: on).') + default=False, + help='Enable the RAG-based build error classifier (default: off).') parser.add_argument('--custom-pipeline', type=str, default='') parser.add_argument('-mr', '--max-round', From 0e92d44dd67bf7ad6dd4b1a7e5a562d46f7f4580 Mon Sep 17 00:00:00 2001 From: iany0 <820447623@qq.com> Date: Mon, 29 Sep 2025 01:25:43 +1000 Subject: [PATCH 10/11] default true to avoid extra args --- run_all_experiments.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/run_all_experiments.py b/run_all_experiments.py index eef060baf8..2ad1471e39 100755 --- a/run_all_experiments.py +++ b/run_all_experiments.py @@ -254,8 +254,8 @@ def parse_args() -> argparse.Namespace: parser.add_argument('-rag', '--rag-classifier', action='store_true', - default=False, - help='Enable the RAG-based build error classifier (default: off).') + default=True, + help='Enable the RAG-based build error classifier (default: on).') parser.add_argument('--custom-pipeline', type=str, default='') parser.add_argument('-mr', '--max-round', @@ 
-532,9 +532,9 @@ def main(): args = parse_args() _setup_logging(args.log_level, is_cloud=args.cloud_experiment_name != '') - logger.info('[dbg] agent=%s rag_classifier=%s model=%s bench_yaml=%s bench_dir=%s', - args.agent, args.rag_classifier, args.model, - getattr(args, 'benchmark_yaml', ''), getattr(args, 'benchmarks_directory', '')) + # logger.info('[dbg] agent=%s rag_classifier=%s model=%s bench_yaml=%s bench_dir=%s', + # args.agent, args.rag_classifier, args.model, + # getattr(args, 'benchmark_yaml', ''), getattr(args, 'benchmarks_directory', '')) logger.info('Starting experiments on PR branch') # Capture time at start From 5510ec3385fb0b2865ab667201c30729f17c6461 Mon Sep 17 00:00:00 2001 From: iany0 <820447623@qq.com> Date: Tue, 28 Oct 2025 23:00:53 +1100 Subject: [PATCH 11/11] memories from Gemini2.5 logs & updated error type classification --- agent/prototyper.py | 2 +- helper/error_classifier.py | 31 ++++++- helper/error_patterns.yaml | 160 ++++++++++++++++--------------------- 3 files changed, 102 insertions(+), 91 deletions(-) diff --git a/agent/prototyper.py b/agent/prototyper.py index d9a9217f88..eb8be76301 100644 --- a/agent/prototyper.py +++ b/agent/prototyper.py @@ -376,7 +376,7 @@ def _generate_prompt_from_build_result( if rag_enabled: # Use RAG-based classifier to build a targeted prompt. 
error_classifier = BuildErrorClassifier("helper/error_patterns.yaml") - classification = error_classifier.classify(compile_log) + classification = error_classifier.classify_by_line(compile_log, trial=build_result.trial) logger.debug("=== Compilation Log Start ===\n%s\n=== Compilation Log End ===", compile_log, trial=build_result.trial) if classification: diff --git a/helper/error_classifier.py b/helper/error_classifier.py index a6d40c43d6..9b9bd4c2f7 100644 --- a/helper/error_classifier.py +++ b/helper/error_classifier.py @@ -1,5 +1,6 @@ import re import yaml +import logger class BuildErrorClassifier: def __init__(self, error_db_path: str): @@ -17,6 +18,34 @@ def classify(self, compile_log: str) -> dict | None: } return None + def classify_by_line(self, compile_log: str, trial: int | None = None) -> dict | None: + """Return the first matching line's classification (bottom-up).""" + compile_log = compile_log or "" + + lines = compile_log.splitlines() + total_lines = len(lines) + + for rev_idx, line in enumerate(reversed(lines), start=1): + line_no = total_lines - rev_idx + 1 + for error_type, data in self.error_db.items(): + for pattern in data.get("patterns", []): + try: + if re.search(pattern, line, re.IGNORECASE): + logger.info(f"[DEBUG] Line {line_no}: matched {error_type}", trial=trial) + logger.info(f" └─ {line.strip()}", trial=trial) + return { + "type": error_type, + "good": data.get("good", []), + "bad": data.get("bad", []), + "matched_line": line.strip(), + "line_no": line_no, + } + except re.error: + logger.warning(f"[WARN] invalid regex: {pattern}", trial=trial) + continue + + return None + def _find_first_error_msg(self, compile_log: str) -> str | None: match = re.search(r"(.*?)", compile_log, re.DOTALL) if match: @@ -39,7 +68,7 @@ def trim_and_classify_err_msg(self, compile_log:str) -> dict | None: try: match = re.search(pattern, compile_log, re.IGNORECASE) except Exception: - print(f"Error with pattern: {pattern}") + logger.info(f"Error with pattern: 
{pattern}") continue if match: return { diff --git a/helper/error_patterns.yaml b/helper/error_patterns.yaml index 16effc7454..5e8a965b48 100644 --- a/helper/error_patterns.yaml +++ b/helper/error_patterns.yaml @@ -15,26 +15,16 @@ INCLUDE ERROR: - "forward declaration of '.*'" - "cannot open source file" good: - - "Suggest the correct `#include` directive with the accurate path, considering relative paths from the fuzz target's location." - - "Suggest adding the necessary include directory to the compiler's search path (e.g., `-I/path/to/include`)." - - "Leverage umbrella headers that might provide necessary include paths for other project headers." - - "Remove unnecessary includes of internal headers that are not exposed by the public API." - - "If the symbol is from a standard library, suggest the correct standard header." - - "If a symbol belongs to a specific library, ensure the library is linked in the build script." - - "If a header file is not found, suggest adding an include search path to the build script using `-I`." - - "Suggest using include-what-you-use for better include management." - - "If multiple headers could provide the symbol, prioritize the most specific or commonly used one." + - "Use repository-relative includes: compute the correct path from the fuzz target, verify the header exists, and include the shortest include-able path." + - "Add a single target-scoped `-I` that points exactly to the header's root; avoid broad/global include path changes." + - "Prefer public/umbrella headers; remove private/internal headers only if unnecessary and ensure required declarations remain available." + - "For standard library symbols, include the exact standard header (e.g., `memcpy` → `<string.h>`/`<cstring>`, `size_t` → `<stddef.h>`)." + - "If compilation succeeds but linking reports undefined references, add the correct `-l` and, if needed, `-L`; treat this as a link step fix, not an include fix."
bad: - - "Not verifying if a header file exists in the standard include paths or if it requires a specific include path setting in the build script." - - "Using absolute paths when relative paths would be more appropriate and portable." - - "Adding unnecessary or irrelevant includes." - - "Removing include paths without ensuring the header is accessible." - - "Insisting on including internal headers that are not meant for direct inclusion." - - "Assuming header file existence based on naming conventions." - - "Not recognizing that the compiler's include search path might differ from the project's root directory." - - "Copying headers to the fuzz target directory as a workaround." - - "Including source files (`.c`, `.cpp`) directly, leading to potential linker errors." - - "Not considering that the required header might be indirectly included through another header, leading to redundant includes." + - "Do not include source files (`.c`/`.cpp`) directly to bypass missing headers." + - "Do not copy headers into the fuzz target directory as a workaround; fix the include path instead." + - "Do not assume the output location of built libraries; check the actual build output before setting `-L` or link flags." + - "Do not submit changes that leave the original compile or link error unresolved; rebuild and confirm the error is cleared." SYNTACTIC ERROR: patterns: @@ -48,6 +38,7 @@ SYNTACTIC ERROR: - 'error: assigning to '' .* '' from incompatible type '' .* ''' - 'error: no member named '' .* '' in '' .* ''' - 'no matching function for call to' + - 'no matching member function for call to' - 'candidate function not viable' - 'requires \d+ arguments, but \d+ were provided' - 'incompatible pointer to .* conversion' @@ -74,24 +65,27 @@ SYNTACTIC ERROR: - 'This header is only to be used internally to libarchive\.' 
- 'class member cannot be redeclared' - 'expected member name or ''\;'' after declaration specifiers' + - 'error: definition of type ''[^'']+'' conflicts with typedef of the same name' + - 'error: exception specification in declaration does not match previous declaration' + - 'error: static declaration of ''[^'']+'' follows non-static declaration' + - "error: member reference base type .* is not a structure or union" + - "mixing declarations and code is incompatible with standards before C99" + - "out-of-line definition of '.*' does not match any declaration" + - "error: alias must point to a defined variable or function" + - "error: the function or variable specified in an alias must refer to its mangled name" + - "error:\\s*'.+?'\\s+is a\\s+(?:private|protected)\\s+member of\\s+'.+?'" good: - - "Remove code that accesses non-existent members of structs, simplifying logic if necessary." - - "Provide correct function prototypes if missing, including `extern \"C\"` if needed." - - "If the error involves a macro, explain its correct usage with concrete, minimal, working examples." - - "Ensure proper usage of parentheses, braces, and semicolons." - - "Check for mismatched types or incorrect function arguments." - - "Replace undefined constants or macros with valid alternatives if available, or remove them if their usage is incorrect." - - "Use code search tools to locate the definition of undefined symbols within the project's source code and include the correct header file." - - "If the error involves templates or generics, verify correct instantiation." - - "Remove redundant definitions of functions or variables if a 'redefinition' error occurs." + - "Fix core syntax first: add missing semicolons, match braces/parentheses, remove stray commas/tokens; rebuild to confirm the first error disappears." 
+ - "Resolve undeclared identifiers/types/macros by locating their in-repo definitions and including the correct header or qualifying the name; for C libraries included in C++ code, wrap the include with `extern \"C\"`." + - "For 'no member named …' errors, open the struct/class definition in the repo and replace the invalid access with an existing public member or provided accessor." + - "Match function signatures and argument types to the declaration found in headers; adjust prototypes/calls accordingly rather than guessing." + - "Use project macros exactly as shown in in-repo examples: supply required arguments, and wrap statement-like macros in braces to keep scope correct." + - "Remove duplicate definitions causing 'redefinition' errors; keep a single definition and rely on the header for declarations." bad: - - "Renaming functions or variables without understanding their purpose." - - "Making assumptions about type definitions or function signatures." - - "Incorrectly assuming the existence of members in structs based on similar data structures." - - "Failing to recognize that certain constants or macros might be tied to specific data types or contexts and cannot be used interchangeably." - - "Relying solely on assumptions or incomplete knowledge of the project's structure to guess the header file." - - "Including incorrect header files based on partial matches or similar symbol names." - - "Not recognizing that certain types might be defined within the project but not exposed through the main public header files." + - "Do not invent members, signatures, or macro arguments; confirm them from headers or in-repo examples before changing code." + - "Do not silence errors by adding ad-hoc prototypes or defining missing members/functions that are not declared by the library." + - "Do not modify upstream project sources or unrelated build targets when fixing a syntactic error in the fuzz target." 
+ - "Do not apply superficial fixes (e.g., adding a semicolon) when the macro or construct changes statement/block structure; fix the real structure." UNDEFINED REFERENCE ERROR: patterns: @@ -103,24 +97,17 @@ UNDEFINED REFERENCE ERROR: - "error: undefined reference to `.*`" - "error: use of undeclared identifier '.*'" good: - - "Correctly identify the undefined symbol and suggest including the header file containing its declaration." - - "If the symbol is in a library, verify that the library is linked correctly and in the proper order." - - "Suggest adding the `extern \"C\"` linkage specifier to a fuzz target function if it's missing and the linker cannot find the function." - - "Verify function signature compatibility between declaration and definition." - - "If the symbol is a class member, ensure the class is fully defined and accessible." - - "If the symbol is in a namespace, ensure the namespace is correctly used." - - "If the existing fuzz target works correctly, try to replicate its linking approach." - - "If the undefined reference is to a function in an external library, ensure the library is linked correctly in the build script." - - "If explicit linking fails, revert to using build system variables (like `$LIB_FUZZING_ENGINE`) for linking the fuzzing engine library." + - "Make the missing symbol linkable: compile the needed source or add the providing library with -l..., include its search path with -L..., and keep link order correct (objects first, deps after)." + - "Include the correct header(s) that declare the symbol; replace any direct .cpp inclusions with headers and bring in required transitive includes." + - "Match the exact API: correct function signature, namespace/class membership, and identifier case; don't invent overloads." + - "For C++ calling C APIs, wrap the C headers (or a shim) with extern \"C\" to avoid name-mangling issues." 
+ - "If the symbol lives in a sub-library or is gated by feature macros, link that sub-library and enable the macro via build flags (-D...) or a #define placed before relevant includes." + - "Mirror a known-working project setup (existing fuzzers/build scripts), including using the build system's fuzzing engine variable (e.g., $LIB_FUZZING_ENGINE) when appropriate." + - "Ensure the fuzz entrypoint LLVMFuzzerTestOneInput exists exactly once with the correct signature and is linked with the fuzzing engine." bad: - - "Forward declaring a function without providing its definition." - - "Suggesting alternative functions without ensuring they have the required functionality." - - "Providing an incorrect definition for the undefined symbol." - - "Confusing free functions with member functions or static methods." - - "Not considering the possibility of missing library linkages." - - "Suggesting adding function prototypes without including the necessary header files or linking the libraries where the functions are defined." - - "Suggesting changes to compiler or linker flags when restricted to modifying only the source code or build script." - - "Failing to recognize that the undefined reference to the fuzz target entry point is a fundamental requirement and cannot be removed." + - "Do not try to 'fix' undefined references by adding only forward declarations or by inventing stub definitions." + - "Do not suggest alternative functions without ensuring they have the required functionality." + - "Do not include .cpp files directly or add random headers that cause conflicting types." LINKER ERROR: patterns: @@ -139,22 +126,18 @@ LINKER ERROR: - 'no such file or directory: .*\.a' - '/usr/bin/ld: cannot find -ljsoncpp' good: - - "Check for inconsistencies in symbol declarations and definitions across different files." - - "Ensure that the linker knows where to find the built library using `-L`." - - "Link the library using `-l` followed by the library name." 
- - "If the library has dependencies, ensure those are also built and linked." - - "If dynamic linking fails, try relying on statically linked libraries if they are available." - - "Inspect the build system configuration (CMakeLists.txt, Makefile.am) to understand how libraries are linked." - - "Combine wildcard and explicit linking to ensure all necessary libraries are included." - - "Place the C++ standard library and other system libraries after other libraries in the linker command." + - "Ensure the missing symbol's definition is linked: compile needed sources or add the providing library with -l..., and add required search paths with -L.... If dynamic linking still fails at runtime, link available static archives by full path." + - "Derive required libraries/flags from the project's own build files and include needed sub-libraries when the API lives outside the primary library." + - "Order link inputs correctly: place objects/targets first, then their dependent libraries; put system/C++ standard libraries last." + - "Use proper artifacts (.a/.so) and avoid libtool .la files on the link line." + - "When C++ calls C APIs, wrap the C headers with extern \"C\" to avoid name-mangling issues." + - "Scope sanitizer/fuzzer flags to the fuzz target; build project libraries with their normal flags to avoid duplicate/conflicting symbols." bad: - - "Suggesting including `.c/.cpp` files directly, leading to multiple definition errors." - - "Misunderstanding the one definition rule." - - "Incorrectly stating that the linker is unable to find specific libraries when the error message doesn't mention those libraries." - - "Assuming the linker needs dynamic versions of libraries when static versions are already linked." - - "Not providing a concrete solution for missing library paths, such as using `-L`." + - "Do not include .c/.cpp files directly into the fuzz target—compile them separately and link the outputs." 
+ - "Do not remove functional code (e.g., cleanup/free) as a primary fix; only consider after standard linking steps are exhausted." + - "Do not attempt to fix 'undefined reference' errors by only adding forward declarations; this addresses compiler-level declaration issues, not missing definitions at link time." -BUILD CONFIGURATION ERROR: +BUILD SCRIPT ERROR: patterns: - 'make: \*\*\* No rule to make target .*' - 'CMake Error:.*' @@ -162,46 +145,45 @@ BUILD CONFIGURATION ERROR: - 'CMAKE_(C|CXX)_COMPILER not set' - 'ninja: error: loading ''build\.ninja'': No such file or directory' - 'The source directory .* does not appear to contain CMakeLists\.txt' - - 'Policy CMP\d+ is not set' - - 'The OLD behavior for policy CMP\d+ will be removed' - - 'This warning is for project developers\. *Use -Wno-dev to suppress it' - '/src/build\.sh: line \d+: syntax error' - 'sed: -e expression #1, char \d+: (Invalid content of \{\}|extra characters after command)' - 'sed: can''t read.*No such file or directory' - - '/src/build\.sh: line \d+: unbound variable' - - 'unbound variable' + - '/src/build\.sh: line \d+:( .*:)? 
unbound variable' - 'configure: error: .* not found' - - 'configure: WARNING: .*' - - 'autoreconf: ''configure\.(ac|in)'' is required' + - "autoreconf:\\s*['\\\"]configure\\.(?:ac|in)['\\\"].*is required" - 'Could not find a package configuration file provided by .*' - 'CMake Error at CMakeLists\.txt:\d+ \(find_package\)' - 'By not providing "Find.*\.cmake" in CMAKE_MODULE_PATH.*' - - 'debconf: delaying package configuration, since apt-utils is not installed' - '/src/build\.sh: line \d+: fuzzer/CMakeLists\.txt: No such file or directory' - - 'WARNING: png library not available - no png\.h' - '\./aom_configure: No such file or directory' - - 'DWARF error: invalid or unhandled FORM value.*' - - 'clang\+\+: error: no such file or directory: ''\$LIB_FUZZING_ENGINE''' - 'configure: error: .* (required|dependency) .* (not found|not available|missing)' - 'configure: error: .* but required .* (library|package).* not available' - 'configure: error: .* requires .* (but it is not installed|not found|not available)' + - 'configure: error: C compiler cannot create executables' + - 'configure: error: in `.*`:' + - 'CMake Error at .*' + - 'CMake Generate step failed' + - 'No SOURCES given to target: .*' + - '(?m)^\.\/autogen\.sh:\s+\d+:\s+\S+: not found' + - '(?m)^libtool:\s+error:\s+cannot find the library\s+["'']?.*\.la["'']?' + - '(?m)^libtool:\s+error:\s+unhandled argument\s+["'']?.*\.la["'']?' + - '(?m)^Makefile\.am:\d+:\s+error:.*' good: + - "Systematically identify and enable/disable all necessary CMake options that conditionally define targets, recognizing interdependencies and potential conflicts." + - "Investigate specialized build directories (e.g., oss-fuzz/) and consider altering the cmake source directory if iterative flag changes fail." + - "Verify make target names precisely match definitions, including full relative paths for nested targets, using cp commands as clues."
+ - "Ensure all necessary libraries and object files are explicitly linked, especially when modifying build loops or target definitions." + - "Recognize and adapt to project-specific build setups for fuzzing or testing, which may differ from general project builds." - "Inspect build system config files (Makefile, CMakeLists.txt, meson.build, configure.ac)." - - "Verify build rules and target dependencies are correctly defined and ordered." - - "Check for missing dependencies/incorrect paths; install exact packages (e.g., libssl-dev, zlib1g-dev)." - - "Clean build dir and rebuild (e.g., rm -rf build && mkdir build)." - "Set compiler/linker flags via build system (CMAKE_C_FLAGS, target_link_libraries)." - - "Check environment variables (PKG_CONFIG_PATH, CMAKE_PREFIX_PATH, LD_LIBRARY_PATH)." - "Leverage official project scripts (oss-fuzz.sh, build.sh) when available." bad: - - "Suggesting modifications to CMake commands or flags when the LLM is restricted to modifying only the source code or a limited build script." - - "Removing the sourcing of configure.sh without understanding its purpose." - - "Suggesting copying the fuzz target source file to the current directory as a solution to a 'no such file or directory' error." - - "Suggesting setting CFLAGS and CXXFLAGS before the configure step, potentially interfering with the configure process." + - "Do not remove required setup steps (e.g., sourcing configure.sh or running configure) without providing an equivalent in the build script." + - "Avoid blindly adding or removing CMake flags without understanding their purpose or impact, especially if the error message persists. Do not assume a single flag is a silver bullet." + - "Do not assume a new build strategy will work without verifying intermediate steps (e.g., cmake successfully generating a Makefile). Do not assume a library's main archive links all internal symbols." 
+ - "Do not suggest modifying source code when the error message clearly indicates a build system configuration problem." - "Incorrectly assuming the location of necessary files, the fuzz target is modified in place from the original fuzz target." - - "Using repeated or incorrect sed commands on build files, causing accumulated or broken changes." - - "Overcomplicating build processes when official project script commands suffice." - - "Hardcoding invalid include paths, flags, or library names." + - "Do not apply repeated or incorrect sed commands to build files; they leave accumulated or broken changes." CORRUPTED CODE ERROR: @@ -239,4 +221,4 @@ CORRUPTED CODE ERROR: - "Not recognizing common corruption patterns (e.g., incorrect indentation, missing braces)." - "Applying generic fixes without understanding the specific corruption." - "Not considering the context of the corrupted code within the larger project." - - "Ignoring or dismissing the corruption without attempting a fix." + - "Ignoring or dismissing the corruption without attempting a fix." \ No newline at end of file