diff --git a/.gitignore b/.gitignore index 85a8bb1..776b95e 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,6 @@ run_nagini.py /tmp results results/* +/log_tries/ +/log_tries/* .direnv diff --git a/.gitmodules b/.gitmodules index cb6a2f1..e049f86 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,3 +5,7 @@ [submodule "benches/HumanEval-Dafny"] path = benches/HumanEval-Dafny url = https://github.com/JetBrains-Research/HumanEval-Dafny + +[submodule "benches/HumanEval-Nagini"] + path = benches/HumanEval-Nagini + url = https://github.com/JetBrains-Research/HumanEval-Nagini diff --git a/benches/HumanEval-Nagini b/benches/HumanEval-Nagini new file mode 160000 index 0000000..35d7203 --- /dev/null +++ b/benches/HumanEval-Nagini @@ -0,0 +1 @@ +Subproject commit 35d7203b4378359559f1f505b484962a33476ddd diff --git a/prompts/humaneval-nagini/add.txt b/prompts/humaneval-nagini/add.txt new file mode 100644 index 0000000..7cf6f4b --- /dev/null +++ b/prompts/humaneval-nagini/add.txt @@ -0,0 +1,8 @@ +Given the following Python program, and a set of Nagini invariants, output the program with invariants inserted into the correct place. +Don't add any additional text comments, your response must contain only program with invariants. +Do not provide ANY explanations. Don't include markdown backticks. Respond only in Python code, nothing else. +The program: +{program} +– +The invariants: +{checks} \ No newline at end of file diff --git a/prompts/humaneval-nagini/ask_for_fixed.txt b/prompts/humaneval-nagini/ask_for_fixed.txt new file mode 100644 index 0000000..38f5c7b --- /dev/null +++ b/prompts/humaneval-nagini/ask_for_fixed.txt @@ -0,0 +1,6 @@ +The following errors occurred during verification: +{error} + +Please fix the error by adding, removing or modifying the invariants and return the fixed program. +Don't add any additional text comments, your response must contain only program with invariants. +Do not provide ANY explanations. Don't include markdown backticks. Respond only in Python code, nothing else. \ No newline at end of file diff --git a/prompts/humaneval-nagini/ask_for_fixed_had_errors.txt b/prompts/humaneval-nagini/ask_for_fixed_had_errors.txt new file mode 100644 index 0000000..9e6f1ab --- /dev/null +++ b/prompts/humaneval-nagini/ask_for_fixed_had_errors.txt @@ -0,0 +1,6 @@ +There are still some errors: +{error} + +Could you please fix them? +Don't add any additional text comments, your response must contain only program with invariants. +Do not provide ANY explanations. Don't include markdown backticks. Respond only in Python code, nothing else. \ No newline at end of file diff --git a/prompts/humaneval-nagini/produce.txt b/prompts/humaneval-nagini/produce.txt new file mode 100644 index 0000000..284edfc --- /dev/null +++ b/prompts/humaneval-nagini/produce.txt @@ -0,0 +1,7 @@ +Given the following Python program, output Nagini invariants that should go into the `while` loop. +Ensure that the invariants are as comprehensive as they can be. +Even if you think some invariant is not totally necessary, better add it than not. +Don't add any additional text comments, your response must contain only program with invariants. +Do not provide ANY explanations. Don't include markdown backticks. Respond only in Python code, nothing else. +The program: +{program} \ No newline at end of file diff --git a/prompts/humaneval-nagini/rewrite.txt b/prompts/humaneval-nagini/rewrite.txt new file mode 100644 index 0000000..61be8aa --- /dev/null +++ b/prompts/humaneval-nagini/rewrite.txt @@ -0,0 +1,42 @@ +Rewrite the following Python program, adding correct Nagini invariants into `while` loops. +Do not change the code, only add the invariants. +Ensure that the invariants are as comprehensive as they can be. +Even if you think some invariant is not totally necessary, better add it than not. +Don't add any additional text comments, your response must contain only program with invariants. +Do not provide ANY explanations. Don't include markdown backticks. Respond only in Python code, nothing else. +Also add assertions in necessary places. +Do not change the code, only add invariants and assertions. Don't remove any helper functions, they are there to help you. +You might need to work with accumulating functions, such as sum, so here's an example of how to do that: +``` +from typing import cast, List, Dict, Set, Optional, Union +from nagini_contracts.contracts import * + +@Pure +def Sum(a : List[int], s : int, t : int) -> int : + Requires(Acc(list_pred(a))) + Requires(((0) <= (s)) and ((s) <= (t)) and ((t) <= (len(a)))) + + if s == t: + return 0 + else: + return (a)[t - 1] + (Sum(a, s, t - 1)) + +def sum_loop(numbers: List[int]) -> int: + Requires(Acc(list_pred(numbers))) + Ensures(Acc(list_pred(numbers))) + Ensures(Result() == Sum(numbers, 0, len(numbers))) + s = int(0) + i = int(0) + while (i) < (len(numbers)): + Invariant(Acc(list_pred(numbers))) + Invariant(0 <= i and i <= len(numbers)) + Invariant(Forall(int, lambda d_1_p_: + (Implies(0 <= d_1_p_ and d_1_p_ < len(numbers), Sum(numbers, 0, d_1_p_ + 1) == Sum(numbers, 0, d_1_p_) + numbers[d_1_p_]), [[Sum(numbers, 0, d_1_p_ + 1)]]))) + Invariant(s == Sum(numbers, 0, i)) + Assert(Sum(numbers, 0, i + 1) == Sum(numbers, 0, i) + numbers[i]) + s = s + (numbers)[i] + i = i + 1 + return s +``` +The program: +{program} diff --git a/prompts/humaneval-nagini/sys.txt b/prompts/humaneval-nagini/sys.txt new file mode 100644 index 0000000..e9979b1 --- /dev/null +++ b/prompts/humaneval-nagini/sys.txt @@ -0,0 +1,4 @@ +You are an expert in a Python verification framework Nagini. +You will be given tasks dealing with Python programs including precise docstrings and specifications. +Do not provide ANY explanations. Don't include markdown backticks. Respond only in Python code, nothing else. +Take into account that arrays inside the invariants are indexed by type `int`. \ No newline at end of file diff --git a/prompts/humaneval-nagini/timeout.txt b/prompts/humaneval-nagini/timeout.txt new file mode 100644 index 0000000..1c50276 --- /dev/null +++ b/prompts/humaneval-nagini/timeout.txt @@ -0,0 +1,3 @@ +The verifier timed out during the verification. +This usually means that the provided invariants were too broad or were difficult to check. +Could you please try to improve the invariants and try again? \ No newline at end of file diff --git a/tests/test_nagini.py b/tests/test_nagini.py index 1e994a1..0679ac8 100644 --- a/tests/test_nagini.py +++ b/tests/test_nagini.py @@ -11,8 +11,10 @@ def test_nagini_generate(): def main(value: int) -> int: Requires(value >= 10) Ensures(Result() >= 20) + # impl-start Assert(value * 2 >= 20) # assert-line - return value * 2""" + return value * 2 + # impl-end""" ) assert nagini_lang.generate_validators(code) == dedent( """\ @@ -43,8 +45,12 @@ def main(value: int) -> int: assert nagini_lang.generate_validators(code) == dedent( """\ def main_valid(value: int) -> int: + # pre-conditions-start Requires(value >= 10) + # pre-conditions-end + # post-conditions-start Ensures(Result() >= 20) + # post-conditions-end ret = main(value) return ret""" ) @@ -247,17 +253,23 @@ def alpha_valid(c : int) -> bool : ret = alpha(c) return ret def flip__char_valid(c : int) -> int : + # pre-conditions-start Ensures(lower(c) == upper(Result())) Ensures(upper(c) == lower(Result())) + # pre-conditions-end ret = flip__char(c) return ret def flip__case_valid(s : List[int]) -> List[int] : + # pre-conditions-start Requires(Acc(list_pred(s))) + # pre-conditions-end + # post-conditions-start Ensures(Acc(list_pred(s))) Ensures(Acc(list_pred(Result()))) Ensures((len(Result())) == (len(s))) Ensures(Forall(int, lambda d_0_i_: (Implies(((0) <= (d_0_i_)) and ((d_0_i_) < (len(s))), lower((s)[d_0_i_]) == upper((Result())[d_0_i_]))))) Ensures(Forall(int, lambda d_0_i_: (Implies(((0) <= (d_0_i_)) and ((d_0_i_) < (len(s))), upper((s)[d_0_i_]) == lower((Result())[d_0_i_]))))) + # post-conditions-end ret = flip__case(s) return ret""" ) @@ -287,8 +299,10 @@ def flip__char(c : int) -> int : assert nagini_lang.generate_validators(code) == dedent( """\ def flip__char_valid(c : int) -> int : + # pre-conditions-start Ensures(lower(c) == upper(Result())) Ensures(upper(c) == lower(Result())) + # pre-conditions-end ret = flip__char(c) return ret""" ) diff --git a/verified_cogen/args.py b/verified_cogen/args.py index 64ba3fe..22a604c 100644 --- a/verified_cogen/args.py +++ b/verified_cogen/args.py @@ -23,6 +23,7 @@ class ProgramArgs: output_style: str filter_by_ext: Optional[str] log_tries: Optional[str] + output_logging: bool @no_type_check def __init__(self, args): @@ -43,6 +44,7 @@ def __init__(self, args): self.output_style = args.output_style self.filter_by_ext = args.filter_by_ext self.log_tries = args.log_tries + self.output_logging = args.output_logging def get_default_parser(): @@ -95,6 +97,9 @@ def get_default_parser(): parser.add_argument( "--log-tries", help="Save output of every try to given dir", required=False ) + parser.add_argument( + "--output-logging", help="Print logs to standard output", default=False + ) return parser diff --git a/verified_cogen/experiments/incremental_run.py b/verified_cogen/experiments/incremental_run.py index 41c4fe4..dcdfb73 100644 --- a/verified_cogen/experiments/incremental_run.py +++ b/verified_cogen/experiments/incremental_run.py @@ -1,5 +1,6 @@ import logging import pathlib +import sys import json from verified_cogen.llm.llm import LLM @@ -15,6 +16,13 @@ logger = logging.getLogger(__name__) +def register_output_handler(): + handler = logging.StreamHandler(sys.stdout) + formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") + handler.setFormatter(formatter) + logger.addHandler(handler) + + def main(): register_basic_languages() @@ -26,6 +34,9 @@ def main(): assert args.runs == 1 assert args.retries == 0 + if args.output_logging: + register_output_handler() + directory = pathlib.Path(args.dir) log_tries = pathlib.Path(args.log_tries) if args.log_tries is not None else None results_directory = pathlib.Path("results") @@ -52,8 +63,9 @@ def main(): args.temperature, ) runner = ValidatingRunner( - wrapping=InvariantRunner(llm, logger, verifier, log_tries), + wrapping=InvariantRunner(llm, logger, verifier), language=language, + log_tries=log_tries, ) display_name = rename_file(file) marker_name = str(file.relative_to(directory)) diff --git a/verified_cogen/runners/languages/dafny.py b/verified_cogen/runners/languages/dafny.py index d6a58c4..7991632 100644 --- a/verified_cogen/runners/languages/dafny.py +++ b/verified_cogen/runners/languages/dafny.py @@ -23,4 +23,5 @@ def __init__(self): # type: ignore r" *// invariants-start.*?// invariants-end\n", ], "// assert-line", + "//", ) diff --git a/verified_cogen/runners/languages/language.py b/verified_cogen/runners/languages/language.py index 388feff..9182631 100644 --- a/verified_cogen/runners/languages/language.py +++ b/verified_cogen/runners/languages/language.py @@ -1,10 +1,10 @@ from abc import abstractmethod from typing import Pattern, Any -import re class Language: _instance = None + simple_comment: str def __new__(cls, *args: list[Any], **kwargs: dict[str, Any]): if not isinstance(cls._instance, cls): @@ -33,14 +33,15 @@ def __init__( # type: ignore validator_template: str, assert_invariants_pattern: list[str], inline_assert_comment: str, + simple_comment: str, ): + self.simple_comment = simple_comment self.method_regex = method_regex self.validator_template = validator_template self.assert_invariant_patterns = assert_invariants_pattern self.inline_assert_comment = inline_assert_comment def generate_validators(self, code: str) -> str: - code = re.sub(r"^ *#.*(\r\n|\r|\n)?", "", code, flags=re.MULTILINE) methods = self.method_regex.finditer(code) validators: list[str] = [] diff --git a/verified_cogen/runners/languages/nagini.py b/verified_cogen/runners/languages/nagini.py index b5b7455..8a9f421 100644 --- a/verified_cogen/runners/languages/nagini.py +++ b/verified_cogen/runners/languages/nagini.py @@ -16,7 +16,7 @@ class NaginiLanguage(GenericLanguage): def __init__(self): # type: ignore super().__init__( re.compile( - r"def\s+(\w+)\s*\((.*?)\)\s*->\s*(.*?):(:?(?:\r\n|\r|\n)?( *(?:Requires|Ensures)\([^\r\n]*\)(?:\r\n|\r|\n)?)*)", + r"def\s+(\w+)\s*\((.*?)\)\s*->\s*(.*?):(.*?(\r\n|\r|\n))\s+# impl-start", re.DOTALL, ), NAGINI_VALIDATOR_TEMPLATE, @@ -25,4 +25,5 @@ def __init__(self): # type: ignore r" *# invariants-start.*?# invariants-end\n?", ], "# assert-line", + "#", ) diff --git a/verified_cogen/runners/validating.py b/verified_cogen/runners/validating.py index 88f8c74..28f6d02 100644 --- a/verified_cogen/runners/validating.py +++ b/verified_cogen/runners/validating.py @@ -1,3 +1,4 @@ +import pathlib from typing import Optional from verified_cogen.runners import Runner from verified_cogen.runners.languages.language import Language @@ -9,14 +10,20 @@ class ValidatingRunner(Runner): language: Language prg: Optional[str] = None - def __init__(self, wrapping: Runner, language: Language): - super().__init__(wrapping.llm, wrapping.logger, wrapping.verifier) + def __init__( + self, + wrapping: Runner, + language: Language, + log_tries: Optional[pathlib.Path] = None, + ): + super().__init__(wrapping.llm, wrapping.logger, wrapping.verifier, log_tries) self.wrapped_runner = wrapping self.language = language def _add_validators(self, prg: str, inv_prg: str): validators = self.language.generate_validators(prg) - val_prg = inv_prg + "\n// ==== verifiers ==== //\n" + validators + comment = self.language.simple_comment + val_prg = inv_prg + "\n" + comment + " ==== verifiers ==== \n" + validators return val_prg def preprocess(self, prg: str, mode: Mode) -> str: diff --git a/verified_cogen/tools/verifier.py b/verified_cogen/tools/verifier.py index 274a14b..d3b0002 100644 --- a/verified_cogen/tools/verifier.py +++ b/verified_cogen/tools/verifier.py @@ -1,6 +1,5 @@ import logging import os -import signal import subprocess from pathlib import Path from typing import Optional @@ -15,13 +14,20 @@ def __init__(self, shell: str, verifier_cmd: str, timeout: int = 60): self.timeout = timeout def verify(self, file_path: Path) -> Optional[tuple[bool, str, str]]: - proc = subprocess.Popen( - [self.shell, "-i", "-l", "-c", f'{self.verifier_cmd} "{file_path}"'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) try: - out, err = proc.communicate(timeout=self.timeout) - return proc.returncode == 0, out.decode(), err.decode() + res = subprocess.run( + '{} -i -l -c "{} "{}""; exit'.format( + self.shell, self.verifier_cmd, file_path + ), + capture_output=True, + shell=True, + timeout=self.timeout, + ) except subprocess.TimeoutExpired: - os.killpg(os.getpgid(proc.pid), signal.SIGTERM) + os.system("killall z3") + return None + return ( + res.returncode == 0, + res.stdout.decode("utf-8"), + res.stderr.decode("utf-8"), + )