Skip to content

Commit 78287b3

Browse files
committed
adapt for mac
1 parent: 312c5e5 · commit: 78287b3

3 files changed

Lines changed: 12 additions & 8 deletions

File tree

human_eval/evaluate_functional_correctness.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,5 +24,5 @@ def entry_point(
2424
def main():
2525
fire.Fire(entry_point)
2626

27-
28-
sys.exit(main())
27+
if __name__ == "__main__":
28+
sys.exit(main())

human_eval/evaluation.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@ def evaluate_functional_correctness(
4242
n_workers: int = 4,
4343
timeout: float = 3.0,
4444
problem_file: str = HUMAN_EVAL,
45+
ignore_incomplete: bool = True
4546
):
4647
"""
4748
Evaluates the functional correctness of generated samples, and writes
@@ -68,7 +69,8 @@ def evaluate_functional_correctness(
6869
completion_id[task_id] += 1
6970
n_samples += 1
7071

71-
assert len(completion_id) == len(problems), "Some problems are not attempted."
72+
if not ignore_incomplete:
73+
assert len(completion_id) == len(problems), "Some problems are not attempted."
7274

7375
print("Running test suites...")
7476
for future in tqdm.tqdm(as_completed(futures), total=len(futures)):

human_eval/execution.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import faulthandler
55
import io
66
import os
7-
import multiprocessing
7+
import multiprocess
88
import platform
99
import signal
1010
import tempfile
@@ -36,11 +36,13 @@ def unsafe_execute():
3636

3737
# Construct the check program and run it.
3838
check_program = (
39-
problem["prompt"] + completion + "\n" +
39+
#problem["prompt"] + completion + "\n" +
40+
completion + "\n" +
4041
problem["test"] + "\n" +
4142
f"check({problem['entry_point']})"
4243
)
4344

45+
print(check_program)
4446
try:
4547
exec_globals = {}
4648
with swallow_io():
@@ -55,7 +57,7 @@ def unsafe_execute():
5557
# information on how OpenAI sandboxes its code, see the accompanying paper.
5658
# Once you have read this disclaimer and taken appropriate precautions,
5759
# uncomment the following line and proceed at your own risk:
58-
# exec(check_program, exec_globals)
60+
exec(check_program, exec_globals)
5961
result.append("passed")
6062
except TimeoutException:
6163
result.append("timed out")
@@ -67,10 +69,10 @@ def unsafe_execute():
6769
os.rmdir = rmdir
6870
os.chdir = chdir
6971

70-
manager = multiprocessing.Manager()
72+
manager = multiprocess.Manager()
7173
result = manager.list()
7274

73-
p = multiprocessing.Process(target=unsafe_execute)
75+
p = multiprocess.Process(target=unsafe_execute)
7476
p.start()
7577
p.join(timeout=timeout + 1)
7678
if p.is_alive():

0 commit comments

Comments
 (0)