nuprl · rudolf-adamkovic · Feb 10, 2026
diff --git a/dataset_builder/humaneval_to_el.py b/dataset_builder/humaneval_to_el.py
@@ -0,0 +1,66 @@
+"""
+This script translates problems from the OpenAI HumanEval dataset into Emacs Lisp.
+
+- Home: https://www.gnu.org/software/emacs/
+- Reference manual: https://www.gnu.org/software/emacs/manual/elisp.html
+- Test library: https://www.gnu.org/software/emacs/manual/ert.html
+"""
+
+import ast
+from typing import List
+
+
+class Translator:
+    """Render HumanEval prompts and test-case values as Emacs Lisp source text.
+
+    Every ``gen_*`` method receives already-translated sub-expressions as
+    strings and returns the Emacs Lisp text for the enclosing expression.
+    """
+
+    # Presumably the operator text the shared translation driver emits for
+    # Python's unary minus -- confirm against the driver.
+    USub = "-"
+
+    # Each entry is a newline followed by the start of a new top-level form or
+    # comment; presumably used as generation stop sequences -- confirm against
+    # the caller.
+    stop = ["\n(defun", "\n;", "\n("]
+
+    # `t` and `nil` are self-evaluating constants in Emacs Lisp and cannot be
+    # bound as function parameters, so such Python parameter names are renamed.
+    _constant_symbols = frozenset({"t", "nil"})
+
+    @staticmethod
+    def _escape_string(text: str) -> str:
+        """Escape backslashes and double quotes for an Emacs Lisp string."""
+        # Backslashes go first so the ones added for quotes are not doubled.
+        return text.replace("\\", "\\\\").replace('"', '\\"')
+
+    def _param_name(self, name: str) -> str:
+        """Return a bindable Emacs Lisp parameter name for a Python one."""
+        return name + "_" if name in self._constant_symbols else name
+
+    def file_ext(self):
+        """Return the file extension (without a dot) of generated programs."""
+        return "el"
+
+    def translate_prompt(
+        self, name: str, args: List[ast.arg], _returns, description: str
+    ) -> str:
+        """Build the prompt: a lexical-binding cookie and an open ``defun``.
+
+        The result ends just after the docstring, indented for the first body
+        form. ``name`` is also remembered on ``self.entry_point``. Parameters
+        called ``t`` or ``nil`` get a trailing underscore because Emacs Lisp
+        cannot bind those symbols; the description text is left as given.
+        """
+        self.entry_point = name
+        el_preamble = ";;; -*- lexical-binding: t; -*-"
+        el_args = " ".join(self._param_name(arg.arg) for arg in args)
+        # The description becomes the docstring, so it needs the same escaping
+        # as any other string literal.
+        el_description = self._escape_string(description)
+        return f'{el_preamble}\n(defun {name} ({el_args})\n  "{el_description}"\n  '
+
+    def test_suite_prefix_lines(self, entry_point) -> List[str]:
+        """Return the lines that open the ERT test wrapping all assertions."""
+        return [
+            f"(defalias #'candidate #'{entry_point})",
+            "(ert-deftest test-human-eval ()",
+        ]
+
+    def test_suite_suffix_lines(self) -> List[str]:
+        """Return the lines that close the ``ert-deftest`` form."""
+        return [")"]
+
+    def deep_equality(self, left: str, right: str) -> str:
+        """Return one assertion comparing two expressions with ``equal``."""
+        return f"  (should (equal {left} {right}))"
+
+    def gen_literal(self, c: bool | str | int | float | None):
+        """Return the Emacs Lisp spelling of a Python constant.
+
+        Booleans map to ``t``/``nil``, ``None`` to ``nil``, strings to quoted
+        and escaped string literals; anything else falls back to ``repr``.
+        """
+        # bool is tested before the repr fallback so True/False never render
+        # as Python's "True"/"False".
+        if type(c) is bool:
+            return "t" if c else "nil"
+        elif type(c) is str:
+            return f'"{self._escape_string(c)}"'
+        elif c is None:
+            return "nil"
+        return repr(c)
+
+    def gen_var(self, variable: str) -> str:
+        """Return a variable reference; the name is emitted unchanged."""
+        return variable
+
+    def gen_list(self, list: List[str]) -> str:
+        """Return a ``(list ...)`` form holding the given element expressions."""
+        return "(list " + " ".join(list) + ")"
+
+    def gen_tuple(self, tuple: List[str]) -> str:
+        """Return a ``(list ...)`` form; tuples are rendered exactly like lists."""
+        return "(list " + " ".join(tuple) + ")"
+
+    def gen_dict(self, keys: List[str], values: List[str]) -> str:
+        """Return an association list with one ``(cons key value)`` per entry."""
+        pairs = " ".join(f"(cons {k} {v})" for k, v in zip(keys, values))
+        return "(list " + pairs + ")"
+
+    def gen_call(self, func: str, args: List[str]) -> str:
+        """Return a call form ``(func arg ...)``."""
+        # Joining function and arguments together avoids a stray space before
+        # the closing parenthesis when there are no arguments.
+        return "(" + " ".join([func, *args]) + ")"
diff --git a/dataset_builder/libexperiments.py b/dataset_builder/libexperiments.py
@@ -42,6 +42,7 @@ def path(self) -> Path:
     "elixir",
     "clj",
     "ada",
+    "el",
 ]
 MODELS = ["davinci", "incoder", "codegen"]
 

diff --git a/dataset_builder/terms.csv b/dataset_builder/terms.csv
@@ -26,3 +26,4 @@ Matlab,m,array,array,array,dictionary,<missing>,true,false
 Haskell,hs,list,list,tuple,association list,Nothing,True,False
 Clojure,clj,vector,list,vector,map,nil,true,false
 Dart,dart,list,list,record,map,null,true,false
+Emacs Lisp,el,list,list,list,alist,nil,t,nil
diff --git a/evaluation/Dockerfile b/evaluation/Dockerfile
@@ -106,6 +106,9 @@ RUN apt-get update -yqq && apt-get install -yqq dart
 # Lean
 # RUN wget https://github.com/leanprover/lean4/releases/download/v4.6.0-rc1/lean-4.6.0-rc1-linux.zip -O /tmp/lean.zip && unzip /tmp/lean.zip -d /root/lean/ && ln -s /root/lean/bin/lean /bin/lean
 
+# Emacs Lisp (no-X/GUI version). Uses apt-get with a package-list refresh,
+# matching the other install steps in this file.
+RUN apt-get update -yqq && apt-get install -yqq emacs-nox
+
 # install numpy for humanevalplus
 RUN python3 -m pip install numpy
 

diff --git a/evaluation/src/containerized_eval.py b/evaluation/src/containerized_eval.py
@@ -29,6 +29,7 @@
 import eval_v
 import eval_lean
 import eval_dart
+import eval_el
 import tempfile
 
 
@@ -65,6 +66,7 @@
     "coq": (eval_v.eval_script, ".v"),
     "lean": (eval_lean.eval_script, ".lean"),
     "dart": (eval_dart.eval_script, ".dart"),
+    "el": (eval_el.eval_script, ".el"),
 }
 
 def eval_string_script(language, program):

diff --git a/evaluation/src/eval_el.py b/evaluation/src/eval_el.py
@@ -0,0 +1,33 @@
+"""
+Evaluates a generated Emacs Lisp program (.el).
+"""
+from pathlib import Path
+from safe_subprocess import run
+
+def eval_script(path: Path):
+    """Run the ERT tests in the Emacs Lisp file at ``path`` and classify the run.
+
+    Emacs is started in batch mode, loads ``ert`` and then the file, and calls
+    ``ert-run-tests-batch-and-exit``. The returned dict has the keys
+    ``status`` ("OK", "Timeout" or "Exception"), ``exit_code``, ``stdout``
+    and ``stderr``.
+    """
+    command = ["emacs", "-batch"]
+    command += ["-l", "ert"]
+    command += ["-l", str(path)]
+    command += ["-f", "ert-run-tests-batch-and-exit"]
+    outcome = run(command)
+
+    # Summary line expected in stderr when the single test passed.
+    passed_marker = "\nRan 1 tests, 1 results as expected, 0 unexpected"
+
+    if outcome.timeout:
+        status = "Timeout"
+    elif outcome.exit_code == 0 and passed_marker in outcome.stderr:
+        status = "OK"
+    else:
+        # Non-zero exit, or a clean exit without the expected summary line.
+        status = "Exception"
+
+    return dict(
+        status=status,
+        exit_code=outcome.exit_code,
+        stdout=outcome.stdout,
+        stderr=outcome.stderr,
+    )
+
+if __name__ == "__main__":
+    # Script entry point: evaluate the .el file named on the command line and
+    # print the result dict. No main() is defined in this file, so the guard
+    # does the work itself.
+    import sys
+
+    if len(sys.argv) != 2:
+        sys.exit(f"usage: {sys.argv[0]} PROGRAM.el")
+    print(eval_script(Path(sys.argv[1])))
diff --git a/results/humaneval-el-Qwen2.5_Coder_7B-0.2-reworded.csv b/results/humaneval-el-Qwen2.5_Coder_7B-0.2-reworded.csv
@@ -0,0 +1,162 @@
+problem,pass,total
+HumanEval_0_has_close_elements,3,200
+HumanEval_1_separate_paren_groups,0,200
+HumanEval_10_make_palindrome,0,200
+HumanEval_100_make_a_pile,4,200
+HumanEval_101_words_string,0,200
+HumanEval_102_choose_num,0,200
+HumanEval_103_rounded_avg,0,200
+HumanEval_104_unique_digits,0,200
+HumanEval_105_by_length,14,200
+HumanEval_106_f,0,200
+HumanEval_107_even_odd_palindrome,0,200
+HumanEval_108_count_nums,0,200
+HumanEval_109_move_one_ball,0,200
+HumanEval_11_string_xor,179,200
+HumanEval_110_exchange,0,200
+HumanEval_111_histogram,0,200
+HumanEval_112_reverse_delete,0,200
+HumanEval_113_odd_count,0,200
+HumanEval_114_minSubArraySum,25,200
+HumanEval_115_max_fill,1,200
+HumanEval_116_sort_array,0,200
+HumanEval_117_select_words,5,200
+HumanEval_118_get_closest_vowel,0,200
+HumanEval_119_match_parens,0,200
+HumanEval_12_longest,188,200
+HumanEval_120_maximum,2,200
+HumanEval_121_solution,0,200
+HumanEval_122_add_elements,61,200
+HumanEval_123_get_odd_collatz,0,200
+HumanEval_124_valid_date,19,200
+HumanEval_125_split_words,0,200
+HumanEval_126_is_sorted,0,200
+HumanEval_127_intersection,0,200
+HumanEval_128_prod_signs,145,200
+HumanEval_129_minPath,0,200
+HumanEval_13_greatest_common_divisor,200,200
+HumanEval_130_tri,0,200
+HumanEval_131_digits,0,200
+HumanEval_132_is_nested,0,200
+HumanEval_133_sum_squares,99,200
+HumanEval_134_check_if_last_char_is_a_letter,0,200
+HumanEval_135_can_arrange,11,200
+HumanEval_136_largest_smallest_integers,74,200
+HumanEval_137_compare_one,1,200
+HumanEval_138_is_equal_to_sum_even,0,200
+HumanEval_139_special_factorial,0,200
+HumanEval_14_all_prefixes,8,200
+HumanEval_140_fix_spaces,0,200
+HumanEval_141_file_name_check,0,200
+HumanEval_142_sum_squares,0,200
+HumanEval_143_words_in_sentence,0,200
+HumanEval_144_simplify,44,200
+HumanEval_145_order_by_points,0,200
+HumanEval_146_specialFilter,0,200
+HumanEval_147_get_max_triples,17,200
+HumanEval_148_bf,0,200
+HumanEval_149_sorted_list_sum,0,200
+HumanEval_15_string_sequence,200,200
+HumanEval_150_x_or_y,0,200
+HumanEval_151_double_the_difference,0,200
+HumanEval_152_compare,61,200
+HumanEval_153_Strongest_Extension,0,200
+HumanEval_154_cycpattern_check,0,200
+HumanEval_155_even_odd_count,0,200
+HumanEval_156_int_to_mini_roman,71,200
+HumanEval_157_right_angle_triangle,187,200
+HumanEval_158_find_max,20,200
+HumanEval_159_eat,175,200
+HumanEval_16_count_distinct_characters,13,200
+HumanEval_160_do_algebra,0,200
+HumanEval_161_solve,0,200
+HumanEval_162_string_to_md5,86,200
+HumanEval_163_generate_integers,0,200
+HumanEval_17_parse_music,0,200
+HumanEval_18_how_many_times,0,200
+HumanEval_19_sort_numbers,2,200
+HumanEval_2_truncate_number,190,200
+HumanEval_20_find_closest_elements,5,200
+HumanEval_21_rescale_to_unit,200,200
+HumanEval_22_filter_integers,179,200
+HumanEval_23_strlen,200,200
+HumanEval_24_largest_divisor,35,200
+HumanEval_25_factorize,38,200
+HumanEval_26_remove_duplicates,3,200
+HumanEval_27_flip_case,0,200
+HumanEval_28_concatenate,200,200
+HumanEval_29_filter_by_prefix,200,200
+HumanEval_3_below_zero,0,200
+HumanEval_30_get_positive,137,200
+HumanEval_31_is_prime,103,200
+HumanEval_33_sort_third,0,200
+HumanEval_34_unique,157,200
+HumanEval_35_max_element,182,200
+HumanEval_36_fizz_buzz,105,200
+HumanEval_37_sort_even,0,200
+HumanEval_39_prime_fib,0,200
+HumanEval_4_mean_absolute_deviation,85,200
+HumanEval_40_triples_sum_to_zero,0,200
+HumanEval_41_car_race_collision,0,200
+HumanEval_42_incr_list,200,200
+HumanEval_43_pairs_sum_to_zero,21,200
+HumanEval_44_change_base,199,200
+HumanEval_45_triangle_area,142,200
+HumanEval_46_fib4,12,200
+HumanEval_47_median,0,200
+HumanEval_48_is_palindrome,106,200
+HumanEval_49_modp,12,200
+HumanEval_5_intersperse,143,200
+HumanEval_51_remove_vowels,65,200
+HumanEval_52_below_threshold,0,200
+HumanEval_53_add,200,200
+HumanEval_54_same_chars,0,200
+HumanEval_55_fib,180,200
+HumanEval_56_correct_bracketing,0,200
+HumanEval_57_monotonic,6,200
+HumanEval_58_common,73,200
+HumanEval_59_largest_prime_factor,19,200
+HumanEval_6_parse_nested_parens,14,200
+HumanEval_60_sum_to_n,200,200
+HumanEval_61_correct_bracketing,0,200
+HumanEval_62_derivative,98,200
+HumanEval_63_fibfib,136,200
+HumanEval_64_vowels_count,0,200
+HumanEval_65_circular_shift,48,200
+HumanEval_66_digitSum,0,200
+HumanEval_67_fruit_distribution,6,200
+HumanEval_68_pluck,0,200
+HumanEval_69_search,187,200
+HumanEval_7_filter_by_substring,135,200
+HumanEval_70_strange_sort_list,15,200
+HumanEval_71_triangle_area,0,200
+HumanEval_72_will_it_fly,160,200
+HumanEval_73_smallest_change,190,200
+HumanEval_74_total_match,20,200
+HumanEval_75_is_multiply_prime,9,200
+HumanEval_76_is_simple_power,111,200
+HumanEval_77_iscube,1,200
+HumanEval_78_hex_key,22,200
+HumanEval_79_decimal_to_binary,0,200
+HumanEval_8_sum_product,200,200
+HumanEval_80_is_happy,4,200
+HumanEval_81_numerical_letter_grade,102,200
+HumanEval_82_prime_length,0,200
+HumanEval_83_starts_one_ends,0,200
+HumanEval_84_solve,0,200
+HumanEval_85_add,0,200
+HumanEval_86_anti_shuffle,47,200
+HumanEval_87_get_row,84,200
+HumanEval_88_sort_array,0,200
+HumanEval_89_encrypt,0,200
+HumanEval_9_rolling_max,71,200
+HumanEval_90_next_smallest,2,200
+HumanEval_91_is_bored,0,200
+HumanEval_92_any_int,195,200
+HumanEval_93_encode,0,200
+HumanEval_94_skjkasdkd,0,200
+HumanEval_95_check_dict_case,0,200
+HumanEval_96_count_up_to,0,200
+HumanEval_97_multiply,200,200
+HumanEval_98_count_upper,3,200
+HumanEval_99_closest_integer,1,200
-Original file line number
+Diff line change
@@ Expand Up / @@ -42,6 +42,7 @@ def path(self) -> Path: @@
         "elixir",
         "clj",
         "ada",
+        "el",
     ]
     MODELS = ["davinci", "incoder", "codegen"]
@@ Expand Down @@