XAI-liacs · nikivanstein · Oct 25, 2025 · Oct 25, 2025 · Oct 25, 2025 · Oct 25, 2025
diff --git a/README.md b/README.md
@@ -28,6 +28,7 @@
 - [Installation](#-installation)
 - [Quick Start](#-quick-start)
 - [Webapp](#-webapp)
+- [AlphaEvolve Benchmarks](#alphaevolve-benchmarks)
 - [Contributing](#-contributing)
 - [License](#-license)
 - [Citation](#-citation)
@@ -58,9 +59,20 @@ BLADE incorporates several benchmark function sets to provide a comprehensive ev
 | **SBOX-COST**                      | A set of 24 boundary-constrained functions focusing on strict box-constraint optimization scenarios. [Reference](https://inria.hal.science/hal-04403658/file/sboxcost-cmacomparison-authorversion.pdf) | 24                  | Yes                |
 | **MA-BBOB** (Many-Affine BBOB)     | An extension of the BBOB suite, generating functions through affine combinations and shifts. [Reference](https://dl.acm.org/doi/10.1145/3673908) | Generator-Based     | Yes                |
 | **GECCO MA-BBOB Competition Instances** | A collection of 1,000 pre-defined instances from the GECCO MA-BBOB competition, evaluating algorithm performance on diverse affine-combined functions. [Reference](https://iohprofiler.github.io/competitions) | 1,000               | Yes                |
+| **HLP** (High-Level Properties)   | Generated benchmarks guided by high-level property combinations (e.g., separability, multimodality). | Generator-Based     | Yes                |
 
 In addition, several real-world applications are included such as several photonics problems.
 
+### AlphaEvolve Benchmarks
+
+BLADE bundles benchmark instances inspired by the Google DeepMind
+AlphaEvolve paper. The ready-to-run reference scripts live in
+[`run_benchmarks/`](./run_benchmarks), while the reusable benchmark
+definitions are organized under [`iohblade/benchmarks`](./iohblade/benchmarks)
+by domain (analysis, combinatorics, geometry, matrix multiplication, number
+theory, packing, and Fourier). Each domain folder includes a short README that
+summarizes the task and instances.
+
 ### Included Search Methods
 
 The suite contains the state-of-the-art LLM-assisted search algorithms:

diff --git a/docs/Introduction.rst b/docs/Introduction.rst
@@ -11,3 +11,5 @@ Evolution of algorithms.
 - **Flexible**: BLADE is designed to be flexible and can be easily extended.
 - **Scalable**: BLADE is designed to be scalable and can be run on a single machine or a cluster.
 - **Open Source**: BLADE is open source and free to use.
+- **HLP benchmarks**: Includes high-level property (HLP) benchmark
+  generation for structured black-box optimization tasks.
diff --git a/docs/benchmarks.rst b/docs/benchmarks.rst
@@ -0,0 +1,23 @@
+AlphaEvolve Benchmarks
+======================
+
+BLADE includes benchmark instances inspired by the Google DeepMind
+AlphaEvolve paper. These instances are available in two complementary forms:
+
+- ``run_benchmarks/`` provides standalone reference scripts for running each
+  task directly.
+- ``iohblade/benchmarks`` packages the same tasks for programmatic use in
+  experiments and pipelines.
+
+The packaged benchmarks are grouped by domain:
+
+- Analysis (auto-correlation inequalities)
+- Combinatorics (Erdős minimum overlap)
+- Geometry (Heilbronn problems, kissing number, and distance ratios)
+- Matrix multiplication
+- Number theory (sums vs differences)
+- Packing (rectangle, hexagon, and unit square packing)
+- Fourier (uncertainty inequalities)
+
+Each domain folder contains a README with task-specific details and citations
+to the original sources.
diff --git a/docs/index.rst b/docs/index.rst
@@ -185,6 +185,7 @@ If you use BLADE in your research, please consider citing the associated paper:
 
    Introduction
    Installation
+   benchmarks
    webapp
    modules
    notebooks/simple_experiment

diff --git a/iohblade/llm.py b/iohblade/llm.py
@@ -10,16 +10,38 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
-import anthropic
-import ollama
-import openai
-from google import genai
-from tokencost import (
-    calculate_completion_cost,
-    calculate_prompt_cost,
-    count_message_tokens,
-    count_string_tokens,
-)
+try:
+    import anthropic
+except ImportError:
+    anthropic = None
+
+try:
+    import ollama
+except ImportError:
+    ollama = None
+
+try:
+    import openai
+except ImportError:
+    openai = None
+
+try:
+    from google import genai
+except ImportError:
+    genai = None
+
+try:
+    from tokencost import (
+        calculate_completion_cost,
+        calculate_prompt_cost,
+        count_message_tokens,
+        count_string_tokens,
+    )
+except ImportError:
+    calculate_completion_cost = None
+    calculate_prompt_cost = None
+    count_message_tokens = None
+    count_string_tokens = None
 
 from .solution import Solution
 from .utils import NoCodeException
@@ -99,6 +121,13 @@ def query(self, session: list):
         Returns:
             str: The text content of the LLM's response.
         """
+        if (
+            self.logger != None
+            and hasattr(self.logger, "budget_exhausted")
+            and self.logger.budget_exhausted()
+        ):
+            return "Budget exhausted."
+
         if self.log:
             input_msg = "\n".join([d["content"] for d in session])
             try:

diff --git a/iohblade/methods/mcts_ahd/mcts_node.py → iohblade/mcts_node.py b/iohblade/methods/mcts_ahd/mcts_node.py → iohblade/mcts_node.py
@@ -40,6 +40,20 @@ def __init__(
         self.Q = Q
         self.children: list[MCTS_Node] = []
 
+    def copy_attributes(self, other: MCTS_Node):
+        """
+        Copy the tree fields (`approach`, `depth`, `parent`, `visit`, `Q`, `children`) from another MCTS_Node onto this one.
+
+        ## Args:
+        `other: MCTS_Node`: The MCTS_Node instance to copy from; `parent` is assigned as-is, so both nodes refer to the same parent.
+        """
+        self.approach = other.approach
+        self.depth = other.depth
+        self.parent = other.parent
+        self.visit = other.visit
+        self.Q = other.Q
+        self.children = other.children.copy()  # shallow copy: a new list holding the same child node objects
+
     def add_child(self, childNode: MCTS_Node):
         """
         Add a child node to the current instance of MCTS_Node.

diff --git a/iohblade/methods/lhns/lhns.py b/iohblade/methods/lhns/lhns.py
@@ -128,7 +128,7 @@ def initialise(self):
         for i in range(5):
             try:
                 solution = self.llm.sample_solution(
-                    [{"role": "client", "content": initialisation_prompt}]
+                    [{"role": "user", "content": initialisation_prompt}]
                 )
                 self.current_solution = solution
             except Exception as e:
@@ -137,7 +137,7 @@ def initialise(self):
 
     def evaluate(self, solution: Solution) -> Solution:
         """
-        Evaluates the solution with `problem.evaluate` function, and returns if it returns, else returns solution un-mutated.
+        Evaluates the solution by calling `problem(solution)` and returns the evaluated solution if one is produced; otherwise returns the solution un-mutated.
 
         ## Args:
         `solution: Solution`: A solution object that needs to be evaluated.
@@ -147,7 +147,7 @@ def evaluate(self, solution: Solution) -> Solution:
         """
         print("Evaluate....")
         evaluated_solution = solution
-        evaluated_solution = self.problem.evaluate(evaluated_solution)
+        evaluated_solution = self.problem(evaluated_solution)
         if evaluated_solution:
             self._log_best_solution(evaluated_solution)
             return evaluated_solution
@@ -245,7 +245,7 @@ def mutate_lhns_vns(self, iteration_number: int) -> Solution:
         for i in range(5):
             try:
                 new = self.llm.sample_solution(
-                    [{"role": "client", "content": destruction_repair_prompt}]
+                    [{"role": "user", "content": destruction_repair_prompt}]
                 )
                 return new
             except Exception as e:
@@ -296,7 +296,7 @@ def mutate_lhns_ils(self, iteration_number: int) -> Solution:
             for i in range(5):
                 try:
                     new = self.llm.sample_solution(
-                        [{"role": "client", "content": ils_prompt}]
+                        [{"role": "user", "content": ils_prompt}]
                     )
                     return new
                 except Exception as e:
@@ -338,7 +338,7 @@ def mutate_lhns_ts(self, iteration_number: int) -> Solution:
                 for i in range(5):
                     try:
                         new = self.llm.sample_solution(
-                            [{"role": "client", "content": prompt}]
+                            [{"role": "user", "content": prompt}]
                         )
                         return new
                     except Exception as e:
@@ -356,7 +356,7 @@ def mutate_lhns_ts(self, iteration_number: int) -> Solution:
             for i in range(5):
                 try:
                     new = self.llm.sample_solution(
-                        [{"role": "client", "content": ils_prompt}]
+                        [{"role": "user", "content": ils_prompt}]
                     )
                     return new
                 except Exception as e:

diff --git a/iohblade/methods/mcts_ahd/mcts.py b/iohblade/methods/mcts_ahd/mcts.py
@@ -3,15 +3,16 @@
 import inspect
 from typing import Iterable, Optional
 
+from iohblade.llm import LLM
+from iohblade.problem import Problem
+from iohblade.solution import Solution
+from iohblade.method import Method
+
 import traceback
 
-from .mcts_node import MCTS_Node
+from iohblade.mcts_node import MCTS_Node
 from .prompts import MCTS_Prompts
 
-from iohblade import Solution, Problem
-from iohblade.llm import LLM
-from iohblade.method import Method
-
 
 # region Helper Functions:
 def safe_max(values: Iterable[Optional[float]]) -> Optional[float]:
@@ -147,7 +148,7 @@ def _get_new_node(
             case _:
                 error_msg = f"Error enconutered {approach} method, which is not in expected list [i1, m1, m2, e1, e2, s1]."
                 raise ValueError(error_msg)
-        message = [{"role": "client", "content": prompt}]
+        message = [{"role": "user", "content": prompt}]
 
         solution = None
         for i in range(5):  # Try upto 5 times.
@@ -162,9 +163,9 @@ def _get_new_node(
             refine_description_prompt = MCTS_Prompts.get_desctiption_prompt(
                 task_prompt, mcts_node
             )
-            message = [{"role": "client", "content": refine_description_prompt}]
-            descrpition = self.llm.query(message)
-            mcts_node.description = descrpition
+            message = [{"role": "user", "content": refine_description_prompt}]
+            description = self.llm.query(message)
+            mcts_node.description = description
             return mcts_node
         return MCTS_Node(Solution("error"), "error")
 
@@ -300,17 +301,20 @@ def simulate(self, node: MCTS_Node):
 
         """
         self.eval_remain -= 1
-        self.problem.evaluate(node)
-        if abs(node.fitness) == float("inf"):
-            node.Q = None
-            return
-        node.Q = node.fitness
-        self.q_min = safe_min([self.q_min, node.Q])
-        self.q_max = safe_max([self.q_max, node.Q])
-        if self.best_solution.fitness < node.fitness and self.maximisation:
-            self.best_solution = node
-        elif self.best_solution.fitness > node.fitness and not self.maximisation:
-            self.best_solution = node
+        new_node = self.problem(node)
+        new_node.copy_attributes(node)
+
+        if abs(new_node.fitness) == float("inf"):
+            new_node.Q = None
+            return new_node
+        new_node.Q = new_node.fitness
+        self.q_min = safe_min([self.q_min, new_node.Q])
+        self.q_max = safe_max([self.q_max, new_node.Q])
+        if self.best_solution.fitness < new_node.fitness and self.maximisation:
+            self.best_solution = new_node
+        elif self.best_solution.fitness > new_node.fitness and not self.maximisation:
+            self.best_solution = new_node
+        return new_node
 
     def selection(self) -> tuple[list[MCTS_Node], MCTS_Node]:
         """
@@ -480,10 +484,12 @@ def run(self):
 
         print(f"Initialised with {len(self.root.children)} nodes.")
 
-        for child in self.root.children:
+        for i, child in enumerate(self.root.children):
             print(f"\tEvaluating {child.id} node.")
 
-            self.simulate(child)
+            child = self.simulate(child)
+            child.parent = self.root
+            self.root.children[i] = child
 
             print(f"\t\tFitness {child.fitness}")
             print(f"\t\tFeedback {child.feedback}")
@@ -512,7 +518,13 @@ def run(self):
             for node in progressive_widening_nodes + expanded_nodes:
                 print(f"\tEvaluating {node.id} node.")
 
-                self.simulate(node)
+                new_node = self.simulate(node)
+                if new_node is not node and node.parent is not None:
+                    # replace in parent's children
+                    p = node.parent
+                    idx = p.children.index(node)
+                    p.children[idx] = new_node
+                    new_node.parent = p
 
                 print(f"\t\tFitness {node.fitness}.")
                 print(f"\t\tFeedback {node.feedback}")
@@ -571,7 +583,7 @@ def __init__(
         self.init_params = {
             k: getattr(self, k)
             for k in sig.parameters
-            if k not in ("self", "name", "budget")
+            if k not in ("self", "name", "budget", "llm")
         }
 
     def __call__(self, problem: Problem):
@@ -605,10 +617,11 @@ def to_dict(self):
         Returns:
             dict: Dictionary representation of the method.
         """
+        kwargs = dict(self.init_params)
         return {
             "method_name": self.name if self.name != None else "MCTS_AHD",
             "budget": self.budget,
-            "kwargs": self.init_params,
+            "kwargs": kwargs,
         }
 
 

diff --git a/iohblade/methods/mcts_ahd/prompts.py b/iohblade/methods/mcts_ahd/prompts.py
@@ -1,4 +1,4 @@
-from iohblade.methods.mcts_ahd.mcts_node import MCTS_Node
+from iohblade.mcts_node import MCTS_Node
 
 
 class MCTS_Prompts: