diff --git a/code_gen.py b/code_gen.py
new file mode 100644
index 0000000..2bc9920
--- /dev/null
+++ b/code_gen.py
@@ -0,0 +1,550 @@
+import sys
+import os.path
+from typing import *
+from dataclasses import dataclass
+from enum import Enum
+import reference_parser
+import utilities
+
+CCode = NewType("CCode", str)  # a str subtype for type checkers, used here for text that is C source
+
+
+@dataclass
+class Template:
+    """
+    A partially filled format string.
+
+    Used to build the C snippets that call :code:`printf` and :code:`scanf`.
+    """
+    template: CCode
+    values: Dict[str, str]
+    to_fill: List[str]
+
+    def complete(self) -> CCode:
+        """
+        Substitute the stored values into the template string.
+
+        Every key named in :code:`to_fill` must have a value before substitution.
+
+        :return: the template with all placeholders replaced
+        """
+        # every required key must already have a value
+        assert all(key in self.values for key in self.to_fill)
+
+        return self.template.format(**self.values)
+
+    def fill(self, **kwargs) -> None:
+        """
+        Record values for placeholders in this Template instance.
+
+        :param kwargs: (key, value) pairs to merge into the stored values
+        """
+        self.values.update(kwargs)
+
+
+class ScalarCType(Enum):
+    Int = 'int'
+    Char = 'char'
+    Float = 'float'
+    Double = 'double'
+    Bool = 'bool'
+
+    @property
+    def c_repr(self) -> CCode:
+        """
+        Convert a C type from object representation to the string used to denote it in C.
+
+        :return: the C representation of the specified type
+        """
+        return self.value
+
+    def placeholder(self, printf=True) -> str:
+        """
+        The placeholder string, as can be found in a C format string.
+
+        Chars are read and written as their ASCII code to make parsing easier.
+        :code:`scanf` stores through a pointer, so a char needs :code:`%hhd` there
+        (:code:`%d` would write a whole int); :code:`printf` promotes chars to int.
+
+        :param printf: set to :code:`False` to get values for a :code:`scanf` format string instead
+        :return: the placeholder string
+        """
+        if self == ScalarCType.Char and not printf:
+            return "%hhd"
+        if self == ScalarCType.Double and not printf:
+            return "%lf"
+        if self in (ScalarCType.Float, ScalarCType.Double):
+            return "%f"
+        # NOTE(review): bool is also scanned with %d, which writes an int - confirm
+        return "%d"
+
+    @staticmethod
+    def from_string(c_repr: CCode):
+        """
+        Find the object corresponding to the same type given a C representation of a type.
+
+        The inverse of :code:`c_repr`
+
+        :param c_repr: the C representation of the type
+        :return: the corresponding enum value, :code:`None` if the representation is not valid
+        """
+        try:
+            # enum lookup by value
+            return ScalarCType(c_repr)
+        except ValueError:
+            # not the C spelling of any supported scalar type
+            return None
+
+    def scanf_template(self) -> Template:
+        """
+        Build a template that declares a variable of this type and reads it with :code:`scanf`.
+
+        :return: the partially filled template (only :code:`name` is left to fill)
+        """
+        template = '''
+        {c_repr} {name};
+        scanf("{placeholder}", &{name});
+        '''
+
+        filled = {"c_repr": self.c_repr, "placeholder": self.placeholder(printf=False)}
+
+        return Template(template, filled, ["name"])
+
+    def printf_template(self) -> Template:
+        """
+        Build a template that prints a value of this type on its own line with :code:`printf`.
+
+        :return: the partially filled template (only :code:`name` is left to fill)
+        """
+        template = '''
+        printf("{placeholder}\\n", {name});
+        '''
+
+        return Template(template, {"placeholder": self.placeholder()}, ["name"])
+
+
+@dataclass(init=False)
+class VoidCType:  # stands for the C type void; it declares no dataclass fields
+    def __init__(self):
+        self.c_repr = "void"  # plain attribute, mirroring the c_repr property of the other type classes
+
+
+@dataclass
+class ArrayCType:
+    scalar_c_type: ScalarCType
+    size: Optional[str]  # the name of the parameter holding the length; None for unsized arrays
+
+    @property
+    def c_repr(self) -> CCode:
+        """
+        Convert a C type from object representation to the string used to denote it in C.
+
+        :return: the C representation of the specified type (a pointer to the scalar type)
+        """
+        return f"{self.scalar_c_type.c_repr}*"
+
+    def scanf_template(self) -> Template:
+        """
+        Build a template that allocates the array and fills it element by element with :code:`scanf`.
+
+        :return: the partially filled template (:code:`size` is left to fill for unsized arrays)
+        """
+        if self.size is not None:
+            template = '''
+            {c_repr} {name} = malloc({size} * sizeof({scalar_c_repr}));
+            
+            if ({name} == NULL) {{
+                fprintf(stderr, "could not alloc array {name} (size: {size}=%d)\\n", {size});
+                return 1;
+            }}
+            
+            for (int idx = 0; idx < {size}; idx++) {{
+                scanf("{scalar_placeholder}", {name} + idx);
+            }}
+            '''
+        else:  # unsized: the last slot is kept as a terminator and reading stops at the first 0
+            template = '''
+            {c_repr} {name} = malloc({size} * sizeof({scalar_c_repr}));
+            
+            if ({name} == NULL) {{
+                fprintf(stderr, "could not alloc array {name} (size: {size}=%d)\\n", {size});
+                return 1;
+            }}
+
+            {name}[{size}-1] = '\\0';
+            for (int idx = 0; idx < {size}-1; idx++) {{
+                scanf("{scalar_placeholder}", {name} + idx);
+                if ({name}[idx] == '\\0')
+                    break;
+            }}
+            '''
+
+        filled = {"c_repr": self.c_repr,
+                  "scalar_c_repr": self.scalar_c_type.c_repr,
+                  "scalar_placeholder": self.scalar_c_type.placeholder(printf=False)}
+
+        unfilled = ["name"]
+
+        if self.size is not None:
+            filled["size"] = self.size
+        else:
+            unfilled.append("size")  # the caller supplies the name of the length variable
+
+        return Template(template, filled, unfilled)
+
+    def printf_template(self) -> Template:
+        """
+        Build a template that prints the array on one line with :code:`printf`, then frees it.
+
+        :return: the partially filled template (:code:`size` is left to fill for unsized arrays)
+        """
+        if self.size is not None:
+            template = '''
+            for (int idx = 0; idx < {size}; idx++) {{
+                printf("{scalar_placeholder} ", {name}[idx]);
+            }}
+            putchar('\\n');
+            
+            free({name});
+            '''
+        else:  # unsized: stop at the first 0; the bound is tested first so idx never leaves the array
+            template = '''
+            for (int idx = 0; idx < {size} && {name}[idx]; idx++) {{
+                printf("{scalar_placeholder} ", {name}[idx]);
+            }}
+            putchar('\\n');
+            
+            free({name});
+            '''
+
+        filled = {"c_repr": self.c_repr,
+                  "scalar_c_repr": self.scalar_c_type.c_repr,
+                  "scalar_placeholder": self.scalar_c_type.placeholder()}
+
+        unfilled = ["name"]
+
+        if self.size is not None:
+            filled["size"] = self.size
+        else:
+            unfilled.append("size")  # the caller supplies the name of the length variable
+
+        return Template(template, filled, unfilled)
+
+
+ParameterCType = Union[ScalarCType, ArrayCType]  # the types a CParameter may have
+AnyCType = Union[ScalarCType, ArrayCType, VoidCType]  # additionally allows void, as used for CReference.c_type
+
+
+@dataclass
+class CParameter:
+    name: str
+    c_type: ParameterCType
+    is_output: bool
+
+    @property
+    def c_repr(self) -> CCode:
+        """
+        Render the parameter as a C declaration: its type followed by its name.
+
+        :return: the C representation of the specified parameter
+        """
+        return "{} {}".format(self.c_type.c_repr, self.name)
+
+    @property
+    def arr_size(self) -> CCode:
+        """
+        Name the variable that holds the length of this array parameter.
+
+        :return: the variable name
+        """
+        if isinstance(self.c_type, ScalarCType):
+            raise Exception("tried to access scalar values size")
+
+        declared = self.c_type.size
+        # without a declared size variable the reserved
+        # identifier <name>_len is used instead
+        return f"{self.name}_len" if declared is None else declared
+
+    def get_scanf(self) -> CCode:
+        """
+        Produce the C code that declares this parameter and reads its value from stdin.
+
+        :return: the C code
+        """
+        scanf = self.c_type.scanf_template()
+        self.fill_template(scanf)
+
+        return scanf.complete()
+
+    def get_printf(self) -> CCode:
+        """
+        Produce the C code that writes the value of this parameter to stdout.
+
+        :return: the C code
+        """
+        printf = self.c_type.printf_template()
+        self.fill_template(printf)
+
+        return printf.complete()
+
+    def fill_template(self, template: Template) -> None:
+        """
+        Supply the values listed in :code:`template.to_fill` that this parameter knows about.
+
+        :param template: the template to fill in
+        """
+        for key in template.to_fill:
+            if key == "size":
+                template.fill(size=self.arr_size)
+            elif key == "name":
+                template.fill(name=self.name)
+
+
+@dataclass
+class CReference:
+    name: str
+    c_type: AnyCType
+    parameters: List[CParameter]
+    includes: List[str]
+    code: CCode
+
+    @staticmethod
+    def parse(prog_name: str, examples_dir: str):
+        """
+        Build a CReference from a function directory.
+
+        The directory is first parsed into a :code:`reference_parser.FunctionReference`, which is then converted.
+
+        :param prog_name: the name of function directory
+        :param examples_dir: the path to the directory containing the function reference
+        :return: the CReference instance built from that function
+        """
+        inter = reference_parser.FunctionReference.parse(os.path.join(examples_dir, prog_name))
+        issues = inter.validate()
+
+        if issues:
+            sys.stderr.write("Parse created issues!\n")
+            sys.stderr.write("\n".join(issue.value for issue in issues))
+
+        # these two issues are tolerated; anything else aborts the parse
+        tolerated = {reference_parser.ParseIssue.ArrayReturnType, reference_parser.ParseIssue.ReturnAndOutputGiven}
+        if issues - tolerated:
+            raise Exception("did not produce a valid parse")
+
+        output_names = set(inter.info.outputs)
+        params = []
+        for param in inter.parameters:
+            params.append(CParameter(param.name,
+                                     CReference.get_c_type(param.type), param.name in output_names))
+
+        ref = CReference(inter.name, CReference.get_c_type(inter.type),
+                         params, inter.reference.includes, inter.code)
+
+        # attach the size variable to each array parameter that has one
+        by_name = ref.param_dict
+        for size in inter.info.sizes:
+            by_name[size.array].c_type.size = size.var
+
+        return ref
+
+    @property
+    def param_dict(self) -> Dict[str, CParameter]:
+        """
+        Index the parameters by name.
+
+        Later parameters win if a name is repeated.
+
+        :return: the mapping between parameter names and the corresponding object
+        """
+        return dict((param.name, param) for param in self.parameters)
+
+    @property
+    def sizes(self) -> Set[str]:
+        """Collect the names of the size variables used by the sized array parameters."""
+        # the size lives on the parameter's type, not on the CParameter itself
+        return {param.c_type.size
+                for param in self.parameters
+                if isinstance(param.c_type, ArrayCType)
+                and param.c_type.size is not None}
+
+    @staticmethod
+    def get_c_type(type_info: reference_parser.CType) -> AnyCType:
+        """
+        Translate the parser's description of a C type into the classes used for code generation.
+
+        :param type_info: the C type in its intermediate representation
+        :return: a VoidCType, a ScalarCType or (for single pointers) an unsized ArrayCType
+        """
+        depth = type_info.pointer_level
+
+        if type_info.contents == "void":
+            if depth != 0:
+                raise Exception("void pointers are not supported")
+            return VoidCType()
+
+        scalar = ScalarCType.from_string(type_info.contents)
+        assert scalar is not None
+
+        if depth == 0:
+            return scalar
+        if depth == 1:
+            return ArrayCType(scalar, None)
+        raise Exception("trying to create a multi-level pointer type")
+
+    def get_func_call(self) -> CCode:
+        """
+        Produce the C statement invoking this function, assigning to `res` unless it returns void.
+
+        :return: the C code
+        """
+        arguments = ', '.join(param.name for param in self.parameters)
+        invocation = f"{self.name}({arguments});"
+
+        if not isinstance(self.c_type, VoidCType):
+            # reserves the identifier `res' for the returned value
+            declaration = CParameter("res", self.c_type, False).c_repr
+            return f"{declaration} = {invocation}"
+
+        return invocation
+
+    @property
+    def read_order(self) -> List[CParameter]:
+        """
+        List the parameters in the order their values must be read.
+
+        An array with a given size must be read after its size.
+        Since all sizes are scalar, reading every scalar before any array guarantees this.
+
+        The ordering is *stable* within each group:
+        scalars keep their relative order, and so do arrays,
+        even though the two groups are no longer interleaved.
+
+        :return: the order to read parameters, with scalars coming first and arrays last
+        """
+        def is_scalar(param: CParameter) -> bool:
+            return isinstance(param.c_type, ScalarCType)
+
+        # two passes over the declaration order keep each group stable
+        scalar_params = [param for param in self.parameters if is_scalar(param)]
+        array_params = [param for param in self.parameters if not is_scalar(param)]
+
+        # scalars (and therefore all sizes) come first
+        return scalar_params + array_params
+
+    @property
+    def outputs(self):  # the parameters flagged as outputs, in declaration order
+        return list(filter(lambda param: param.is_output, self.parameters))
+
+    def main(self) -> CCode:
+        """
+        Build the main function for this reference.
+
+        The generated code declares the string lengths, reads every parameter,
+        calls the function, then prints the return value (if any) followed by each output parameter.
+
+        NOTE(review): the return value is printed via :code:`placeholder()`, which only scalar types define.
+
+        :return: the C code
+        """
+        strlens = '\n'.join(self.get_strlens())
+        scanfs = '\n'.join(param.get_scanf() for param in self.read_order)
+        func_call = self.get_func_call()
+
+        printfs = '\n'.join(param.get_printf() for param in self.outputs)
+        if not isinstance(self.c_type, VoidCType):
+            # the returned value (stored in `res`) is printed before any output parameters
+            res_template = self.c_type.printf_template()
+            res_template.fill(placeholder=self.c_type.placeholder(), name="res")
+            printfs = res_template.complete() + printfs
+
+        return f'''
+        int main(int argc, char *argv[]) {{
+            {strlens}
+            {scanfs}
+            {func_call}
+            {printfs}
+        }}
+        '''
+
+    def program(self) -> CCode:
+        """
+        Assemble the complete C source: includes, then the reference code, then the generated main.
+
+        :return: the C source code
+        """
+        headers = ["#include <stdio.h>", "#include <stdlib.h>"] + self.includes
+        preamble = "\n".join(headers) + "\n"
+        return f"{preamble}{self.code}{self.main()}"
+
+    def get_strlens(self) -> List[CCode]:
+        """
+        Declare a length variable for every unsized array (string) parameter.
+
+        Each length is taken from the command line, in the order the strings appear (starting at argv[1]).
+        This process reserves the identifier <arr>_len for all strings,
+        where <arr> is the name of the string parameter.
+
+        :return: the C code for getting string lengths
+        """
+        lengths = []
+        for param in self.parameters:
+            if isinstance(param.c_type, ArrayCType) and param.c_type.size is None:
+                lengths.append(f"int {param.arr_size} = atoi(argv[{len(lengths) + 1}]);")
+        return lengths
+
+    def compile(self, exe: str = None, cleanup: bool = True) -> Optional[str]:
+        """
+        Write the generated program to a C file next to the executable and compile it with gcc.
+
+        :param exe: the name of the executable to compile to. If :code:`None` a name is taken from :code:`utilities`
+        :param cleanup: set to :code:`True` to remove the source file afterwards
+        :return: the name of the executable. Returns :code:`None` if the user declines to overwrite an existing file
+        """
+        if exe is None:
+            exe = utilities.get_tmp_file_name(self.program(), ".o")
+
+        root, ext = os.path.splitext(exe)
+
+        if ext not in {".o", ""}:  # only extension-less or ".o" names are accepted
+            raise Exception("invalid executable given")
+
+        src = root+".c"  # the C source is written alongside the executable
+
+        if os.path.exists(exe):  # ask on stdin before replacing an existing executable
+            ack = input(f"overwrite {exe}/{src}? [yN]\n")
+            if ack == "y" or ack == "Y":
+                print("overwriting...")
+            else:
+                print("not overwriting!")
+                return None
+
+        with open(src, "w") as f:
+            f.write(self.program())
+
+        _, stderr = utilities.run_command(f"gcc -Wall -O0 -o {exe} {src}")
+        if stderr:
+            sys.stderr.write(stderr)  # forwarded only; the result below does not depend on it
+        print("COMPILED")  # NOTE(review): printed, and exe returned, whether or not gcc reported errors
+
+        if cleanup:
+            utilities.run_command(f"rm {src}")
+
+        return exe
+
+
+if __name__ == "__main__":  # command line entry point: print the generated C program, or compile it with -c
+    import argparse
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("program", help="the program to use")
+    parser.add_argument("-p", "--path", help="path to the directory contatining the program", default=".")
+    parser.add_argument("-c", "--compile", help="pass to compile the function", action="store_true")
+
+    args = parser.parse_args()
+
+    parsed = CReference.parse(args.program, args.path)  # args.path is the directory, args.program the entry inside it
+    if args.compile:
+        parsed.compile(exe="tmp.o", cleanup=False)  # fixed output name; the generated source tmp.c is kept
+    else:
+        print(parsed.program())
diff --git a/evaluate.py b/evaluate.py
new file mode 100644
index 0000000..5882c26
--- /dev/null
+++ b/evaluate.py
@@ -0,0 +1,309 @@
+import sys
+import utilities
+import os.path
+from code_gen import CReference, AnyCType, ScalarCType, ArrayCType, VoidCType, CParameter
+from examples import ExampleCollection, AnyCValue, SomeCValue, ArrayCValue, ScalarCValue
+from dataclasses import dataclass
+from typing import *
+
+ParameterExample = Tuple[CParameter, SomeCValue]  # a parameter paired with one example value for it
+ReturnExample = Tuple[AnyCType, AnyCValue]  # the return type paired with one example return value
+
+
+@dataclass
+class ExampleInstance:
+    """
+    A wrapper class for a single example
+
+    Contains all input/output examples and their mappings to parameters (or return type).
+    """
+    inputs: List[ParameterExample]
+    value: ReturnExample
+    outputs: List[ParameterExample]
+
+    def get_stdin(self) -> str:
+        """
+        Render every input value in the format the generated program reads
+
+        :return: the stdin for this example
+        """
+        lines = [ExampleInstance.describe(val, param.c_type)
+                 for param, val in self.inputs]
+
+        # one line of stdin per input value
+        return '\n'.join(lines)
+
+    def get_args(self) -> str:
+        """
+        Builds the arguments for the program when this example is being tested
+
+        The arguments are the sizes of any unsized arrays
+        (at this point only strings are handled correctly).
+        This size is determined here by checking the inputs and outputs for any unsized parameters,
+        and asserting that the parameter only needs to be large enough to accommodate the biggest value.
+
+        For example: if the function took parameter :code:`char *s`, and this parameter was an output,
+        then the input and output values for this parameter (e.g. "hi" -> "hello") are both inspected.
+        Here *s* has to accommodate both "hi" and "hello", so requires 6 bytes to be allocated.
+
+        :return: the sizes as decimal numbers separated by spaces, in the order the inputs are stored
+        """
+        # doing outputs first since then inputs can be done in one pass
+        sizes = {param.name: len(val) for param, val in self.outputs
+                 if isinstance(param.c_type, ArrayCType) and param.c_type.size is None}
+
+        args = []
+        for param, val in self.inputs:
+            if param.name in sizes:
+                # append the whole number; `+=` would splice its digits into the list one by one
+                args.append(str(1 + max(sizes[param.name], len(val))))
+            elif isinstance(param.c_type, ArrayCType) and param.c_type.size is None:
+                args.append(str(1 + len(val)))
+
+        return ' '.join(args)
+
+    @staticmethod
+    def parse(desc: str, c_type: AnyCType) -> AnyCValue:
+        """
+        Turn one line printed by the generated program back into a Python value
+
+        The line format is set by the :code:`printf_template` methods of ScalarCType and ArrayCType.
+
+        :param desc: the line of output given by the function denoting a value
+        :param c_type: the expected type of the value
+        :return: the value parsed from the string, :code:`None` if the type is neither scalar nor array
+        """
+        def parse_scalar(text: str, scalar_type: ScalarCType) -> ScalarCValue:
+            text = text.strip()
+            if scalar_type in (ScalarCType.Int, ScalarCType.Bool):
+                return int(text)
+            if scalar_type in (ScalarCType.Float, ScalarCType.Double):
+                return float(text)
+            if scalar_type == ScalarCType.Char:
+                # chars travel as their character code
+                return chr(int(text))
+
+        def parse_array(text: str, array_type: ArrayCType) -> ArrayCValue:
+            elements = text.strip()
+            if not elements:
+                return []
+            return [parse_scalar(chunk, array_type.scalar_c_type) for chunk in elements.split(" ")]
+
+        if isinstance(c_type, ScalarCType):
+            return parse_scalar(desc, c_type)
+        if not isinstance(c_type, ArrayCType):
+            return None
+
+        values = parse_array(desc, c_type)
+        # char arrays are strings, so glue the characters back together
+        if c_type.scalar_c_type == ScalarCType.Char:
+            return "".join(values)
+        return values
+
+    def check_output(self, stdout: str) -> bool:
+        """
+        Tests the output generated by running the reference against the current example
+
+        Writes all issues to stderr; output with the wrong number of non-blank lines fails the test.
+
+        :param stdout: the output produced when the reference is run
+        :return: :code:`True` if and only if all values matched
+        """
+        lines = [line.strip() for line in stdout.splitlines() if line.strip()]
+        ret_type, ret_val = self.value
+        expected = len(self.outputs) + (0 if isinstance(ret_type, VoidCType) else 1)
+        if len(lines) != expected:
+            # e.g. the program crashed; report it instead of raising so the run is counted as failed
+            sys.stderr.write(f"expected {expected} lines of output, found {len(lines)}\n")
+            return False
+
+        pass_test = True
+        if not isinstance(ret_type, VoidCType):
+            ex_val = ExampleInstance.parse(lines[0], ret_type)
+            if ret_val != ex_val:
+                sys.stderr.write(f"return value does not match! ({ret_val} vs. {ex_val})\n")
+                pass_test = False
+            lines = lines[1:]
+        for line, (param, value) in zip(lines, self.outputs):
+            ex_value = ExampleInstance.parse(line, param.c_type)
+            if value != ex_value:
+                sys.stderr.write(f"output does not match! ({value} vs. {ex_value})\n")
+                pass_test = False
+        return pass_test
+
+    def __str__(self):
+        ret_type, ret_val = self.value
+
+        rows = ["==== INPUTS ===="]
+        rows.extend(f"{param.c_repr} = {val}" for param, val in self.inputs)
+
+        rows.append("==== RETURN ====")
+        rows.append(f"{ret_type.c_repr} {ret_val}")
+
+        rows.append("=== OUTPUTS ====")
+        rows.extend(f"{param.c_repr} = {val}" for param, val in self.outputs)
+
+        rows.append("================")
+
+        return '\n'.join(rows)
+
+    @staticmethod
+    def describe(val: AnyCValue, c_type: AnyCType) -> str:
+        """
+        Format a value the way the generated program expects to read it
+
+        Inverse of :code:`parse`
+
+        :param val: the value to format correctly
+        :param c_type: the type of the value
+        :return: the formatted string
+        """
+        if isinstance(c_type, ScalarCType):
+            # a single char is sent as its character code
+            return str(ord(val)) if c_type == ScalarCType.Char else str(val)
+        if c_type.scalar_c_type == ScalarCType.Char:
+            # strings are sent as character codes followed by the terminating 0
+            codes = [str(ord(c)) for c in val]
+            return ' '.join(codes + ["0"])
+        return ' '.join(str(v) for v in val)
+
+
+@dataclass
+class Evaluator:
+    """
+    Contains a reference and a bunch of examples, and allows them to be checked.
+
+    All inputs/outputs should be in the order that the reference is expecting.
+    This means the inputs must be in the read-order, and outputs must be ordered correctly too.
+    """
+    reference: CReference
+    inputs: List[List[SomeCValue]]
+    values: List[AnyCValue]
+    outputs: List[List[SomeCValue]]
+
+    @staticmethod
+    def build_from(ref: CReference, examples: ExampleCollection):
+        """
+        Generate an evaluator from a reference and examples
+
+        Note that the examples are transposed into a "stretched out" form,
+        where the examples for each input, output, and the return form are all in one list.
+
+        Types are compared by their C representation, since the example signature
+        cannot carry the size variable that a reference's array parameter may have.
+
+        :param ref: the reference function
+        :param examples: examples for that reference
+        :return: the evaluator, with values ordered as the reference expects them
+        :raises Exception: if a parameter is missing from the examples or a type does not match
+        """
+        inp_vals, example_returns, outp_vals = examples.transposed_examples
+
+        example_inputs = {inp.name: (inp.type, inp_val) for inp, inp_val in zip(examples.inputs, inp_vals)}
+        example_outputs = {outp.name: (outp.type, outp_val) for outp, outp_val in zip(examples.outputs, outp_vals)}
+
+        # sizes are ignored: ArrayCType equality would also compare the name of the size variable
+        def same_type(example_type, expected: AnyCType) -> bool:
+            return CReference.get_c_type(example_type).c_repr == expected.c_repr
+
+        # this is where the smart size inference etc. can come in
+        inputs = []
+        for param in ref.read_order:
+            if param.name not in example_inputs:
+                raise Exception(f"parameter {param.name} could not be found")
+
+            c_type, values = example_inputs[param.name]
+            if not same_type(c_type, param.c_type):
+                raise Exception(
+                    f"incorrect parameter for {param.name} (found {c_type}, expected {param.c_type.c_repr})")
+            inputs.append(values)
+
+        if not same_type(examples.ret_type, ref.c_type):
+            raise Exception(f"incorrect return type (found {examples.ret_type}, expected {ref.c_type.c_repr})")
+
+        outputs = []
+        for param in ref.outputs:
+            if param.name not in example_outputs:
+                raise Exception(f"output parameter {param.name} could not be found")
+
+            c_type, values = example_outputs[param.name]
+            if not same_type(c_type, param.c_type):
+                raise Exception(
+                    f"incorrect output parameter for {param.name} (found {c_type}, expected {param.c_type.c_repr})")
+            outputs.append(values)
+
+        return Evaluator(ref, inputs, example_returns, outputs)
+
+    def next_example(self) -> Generator[ExampleInstance, None, None]:
+        """
+        Yields the stored examples one at a time, each as a complete ExampleInstance
+
+        The values are stored per parameter, so a full example is a "slice" across them:
+        the i-th value of every input and output, together with the i-th return value.
+        Parameters are paired with their values using the reference's read order and output order.
+        """
+        # every input/output column holds one value per return value, so indexing by position is safe
+        for i, ret_val in enumerate(self.values):
+            ins = [(param, column[i]) for param, column in zip(self.reference.read_order, self.inputs)]
+            outs = [(param, column[i]) for param, column in zip(self.reference.outputs, self.outputs)]
+            ret = (self.reference.c_type, ret_val)
+
+            yield ExampleInstance(ins, ret, outs)
+
+    def run(self, executable: str, example: ExampleInstance) -> str:
+        """
+        Executes the compiled reference on a single example
+
+        :param executable: the file containing the reference executable
+        :param example: the example to run on the reference
+        :return: the output of that function
+        """
+        command = f"./{executable} {example.get_args()}"
+        program_input = example.get_stdin()
+
+        stdout, stderr = utilities.run_command(command, stdin=program_input)
+
+        # anything the program wrote to stderr is echoed, but does not affect the result
+        if stderr:
+            print(stderr)
+        return stdout
+
+    def evaluate(self) -> Tuple[int, int]:
+        """
+        Compiles the reference, then runs it on each stored example in turn
+
+        The source file is kept after compilation (:code:`cleanup=False`).
+        An example counts as a success when :code:`check_output` accepts the program's stdout.
+
+        :return: (no. of successful runs, no. of failed runs)
+        """
+        exe = self.reference.compile(cleanup=False)
+
+        # one boolean per example, in the order the examples are generated
+        results = [example.check_output(self.run(exe, example))
+                   for example in self.next_example()]
+        success = sum(1 for passed in results if passed)
+        failure = len(results) - success
+
+        return success, failure
+
+
+if __name__ == '__main__':  # command line entry point: evaluate one reference against one examples file
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("program", help="the reference program to evaluate")
+    parser.add_argument("-p", "--program-path", help="the path to the reference program", default=".")
+
+    parser.add_argument("examples", help="the file containing examples")
+    parser.add_argument("-P", "--example-path", help="path to examples directory", default=".")
+
+    args = parser.parse_args()
+
+    ref = CReference.parse(args.program, args.program_path)
+    exs = ExampleCollection.from_file(os.path.join(args.example_path, args.examples))
+
+    evaluator = Evaluator.build_from(ref, exs)  # raises if the examples do not fit the reference's signature
+
+    success, failure = evaluator.evaluate()
+    print(f"ran with {success}/{success + failure} successes")  # successes out of all examples run
diff --git a/examples.py b/examples.py
new file mode 100644
index 0000000..ffa523a
--- /dev/null
+++ b/examples.py
@@ -0,0 +1,311 @@
+from re import match
+from reference_parser import CParameter, CType
+from dataclasses import dataclass
+from typing import *
+
+# turn back now, regex grossness ahead
+# if this breaks somewhere down the line just replace it with a JSON reader or something
+
+# the value of a single scalar, as produced by the scalar parsers below
+ScalarCValue = Union[int, float, str]
+# the value of an array: either a list of scalars or a whole string
+ArrayCValue = Union[List[ScalarCValue], str]
+# any value that is actually present
+SomeCValue = Union[ScalarCValue, ArrayCValue]
+# a value that may be absent (None is what the void parser yields)
+AnyCValue = Optional[SomeCValue]
+
+# one example: (input values, return value, output values)
+Example = Tuple[List[SomeCValue], AnyCValue, List[SomeCValue]]
+
+
+@dataclass
+class ExampleCollection:
+    """
+    Contains a list of examples from an examples file
+
+    Every example is a tuple of (input values, return value, output values), laid out to
+    match :code:`inputs`, :code:`ret_type` and :code:`outputs`.
+
+    All of the :code:`parse_*` value parsers share one convention: on success they return
+    :code:`(value, rest)` where :code:`rest` is the unconsumed remainder of the string,
+    and on failure they return :code:`None`.
+    """
+    inputs: List[CParameter]
+    ret_type: CType
+    outputs: List[CParameter]
+    examples: List[Example]
+
+    @property
+    def transposed_examples(self) -> Tuple[List[List[SomeCValue]], List[AnyCValue], List[List[SomeCValue]]]:
+        """
+        Regroup the examples by parameter instead of by example.
+
+        With **n** examples, **m** inputs and **o** outputs the result is a tuple of:
+
+            * **m** lists, each holding the **n** values seen for one input parameter
+            * one list holding the **n** return values
+            * **o** lists, each holding the **n** values seen for one output parameter
+
+        :return: the examples in the new shape
+        """
+        inps = [[] for _ in self.inputs]
+        rets = []
+        outps = [[] for _ in self.outputs]
+
+        for ex_inps, ex_ret, ex_outps in self.examples:
+            for column, value in zip(inps, ex_inps):
+                column.append(value)
+
+            rets.append(ex_ret)
+
+            for column, value in zip(outps, ex_outps):
+                column.append(value)
+
+        return inps, rets, outps
+
+    @staticmethod
+    def parse(sig: str, examples: List[str]) -> "ExampleCollection":
+        """
+        Uses a signature string and a list of examples to build the collection
+
+        The signature string is composed of three sections:
+
+            (<inputs>) <return> (<outputs>)
+
+        where <inputs> and <outputs> are a comma-separated list of parameters, and <return> is a type.
+
+        The examples are of a similar form, except instead of parameters the fields contain
+        the values of the corresponding parameter.
+        For a void return type the special value '_' is used.
+
+        Example file:
+            (int a, float b, char *s) void (char *s)
+            (1, 1.5, "a string") _ (" a new string")
+            (-4, 10.001, "a string with \" escaped characters") _ ("less chars")
+            ...
+
+        Another example file:
+            (int *a, int *b, int n) int ()
+            ([1, 2, 3], [4, 5, 6], 3) -1 ()
+            ([10, 15, 20, 25, 30], [1, -2, 3, -4, 5], 5) 10 ()
+            ...
+
+        Any examples that can not be parsed correctly are ignored.
+
+        :param sig: the signature string describing the examples
+        :param examples: a list of example strings
+        :return: the ExampleCollection built from this selection
+        """
+        inputs, ret_type, outputs = ExampleCollection.parse_sig(sig)
+        input_parsers = [ExampleCollection.parser_for(inp.type) for inp in inputs]
+        ret_parser = ExampleCollection.parser_for(ret_type)
+        output_parsers = [ExampleCollection.parser_for(outp.type) for outp in outputs]
+
+        # parse_example yields None for anything it can not parse; those are dropped
+        parsed = (ExampleCollection.parse_example(example, input_parsers, ret_parser, output_parsers)
+                  for example in examples)
+        vals = [val for val in parsed if val is not None]
+
+        return ExampleCollection(inputs, ret_type, outputs, vals)
+
+    @staticmethod
+    def from_file(example_file: str) -> "ExampleCollection":
+        """
+        Builds an ExampleCollection from a file.
+
+        The first line of the file is the signature, every following line is an example.
+        See parse for more information.
+
+        :param example_file: the file containing the examples
+        :return: the ExampleCollection built from that file
+        """
+        with open(example_file, "r") as examples:
+            sig = examples.readline()
+            return ExampleCollection.parse(sig, examples.readlines())
+
+    @staticmethod
+    def parse_int(s: str) -> Optional[Tuple[int, str]]:
+        """
+        Parse an optionally negative decimal integer, skipping leading whitespace.
+
+        :param s: the string to parse
+        :return: (value, rest) if an integer was found, otherwise :code:`None`
+        """
+        if (m := match(r"\s*(-?\d+)", s)) is not None:
+            return int(m[1]), s[m.end():]
+        else:
+            return None
+
+    @staticmethod
+    def parse_real(s: str) -> Optional[Tuple[float, str]]:
+        """
+        Parse an optionally negative decimal number with an optional fractional part.
+
+        Exponent notation is not recognised.
+
+        :param s: the string to parse
+        :return: (value, rest) if a number was found, otherwise :code:`None`
+        """
+        if (m := match(r"\s*(-?\d+(?:\.\d+)?)", s)) is not None:
+            return float(m[1]), s[m.end():]
+        else:
+            # like every other parser, failure is reported with None
+            # (raising None is itself a TypeError, which aborted the whole parse)
+            return None
+
+    @staticmethod
+    def parse_char(s: str) -> Optional[Tuple[str, str]]:
+        """
+        Parse a single-quoted character.
+
+        The contents are returned exactly as written, so an escape such as :code:`\\n`
+        comes back as a backslash followed by the escaped character.
+
+        :param s: the string to parse
+        :return: (contents, rest) if a character was found, otherwise :code:`None`
+        """
+        if (m := match(r"\s*'([^\\']|\\.)'", s)) is not None:
+            return m[1], s[m.end():]
+        else:
+            return None
+
+    @staticmethod
+    def parse_bool(s: str) -> Optional[Tuple[bool, str]]:
+        """
+        Parse one of the literals :code:`True` or :code:`False`.
+
+        :param s: the string to parse
+        :return: (value, rest) if a literal was found, otherwise :code:`None`
+        """
+        if (m := match(r"\s*(True|False)", s)) is not None:
+            return m[1] == "True", s[m.end():]
+        else:
+            return None
+
+    @staticmethod
+    def parse_string(s: str) -> Optional[Tuple[str, str]]:
+        """
+        Parse a double-quoted string.
+
+        Backslash escapes are allowed inside the quotes and are returned as written.
+
+        :param s: the string to parse
+        :return: (contents, rest) if a string was found, otherwise :code:`None`
+        """
+        if (m := match(r'\s*"((?:[^\\"]|\\.)*)"', s)) is not None:
+            return m[1], s[m.end():]
+        else:
+            return None
+
+    @staticmethod
+    def parse_list(s: str, elem) -> Optional[Tuple[list, str]]:
+        """
+        Parse a bracketed, comma-separated list of values.
+
+        :param s: the string to parse
+        :param elem: the parser used for each element of the list
+        :return: (values, rest) if a complete list was found, otherwise :code:`None`
+        """
+        if (m := match(r"\s*\[", s)) is None:
+            return None
+
+        res = []
+        rem = s[m.end():]
+        # keep taking elements for as long as each one is followed by a comma
+        while (inner_m := elem(rem)) is not None:
+            v, rem = inner_m
+            res.append(v)
+
+            if (sep := match(r"\s*,", rem)) is None:
+                break
+
+            rem = rem[sep.end():]
+
+        if (m := match(r"\s*]", rem)) is not None:
+            return res, rem[m.end():]
+        else:
+            return None
+
+    @staticmethod
+    def parse_missing(s: str) -> Optional[Tuple[None, str]]:
+        """
+        A special parser meant to parse the void value '_'
+
+        Note that success is the tuple :code:`(None, rest)`, which is distinct from the
+        bare :code:`None` returned on failure.
+
+        :param s: the string to parse
+        :return: a tuple, shifting the input string correctly, if parsing occurred otherwise :code:`None`
+        """
+        if (m := match(r"\s*_", s)) is not None:
+            return None, s[m.end():]
+        else:
+            return None
+
+    @staticmethod
+    def parser_for(c_type: CType) -> Callable:
+        """
+        Fetch the correct parser for a given type
+
+        :code:`void` maps to the '_' parser and :code:`char*` to the string parser.
+        Any other pointer is parsed as a list of whatever it points to, recursively.
+
+        :param c_type: the type to parse
+        :return: a function taking a string as input and returning the parse of the string for the given type
+        :raises Exception: if the scalar type has no parser
+        """
+        if c_type.contents == "void":
+            return ExampleCollection.parse_missing
+
+        if c_type == CType("char", 1):
+            return ExampleCollection.parse_string
+
+        if c_type.pointer_level >= 1:
+            inner_parser = ExampleCollection.parser_for(CType(c_type.contents, c_type.pointer_level - 1))
+            return lambda s: ExampleCollection.parse_list(s, inner_parser)
+
+        if c_type.contents == "int":
+            return ExampleCollection.parse_int
+        elif c_type.contents == "float" or c_type.contents == "double":
+            return ExampleCollection.parse_real
+        elif c_type.contents == "char":
+            return ExampleCollection.parse_char
+        elif c_type.contents == "bool":
+            return ExampleCollection.parse_bool
+        else:
+            raise Exception(f"no parser exists for type: {c_type}")
+
+    @staticmethod
+    def parse_sig(s: str):
+        """
+        Parses a signature string into the corresponding parameters/types
+
+        The inputs end at the first ')' and the outputs start at the last '(';
+        whatever lies between the two is the return type.
+
+        :param s: the string to parse
+        :return: a tuple of the form ([inputs], return, [outputs])
+        """
+        inp_end = s.index(")") + 1
+        outp_start = s.rindex("(")
+
+        assert inp_end < outp_start
+
+        inps = s[:inp_end].strip()
+        ret = s[inp_end:outp_start].strip()
+        outps = s[outp_start:].strip()
+
+        assert inps[0] == outps[0] == "("
+        assert inps[-1] == outps[-1] == ")"
+
+        # empty fields are skipped so that "()" gives an empty parameter list
+        input_params = [CParameter.parse(param.strip()) for param in inps[1:-1].split(',') if param.strip()]
+        ret_type = CType.parse(ret)
+        output_params = [CParameter.parse(param.strip()) for param in outps[1:-1].split(',') if param.strip()]
+
+        return input_params, ret_type, output_params
+
+    @staticmethod
+    def parse_example(s: str, inps: List[Callable], ret: Callable, outps: List[Callable]) -> Optional[Example]:
+        """
+        Parses an example
+
+        Details of the example format can be found in the :code:`parse` method.
+
+        :param s: the string to parse
+        :param inps: parsers for the input values
+        :param ret: a parser for the return value
+        :param outps: parsers for the output values
+        :return: the example that has been parsed. Returns :code:`None` if this example could not be parsed
+        """
+
+        def parse_group(s: str, grp: List[Callable]) -> Optional[Tuple[list, str]]:
+            """
+            Helper function to parse something of the form:
+
+                (<values>)
+
+            where <values> is a comma-separated list of values that can be parsed by the parsers in :code:`grp`.
+
+            :param s: the string to parse
+            :param grp: the parsers to use to parse this group
+            :return: a standard parse result; the values and the new string position if successful
+            or :code:`None` if not
+            """
+            # a line with no group at all (e.g. a blank line) is unparseable, not an error
+            if (start := s.find("(")) == -1:
+                return None
+            s = s[start + 1:]
+
+            grp_vals = []
+            for parser in grp:
+                if (parsed := parser(s)) is None:
+                    return None
+
+                val, s = parsed
+                grp_vals.append(val)
+
+                if (m := match(r"\s*,", s)) is not None:
+                    s = s[m.end():]
+
+            if (m := match(r"\s*\)", s)) is not None:
+                s = s[m.end():]
+            else:
+                return None
+
+            return grp_vals, s
+
+        if (parsed := parse_group(s, inps)) is None:
+            return None
+        input_vals, s = parsed
+
+        if (parsed := ret(s)) is None:
+            return None
+        ret_val, s = parsed
+
+        if (parsed := parse_group(s, outps)) is None:
+            return None
+        output_vals, s = parsed
+
+        return input_vals, ret_val, output_vals
+
+
+if __name__ == '__main__':
+    # build (and discard) a parser for `int*`, then parse a small in-line example set and print it
+    ExampleCollection.parser_for(CType("int", 1))
+
+    signature = "(int *a, int *b, int n) int ()"
+    rows = [
+        "([1, 2, 3], [4, 5, 6], 3) -1 ()",
+        "([10, 15, 20, 25, 30], [1, -2, 3, -4, 5], 5) 10 ()",
+    ]
+
+    print(ExampleCollection.parse(signature, rows))
diff --git a/reference_parser.py b/reference_parser.py
new file mode 100644
index 0000000..533adc5
--- /dev/null
+++ b/reference_parser.py
@@ -0,0 +1,469 @@
+import re
+from typing import *
+import os.path
+from sys import stderr
+from json import dumps
+from enum import Enum
+from dataclasses import dataclass, asdict
+
+
+class ParseIssue(Enum):
+    """
+    Issues in a parsed reference implementation.
+
+    Can be matched if smarter error handling is desired, or a simple error message
+    can be accessed using :code:`issue.value`
+
+    Each member's value is the human-readable message for that issue.
+    """
+    # the function's return type is a pointer
+    ArrayReturnType = "Return type must be `void' or scalar"
+    # a parameter has more than one level of indirection
+    MultiLevelPointer = "Multi-level pointers are not supported"
+    # a parameter listed as an output is not a pointer
+    ScalarOutputParameter = "Output parameters must be pointers"
+    # a size was given for something that is not an array parameter
+    ScalarGivenSize = "Only array parameters can be given a size"
+    # the parameter named as a size is not of an accepted type
+    GivenInvalidSize = "Sizes must be a valid type"
+    # an array parameter (other than a char array) has no size
+    UnsizedArrayParameter = "All unterminated arrays must be given a size"
+    # the signature parsed from the reference code differs from the one in props
+    ReferenceSignatureMismatch = "The signatures in `ref.c' and `props' differ"
+    # a parameter name is not a plain ASCII identifier
+    InvalidIdentifierName = "All names must be valid C identifiers"
+    # a non-void function also declares output parameters
+    ReturnAndOutputGiven = "Functions should not be able to return a value and change output parameters"
+    # a void function declares no output parameters
+    NoOutputGiven = "Functions must output some values, either through a return value or output parameters"
+
+
+@dataclass
+class CType:
+    """
+    A C type, split into its base type name and its level of pointer indirection.
+    """
+    contents: str
+    pointer_level: int
+
+    @staticmethod
+    def parse(type_sig: str):
+        """
+        Build a type instance from a type signature.
+
+        Type signatures can look like: :code:`int`, :code:`int *`, :code:`char*`, etc.
+        Everything before the first :code:`*` is the base type, and every :code:`*` from
+        there on adds one level of indirection.
+
+        The base type name is not checked, but more than one level of indirection is rejected.
+
+        :param type_sig: the type signature
+        :return: an instance of that type
+        :raises Exception: if the signature contains more than one :code:`*`
+        """
+        base, star, tail = type_sig.strip().partition('*')
+
+        # `star` is empty when there is no pointer at all, so this counts zero in that case
+        pointer_level = (star + tail).count('*')
+        if pointer_level > 1:
+            raise Exception("multi-level pointers are not supported")
+
+        return CType(base.rstrip(), pointer_level)
+
+    def __str__(self):
+        stars = '*' * self.pointer_level
+        return f"{self.contents}{stars}"
+
+
+@dataclass
+class CParameter:
+    """
+    A named parameter together with its C type.
+    """
+    name: str
+    type: CType
+
+    @staticmethod
+    def parse(param: str):
+        """
+        Builds a CParameter instance.
+
+        The definition must start with one of the known scalar type names, followed by at least
+        one space or :code:`*`; everything after that is taken as the name.
+        The name is not checked for validity, it is only separated from the type.
+
+        :param param: the parameter definition
+        :return: an instance from that definition
+        :raises Exception: if the definition does not start with a known type
+        """
+        found = re.match("((?:int|char|float|double|bool|void)[* ]+)(.*)", param)
+        if found is None:
+            raise Exception("invalid parameter")
+
+        return CParameter(found[2], CType.parse(found[1]))
+
+    def __str__(self):
+        return "{} {}".format(self.type, self.name)
+
+
+@dataclass
+class FunctionSignature:
+    """
+    A C function's full signature
+    """
+    name: str
+    type: CType
+    parameters: List[CParameter]
+
+    @staticmethod
+    def parse(sig: str):
+        """
+        Build a FunctionSignature instance from a signature string.
+
+        This string looks like:
+
+        .. code-block:: c
+
+            [func type] [func name]([parameter], ...)
+
+        An empty parameter list (:code:`[func type] [func name]()`) gives a signature
+        with no parameters.
+
+        :param sig: the signature
+        :return: the instance built from that signature
+        :raises Exception: if the string is not of the form above, or a parameter is invalid
+        """
+        m = re.match(r"(.*)\((.*)\)", sig)
+        if m is None:
+            raise Exception("broken...")
+
+        # the return type and function name have the same shape as a parameter
+        func_def = CParameter.parse(m[1].strip())
+
+        # "".split(",") is [""], which would be rejected as an invalid parameter,
+        # so an empty list is handled before splitting
+        param_list = m[2].strip()
+        params = [param.strip() for param in param_list.split(",")] if param_list else []
+
+        return FunctionSignature(func_def.name,
+                                 func_def.type,
+                                 [CParameter.parse(param) for param in params])
+
+    def __str__(self):
+        return f"{self.name}({', '.join(str(param) for param in self.parameters)}) -> {self.type}"
+
+    def c_sig(self) -> str:
+        """
+        The function signature as it would appear in C.
+
+        Note all pointer types look like :code:`type* name` (as opposed to :code:`type *name`)
+
+        :return: the signature string
+        """
+        return f"{self.type} {self.name}({', '.join(str(param) for param in self.parameters)})"
+
+
+@dataclass
+class ParamSize:
+    """
+    Links an array parameter to the scalar parameter that holds the array's size
+    """
+    array: str
+    var: str
+
+    @staticmethod
+    def parse(size: str):
+        """
+        Build a ParamSize instance from a size description string.
+
+        A leading "size" keyword is dropped if present; what remains must be the array
+        name and the scalar name separated by a comma, e.g.
+
+        "size arr_name, scalar_name"
+
+        :param size: the description
+        :return: the ParamSize instance
+        """
+        description = size.removeprefix("size").strip()
+        names = [part.strip() for part in description.split(",")]
+
+        assert (len(names) == 2)
+
+        array_name, size_name = names
+        return ParamSize(array_name, size_name)
+
+
+@dataclass
+class FunctionArrayInfo:
+    """
+    A wrapper for additional information found in a function's *props* file.
+
+    This includes the names of any output parameters, and the given sizes of any array parameters.
+    """
+    outputs: List[str]
+    sizes: List[ParamSize]
+
+    @staticmethod
+    def parse(info: List[str]):
+        """
+        Build a FunctionArrayInfo instance from a list of description strings.
+        These strings may be describing either sizes or outputs.
+
+        Lines starting with "output" name an output parameter, lines starting with "size"
+        are handed to :code:`ParamSize.parse`. Blank lines are skipped.
+
+        :param info: the description strings
+        :return: the instance containing the information
+        :raises Exception: if a non-blank line is neither an output nor a size description
+        """
+        outputs = []
+        sizes = []
+
+        for line in info:
+            line = line.strip()
+            if not line:
+                continue
+
+            if line.startswith("output"):
+                outputs.append(line.removeprefix("output").strip())
+            elif line.startswith("size"):
+                # ParamSize.parse drops the "size" keyword itself; dropping it here as well
+                # would eat the start of any array name that begins with "size"
+                sizes.append(ParamSize.parse(line))
+            else:
+                raise Exception("very bad")
+
+        return FunctionArrayInfo(outputs, sizes)
+
+
+@dataclass
+class FunctionProps:
+    """
+    Everything read from a function's *props* file.
+
+    That is the function's signature plus the extra information about its parameters.
+    """
+    sig: FunctionSignature
+    arr_info: FunctionArrayInfo
+
+    @staticmethod
+    def parse(props_file: str):
+        """
+        Build a FunctionProps instance from a *props* file.
+
+        The first line of the file is parsed as the signature, all remaining lines as
+        output/size descriptions.
+
+        :param props_file: the path to the *props* file
+        :return: the instance built from that file
+        """
+        with open(props_file, "r") as handle:
+            signature = FunctionSignature.parse(handle.readline())
+            array_info = FunctionArrayInfo.parse(handle.readlines())
+
+        return FunctionProps(sig=signature, arr_info=array_info)
+
+
+@dataclass
+class CReference:
+    """
+    The relevant parts of a function's *ref.c* file.
+
+    These are the :code:`#include` lines found before the function, and the C code of the function itself.
+    """
+    includes: List[str]
+    code: str
+
+    @staticmethod
+    def parse(ref_file: str):
+        """
+        Build a CReference instance from a given *ref.c* file.
+
+        Lines are scanned until one starts (after leading whitespace) with a known scalar type
+        name; that line and the whole rest of the file are taken as the function code.
+        Of the lines before it only the :code:`#include` lines are kept.
+
+        :param ref_file: the path to the *ref.c* file
+        :return: the instance built from that file
+        """
+        with open(ref_file, "r") as source:
+            found_includes = []
+
+            # holds the most recently read line; stays empty for an empty file
+            current = ""
+            for raw_line in source:
+                current = raw_line.lstrip()
+
+                # assumes everything from the first type name onwards is the actual function
+                if re.match("(int|float|double|char|bool|void)", current):
+                    break
+
+                if current.startswith("#include"):
+                    found_includes.append(current.rstrip())
+
+            # the line that ended the scan plus whatever has not been read yet
+            body = current + source.read()
+
+        return CReference(found_includes, body)
+
+
+@dataclass
+class FunctionReference:
+    """
+    Wrapper for all information about a given function.
+
+    Combines the signature and array information from *props* with the code from *ref.c*.
+    """
+    signature: FunctionSignature
+    info: FunctionArrayInfo
+    reference: CReference
+
+    @property
+    def type(self):
+        """The function's return type, taken from the signature."""
+        return self.signature.type
+
+    @property
+    def parameters(self):
+        """The function's parameters, taken from the signature."""
+        return self.signature.parameters
+
+    @property
+    def name(self):
+        """The function's name, taken from the signature."""
+        return self.signature.name
+
+    @property
+    def code(self):
+        """The function's C code, taken from the reference."""
+        return self.reference.code
+
+    @staticmethod
+    def parse(prog_name: str):
+        """
+        Build a FunctionReference from an actual C function.
+        This function must have a directory containing *ref.c* and *props* files.
+
+        :param prog_name: the path to the function directory
+        :return: the instance built for that function
+        """
+        path = os.path.expanduser(prog_name)
+        props = FunctionProps.parse(os.path.join(path, "props"))
+        ref = CReference.parse(os.path.join(path, "ref.c"))
+
+        return FunctionReference(props.sig, props.arr_info, ref)
+
+    def validate(self) -> Set[ParseIssue]:
+        """
+        Check this FunctionReference for any issues.
+
+        Outputs and sizes that name a parameter missing from the signature are reported
+        as issues, the same as ones naming a parameter of the wrong kind.
+
+        :return: all issues found in the function
+        """
+        issues = set()
+
+        if self.type.pointer_level != 0:
+            issues.add(ParseIssue.ArrayReturnType)
+
+        # building lookup tables
+        param_dict = dict()
+        array_params = set()
+        scalar_params = set()
+        for param in self.parameters:
+            name = param.name
+            c_type = param.type
+
+            param_dict[name] = c_type
+            if c_type.pointer_level == 0:
+                scalar_params.add(name)
+            elif c_type.pointer_level == 1:
+                array_params.add(name)
+            else:
+                issues.add(ParseIssue.MultiLevelPointer)
+
+            # this is a SUPER simplified version of checking for valid C identifiers
+            # doesn't take keywords etc. into consideration
+            # ($ also matches just before a trailing newline, hence the comparison with the full name)
+            m = re.match(r"^[a-zA-Z_]\w*$", name, flags=re.ASCII)
+            if not m or m[0] != name:
+                issues.add(ParseIssue.InvalidIdentifierName)
+
+        for output in self.info.outputs:
+            # .get() so that an unknown name becomes an issue rather than a KeyError
+            output_type = param_dict.get(output)
+            if output_type is None or output_type.pointer_level == 0:
+                issues.add(ParseIssue.ScalarOutputParameter)
+
+        sized = set()
+        for size in self.info.sizes:
+            array = size.array
+            var = size.var
+            sized.add(array)
+
+            if array not in array_params:
+                issues.add(ParseIssue.ScalarGivenSize)
+
+            # as above, an unknown size variable is an issue, not a crash
+            var_type = param_dict.get(var)
+            if var_type is None or var_type.contents not in {"int"}:
+                issues.add(ParseIssue.GivenInvalidSize)
+
+        # char arrays are the only arrays allowed to go without a size
+        for array in array_params - sized:
+            if param_dict[array].contents not in {"char"}:
+                issues.add(ParseIssue.UnsizedArrayParameter)
+
+        # the signature in the code is everything before the opening brace
+        code = self.code
+        ref_signature = FunctionSignature.parse(code[:code.find("{")])
+
+        if ref_signature != self.signature:
+            issues.add(ParseIssue.ReferenceSignatureMismatch)
+
+        if self.signature.type.contents != "void" and self.info.outputs:
+            issues.add(ParseIssue.ReturnAndOutputGiven)
+        elif self.signature.type.contents == "void" and not self.info.outputs:
+            issues.add(ParseIssue.NoOutputGiven)
+
+        return issues
+
+    def show_issues(self, verbose: bool = False, ignore_good: bool = False) -> None:
+        """
+        Write any issues in the function to stderr
+
+        :param verbose: set to :code:`True` to include a full breakdown of any issues found
+        :param ignore_good: set to :code:`True` to write nothing when no issues are found;
+            by default a short confirmation is written instead
+        """
+        issues = self.validate()
+
+        if issues:
+            stderr.write(f"error: {self.name} is broken!\n")
+            for issue in issues:
+                stderr.write(f" - {issue.value}\n")
+
+            if verbose:
+                stderr.write(dumps(asdict(self), indent=4) + "\n\n")
+        elif not ignore_good:
+            stderr.write(f"{self.name} is good\n")
+
+
+def show_all(base_path: str) -> None:
+    """
+    Parse every function directory under a given directory and print its C signature.
+
+    Entries whose name starts with "__" or ".", entries that are not directories, and
+    directories lacking either *ref.c* or *props* are skipped.
+    Issues found in a function are written to stderr; functions without issues are not mentioned there.
+
+    :param base_path: the path to the directory containing all of the functions
+    """
+    base_path = os.path.expanduser(base_path)
+
+    for directory in os.listdir(base_path):
+        # private/hidden entries are never function directories
+        if directory.startswith("__") or directory.startswith("."):
+            continue
+
+        # building out the proper path to the function
+        dir_path = os.path.join(base_path, directory)
+        if not os.path.isdir(dir_path):
+            continue
+
+        # both files are needed to build a reference
+        if any(required not in os.listdir(dir_path) for required in ("ref.c", "props")):
+            continue
+
+        function = FunctionReference.parse(dir_path)
+        function.show_issues(ignore_good=True)
+        print(function.signature.c_sig())
+
+
+def show_single(base_path: str, prog_name: str) -> None:
+    """
+    Parse a single program and display everything known about it.
+
+    The parsed reference is printed as indented JSON, then its issues are reported on
+    stderr in verbose mode.
+
+    :param base_path: the full path to the directory containing a function
+    :param prog_name: the name of the function directory
+    """
+    function_dir = os.path.join(base_path, prog_name)
+    function = FunctionReference.parse(function_dir)
+
+    as_json = dumps(asdict(function), indent=4)
+    print(as_json)
+
+    function.show_issues(verbose=True)
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("-a", "--all", help="display and debug all available references", action="store_true")
+    parser.add_argument("program", nargs="?", help="parse and output the given program")
+    parser.add_argument("-p", "--path", help="path to example directory", default=".")
+
+    args = parser.parse_args()
+
+    # exactly one of `program` and `--all` has to be given: both or neither is an error
+    program_given = args.program is not None
+    if program_given == args.all:
+        parser.print_usage(file=stderr)
+        stderr.write(f"{parser.prog}: error: exactly one argument must be set from (--all, program)\n")
+        exit(1)
+
+    if not args.all:
+        show_single(args.path, args.program)
+    else:
+        show_all(args.path)
diff --git a/utilities.py b/utilities.py
new file mode 100644
index 0000000..96e733e
--- /dev/null
+++ b/utilities.py
@@ -0,0 +1,47 @@
+import uuid
+from typing import Tuple
+import subprocess
+import os
+import numpy as np
+import random
+import logging
+import time
+from typing import Callable, Any, Optional
+
+def get_tmp_file_name(content: str, extension: str = '') -> str:
+    """
+    Make up a random file name; no file is created.
+
+    :param content: not used
+    :param extension: text appended to the random name
+    :return: a random hex string followed by the extension
+    """
+    stem = uuid.uuid4().hex
+    return "".join((stem, extension))
+
+
+def get_tmp_file(content: str, extension: str = '') -> str:
+    """
+    Write the given text to a new, randomly named file.
+
+    The name is relative, so the file is created in the current working directory.
+
+    :param content: the text to write to the file
+    :param extension: text appended to the random name
+    :return: the name of the file that was written
+    """
+    path = "".join((uuid.uuid4().hex, extension))
+
+    with open(path, 'w') as handle:
+        handle.write(content)
+
+    return path
+
+
+def get_tmp_path() -> str:
+    """
+    Make up a random name; nothing is created on disk.
+
+    :return: a random hex string
+    """
+    return uuid.uuid4().hex
+
+
+def run_command(command: str, stdin: Optional[str] = None) -> Tuple[str, str]:
+    """
+    Run a command, wait for it to finish, and capture its output as text.
+
+    NOTE: the command is split on whitespace only, so quoting is not understood and an
+    argument can not contain spaces.
+
+    :param command: the command line to run
+    :param stdin: text to feed to the process on stdin, if any
+    :return: (stdout, stderr) of the process
+    """
+    argv = command.split()
+    completed = subprocess.run(argv, input=stdin, text=True, capture_output=True)
+    return completed.stdout, completed.stderr
+
+
+def deterministic(seed: int):
+    """
+    Seed the random number generators used here so that runs can be repeated.
+
+    Sets the :code:`PYTHONHASHSEED` environment variable and seeds both numpy's and
+    Python's global random generators.
+
+    :param seed: the seed to use everywhere
+    """
+    os.environ.update(PYTHONHASHSEED=str(seed))
+
+    for seed_generator in (np.random.seed, random.seed):  # numpy first, then the random module
+        seed_generator(seed)
+
+
+def timeit(func: Callable) -> Callable:
+    """
+    Decorator that logs when a function starts and how long it took to run.
+
+    Both messages are logged at INFO level. The wrapper keeps the name, docstring and
+    other metadata of the function it wraps.
+
+    :param func: the function to time
+    :return: a wrapper that behaves like :code:`func`, with the added logging
+    """
+    from functools import wraps
+
+    @wraps(func)
+    def wrapped(*args, **kwargs):
+        func_name = func.__name__
+        logging.info(f'Running {func_name}')
+        # perf_counter is monotonic, so the difference can not be skewed by clock adjustments
+        t0 = time.perf_counter()
+        res = func(*args, **kwargs)
+        t1 = time.perf_counter()
+        logging.info(f'Run {func_name} in {t1-t0}s')
+        return res
+    return wrapped