class ScalarCType(Enum):
    """
    The scalar C types supported by the code generator.

    The value of each member is the spelling of that type in C source code.
    """
    Int = 'int'
    Char = 'char'
    Float = 'float'
    Double = 'double'
    Bool = 'bool'

    @property
    def c_repr(self) -> "CCode":
        """
        Convert a C type from object representation to the string used to denote it in C.

        :return: the C representation of the specified type
        """
        return self.value

    def placeholder(self, printf=True) -> str:
        """
        The placeholder string, as can be found in a C format string.

        Note that to make parsing easier chars are read and written from their ASCII code.

        When printing, char and bool arguments are promoted to :code:`int`, so :code:`%d` is right.
        When scanning, the value is stored through a pointer, so the conversion has to match the
        width of the destination: :code:`%d` would store a whole :code:`int` into a one byte object.

        :param printf: set to :code:`False` to get values for a :code:`scanf` format string instead
        :return: the placeholder string
        """
        if self == ScalarCType.Int:
            return "%d"
        elif self == ScalarCType.Char:
            return "%d" if printf else "%hhd"
        elif self == ScalarCType.Float:
            return "%f"
        elif self == ScalarCType.Double:
            return "%f" if printf else "%lf"
        elif self == ScalarCType.Bool:
            # NOTE(review): C has no scanf conversion for bool; %hhd assumes sizeof(bool) == 1
            return "%d" if printf else "%hhd"

    @staticmethod
    def from_string(c_repr: "CCode") -> Optional["ScalarCType"]:
        """
        Find the object corresponding to the same type given a C representation of a type.

        The inverse of :code:`c_repr`

        :param c_repr: the C representation of the type
        :return: the corresponding enum value, :code:`None` if the representation is not valid
        """
        try:
            # Enum lookup by value; raises ValueError when no member has that value
            return ScalarCType(c_repr)
        except ValueError:
            return None

    def scanf_template(self) -> "Template":
        """
        Build a template for calls to :code:`scanf` from a type instance.

        The generated code declares the variable and then reads it.
        Only the :code:`name` field is left for the caller to fill.

        :return: the partially filled template
        """
        template = '''
        {c_repr} {name};
        scanf("{placeholder}", &{name});
        '''

        filled = {"c_repr": self.c_repr, "placeholder": self.placeholder(printf=False)}

        return Template(template, filled, ["name"])

    def printf_template(self) -> "Template":
        """
        Build a template for calls to :code:`printf` from a type instance.

        The generated code prints the value followed by a newline.
        Only the :code:`name` field is left for the caller to fill.

        :return: the partially filled template
        """
        template = '''
        printf("{placeholder}\\n", {name});
        '''

        return Template(template, {"placeholder": self.placeholder()}, ["name"])
def printf_template(self) -> "Template":
    """
    Build a template for calls to :code:`printf` from a type instance.

    The generated code prints every element followed by a space, ends the line,
    and then frees the array.

    For an array without a known size parameter the loop also stops at the first zero element.
    The :code:`size` field is then left for the caller to fill, alongside :code:`name`.

    :return: the partially filled template
    """
    if self.size is not None:
        template = '''
        for (int idx = 0; idx < {size}; idx++) {{
            printf("{scalar_placeholder} ", {name}[idx]);
        }}
        putchar('\\n');

        free({name});
        '''
    else:
        # the bound is tested before the element is read,
        # otherwise an array with no zero left in it is read one element past its end
        template = '''
        for (int idx = 0; idx < {size} && {name}[idx]; idx++) {{
            printf("{scalar_placeholder} ", {name}[idx]);
        }}
        putchar('\\n');

        free({name});
        '''

    filled = {"c_repr": self.c_repr,
              "scalar_c_repr": self.scalar_c_type.c_repr,
              "scalar_placeholder": self.scalar_c_type.placeholder()}

    unfilled = ["name"]

    if self.size is not None:
        filled["size"] = self.size
    else:
        unfilled.append("size")

    return Template(template, filled, unfilled)
def fill_template(self, template: "Template") -> None:
    """
    Supply the values this parameter can provide to a template.

    Walks :code:`template.to_fill` and fills the fields :code:`name` and :code:`size`;
    any other field is left untouched.

    :param template: the template to fill in
    """
    # suppliers are only called on demand, so arr_size is never evaluated unless "size" is requested
    suppliers = {
        "name": lambda: self.name,
        "size": lambda: self.arr_size,
    }

    for field in template.to_fill:
        supplier = suppliers.get(field)
        if supplier is not None:
            template.fill(**{field: supplier()})
@property
def sizes(self) -> Set[str]:
    """
    Collect the names of the variables used as array sizes.

    Only array parameters whose type has a size set contribute.

    :return: the set of size variable names
    """
    # the array type lives on the parameter's c_type; the parameter itself is never an ArrayCType
    return {param.c_type.size
            for param in self.parameters
            if isinstance(param.c_type, ArrayCType) and param.c_type.size is not None}
def program(self) -> "CCode":
    """
    Build the whole implementation for the reference function.

    The result is the include lines, followed by the reference code, followed by the generated main.

    :return: the C source code
    """
    # the generated main uses printf/scanf/fprintf/putchar (stdio.h) and malloc/free/atoi (stdlib.h)
    required = ["#include <stdio.h>", "#include <stdlib.h>"]
    includes = "\n".join(required + list(self.includes)) + "\n"

    return f"{includes}{self.code}{self.main()}"
def compile(self, exe: Optional[str] = None, cleanup: bool = True) -> Optional[str]:
    """
    Compile the function.

    The generated program is written to a :code:`.c` file named after the executable and built with gcc.
    If the executable already exists the user is asked on stdin whether to overwrite it.

    :param exe: the name of the executable to compile to.
        If :code:`None` then a name is requested from :code:`utilities.get_tmp_file_name`
    :param cleanup: set to :code:`True` to remove the source file afterwards
    :return: the name of the executable.
        Returns :code:`None` if the user declined to overwrite or if no executable was produced
    :raises Exception: if the executable name has an extension other than :code:`.o` or none
    """
    # generate once; the same text is used for naming and for the source file
    source_code = self.program()

    if exe is None:
        exe = utilities.get_tmp_file_name(source_code, ".o")

    root, ext = os.path.splitext(exe)

    if ext not in {".o", ""}:
        raise Exception("invalid executable given")

    src = root + ".c"

    if os.path.exists(exe):
        ack = input(f"overwrite {exe}/{src}? [yN]\n")
        if ack not in ("y", "Y"):
            print("not overwriting!")
            return None

        print("overwriting...")
        # drop the stale binary so a failed build can not be mistaken for a fresh one
        os.remove(exe)

    with open(src, "w") as f:
        f.write(source_code)

    # stderr may hold only warnings (-Wall), so it is reported but not treated as failure
    _, stderr = utilities.run_command(f"gcc -Wall -O0 -o {exe} {src}")
    if stderr:
        sys.stderr.write(stderr)

    if cleanup:
        os.remove(src)

    # the build failed if it left no executable behind
    if not os.path.exists(exe):
        return None

    print("COMPILED")

    return exe
def get_args(self) -> str:
    """
    Builds the arguments for the program when this example is being tested

    The arguments are the sizes of any unsized arrays
    (at this point only strings are handled correctly).
    This size is determined here by checking the inputs and outputs for any unsized parameters,
    and asserting that the parameter only needs to be large enough to accommodate the biggest value.

    For example: if the function took parameter :code:`char *s`, and this parameter was an output,
    then this function would look at the input and output values for this parameter
    (e.g. "hi" -> "hello")
    and use this to determine the required space in the array.
    Here *s* has to accommodate both "hi" and "hello", so requires 6 bytes to be allocated.

    :return: the sizes separated by spaces, one per unsized array, in the order of the inputs
    """
    def is_unsized(param) -> bool:
        return isinstance(param.c_type, ArrayCType) and param.c_type.size is None

    # doing outputs first since then inputs can be done in one pass
    output_lens = {param.name: len(val) for param, val in self.outputs if is_unsized(param)}

    args = []
    for param, val in self.inputs:
        if param.name in output_lens:
            needed = max(output_lens[param.name], len(val))
        elif is_unsized(param):
            needed = len(val)
        else:
            continue

        # one extra element for the terminator.
        # append, not +=, which would add each digit of the number as its own argument
        args.append(str(1 + needed))

    return ' '.join(args)
+ + :param desc: the line of output given by the function denoting a value + :param c_type: the expected type of the value + :return: the value parsed from the string + """ + def parse_scalar(description: str, c_type: ScalarCType) -> ScalarCValue: + description = description.strip() + if c_type == ScalarCType.Int or c_type == ScalarCType.Bool: + return int(description) + elif c_type == ScalarCType.Float or c_type == ScalarCType.Double: + return float(description) + elif c_type == ScalarCType.Char: + return chr(int(description)) + + def parse_array(description: str, c_type: ArrayCType) -> ArrayCValue: + description = description.strip() + if description == "": + return [] + + return [parse_scalar(chunk, c_type.scalar_c_type) for chunk in description.split(" ")] + + if isinstance(c_type, ScalarCType): + return parse_scalar(desc, c_type) + elif isinstance(c_type, ArrayCType): + parsed = parse_array(desc, c_type) + if c_type.scalar_c_type == ScalarCType.Char: + return "".join(parsed) + + return parsed + else: + return None + + def check_output(self, stdout: str) -> bool: + """ + Tests the output generated by running the reference against the current example + + Writes all issues to stderr + + :param stdout: the output produced when the reference is run + :return: :code:`True` if and only if all values matched + """ + pass_test = True + + stdout = [line.strip() for line in stdout.splitlines() if line.strip()] + + ret_type, ret_val = self.value + if not isinstance(ret_type, VoidCType): + ex_val = ExampleInstance.parse(stdout[0], ret_type) + if ret_val != ex_val: + sys.stderr.write(f"return value does not match! ({ret_val} vs. {ex_val})\n") + pass_test = False + + stdout = stdout[1:] + + assert len(stdout) == len(self.outputs) + for line, (param, value) in zip(stdout, self.outputs): + ex_value = ExampleInstance.parse(line, param.c_type) + if value != ExampleInstance.parse(line, param.c_type): + sys.stderr.write(f"output does not match! ({value} vs. 
@staticmethod
def describe(val: "AnyCValue", c_type: "AnyCType") -> str:
    """
    Convert a value to the format readable by the reference function

    Inverse of :code:`parse`

    Chars are written as their character code and bools as 0 or 1.
    Array elements are separated by spaces, and char arrays get a trailing 0 as terminator.

    :param val: the value to format correctly
    :param c_type: the type of the value
    :return: the formatted string
    """
    def describe_scalar(v, scalar_type) -> str:
        if scalar_type == ScalarCType.Char:
            return str(ord(v))
        if scalar_type == ScalarCType.Bool:
            # str(True) is "True", which a numeric scanf conversion can not read
            return str(int(v))
        return str(v)

    if isinstance(c_type, ScalarCType):
        return describe_scalar(val, c_type)

    if c_type.scalar_c_type == ScalarCType.Char:
        return ' '.join([str(ord(c)) for c in val] + ["0"])

    return ' '.join(describe_scalar(v, c_type.scalar_c_type) for v in val)
+ + :param ref: the reference function + :param examples: examples for that reference + :return: + """ + inp_vals, example_returns, outp_vals = examples.transposed_examples + + example_inputs = {inp.name: (inp.type, inp_val) for inp, inp_val in zip(examples.inputs, inp_vals)} + example_outputs = {outp.name: (outp.type, outp_val) for outp, outp_val in zip(examples.outputs, outp_vals)} + + # this is where the smart size inference etc. can come in + inputs = [] + for param in ref.read_order: + if example_inputs.get(param.name) is not None: + c_type, values = example_inputs[param.name] + + if CReference.get_c_type(c_type) != param.c_type: + raise Exception( + f"incorrect parameter for {param.name} (found {c_type}, expected {param.c_type.c_repr})") + + inputs.append(values) + else: + raise Exception(f"parameter {param.name} could not be found") + + if CReference.get_c_type(examples.ret_type) != ref.c_type: + raise Exception(f"incorrect return type (found {examples.ret_type}, expected {ref.c_type.c_repr})") + returns = example_returns + + outputs = [] + for param in ref.outputs: + if example_outputs.get(param.name) is not None: + c_type, values = example_outputs[param.name] + + if CReference.get_c_type(c_type) != param.c_type: + raise Exception( + f"incorrect output parameter for {param.name} (found {c_type}, expected {param.c_type.c_repr})") + + outputs.append(values) + else: + raise Exception(f"output parameter {param.name} could not be found") + + return Evaluator(ref, inputs, returns, outputs) + + def next_example(self) -> Generator[ExampleInstance, None, None]: + """ + Returns a generator used to squash the internal representation + + Since we want a full example (i.e. all parameters and return) to operate with the examples must be transposed. + This is as easy as taking a "slice" across the examples, + picking one value from all inputs, outputs and the return. 
def evaluate(self) -> Tuple[int, int]:
    """
    Compiles and runs a reference function on the stored examples

    :return: (no. of successful runs, no. of failed runs)
    :raises Exception: if compiling the reference did not give an executable
    """
    exe = self.reference.compile(cleanup=False)
    if exe is None:
        # without this check the examples would be run against an executable called "None"
        raise Exception("could not compile the reference")

    success = 0
    failure = 0

    for example in self.next_example():
        stdout = self.run(exe, example)
        if example.check_output(stdout):
            success += 1
        else:
            failure += 1

    return success, failure
the examples to be switched from a long list of "flat" examples to a flat list of parameters. + + The original list has **n** examples where each example is a tuple of shape (**m**, 1, **o**). + The new list will have a shape of (**m*n**, **n**, **o*n**). + + :return: the examples in the new shape + """ + inps = [[] for _ in self.inputs] + rets = [] + outps = [[] for _ in self.outputs] + + for example in self.examples: + ex_inps, ex_ret, ex_outps = example + + for inp, ex_inp in zip(inps, ex_inps): + inp.append(ex_inp) + + rets.append(ex_ret) + + for inp, ex_outp in zip(outps, ex_outps): + inp.append(ex_outp) + + return inps, rets, outps + + @staticmethod + def parse(sig: str, examples: List[str]): + """ + Uses a signature string and a list of examples to build the collection + + The signature string is composed of three sections: + + () () + + where and are a comma-separated list of parameters, and is a type. + + The examples are of a similar form, except instead of parameters the fields contain + the values of the corresponding parameter. + For a void return type the special value '_' is used. + + Example file: + (int a, float b, char *s) void (char *s) + (1, 1.5, "a string") _ (" a new string") + (-4, 10.001, "a string with \" escaped characters") _ ("less chars") + ... + + Another example file: + (int *a, int *b, int n) int () + ([1, 2, 3], [4, 5, 6], 3) -1 () + ([10. 15, 20, 25, 30], [1, -2, 3, -4, 5], 5) 10 () + ... + + Any examples that can not be parsed correctly are ignored. 
@staticmethod
def parse_real(s: str) -> Optional[Tuple[float, str]]:
    """
    Parse a real number from the start of a string

    Leading whitespace is skipped.
    Accepts an optional minus sign, digits, and an optional fractional part.

    :param s: the string to parse
    :return: the value and the rest of the string, :code:`None` if the string does not start with a number
    """
    if (m := match(r"\s*(-?\d+(?:\.\d+)?)", s)) is not None:
        return float(m[1]), s[m.end():]
    else:
        # the other parsers signal "no match" by returning None; raising None would be a TypeError
        return None
@staticmethod
def parse_list(s: str, elem) -> Optional[Tuple[list, str]]:
    """
    Parse a bracketed, comma-separated list from the start of a string.

    :param s: the string to parse
    :param elem: the parser applied to each element; it must return a (value, remainder) tuple
        or :code:`None`
    :return: the list of values and the unconsumed remainder, or :code:`None` if the opening
        or closing bracket is missing
    """
    opening = match(r"\s*\[", s)
    if opening is None:
        return None

    items = []
    rest = s[opening.end():]
    while True:
        parsed = elem(rest)
        if parsed is None:
            break

        value, rest = parsed
        items.append(value)

        comma = match(r"\s*,", rest)
        if comma is None:
            break
        rest = rest[comma.end():]

    closing = match(r"\s*]", rest)
    if closing is None:
        return None

    return items, rest[closing.end():]


@staticmethod
def parse_missing(s: str) -> Optional[Tuple[None, str]]:
    """
    A special parser meant to parse the void value '_'.

    :param s: the string to parse
    :return: a tuple :code:`(None, remainder)` if parsing occurred, otherwise :code:`None`
    """
    found = match(r"\s*_", s)
    if found is None:
        return None

    return None, s[found.end():]


@staticmethod
def parser_for(c_type: "CType") -> Callable:
    """
    Fetch the correct parser for a given type.

    Pointers are parsed as lists of the pointed-to type (recursively), except for a
    single-level :code:`char` pointer, which is parsed as a string.

    :param c_type: the type to parse
    :return: a function taking a string as input and returning the parse of the string for the given type
    """
    if c_type.contents == "void":
        return ExampleCollection.parse_missing

    if c_type == CType("char", 1):
        return ExampleCollection.parse_string

    if c_type.pointer_level >= 1:
        pointee = CType(c_type.contents, c_type.pointer_level - 1)
        inner_parser = ExampleCollection.parser_for(pointee)
        return lambda s: ExampleCollection.parse_list(s, inner_parser)

    scalar_parsers = {
        "int": ExampleCollection.parse_int,
        "float": ExampleCollection.parse_real,
        "double": ExampleCollection.parse_real,
        "char": ExampleCollection.parse_char,
        "bool": ExampleCollection.parse_bool,
    }
    parser = scalar_parsers.get(c_type.contents)
    if parser is None:
        raise Exception(f"no parser exists for type: {c_type}")

    return parser


@staticmethod
def parse_sig(s: str):
    """
    Parse a signature string into the corresponding parameters/types.

    The first ")" closes the input group and the last "(" opens the output group;
    whatever lies between them is the return type.

    :param s: the string to parse
    :return: a tuple of the form ([inputs], return, [outputs])
    """
    inp_end = s.index(")") + 1
    outp_start = s.rindex("(")

    assert inp_end < outp_start

    inps = s[:inp_end].strip()
    ret = s[inp_end:outp_start].strip()
    outps = s[outp_start:].strip()

    assert inps[0] == outps[0] == "("
    assert inps[-1] == outps[-1] == ")"

    def params_of(group: str) -> list:
        # drop the surrounding parentheses, then skip empty entries so "()" gives []
        pieces = (piece.strip() for piece in group[1:-1].split(','))
        return [CParameter.parse(piece) for piece in pieces if piece]

    input_params = params_of(inps)
    ret_type = CType.parse(ret)
    output_params = params_of(outps)

    return input_params, ret_type, output_params
@staticmethod
def parse_example(s: str, inps: List[Callable], ret: Callable, outps: List[Callable]) -> Optional["Example"]:
    """
    Parse a single example.

    Details of the example format can be found in the :code:`parse` method.

    :param s: the string to parse
    :param inps: parsers for the input values
    :param ret: a parser for the return value
    :param outps: parsers for the output values
    :return: the example that has been parsed, as a tuple (inputs, return value, outputs).
        Returns :code:`None` if this example could not be parsed
    """

    def parse_group(s: str, grp: List[Callable]) -> Optional[Tuple[list, str]]:
        """
        Helper function to parse something of the form:

            (<values>)

        where <values> is a comma-separated list of values that can be parsed by the parsers in :code:`grp`.

        :param s: the string to parse
        :param grp: the parsers to use to parse this group
        :return: a standard parse result; the values and the new string position if successful
                 or :code:`None` if not
        """
        start = s.find("(")
        if start == -1:
            # str.index would raise ValueError here (e.g. on a blank line); a group that
            # never opens is just an unparsable example
            return None
        s = s[start + 1:]

        grp_vals = []
        for parser in grp:
            if (parsed := parser(s)) is None:
                return None

            val, s = parsed
            grp_vals.append(val)

            # a separator after a value is consumed when present but is not required
            if (m := match(r"\s*,", s)) is not None:
                s = s[m.end():]

        if (m := match(r"\s*\)", s)) is None:
            return None

        return grp_vals, s[m.end():]

    if (parsed := parse_group(s, inps)) is None:
        return None
    input_vals, s = parsed

    if (parsed := ret(s)) is None:
        return None
    ret_val, s = parsed

    if (parsed := parse_group(s, outps)) is None:
        return None
    output_vals, s = parsed

    return input_vals, ret_val, output_vals


if __name__ == '__main__':
    ExampleCollection.parser_for(CType("int", 1))
    ec = ExampleCollection.parse("(int *a, int *b, int n) int ()",
                                 [
                                     "([1, 2, 3], [4, 5, 6], 3) -1 ()",
                                     "([10, 15, 20, 25, 30], [1, -2, 3, -4, 5], 5) 10 ()",
                                 ])

    print(ec)


# ---- reference_parser.py (second new file in this patch) ----
import re
from typing import *
import os.path
from sys import stderr
from json import dumps
from enum import Enum
from dataclasses import dataclass, asdict
class ParseIssue(Enum):
    """
    Issues in a parsed reference implementation.

    Can be matched if smarter error handling is desired, or a simple error message
    can be accessed using :code:`issue.value`
    """
    ArrayReturnType = "Return type must be `void' or scalar"
    MultiLevelPointer = "Multi-level pointers are not supported"
    ScalarOutputParameter = "Output parameters must be pointers"
    ScalarGivenSize = "Only array parameters can be given a size"
    GivenInvalidSize = "Sizes must be a valid type"
    UnsizedArrayParameter = "All unterminated arrays must be given a size"
    ReferenceSignatureMismatch = "The signatures in `ref.c' and `props' differ"
    InvalidIdentifierName = "All names must be valid C identifiers"
    ReturnAndOutputGiven = "Functions should not be able to return a value and change output parameters"
    NoOutputGiven = "Functions must output some values, either through a return value or output parameters"


@dataclass
class CType:
    """
    A wrapper for a C type.
    """
    contents: str  # the base type name, e.g. "int"
    pointer_level: int  # number of '*' in the type, 0 for scalars

    @staticmethod
    def parse(type_sig: str):
        """
        Build a type instance from a type signature.

        Type signatures can look like: :code:`int`, :code:`int *`, :code:`char*`, etc.

        No checking is done here to determine whether the base type is valid.

        :param type_sig: the type signature
        :return: an instance of that type
        :raises Exception: if the signature contains more than one :code:`*`
        """
        base, star, tail = type_sig.strip().partition('*')

        contents = base.rstrip()
        # everything from the first '*' onwards is only inspected for further '*'s
        pointer_level = 1 + tail.count('*') if star else 0

        if pointer_level > 1:
            raise Exception("multi-level pointers are not supported")

        return CType(contents, pointer_level)

    def __str__(self):
        return f"{self.contents}{'*' * self.pointer_level}"


@dataclass
class CParameter:
    """
    A wrapper for a parameter.
    """
    name: str
    type: CType

    @staticmethod
    def parse(param: str):
        """
        Build a CParameter instance from a definition such as :code:`int *a`.

        Does not check if the name is a valid identifier, just separates it from the type.

        :param param: the parameter definition
        :return: an instance from that definition
        :raises Exception: if the definition does not start with a supported base type
            followed by at least one space or :code:`*`
        """
        m = re.match("((?:int|char|float|double|bool|void)[* ]+)(.*)", param)
        if m is None:
            raise Exception("invalid parameter")

        c_type, name = m.groups()

        return CParameter(name, CType.parse(c_type))

    def __str__(self):
        return f"{self.type} {self.name}"


@dataclass
class FunctionSignature:
    """
    A C function's full signature
    """
    name: str
    type: CType
    parameters: List[CParameter]

    @staticmethod
    def parse(sig: str):
        """
        Build a FunctionSignature instance from a signature string.

        This string looks like:

        .. code-block:: c

            [func type] [func name]([parameter], ...)

        An empty parameter list (:code:`int f()`) yields a signature with no parameters.

        :param sig: the signature
        :return: the instance built from that signature
        :raises Exception: if the string does not have the shape above, or the function
            definition or any parameter can not be parsed
        """
        m = re.match(r"(.*)\((.*)\)", sig)
        if m is None:
            raise Exception("broken...")

        # "<type> <name>" has the same shape as a parameter, so the parameter parser is reused
        func_def = CParameter.parse(m[1].strip())

        # "".split(",") yields [""], so empty entries must be dropped before parsing
        params = [param.strip() for param in m[2].split(",")]
        params = [param for param in params if param]

        return FunctionSignature(func_def.name,
                                 func_def.type,
                                 [CParameter.parse(param) for param in params])

    def __str__(self):
        return f"{self.name}({', '.join(str(param) for param in self.parameters)}) -> {self.type}"

    def c_sig(self) -> str:
        """
        The function signature as it would appear in C.

        Note all pointer types look like :code:`type* name` (as opposed to :code:`type *name`)

        :return: the signature string
        """
        return f"{self.type} {self.name}({', '.join(str(param) for param in self.parameters)})"
@dataclass
class ParamSize:
    """
    Denotes an association between an array parameter, and a scalar parameter containing the array's size
    """
    array: str
    var: str

    @staticmethod
    def parse(size: str):
        """
        Build a ParamSize instance from a size description string.

        These strings are in the form given in *props* files, e.g.

            "size arr_name, scalar_name"

        The leading :code:`size` keyword is optional.

        :param size: the description
        :return: the ParamSize instance
        :raises AssertionError: if the description does not contain exactly two comma-separated names
        """
        text = size.strip()
        # only drop "size" when it is a separate word, so names such as "sizes" survive intact
        if text.startswith("size") and text[4:5].isspace():
            text = text[4:]

        parts = text.split(",")
        if len(parts) != 2:
            # raised explicitly (not via `assert`) so the check still runs under `python -O`
            raise AssertionError(f"expected 'size <array>, <var>', got {size!r}")

        return ParamSize(parts[0].strip(), parts[1].strip())


@dataclass
class FunctionArrayInfo:
    """
    A wrapper for additional information found in a function's *props* file.

    This includes the names of any output parameters, and the given sizes of any array parameters.
    """
    outputs: List[str]
    sizes: List[ParamSize]

    @staticmethod
    def parse(info: List[str]):
        """
        Build a FunctionArrayInfo instance from a list of description strings.
        These strings may be describing either sizes or outputs; blank lines are skipped.

        :param info: the description strings
        :return: the instance containing the information
        :raises Exception: if a non-blank line starts with neither :code:`output` nor :code:`size`
        """
        outputs = []
        sizes = []

        for raw_line in info:
            line = raw_line.strip()
            if not line:
                # e.g. a trailing empty line in the props file
                continue

            if line.startswith("output"):
                outputs.append(line.removeprefix("output").strip())
            elif line.startswith("size"):
                # hand over the whole line: ParamSize.parse removes the keyword itself, and
                # removing it here as well would eat the start of names like "sizes"
                sizes.append(ParamSize.parse(line))
            else:
                raise Exception("very bad")

        return FunctionArrayInfo(outputs, sizes)


@dataclass
class FunctionProps:
    """
    Contains all information from a function's *props* file.

    This includes the signature and any additional information about the parameters.
    """
    sig: "FunctionSignature"
    arr_info: FunctionArrayInfo

    @staticmethod
    def parse(props_file: str):
        """
        Build a FunctionProps instance from a *props* file.

        The first line is parsed as the function signature, all remaining lines as
        output/size descriptions.

        :param props_file: the path to the *props* file
        :return: the instance built from that file
        """
        with open(props_file, "r") as props:
            sig = FunctionSignature.parse(props.readline())
            rest = FunctionArrayInfo.parse(props.readlines())

        return FunctionProps(sig, rest)


@dataclass
class CReference:
    """
    Contains all relevant information from a function's *ref.c* file.

    This is the :code:`#includes` found in the file, as well as the C implementation of the function itself.
    """
    includes: List[str]
    code: str

    @staticmethod
    def parse(ref_file: str):
        """
        Build a CReference instance from a given *ref.c* file.

        :param ref_file: the path to the *ref.c* file
        :return: the instance built from that file
        """
        with open(ref_file, "r") as ref:
            includes = []

            # go through each line and:
            #   1. store includes
            #   2. ignore anything other than the function
            #   3. store the function code
            line = ""  # keeps `line` defined when the file is empty
            for line in ref:
                line = line.lstrip()
                if re.match("(int|float|double|char|bool|void)", line):
                    break  # assumes everything from here is the actual function

                if line.startswith("#include"):
                    includes.append(line.rstrip())

            # the line that stopped the loop plus everything after it; if no line matched,
            # this is just the (left-stripped) last line of the file
            func = line + ref.read()

        return CReference(includes, func)
@dataclass
class FunctionReference:
    """
    Wrapper for all information about a given function.
    """
    signature: "FunctionSignature"
    info: "FunctionArrayInfo"
    reference: "CReference"

    @property
    def type(self):
        return self.signature.type

    @property
    def parameters(self):
        return self.signature.parameters

    @property
    def name(self):
        return self.signature.name

    @property
    def code(self):
        return self.reference.code

    @staticmethod
    def parse(prog_name: str):
        """
        Build a FunctionReference from an actual C function.
        This function must have a directory containing *ref.c* and *props* files.

        :param prog_name: the path to the function directory
        :return: the instance built for that function
        """
        directory = os.path.expanduser(prog_name)
        props = FunctionProps.parse(os.path.join(directory, "props"))
        reference = CReference.parse(os.path.join(directory, "ref.c"))

        return FunctionReference(props.sig, props.arr_info, reference)

    def validate(self) -> Set["ParseIssue"]:
        """
        Check this FunctionReference for any issues.

        An output or size entry that names a parameter missing from the signature is not
        reported as an issue; the lookup raises :code:`KeyError` instead.

        :return: all issues found in the function
        """
        found = set()

        if self.type.pointer_level != 0:
            found.add(ParseIssue.ArrayReturnType)

        # lookup tables: every parameter's type by name, and the names of the array parameters
        types_by_name = {}
        arrays = set()
        for param in self.parameters:
            types_by_name[param.name] = param.type

            level = param.type.pointer_level
            if level == 1:
                arrays.add(param.name)
            elif level != 0:
                found.add(ParseIssue.MultiLevelPointer)

            # a deliberately simplified identifier check; C keywords are not rejected
            ident = re.match(r"^[a-zA-Z_]\w*$", param.name, flags=re.ASCII)
            if not ident or ident[0] != param.name:
                found.add(ParseIssue.InvalidIdentifierName)

        for output in self.info.outputs:
            if types_by_name[output].pointer_level == 0:
                found.add(ParseIssue.ScalarOutputParameter)

        sized = set()
        for entry in self.info.sizes:
            sized.add(entry.array)

            if entry.array not in arrays:
                found.add(ParseIssue.ScalarGivenSize)

            if types_by_name[entry.var].contents != "int":
                found.add(ParseIssue.GivenInvalidSize)

        # char arrays are the only arrays allowed to go without a size
        for unsized in arrays - sized:
            if types_by_name[unsized].contents != "char":
                found.add(ParseIssue.UnsizedArrayParameter)

        # everything before the first "{" of the reference code is taken as its signature
        source = self.code
        declared = FunctionSignature.parse(source[:source.find("{")])
        if declared != self.signature:
            found.add(ParseIssue.ReferenceSignatureMismatch)

        returns_value = self.signature.type.contents != "void"
        if returns_value and self.info.outputs:
            found.add(ParseIssue.ReturnAndOutputGiven)
        elif not returns_value and not self.info.outputs:
            found.add(ParseIssue.NoOutputGiven)

        return found

    def show_issues(self, verbose: bool = False, ignore_good: bool = False) -> None:
        """
        Write any issues in the function to stderr

        :param verbose: set to :code:`True` to also dump the whole instance as JSON when issues are found
        :param ignore_good: set to :code:`True` to write nothing when no issues are found;
            by default a short "is good" line is written in that case
        """
        issues = self.validate()

        if not issues:
            if not ignore_good:
                stderr.write(f"{self.name} is good\n")
            return

        stderr.write(f"error: {self.name} is broken!\n")
        for issue in issues:
            stderr.write(f" - {issue.value}\n")

        if verbose:
            stderr.write(dumps(asdict(self), indent=4) + "\n\n")
def show_all(base_path: str) -> None:
    """
    Parse and show the C signature for all functions in a given directory.

    Entries whose name starts with "__" or ".", entries that are not directories, and
    directories missing either *ref.c* or *props* are skipped.
    Issues found in a function are written to stderr, its signature to stdout.

    :param base_path: the path to the directory containing all of the functions
    """
    root = os.path.expanduser(base_path)
    required = {"ref.c", "props"}

    for entry in os.listdir(root):
        if entry.startswith(("__", ".")):
            continue

        # building out the proper path to the function
        candidate = os.path.join(root, entry)
        if not os.path.isdir(candidate):
            continue

        if not required.issubset(os.listdir(candidate)):
            continue

        parsed = FunctionReference.parse(candidate)
        parsed.show_issues(ignore_good=True)
        print(parsed.signature.c_sig())


def show_single(base_path: str, prog_name: str) -> None:
    """
    Parse and display a single program.

    The parsed reference is printed as JSON, then any issues are reported on stderr in verbose form.

    :param base_path: the full path to the directory containing a function
    :param prog_name: the name of the function directory
    """
    location = os.path.join(base_path, prog_name)
    contents = FunctionReference.parse(location)

    print(dumps(asdict(contents), indent=4))
    contents.show_issues(verbose=True)
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()

    parser.add_argument("-a", "--all", help="display and debug all available references", action="store_true")
    parser.add_argument("program", nargs="?", help="parse and output the given program")
    parser.add_argument("-p", "--path", help="path to example directory", default=".")

    args = parser.parse_args()

    # exactly one of (program, --all) must be given: this is true when both or neither are set
    if (args.program is None) != args.all:
        parser.print_usage(file=stderr)
        stderr.write(f"{parser.prog}: error: exactly one argument must be set from (--all, program)\n")
        # the `exit` builtin is injected by the `site` module and may be absent (python -S)
        raise SystemExit(1)

    if args.all:
        show_all(args.path)
    else:
        show_single(args.path, args.program)


# ---- utilities.py (third new file in this patch) ----
import uuid
from typing import Tuple
import subprocess
import os
import numpy as np
import random
import logging
import time
from typing import Callable, Any, Optional


def get_tmp_file_name(content: str, extension: str = '') -> str:
    """
    Generate a random file name without creating the file.

    :param content: accepted but not used
    :param extension: appended verbatim to the generated name
    :return: a random hexadecimal name followed by :code:`extension`
    """
    return uuid.uuid4().hex + extension


def get_tmp_file(content: str, extension: str = '') -> str:
    """
    Write :code:`content` to a new, randomly named file.

    The name is relative, so the file is created in the current working directory.
    The file is not removed by this function.

    :param content: the text to write
    :param extension: appended verbatim to the generated name
    :return: the name of the file that was written
    """
    filename = uuid.uuid4().hex + extension
    with open(filename, 'w') as f:
        f.write(content)
    return filename


def get_tmp_path() -> str:
    """
    Generate a random hexadecimal name; nothing is created on disk.

    :return: the generated name
    """
    return uuid.uuid4().hex


def run_command(command: str, stdin: Optional[str] = None) -> Tuple[str, str]:
    """
    Run a command and capture its output as text.

    The command is split on whitespace, so arguments containing spaces are not supported.
    The exit status is not checked.

    :param command: the command line to run
    :param stdin: text passed to the process on standard input, if any
    :return: the captured (stdout, stderr)
    """
    output = subprocess.run(command.split(), capture_output=True, text=True, input=stdin)
    return output.stdout, output.stderr


def deterministic(seed: int):
    """
    Seed the random number generators used by this project.

    :param seed: the seed applied to Python's :code:`random` module and to NumPy
    """
    # NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it here
    # presumably only affects child processes - confirm that is the intent
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)  # Numpy module.
    random.seed(seed)  # Python random module.
def timeit(func: Callable) -> Callable:
    """
    Decorator that logs when a function starts and how long it took.

    Both messages are emitted through :code:`logging.info`; the wrapped function's
    return value is passed through unchanged.

    :param func: the function to time
    :return: a wrapper with the same call signature, name and docstring as :code:`func`
    """
    from functools import wraps

    @wraps(func)  # keep __name__/__doc__ of the decorated function intact
    def wrapped(*args, **kwargs):
        func_name = func.__name__
        logging.info(f'Running {func_name}')
        t0 = time.time()
        res = func(*args, **kwargs)
        t1 = time.time()
        logging.info(f'Run {func_name} in {t1-t0}s')
        return res
    return wrapped