From 6807dd38f93985c0eeb7254b149aa544aebcb4f8 Mon Sep 17 00:00:00 2001 From: Chris LeBlanc Date: Tue, 10 Jul 2018 11:47:33 -0300 Subject: [PATCH 01/10] Initial compiler commit --- Compiler.py | 111 ++ ToolChain/Assembler/Parser/test_Parser.py | 8 - ToolChain/Compiler/Compiler.py | 1161 +++++++++++++++++++++ ToolChain/Compiler/Constants.py | 94 ++ ToolChain/Compiler/MathParser.py | 298 ++++++ ToolChain/Compiler/__init__.py | 0 6 files changed, 1664 insertions(+), 8 deletions(-) create mode 100644 Compiler.py create mode 100644 ToolChain/Compiler/Compiler.py create mode 100644 ToolChain/Compiler/Constants.py create mode 100644 ToolChain/Compiler/MathParser.py create mode 100644 ToolChain/Compiler/__init__.py diff --git a/Compiler.py b/Compiler.py new file mode 100644 index 0000000..54ddf41 --- /dev/null +++ b/Compiler.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# -*- coding: -*- + +""" +This file is part of Spartacus project +Copyright (C) 2018 CSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+""" + +from ToolChain.Compiler.Compiler import Compiler +from ToolChain.Compiler.Constants import UNDEFINED, DEFAULT_OUTPUT_EXTENSION + +import argparse +import os + +__author__ = "CSE" +__copyright__ = "Copyright 2018, CSE" +__credits__ = ["CSE"] +__license__ = "GPL" +__version__ = "3.0" +__maintainer__ = "CSE" +__status__ = "Dev" + + +def parseCommandLineArgs(): + """ + As implied by the name, this will parse the command line arguments so we can use them. Important note, + after this function is called, no need to use the "extension" attribute since this one is concatenated + with the "output". This results in cleaner code since those two are always(? most of the time at least) + used together. + :return: A parsed object as provided by argparse.parse_args() + """ + + parser = argparse.ArgumentParser(prog="Compiler.py", + description="Capua Compiler Version {}".format(__version__,), + epilog="This tool is provided as part of Spartacus learning environment under {} " + "licence. Feel free to distribute, modify, " + "contribute and learn!".format(__license__,)) + parser.add_argument("-i", "--input", + required=True, + nargs=1, + type=str, + help="Define the input file to be used by the compiler.") + + parser.add_argument("-o", "--output", + required=False, + nargs=1, + type=str, + default=UNDEFINED, + help="Define the output file where the assembled data will be written. If not specified, this " + "will default to the input file name, minus the extension, plus the --extension " + "provided value.") + + parser.add_argument("-e", "--extension", + required=False, + nargs=1, + type=str, + default=DEFAULT_OUTPUT_EXTENSION, + help="Default output extension for the output file. This is useful if changing extension value " + "while keeping default output file name. Default value for this is {} please note that " + "the '.' 
has to be provided by the user!".format(DEFAULT_OUTPUT_EXTENSION,)) + + args = parser.parse_args() + args.input = args.input[0] # This originally come out as a list + args.output = args.output[0] if type(args.output) is not str else args.input.split(".")[0] # Using input as default + args.extension = args.extension[0] if type(args.extension) is not str else args.extension + args.output = args.output + args.extension if args.output.split(".")[-1] != "o" else args.output + + return args + + +def validatePaths(argsWithPaths): + """ + This function will simply validate that the input path exists + :param argsWithPaths: An input parsed object as provided by argparse.parse_args() + :return: This does not return. Simply raises ValueError in cases where paths are not valid. + """ + if not os.path.exists(argsWithPaths.input): + raise ValueError("ERROR: file {} does not exists.".format(argsWithPaths.input,)) + + +if __name__ == '__main__': + usableArgs = parseCommandLineArgs() + validatePaths(usableArgs) # Make sure the parsed info is usable before using it! + + print("Compiler about to begin, following options will be used") + print(" input file: {}".format(usableArgs.input,)) + print(" output file: {}".format(usableArgs.output,)) + + compiler = Compiler(usableArgs.input, usableArgs.output) + if os.path.exists(usableArgs.output): + # The compiler did the job correctly and the out file has been written to disk! + print("Compiler done, output file has been written to {}". format(usableArgs.output,)) + else: + raise ValueError("An unknown error occurred while compiling the input file. Please validate " + "input file syntax (I know this error sucks... Sorry :) ). This 'should not' " + "have happened, please report this error to maintainer so we can all get a more " + "stable compiler. 
If you don't report, don't whine about this!") diff --git a/ToolChain/Assembler/Parser/test_Parser.py b/ToolChain/Assembler/Parser/test_Parser.py index d3e21b7..92d26fc 100644 --- a/ToolChain/Assembler/Parser/test_Parser.py +++ b/ToolChain/Assembler/Parser/test_Parser.py @@ -47,14 +47,6 @@ import struct import os -__author__ = "CSE" -__copyright__ = "Copyright 2015, CSE" -__credits__ = ["CSE"] -__license__ = "GPL" -__version__ = "2.0" -__maintainer__ = "CSE" -__status__ = "Dev" - class TestParser(unittest.TestCase): diff --git a/ToolChain/Compiler/Compiler.py b/ToolChain/Compiler/Compiler.py new file mode 100644 index 0000000..b2e2f49 --- /dev/null +++ b/ToolChain/Compiler/Compiler.py @@ -0,0 +1,1161 @@ +#!/usr/bin/env python +# -*- coding: -*- + +""" +This file is part of Spartacus project +Copyright (C) 2018 CSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+""" + +from ToolChain.Compiler.Constants import ACCEPTED_TYPES, \ + IGNORE_CHARS, \ + BOOLEAN_OPERATORS, \ + ALLOWED_CHARS + +from ToolChain.Compiler.MathParser import tokenize, \ + infixToPostfix, \ + evaluatePostfix + +import re + +__author__ = "CSE" +__copyright__ = "Copyright 2018, CSE" +__credits__ = ["CSE"] +__license__ = "GPL" +__version__ = "3.0" +__maintainer__ = "CSE" +__status__ = "Dev" + + +class Compiler: + + state = 0 # "States" are used to determine our next path for processing the C file + currentVar = "" # Name of variable being evaluated + currentType = "" # Current data type being read, before method/variable declaration + currentMethod = "" # String containing the current method being evaluated + expectFlag = 0 # Used to control what input we expect next + mathFormula = "" # Will contain our fully assembled math expressions for variable assignments + memoryLocation = 0x40000000 # Memory location for local variables. + varList = [] # Contains a list of variable names + varLocation = {} # Contains the memory location for all variables + methodList = {} # List of methods, along with their return type, variables (and types), and # of args + argCount = 0 # Used for number of operands in math expression, args in function calls, etc. + variableCount = 0 # Number of variables declared in current function. 
+ identifier = "" # Used to determine first token of a line + functionCall = "" # Name of the function we're calling when doing variable assignment + whileFlag = 0 # Lets the compiler know if we're in a while loop + ifOperator = "" # Holds the logical operator between two sides of an if boolean expression + nestedFlag = 0 # Lets the compiler know if we're in an if statement + ifLabel = 0 # For jump instructions, we need a unique label for every if statement + lineno = 0 # Line number for printing error messages + functionArg = "" # Used to read a function call's arguments + whileLabel = 0 # For while loops, we need a unique label + labelList = [] # List containing names of labels for if/while jumps + whileList = [] # List containing the names of while loops + arrayList = {} # Dict containing variables that are arrays + arrayLength = "" # Length of current array variable being evaluated + + def __init__(self, inputFile=None, outputFile=None): + """ + This allows for simple initialisation of the Compiler. It will check if the input/output files are valid, + and then call the parseFile method to begin the compiling process. + :param inputFile: inputFile: str, name of file to read from + :param outputFile: str, name of file that will be created and written to + :return: + """ + + if type(inputFile) is not str or len(inputFile) is 0: + # File is invalid + raise ValueError("Assembler error - Invalid input file selected") + if type(outputFile) is not str or len(outputFile) is 0: + # File is invalid + raise ValueError("Assembler error - Invalid output file selected") + + self.readFile(inputFile, outputFile) + + def readFile(self, inputFile, outputFile): + """ + Initializes parsing process. Opens the input file to read from, and initializes the output file we will create. + We then read each line and feed every character into the parse method individually. Once done, we close the + input/output files. 
+ :param inputFile: str, name of file to read from + :param outputFile: str, name of file that will be created and written to + :return: + """ + + try: + file = open(inputFile, mode="r") + inputFile = file.readlines() + except OSError as e: + raise OSError("Couldn't open file {}".format(inputFile)) + try: + output = open(outputFile, mode="w") + except OSError as e: + raise OSError("Couldn't open file {}".format(outputFile)) + + for line in inputFile: + self.lineno += 1 + for x in line: + self.parse(x, output) + + output.write("end:\n") + + try: + file.close() + output.close() + except OSError as e: + raise OSError("Couldn't close file.") + + def parse(self, char, output): + """ + Receives characters one by one and makes use of states to determine the next appropriate action. Each state has + its own restrictions based on what input it expects next. This parse method is essentially the controller that + directs input. + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if self.state == 0: + self.state0(char, output) + + elif self.state == 1: + self.state1(char, output) + + elif self.state == 2: + self.state2(char, output) + + elif self.state == 3: + self.state3(char, output) + + elif self.state == 4: + self.state4(char, output) + + elif self.state == 5: + self.state5(char, output) + + elif self.state == 6: + self.state6(char, output) + + elif self.state == 7: + self.state7(char, output) + + elif self.state == 8: + self.state8(char, output) + + elif self.state == 9: + self.state9(char, output) + + elif self.state == 10: + self.state10(char, output) + + elif self.state == 11: + self.state11(char, output) + + elif self.state == 12: + self.state12(char, output) + + elif self.state == 13: + self.state13(char, output) + + def state0(self, char, output): + """ + First step in parsing data. At this step, we begin to read the method header. We expect to read the return data + type. 
+ :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + pass + elif char in IGNORE_CHARS and self.expectFlag == 1: + if self.currentType in ACCEPTED_TYPES: + self.state = 1 + self.expectFlag = 0 + else: + raise ValueError("Incorrect return type for method declaration at line {}.".format(self.lineno)) + else: + self.currentType += char + self.expectFlag = 1 + + def state1(self, char, output): + """ + Here we expect to read the method's name. Once we reach a space or an opening parentheses, we add the method + to the methodlist along with its data type. + TODO: figure out a way to have the main method printed first in the .casm file + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + # white space or new line before any relevant information + pass + + elif char == " " and self.expectFlag == 1: + # we have our method name, we expect an opening parentheses some time after the first space + self.currentVar = "" + self.currentType = "" + self.state = 2 + + elif char == "(": + # We read the opening parentheses after the method name, no need to check for it later + self.methodList[self.currentMethod] = {"retType": self.currentType} + output.write(self.currentMethod + ":\n") + self.currentVar = "" + self.currentType = "" + self.state = 2 + self.expectFlag = 0 + + else: + self.currentMethod += char + self.expectFlag = 1 + + def state2(self, char, output): + """ + Deals with an argument's data type. This is the first step in determining the tuple: arg data type/arg name. 
+ :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 1: + # we have our method name and return type, but have not yet seen the opening bracket for arguments "(" + pass + + elif char == "(" and self.expectFlag == 1: + # We have our opening parentheses for arguments, we can now look for the first variable's data type + self.expectFlag = 0 + self.methodList[self.currentMethod] = {"retType": self.currentType} + output.write(self.currentMethod + ":\n") + if self.currentMethod == "main": + output.write(" MOV end $S\n") + + elif self.expectFlag == 0: + # Here we expect to read the first character of the variable's data type + if char == ")": + # If instead we simply read a closing parentheses, we assume there are no arguments. + self.state = 4 + elif char == " ": + # If we have a space, we have not yet seen the first char of the variable's data type. + pass + else: + # We read the first character of the variable's data type + self.currentType += char + self.expectFlag = 2 + + elif self.expectFlag == 2: + # Here we read the remainder of the variable's data type. If we read a comma, there are other + if char == ")": + self.state = 4 + self.expectFlag = 0 + elif char == " ": + # we need to read a space before our variable's name. Now we're ready to read the name itself. + if self.currentType in ACCEPTED_TYPES: + self.state = 3 + self.expectFlag = 0 + else: + raise ValueError("Data type not supported for method variable: {}.".format(self.currentType)) + else: + + # append the character to the current type being read. + self.currentType += char + + def state3(self, char, output): + """ + This state reads the name of a method argument. Once we have the full name, we couple it with the data type + read in state2, and we add it to the method's dict of variables. 
If we read a comma, we know we're ready to + read another argument, and then we jump back to state2. + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if self.expectFlag == 0 and char == " ": + # Here we're still reading whitespace after variable's data type declaration + pass + + elif self.expectFlag == 1 and char == " ": + # We read the variable's name, now we wait for the next character to decide where to go + self.addVariableToMethodDict() + self.expectFlag = 2 + + elif self.expectFlag == 1 and char == ",": + # We read a comma immediately after variable name (no space), so we expect to read more variables + self.addVariableToMethodDict() + self.expectFlag = 3 + + elif self.expectFlag == 1 and char == ")": + # Closing parentheses right after variable name. We go to method's body + self.addVariableToMethodDict() + self.state = 4 + self.expectFlag = 0 + + elif self.expectFlag == 2: + # We've read a space after variable's name, now we wait for the next key character to know where to go + if char == " ": + pass + elif char == ",": + self.expectFlag = 3 + elif char == ")": + self.state = 4 + self.expectFlag = 0 + else: + raise ValueError("Syntax error at line {}.".format(self.lineno)) + + elif self.expectFlag == 3: + # After reading a comma, we either read the beginning of a new variable declaration, or some more whitespace + if char == " ": + pass + else: + self.currentType = char + self.state = 2 + self.expectFlag = 2 + + else: + # append the character to the current variable's name + self.currentVar += char + self.expectFlag = 1 + + def state4(self, char, output): + """ + In this state, we've read all the arguments of a method declaration. Now we simply expect to read the opening + curly brace "{" to signify the opening body of the method. Here, we also write the appropriate casm instructions + to the output file. 
The stack pointer gets moved to "end" if it's the main method, and the S2 pointer must point + to the first argument pushed to the stack (if any). + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + self.expectFlag = 0 + + if char in IGNORE_CHARS: + # whitespace or newline characters when not expecting a particular input + pass + + elif char == "{": + # We add the total amount of variables present in the method's argument list. Used for function calls + # in the body of another method to ensure the correct amount of variables are passed in. + self.methodList[self.currentMethod]["totalVars"] = self.argCount + self.state = 5 + if self.currentMethod == "main": + output.write(" MOV end $S\n") + else: + if self.argCount > 0: + output.write(" MOV $S $S2\n") + output.write(" SUB #" + str(self.argCount * 4 + 4) + " $S2\n") + + self.argCount = 0 + + else: + raise ValueError("Syntax error, expecting \"{\", got {}".format(char)) + + def state5(self, char, output): + """ + Initial evaluation of a line within the body of a method. We read the input and concatenate to identifier + string. Once we read a key token we check various cases to see where we need to go with out identifier: + space: + -valid data type + -if statement (we later check for opening parentheses) + -variable (already declared) + -while loop + -return statement + "=": + -variable assignment only + "(": + -if statement + -while loop + -function call (e.g. 
add(a,b)) + "}" + -end of method, loop, or if statement + + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + # whitespace or newline characters when not expecting a particular input + pass + + elif char == " " and self.expectFlag == 1: + # we read a space, now we evaluate our indicator to determine what sort of operation we're dealing with + if self.identifier == "if": + # identifier is an if statement + self.state = 9 + self.identifier = "" + self.expectFlag = 0 + self.nestedFlag += 1 + + elif self.identifier == "while": + # identifier is a while loop indicator + output.write("LOOP" + str(self.whileLabel) + ":\n") + self.whileList.append("LOOP" + str(self.whileLabel)) + self.state = 10 + self.whileLabel += 1 + self.nestedFlag += 1 + self.whileFlag += 1 + self.identifier = "" + self.expectFlag = 0 + + elif self.identifier == "return": + # identifier is a return statement + self.expectFlag = 0 + self.state = 11 + self.identifier = "" + + elif (self.identifier in self.varList) or self.identifier in self.methodList[self.currentMethod]: + # the identifier is a variable that has already been declared + self.currentVar = self.identifier + self.identifier = "" + self.state = 6 + self.expectFlag = 2 + + elif self.identifier in ACCEPTED_TYPES: + # identifier is a data type, new variable declaration + self.currentType = self.identifier + self.identifier = "" + self.state = 6 + self.expectFlag = 0 + + elif self.identifier in self.methodList: + # identifier is a function call + self.expectFlag = 0 + self.state = 8 + self.functionCall = self.identifier + self.identifier = "" + + else: + # identifier was not valid + raise ValueError("Error at line {}".format(self.lineno)) + + elif char == "=" and self.expectFlag == 1: + # here we have a variable assignment. 
Variable must be already declared in this case + if (self.identifier in self.varList) or self.identifier in self.methodList[self.currentMethod]: + self.expectFlag = 0 + self.state = 7 + else: + raise ValueError("Invalid assignment at line {}: must be valid variable".format(self.lineno)) + + elif char == "[" and self.expectFlag == 1: + # this implies an already declared array + self.expectFlag = 0 + self.identifier = "" + self.state = 13 + + elif char == "(" and self.expectFlag == 1: + # immediately after the identifier, we read an opening parentheses. Here we cover all possible cases + if self.identifier in self.methodList: + # identifier is a function call + self.state = 8 + self.functionCall = self.identifier + self.identifier = "" + + elif self.identifier == "if": + # identifier is an if statement + self.state = 9 + self.identifier = "" + self.nestedFlag += 1 + + elif self.identifier == "while": + # identifier is a while loop indicator + output.write("LOOP" + str(self.whileLabel) + ":\n") + self.whileList.append("LOOP" + str(self.whileLabel)) + self.state = 10 + self.whileLabel += 1 + self.whileFlag += 1 + self.nestedFlag += 1 + self.identifier = "" + + else: + # identifier was not valid + raise ValueError("Error at line {}".format(self.lineno)) + + elif char == "}": + # end of method, if statement, or while loop + if self.nestedFlag == 0: + # if we aren't in any while/if statements, this is the end of our method + self.state = 0 + self.currentMethod = "" + self.argCount = 0 + self.currentVar = "" + self.currentType = "" + self.varList.clear() + self.varLocation.clear() + + else: + # otherwise, we print the appropriate instructions to end the while loop or if statement + self.nestedFlag -= 1 + if self.whileFlag > 0: + self.whileFlag -= 1 + output.write(" JMP <> " + self.whileList.pop() + "\n") + + output.write(self.labelList.pop() + ":\n") + + else: + # append the character to the identifier string is nothing else of interest was read. 
+ self.identifier += char + self.expectFlag = 1 + + def state6(self, char, output): + """ + Initial variable name declaration. We already have the data type, so now we read its name until we get a + relevant token to determine what to do with the variable. + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + # ignore spaces/new line chars if we're not expecting any input in particular + pass + + elif char == " " and self.expectFlag == 1: + # we have the variable name, now we move to the next phase to determine appropriate action + self.expectFlag = 2 + + elif char == "=" and self.expectFlag == 1: + # we have the variable name, and we see that an assignment will happen + self.verifyVariable() + self.state = 7 + self.expectFlag = 0 + + elif char == ";" and self.expectFlag == 1: + # end of variable declaration. we assign its memory location and add it to the variable list + self.verifyVariable() + self.currentVar = "" + self.currentType = "" + self.state = 5 + self.expectFlag = 0 + + elif char == "[" and self.expectFlag == 1: + # Here we're ready to declare a new array + self.validName(self.currentVar) + self.state = 12 + self.expectFlag = 0 + + elif self.expectFlag == 2: + # We reach this step if we have the variable name and we read at least one space + if char in IGNORE_CHARS: + # we may keep reading spaces/ new line until we reach a relevant token + pass + + elif char == "=": + # variable assignment. if the variable was not in the list, we add it + if (self.currentVar not in self.varList) and self.currentVar not in self.methodList[self.currentMethod]: + self.varList.append(self.currentVar) + self.varLocation[self.currentVar] = self.memoryLocation + self.memoryLocation += 4 + self.variableCount += 1 + + self.validName(self.currentVar) + self.state = 7 + self.expectFlag = 0 + + elif char == ";": + # simple declaration (e.g. 
int a;), we add it to the variable list and allocate a memory location + self.verifyVariable() + self.currentVar = "" + self.currentType = "" + self.state = 5 + self.expectFlag = 0 + + else: + raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + + else: + # append the character to the current variable's name + self.currentVar += char + self.expectFlag = 1 + + def state7(self, char, output): + """ + Begins variable assignment. This could either be a math formula, or a function call + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + # whitespace or newline character before any important tokens are read + pass + + elif char == "(" and self.expectFlag == 1: + # we read an opening parentheses, it could either be a function call or part of a normal math expression + if self.mathFormula in self.methodList: + # if we have a function call for a variable assignment, we jump to state 8 which deals with functions + self.functionCall = self.mathFormula + self.state = 8 + else: + # otherwise, the parentheses is just part of a normal math expression + self.mathFormula += char + + elif char == " " and self.expectFlag == 1: + # we read a space, so we evaluate what the math formula holds thus far. + if self.mathFormula in self.methodList: + # if we have a function call for a variable assignment, we jump to state 8 which deals with functions + self.functionCall = self.mathFormula + self.state = 8 + else: + # otherwise, the parentheses is just part of a normal math expression + self.mathFormula += char + + elif char == ";": + # End of our math statement. 
We may begin the evaluation and assign the result to the current variable + tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + + if self.currentVar in self.methodList[self.currentMethod]: + # The variable is an argument passed into the function. We use the stack pointer to fetch its + # location before writing the value. + output.write(" MOV $A2 $S2\n") + output.write(" ADD #" + str(self.methodList[self.currentMethod][self.currentVar][1] * 4) + + " $A2\n") + output.write(" MEMW [4] $A $A2\n") + + else: + # The variable is local, so we just write the result to its memory location from the local list. + output.write(" MEMW [4] $A #" + str(self.varLocation[self.currentVar]) + "\n") + + # now we reset everything + self.state = 5 + self.mathFormula = "" + self.currentType = "" + self.currentVar = "" + self.expectFlag = 0 + + else: + # if we don't read anything else of interest, we simply append the character to the math formula string + self.mathFormula += char + self.expectFlag = 1 + + def state8(self, char, output): + """ + This deals with a function call. This may be on its own line or part of a variable assignment. + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + # whitespace or newline before any relevant tokens are read + pass + + elif char == "(" and self.expectFlag == 0: + # this is in case we haven't read the opening parentheses of a function call yet. + self.expectFlag = 1 + + elif self.expectFlag == 1: + # here we read our opening parentheses, so we read whitespace until we get our first char for an argument + if char in IGNORE_CHARS: + pass + else: + self.functionArg += char + self.expectFlag = 2 + + elif self.expectFlag == 2: + # Here we read the argument name. 
If we read a space, we wait for appropriate token. + # Tokens ("," and ")") may show up without spaces, so we handle that here too + if char == ",": + if self.functionArg in self.varList: + output.write(" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") + else: + raise ValueError("Invalid variable at line {}".format(self.lineno)) + self.expectFlag = 1 + self.functionArg = "" + self.argCount += 1 + + elif char in IGNORE_CHARS: + self.expectFlag = 3 + + elif char == ")": + # we're done reading arguments for the function. Now we expect to read ";" to end the statement + if self.functionArg in self.varList: + # must be a valid variable to pass into function + output.write(" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") + else: + raise ValueError("Invalid variable at line {}".format(self.lineno)) + self.expectFlag = 4 + self.argCount += 1 + + else: + # append char to the current argument being passed into the function + self.functionArg += char + + elif self.expectFlag == 3: + # We fully read the argument name, now we wait for valid token + if char == ",": + # here we're notified that other variables will be read. + if self.functionArg in self.varList: + output.write(" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") + else: + raise ValueError("Invalid variable at line {}".format(self.lineno)) + + self.expectFlag = 1 + self.functionArg = "" + self.argCount += 1 + + elif char in IGNORE_CHARS: + # we can keep ignoring whitespace/new line until we read a correct token + pass + elif char == ")": + # end of arguments. we now expect ";" to end the statement + if self.functionArg in self.varList: + output.write(" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") + else: + raise ValueError("Invalid variable at line {}".format(self.lineno)) + self.expectFlag = 4 + self.argCount += 1 + else: + raise ValueError("Error at line {}".format(self.lineno)) + + elif self.expectFlag == 4: + # Here we're done our function call. 
We need to read ";" to end the statement and write function to output + + if char == ";": + # we make sure the amount of arguments passed in matches how many are accepted by the method + if self.argCount == self.methodList[self.functionCall]["totalVars"]: + + output.write(" CALL " + self.functionCall + "\n") + self.state = 5 + + if self.currentVar != "": + output.write(" MEMW [4] $A #" + str(self.varLocation[self.currentVar]) + "\n") + output.write(" SUB #" + str(self.argCount * 4) + " $S\n") + self.functionCall = "" + self.functionArg = "" + self.mathFormula = "" + self.argCount = 0 + self.expectFlag = 0 + + else: + raise ValueError("# of arguments don't match that of function call at line {}".format(self.lineno)) + + elif char in IGNORE_CHARS: + # we can read more whitespace before the semi-colon + pass + + else: + # we read something that wasn't whitespace or a semi-colon, invalid statement + raise ValueError("invalid syntax after function call at line {}".format(self.lineno)) + + else: + # after a valid function call, we don't read an opening parentheses or whitespace. this is invalid syntax + raise ValueError("Invalid syntax after function call at line {}".format(self.lineno)) + + def state9(self, char, output): + """ + This state will deal with if statements. We begin by evaluating the left hand side and placing the result in + register C2. Then we evaluate the right hand side and place in register D2. It's important to note that at this + time, while loops don't support expressions that contain additional parentheses. 
+ TODO: figure out how to handle multiple parentheses + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + # whitespace or newline before anything relevant is read + pass + + elif char == "(" and self.expectFlag == 0: + # if we haven't read the opening parentheses for an if statement + self.expectFlag = 1 + + elif self.expectFlag == 1: + # we're expecting to read a part of the left hand side's expression. if we read an operator, we evaluate + # the expression and move on to reading the right hand side's expression. + if char in BOOLEAN_OPERATORS: + if self.mathFormula == "": + # empty expression (e.g. if ( a > ) ) + raise ValueError("Empty expression in if statement at line {}".format(self.lineno)) + + tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + self.ifOperator = self.convertOperatorToFlags(char) + self.expectFlag = 2 + self.mathFormula = "" + output.write(" MOV $A $C2\n") + + else: + # otherwise we keep appending to our left hand side's math formula string + self.mathFormula += char + + elif self.expectFlag == 2: + # here we evaluate the right hand side of an if expression. + if char == ")": + # closing parentheses, this indicates the end of our expression + if self.mathFormula == "": + # empty expression (e.g. 
if ( a > ) ) + raise ValueError("Empty expression in if statement at line {}".format(self.lineno)) + + tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + self.expectFlag = 2 + self.mathFormula = "" + output.write(" MOV $A $D2\n") + self.expectFlag = 3 + + else: + # otherwise, append to the right hand side's math formula string + self.mathFormula += char + + elif self.expectFlag == 3: + # we've finished reading our if statement, now we wait for an opening curly brace + + if char in IGNORE_CHARS: + # we can still read whitespace or newline chars meanwhile + pass + + elif char == "{": + # we have our opening curly brace, we can go back to state 5 and begin evaluating a new line + output.write(" CMP $D2 $C2\n") + output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") + self.labelList.append(" L" + str(self.ifLabel)) + self.ifLabel += 1 + self.state = 5 + self.expectFlag = 0 + self.mathFormula = "" + + else: + # we read something other than whitespace or an opening curly brace, this is invalid + raise ValueError("Syntax error at line {}".format(self.lineno)) + + def state10(self, char, output): + """ + This state will deal with while loops. We begin by evaluating the left hand side and placing the result in + register C2. Then we evaluate the right hand side and place in register D2. When writing the assembly code, we + do exactly as an if statement; however, at the end of the while loop, we need to have a jump condition to go + back to the beginning of the loop. It's important to note that at this time, while loops don't support + expressions that contain additional parentheses. 
TODO: figure out how to handle multiple parentheses + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + # whitespace or newline before anything relevant is read + pass + + elif char == "(" and self.expectFlag == 0: + # if we haven't read the opening parentheses for an if statement + self.expectFlag = 1 + + elif self.expectFlag == 1: + # we're expecting to read a part of the left hand side's expression. if we read an operator, we evaluate + # the expression and move on to reading the right hand side's expression. + if char in BOOLEAN_OPERATORS: + if self.mathFormula == "": + # empty expression (e.g. if ( a > ) ) + raise ValueError("Empty expression in if statement at line {}".format(self.lineno)) + + tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + self.ifOperator = self.convertOperatorToFlags(char) + self.expectFlag = 2 + self.mathFormula = "" + output.write(" MOV $A $C2\n") + + else: + # otherwise we keep appending to our left hand side's math formula string + self.mathFormula += char + + elif self.expectFlag == 2: + # here we evaluate the right hand side of a while loop. + if char == ")": + # closing parentheses, this indicates the end of our expression + if self.mathFormula == "": + # empty expression (e.g. 
if ( a > ) ) + raise ValueError("Empty expression in if statement at line {}".format(self.lineno)) + + tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + self.expectFlag = 2 + self.mathFormula = "" + output.write(" MOV $A $D2\n") + self.expectFlag = 3 + + else: + # otherwise, append to the right hand side's math formula string + self.mathFormula += char + + elif self.expectFlag == 3: + # we've finished reading our if statement, now we wait for an opening curly brace + + if char in IGNORE_CHARS: + # we can still read whitespace or newline chars meanwhile + pass + + elif char == "{": + # we have our opening curly brace, we can go back to state 5 and begin evaluating a new line + output.write(" CMP $D2 $C2\n") + output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") + self.labelList.append(" L" + str(self.ifLabel)) + self.ifLabel += 1 + self.state = 5 + self.expectFlag = 0 + self.mathFormula = "" + + else: + # we read something other than whitespace or an opening curly brace, this is invalid + raise ValueError("Syntax error at line {}".format(self.lineno)) + + def state11(self, char, output): + """ + This state deals with return statements. When returning values, we follow the cdecl calling convention. + Variables in function calls are pushed onto the stack, and the values returned are placed into register A. + However, unlike cdecl, we push the arguments onto the stack from left to right, not right to left. + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + # whitespace or newline before anything relevant is read + pass + + elif char == ";": + # End of our math statement. 
+ tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + self.expectFlag = 0 + self.mathFormula = "" + self.state = 5 + output.write(" RET\n") + + else: + # we continue to append the chars to our math formula for the return statement + self.expectFlag = 1 + self.mathFormula += char + + def state12(self, char, output): + """ + This state handles the declaration of an array. We check to see if any character before closing bracket "]" + is a valid integer for the size of the array. We also verify if there is a value assignment following the array + declaration. + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if self.expectFlag == 0: + if char != "]": + try: + int(char) + except ValueError as e: + raise ValueError("Array size declaration invalid at line {}".format(self.lineno)) + self.arrayLength += char + else: + self.arrayList[self.currentVar] = int(self.arrayLength) + self.varLocation[self.currentVar] = self.memoryLocation + self.memoryLocation += int(self.arrayLength) * 4 + self.arrayLength = "" + self.expectFlag = 1 + + elif self.expectFlag == 1: + if char == " ": + pass + elif char == "=": + self.expectFlag = 2 + elif char == ";": + self.expectFlag = 0 + self.state = 5 + else: + raise ValueError("Invalid syntax at line {}".format(self.lineno)) + + elif self.expectFlag == 2: + # array has an assignment immediately after its declaration + if char == " ": + pass + elif char == "{": + self.expectFlag = 3 + else: + raise ValueError("Invalid array value assignment at line {}".format(self.lineno)) + + elif self.expectFlag == 3: + # Here we're declaring the variables inside our array + if char == "}": + self.expectFlag = 4 + + else: + self.mathFormula += char + + elif self.expectFlag == 4: + if char == " ": + pass + elif char == ";": + 
self.assignArrayValues(output) + self.expectFlag = 0 + self.mathFormula = "" + self.arrayLength = "" + self.state = 5 + else: + raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + + def state13(self, char, output): + """ + This state deals with assigning a value to a specific array index + :param char: + :param output: + :return: + """ + + if self.expectFlag == 0: + if char == "]": + try: + int(self.arrayLength) + except ValueError as e: + raise ValueError("Invalid array index at line {}".format(self.lineno)) + + if int(self.arrayLength) > int(self.arrayList[self.currentVar] - 1) or int(self.arrayLength) < 0: + raise ValueError("Array index out of bounds at line {}".format(self.lineno)) + + self.expectFlag = 1 + + else: + self.arrayLength += char + + elif self.expectFlag == 1: + if char == " ": + pass + elif char == "=": + self.expectFlag = 3 + + elif self.expectFlag == 3: + if char == ";": + tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + output.write(" MEMW [4] $A #" + str(self.varLocation[self.currentVar] + int(self.arrayLength) * 4) + + "\n") + + self.expectFlag = 0 + self.mathFormula = "" + self.arrayLength = "" + self.state = 5 + else: + self.mathFormula += char + + def validName(self, name): + """ + Verifies whether the variable's name contains only acceptable characters (A-Z, $, _, #) + :param name: str, the variable name that we're verifying + :return: + """ + + for char in name: + if char not in ALLOWED_CHARS: + raise ValueError("Illegal variable name declaration at line {}".format(self.lineno)) + + def verifyVariable(self): + """ + Method checks whether the variable is already present in the list. If so, we raise an error since we can't have + duplicate variable names. 
Otherwise, we add it to the list, assign it a memory location, increment the memory + location counter, and increase the total variable count. + :return: + """ + + self.validName(self.currentVar) + + if (self.currentVar not in self.varList) and self.currentVar not in self.methodList[self.currentMethod]: + self.varList.append(self.currentVar) + self.varLocation[self.currentVar] = self.memoryLocation + self.memoryLocation += 4 + self.variableCount += 1 + else: + # can't have duplicate variable names + raise ValueError("Duplicate variable declaration at line {}".format(self.lineno)) + + def addVariableToMethodDict(self): + """ + When called, we assume we have a new variable declaration to add to our method's dict of variables. The info + should already be stored in the static variables, so we don't need to pass in any arguments. We take this + opportunity to reset the values for current data type and current variable name, and we increase the argcount + counter. Argcount holds the cumulative number of the variable being evaluated. In the method's dict, we also + store in which order of appearance the variables are read. This is useful for our math parser, among other + tools, to determine how far the S2 pointer must travel to reach that specific variable. 
+ :return: + """ + + self.methodList[self.currentMethod][self.currentVar] = (self.currentType, self.argCount) + self.argCount += 1 + self.currentType = "" + self.currentVar = "" + + def convertOperatorToFlags(self, char): + """ + This converts an operator to the appropriate flags for a JMP instruction + :param char: char, our operator (<, >, =) to convert to a JMP flag + :return: + """ + + if char == "<": + flag = "" + elif char == "<=": + flag = "" + elif char == "=": + flag = "" + elif char == ">": + flag = "" + elif char == ">=": + flag = "" + else: + raise ValueError("Incorrect operator for if statement.") + + return flag + + def assignArrayValues(self, output): + """ + This method takes in a list of values to populate an array. These values are already kept in our global + mathFormula string variable. + :return: + """ + + list = self.mathFormula.split(",") + startingLocation = self.varLocation[self.currentVar] + + if len(list) != self.arrayList[self.currentVar]: + print(list) + raise ValueError("Incorrect number of values for array assignment at line {}".format(self.lineno)) + + for element in list: + try: + int(element) + except ValueError as e: + raise ValueError("Invalid value for array assignment at line {}".format(self.lineno)) + + output.write(" MEMW [4] #" + str(element) + " #" + str(startingLocation) + "\n") + startingLocation += 4 + + + def arrayBoundsCheck(self): + pass + diff --git a/ToolChain/Compiler/Constants.py b/ToolChain/Compiler/Constants.py new file mode 100644 index 0000000..1fe6b17 --- /dev/null +++ b/ToolChain/Compiler/Constants.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python +# -*- coding: -*- + +""" +This file is part of Spartacus project +Copyright (C) 2018 CSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" + +__author__ = "CSE" +__copyright__ = "Copyright 2018, CSE" +__credits__ = ["CSE"] +__license__ = "GPL" +__version__ = "3.0" +__maintainer__ = "CSE" +__status__ = "Dev" + +import re + +ACCEPTED_TYPES = ["int"] +OPERATORS = ['+', '-', '*', '/'] +BOOLEAN_OPERATORS = ["<", ">", "="] +IGNORE_CHARS = [" ", "\n"] +REGISTER_NAMES = ["A", "B", "C", "D", "E", "F", "G"] + + +L_PARENTHESES = '(' +R_PARENTHESES = ')' +PLUS = '+' +MINUS = '-' +MULTIPLICATION = '*' +DIVISION = '/' + +UNDEFINED = "&&undefined&&" +DEFAULT_OUTPUT_EXTENSION = ".casm" + +REGISTERS = { + 0: "A", + 1: "B", + 2: "C", + 3: "D", + 4: "E", + 5: "F", + 6: "G" +} + +ALLOWED_CHARS = [ + "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", + "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", + "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", + "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", + "s", "t", "u", "v", "w", "x", "y", "z", "$", "_", "#" +] + +OPERATIONS = { + PLUS: {'priority': 1, 'function': lambda a, b: a + b}, + MINUS: {'priority': 1, 'function': lambda a, b: a - b}, + MULTIPLICATION: {'priority': 2, 'function': lambda a, b: a * b}, + DIVISION: {'priority': 2, 'function': lambda a, b: a / b}, +} + +INSTRUCTIONS = { + PLUS: "ADD", + MINUS: "SUB", + MULTIPLICATION: "MUL", + DIVISION: "DIV" +} + +TOKEN_SEPARATOR = re.compile(r'\s*(%s|%s|%s|%s|%s|%s)\s*' % ( + re.escape(L_PARENTHESES), + re.escape(R_PARENTHESES), + re.escape(PLUS), + re.escape(MINUS), + re.escape(MULTIPLICATION), + re.escape(DIVISION)) +) + +ARRAY_PATTERN = ".\[[0-9]\]" + 
+ + diff --git a/ToolChain/Compiler/MathParser.py b/ToolChain/Compiler/MathParser.py new file mode 100644 index 0000000..fb62cb3 --- /dev/null +++ b/ToolChain/Compiler/MathParser.py @@ -0,0 +1,298 @@ +#!/usr/bin/env python +# -*- coding: -*- + +""" +This file is part of Spartacus project +Copyright (C) 2018 CSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" + +from ToolChain.Compiler.Constants import L_PARENTHESES, \ + R_PARENTHESES, \ + OPERATIONS, \ + INSTRUCTIONS, \ + TOKEN_SEPARATOR, \ + REGISTERS, \ + REGISTER_NAMES, \ + ARRAY_PATTERN + +import re + +__author__ = "CSE" +__copyright__ = "Copyright 2018, CSE" +__credits__ = ["CSE"] +__license__ = "GPL" +__version__ = "3.0" +__maintainer__ = "CSE" +__status__ = "Dev" + + +def tokenize(expression): + """ + Transforms an expression into individual tokens, separating operators from operands. + :param expression: str, mathematical expression to tokenize. + :return: str list, each element of the expression split into a list. + """ + return [t.strip() for t in TOKEN_SEPARATOR.split(expression.strip()) if t] + + +def infixToPostfix(tokens): + """ + Takes a tokenized mathematical expression and returns its postfix representation. For example, the formula: + "4 + a * b" will return: "4 a b * +". 
+ :param tokens: str list, Math expression tokenized into list of individual strings + :return: str list, our postfix representation + """ + + stack = [] # temporary stack to hold operators and operands + postfix = [] # confirmed list of elements for the postfix representation + + for element in tokens: + # Evaluate each token in sequence + if element in OPERATIONS: + # Here we find an operator (+, -, *, /), so we push the operands on the stack + # and reorganize them based on the order of precedence of operations. The ordered operations are then + # Pushed onto the postfix "stack" + + while len(stack) > 0 and stack[-1] in OPERATIONS\ + and OPERATIONS[element]['priority'] <= OPERATIONS[stack[-1]]['priority']: + postfix.append(stack.pop()) + stack.append(element) + + elif element == L_PARENTHESES: + # Left parentheses found, simply append to the stack + stack.append(element) + + elif element == R_PARENTHESES: + # Right parentheses found, so we pop all the elements and push onto postfix stack + # until we reach left parentheses + while stack[-1] != L_PARENTHESES: + postfix.append(stack.pop()) + stack.pop() + + else: + # Otherwise no significant order of operations needs to be followed, so we simply push to postfix stack + postfix.append(element) + + # We need to append any remaining operands from the stack, so we add them to the postfix "stack" in reverse + postfix.extend(reversed(stack)) + + return postfix + + +def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, arrayList, output): + """ + Evaluates the postfix math expression. Variables have their values read and loaded into registers before executing + the operation. If variables are in the methodVariables list, we make use of the stack frame pointer "S2" to fetch + their memory location.For immediate values, since multiplications and divisions may only be done with registers, we + also load them into registers for ease of automation. 
+ :param postfix: str list, our postfix mathematical expression to evaluate. + :param variableList: str list, list of all the variables in the method thus far + :param variableLocation: dict, each variable has a mapped memory location (e.g. 0x40000000). + :param methodVariables: dict, variables passed into the method as arguments. + :param arrayList: dict, variables declared as arrays. Index is variable name, value is array size + :param output: file, our output file we write to. + :return: + """ + + stack = [] # Stack that will contain our pushed operands from the postfix expression + immediateCount = 0 # Keeps count of how many immediate values are being expressed (not variables) + sourceRegister = 1 # Source register starts at 1: "B", and increments as needed + destRegister = 0 # Destination register starts at 0: 'A" and increments as needed + immFlag = 0 # Used to determine whether source or destination register holds an immediate + + for element in postfix: + # Evaluate each postfix element one by one to determine appropriate action + + if sourceRegister > 6 or destRegister > 6: + # We cap the total amount of registers used to 7 (0-6) + raise ValueError("Too many operands in formula.") + + if element in OPERATIONS: + # Here, our element is an operator. This means we need to pop the top two values from the stack and + # execute the given operation. + operand1, operand2 = stack.pop(), stack.pop() + + if operand1 in variableList: + # The operand is in the list of local variables, so we read the value from memory + output.write(" MEMR [4] #" + str(variableLocation[operand1]) + " $" + REGISTERS[sourceRegister] + "\n") + operand1 = REGISTERS[sourceRegister] + + elif operand1 in methodVariables: + # The operand is in the list of arguments passed into the method. We consult the methodVariables list + # to determine the appropriate offset from the stack pointer register S2. 
+ output.write(" MOV $A2 $S2\n") + output.write(" ADD #" + str(int(methodVariables[operand1][1]) * 4) + " $A2\n") + output.write(" MEMR [4] $A2 $" + REGISTERS[sourceRegister] + "\n") + operand1 = REGISTERS[sourceRegister] + + elif operand1 in REGISTER_NAMES: + # This is simply a register that was pushed onto the stack. We can keep it as is + pass + + elif re.match(ARRAY_PATTERN, operand1): + match = re.search(ARRAY_PATTERN, operand1) + operands = match.group(0) + operands = operands.split("[") + operands[1] = operands[1].replace("]", "") + + if operands[0] in arrayList: + # name of variable must be a valid array declaration + if int(operands[1]) > arrayList[operands[0]] - 1: + # Can't access an index that doesn't exist! + raise ValueError("Array index out of bounds.") + + output.write(" MEMR [4] #" + str(variableLocation[operands[0]] + int(operands[1]) * 4) + " $" + + REGISTERS[sourceRegister] + "\n") + + operand1 = REGISTERS[sourceRegister] + else: + raise ValueError("Invalid variable.") + + else: + # The operand is an immediate value. We test to see if it's a valid integer + try: + isinstance(operand1, int) + immediateCount += 1 + immFlag = 1 + except ValueError as e: + raise ValueError("Invalid operand") + + if operand2 in variableList: + # The operand is in the list of local variables, so we read the value from memory + output.write(" MEMR [4] #" + str(variableLocation[operand2]) + " $" + REGISTERS[destRegister] + "\n") + operand2 = REGISTERS[destRegister] + + elif operand2 in methodVariables: + # The operand is in the list of arguments passed into the method. We consult the methodVariables list + # to determine the appropriate offset from the stack pointer register S2. 
+ output.write(" MOV $B2 $S2\n") + output.write(" ADD #" + str(int(methodVariables[operand2][1]) * 4) + " $B2\n") + output.write(" MEMR [4] $B2 $" + REGISTERS[destRegister] + "\n") + operand2 = REGISTERS[destRegister] + + elif operand2 in REGISTER_NAMES: + # This is simply a register that was pushed onto the stack. We can keep it as is + pass + + elif re.match(ARRAY_PATTERN, operand2): + match = re.search(ARRAY_PATTERN, operand2) + operands = match.group(0) + operands = operands.split("[") + operands[1] = operands[1].replace("]", "") + + if operands[0] in arrayList: + # name of variable must be a valid array declaration + if int(operands[1]) > int(arrayList[operands[0]] - 1): + # Can't access an index that doesn't exist! + raise ValueError("Array index out of bounds.") + + output.write(" MEMR [4] #" + str(variableLocation[operands[0]] + int(operands[1]) * 4) + " $" + + REGISTERS[destRegister] + "\n") + + operand2 = REGISTERS[destRegister] + else: + raise ValueError("Invalid variable.") + + else: + # The operand is an immediate value. We test to see if it's a valid integer + try: + isinstance(operand2, int) + immediateCount += 1 + immFlag = 2 + except ValueError as e: + raise ValueError("Invalid operand") + + if immediateCount == 2: + # If we have two immediate values, we don't really need to calculate the arithmetic in Capua ASM. + # We discretely do the calculations in the background and push the value to the stack. This avoids + # unnecessary processing. + try: + stack.append(int(OPERATIONS[element]['function'](float(operand2), float(operand1)))) + + except ZeroDivisionError: + raise ValueError("Error: Division by zero! - {} {} {}".format(operand2, element, operand1)) + + else: + if immediateCount == 1: + # only one of the operands was an immediate value. We determine which one is the immediate value, + # as the correct instruction output depends on it. 
+ if immFlag == 1: + output.write(" MOV #" + str(int(operand1)) + " $" + REGISTERS[sourceRegister] + "\n") + operand1 = REGISTERS[sourceRegister] + + elif immFlag == 2: + output.write(" MOV #" + str(int(operand2)) + " $" + REGISTERS[destRegister] + "\n") + operand2 = REGISTERS[destRegister] + + else: + # No operands were immediate values. We can do the arithmetic operation as is. + # We move the source and destination registers up one letter for the next operation + sourceRegister += 1 + destRegister += 1 + + output.write(" " + INSTRUCTIONS[element] + " $" + str(operand1) + " $" + str(operand2) + "\n") + stack.append(operand2) + + immediateCount = 0 + + else: + # We have an operand to push onto the stack + stack.append(element) + + if len(stack) != 1: + # If the stack has more than or less than one element, the expression is incorrect. + raise ValueError("invalid expression.") + + # our result is then "saved" into register A. The assignment can now be completed. + result = stack.pop() + + if result in REGISTER_NAMES: + # If we just have a register at the bottom of the stack, we assume the result is already in register A + pass + + elif result in variableList: + # if our last operand is in the variable list, we simply read it from memory + output.write(" MEMR [4] #" + str(variableLocation[result]) + " $A\n") + + elif result in methodVariables: + # our last operand is passed in as an argument into the method, so we read it to register A + output.write(" MOV $B2 $S2\n") + output.write(" ADD #" + str(int(methodVariables[result][1]) * 4) + " $B2\n") + output.write(" MEMR [4] $B2 $A\n") + + elif re.match(ARRAY_PATTERN, result): + # our last operand is an array at a specific index. We find the index, and add the offset to the variable loc. 
+ match = re.search(ARRAY_PATTERN, result) + operands = match.group(0) + operands = operands.split("[") + operands[1] = operands[1].replace("]", "") + + if operands[0] not in arrayList: + raise ValueError("Invalid variable.") + if int(operands[1]) > int(arrayList[operands[0]] - 1): + # Can't access an index that doesn't exist! + raise ValueError("Array index out of bounds.") + + output.write(" MEMR [4] #" + str(variableLocation[operands[0]] + int(operands[1]) * 4) + " $A\n") + + else: + # last operand is an immediate value. we test to see if it's a valid integer, and we move to register A + try: + isinstance(int(result), int) + output.write(" MOV #" + str(result) + " $A\n") + except ValueError as e: + raise ValueError("Invalid mathematical expression") diff --git a/ToolChain/Compiler/__init__.py b/ToolChain/Compiler/__init__.py new file mode 100644 index 0000000..e69de29 From fe4ea1986be273801cfdbfd08ce6b7244a1c283d Mon Sep 17 00:00:00 2001 From: Chris LeBlanc Date: Tue, 10 Jul 2018 15:44:27 -0300 Subject: [PATCH 02/10] Adding functionality to Compiler, created documentation --- ToolChain/Compiler/Compiler.py | 81 +++++++++++++++++++++++++----- ToolChain/Compiler/CompilerInfo.md | 40 +++++++++++++++ ToolChain/Compiler/Constants.py | 2 +- ToolChain/Compiler/MathParser.py | 4 ++ 4 files changed, 114 insertions(+), 13 deletions(-) create mode 100644 ToolChain/Compiler/CompilerInfo.md diff --git a/ToolChain/Compiler/Compiler.py b/ToolChain/Compiler/Compiler.py index b2e2f49..4236755 100644 --- a/ToolChain/Compiler/Compiler.py +++ b/ToolChain/Compiler/Compiler.py @@ -23,7 +23,8 @@ from ToolChain.Compiler.Constants import ACCEPTED_TYPES, \ IGNORE_CHARS, \ BOOLEAN_OPERATORS, \ - ALLOWED_CHARS + ALLOWED_CHARS, \ + ARRAY_PATTERN from ToolChain.Compiler.MathParser import tokenize, \ infixToPostfix, \ @@ -690,7 +691,17 @@ def state8(self, char, output): if self.functionArg in self.varList: # must be a valid variable to pass into function output.write(" PUSH #" + 
str(self.varLocation[self.functionArg]) + "\n") + + elif re.match(ARRAY_PATTERN, self.functionArg): + # variable is an array index + match = re.search(ARRAY_PATTERN, self.functionArg) + operands = match.group(0) + operands = operands.split("[") + operands[1] = operands[1].replace("]", "") + output.write(" PUSH #" + str(self.varLocation[operands[0]] + int(operands[1]) * 4) + "\n") + else: + # variable wasn't declared or isn't valid raise ValueError("Invalid variable at line {}".format(self.lineno)) self.expectFlag = 4 self.argCount += 1 @@ -966,37 +977,63 @@ def state12(self, char, output): """ if self.expectFlag == 0: + # we expect to read the array's size until we read "]" + if char != "]": + # if the character read isn't a closing bracket, we assume it's part of the size try: int(char) except ValueError as e: raise ValueError("Array size declaration invalid at line {}".format(self.lineno)) self.arrayLength += char + else: - self.arrayList[self.currentVar] = int(self.arrayLength) - self.varLocation[self.currentVar] = self.memoryLocation - self.memoryLocation += int(self.arrayLength) * 4 - self.arrayLength = "" - self.expectFlag = 1 + # otherwise, we read "]" and we're ready to prepare the newly declared array + if self.arrayLength is not None: + self.arrayList[self.currentVar] = int(self.arrayLength) + self.varLocation[self.currentVar] = self.memoryLocation + self.memoryLocation += int(self.arrayLength) * 4 + self.arrayLength = "" + self.expectFlag = 1 + else: + # we have a case where nothing was put in the brackets, ex: "int a[];" + raise ValueError("Empty array size at line {}".format(self.lineno)) elif self.expectFlag == 1: + # at this point, we're either done with the array declaration, or we're assigning values to the array + if char == " ": + # we can still accept empty spaces before a key token is read pass + elif char == "=": + # we're assigning values to the array, so we move on to the next section self.expectFlag = 2 + elif char == ";": + # we're done 
with the declaration. self.expectFlag = 0 + self.currentVar = "" + self.currentType = "" self.state = 5 + else: + # we read a character that's not a semicolon, equal sign, or space raise ValueError("Invalid syntax at line {}".format(self.lineno)) elif self.expectFlag == 2: # array has an assignment immediately after its declaration + if char == " ": + # we can still accept empty spaces before a key token is read pass + elif char == "{": + # initial array declarations can only be assigned values in its entirety. ex: int a[2] = {1,2,3}; self.expectFlag = 3 + else: + # we can only accept an opening curly brace at this point raise ValueError("Invalid array value assignment at line {}".format(self.lineno)) elif self.expectFlag == 3: @@ -1005,30 +1042,45 @@ def state12(self, char, output): self.expectFlag = 4 else: + # otherwise we just append the character to the math formula self.mathFormula += char elif self.expectFlag == 4: + # here we're just waiting for a semi-colon since nothing else can be added at this point + if char == " ": + # we can still accept empty spaces before a key token is read pass + elif char == ";": + # end of statement, math expression is done, everything is set to go back to state 5. self.assignArrayValues(output) self.expectFlag = 0 self.mathFormula = "" self.arrayLength = "" + self.currentVar = "" + self.currentType = "" self.state = 5 + else: + # we read something other than a semi-colon or a space raise ValueError("Incorrect syntax at line {}".format(self.lineno)) def state13(self, char, output): """ - This state deals with assigning a value to a specific array index - :param char: - :param output: + This state deals with assigning a value to a specific array index. Here we assume the array has already been + declared, and we're simply assigning a value to a specific index. 
+ :param char: char, Individual character read from input file + :param output: file, output file to write to :return: """ if self.expectFlag == 0: + # here we expect to read the index of the array + if char == "]": + # we're done reading characters for the index, so we check if it's a valid integer and within bounds + try: int(self.arrayLength) except ValueError as e: @@ -1040,16 +1092,22 @@ def state13(self, char, output): self.expectFlag = 1 else: + # otherwise, we're still reading the index (though realistically it'll probably just be 1-2 chars) self.arrayLength += char elif self.expectFlag == 1: + if char == " ": + # we can still accept empty spaces before a key token is read pass elif char == "=": self.expectFlag = 3 elif self.expectFlag == 3: + # here we keep reading input for the math formula until the end of the input ";" + if char == ";": + # we're done reading the math expression, so we call the mathparser functions and reset tokens = tokenize(self.mathFormula) postfix = infixToPostfix(tokens) evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], @@ -1061,7 +1119,9 @@ def state13(self, char, output): self.mathFormula = "" self.arrayLength = "" self.state = 5 + else: + # otherwise the char gets added to the math formula self.mathFormula += char def validName(self, name): @@ -1156,6 +1216,3 @@ def assignArrayValues(self, output): startingLocation += 4 - def arrayBoundsCheck(self): - pass - diff --git a/ToolChain/Compiler/CompilerInfo.md b/ToolChain/Compiler/CompilerInfo.md new file mode 100644 index 0000000..e3902d3 --- /dev/null +++ b/ToolChain/Compiler/CompilerInfo.md @@ -0,0 +1,40 @@ +# Spartacus Compiler Info +## Supported features +The compiler currently supports the following features: + +* Integer data type +* Variable assignment +* If statements +* While loops +* Return statements +* Arrays +* Function calls +* Multiple function declaration + +##Restrictions +The compiler currently has some restrictions: + +* 
Currently, arrays may only be indexed with hard coded integers, not variables +* If statement and while loop operands can't contain parentheses (we may opt to remove the requirement +for parentheses to fix this) +* Math expressions for variable assignment may only have a maximum of 6 operands. This isn't the real +maximum, as it varies depending on how many variables are used. This maximum is simply for consistency. +* Function calls can't be used in complex variable assignment. They can only be used if it's the only operand. + +- - - +This file is part of Spartacus project +Copyright (C) 2018 CSE + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
\ No newline at end of file diff --git a/ToolChain/Compiler/Constants.py b/ToolChain/Compiler/Constants.py index 1fe6b17..4e575f6 100644 --- a/ToolChain/Compiler/Constants.py +++ b/ToolChain/Compiler/Constants.py @@ -88,7 +88,7 @@ re.escape(DIVISION)) ) -ARRAY_PATTERN = ".\[[0-9]\]" +ARRAY_PATTERN = "\w*\[[0-9]\]" diff --git a/ToolChain/Compiler/MathParser.py b/ToolChain/Compiler/MathParser.py index fb62cb3..27702a4 100644 --- a/ToolChain/Compiler/MathParser.py +++ b/ToolChain/Compiler/MathParser.py @@ -144,6 +144,7 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar pass elif re.match(ARRAY_PATTERN, operand1): + # Our variable is an array, and must be in the pattern "var[1]". We use regex to sort the information match = re.search(ARRAY_PATTERN, operand1) operands = match.group(0) operands = operands.split("[") @@ -189,6 +190,7 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar pass elif re.match(ARRAY_PATTERN, operand2): + # Our variable is an array, and must be in the pattern "var[1]". We use regex to sort the information match = re.search(ARRAY_PATTERN, operand2) operands = match.group(0) operands = operands.split("[") @@ -282,7 +284,9 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar operands[1] = operands[1].replace("]", "") if operands[0] not in arrayList: + # name of variable must be a valid array declaration raise ValueError("Invalid variable.") + if int(operands[1]) > int(arrayList[operands[0]] - 1): # Can't access an index that doesn't exist! 
raise ValueError("Array index out of bounds.") From 4c9fb656e5bea75e8c0fb8755ee4dfeecea6e2a2 Mon Sep 17 00:00:00 2001 From: Chris LeBlanc Date: Wed, 11 Jul 2018 15:43:05 -0300 Subject: [PATCH 03/10] Adding functionality to Compiler, pointers now supported --- ToolChain/Compiler/Compiler.py | 319 +++++++++++++++++++++++++++++ ToolChain/Compiler/CompilerInfo.md | 2 + ToolChain/Compiler/MathParser.py | 6 +- 3 files changed, 324 insertions(+), 3 deletions(-) diff --git a/ToolChain/Compiler/Compiler.py b/ToolChain/Compiler/Compiler.py index 4236755..2de0475 100644 --- a/ToolChain/Compiler/Compiler.py +++ b/ToolChain/Compiler/Compiler.py @@ -68,6 +68,7 @@ class Compiler: whileList = [] # List containing the names of while loops arrayList = {} # Dict containing variables that are arrays arrayLength = "" # Length of current array variable being evaluated + pointerList = [] # List containing variables that are pointers def __init__(self, inputFile=None, outputFile=None): """ @@ -172,6 +173,18 @@ def parse(self, char, output): elif self.state == 13: self.state13(char, output) + elif self.state == 14: + self.state14(char, output) + + elif self.state == 15: + self.state15(char, output) + + elif self.state == 16: + self.state16(char, output) + + elif self.state == 17: + self.state17(char, output) + def state0(self, char, output): """ First step in parsing data. At this step, we begin to read the method header. 
We expect to read the return data @@ -394,6 +407,10 @@ def state5(self, char, output): # whitespace or newline characters when not expecting a particular input pass + elif char == "*" and self.expectFlag == 0: + # assigning a value to a pointer, it should be the first thing we read + self.state = 17 + elif char == " " and self.expectFlag == 1: # we read a space, now we evaluate our indicator to determine what sort of operation we're dealing with if self.identifier == "if": @@ -441,20 +458,39 @@ def state5(self, char, output): self.functionCall = self.identifier self.identifier = "" + elif self.identifier in self.pointerList: + # identifier is a pointer + self.expectFlag = 2 + self.state = 14 + self.currentVar = self.identifier + self.identifier = "" + else: # identifier was not valid + print(self.identifier) raise ValueError("Error at line {}".format(self.lineno)) elif char == "=" and self.expectFlag == 1: # here we have a variable assignment. Variable must be already declared in this case if (self.identifier in self.varList) or self.identifier in self.methodList[self.currentMethod]: self.expectFlag = 0 + self.currentVar = self.identifier self.state = 7 + self.identifier = "" + + elif self.identifier in self.pointerList: + # identifier is a pointer + self.expectFlag = 0 + self.state = 15 + self.currentVar = self.identifier + self.identifier = "" + else: raise ValueError("Invalid assignment at line {}: must be valid variable".format(self.lineno)) elif char == "[" and self.expectFlag == 1: # this implies an already declared array + self.currentVar = self.identifier self.expectFlag = 0 self.identifier = "" self.state = 13 @@ -526,6 +562,10 @@ def state6(self, char, output): # ignore spaces/new line chars if we're not expecting any input in particular pass + elif char == "*" and self.expectFlag == 0: + # we're dealing with a new pointer variable in this case + self.state = 14 + elif char == " " and self.expectFlag == 1: # we have the variable name, now we move to the 
next phase to determine appropriate action self.expectFlag = 2 @@ -559,6 +599,7 @@ def state6(self, char, output): elif char == "=": # variable assignment. if the variable was not in the list, we add it if (self.currentVar not in self.varList) and self.currentVar not in self.methodList[self.currentMethod]: + # we're dealing with a new variable self.varList.append(self.currentVar) self.varLocation[self.currentVar] = self.memoryLocation self.memoryLocation += 4 @@ -606,6 +647,10 @@ def state7(self, char, output): # otherwise, the parentheses is just part of a normal math expression self.mathFormula += char + elif char == "*": + # we're dereferencing a pointer, so we need to handle this differently than a normal variable assignment + self.state = 16 + elif char == " " and self.expectFlag == 1: # we read a space, so we evaluate what the math formula holds thus far. if self.mathFormula in self.methodList: @@ -677,6 +722,15 @@ def state8(self, char, output): if char == ",": if self.functionArg in self.varList: output.write(" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") + + elif re.match(ARRAY_PATTERN, self.functionArg): + # variable is an array index + match = re.search(ARRAY_PATTERN, self.functionArg) + operands = match.group(0) + operands = operands.split("[") + operands[1] = operands[1].replace("]", "") + output.write(" PUSH #" + str(self.varLocation[operands[0]] + int(operands[1]) * 4) + "\n") + else: raise ValueError("Invalid variable at line {}".format(self.lineno)) self.expectFlag = 1 @@ -753,6 +807,7 @@ def state8(self, char, output): self.functionCall = "" self.functionArg = "" self.mathFormula = "" + self.currentVar = "" self.argCount = 0 self.expectFlag = 0 @@ -1124,6 +1179,270 @@ def state13(self, char, output): # otherwise the char gets added to the math formula self.mathFormula += char + def state14(self, char, output): + """ + This method deals with initialization of pointers. 
Pointer variable can only be assigned a single variable, + with the & prefix. The variable must already be declared, and cannot be paired with any other operand. + :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if self.expectFlag == 0 and char == " ": + pass + + elif self.expectFlag == 0: + self.currentVar += char + self.expectFlag = 1 + + elif self.expectFlag == 1: + if char in IGNORE_CHARS: + self.expectFlag = 2 + self.validName(self.currentVar) + elif char == "=": + self.state = 15 + self.validName(self.currentVar) + self.pointerList.append(self.currentVar) + self.varLocation[self.currentVar] = self.memoryLocation + self.memoryLocation += 4 + elif char == ";": + self.state = 5 + self.pointerList.append(self.currentVar) + self.varLocation[self.currentVar] = self.memoryLocation + self.memoryLocation += 4 + self.currentVar = "" + self.currentType = "" + self.expectFlag = 0 + + else: + self.currentVar += char + + elif self.expectFlag == 2: + if char in IGNORE_CHARS: + pass + elif char == "=": + self.state = 15 + self.expectFlag = 0 + self.pointerList.append(self.currentVar) + self.varLocation[self.currentVar] = self.memoryLocation + self.memoryLocation += 4 + + elif char == ";": + self.state = 5 + self.pointerList.append(self.currentVar) + self.varLocation[self.currentVar] = self.memoryLocation + self.memoryLocation += 4 + self.currentVar = "" + self.currentType = "" + self.expectFlag = 0 + + else: + raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + + def state15(self, char, output): + """ + This method assigns a value to a pointer. The value must be a valid memory address (and must thus be referenced + by a valid variable using the & character). 
+ :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if self.expectFlag == 0: + if char in IGNORE_CHARS: + pass + elif char == "&": + self.expectFlag = 1 + else: + print(char) + raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + + elif self.expectFlag == 1: + if char in IGNORE_CHARS: + if self.mathFormula in self.varList: + pass + elif self.mathFormula in self.methodList[self.currentMethod]: + pass + elif re.match(ARRAY_PATTERN, self.functionArg): + # variable is an array index + match = re.search(ARRAY_PATTERN, self.functionArg) + operands = match.group(0) + operands = operands.split("[") + operands[1] = operands[1].replace("]", "") + else: + raise ValueError("Invalid variable name at line {}".format(self.lineno)) + self.expectFlag = 2 + + elif char == ";": + if self.mathFormula in self.varList: + output.write(" MEMW [4] #" + str(self.varLocation[self.mathFormula]) + " #" + + str(self.varLocation[self.currentVar]) + "\n") + + elif self.mathFormula in self.methodList[self.currentMethod]: + output.write(" MOV $A2 $S2\n") + output.write(" ADD #" + str(self.methodList[self.currentMethod][self.currentVar][1] * 4) + + " $A2\n") + output.write(" MEMW [4] $A2 #" + str(self.varLocation[self.currentVar]) + "\n") + + elif re.match(ARRAY_PATTERN, self.functionArg): + # variable is an array index + match = re.search(ARRAY_PATTERN, self.functionArg) + operands = match.group(0) + operands = operands.split("[") + operands[1] = operands[1].replace("]", "") + output.write(" #" + str(self.varLocation[self.mathFormula] + operands[1] * 4) + " #" + + str(self.varLocation[self.currentVar]) + "\n") + + else: + raise ValueError("Invalid variable name at line {}".format(self.lineno)) + self.expectFlag = 0 + self.currentVar = "" + self.currentType = "" + self.mathFormula = "" + self.state = 5 + + else: + self.mathFormula += char + + elif self.expectFlag == 2: + if char in IGNORE_CHARS: + pass + 
elif char == ";": + if self.mathFormula in self.varList: + output.write(" MEMW [4] #" + str(self.varLocation[self.mathFormula]) + " #" + + str(self.varLocation[self.currentVar] + "\n")) + + elif self.mathFormula in self.methodList[self.currentMethod]: + output.write(" MOV $A2 $S2\n") + output.write(" ADD #" + str(self.methodList[self.currentMethod][self.currentVar][1] * 4) + + " $A2\n") + output.write(" MEMW [4] $A2 #" + str(self.varLocation[self.currentVar]) + "\n") + + elif re.match(ARRAY_PATTERN, self.functionArg): + # variable is an array index + match = re.search(ARRAY_PATTERN, self.functionArg) + operands = match.group(0) + operands = operands.split("[") + operands[1] = operands[1].replace("]", "") + output.write(" #" + str(self.varLocation[self.mathFormula] + operands[1] * 4) + " #" + + str(self.varLocation[self.currentVar]) + "\n") + + else: + raise ValueError("Invalid variable name at line {}".format(self.lineno)) + self.expectFlag = 0 + self.currentVar = "" + self.currentType = "" + self.mathFormula = "" + self.state = 5 + else: + raise ValueError("Syntax error at line {}".format(self.lineno)) + + def state16(self, char, output): + """ + This state deals with dereferencing a pointer. Any variable can be assigned a pointer dereference, but it must + stand alone as an operand. The memory location stored in the pointer must be a valid variable. It should be + noted that pointers cannot dereference other pointers. 
+ :param char: char, Individual character read from input file + :param output: file, output file to write to + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + pass + + elif self.expectFlag == 0: + self.mathFormula += char + self.expectFlag = 1 + + elif self.expectFlag == 1: + if char == " ": + self.expectFlag = 2 + + elif char == ";": + if self.mathFormula not in self.pointerList: + raise ValueError("Invalid pointer variable at line {}".format(self.lineno)) + output.write(" MEMR [4] #" + str(self.varLocation[self.mathFormula]) + " $A\n") + output.write(" MEMR [4] $A $B\n") + output.write(" MEMW [4] $B #" + str(self.varLocation[self.currentVar]) + "\n") + self.expectFlag = 0 + self.currentVar = "" + self.currentType = "" + self.state = 5 + else: + self.mathFormula += char + + elif self.expectFlag == 2: + if char in IGNORE_CHARS: + pass + elif char == ";": + if self.mathFormula not in self.pointerList: + raise ValueError("Invalid pointer variable at line {}".format(self.lineno)) + output.write(" MEMR [4] #" + str(self.varLocation[self.mathFormula]) + " $A\n") + output.write(" MEMR [4] $A $B\n") + output.write(" MEMW [4] $B #" + str(self.varLocation[self.currentVar]) + "\n") + self.expectFlag = 0 + self.currentVar = "" + self.currentType = "" + self.state = 5 + + else: + raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + + def state17(self, char, output): + """ + This method allows you to assign an immediate value to a pointer. You're risking accessing an invalid memory + location by doing this, however. 
The format should be "*var = int" + :param char: + :param output: + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + pass + + elif self.expectFlag == 0: + self.currentVar += char + self.expectFlag = 1 + + elif self.expectFlag == 1: + + if char == "=": + self.expectFlag = 3 + if self.currentVar not in self.pointerList: + raise ValueError("Invalid pointer variable at line {}".format(self.lineno)) + + elif char in IGNORE_CHARS: + self.expectFlag = 2 + + else: + self.currentVar += char + + elif self.expectFlag == 2: + if char in IGNORE_CHARS: + pass + elif char == "=": + self.expectFlag = 3 + if self.currentVar not in self.pointerList: + raise ValueError("Invalid pointer variable at line {}".format(self.lineno)) + else: + raise ValueError("Invalid syntax at line {}".format(self.lineno)) + + elif self.expectFlag == 3: + if char == ";": + tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + output.write(" MEMW [4] $A #" + str(self.varLocation[self.currentVar]) + "\n") + self.currentVar = "" + self.currentType = "" + self.expectFlag = 0 + self.mathFormula = "" + self.state = 5 + + else: + self.mathFormula += char + def validName(self, name): """ Verifies whether the variable's name contains only acceptable characters (A-Z, $, _, #) diff --git a/ToolChain/Compiler/CompilerInfo.md b/ToolChain/Compiler/CompilerInfo.md index e3902d3..e6617db 100644 --- a/ToolChain/Compiler/CompilerInfo.md +++ b/ToolChain/Compiler/CompilerInfo.md @@ -10,6 +10,7 @@ The compiler currently supports the following features: * Arrays * Function calls * Multiple function declaration +* Pointers ##Restrictions The compiler currently has some restrictions: @@ -20,6 +21,7 @@ for parentheses to fix this) * Math expressions for variable assignment may only have a maximum of 6 operands. 
This isn't the real maximum, as it varies depending on how many variables are used. This maximum is simply for consistency. * Function calls can't be used in complex variable assignment. They can only be used if it's the only operand. +* - - - This file is part of Spartacus project diff --git a/ToolChain/Compiler/MathParser.py b/ToolChain/Compiler/MathParser.py index 27702a4..4c8417d 100644 --- a/ToolChain/Compiler/MathParser.py +++ b/ToolChain/Compiler/MathParser.py @@ -143,7 +143,7 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar # This is simply a register that was pushed onto the stack. We can keep it as is pass - elif re.match(ARRAY_PATTERN, operand1): + elif re.match(ARRAY_PATTERN, str(operand1)): # Our variable is an array, and must be in the pattern "var[1]". We use regex to sort the information match = re.search(ARRAY_PATTERN, operand1) operands = match.group(0) @@ -189,7 +189,7 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar # This is simply a register that was pushed onto the stack. We can keep it as is pass - elif re.match(ARRAY_PATTERN, operand2): + elif re.match(ARRAY_PATTERN, str(operand2)): # Our variable is an array, and must be in the pattern "var[1]". We use regex to sort the information match = re.search(ARRAY_PATTERN, operand2) operands = match.group(0) @@ -276,7 +276,7 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar output.write(" ADD #" + str(int(methodVariables[result][1]) * 4) + " $B2\n") output.write(" MEMR [4] $B2 $A\n") - elif re.match(ARRAY_PATTERN, result): + elif re.match(ARRAY_PATTERN, str(result)): # our last operand is an array at a specific index. We find the index, and add the offset to the variable loc. 
match = re.search(ARRAY_PATTERN, result) operands = match.group(0) From c3d9a19677c04591fb8c87fb7672b097fbfc2a54 Mon Sep 17 00:00:00 2001 From: Chris LeBlanc Date: Fri, 13 Jul 2018 10:47:30 -0300 Subject: [PATCH 04/10] Added commenting, cleaned up code --- ToolChain/Compiler/Compiler.py | 97 ++++++++++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 4 deletions(-) diff --git a/ToolChain/Compiler/Compiler.py b/ToolChain/Compiler/Compiler.py index 2de0475..dcdc4c2 100644 --- a/ToolChain/Compiler/Compiler.py +++ b/ToolChain/Compiler/Compiler.py @@ -1189,23 +1189,32 @@ def state14(self, char, output): """ if self.expectFlag == 0 and char == " ": + # we can still accept empty spaces before a key token is read pass elif self.expectFlag == 0: + # here we read the first non-space character self.currentVar += char self.expectFlag = 1 elif self.expectFlag == 1: + # after reading the first non-space character, we read the name of the variable for the pointer + if char in IGNORE_CHARS: + # we can still accept empty spaces before a key token is read self.expectFlag = 2 self.validName(self.currentVar) + elif char == "=": + # equals sign means we're assigning a value to the pointer (memory address) self.state = 15 self.validName(self.currentVar) self.pointerList.append(self.currentVar) self.varLocation[self.currentVar] = self.memoryLocation self.memoryLocation += 4 + elif char == ";": + # end of declaration, we simple allocate memory location without giving a value self.state = 5 self.pointerList.append(self.currentVar) self.varLocation[self.currentVar] = self.memoryLocation @@ -1215,12 +1224,18 @@ def state14(self, char, output): self.expectFlag = 0 else: + # otherwise we assume we're still reading the name of the variable being declared self.currentVar += char elif self.expectFlag == 2: + # We read a space, so we expect either another space or newline, or a valid operator + if char in IGNORE_CHARS: + # we can still accept empty spaces before a key token is read pass + 
elif char == "=": + # equals sign means we're assigning a value to the pointer (memory address) self.state = 15 self.expectFlag = 0 self.pointerList.append(self.currentVar) @@ -1228,6 +1243,7 @@ def state14(self, char, output): self.memoryLocation += 4 elif char == ";": + # end of declaration, we simple allocate memory location without giving a value self.state = 5 self.pointerList.append(self.currentVar) self.varLocation[self.currentVar] = self.memoryLocation @@ -1237,6 +1253,7 @@ def state14(self, char, output): self.expectFlag = 0 else: + # we read something other than "=" or ";" in this context, which would be incorrect raise ValueError("Incorrect syntax at line {}".format(self.lineno)) def state15(self, char, output): @@ -1249,43 +1266,67 @@ def state15(self, char, output): """ if self.expectFlag == 0: + # we can read spaces or new line chars, but the first non-empty character MUST be an ampersand "&" + if char in IGNORE_CHARS: pass + elif char == "&": self.expectFlag = 1 + else: - print(char) + # We read something other than space, new line, or ampersand raise ValueError("Incorrect syntax at line {}".format(self.lineno)) elif self.expectFlag == 1: + # here we read the name of the variable whose address we're assigning to the pointer + if char in IGNORE_CHARS: + # if we read a space, we evaluate the variable name to see if it exists + if self.mathFormula in self.varList: + # variable is in regular variable list, we're good and can move on to flag 2 pass + elif self.mathFormula in self.methodList[self.currentMethod]: + # variable is argument passed into current method, so variable is valid pass + elif re.match(ARRAY_PATTERN, self.functionArg): - # variable is an array index + # variable is an array index. 
we parse the variable name and index to determine if they're valid match = re.search(ARRAY_PATTERN, self.functionArg) operands = match.group(0) operands = operands.split("[") operands[1] = operands[1].replace("]", "") + + if operands[0] not in self.arrayList: + raise ValueError("Invalid array variable at line {}".format(self.lineno)) + if operands[1] > self.arrayList[self.currentVar] - 1: + raise ValueError("Array index out of bounds at line {}".format(self.lineno)) + else: + # variable didn't match any pattern or was not present in any valid list raise ValueError("Invalid variable name at line {}".format(self.lineno)) + self.expectFlag = 2 elif char == ";": + # we immediately read the end of the statement, so we evaluate the variable and write the correct output + if self.mathFormula in self.varList: + # variable is in regular list, so we assign its memory location to the pointer output.write(" MEMW [4] #" + str(self.varLocation[self.mathFormula]) + " #" + str(self.varLocation[self.currentVar]) + "\n") elif self.mathFormula in self.methodList[self.currentMethod]: + # variable is passed in as argument, we just write the pointer register's value at the right index output.write(" MOV $A2 $S2\n") output.write(" ADD #" + str(self.methodList[self.currentMethod][self.currentVar][1] * 4) + " $A2\n") output.write(" MEMW [4] $A2 #" + str(self.varLocation[self.currentVar]) + "\n") elif re.match(ARRAY_PATTERN, self.functionArg): - # variable is an array index + # variable is an array index, we get the memory location at index 0 and add the correct offset match = re.search(ARRAY_PATTERN, self.functionArg) operands = match.group(0) operands = operands.split("[") @@ -1309,17 +1350,19 @@ def state15(self, char, output): pass elif char == ";": if self.mathFormula in self.varList: + # variable is in regular list, so we assign its memory location to the pointer output.write(" MEMW [4] #" + str(self.varLocation[self.mathFormula]) + " #" + str(self.varLocation[self.currentVar] + 
"\n")) elif self.mathFormula in self.methodList[self.currentMethod]: + # variable is passed in as argument, we just write the pointer register's value at the right index output.write(" MOV $A2 $S2\n") output.write(" ADD #" + str(self.methodList[self.currentMethod][self.currentVar][1] * 4) + " $A2\n") output.write(" MEMW [4] $A2 #" + str(self.varLocation[self.currentVar]) + "\n") elif re.match(ARRAY_PATTERN, self.functionArg): - # variable is an array index + # variable is an array index, we get the memory location at index 0 and add the correct offset match = re.search(ARRAY_PATTERN, self.functionArg) operands = match.group(0) operands = operands.split("[") @@ -1328,13 +1371,17 @@ def state15(self, char, output): str(self.varLocation[self.currentVar]) + "\n") else: + # we already did the check in flag 1, so this technically shouldn't execute and something went wrong raise ValueError("Invalid variable name at line {}".format(self.lineno)) + self.expectFlag = 0 self.currentVar = "" self.currentType = "" self.mathFormula = "" self.state = 5 + else: + # we're expecting the end of the statement ";", so anything else in invalid raise ValueError("Syntax error at line {}".format(self.lineno)) def state16(self, char, output): @@ -1348,44 +1395,63 @@ def state16(self, char, output): """ if char in IGNORE_CHARS and self.expectFlag == 0: + # we can still accept empty spaces before a key token is read pass elif self.expectFlag == 0: + # we read our first non space character, so we append to mathformula to determine our variable name self.mathFormula += char self.expectFlag = 1 elif self.expectFlag == 1: + # here we read the rest of the variable name until we reach a space character or semi colon + if char == " ": + # we read a space, so we're no longer reading variable name self.expectFlag = 2 elif char == ";": + # end of statement, the variable must be a valid pointer. 
We grab the value at the memory location + # stored inside the pointer + if self.mathFormula not in self.pointerList: raise ValueError("Invalid pointer variable at line {}".format(self.lineno)) output.write(" MEMR [4] #" + str(self.varLocation[self.mathFormula]) + " $A\n") output.write(" MEMR [4] $A $B\n") output.write(" MEMW [4] $B #" + str(self.varLocation[self.currentVar]) + "\n") + self.expectFlag = 0 self.currentVar = "" self.currentType = "" self.state = 5 + else: + # otherwise, we're still reading the pointer variable's name self.mathFormula += char elif self.expectFlag == 2: + # at this point, we can keep reading spaces but the next non-space character must be a semi-colon + if char in IGNORE_CHARS: pass + elif char == ";": + # end of statement, the variable must be a valid pointer. We grab the value at the memory location + # stored inside the pointer + if self.mathFormula not in self.pointerList: raise ValueError("Invalid pointer variable at line {}".format(self.lineno)) output.write(" MEMR [4] #" + str(self.varLocation[self.mathFormula]) + " $A\n") output.write(" MEMR [4] $A $B\n") output.write(" MEMW [4] $B #" + str(self.varLocation[self.currentVar]) + "\n") + self.expectFlag = 0 self.currentVar = "" self.currentType = "" self.state = 5 else: + # we didn't read a semi colon or space character, so the syntax is incorrect raise ValueError("Incorrect syntax at line {}".format(self.lineno)) def state17(self, char, output): @@ -1398,42 +1464,64 @@ def state17(self, char, output): """ if char in IGNORE_CHARS and self.expectFlag == 0: + # we can still accept empty spaces before a key token is read pass elif self.expectFlag == 0: + # first non-space character is read, so we start reading the variable's name self.currentVar += char self.expectFlag = 1 elif self.expectFlag == 1: + # here we read the pointer variable's name if char == "=": + # assignment operator means we're done reading the name. 
We check if it's in the list self.expectFlag = 3 if self.currentVar not in self.pointerList: raise ValueError("Invalid pointer variable at line {}".format(self.lineno)) elif char in IGNORE_CHARS: + # we read a space character, so we're done reading the variable name self.expectFlag = 2 else: + # otherwise we keep reading the variable's name self.currentVar += char elif self.expectFlag == 2: + # we expect to read an assignment operator, since no other operations are valid for *pointer + if char in IGNORE_CHARS: + # we can still read space characters pass + elif char == "=": + # assignment operator means we're done reading the name. We check if it's in the list self.expectFlag = 3 if self.currentVar not in self.pointerList: raise ValueError("Invalid pointer variable at line {}".format(self.lineno)) + else: + # there are no other valid characters we can read, so the syntax is incorrect raise ValueError("Invalid syntax at line {}".format(self.lineno)) elif self.expectFlag == 3: + # here we read the value that will be assigned to the pointer + if char == ";": + # end of statement, we evaluate the mathformula and assign the value to the pointer variable + + if len(self.mathFormula) == 0: + # can't have an empty expression (ex: *pointer = ;) + raise ValueError("Empty operand at line {}".format(self.lineno)) + tokens = tokenize(self.mathFormula) postfix = infixToPostfix(tokens) evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], self.arrayList, output) output.write(" MEMW [4] $A #" + str(self.varLocation[self.currentVar]) + "\n") + self.currentVar = "" self.currentType = "" self.expectFlag = 0 @@ -1441,6 +1529,7 @@ def state17(self, char, output): self.state = 5 else: + # otherwise we keep appending to our math formula self.mathFormula += char def validName(self, name): From 0a47d3e3c14363218369351da134070493be50d6 Mon Sep 17 00:00:00 2001 From: Chris LeBlanc Date: Mon, 16 Jul 2018 11:29:50 -0300 Subject: [PATCH 05/10] Added 
functionality for char variables --- ToolChain/Compiler/Compiler.py | 176 +++++++++++++++++++++++++---- ToolChain/Compiler/CompilerInfo.md | 3 +- ToolChain/Compiler/Constants.py | 4 +- 3 files changed, 162 insertions(+), 21 deletions(-) diff --git a/ToolChain/Compiler/Compiler.py b/ToolChain/Compiler/Compiler.py index dcdc4c2..def93c0 100644 --- a/ToolChain/Compiler/Compiler.py +++ b/ToolChain/Compiler/Compiler.py @@ -24,7 +24,9 @@ IGNORE_CHARS, \ BOOLEAN_OPERATORS, \ ALLOWED_CHARS, \ - ARRAY_PATTERN + ARRAY_PATTERN, \ + SINGLE_QUOTE, \ + DOUBLE_QUOTE from ToolChain.Compiler.MathParser import tokenize, \ infixToPostfix, \ @@ -42,6 +44,13 @@ class Compiler: + """ + This is a compiler that converts C code to Capua ASM. It currently supports a small subset of the C programming + language, and functionality will progressively be added. It makes use of a finite state machine type model, using + states to determine the next expected input, and how to handle the information. Consult the documentation for + details on supported features. + and limitations. + """ state = 0 # "States" are used to determine our next path for processing the C file currentVar = "" # Name of variable being evaluated @@ -69,6 +78,8 @@ class Compiler: arrayList = {} # Dict containing variables that are arrays arrayLength = "" # Length of current array variable being evaluated pointerList = [] # List containing variables that are pointers + charList = [] # List containing variables that are chars + quoteFlag = "" # Keeps track of whether we used single or double quote to declare a char variable def __init__(self, inputFile=None, outputFile=None): """ @@ -185,6 +196,12 @@ def parse(self, char, output): elif self.state == 17: self.state17(char, output) + elif self.state == 18: + self.state18(char, output) + + elif self.state == 19: + self.state19(char, output) + def state0(self, char, output): """ First step in parsing data. At this step, we begin to read the method header. 
We expect to read the return data @@ -382,22 +399,8 @@ def state4(self, char, output): def state5(self, char, output): """ Initial evaluation of a line within the body of a method. We read the input and concatenate to identifier - string. Once we read a key token we check various cases to see where we need to go with out identifier: - space: - -valid data type - -if statement (we later check for opening parentheses) - -variable (already declared) - -while loop - -return statement - "=": - -variable assignment only - "(": - -if statement - -while loop - -function call (e.g. add(a,b)) - "}" - -end of method, loop, or if statement - + string. Once we read a key token we check various cases to see where we need to go with out identifier. This is + where most features can be implemented later. :param char: char, Individual character read from input file :param output: file, output file to write to :return: @@ -448,7 +451,13 @@ def state5(self, char, output): # identifier is a data type, new variable declaration self.currentType = self.identifier self.identifier = "" - self.state = 6 + + if self.currentType == "int": + # new integer variable declaration goes to state 6 + self.state = 6 + elif self.currentType == "char": + # new char variable declaration goes to state 18 + self.state = 18 self.expectFlag = 0 elif self.identifier in self.methodList: @@ -465,6 +474,12 @@ def state5(self, char, output): self.currentVar = self.identifier self.identifier = "" + elif self.identifier in self.charList: + self.expectFlag = 0 + self.currentVar = self.identifier + self.identifier = "" + self.state = 19 + else: # identifier was not valid print(self.identifier) @@ -472,6 +487,7 @@ def state5(self, char, output): elif char == "=" and self.expectFlag == 1: # here we have a variable assignment. 
Variable must be already declared in this case + if (self.identifier in self.varList) or self.identifier in self.methodList[self.currentMethod]: self.expectFlag = 0 self.currentVar = self.identifier @@ -497,6 +513,7 @@ def state5(self, char, output): elif char == "(" and self.expectFlag == 1: # immediately after the identifier, we read an opening parentheses. Here we cover all possible cases + if self.identifier in self.methodList: # identifier is a function call self.state = 8 @@ -525,6 +542,7 @@ def state5(self, char, output): elif char == "}": # end of method, if statement, or while loop + if self.nestedFlag == 0: # if we aren't in any while/if statements, this is the end of our method self.state = 0 @@ -592,6 +610,7 @@ def state6(self, char, output): elif self.expectFlag == 2: # We reach this step if we have the variable name and we read at least one space + if char in IGNORE_CHARS: # we may keep reading spaces/ new line until we reach a relevant token pass @@ -639,6 +658,7 @@ def state7(self, char, output): elif char == "(" and self.expectFlag == 1: # we read an opening parentheses, it could either be a function call or part of a normal math expression + if self.mathFormula in self.methodList: # if we have a function call for a variable assignment, we jump to state 8 which deals with functions self.functionCall = self.mathFormula @@ -653,6 +673,7 @@ def state7(self, char, output): elif char == " " and self.expectFlag == 1: # we read a space, so we evaluate what the math formula holds thus far. 
+ if self.mathFormula in self.methodList: # if we have a function call for a variable assignment, we jump to state 8 which deals with functions self.functionCall = self.mathFormula @@ -1147,7 +1168,7 @@ def state13(self, char, output): self.expectFlag = 1 else: - # otherwise, we're still reading the index (though realistically it'll probably just be 1-2 chars) + # otherwise, we're still reading the index self.arrayLength += char elif self.expectFlag == 1: @@ -1157,6 +1178,8 @@ def state13(self, char, output): pass elif char == "=": self.expectFlag = 3 + else: + raise ValueError("Syntax error at line {}".format(self.lineno)) elif self.expectFlag == 3: # here we keep reading input for the math formula until the end of the input ";" @@ -1532,6 +1555,121 @@ def state17(self, char, output): # otherwise we keep appending to our math formula self.mathFormula += char + def state18(self, char, output): + """ + This state takes in the name of the char variable, then appends it to the char list. + :param char: + :param output: + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + pass + elif self.expectFlag == 0: + self.currentVar += char + self.expectFlag = 1 + + elif self.expectFlag == 1: + + if char in IGNORE_CHARS: + self.expectFlag = 2 + + elif char == ";": + self.charList.append(self.currentVar) + self.varLocation[self.currentVar] = self.memoryLocation + self.memoryLocation += 4 + self.currentVar = "" + self.state = 5 + self.expectFlag = 0 + self.currentType = "" + + elif char == "=": + self.charList.append(self.currentVar) + self.varLocation[self.currentVar] = self.memoryLocation + self.memoryLocation += 4 + self.state = 19 + self.expectFlag = 0 + + else: + self.currentVar += char + + elif self.expectFlag == 2: + if char in IGNORE_CHARS: + pass + + elif char == ";": + self.charList.append(self.currentVar) + self.varLocation[self.currentVar] = self.memoryLocation + self.memoryLocation += 4 + self.currentVar = "" + self.state = 5 + self.expectFlag = 0 
+ self.currentType = "" + + elif char == "=": + self.charList.append(self.currentVar) + self.varLocation[self.currentVar] = self.memoryLocation + self.memoryLocation += 4 + self.state = 19 + self.expectFlag = 0 + + else: + raise ValueError("Invalid syntax at line {}".format(self.lineno)) + + def state19(self, char, output): + """ + This method accepts a value for a char variable. It should be noted that chars will always be a single + character. The char must be surrounded by either single quotes or double quotes. These must match, meaning we + can't use a single quote and double quote at the same time. + :param char: + :param output: + :return: + """ + + if char in IGNORE_CHARS and self.expectFlag == 0: + pass + + elif self.expectFlag == 0: + # here we read the value expected for the char variable. This must be a single or double quote + self.expectFlag = 1 + if char == SINGLE_QUOTE: + self.quoteFlag = SINGLE_QUOTE + elif char == DOUBLE_QUOTE: + self.quoteFlag = DOUBLE_QUOTE + else: + raise ValueError("Incorrect syntax at line {}. 
Char should begin with \" or \'".format(self.lineno)) + + elif self.expectFlag == 1: + if char in IGNORE_CHARS: + pass + else: + self.mathFormula = char + self.expectFlag = 2 + + elif self.expectFlag == 2: + + if char == SINGLE_QUOTE and self.quoteFlag == SINGLE_QUOTE: + self.expectFlag = 3 + elif char == DOUBLE_QUOTE and self.quoteFlag == DOUBLE_QUOTE: + self.expectFlag = 3 + else: + raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + + elif self.expectFlag == 3: + + if char in IGNORE_CHARS: + pass + elif char == ";": + output.write(" MEMW [4] #" + str(ord(self.mathFormula)) + " #" + str(self.varLocation[self.currentVar]) + "\n") + self.currentVar = "" + self.state = 5 + self.expectFlag = 0 + self.currentType = "" + self.mathFormula = "" + self.quoteFlag = "" + else: + raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + def validName(self, name): """ Verifies whether the variable's name contains only acceptable characters (A-Z, $, _, #) diff --git a/ToolChain/Compiler/CompilerInfo.md b/ToolChain/Compiler/CompilerInfo.md index e6617db..1ac783e 100644 --- a/ToolChain/Compiler/CompilerInfo.md +++ b/ToolChain/Compiler/CompilerInfo.md @@ -3,6 +3,7 @@ The compiler currently supports the following features: * Integer data type +* Char data type * Variable assignment * If statements * While loops @@ -21,7 +22,7 @@ for parentheses to fix this) * Math expressions for variable assignment may only have a maximum of 6 operands. This isn't the real maximum, as it varies depending on how many variables are used. This maximum is simply for consistency. * Function calls can't be used in complex variable assignment. They can only be used if it's the only operand. 
-* + - - - This file is part of Spartacus project diff --git a/ToolChain/Compiler/Constants.py b/ToolChain/Compiler/Constants.py index 4e575f6..020c53f 100644 --- a/ToolChain/Compiler/Constants.py +++ b/ToolChain/Compiler/Constants.py @@ -30,11 +30,13 @@ import re -ACCEPTED_TYPES = ["int"] +ACCEPTED_TYPES = ["int", "char"] OPERATORS = ['+', '-', '*', '/'] BOOLEAN_OPERATORS = ["<", ">", "="] IGNORE_CHARS = [" ", "\n"] REGISTER_NAMES = ["A", "B", "C", "D", "E", "F", "G"] +SINGLE_QUOTE = "\'" +DOUBLE_QUOTE = "\"" L_PARENTHESES = '(' From 8f1b861a4fd9a388a1fc817c02e697a97bd1acf4 Mon Sep 17 00:00:00 2001 From: Chris LeBlanc Date: Tue, 17 Jul 2018 13:16:33 -0300 Subject: [PATCH 06/10] Fixed some bugs regarding chars and arrays --- ToolChain/Compiler/Compiler.py | 27 +++++++++++++++++++++++---- ToolChain/Compiler/CompilerInfo.md | 6 +++--- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/ToolChain/Compiler/Compiler.py b/ToolChain/Compiler/Compiler.py index def93c0..b17ca7e 100644 --- a/ToolChain/Compiler/Compiler.py +++ b/ToolChain/Compiler/Compiler.py @@ -48,8 +48,7 @@ class Compiler: This is a compiler that converts C code to Capua ASM. It currently supports a small subset of the C programming language, and functionality will progressively be added. It makes use of a finite state machine type model, using states to determine the next expected input, and how to handle the information. Consult the documentation for - details on supported features. - and limitations. + details on supported features and limitations. 
""" state = 0 # "States" are used to determine our next path for processing the C file @@ -126,6 +125,9 @@ def readFile(self, inputFile, outputFile): output.write("end:\n") + if self.currentMethod != "": + raise ValueError("Missing closing curly brace for end of method/if/while.") + try: file.close() output.close() @@ -475,7 +477,8 @@ def state5(self, char, output): self.identifier = "" elif self.identifier in self.charList: - self.expectFlag = 0 + # identifier is a char variable + self.expectFlag = 4 self.currentVar = self.identifier self.identifier = "" self.state = 19 @@ -501,6 +504,13 @@ def state5(self, char, output): self.currentVar = self.identifier self.identifier = "" + elif self.identifier in self.charList: + # identifier is a char variable + self.expectFlag = 4 + self.currentVar = self.identifier + self.identifier = "" + self.state = 19 + else: raise ValueError("Invalid assignment at line {}: must be valid variable".format(self.lineno)) @@ -1637,6 +1647,7 @@ def state19(self, char, output): elif char == DOUBLE_QUOTE: self.quoteFlag = DOUBLE_QUOTE else: + print(char) raise ValueError("Incorrect syntax at line {}. Char should begin with \" or \'".format(self.lineno)) elif self.expectFlag == 1: @@ -1670,6 +1681,14 @@ def state19(self, char, output): else: raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + elif self.expectFlag == 4: + if char in IGNORE_CHARS: + pass + elif char == "=": + self.expectFlag = 0 + else: + raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + def validName(self, name): """ Verifies whether the variable's name contains only acceptable characters (A-Z, $, _, #) @@ -1685,7 +1704,7 @@ def verifyVariable(self): """ Method checks whether the variable is already present in the list. If so, we raise an error since we can't have duplicate variable names. Otherwise, we add it to the list, assign it a memory location, increment the memory - location counter, and increase the total variable count. 
+ location counter, and increase the total variable count. This is used for state 6 to clean up the code :return: """ diff --git a/ToolChain/Compiler/CompilerInfo.md b/ToolChain/Compiler/CompilerInfo.md index 1ac783e..09923c3 100644 --- a/ToolChain/Compiler/CompilerInfo.md +++ b/ToolChain/Compiler/CompilerInfo.md @@ -16,13 +16,13 @@ The compiler currently supports the following features: ##Restrictions The compiler currently has some restrictions: -* Currently, arrays may only be indexed with hard coded integers, not variables +* Currently, arrays may only be indexed with hard coded integers, not variables. * If statement and while loop operands can't contain parentheses (we may opt to remove the requirement -for parentheses to fix this) +for parentheses to fix this). * Math expressions for variable assignment may only have a maximum of 6 operands. This isn't the real maximum, as it varies depending on how many variables are used. This maximum is simply for consistency. * Function calls can't be used in complex variable assignment. They can only be used if it's the only operand. - +* Pointers can't dereference other pointers. - - - This file is part of Spartacus project From 56f1787a7c785e4833ee19dd50dcc080c388fec0 Mon Sep 17 00:00:00 2001 From: Chris LeBlanc Date: Tue, 17 Jul 2018 13:19:41 -0300 Subject: [PATCH 07/10] Misplaced version info in test_parser.py --- ToolChain/Assembler/Parser/test_Parser.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/ToolChain/Assembler/Parser/test_Parser.py b/ToolChain/Assembler/Parser/test_Parser.py index 92d26fc..fb8b31a 100644 --- a/ToolChain/Assembler/Parser/test_Parser.py +++ b/ToolChain/Assembler/Parser/test_Parser.py @@ -20,15 +20,6 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
""" -__author__ = "CSE" -__copyright__ = "Copyright 2018, CSE" -__credits__ = ["CSE"] -__license__ = "GPL" -__version__ = "3.0" -__maintainer__ = "CSE" -__status__ = "Dev" - - from ToolChain.Assembler.Parser.Parser import Parser from ToolChain.Assembler.Assembler import Assembler from ToolChain.Assembler.Constants import STATE0, \ @@ -47,6 +38,13 @@ import struct import os +__author__ = "CSE" +__copyright__ = "Copyright 2018, CSE" +__credits__ = ["CSE"] +__license__ = "GPL" +__version__ = "3.0" +__maintainer__ = "CSE" +__status__ = "Dev" class TestParser(unittest.TestCase): From d194d699a0345b8e5d0f1ea51265409d56cc5046 Mon Sep 17 00:00:00 2001 From: Chris LeBlanc Date: Thu, 19 Jul 2018 08:34:57 -0300 Subject: [PATCH 08/10] Adding binary operator functionality for if/while --- ToolChain/Compiler/Compiler.py | 222 ++++++++++++++++++++++--------- ToolChain/Compiler/Constants.py | 1 + ToolChain/Compiler/MathParser.py | 2 + 3 files changed, 161 insertions(+), 64 deletions(-) diff --git a/ToolChain/Compiler/Compiler.py b/ToolChain/Compiler/Compiler.py index b17ca7e..0eb5ab8 100644 --- a/ToolChain/Compiler/Compiler.py +++ b/ToolChain/Compiler/Compiler.py @@ -26,7 +26,8 @@ ALLOWED_CHARS, \ ARRAY_PATTERN, \ SINGLE_QUOTE, \ - DOUBLE_QUOTE + DOUBLE_QUOTE, \ + BINARY_OPERATORS from ToolChain.Compiler.MathParser import tokenize, \ infixToPostfix, \ @@ -69,6 +70,9 @@ class Compiler: ifOperator = "" # Holds the logical operator between two sides of an if boolean expression nestedFlag = 0 # Lets the compiler know if we're in an if statement ifLabel = 0 # For jump instructions, we need a unique label for every if statement + binaryLabel = 0 # For unique labels when dealing with binary operators in if/while statements + binaryList = [] # To pop/push labels when dealing with binary operators in if/while statements + binaryOperator = "" # Holds the current binary operator being used in if/while statements lineno = 0 # Line number for printing error messages functionArg = "" # Used to 
read a function call's arguments whileLabel = 0 # For while loops, we need a unique label @@ -485,7 +489,6 @@ def state5(self, char, output): else: # identifier was not valid - print(self.identifier) raise ValueError("Error at line {}".format(self.lineno)) elif char == "=" and self.expectFlag == 1: @@ -879,19 +882,11 @@ def state9(self, char, output): elif self.expectFlag == 1: # we're expecting to read a part of the left hand side's expression. if we read an operator, we evaluate # the expression and move on to reading the right hand side's expression. - if char in BOOLEAN_OPERATORS: - if self.mathFormula == "": - # empty expression (e.g. if ( a > ) ) - raise ValueError("Empty expression in if statement at line {}".format(self.lineno)) - tokens = tokenize(self.mathFormula) - postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) - self.ifOperator = self.convertOperatorToFlags(char) - self.expectFlag = 2 - self.mathFormula = "" - output.write(" MOV $A $C2\n") + if char in BOOLEAN_OPERATORS: + # if we read an operator, we may need to read another operator, so we go to flag 4 + self.ifOperator = char + self.expectFlag = 4 else: # otherwise we keep appending to our left hand side's math formula string @@ -899,19 +894,33 @@ def state9(self, char, output): elif self.expectFlag == 2: # here we evaluate the right hand side of an if expression. - if char == ")": - # closing parentheses, this indicates the end of our expression + if char == "{": + # opening curly brace, end of entire expression if self.mathFormula == "": # empty expression (e.g. 
if ( a > ) ) raise ValueError("Empty expression in if statement at line {}".format(self.lineno)) + # check the math expression to see if it ends with a closing parentheses (needed for if/while) + self.checkForClosingParentheses() tokens = tokenize(self.mathFormula) postfix = infixToPostfix(tokens) evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], self.arrayList, output) - self.expectFlag = 2 self.mathFormula = "" output.write(" MOV $A $D2\n") + output.write(" CMP $D2 $C2\n") + output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") + self.labelList.append(" L" + str(self.ifLabel)) + self.ifLabel += 1 + self.state = 5 + self.expectFlag = 0 + + if len(self.binaryList) > 0: + output.write("B" + self.binaryList.pop() + ":\n") + + elif char in BINARY_OPERATORS: + # in this case we've got some more expressions to evaluate. + self.binaryOperator = char self.expectFlag = 3 else: @@ -919,25 +928,45 @@ def state9(self, char, output): self.mathFormula += char elif self.expectFlag == 3: - # we've finished reading our if statement, now we wait for an opening curly brace - - if char in IGNORE_CHARS: - # we can still read whitespace or newline chars meanwhile - pass + tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + if char == self.binaryOperator: - elif char == "{": - # we have our opening curly brace, we can go back to state 5 and begin evaluating a new line + output.write(" MOV $A $D2\n") output.write(" CMP $D2 $C2\n") - output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") - self.labelList.append(" L" + str(self.ifLabel)) - self.ifLabel += 1 - self.state = 5 - self.expectFlag = 0 + + if char == "|": + self.reverseFlag() + output.write(" JMP " + self.ifOperator + " B" + str(self.binaryLabel) + "\n") + + if self.binaryLabel not in self.binaryList: + 
self.binaryList.append(str(self.binaryLabel)) + self.binaryLabel += 1 + else: + output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") self.mathFormula = "" + self.expectFlag = 1 else: - # we read something other than whitespace or an opening curly brace, this is invalid - raise ValueError("Syntax error at line {}".format(self.lineno)) + raise ValueError("Mismatch in binary operator at line {}".format(self.lineno)) + + elif self.expectFlag == 4: + # here we expect to read another piece of the operator. if not, we just add the char to the RHS's formula + if char in BOOLEAN_OPERATORS: + self.ifOperator += char + else: + temp = char + + tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + self.ifOperator = self.convertOperatorToFlags(self.ifOperator) + self.expectFlag = 2 + self.mathFormula = temp + output.write(" MOV $A $C2\n") def state10(self, char, output): """ @@ -962,39 +991,45 @@ def state10(self, char, output): elif self.expectFlag == 1: # we're expecting to read a part of the left hand side's expression. if we read an operator, we evaluate # the expression and move on to reading the right hand side's expression. - if char in BOOLEAN_OPERATORS: - if self.mathFormula == "": - # empty expression (e.g. 
if ( a > ) ) - raise ValueError("Empty expression in if statement at line {}".format(self.lineno)) - tokens = tokenize(self.mathFormula) - postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) - self.ifOperator = self.convertOperatorToFlags(char) - self.expectFlag = 2 - self.mathFormula = "" - output.write(" MOV $A $C2\n") + if char in BOOLEAN_OPERATORS: + # if we read an operator, we may need to read another operator, so we go to flag 4 + self.ifOperator = char + self.expectFlag = 4 else: # otherwise we keep appending to our left hand side's math formula string self.mathFormula += char elif self.expectFlag == 2: - # here we evaluate the right hand side of a while loop. - if char == ")": - # closing parentheses, this indicates the end of our expression + # here we evaluate the right hand side of an if expression. + if char == "{": + # opening curly brace, end of entire expression if self.mathFormula == "": # empty expression (e.g. if ( a > ) ) raise ValueError("Empty expression in if statement at line {}".format(self.lineno)) + # check the math expression to see if it ends with a closing parentheses (needed for if/while) + self.checkForClosingParentheses() tokens = tokenize(self.mathFormula) postfix = infixToPostfix(tokens) evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], self.arrayList, output) - self.expectFlag = 2 self.mathFormula = "" output.write(" MOV $A $D2\n") + output.write(" CMP $D2 $C2\n") + output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") + self.labelList.append(" L" + str(self.ifLabel)) + self.ifLabel += 1 + self.state = 5 + self.expectFlag = 0 + + if len(self.binaryList) > 0: + output.write("B" + self.binaryList.pop() + ":\n") + + elif char in BINARY_OPERATORS: + # in this case we've got some more expressions to evaluate. 
+ self.binaryOperator = char self.expectFlag = 3 else: @@ -1002,25 +1037,45 @@ def state10(self, char, output): self.mathFormula += char elif self.expectFlag == 3: - # we've finished reading our if statement, now we wait for an opening curly brace - - if char in IGNORE_CHARS: - # we can still read whitespace or newline chars meanwhile - pass + tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + if char == self.binaryOperator: - elif char == "{": - # we have our opening curly brace, we can go back to state 5 and begin evaluating a new line + output.write(" MOV $A $D2\n") output.write(" CMP $D2 $C2\n") - output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") - self.labelList.append(" L" + str(self.ifLabel)) - self.ifLabel += 1 - self.state = 5 - self.expectFlag = 0 + + if char == "|": + self.reverseFlag() + output.write(" JMP " + self.ifOperator + " B" + str(self.binaryLabel) + "\n") + + if self.binaryLabel not in self.binaryList: + self.binaryList.append(str(self.binaryLabel)) + self.binaryLabel += 1 + else: + output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") self.mathFormula = "" + self.expectFlag = 1 else: - # we read something other than whitespace or an opening curly brace, this is invalid - raise ValueError("Syntax error at line {}".format(self.lineno)) + raise ValueError("Mismatch in binary operator at line {}".format(self.lineno)) + + elif self.expectFlag == 4: + # here we expect to read another piece of the operator. 
if not, we just add the char to the RHS's formula + if char in BOOLEAN_OPERATORS: + self.ifOperator += char + else: + temp = char + + tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + self.ifOperator = self.convertOperatorToFlags(self.ifOperator) + self.expectFlag = 2 + self.mathFormula = temp + output.write(" MOV $A $C2\n") def state11(self, char, output): """ @@ -1647,7 +1702,6 @@ def state19(self, char, output): elif char == DOUBLE_QUOTE: self.quoteFlag = DOUBLE_QUOTE else: - print(char) raise ValueError("Incorrect syntax at line {}. Char should begin with \" or \'".format(self.lineno)) elif self.expectFlag == 1: @@ -1746,7 +1800,7 @@ def convertOperatorToFlags(self, char): flag = "" elif char == "<=": flag = "" - elif char == "=": + elif char == "==": flag = "" elif char == ">": flag = "" @@ -1768,7 +1822,6 @@ def assignArrayValues(self, output): startingLocation = self.varLocation[self.currentVar] if len(list) != self.arrayList[self.currentVar]: - print(list) raise ValueError("Incorrect number of values for array assignment at line {}".format(self.lineno)) for element in list: @@ -1780,4 +1833,45 @@ def assignArrayValues(self, output): output.write(" MEMW [4] #" + str(element) + " #" + str(startingLocation) + "\n") startingLocation += 4 + def checkForClosingParentheses(self): + """ + This method checks if the math expression ends with a closing parentheses. This is necessary for if statements + and while loops. + :return: + """ + + found = False + + length = len(self.mathFormula) - 1 + while not found and length > 0: + + if self.mathFormula[length] in IGNORE_CHARS: + # reading in reverse, we can still read spaces until the first relevant token + pass + elif self.mathFormula[length] == ")": + # closing parentheses means this expression may be valid. 
function has served its purpose and we break + found = True + self.mathFormula = self.mathFormula[0:length] + else: + # we read something that is not a closing parentheses, thus the expression is not valid + raise ValueError("Missing closing parentheses in statement at line {}".format(self.lineno)) + length -= 1 + + def reverseFlag(self): + """ + Function takes a flag for || binary operator and reverses the logic. This is needed because if an OR binary + operation is true, we can just jump straight to the expression within an if statement or while loop. + :return: + """ + + if self.ifOperator == "": + self.ifOperator = "" + elif self.ifOperator == "": + self.ifOperator = "" + elif self.ifOperator == "": + self.ifOperator = "" + elif self.ifOperator == "": + self.ifOperator = "" + elif self.ifOperator == "": + self.ifOperator = "" diff --git a/ToolChain/Compiler/Constants.py b/ToolChain/Compiler/Constants.py index 020c53f..9dbe687 100644 --- a/ToolChain/Compiler/Constants.py +++ b/ToolChain/Compiler/Constants.py @@ -32,6 +32,7 @@ ACCEPTED_TYPES = ["int", "char"] OPERATORS = ['+', '-', '*', '/'] +BINARY_OPERATORS = ["|", "&"] BOOLEAN_OPERATORS = ["<", ">", "="] IGNORE_CHARS = [" ", "\n"] REGISTER_NAMES = ["A", "B", "C", "D", "E", "F", "G"] diff --git a/ToolChain/Compiler/MathParser.py b/ToolChain/Compiler/MathParser.py index 4c8417d..32e409a 100644 --- a/ToolChain/Compiler/MathParser.py +++ b/ToolChain/Compiler/MathParser.py @@ -257,6 +257,7 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar if len(stack) != 1: # If the stack has more than or less than one element, the expression is incorrect. + print(stack) raise ValueError("invalid expression.") # our result is then "saved" into register A. The assignment can now be completed. 
@@ -299,4 +300,5 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar isinstance(int(result), int) output.write(" MOV #" + str(result) + " $A\n") except ValueError as e: + print(result) raise ValueError("Invalid mathematical expression") From 22b573743db0fb954c87d010c7b4f605674a8359 Mon Sep 17 00:00:00 2001 From: Chris LeBlanc Date: Thu, 19 Jul 2018 13:38:13 -0300 Subject: [PATCH 09/10] Cleaned up code, removed redundancies with helper functions --- ToolChain/Compiler/Compiler.py | 432 ++++++++++++++--------------- ToolChain/Compiler/CompilerInfo.md | 3 +- 2 files changed, 214 insertions(+), 221 deletions(-) diff --git a/ToolChain/Compiler/Compiler.py b/ToolChain/Compiler/Compiler.py index 0eb5ab8..192e0ad 100644 --- a/ToolChain/Compiler/Compiler.py +++ b/ToolChain/Compiler/Compiler.py @@ -149,66 +149,66 @@ def parse(self, char, output): """ if self.state == 0: - self.state0(char, output) + self.parseFunctionReturnType(char, output) elif self.state == 1: - self.state1(char, output) + self.parseFunctionName(char, output) elif self.state == 2: - self.state2(char, output) + self.parseFunctionArgumentType(char, output) elif self.state == 3: - self.state3(char, output) + self.parseFunctionArgumentName(char, output) elif self.state == 4: - self.state4(char, output) + self.countFunctionArguments(char, output) elif self.state == 5: - self.state5(char, output) + self.parsePrimaryIdentifier(char, output) elif self.state == 6: - self.state6(char, output) + self.parseIntegerVariableName(char, output) elif self.state == 7: - self.state7(char, output) + self.beginIntegerAssignment(char, output) elif self.state == 8: - self.state8(char, output) + self.parseFunctionCall(char, output) elif self.state == 9: - self.state9(char, output) + self.parseIfStatement(char, output) elif self.state == 10: - self.state10(char, output) + self.parseWhileLoop(char, output) elif self.state == 11: - self.state11(char, output) + self.parseReturnStatement(char, output) 
elif self.state == 12: - self.state12(char, output) + self.parseArrayDeclaration(char, output) elif self.state == 13: - self.state13(char, output) + self.assignValueAtArrayIndex(char, output) elif self.state == 14: - self.state14(char, output) + self.parsePointerInitialization(char, output) elif self.state == 15: - self.state15(char, output) + self.assignPointerValue(char, output) elif self.state == 16: - self.state16(char, output) + self.dereferencePointer(char, output) elif self.state == 17: - self.state17(char, output) + self.assignImmediateValueToPointer(char, output) elif self.state == 18: - self.state18(char, output) + self.parseCharVariable(char, output) elif self.state == 19: - self.state19(char, output) + self.assignCharValue(char, output) - def state0(self, char, output): + def parseFunctionReturnType(self, char, output): """ First step in parsing data. At this step, we begin to read the method header. We expect to read the return data type. @@ -218,18 +218,24 @@ def state0(self, char, output): """ if char in IGNORE_CHARS and self.expectFlag == 0: + # white space or new line before any relevant information pass + elif char in IGNORE_CHARS and self.expectFlag == 1: + # if we read a space while reading the method's return type, then we assume we're done if self.currentType in ACCEPTED_TYPES: self.state = 1 self.expectFlag = 0 else: + # the return type read is invalid in this case raise ValueError("Incorrect return type for method declaration at line {}.".format(self.lineno)) + else: + # we simply append the char to the current type self.currentType += char self.expectFlag = 1 - def state1(self, char, output): + def parseFunctionName(self, char, output): """ Here we expect to read the method's name. Once we reach a space or an opening parentheses, we add the method to the methodlist along with its data type. 
@@ -245,24 +251,21 @@ def state1(self, char, output): elif char == " " and self.expectFlag == 1: # we have our method name, we expect an opening parentheses some time after the first space - self.currentVar = "" - self.currentType = "" + self.resetGlobalValues("01000000000000") self.state = 2 elif char == "(": # We read the opening parentheses after the method name, no need to check for it later self.methodList[self.currentMethod] = {"retType": self.currentType} output.write(self.currentMethod + ":\n") - self.currentVar = "" - self.currentType = "" + self.resetGlobalValues("11000000000000") self.state = 2 - self.expectFlag = 0 else: self.currentMethod += char self.expectFlag = 1 - def state2(self, char, output): + def parseFunctionArgumentType(self, char, output): """ Deals with an argument's data type. This is the first step in determining the tuple: arg data type/arg name. :param char: char, Individual character read from input file @@ -279,6 +282,7 @@ def state2(self, char, output): self.expectFlag = 0 self.methodList[self.currentMethod] = {"retType": self.currentType} output.write(self.currentMethod + ":\n") + if self.currentMethod == "main": output.write(" MOV end $S\n") @@ -312,7 +316,7 @@ def state2(self, char, output): # append the character to the current type being read. self.currentType += char - def state3(self, char, output): + def parseFunctionArgumentName(self, char, output): """ This state reads the name of a method argument. Once we have the full name, we couple it with the data type read in state2, and we add it to the method's dict of variables. If we read a comma, we know we're ready to @@ -368,12 +372,12 @@ def state3(self, char, output): self.currentVar += char self.expectFlag = 1 - def state4(self, char, output): + def countFunctionArguments(self, char, output): """ - In this state, we've read all the arguments of a method declaration. 
Now we simply expect to read the opening + In this method, we've read all the arguments of a method declaration. Now we simply expect to read the opening curly brace "{" to signify the opening body of the method. Here, we also write the appropriate casm instructions to the output file. The stack pointer gets moved to "end" if it's the main method, and the S2 pointer must point - to the first argument pushed to the stack (if any). + to the first argument pushed to the stack (if any). This offset is determined by the number of arguments counted :param char: char, Individual character read from input file :param output: file, output file to write to :return: @@ -390,9 +394,12 @@ def state4(self, char, output): # in the body of another method to ensure the correct amount of variables are passed in. self.methodList[self.currentMethod]["totalVars"] = self.argCount self.state = 5 + if self.currentMethod == "main": + # the main method would technically be the bottom of the stack frame, so we initialize the stack pointer output.write(" MOV end $S\n") else: + # we offset the S2 pointer by the amount of arguments passed into the method if self.argCount > 0: output.write(" MOV $S $S2\n") output.write(" SUB #" + str(self.argCount * 4 + 4) + " $S2\n") @@ -402,7 +409,7 @@ def state4(self, char, output): else: raise ValueError("Syntax error, expecting \"{\", got {}".format(char)) - def state5(self, char, output): + def parsePrimaryIdentifier(self, char, output): """ Initial evaluation of a line within the body of a method. We read the input and concatenate to identifier string. Once we read a key token we check various cases to see where we need to go with out identifier. 
This is @@ -425,8 +432,7 @@ def state5(self, char, output): if self.identifier == "if": # identifier is an if statement self.state = 9 - self.identifier = "" - self.expectFlag = 0 + self.resetGlobalValues("10000100000000") self.nestedFlag += 1 elif self.identifier == "while": @@ -437,26 +443,23 @@ def state5(self, char, output): self.whileLabel += 1 self.nestedFlag += 1 self.whileFlag += 1 - self.identifier = "" - self.expectFlag = 0 + self.resetGlobalValues("10000100000000") elif self.identifier == "return": # identifier is a return statement - self.expectFlag = 0 + self.resetGlobalValues("10000100000000") self.state = 11 - self.identifier = "" elif (self.identifier in self.varList) or self.identifier in self.methodList[self.currentMethod]: # the identifier is a variable that has already been declared self.currentVar = self.identifier - self.identifier = "" + self.resetGlobalValues("00000100000000") self.state = 6 self.expectFlag = 2 elif self.identifier in ACCEPTED_TYPES: # identifier is a data type, new variable declaration self.currentType = self.identifier - self.identifier = "" if self.currentType == "int": # new integer variable declaration goes to state 6 @@ -464,28 +467,28 @@ def state5(self, char, output): elif self.currentType == "char": # new char variable declaration goes to state 18 self.state = 18 - self.expectFlag = 0 + + self.resetGlobalValues("10000100000000") elif self.identifier in self.methodList: # identifier is a function call - self.expectFlag = 0 self.state = 8 self.functionCall = self.identifier - self.identifier = "" + self.resetGlobalValues("10000100000000") elif self.identifier in self.pointerList: # identifier is a pointer self.expectFlag = 2 self.state = 14 self.currentVar = self.identifier - self.identifier = "" + self.resetGlobalValues("00000100000000") elif self.identifier in self.charList: # identifier is a char variable self.expectFlag = 4 self.currentVar = self.identifier - self.identifier = "" self.state = 19 + 
self.resetGlobalValues("00000100000000") else: # identifier was not valid @@ -495,24 +498,22 @@ def state5(self, char, output): # here we have a variable assignment. Variable must be already declared in this case if (self.identifier in self.varList) or self.identifier in self.methodList[self.currentMethod]: - self.expectFlag = 0 self.currentVar = self.identifier self.state = 7 - self.identifier = "" + self.resetGlobalValues("10000100000000") elif self.identifier in self.pointerList: # identifier is a pointer - self.expectFlag = 0 self.state = 15 self.currentVar = self.identifier - self.identifier = "" + self.resetGlobalValues("10000100000000") elif self.identifier in self.charList: # identifier is a char variable self.expectFlag = 4 self.currentVar = self.identifier - self.identifier = "" self.state = 19 + self.resetGlobalValues("00000100000000") else: raise ValueError("Invalid assignment at line {}: must be valid variable".format(self.lineno)) @@ -520,9 +521,8 @@ def state5(self, char, output): elif char == "[" and self.expectFlag == 1: # this implies an already declared array self.currentVar = self.identifier - self.expectFlag = 0 - self.identifier = "" self.state = 13 + self.resetGlobalValues("10000100000000") elif char == "(" and self.expectFlag == 1: # immediately after the identifier, we read an opening parentheses. 
Here we cover all possible cases @@ -531,13 +531,13 @@ def state5(self, char, output): # identifier is a function call self.state = 8 self.functionCall = self.identifier - self.identifier = "" + self.resetGlobalValues("00000100000000") elif self.identifier == "if": # identifier is an if statement self.state = 9 - self.identifier = "" self.nestedFlag += 1 + self.resetGlobalValues("00000100000000") elif self.identifier == "while": # identifier is a while loop indicator @@ -547,7 +547,7 @@ def state5(self, char, output): self.whileLabel += 1 self.whileFlag += 1 self.nestedFlag += 1 - self.identifier = "" + self.resetGlobalValues("00000100000000") else: # identifier was not valid @@ -559,12 +559,9 @@ def state5(self, char, output): if self.nestedFlag == 0: # if we aren't in any while/if statements, this is the end of our method self.state = 0 - self.currentMethod = "" - self.argCount = 0 - self.currentVar = "" - self.currentType = "" self.varList.clear() self.varLocation.clear() + self.resetGlobalValues("01110100000010") else: # otherwise, we print the appropriate instructions to end the while loop or if statement @@ -580,7 +577,7 @@ def state5(self, char, output): self.identifier += char self.expectFlag = 1 - def state6(self, char, output): + def parseIntegerVariableName(self, char, output): """ Initial variable name declaration. We already have the data type, so now we read its name until we get a relevant token to determine what to do with the variable. @@ -605,21 +602,19 @@ def state6(self, char, output): # we have the variable name, and we see that an assignment will happen self.verifyVariable() self.state = 7 - self.expectFlag = 0 + self.resetGlobalValues("10000000000000") elif char == ";" and self.expectFlag == 1: # end of variable declaration. 
we assign its memory location and add it to the variable list self.verifyVariable() - self.currentVar = "" - self.currentType = "" self.state = 5 - self.expectFlag = 0 + self.resetGlobalValues("11100000000000") elif char == "[" and self.expectFlag == 1: # Here we're ready to declare a new array self.validName(self.currentVar) self.state = 12 - self.expectFlag = 0 + self.resetGlobalValues("10000000000000") elif self.expectFlag == 2: # We reach this step if we have the variable name and we read at least one space @@ -639,15 +634,13 @@ def state6(self, char, output): self.validName(self.currentVar) self.state = 7 - self.expectFlag = 0 + self.resetGlobalValues("10000000000000") elif char == ";": # simple declaration (e.g. int a;), we add it to the variable list and allocate a memory location self.verifyVariable() - self.currentVar = "" - self.currentType = "" self.state = 5 - self.expectFlag = 0 + self.resetGlobalValues("11100000000000") else: raise ValueError("Incorrect syntax at line {}".format(self.lineno)) @@ -657,7 +650,7 @@ def state6(self, char, output): self.currentVar += char self.expectFlag = 1 - def state7(self, char, output): + def beginIntegerAssignment(self, char, output): """ Begins variable assignment. This could either be a math formula, or a function call :param char: char, Individual character read from input file @@ -697,10 +690,7 @@ def state7(self, char, output): elif char == ";": # End of our math statement. We may begin the evaluation and assign the result to the current variable - tokens = tokenize(self.mathFormula) - postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) + self.evaluateMathExpression(output) if self.currentVar in self.methodList[self.currentMethod]: # The variable is an argument passed into the function. 
We use the stack pointer to fetch its @@ -716,17 +706,14 @@ def state7(self, char, output): # now we reset everything self.state = 5 - self.mathFormula = "" - self.currentType = "" - self.currentVar = "" - self.expectFlag = 0 + self.resetGlobalValues("11101000000000") else: # if we don't read anything else of interest, we simply append the character to the math formula string self.mathFormula += char self.expectFlag = 1 - def state8(self, char, output): + def parseFunctionCall(self, char, output): """ This deals with a function call. This may be on its own line or part of a variable assignment. :param char: char, Individual character read from input file @@ -759,16 +746,14 @@ def state8(self, char, output): elif re.match(ARRAY_PATTERN, self.functionArg): # variable is an array index - match = re.search(ARRAY_PATTERN, self.functionArg) - operands = match.group(0) - operands = operands.split("[") - operands[1] = operands[1].replace("]", "") + operands = self.parseArrayPattern() output.write(" PUSH #" + str(self.varLocation[operands[0]] + int(operands[1]) * 4) + "\n") else: raise ValueError("Invalid variable at line {}".format(self.lineno)) + self.expectFlag = 1 - self.functionArg = "" + self.resetGlobalValues("00000001000000") self.argCount += 1 elif char in IGNORE_CHARS: @@ -782,10 +767,7 @@ def state8(self, char, output): elif re.match(ARRAY_PATTERN, self.functionArg): # variable is an array index - match = re.search(ARRAY_PATTERN, self.functionArg) - operands = match.group(0) - operands = operands.split("[") - operands[1] = operands[1].replace("]", "") + operands = self.parseArrayPattern() output.write(" PUSH #" + str(self.varLocation[operands[0]] + int(operands[1]) * 4) + "\n") else: @@ -808,12 +790,13 @@ def state8(self, char, output): raise ValueError("Invalid variable at line {}".format(self.lineno)) self.expectFlag = 1 - self.functionArg = "" + self.resetGlobalValues("00000001000000") self.argCount += 1 elif char in IGNORE_CHARS: # we can keep ignoring 
whitespace/new line until we read a correct token pass + elif char == ")": # end of arguments. we now expect ";" to end the statement if self.functionArg in self.varList: @@ -837,13 +820,9 @@ def state8(self, char, output): if self.currentVar != "": output.write(" MEMW [4] $A #" + str(self.varLocation[self.currentVar]) + "\n") + output.write(" SUB #" + str(self.argCount * 4) + " $S\n") - self.functionCall = "" - self.functionArg = "" - self.mathFormula = "" - self.currentVar = "" - self.argCount = 0 - self.expectFlag = 0 + self.resetGlobalValues("10101011000010") else: raise ValueError("# of arguments don't match that of function call at line {}".format(self.lineno)) @@ -860,12 +839,11 @@ def state8(self, char, output): # after a valid function call, we don't read an opening parentheses or whitespace. this is invalid syntax raise ValueError("Invalid syntax after function call at line {}".format(self.lineno)) - def state9(self, char, output): + def parseIfStatement(self, char, output): """ This state will deal with if statements. We begin by evaluating the left hand side and placing the result in register C2. Then we evaluate the right hand side and place in register D2. It's important to note that at this time, while loops don't support expressions that contain additional parentheses. 
- TODO: figure out how to handle multiple parentheses :param char: char, Individual character read from input file :param output: file, output file to write to :return: @@ -902,18 +880,16 @@ def state9(self, char, output): # check the math expression to see if it ends with a closing parentheses (needed for if/while) self.checkForClosingParentheses() - tokens = tokenize(self.mathFormula) - postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) - self.mathFormula = "" + self.evaluateMathExpression(output) + output.write(" MOV $A $D2\n") output.write(" CMP $D2 $C2\n") output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") + self.labelList.append(" L" + str(self.ifLabel)) self.ifLabel += 1 self.state = 5 - self.expectFlag = 0 + self.resetGlobalValues("10001000110000") if len(self.binaryList) > 0: output.write("B" + self.binaryList.pop() + ":\n") @@ -928,25 +904,24 @@ def state9(self, char, output): self.mathFormula += char elif self.expectFlag == 3: - tokens = tokenize(self.mathFormula) - postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) + self.evaluateMathExpression(output) if char == self.binaryOperator: output.write(" MOV $A $D2\n") output.write(" CMP $D2 $C2\n") if char == "|": + # or binary operator causes the conditional flag to flip, since if true, we can skip immediately self.reverseFlag() output.write(" JMP " + self.ifOperator + " B" + str(self.binaryLabel) + "\n") if self.binaryLabel not in self.binaryList: self.binaryList.append(str(self.binaryLabel)) self.binaryLabel += 1 + else: output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") - self.mathFormula = "" + self.resetGlobalValues("00001000000000") self.expectFlag = 1 else: @@ -956,25 +931,22 @@ def state9(self, char, output): # here we expect to read another piece of 
the operator. if not, we just add the char to the RHS's formula if char in BOOLEAN_OPERATORS: self.ifOperator += char + temp = "" else: temp = char - tokens = tokenize(self.mathFormula) - postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) + self.evaluateMathExpression(output) self.ifOperator = self.convertOperatorToFlags(self.ifOperator) self.expectFlag = 2 self.mathFormula = temp output.write(" MOV $A $C2\n") - def state10(self, char, output): + def parseWhileLoop(self, char, output): """ This state will deal with while loops. We begin by evaluating the left hand side and placing the result in register C2. Then we evaluate the right hand side and place in register D2. When writing the assembly code, we do exactly as an if statement; however, at the end of the while loop, we need to have a jump condition to go - back to the beginning of the loop. It's important to note that at this time, while loops don't support - expressions that contain additional parentheses. TODO: figure out how to handle multiple parentheses + back to the beginning of the loop. 
:param char: char, Individual character read from input file :param output: file, output file to write to :return: @@ -1011,18 +983,16 @@ def state10(self, char, output): # check the math expression to see if it ends with a closing parentheses (needed for if/while) self.checkForClosingParentheses() - tokens = tokenize(self.mathFormula) - postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) - self.mathFormula = "" + self.evaluateMathExpression(output) + output.write(" MOV $A $D2\n") output.write(" CMP $D2 $C2\n") output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") + self.labelList.append(" L" + str(self.ifLabel)) self.ifLabel += 1 self.state = 5 - self.expectFlag = 0 + self.resetGlobalValues("10001000110000") if len(self.binaryList) > 0: output.write("B" + self.binaryList.pop() + ":\n") @@ -1037,10 +1007,7 @@ def state10(self, char, output): self.mathFormula += char elif self.expectFlag == 3: - tokens = tokenize(self.mathFormula) - postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) + self.evaluateMathExpression(output) if char == self.binaryOperator: output.write(" MOV $A $D2\n") @@ -1055,8 +1022,8 @@ def state10(self, char, output): self.binaryLabel += 1 else: output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") - self.mathFormula = "" self.expectFlag = 1 + self.resetGlobalValues("00001000000000") else: raise ValueError("Mismatch in binary operator at line {}".format(self.lineno)) @@ -1065,19 +1032,17 @@ def state10(self, char, output): # here we expect to read another piece of the operator. 
if not, we just add the char to the RHS's formula if char in BOOLEAN_OPERATORS: self.ifOperator += char + temp = "" else: temp = char - tokens = tokenize(self.mathFormula) - postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) + self.evaluateMathExpression(output) self.ifOperator = self.convertOperatorToFlags(self.ifOperator) self.expectFlag = 2 self.mathFormula = temp output.write(" MOV $A $C2\n") - def state11(self, char, output): + def parseReturnStatement(self, char, output): """ This state deals with return statements. When returning values, we follow the cdecl calling convention. Variables in function calls are pushed onto the stack, and the values returned are placed into register A. @@ -1093,13 +1058,9 @@ def state11(self, char, output): elif char == ";": # End of our math statement. - tokens = tokenize(self.mathFormula) - postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) - self.expectFlag = 0 - self.mathFormula = "" + self.evaluateMathExpression(output) self.state = 5 + self.resetGlobalValues("10001000000000") output.write(" RET\n") else: @@ -1107,7 +1068,7 @@ def state11(self, char, output): self.expectFlag = 1 self.mathFormula += char - def state12(self, char, output): + def parseArrayDeclaration(self, char, output): """ This state handles the declaration of an array. We check to see if any character before closing bracket "]" is a valid integer for the size of the array. 
We also verify if there is a value assignment following the array @@ -1134,7 +1095,7 @@ def state12(self, char, output): self.arrayList[self.currentVar] = int(self.arrayLength) self.varLocation[self.currentVar] = self.memoryLocation self.memoryLocation += int(self.arrayLength) * 4 - self.arrayLength = "" + self.resetGlobalValues("00000000001000") self.expectFlag = 1 else: # we have a case where nothing was put in the brackets, ex: "int a[];" @@ -1153,10 +1114,8 @@ def state12(self, char, output): elif char == ";": # we're done with the declaration. - self.expectFlag = 0 - self.currentVar = "" - self.currentType = "" self.state = 5 + self.resetGlobalValues("11100000000000") else: # we read a character that's not a semicolon, equal sign, or space @@ -1196,18 +1155,14 @@ def state12(self, char, output): elif char == ";": # end of statement, math expression is done, everything is set to go back to state 5. self.assignArrayValues(output) - self.expectFlag = 0 - self.mathFormula = "" - self.arrayLength = "" - self.currentVar = "" - self.currentType = "" self.state = 5 + self.resetGlobalValues("11101000001000") else: # we read something other than a semi-colon or a space raise ValueError("Incorrect syntax at line {}".format(self.lineno)) - def state13(self, char, output): + def assignValueAtArrayIndex(self, char, output): """ This state deals with assigning a value to a specific array index. Here we assume the array has already been declared, and we're simply assigning a value to a specific index. 
@@ -1251,23 +1206,17 @@ def state13(self, char, output): if char == ";": # we're done reading the math expression, so we call the mathparser functions and reset - tokens = tokenize(self.mathFormula) - postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) + self.evaluateMathExpression(output) output.write(" MEMW [4] $A #" + str(self.varLocation[self.currentVar] + int(self.arrayLength) * 4) + "\n") - - self.expectFlag = 0 - self.mathFormula = "" - self.arrayLength = "" self.state = 5 + self.resetGlobalValues("10001000001000") else: # otherwise the char gets added to the math formula self.mathFormula += char - def state14(self, char, output): + def parsePointerInitialization(self, char, output): """ This method deals with initialization of pointers. Pointer variable can only be assigned a single variable, with the & prefix. The variable must already be declared, and cannot be paired with any other operand. 
@@ -1307,9 +1256,7 @@ def state14(self, char, output): self.pointerList.append(self.currentVar) self.varLocation[self.currentVar] = self.memoryLocation self.memoryLocation += 4 - self.currentVar = "" - self.currentType = "" - self.expectFlag = 0 + self.resetGlobalValues("11100000000000") else: # otherwise we assume we're still reading the name of the variable being declared @@ -1325,10 +1272,10 @@ def state14(self, char, output): elif char == "=": # equals sign means we're assigning a value to the pointer (memory address) self.state = 15 - self.expectFlag = 0 self.pointerList.append(self.currentVar) self.varLocation[self.currentVar] = self.memoryLocation self.memoryLocation += 4 + self.resetGlobalValues("10000000000000") elif char == ";": # end of declaration, we simple allocate memory location without giving a value @@ -1336,15 +1283,13 @@ def state14(self, char, output): self.pointerList.append(self.currentVar) self.varLocation[self.currentVar] = self.memoryLocation self.memoryLocation += 4 - self.currentVar = "" - self.currentType = "" - self.expectFlag = 0 + self.resetGlobalValues("11100000000000") else: # we read something other than "=" or ";" in this context, which would be incorrect raise ValueError("Incorrect syntax at line {}".format(self.lineno)) - def state15(self, char, output): + def assignPointerValue(self, char, output): """ This method assigns a value to a pointer. The value must be a valid memory address (and must thus be referenced by a valid variable using the & character). @@ -1382,10 +1327,7 @@ def state15(self, char, output): elif re.match(ARRAY_PATTERN, self.functionArg): # variable is an array index. 
we parse the variable name and index to determine if they're valid - match = re.search(ARRAY_PATTERN, self.functionArg) - operands = match.group(0) - operands = operands.split("[") - operands[1] = operands[1].replace("]", "") + operands = self.parseArrayPattern() if operands[0] not in self.arrayList: raise ValueError("Invalid array variable at line {}".format(self.lineno)) @@ -1415,20 +1357,14 @@ def state15(self, char, output): elif re.match(ARRAY_PATTERN, self.functionArg): # variable is an array index, we get the memory location at index 0 and add the correct offset - match = re.search(ARRAY_PATTERN, self.functionArg) - operands = match.group(0) - operands = operands.split("[") - operands[1] = operands[1].replace("]", "") + operands = self.parseArrayPattern() output.write(" #" + str(self.varLocation[self.mathFormula] + operands[1] * 4) + " #" + str(self.varLocation[self.currentVar]) + "\n") else: raise ValueError("Invalid variable name at line {}".format(self.lineno)) - self.expectFlag = 0 - self.currentVar = "" - self.currentType = "" - self.mathFormula = "" self.state = 5 + self.resetGlobalValues("111010000000000") else: self.mathFormula += char @@ -1451,10 +1387,7 @@ def state15(self, char, output): elif re.match(ARRAY_PATTERN, self.functionArg): # variable is an array index, we get the memory location at index 0 and add the correct offset - match = re.search(ARRAY_PATTERN, self.functionArg) - operands = match.group(0) - operands = operands.split("[") - operands[1] = operands[1].replace("]", "") + operands = self.parseArrayPattern() output.write(" #" + str(self.varLocation[self.mathFormula] + operands[1] * 4) + " #" + str(self.varLocation[self.currentVar]) + "\n") @@ -1462,17 +1395,14 @@ def state15(self, char, output): # we already did the check in flag 1, so this technically shouldn't execute and something went wrong raise ValueError("Invalid variable name at line {}".format(self.lineno)) - self.expectFlag = 0 - self.currentVar = "" - self.currentType = "" 
- self.mathFormula = "" self.state = 5 + self.resetGlobalValues("111010000000000") else: # we're expecting the end of the statement ";", so anything else in invalid raise ValueError("Syntax error at line {}".format(self.lineno)) - def state16(self, char, output): + def dereferencePointer(self, char, output): """ This state deals with dereferencing a pointer. Any variable can be assigned a pointer dereference, but it must stand alone as an operand. The memory location stored in the pointer must be a valid variable. It should be @@ -1508,10 +1438,8 @@ def state16(self, char, output): output.write(" MEMR [4] $A $B\n") output.write(" MEMW [4] $B #" + str(self.varLocation[self.currentVar]) + "\n") - self.expectFlag = 0 - self.currentVar = "" - self.currentType = "" self.state = 5 + self.resetGlobalValues("111000000000000") else: # otherwise, we're still reading the pointer variable's name @@ -1533,16 +1461,14 @@ def state16(self, char, output): output.write(" MEMR [4] $A $B\n") output.write(" MEMW [4] $B #" + str(self.varLocation[self.currentVar]) + "\n") - self.expectFlag = 0 - self.currentVar = "" - self.currentType = "" self.state = 5 + self.resetGlobalValues("111000000000000") else: # we didn't read a semi colon or space character, so the syntax is incorrect raise ValueError("Incorrect syntax at line {}".format(self.lineno)) - def state17(self, char, output): + def assignImmediateValueToPointer(self, char, output): """ This method allows you to assign an immediate value to a pointer. You're risking accessing an invalid memory location by doing this, however. 
The format should be "*var = int" @@ -1604,23 +1530,17 @@ def state17(self, char, output): # can't have an empty expression (ex: *pointer = ;) raise ValueError("Empty operand at line {}".format(self.lineno)) - tokens = tokenize(self.mathFormula) - postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) + self.evaluateMathExpression(output) output.write(" MEMW [4] $A #" + str(self.varLocation[self.currentVar]) + "\n") - self.currentVar = "" - self.currentType = "" - self.expectFlag = 0 - self.mathFormula = "" self.state = 5 + self.resetGlobalValues("111010000000000") else: # otherwise we keep appending to our math formula self.mathFormula += char - def state18(self, char, output): + def parseCharVariable(self, char, output): """ This state takes in the name of the char variable, then appends it to the char list. :param char: @@ -1643,17 +1563,15 @@ def state18(self, char, output): self.charList.append(self.currentVar) self.varLocation[self.currentVar] = self.memoryLocation self.memoryLocation += 4 - self.currentVar = "" self.state = 5 - self.expectFlag = 0 - self.currentType = "" + self.resetGlobalValues("111000000000000") elif char == "=": self.charList.append(self.currentVar) self.varLocation[self.currentVar] = self.memoryLocation self.memoryLocation += 4 self.state = 19 - self.expectFlag = 0 + self.resetGlobalValues("100000000000000") else: self.currentVar += char @@ -1666,22 +1584,20 @@ def state18(self, char, output): self.charList.append(self.currentVar) self.varLocation[self.currentVar] = self.memoryLocation self.memoryLocation += 4 - self.currentVar = "" self.state = 5 - self.expectFlag = 0 - self.currentType = "" + self.resetGlobalValues("111000000000000") elif char == "=": self.charList.append(self.currentVar) self.varLocation[self.currentVar] = self.memoryLocation self.memoryLocation += 4 self.state = 19 - self.expectFlag = 0 + 
self.resetGlobalValues("100000000000000") else: raise ValueError("Invalid syntax at line {}".format(self.lineno)) - def state19(self, char, output): + def assignCharValue(self, char, output): """ This method accepts a value for a char variable. It should be noted that chars will always be a single character. The char must be surrounded by either single quotes or double quotes. These must match, meaning we @@ -1725,13 +1641,10 @@ def state19(self, char, output): if char in IGNORE_CHARS: pass elif char == ";": - output.write(" MEMW [4] #" + str(ord(self.mathFormula)) + " #" + str(self.varLocation[self.currentVar]) + "\n") - self.currentVar = "" + output.write(" MEMW [4] #" + str(ord(self.mathFormula)) + " #" + + str(self.varLocation[self.currentVar]) + "\n") self.state = 5 - self.expectFlag = 0 - self.currentType = "" - self.mathFormula = "" - self.quoteFlag = "" + self.resetGlobalValues("111010000000100") else: raise ValueError("Incorrect syntax at line {}".format(self.lineno)) @@ -1786,8 +1699,7 @@ def addVariableToMethodDict(self): self.methodList[self.currentMethod][self.currentVar] = (self.currentType, self.argCount) self.argCount += 1 - self.currentType = "" - self.currentVar = "" + self.resetGlobalValues("011000000000000") def convertOperatorToFlags(self, char): """ @@ -1875,3 +1787,83 @@ def reverseFlag(self): elif self.ifOperator == "": self.ifOperator = "" + def resetGlobalValues(self, binaryValue): + """ + Due to the large number of temporary variables, this method acts as a global "reset" method that handles every + class variable necessary. The binary number passed in represents whether or not each variable needs to be reset. + For example, 1010001 would mean the first, third, and last variable require a reset. This is used to simplify + the compiler's code, since many variables need resets at different intervals. 
+ :param binaryValue: + :return: + """ + + if binaryValue[0] == "1": + self.expectFlag = 0 + + if binaryValue[1] == "1": + self.currentType = "" + + if binaryValue[2] == "1": + self.currentVar = "" + + if binaryValue[3] == "1": + self.currentMethod = "" + + if binaryValue[4] == "1": + self.mathFormula = "" + + if binaryValue[5] == "1": + self.identifier = "" + + if binaryValue[6] == "1": + self.functionCall = "" + + if binaryValue[7] == "1": + self.functionArg = "" + + if binaryValue[8] == "1": + self.ifOperator = "" + + if binaryValue[9] == "1": + self.binaryOperator = "" + + if binaryValue[10] == "1": + self.arrayLength = "" + + if binaryValue[11] == "1": + self.quoteFlag = "" + + if binaryValue[11] == "1": + self.argCount = 0 + + if binaryValue[11] == "1": + self.variableCount = 0 + + def evaluateMathExpression(self, output): + """ + This is a helper method that removes redundancy from the compiler code. When we want to evaluate a math + expression, the same steps are followed universally. We tokenize the string into individual arguments, + parse the tokens from infix to postfix, then we evaluate the postfix. + :return: + """ + + tokens = tokenize(self.mathFormula) + postfix = infixToPostfix(tokens) + evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output) + + def parseArrayPattern(self): + """ + This is a helper method that removes redundancy from the compiler code. When we're looking to match a pattern + for an array value at a specific index, the same steps are followed universally. We search for an array pattern + (a value followed by a closing square bracket, i.e. "45]"), and extract the value which will be the array's + desired index. 
+ :return: + """ + + match = re.search(ARRAY_PATTERN, self.functionArg) + operands = match.group(0) + operands = operands.split("[") + operands[1] = operands[1].replace("]", "") + + return operands diff --git a/ToolChain/Compiler/CompilerInfo.md b/ToolChain/Compiler/CompilerInfo.md index 09923c3..33f138d 100644 --- a/ToolChain/Compiler/CompilerInfo.md +++ b/ToolChain/Compiler/CompilerInfo.md @@ -23,7 +23,8 @@ for parentheses to fix this). maximum, as it varies depending on how many variables are used. This maximum is simply for consistency. * Function calls can't be used in complex variable assignment. They can only be used if it's the only operand. * Pointers can't dereference other pointers. - +* array values must be assigned directly after array variable declaration. However, specific array indices can be +assigned any time - - - This file is part of Spartacus project Copyright (C) 2018 CSE From 832ad34f60183d699474b60a927646780e079050 Mon Sep 17 00:00:00 2001 From: Chris LeBlanc Date: Mon, 23 Jul 2018 12:22:28 -0300 Subject: [PATCH 10/10] Organized how ASM code is printed, added C test file to show functionality --- ToolChain/Compiler/Compiler.py | 347 +++++++++++++++++------------ ToolChain/Compiler/CompilerInfo.md | 8 +- ToolChain/Compiler/Constants.py | 1 + ToolChain/Compiler/MathParser.py | 81 +++---- test.c | 50 +++++ 5 files changed, 307 insertions(+), 180 deletions(-) create mode 100644 test.c diff --git a/ToolChain/Compiler/Compiler.py b/ToolChain/Compiler/Compiler.py index 192e0ad..33d37d0 100644 --- a/ToolChain/Compiler/Compiler.py +++ b/ToolChain/Compiler/Compiler.py @@ -27,7 +27,8 @@ ARRAY_PATTERN, \ SINGLE_QUOTE, \ DOUBLE_QUOTE, \ - BINARY_OPERATORS + BINARY_OPERATORS, \ + MEMORY_START from ToolChain.Compiler.MathParser import tokenize, \ infixToPostfix, \ @@ -52,37 +53,45 @@ class Compiler: details on supported features and limitations. 
""" - state = 0 # "States" are used to determine our next path for processing the C file - currentVar = "" # Name of variable being evaluated - currentType = "" # Current data type being read, before method/variable declaration - currentMethod = "" # String containing the current method being evaluated - expectFlag = 0 # Used to control what input we expect next - mathFormula = "" # Will contain our fully assembled math expressions for variable assignments - memoryLocation = 0x40000000 # Memory location for local variables. - varList = [] # Contains a list of variable names - varLocation = {} # Contains the memory location for all variables - methodList = {} # List of methods, along with their return type, variables (and types), and # of args - argCount = 0 # Used for number of operands in math expression, args in function calls, etc. - variableCount = 0 # Number of variables declared in current function. - identifier = "" # Used to determine first token of a line - functionCall = "" # Name of the function we're calling when doing variable assignment - whileFlag = 0 # Lets the compiler know if we're in a while loop - ifOperator = "" # Holds the logical operator between two sides of an if boolean expression - nestedFlag = 0 # Lets the compiler know if we're in an if statement - ifLabel = 0 # For jump instructions, we need a unique label for every if statement - binaryLabel = 0 # For unique labels when dealing with binary operators in if/while statements - binaryList = [] # To pop/push labels when dealing with binary operators in if/while statements - binaryOperator = "" # Holds the current binary operator being used in if/while statements - lineno = 0 # Line number for printing error messages - functionArg = "" # Used to read a function call's arguments - whileLabel = 0 # For while loops, we need a unique label - labelList = [] # List containing names of labels for if/while jumps - whileList = [] # List containing the names of while loops - arrayList = {} # Dict 
containing variables that are arrays - arrayLength = "" # Length of current array variable being evaluated - pointerList = [] # List containing variables that are pointers - charList = [] # List containing variables that are chars - quoteFlag = "" # Keeps track of whether we used single or double quote to declare a char variable + memoryLocation = MEMORY_START # Memory location for local variables. + state = 0 # "States" are used to determine our next path for processing the C file + lineno = 0 # Line number for printing error messages + + currentVar = "" # Name of variable being evaluated + currentType = "" # Current data type being read, before method/variable declaration + currentMethod = "" # String containing the current method being evaluated + identifier = "" # Used to determine first token of a line + functionCall = "" # Name of the function we're calling when doing variable assignment + functionArg = "" # Used to read a function call's arguments + mathFormula = "" # Will contain our fully assembled math expressions for variable assignments + ifOperator = "" # Holds the logical operator between two sides of an if boolean expression + binaryOperator = "" # Holds the current binary operator being used in if/while statements + arrayLength = "" # Length of current array variable being evaluated + quoteFlag = "" # Keeps track of whether we used single or double quote to declare a char variable + + expectFlag = 0 # Used to control what input we expect next + whileFlag = 0 # Lets the compiler know if we're in a while loop + nestedFlag = 0 # Lets the compiler know if we're in an if statement + + argCount = 0 # Used for number of operands in math expression, args in function calls, etc. + variableCount = 0 # Number of variables declared in current function. 
+ + ifLabel = 0 # For jump instructions, we need a unique label for every if statement + binaryLabel = 0 # For unique labels when dealing with binary operators in if/while statements + whileLabel = 0 # For while loops, we need a unique label + + binaryList = [] # To pop/push labels when dealing with binary operators in if/while statements + labelList = [] # List containing names of labels for if/while jumps + whileList = [] # List containing the names of while loops + pointerList = [] # List containing variables that are pointers + charList = [] # List containing variables that are chars + varList = [] # Contains a list of variable names + arrayList = {} # Dict containing variables that are arrays + varLocation = {} # Contains the memory location for all variables + methodList = {} # List of methods, along with their return type, variables (and types), and # of args + + mainFunctionASM = "" # String that will contain all the assembled casm code for the main function + otherFunctionASM = "" # String that will contain all the assembled casm code for all other functions def __init__(self, inputFile=None, outputFile=None): """ @@ -123,13 +132,29 @@ def readFile(self, inputFile, outputFile): raise OSError("Couldn't open file {}".format(outputFile)) for line in inputFile: + # read each line individually + line = line.split("//", maxsplit=1)[0] # Remove comments from line self.lineno += 1 + for x in line: - self.parse(x, output) + # parse each character at a time to make use of each state correctly + asmText = self.parse(x, "") + + if self.currentMethod == "main": + # we check if the method being evaluated is the main method + self.mainFunctionASM += asmText + else: + # otherwise, we assume it's another function and append to the otherFunctionASM string + self.otherFunctionASM += asmText + # We want the program to read the main function first, so we'll print that to the output first + output.write(self.mainFunctionASM) + output.write(" JMP <> end\n") + 
output.write(self.otherFunctionASM) output.write("end:\n") if self.currentMethod != "": + # If we finish reading input and we still have a method being evaluated, there's a curly brace missing raise ValueError("Missing closing curly brace for end of method/if/while.") try: @@ -149,64 +174,66 @@ def parse(self, char, output): """ if self.state == 0: - self.parseFunctionReturnType(char, output) + output = self.parseFunctionReturnType(char, output) elif self.state == 1: - self.parseFunctionName(char, output) + output = self.parseFunctionName(char, output) elif self.state == 2: - self.parseFunctionArgumentType(char, output) + output = self.parseFunctionArgumentType(char, output) elif self.state == 3: - self.parseFunctionArgumentName(char, output) + output = self.parseFunctionArgumentName(char, output) elif self.state == 4: - self.countFunctionArguments(char, output) + output = self.countFunctionArguments(char, output) elif self.state == 5: - self.parsePrimaryIdentifier(char, output) + output = self.parsePrimaryIdentifier(char, output) elif self.state == 6: - self.parseIntegerVariableName(char, output) + output = self.parseIntegerVariableName(char, output) elif self.state == 7: - self.beginIntegerAssignment(char, output) + output = self.beginIntegerAssignment(char, output) elif self.state == 8: - self.parseFunctionCall(char, output) + output = self.parseFunctionCall(char, output) elif self.state == 9: - self.parseIfStatement(char, output) + output = self.parseIfStatement(char, output) elif self.state == 10: - self.parseWhileLoop(char, output) + output = self.parseWhileLoop(char, output) elif self.state == 11: - self.parseReturnStatement(char, output) + output = self.parseReturnStatement(char, output) elif self.state == 12: - self.parseArrayDeclaration(char, output) + output = self.parseArrayDeclaration(char, output) elif self.state == 13: - self.assignValueAtArrayIndex(char, output) + output = self.assignValueAtArrayIndex(char, output) elif self.state == 14: - 
self.parsePointerInitialization(char, output) + output = self.parsePointerInitialization(char, output) elif self.state == 15: - self.assignPointerValue(char, output) + output = self.assignPointerValue(char, output) elif self.state == 16: - self.dereferencePointer(char, output) + output = self.dereferencePointer(char, output) elif self.state == 17: - self.assignImmediateValueToPointer(char, output) + output = self.assignImmediateValueToPointer(char, output) elif self.state == 18: - self.parseCharVariable(char, output) + output = self.parseCharVariable(char, output) elif self.state == 19: - self.assignCharValue(char, output) + output = self.assignCharValue(char, output) + + return output def parseFunctionReturnType(self, char, output): """ @@ -235,6 +262,8 @@ def parseFunctionReturnType(self, char, output): self.currentType += char self.expectFlag = 1 + return output + def parseFunctionName(self, char, output): """ Here we expect to read the method's name. Once we reach a space or an opening parentheses, we add the method @@ -257,7 +286,7 @@ def parseFunctionName(self, char, output): elif char == "(": # We read the opening parentheses after the method name, no need to check for it later self.methodList[self.currentMethod] = {"retType": self.currentType} - output.write(self.currentMethod + ":\n") + output += (self.currentMethod + ":\n") self.resetGlobalValues("11000000000000") self.state = 2 @@ -265,6 +294,8 @@ def parseFunctionName(self, char, output): self.currentMethod += char self.expectFlag = 1 + return output + def parseFunctionArgumentType(self, char, output): """ Deals with an argument's data type. This is the first step in determining the tuple: arg data type/arg name. 
@@ -281,10 +312,10 @@ def parseFunctionArgumentType(self, char, output): # We have our opening parentheses for arguments, we can now look for the first variable's data type self.expectFlag = 0 self.methodList[self.currentMethod] = {"retType": self.currentType} - output.write(self.currentMethod + ":\n") + output += (self.currentMethod + ":\n") if self.currentMethod == "main": - output.write(" MOV end $S\n") + output += " MOV end $S\n" elif self.expectFlag == 0: # Here we expect to read the first character of the variable's data type @@ -316,6 +347,8 @@ def parseFunctionArgumentType(self, char, output): # append the character to the current type being read. self.currentType += char + return output + def parseFunctionArgumentName(self, char, output): """ This state reads the name of a method argument. Once we have the full name, we couple it with the data type @@ -372,6 +405,8 @@ def parseFunctionArgumentName(self, char, output): self.currentVar += char self.expectFlag = 1 + return output + def countFunctionArguments(self, char, output): """ In this method, we've read all the arguments of a method declaration. Now we simply expect to read the opening @@ -397,18 +432,20 @@ def countFunctionArguments(self, char, output): if self.currentMethod == "main": # the main method would technically be the bottom of the stack frame, so we initialize the stack pointer - output.write(" MOV end $S\n") + output += " MOV end $S\n" else: # we offset the S2 pointer by the amount of arguments passed into the method if self.argCount > 0: - output.write(" MOV $S $S2\n") - output.write(" SUB #" + str(self.argCount * 4 + 4) + " $S2\n") + output += " MOV $S $S2\n" + output += (" SUB #" + str(self.argCount * 4 + 4) + " $S2\n") self.argCount = 0 else: raise ValueError("Syntax error, expecting \"{\", got {}".format(char)) + return output + def parsePrimaryIdentifier(self, char, output): """ Initial evaluation of a line within the body of a method. 
We read the input and concatenate to identifier @@ -437,7 +474,7 @@ def parsePrimaryIdentifier(self, char, output): elif self.identifier == "while": # identifier is a while loop indicator - output.write("LOOP" + str(self.whileLabel) + ":\n") + output += ("LOOP" + str(self.whileLabel) + ":\n") self.whileList.append("LOOP" + str(self.whileLabel)) self.state = 10 self.whileLabel += 1 @@ -541,7 +578,7 @@ def parsePrimaryIdentifier(self, char, output): elif self.identifier == "while": # identifier is a while loop indicator - output.write("LOOP" + str(self.whileLabel) + ":\n") + output += ("LOOP" + str(self.whileLabel) + ":\n") self.whileList.append("LOOP" + str(self.whileLabel)) self.state = 10 self.whileLabel += 1 @@ -568,15 +605,17 @@ def parsePrimaryIdentifier(self, char, output): self.nestedFlag -= 1 if self.whileFlag > 0: self.whileFlag -= 1 - output.write(" JMP <> " + self.whileList.pop() + "\n") + output += (" JMP <> " + self.whileList.pop() + "\n") - output.write(self.labelList.pop() + ":\n") + output += (self.labelList.pop() + ":\n") else: # append the character to the identifier string is nothing else of interest was read. self.identifier += char self.expectFlag = 1 + return output + def parseIntegerVariableName(self, char, output): """ Initial variable name declaration. We already have the data type, so now we read its name until we get a @@ -631,7 +670,6 @@ def parseIntegerVariableName(self, char, output): self.varLocation[self.currentVar] = self.memoryLocation self.memoryLocation += 4 self.variableCount += 1 - self.validName(self.currentVar) self.state = 7 self.resetGlobalValues("10000000000000") @@ -650,6 +688,8 @@ def parseIntegerVariableName(self, char, output): self.currentVar += char self.expectFlag = 1 + return output + def beginIntegerAssignment(self, char, output): """ Begins variable assignment. 
This could either be a math formula, or a function call @@ -673,7 +713,7 @@ def beginIntegerAssignment(self, char, output): # otherwise, the parentheses is just part of a normal math expression self.mathFormula += char - elif char == "*": + elif char == "*" and self.expectFlag == 0: # we're dereferencing a pointer, so we need to handle this differently than a normal variable assignment self.state = 16 @@ -690,19 +730,19 @@ def beginIntegerAssignment(self, char, output): elif char == ";": # End of our math statement. We may begin the evaluation and assign the result to the current variable - self.evaluateMathExpression(output) + output = self.evaluateMathExpression(output) if self.currentVar in self.methodList[self.currentMethod]: # The variable is an argument passed into the function. We use the stack pointer to fetch its # location before writing the value. - output.write(" MOV $A2 $S2\n") - output.write(" ADD #" + str(self.methodList[self.currentMethod][self.currentVar][1] * 4) + output += " MOV $A2 $S2\n" + output += (" ADD #" + str(self.methodList[self.currentMethod][self.currentVar][1] * 4) + " $A2\n") - output.write(" MEMW [4] $A $A2\n") + output += " MEMW [4] $A $A2\n" else: # The variable is local, so we just write the result to its memory location from the local list. - output.write(" MEMW [4] $A #" + str(self.varLocation[self.currentVar]) + "\n") + output += (" MEMW [4] $A #" + str(self.varLocation[self.currentVar]) + "\n") # now we reset everything self.state = 5 @@ -713,6 +753,8 @@ def beginIntegerAssignment(self, char, output): self.mathFormula += char self.expectFlag = 1 + return output + def parseFunctionCall(self, char, output): """ This deals with a function call. This may be on its own line or part of a variable assignment. 
@@ -742,12 +784,12 @@ def parseFunctionCall(self, char, output): # Tokens ("," and ")") may show up without spaces, so we handle that here too if char == ",": if self.functionArg in self.varList: - output.write(" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") + output += (" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") elif re.match(ARRAY_PATTERN, self.functionArg): # variable is an array index operands = self.parseArrayPattern() - output.write(" PUSH #" + str(self.varLocation[operands[0]] + int(operands[1]) * 4) + "\n") + output += (" PUSH #" + str(self.varLocation[operands[0]] + int(operands[1]) * 4) + "\n") else: raise ValueError("Invalid variable at line {}".format(self.lineno)) @@ -763,12 +805,12 @@ def parseFunctionCall(self, char, output): # we're done reading arguments for the function. Now we expect to read ";" to end the statement if self.functionArg in self.varList: # must be a valid variable to pass into function - output.write(" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") + output += (" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") elif re.match(ARRAY_PATTERN, self.functionArg): # variable is an array index operands = self.parseArrayPattern() - output.write(" PUSH #" + str(self.varLocation[operands[0]] + int(operands[1]) * 4) + "\n") + output += (" PUSH #" + str(self.varLocation[operands[0]] + int(operands[1]) * 4) + "\n") else: # variable wasn't declared or isn't valid @@ -785,7 +827,7 @@ def parseFunctionCall(self, char, output): if char == ",": # here we're notified that other variables will be read. if self.functionArg in self.varList: - output.write(" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") + output += (" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") else: raise ValueError("Invalid variable at line {}".format(self.lineno)) @@ -800,7 +842,7 @@ def parseFunctionCall(self, char, output): elif char == ")": # end of arguments. 
we now expect ";" to end the statement if self.functionArg in self.varList: - output.write(" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") + output += (" PUSH #" + str(self.varLocation[self.functionArg]) + "\n") else: raise ValueError("Invalid variable at line {}".format(self.lineno)) self.expectFlag = 4 @@ -815,13 +857,13 @@ def parseFunctionCall(self, char, output): # we make sure the amount of arguments passed in matches how many are accepted by the method if self.argCount == self.methodList[self.functionCall]["totalVars"]: - output.write(" CALL " + self.functionCall + "\n") + output += (" CALL " + self.functionCall + "\n") self.state = 5 if self.currentVar != "": - output.write(" MEMW [4] $A #" + str(self.varLocation[self.currentVar]) + "\n") + output += (" MEMW [4] $A #" + str(self.varLocation[self.currentVar]) + "\n") - output.write(" SUB #" + str(self.argCount * 4) + " $S\n") + output += (" SUB #" + str(self.argCount * 4) + " $S\n") self.resetGlobalValues("10101011000010") else: @@ -839,6 +881,8 @@ def parseFunctionCall(self, char, output): # after a valid function call, we don't read an opening parentheses or whitespace. this is invalid syntax raise ValueError("Invalid syntax after function call at line {}".format(self.lineno)) + return output + def parseIfStatement(self, char, output): """ This state will deal with if statements. 
We begin by evaluating the left hand side and placing the result in @@ -880,11 +924,11 @@ def parseIfStatement(self, char, output): # check the math expression to see if it ends with a closing parentheses (needed for if/while) self.checkForClosingParentheses() - self.evaluateMathExpression(output) + output = self.evaluateMathExpression(output) - output.write(" MOV $A $D2\n") - output.write(" CMP $D2 $C2\n") - output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") + output += " MOV $A $D2\n" + output += " CMP $D2 $C2\n" + output += (" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") self.labelList.append(" L" + str(self.ifLabel)) self.ifLabel += 1 @@ -892,7 +936,7 @@ def parseIfStatement(self, char, output): self.resetGlobalValues("10001000110000") if len(self.binaryList) > 0: - output.write("B" + self.binaryList.pop() + ":\n") + output +=("B" + self.binaryList.pop() + ":\n") elif char in BINARY_OPERATORS: # in this case we've got some more expressions to evaluate. 
@@ -904,23 +948,24 @@ def parseIfStatement(self, char, output): self.mathFormula += char elif self.expectFlag == 3: - self.evaluateMathExpression(output) + output = self.evaluateMathExpression(output) if char == self.binaryOperator: - output.write(" MOV $A $D2\n") - output.write(" CMP $D2 $C2\n") + output += " MOV $A $D2\n" + output += " CMP $D2 $C2\n" if char == "|": # or binary operator causes the conditional flag to flip, since if true, we can skip immediately self.reverseFlag() - output.write(" JMP " + self.ifOperator + " B" + str(self.binaryLabel) + "\n") + output += (" JMP " + self.ifOperator + " B" + str(self.binaryLabel) + "\n") if self.binaryLabel not in self.binaryList: self.binaryList.append(str(self.binaryLabel)) self.binaryLabel += 1 + self.resetGlobalValues("00001000000000") else: - output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") + output += (" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") self.resetGlobalValues("00001000000000") self.expectFlag = 1 @@ -935,11 +980,13 @@ def parseIfStatement(self, char, output): else: temp = char - self.evaluateMathExpression(output) + output = self.evaluateMathExpression(output) self.ifOperator = self.convertOperatorToFlags(self.ifOperator) self.expectFlag = 2 self.mathFormula = temp - output.write(" MOV $A $C2\n") + output += " MOV $A $C2\n" + + return output def parseWhileLoop(self, char, output): """ @@ -983,11 +1030,11 @@ def parseWhileLoop(self, char, output): # check the math expression to see if it ends with a closing parentheses (needed for if/while) self.checkForClosingParentheses() - self.evaluateMathExpression(output) + output = self.evaluateMathExpression(output) - output.write(" MOV $A $D2\n") - output.write(" CMP $D2 $C2\n") - output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") + output += " MOV $A $D2\n" + output += " CMP $D2 $C2\n" + output += (" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") self.labelList.append(" L" + 
str(self.ifLabel)) self.ifLabel += 1 @@ -995,7 +1042,7 @@ def parseWhileLoop(self, char, output): self.resetGlobalValues("10001000110000") if len(self.binaryList) > 0: - output.write("B" + self.binaryList.pop() + ":\n") + output += ("B" + self.binaryList.pop() + ":\n") elif char in BINARY_OPERATORS: # in this case we've got some more expressions to evaluate. @@ -1007,21 +1054,21 @@ def parseWhileLoop(self, char, output): self.mathFormula += char elif self.expectFlag == 3: - self.evaluateMathExpression(output) + output = self.evaluateMathExpression(output) if char == self.binaryOperator: - output.write(" MOV $A $D2\n") - output.write(" CMP $D2 $C2\n") + output += " MOV $A $D2\n" + output += " CMP $D2 $C2\n" if char == "|": self.reverseFlag() - output.write(" JMP " + self.ifOperator + " B" + str(self.binaryLabel) + "\n") + output += (" JMP " + self.ifOperator + " B" + str(self.binaryLabel) + "\n") if self.binaryLabel not in self.binaryList: self.binaryList.append(str(self.binaryLabel)) self.binaryLabel += 1 else: - output.write(" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") + output += (" JMP " + self.ifOperator + " L" + str(self.ifLabel) + "\n") self.expectFlag = 1 self.resetGlobalValues("00001000000000") @@ -1036,11 +1083,13 @@ def parseWhileLoop(self, char, output): else: temp = char - self.evaluateMathExpression(output) + output = self.evaluateMathExpression(output) self.ifOperator = self.convertOperatorToFlags(self.ifOperator) self.expectFlag = 2 self.mathFormula = temp - output.write(" MOV $A $C2\n") + output += " MOV $A $C2\n" + + return output def parseReturnStatement(self, char, output): """ @@ -1058,16 +1107,20 @@ def parseReturnStatement(self, char, output): elif char == ";": # End of our math statement. 
- self.evaluateMathExpression(output) + output = self.evaluateMathExpression(output) self.state = 5 self.resetGlobalValues("10001000000000") - output.write(" RET\n") + if self.nestedFlag > 0 or self.currentMethod != "main": + # we don't need a return statement if it's the end of the main method + output += " RET\n" else: # we continue to append the chars to our math formula for the return statement self.expectFlag = 1 self.mathFormula += char + return output + def parseArrayDeclaration(self, char, output): """ This state handles the declaration of an array. We check to see if any character before closing bracket "]" @@ -1154,7 +1207,7 @@ def parseArrayDeclaration(self, char, output): elif char == ";": # end of statement, math expression is done, everything is set to go back to state 5. - self.assignArrayValues(output) + output = self.assignArrayValues(output) self.state = 5 self.resetGlobalValues("11101000001000") @@ -1162,6 +1215,8 @@ def parseArrayDeclaration(self, char, output): # we read something other than a semi-colon or a space raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + return output + def assignValueAtArrayIndex(self, char, output): """ This state deals with assigning a value to a specific array index. 
Here we assume the array has already been @@ -1206,16 +1261,18 @@ def assignValueAtArrayIndex(self, char, output): if char == ";": # we're done reading the math expression, so we call the mathparser functions and reset - self.evaluateMathExpression(output) - output.write(" MEMW [4] $A #" + str(self.varLocation[self.currentVar] + int(self.arrayLength) * 4) + - "\n") + output = self.evaluateMathExpression(output) + output += (" MEMW [4] $A #" + str(self.varLocation[self.currentVar] + int(self.arrayLength) * 4) + + "\n") self.state = 5 - self.resetGlobalValues("10001000001000") + self.resetGlobalValues("11101100001000") else: # otherwise the char gets added to the math formula self.mathFormula += char + return output + def parsePointerInitialization(self, char, output): """ This method deals with initialization of pointers. Pointer variable can only be assigned a single variable, @@ -1289,6 +1346,8 @@ def parsePointerInitialization(self, char, output): # we read something other than "=" or ";" in this context, which would be incorrect raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + return output + def assignPointerValue(self, char, output): """ This method assigns a value to a pointer. 
The value must be a valid memory address (and must thus be referenced @@ -1345,21 +1404,21 @@ def assignPointerValue(self, char, output): if self.mathFormula in self.varList: # variable is in regular list, so we assign its memory location to the pointer - output.write(" MEMW [4] #" + str(self.varLocation[self.mathFormula]) + " #" + + output += (" MEMW [4] #" + str(self.varLocation[self.mathFormula]) + " #" + str(self.varLocation[self.currentVar]) + "\n") elif self.mathFormula in self.methodList[self.currentMethod]: # variable is passed in as argument, we just write the pointer register's value at the right index - output.write(" MOV $A2 $S2\n") - output.write(" ADD #" + str(self.methodList[self.currentMethod][self.currentVar][1] * 4) + output += " MOV $A2 $S2\n" + output += (" ADD #" + str(self.methodList[self.currentMethod][self.currentVar][1] * 4) + " $A2\n") - output.write(" MEMW [4] $A2 #" + str(self.varLocation[self.currentVar]) + "\n") + output += (" MEMW [4] $A2 #" + str(self.varLocation[self.currentVar]) + "\n") elif re.match(ARRAY_PATTERN, self.functionArg): # variable is an array index, we get the memory location at index 0 and add the correct offset operands = self.parseArrayPattern() - output.write(" #" + str(self.varLocation[self.mathFormula] + operands[1] * 4) + " #" + - str(self.varLocation[self.currentVar]) + "\n") + output += (" #" + str(self.varLocation[self.mathFormula] + operands[1] * 4) + " #" + + str(self.varLocation[self.currentVar]) + "\n") else: raise ValueError("Invalid variable name at line {}".format(self.lineno)) @@ -1375,20 +1434,20 @@ def assignPointerValue(self, char, output): elif char == ";": if self.mathFormula in self.varList: # variable is in regular list, so we assign its memory location to the pointer - output.write(" MEMW [4] #" + str(self.varLocation[self.mathFormula]) + " #" + + output += (" MEMW [4] #" + str(self.varLocation[self.mathFormula]) + " #" + str(self.varLocation[self.currentVar] + "\n")) elif self.mathFormula in 
self.methodList[self.currentMethod]: # variable is passed in as argument, we just write the pointer register's value at the right index - output.write(" MOV $A2 $S2\n") - output.write(" ADD #" + str(self.methodList[self.currentMethod][self.currentVar][1] * 4) - + " $A2\n") - output.write(" MEMW [4] $A2 #" + str(self.varLocation[self.currentVar]) + "\n") + output += " MOV $A2 $S2\n" + output += (" ADD #" + str(self.methodList[self.currentMethod][self.currentVar][1] * 4) + + " $A2\n") + output += (" MEMW [4] $A2 #" + str(self.varLocation[self.currentVar]) + "\n") elif re.match(ARRAY_PATTERN, self.functionArg): # variable is an array index, we get the memory location at index 0 and add the correct offset operands = self.parseArrayPattern() - output.write(" #" + str(self.varLocation[self.mathFormula] + operands[1] * 4) + " #" + + output += (" #" + str(self.varLocation[self.mathFormula] + operands[1] * 4) + " #" + str(self.varLocation[self.currentVar]) + "\n") else: @@ -1402,6 +1461,8 @@ def assignPointerValue(self, char, output): # we're expecting the end of the statement ";", so anything else in invalid raise ValueError("Syntax error at line {}".format(self.lineno)) + return output + def dereferencePointer(self, char, output): """ This state deals with dereferencing a pointer. 
Any variable can be assigned a pointer dereference, but it must @@ -1434,12 +1495,12 @@ def dereferencePointer(self, char, output): if self.mathFormula not in self.pointerList: raise ValueError("Invalid pointer variable at line {}".format(self.lineno)) - output.write(" MEMR [4] #" + str(self.varLocation[self.mathFormula]) + " $A\n") - output.write(" MEMR [4] $A $B\n") - output.write(" MEMW [4] $B #" + str(self.varLocation[self.currentVar]) + "\n") + output += (" MEMR [4] #" + str(self.varLocation[self.mathFormula]) + " $A\n") + output += " MEMR [4] $A $B\n" + output += (" MEMW [4] $B #" + str(self.varLocation[self.currentVar]) + "\n") self.state = 5 - self.resetGlobalValues("111000000000000") + self.resetGlobalValues("111010000000000") else: # otherwise, we're still reading the pointer variable's name @@ -1457,17 +1518,19 @@ def dereferencePointer(self, char, output): if self.mathFormula not in self.pointerList: raise ValueError("Invalid pointer variable at line {}".format(self.lineno)) - output.write(" MEMR [4] #" + str(self.varLocation[self.mathFormula]) + " $A\n") - output.write(" MEMR [4] $A $B\n") - output.write(" MEMW [4] $B #" + str(self.varLocation[self.currentVar]) + "\n") + output += (" MEMR [4] #" + str(self.varLocation[self.mathFormula]) + " $A\n") + output += " MEMR [4] $A $B\n" + output += (" MEMW [4] $B #" + str(self.varLocation[self.currentVar]) + "\n") self.state = 5 - self.resetGlobalValues("111000000000000") + self.resetGlobalValues("111010000000000") else: # we didn't read a semi colon or space character, so the syntax is incorrect raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + return output + def assignImmediateValueToPointer(self, char, output): """ This method allows you to assign an immediate value to a pointer. 
You're risking accessing an invalid memory @@ -1530,8 +1593,8 @@ def assignImmediateValueToPointer(self, char, output): # can't have an empty expression (ex: *pointer = ;) raise ValueError("Empty operand at line {}".format(self.lineno)) - self.evaluateMathExpression(output) - output.write(" MEMW [4] $A #" + str(self.varLocation[self.currentVar]) + "\n") + output = self.evaluateMathExpression(output) + output += (" MEMW [4] $A #" + str(self.varLocation[self.currentVar]) + "\n") self.state = 5 self.resetGlobalValues("111010000000000") @@ -1540,6 +1603,8 @@ def assignImmediateValueToPointer(self, char, output): # otherwise we keep appending to our math formula self.mathFormula += char + return output + def parseCharVariable(self, char, output): """ This state takes in the name of the char variable, then appends it to the char list. @@ -1597,6 +1662,8 @@ def parseCharVariable(self, char, output): else: raise ValueError("Invalid syntax at line {}".format(self.lineno)) + return output + def assignCharValue(self, char, output): """ This method accepts a value for a char variable. 
It should be noted that chars will always be a single @@ -1641,8 +1708,8 @@ def assignCharValue(self, char, output): if char in IGNORE_CHARS: pass elif char == ";": - output.write(" MEMW [4] #" + str(ord(self.mathFormula)) + " #" + - str(self.varLocation[self.currentVar]) + "\n") + output += (" MEMW [4] #" + str(ord(self.mathFormula)) + " #" + + str(self.varLocation[self.currentVar]) + "\n") self.state = 5 self.resetGlobalValues("111010000000100") else: @@ -1656,6 +1723,8 @@ def assignCharValue(self, char, output): else: raise ValueError("Incorrect syntax at line {}".format(self.lineno)) + return output + def validName(self, name): """ Verifies whether the variable's name contains only acceptable characters (A-Z, $, _, #) @@ -1742,9 +1811,11 @@ def assignArrayValues(self, output): except ValueError as e: raise ValueError("Invalid value for array assignment at line {}".format(self.lineno)) - output.write(" MEMW [4] #" + str(element) + " #" + str(startingLocation) + "\n") + output += (" MEMW [4] #" + str(element) + " #" + str(startingLocation) + "\n") startingLocation += 4 + return output + def checkForClosingParentheses(self): """ This method checks if the math expression ends with a closing parentheses. 
This is necessary for if statements @@ -1849,8 +1920,10 @@ def evaluateMathExpression(self, output): tokens = tokenize(self.mathFormula) postfix = infixToPostfix(tokens) - evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], - self.arrayList, output) + output += evaluatePostfix(postfix, self.varList, self.varLocation, self.methodList[self.currentMethod], + self.arrayList, output, self.lineno) + + return output def parseArrayPattern(self): """ diff --git a/ToolChain/Compiler/CompilerInfo.md b/ToolChain/Compiler/CompilerInfo.md index 33f138d..f528240 100644 --- a/ToolChain/Compiler/CompilerInfo.md +++ b/ToolChain/Compiler/CompilerInfo.md @@ -12,6 +12,7 @@ The compiler currently supports the following features: * Function calls * Multiple function declaration * Pointers +* Single-line comments (in-line comments also valid) ##Restrictions The compiler currently has some restrictions: @@ -19,12 +20,13 @@ The compiler currently has some restrictions: * Currently, arrays may only be indexed with hard coded integers, not variables. * If statement and while loop operands can't contain parentheses (we may opt to remove the requirement for parentheses to fix this). -* Math expressions for variable assignment may only have a maximum of 6 operands. This isn't the real -maximum, as it varies depending on how many variables are used. This maximum is simply for consistency. +* Math expressions may only have a maximum of 7 operands. * Function calls can't be used in complex variable assignment. They can only be used if it's the only operand. * Pointers can't dereference other pointers. -* array values must be assigned directly after array variable declaration. However, specific array indices can be +* Array values must be assigned directly after array variable declaration. 
However, specific array indices can be assigned any time +* Only array indices may be passed into functions as arguments at the moment +* Functions called within another function must already exist before the call - - - This file is part of Spartacus project Copyright (C) 2018 CSE diff --git a/ToolChain/Compiler/Constants.py b/ToolChain/Compiler/Constants.py index 9dbe687..207a0b7 100644 --- a/ToolChain/Compiler/Constants.py +++ b/ToolChain/Compiler/Constants.py @@ -38,6 +38,7 @@ REGISTER_NAMES = ["A", "B", "C", "D", "E", "F", "G"] SINGLE_QUOTE = "\'" DOUBLE_QUOTE = "\"" +MEMORY_START = 0x40000000 L_PARENTHESES = '(' diff --git a/ToolChain/Compiler/MathParser.py b/ToolChain/Compiler/MathParser.py index 32e409a..7efcab8 100644 --- a/ToolChain/Compiler/MathParser.py +++ b/ToolChain/Compiler/MathParser.py @@ -93,7 +93,7 @@ def infixToPostfix(tokens): return postfix -def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, arrayList, output): +def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, arrayList, output, line): """ Evaluates the postfix math expression. Variables have their values read and loaded into registers before executing the operation. If variables are in the methodVariables list, we make use of the stack frame pointer "S2" to fetch @@ -117,9 +117,9 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar for element in postfix: # Evaluate each postfix element one by one to determine appropriate action - if sourceRegister > 6 or destRegister > 6: + if sourceRegister > 6 or destRegister > 5: # We cap the total amount of registers used to 7 (0-6) - raise ValueError("Too many operands in formula.") + raise ValueError("Too many operands in formula at line {}".format(line)) if element in OPERATIONS: # Here, our element is an operator. 
This means we need to pop the top two values from the stack and @@ -128,20 +128,21 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar if operand1 in variableList: # The operand is in the list of local variables, so we read the value from memory - output.write(" MEMR [4] #" + str(variableLocation[operand1]) + " $" + REGISTERS[sourceRegister] + "\n") + output += (" MEMR [4] #" + str(variableLocation[operand1]) + " $" + REGISTERS[sourceRegister] + "\n") operand1 = REGISTERS[sourceRegister] elif operand1 in methodVariables: # The operand is in the list of arguments passed into the method. We consult the methodVariables list # to determine the appropriate offset from the stack pointer register S2. - output.write(" MOV $A2 $S2\n") - output.write(" ADD #" + str(int(methodVariables[operand1][1]) * 4) + " $A2\n") - output.write(" MEMR [4] $A2 $" + REGISTERS[sourceRegister] + "\n") + output += " MOV $A2 $S2\n" + output += (" ADD #" + str(int(methodVariables[operand1][1]) * 4) + " $A2\n") + output += (" MEMR [4] $A2 $" + REGISTERS[sourceRegister] + "\n") operand1 = REGISTERS[sourceRegister] elif operand1 in REGISTER_NAMES: # This is simply a register that was pushed onto the stack. We can keep it as is - pass + pass + elif re.match(ARRAY_PATTERN, str(operand1)): # Our variable is an array, and must be in the pattern "var[1]". We use regex to sort the information @@ -154,10 +155,11 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar # name of variable must be a valid array declaration if int(operands[1]) > arrayList[operands[0]] - 1: # Can't access an index that doesn't exist! 
- raise ValueError("Array index out of bounds.") + raise ValueError("Array index out of bounds at line {}".format(line) + ": " + + str(operands[0]) + "[" + str(operands[1]) + "]") - output.write(" MEMR [4] #" + str(variableLocation[operands[0]] + int(operands[1]) * 4) + " $" + - REGISTERS[sourceRegister] + "\n") + output += (" MEMR [4] #" + str(variableLocation[operands[0]] + int(operands[1]) * 4) + " $" + + REGISTERS[sourceRegister] + "\n") operand1 = REGISTERS[sourceRegister] else: @@ -170,19 +172,19 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar immediateCount += 1 immFlag = 1 except ValueError as e: - raise ValueError("Invalid operand") + raise ValueError("Invalid operand at line {}".format(line)) if operand2 in variableList: # The operand is in the list of local variables, so we read the value from memory - output.write(" MEMR [4] #" + str(variableLocation[operand2]) + " $" + REGISTERS[destRegister] + "\n") + output += (" MEMR [4] #" + str(variableLocation[operand2]) + " $" + REGISTERS[destRegister] + "\n") operand2 = REGISTERS[destRegister] elif operand2 in methodVariables: # The operand is in the list of arguments passed into the method. We consult the methodVariables list # to determine the appropriate offset from the stack pointer register S2. - output.write(" MOV $B2 $S2\n") - output.write(" ADD #" + str(int(methodVariables[operand2][1]) * 4) + " $B2\n") - output.write(" MEMR [4] $B2 $" + REGISTERS[destRegister] + "\n") + output += " MOV $B2 $S2\n" + output += (" ADD #" + str(int(methodVariables[operand2][1]) * 4) + " $B2\n") + output += (" MEMR [4] $B2 $" + REGISTERS[destRegister] + "\n") operand2 = REGISTERS[destRegister] elif operand2 in REGISTER_NAMES: @@ -200,14 +202,15 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar # name of variable must be a valid array declaration if int(operands[1]) > int(arrayList[operands[0]] - 1): # Can't access an index that doesn't exist! 
- raise ValueError("Array index out of bounds.") + raise ValueError("Array index out of bounds at line {}".format(line) + ": " + + str(operands[0]) + "[" + str(operands[1]) + "]") - output.write(" MEMR [4] #" + str(variableLocation[operands[0]] + int(operands[1]) * 4) + " $" + - REGISTERS[destRegister] + "\n") + output += (" MEMR [4] #" + str(variableLocation[operands[0]] + int(operands[1]) * 4) + " $" + + REGISTERS[destRegister] + "\n") operand2 = REGISTERS[destRegister] else: - raise ValueError("Invalid variable.") + raise ValueError("Invalid variable at line {}".format(line) + ": " + operands[0]) else: # The operand is an immediate value. We test to see if it's a valid integer @@ -216,7 +219,7 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar immediateCount += 1 immFlag = 2 except ValueError as e: - raise ValueError("Invalid operand") + raise ValueError("Invalid operand at line {}".format(line) + ": " + str(operand2)) if immediateCount == 2: # If we have two immediate values, we don't really need to calculate the arithmetic in Capua ASM. @@ -233,23 +236,19 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar # only one of the operands was an immediate value. We determine which one is the immediate value, # as the correct instruction output depends on it. if immFlag == 1: - output.write(" MOV #" + str(int(operand1)) + " $" + REGISTERS[sourceRegister] + "\n") + output += (" MOV #" + str(int(operand1)) + " $" + REGISTERS[sourceRegister] + "\n") operand1 = REGISTERS[sourceRegister] elif immFlag == 2: - output.write(" MOV #" + str(int(operand2)) + " $" + REGISTERS[destRegister] + "\n") + output += (" MOV #" + str(int(operand2)) + " $" + REGISTERS[destRegister] + "\n") operand2 = REGISTERS[destRegister] - else: - # No operands were immediate values. We can do the arithmetic operation as is. 
- # We move the source and destination registers up one letter for the next operation - sourceRegister += 1 - destRegister += 1 - - output.write(" " + INSTRUCTIONS[element] + " $" + str(operand1) + " $" + str(operand2) + "\n") + output += (" " + INSTRUCTIONS[element] + " $" + str(operand1) + " $" + str(operand2) + "\n") stack.append(operand2) immediateCount = 0 + sourceRegister += 1 + destRegister += 1 else: # We have an operand to push onto the stack @@ -258,7 +257,7 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar if len(stack) != 1: # If the stack has more than or less than one element, the expression is incorrect. print(stack) - raise ValueError("invalid expression.") + raise ValueError("invalid expression at line {}".format(line)) # our result is then "saved" into register A. The assignment can now be completed. result = stack.pop() @@ -269,13 +268,13 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar elif result in variableList: # if our last operand is in the variable list, we simply read it from memory - output.write(" MEMR [4] #" + str(variableLocation[result]) + " $A\n") + output += (" MEMR [4] #" + str(variableLocation[result]) + " $A\n") elif result in methodVariables: # our last operand is passed in as an argument into the method, so we read it to register A - output.write(" MOV $B2 $S2\n") - output.write(" ADD #" + str(int(methodVariables[result][1]) * 4) + " $B2\n") - output.write(" MEMR [4] $B2 $A\n") + output += " MOV $B2 $S2\n" + output += (" ADD #" + str(int(methodVariables[result][1]) * 4) + " $B2\n") + output += " MEMR [4] $B2 $A\n" elif re.match(ARRAY_PATTERN, str(result)): # our last operand is an array at a specific index. We find the index, and add the offset to the variable loc. 
@@ -286,19 +285,21 @@ def evaluatePostfix(postfix, variableList, variableLocation, methodVariables, ar if operands[0] not in arrayList: # name of variable must be a valid array declaration - raise ValueError("Invalid variable.") + raise ValueError("Invalid variable at line {}".format(line)) if int(operands[1]) > int(arrayList[operands[0]] - 1): # Can't access an index that doesn't exist! - raise ValueError("Array index out of bounds.") + raise ValueError("Array index out of bounds at line {}".format(line)) - output.write(" MEMR [4] #" + str(variableLocation[operands[0]] + int(operands[1]) * 4) + " $A\n") + output += (" MEMR [4] #" + str(variableLocation[operands[0]] + int(operands[1]) * 4) + " $A\n") else: # last operand is an immediate value. we test to see if it's a valid integer, and we move to register A try: isinstance(int(result), int) - output.write(" MOV #" + str(result) + " $A\n") + output += (" MOV #" + str(result) + " $A\n") except ValueError as e: print(result) - raise ValueError("Invalid mathematical expression") + raise ValueError("Invalid mathematical expression at line {}".format(line)) + + return output diff --git a/test.c b/test.c new file mode 100644 index 0000000..e7ac647 --- /dev/null +++ b/test.c @@ -0,0 +1,50 @@ +//This C program shows the various sections of the C language that the Capua compiler supports +//This of course includes single line comments + +//multiple functions +int add(int a, int b){ + return a + b; +} +int main(){ + //Regular integer variable assignment + int a = 5; + int b = 10; + //Integer pointer variable + int *c; + //Integer array + int d[3] = {1,2,3}; + //Referencing memory address to pointer variable + c = &a; + //while loop + while (a < (b*4) - 5){ //in-line comments are also possible + a = a + 1; + } + //if statement + binary operator + if (a > 5 || b < 10){ + //nested statements (if/while) + while (b > 4){ + b = b - 1; + } + a = 1; + } + //assigning value to array index + d[0] = a; + //assigning integer to value at
array index + b = d[2]; + //char variable assignment + char e = 'z'; + //use of single/double quotes for char value + e = "f"; + //math expression with multiple operands, parentheses (max 7 operands) + a = (b*4) - a / 3 - (b + 4 * d[1]); + //assigning immediate value to pointer variable + *c = 40000000; + //dereferencing pointer, assigning value to variable + b = *c; + //assigning returned value from function call to variable + int g = add(a,b); + //lenient syntax for spaces/newline + int x = 5; int y=6+5+6+ a ; + //return statement + return 0; +}