Skip to content

Commit

Permalink
commit from archive
Browse files Browse the repository at this point in the history
  • Loading branch information
sorawit committed Mar 14, 2016
0 parents commit d1ec1dc
Show file tree
Hide file tree
Showing 617 changed files with 812,299 additions and 0 deletions.
19 changes: 19 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Foo files: any path whose name starts with "foo"
foo*

# Temporary files: names ending in '~', names starting with '#' (the leading
# backslash stops git from reading the line as a comment), and *.swp / *.swo
*~
\#*
*.swp
*.swo

# ant-generated files
build/
parser/autogen/
parser/classes/
parser/dist/
src/edu/mit/compilers/grammar

# osx
.DS_Store
# NOTE(review): scala/ matches any directory named "scala" and is not OS X
# specific; presumably a local Scala directory - confirm and regroup if so
scala/
8 changes: 8 additions & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Jeremy Kaplan ([email protected])
Sorawit Suriyakarn ([email protected])

Scala Skeleton prepared by Russell Cohen for 6.035 2013.
Additional code contributed by:
Russell Cohen (rcoh)
Joseph Lynch (jolynch)
Cameron Tenny (cjtenny)
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Two
===
2 changes: 2 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/sh
# Thin wrapper around Ant: forwards every command-line argument unchanged
# ("$@" keeps arguments containing spaces intact).
ant "$@"
174 changes: 174 additions & 0 deletions build.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Scala + Java + ANTLR Build Script Written By Russell Cohen, 2013, modified
from the original script by 6.035 TAs of yore.
This somewhat complex Ant script handles the necessary build dance to link
ANTLR, Java and Scala together.
-->

<project name="ProjectName" default="build" basedir=".">
<description>Project Build Script</description>

<!-- targets -->
<!-- Default target of the project: runs no tasks itself, only pulls in "package". -->
<target name="build"
        description="Build whole project"
        depends="package"/>

<!-- Deletes the build output directory, then invokes the "clean" target of the
     build file in the parser/ subdirectory. Depends on "init" because that is
     where build.dir is defined. -->
<target name="clean"
        description="Remove previous build files"
        depends="init">
  <delete dir="${build.dir}"
          quiet="true"
          includeemptydirs="true"/>
  <ant target="clean" dir="parser"/>
</target>

<!-- Shared setup for every other target: loads the OS environment, verifies the
     required tool locations, defines all path properties and classpaths, loads
     the Scala Ant tasks, and computes build.uptodate. -->
<target name="init">
  <!-- expose OS environment variables as env.* properties -->
  <property environment="env"/>

  <!-- check for required tools.
       isset expects a property NAME (not a ${...} expansion of its value), and
       fail triggers when the nested condition is TRUE, so each check has to be
       "not isset" on the env.* property itself. -->
  <fail message="Missing SCALA_HOME variable in OS environment">
    <condition><not><isset property="env.SCALA_HOME"/></not></condition>
  </fail>
  <fail message="Missing JAVA_HOME variable in OS environment">
    <condition><not><isset property="env.JAVA_HOME"/></not></condition>
  </fail>

  <!-- variables for paths and files -->
  <property name="src.dir" location="${basedir}/src"/>
  <property name="tests.dir" location="${basedir}/unittests"/>
  <property name="lib.dir" location="${basedir}/lib"/>
  <property name="build.dir" location="${basedir}/build"/>
  <property name="build-classes.dir" location="${build.dir}/classes"/>
  <property name="test-classes.dir" location="${build.dir}/test/classes"/>
  <property name="build-lib.dir" location="${build.dir}/lib"/>
  <property name="build-doc.dir" location="${build.dir}/doc"/>
  <property name="java.dir" location="${env.JAVA_HOME}"/>
  <property name="scala.dir" location="${env.SCALA_HOME}"/>
  <property name="scala-library.jar" location="${scala.dir}/scala-library.jar"/>
  <property name="scala-compiler.jar" location="${scala.dir}/scala-compiler.jar"/>
  <property name="scala-reflect.jar" location="${scala.dir}/scala-reflect.jar"/>

  <!-- classpath used to compile and test the project itself -->
  <path id="project.classpath">
    <pathelement location="${scala-library.jar}"/>
    <pathelement location="${build-classes.dir}"/> <!-- used during recompilation -->
    <pathelement location="${test-classes.dir}"/> <!-- used during recompilation -->
    <fileset dir="${lib.dir}">
      <include name="**/*.jar"/>
    </fileset>
  </path>

  <!-- classpath from which the Scala Ant tasks are loaded -->
  <path id="scala.classpath">
    <pathelement location="${scala-compiler.jar}"/>
    <pathelement location="${scala-library.jar}"/>
    <pathelement location="${scala-reflect.jar}"/>
  </path>

  <!-- load scala's ant tasks -->
  <taskdef resource="scala/tools/ant/antlib.xml" classpathref="scala.classpath"/>

  <!-- print where this project will get scala and java from -->
  <echo message="Init project"/>
  <echo message=" with scala.dir = ${scala.dir}"/>
  <echo message=" with java.dir = ${java.dir}"/>

  <!-- check if any files have been modified since last build: build.uptodate
       is set only when build.done is newer than everything in src and lib -->
  <uptodate property="build.uptodate" targetfile="${build.dir}/build.done">
    <srcfiles dir="${src.dir}" includes="**"/>
    <srcfiles dir="${lib.dir}" includes="**"/>
  </uptodate>
</target>

<!-- Runs the default target of the build file located in the parser/ directory. -->
<target name="parser"
        depends="init">
  <ant dir="parser"/>
</target>

<!-- Compiles everything under src.dir into build-classes.dir: scalac runs first,
     javac second. Skipped entirely when "init" has set build.uptodate. -->
<target name="compile"
        unless="build.uptodate"
        depends="parser">
  <mkdir dir="${build-classes.dir}"/>

  <!-- scalac is handed the .java files as well as the .scala files
       (presumably so Scala code can reference the Java sources - confirm) -->
  <scalac classpathref="project.classpath"
          destdir="${build-classes.dir}"
          unchecked="off"
          deprecation="off">
    <src>
      <pathelement location="${src.dir}"/>
    </src>
    <include name="**/*.scala"/>
    <include name="**/*.java"/>
  </scalac>

  <!-- javac then compiles the .java files into the same output directory -->
  <javac classpathref="project.classpath"
         destdir="${build-classes.dir}"
         includeantruntime="false">
    <src>
      <pathelement location="${src.dir}"/>
    </src>
    <include name="**/*.java"/>
  </javac>
</target>

<!-- Compiles the unit tests under tests.dir into test-classes.dir (scalac first,
     javac second), then evaluates tests.uptodate against tests.done.
     NOTE(review): nothing in this build file creates tests.done, so
     tests.uptodate appears never to be set - confirm whether that is intended. -->
<target name="compile-test"
        unless="tests.uptodate"
        depends="compile">
  <mkdir dir="${test-classes.dir}"/>

  <scalac classpathref="project.classpath"
          destdir="${test-classes.dir}"
          unchecked="off"
          deprecation="off">
    <src>
      <pathelement location="${tests.dir}"/>
    </src>
    <include name="**/*.scala"/>
    <include name="**/*.java"/>
  </scalac>

  <javac classpathref="project.classpath"
         destdir="${test-classes.dir}"
         includeantruntime="false">
    <src>
      <pathelement location="${tests.dir}"/>
    </src>
    <include name="**/*.java"/>
  </javac>

  <!-- set tests.uptodate when tests.done is newer than every file in tests.dir -->
  <uptodate targetfile="${build.dir}/tests.done" property="tests.uptodate">
    <srcfiles dir="${tests.dir}" includes="**"/>
  </uptodate>
</target>

<!-- Defines the ScalaTest Ant task and runs every test found in test-classes.dir,
     reporting to stdout and halting the build on the first failure. -->
<target name="test"
        unless="tests.uptodate"
        depends="compile-test">
  <!-- the task class is looked up on project.classpath plus every jar found
       under scala.dir and lib.dir -->
  <taskdef name="scalatest"
           classname="org.scalatest.tools.ScalaTestAntTask"
           classpathref="project.classpath">
    <classpath>
      <fileset dir="${scala.dir}" includes="**/*.jar"/>
      <fileset dir="${lib.dir}" includes="**/*.jar"/>
    </classpath>
  </taskdef>

  <scalatest runpath="${test-classes.dir}"
             fork="no"
             haltonfailure="true">
    <reporter type="stdout"/>
  </scalatest>
</target>

<!-- Builds ${env.USER}-handin.tar.gz from a cleaned tree: removes any earlier
     archives, tars the project directory under a ${env.USER}-handin/ prefix
     (leaving out top-level *.tar / *.tar.gz files and anything inside .git),
     then gzips the tar file. The intermediate .tar is left in place. -->
<target name="tar"
        depends="clean">
  <delete file="${env.USER}-handin.tar.gz"/>
  <delete file="${env.USER}-handin.tar"/>

  <tar destfile="${env.USER}-handin.tar">
    <tarfileset prefix="${env.USER}-handin/"
                dir="."
                includes="**/*"
                excludes="*.tar,*.tar.gz,**/.git/**"/>
  </tar>

  <gzip src="${env.USER}-handin.tar"
        destfile="${env.USER}-handin.tar.gz"/>
</target>

<!-- Packages the compiled classes into project.jar and the Scala sources into
     project-src.jar under build-lib.dir, then touches build.done so that "init"
     can report the build as up to date next time. -->
<target name="package"
        unless="build.uptodate"
        depends="compile">
  <mkdir dir="${build-lib.dir}"/>

  <!-- the jars from lib.dir are added to project.jar as files, not unpacked -->
  <jar destfile="${build-lib.dir}/project.jar">
    <fileset dir="${build-classes.dir}"/>
    <fileset dir="${lib.dir}" includes="**/*.jar"/>
  </jar>

  <jar destfile="${build-lib.dir}/project-src.jar">
    <fileset dir="${src.dir}">
      <include name="**/*.scala"/>
    </fileset>
  </jar>

  <!-- mark build as up-to-date -->
  <touch file="${build.dir}/build.done"/>
</target>
</project>
106 changes: 106 additions & 0 deletions doc/codegen/codegen.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
Code Generation
===============


Division of Work
----------------

Since there were two phases we each took one. Sorawit converted the high-level IR into a low-level
IR (IR -> LIR), and Jeremy converted the low-level IR into assembly code (LIR -> ASM).


Problem Statement Changes
-------------------------

The following are things we specify in our code generation that are not explicitly stated in the
problem statement.

* Any method (except `void` methods) whose execution "falls off" without returning a value triggers
an `exit` (a runtime error) with return status -2 (or 254). To circumvent this behavior for
`void` methods, they should always return 0. This allows a program's `main` method to return a
status code of zero if it happens to be `void`.

* All fields are initialized to zeroes on declaration. Scalar `int` and `boolean` fields are set to
0 and false, respectively. Arrays have each element initialized to the "zero" value for their
type.

* `while` loops with a limit test the iteration count before the condition. For example, if the
limit is 1, the condition and loop body are only executed once each.

Design
------

### IR -> LIR

Each subclass of IR implements a method called `refract`, which essentially takes a global
variable-renaming book object (a map used to rename shadowing variables) and returns the LIR
representation of that node plus the updated version of the renaming object. Most of the work is
done using the visitor pattern with some functional language features such as list mapping and
folding.

The `refract` method also, as it sees fit, generates temporary variables in order to evaluate
certain expressions. For example, when refracting a statement `x = (a + b) + c`, it first generates
a temp variable called `~tempX`, where X is an increasing integer. Then it adds a statement
`~tempX = a + b`, and later adds `x = ~tempX + c`. Most of the `refract` methods share the same
signature, except that of the `IrExpr` subclasses, which additionally return a variable name where
the caller of this `refract` function can access the evaluated expression.

By recursively calling the `refract` method on the topmost `IrProgram`, the program returns the LIR
representation of the whole program, as well as the Set / Map of all global callouts and variables,
as well as the local variables of each function.

### LIR -> ASM

Each subclass of LIR implements a method called `toAssembly`, which takes in a `Vars` object and
returns a tuple of the `StringMap` generated for that node and the `String` text of the assembly
code generated. A `StringMap` associates the label for a string with its content. A `Vars` object
is a wrapped `Map[String, String]` that keeps track of the location of each named local variable in
the current scope. It also contains `breakLoc` and `contLoc`, which are strings containing the
labels to break to or continue from when `break` or `continue` statements are encountered.

For most statements, `toAssembly` just finds the assembly template associated with it and returns an
empty `StringMap` along with the filled template. The only time a `StringMap` is nonempty is if a
string literal is found within the statement. Otherwise the `StringMap`s just merge and bubble up
to the top. Any variable not found in `Vars` is assumed to be global and templated accordingly.

The `Templates` object stores all the necessary string manipulation required to generate the
assembly for each statement. Some abstraction exists, but since large chunks of its operation will
(hopefully) be optimized out, it was not a priority. The `Templates` object also contains a
function called `genSym`, which takes no arguments and returns a unique symbol to be used as a
label. This is used almost entirely within the templates themselves to make labels for jumps. The
only exceptions are string literals generating their labels and loops generating their `breakLoc`
and `contLoc` labels.

In these templates, `false` is represented by `0` and `true` by any value other than `0`.


Implementation Issues
---------------------

* Short-circuiting boolean `&&` and `||` was challenging in the LIR where everything was assigned to
a value. The flattening originally performed the following transformation:

      x = a && b || c   ->   t1 = a && b
                             t2 = t1 || c
                             x = t2

If `b` and `c` were function calls, however, they would always be calculated, since the goal was
to pre-calculate the values in steps. The solution was to create an `if`-like structure that
jumped across calculating further values if an operand of `&&` was false or one of `||` was true.

* Modulo didn't have an obvious primitive, but we eventually solved that by reading some x86
documentation. We use the remainder stored in `%rdx` after the `idivq` instruction. This appears
to be similar to gcc's handling of it, and we checked our implementation's behavior against that.

* The syntax of the `cmp` instruction was somewhat confusing to us, especially when inequality-based
jumps had to be taken afterward. It made it harder to debug the fact that my Lexer had the
definitions of GT/LT and GTE/LTE flipped. That issue has been fixed.

* Similarly to modulo, the `not` instruction we wanted didn't exist. The unary `not` in x86 is
two's-complement bitwise NOT, so it turns 0 into -1 and 1 into -2. We wrote our own boolean NOT
template and used that instead.

Known Problems
--------------

None yet.
Loading

0 comments on commit d1ec1dc

Please sign in to comment.