From 27d6015b215391caa67330815e4f21007d2f9150 Mon Sep 17 00:00:00 2001
From: Samuel Thomas <sgpthomas@gmail.com>
Date: Tue, 18 Aug 2020 00:19:06 -0700
Subject: [PATCH] getting data for systolic arrays

---
 .../large_polybench/linear-algebra-trmm.fuse  |   7 +-
 .../small_polybench/linear-algebra-trmm.fuse  |   5 +-
 bin/gen_data.py                               |  22 +-
 bin/run-benchmark                             |   2 +-
 runt.toml                                     |   1 -
 systolic-lang/hls_gemm/gemm2.fuse             |  50 ++++
 systolic-lang/hls_gemm/gemm2.header           |   7 +
 systolic-lang/hls_gemm/gemm3.fuse             |  50 ++++
 systolic-lang/hls_gemm/gemm3.header           |   7 +
 systolic-lang/hls_gemm/gemm4.fuse             |  50 ++++
 systolic-lang/hls_gemm/gemm4.header           |   7 +
 systolic-lang/hls_gemm/gemm6.fuse             |  50 ++++
 systolic-lang/hls_gemm/gemm6.header           |   7 +
 systolic-lang/hls_gemm/gemm8.fuse             |  50 ++++
 systolic-lang/hls_gemm/gemm8.header           |   7 +
 systolic-lang/systolic.csv                    |   8 +
 systolic-lang/templates/s2.data               |  34 +++
 systolic-lang/templates/s2.meminit/l0.dat     |   2 +
 systolic-lang/templates/s2.meminit/l1.dat     |   2 +
 .../templates/s2.meminit/out_mem.dat          |   1 +
 .../templates/s2.meminit/out_mem0.dat         |   0
 systolic-lang/templates/s2.meminit/t0.dat     |   2 +
 systolic-lang/templates/s2.meminit/t1.dat     |   2 +
 systolic-lang/templates/s2.template           |  23 ++
 systolic-lang/templates/s3.data               |  54 +++++
 systolic-lang/templates/s3.meminit/l0.dat     |   3 +
 systolic-lang/templates/s3.meminit/l1.dat     |   3 +
 systolic-lang/templates/s3.meminit/l2.dat     |   3 +
 .../templates/s3.meminit/out_mem.dat          |   1 +
 .../templates/s3.meminit/out_mem0.dat         |   0
 systolic-lang/templates/s3.meminit/t0.dat     |   3 +
 systolic-lang/templates/s3.meminit/t1.dat     |   3 +
 systolic-lang/templates/s3.meminit/t2.dat     |   3 +
 systolic-lang/templates/s3.template           |  23 ++
 systolic-lang/templates/s4.data               |  78 +++++++
 systolic-lang/templates/s4.meminit/l0.dat     |   4 +
 systolic-lang/templates/s4.meminit/l1.dat     |   4 +
 systolic-lang/templates/s4.meminit/l2.dat     |   4 +
 systolic-lang/templates/s4.meminit/l3.dat     |   4 +
 .../templates/s4.meminit/out_mem.dat          |   1 +
 .../templates/s4.meminit/out_mem0.dat         |   0
 systolic-lang/templates/s4.meminit/t0.dat     |   4 +
 systolic-lang/templates/s4.meminit/t1.dat     |   4 +
 systolic-lang/templates/s4.meminit/t2.dat     |   4 +
 systolic-lang/templates/s4.meminit/t3.dat     |   4 +
 systolic-lang/templates/s4.template           |  23 ++
 systolic-lang/templates/s6.data               | 138 +++++++++++
 systolic-lang/templates/s6.meminit/l0.dat     |   6 +
 systolic-lang/templates/s6.meminit/l1.dat     |   6 +
 systolic-lang/templates/s6.meminit/l2.dat     |   6 +
 systolic-lang/templates/s6.meminit/l3.dat     |   6 +
 systolic-lang/templates/s6.meminit/l4.dat     |   6 +
 systolic-lang/templates/s6.meminit/l5.dat     |   6 +
 .../templates/s6.meminit/out_mem.dat          |   1 +
 .../templates/s6.meminit/out_mem0.dat         |   0
 systolic-lang/templates/s6.meminit/t0.dat     |   6 +
 systolic-lang/templates/s6.meminit/t1.dat     |   6 +
 systolic-lang/templates/s6.meminit/t2.dat     |   6 +
 systolic-lang/templates/s6.meminit/t3.dat     |   6 +
 systolic-lang/templates/s6.meminit/t4.dat     |   6 +
 systolic-lang/templates/s6.meminit/t5.dat     |   6 +
 systolic-lang/templates/s6.template           |  23 ++
 systolic-lang/templates/s8.data               | 214 ++++++++++++++++++
 systolic-lang/templates/s8.meminit/l0.dat     |   8 +
 systolic-lang/templates/s8.meminit/l1.dat     |   8 +
 systolic-lang/templates/s8.meminit/l2.dat     |   8 +
 systolic-lang/templates/s8.meminit/l3.dat     |   8 +
 systolic-lang/templates/s8.meminit/l4.dat     |   8 +
 systolic-lang/templates/s8.meminit/l5.dat     |   8 +
 systolic-lang/templates/s8.meminit/l6.dat     |   8 +
 systolic-lang/templates/s8.meminit/l7.dat     |   8 +
 .../templates/s8.meminit/out_mem.dat          |   1 +
 .../templates/s8.meminit/out_mem0.dat         |   0
 systolic-lang/templates/s8.meminit/t0.dat     |   8 +
 systolic-lang/templates/s8.meminit/t1.dat     |   8 +
 systolic-lang/templates/s8.meminit/t2.dat     |   8 +
 systolic-lang/templates/s8.meminit/t3.dat     |   8 +
 systolic-lang/templates/s8.meminit/t4.dat     |   8 +
 systolic-lang/templates/s8.meminit/t5.dat     |   8 +
 systolic-lang/templates/s8.meminit/t6.dat     |   8 +
 systolic-lang/templates/s8.meminit/t7.dat     |   8 +
 systolic-lang/templates/s8.template           |  23 ++
 82 files changed, 1215 insertions(+), 11 deletions(-)
 create mode 100644 systolic-lang/hls_gemm/gemm2.fuse
 create mode 100644 systolic-lang/hls_gemm/gemm2.header
 create mode 100644 systolic-lang/hls_gemm/gemm3.fuse
 create mode 100644 systolic-lang/hls_gemm/gemm3.header
 create mode 100644 systolic-lang/hls_gemm/gemm4.fuse
 create mode 100644 systolic-lang/hls_gemm/gemm4.header
 create mode 100644 systolic-lang/hls_gemm/gemm6.fuse
 create mode 100644 systolic-lang/hls_gemm/gemm6.header
 create mode 100644 systolic-lang/hls_gemm/gemm8.fuse
 create mode 100644 systolic-lang/hls_gemm/gemm8.header
 create mode 100644 systolic-lang/systolic.csv
 create mode 100644 systolic-lang/templates/s2.data
 create mode 100644 systolic-lang/templates/s2.meminit/l0.dat
 create mode 100644 systolic-lang/templates/s2.meminit/l1.dat
 create mode 100644 systolic-lang/templates/s2.meminit/out_mem.dat
 create mode 100644 systolic-lang/templates/s2.meminit/out_mem0.dat
 create mode 100644 systolic-lang/templates/s2.meminit/t0.dat
 create mode 100644 systolic-lang/templates/s2.meminit/t1.dat
 create mode 100644 systolic-lang/templates/s2.template
 create mode 100644 systolic-lang/templates/s3.data
 create mode 100644 systolic-lang/templates/s3.meminit/l0.dat
 create mode 100644 systolic-lang/templates/s3.meminit/l1.dat
 create mode 100644 systolic-lang/templates/s3.meminit/l2.dat
 create mode 100644 systolic-lang/templates/s3.meminit/out_mem.dat
 create mode 100644 systolic-lang/templates/s3.meminit/out_mem0.dat
 create mode 100644 systolic-lang/templates/s3.meminit/t0.dat
 create mode 100644 systolic-lang/templates/s3.meminit/t1.dat
 create mode 100644 systolic-lang/templates/s3.meminit/t2.dat
 create mode 100644 systolic-lang/templates/s3.template
 create mode 100644 systolic-lang/templates/s4.data
 create mode 100644 systolic-lang/templates/s4.meminit/l0.dat
 create mode 100644 systolic-lang/templates/s4.meminit/l1.dat
 create mode 100644 systolic-lang/templates/s4.meminit/l2.dat
 create mode 100644 systolic-lang/templates/s4.meminit/l3.dat
 create mode 100644 systolic-lang/templates/s4.meminit/out_mem.dat
 create mode 100644 systolic-lang/templates/s4.meminit/out_mem0.dat
 create mode 100644 systolic-lang/templates/s4.meminit/t0.dat
 create mode 100644 systolic-lang/templates/s4.meminit/t1.dat
 create mode 100644 systolic-lang/templates/s4.meminit/t2.dat
 create mode 100644 systolic-lang/templates/s4.meminit/t3.dat
 create mode 100644 systolic-lang/templates/s4.template
 create mode 100644 systolic-lang/templates/s6.data
 create mode 100644 systolic-lang/templates/s6.meminit/l0.dat
 create mode 100644 systolic-lang/templates/s6.meminit/l1.dat
 create mode 100644 systolic-lang/templates/s6.meminit/l2.dat
 create mode 100644 systolic-lang/templates/s6.meminit/l3.dat
 create mode 100644 systolic-lang/templates/s6.meminit/l4.dat
 create mode 100644 systolic-lang/templates/s6.meminit/l5.dat
 create mode 100644 systolic-lang/templates/s6.meminit/out_mem.dat
 create mode 100644 systolic-lang/templates/s6.meminit/out_mem0.dat
 create mode 100644 systolic-lang/templates/s6.meminit/t0.dat
 create mode 100644 systolic-lang/templates/s6.meminit/t1.dat
 create mode 100644 systolic-lang/templates/s6.meminit/t2.dat
 create mode 100644 systolic-lang/templates/s6.meminit/t3.dat
 create mode 100644 systolic-lang/templates/s6.meminit/t4.dat
 create mode 100644 systolic-lang/templates/s6.meminit/t5.dat
 create mode 100644 systolic-lang/templates/s6.template
 create mode 100644 systolic-lang/templates/s8.data
 create mode 100644 systolic-lang/templates/s8.meminit/l0.dat
 create mode 100644 systolic-lang/templates/s8.meminit/l1.dat
 create mode 100644 systolic-lang/templates/s8.meminit/l2.dat
 create mode 100644 systolic-lang/templates/s8.meminit/l3.dat
 create mode 100644 systolic-lang/templates/s8.meminit/l4.dat
 create mode 100644 systolic-lang/templates/s8.meminit/l5.dat
 create mode 100644 systolic-lang/templates/s8.meminit/l6.dat
 create mode 100644 systolic-lang/templates/s8.meminit/l7.dat
 create mode 100644 systolic-lang/templates/s8.meminit/out_mem.dat
 create mode 100644 systolic-lang/templates/s8.meminit/out_mem0.dat
 create mode 100644 systolic-lang/templates/s8.meminit/t0.dat
 create mode 100644 systolic-lang/templates/s8.meminit/t1.dat
 create mode 100644 systolic-lang/templates/s8.meminit/t2.dat
 create mode 100644 systolic-lang/templates/s8.meminit/t3.dat
 create mode 100644 systolic-lang/templates/s8.meminit/t4.dat
 create mode 100644 systolic-lang/templates/s8.meminit/t5.dat
 create mode 100644 systolic-lang/templates/s8.meminit/t6.dat
 create mode 100644 systolic-lang/templates/s8.meminit/t7.dat
 create mode 100644 systolic-lang/templates/s8.template

diff --git a/benchmarks/large_polybench/linear-algebra-trmm.fuse b/benchmarks/large_polybench/linear-algebra-trmm.fuse
index b0c75d464..b20d9004f 100644
--- a/benchmarks/large_polybench/linear-algebra-trmm.fuse
+++ b/benchmarks/large_polybench/linear-algebra-trmm.fuse
@@ -5,6 +5,9 @@
 
 
 
+
+
+
 // END macro definition
 
 decl alpha_int: ubit<32>[1];
@@ -13,10 +16,10 @@ decl B_int: ubit<32>[60][80];
 
 for (let i: ubit<6> = 0..60) {
   for (let j: ubit<7> = 0..80) {
-    let k: ubit<6> = i + 1;
+    let k: ubit<6> = i + (1 as ubit<6>);
     // XXX: Try rewriting this as a 'for' loop
     while (k < 60) {
-      decor "#pragma HLS loop_tripcount min=0 max=7 avg=4"
+      decor "#pragma HLS loop_tripcount WHILE0"
       let B_i_j: ubit<32> = B_int[i][j];
       ---
       let B_k_j: ubit<32> = B_int[k][j];
diff --git a/benchmarks/small_polybench/linear-algebra-trmm.fuse b/benchmarks/small_polybench/linear-algebra-trmm.fuse
index 6f47a5c0b..93d547089 100644
--- a/benchmarks/small_polybench/linear-algebra-trmm.fuse
+++ b/benchmarks/small_polybench/linear-algebra-trmm.fuse
@@ -5,6 +5,7 @@
 
 
 
+
 // END macro definition
 
 decl alpha_int: ubit<32>[1];
@@ -13,10 +14,10 @@ decl B_int: ubit<32>[8][12];
 
 for (let i: ubit<4> = 0..8) {
   for (let j: ubit<4> = 0..12) {
-    let k: ubit<4> = i + 1;
+    let k: ubit<4> = i + (1 as ubit<4>);
     // XXX: Try rewriting this as a 'for' loop
     while (k < 8) {
-      decor "#pragma HLS loop_tripcount min=0 max=7 avg=4"
+      decor "#pragma HLS loop_tripcount WHILE0"
       let B_i_j: ubit<32> = B_int[i][j];
       ---
       let B_k_j: ubit<32> = B_int[k][j];
diff --git a/bin/gen_data.py b/bin/gen_data.py
index feb9f1f51..50318ecee 100755
--- a/bin/gen_data.py
+++ b/bin/gen_data.py
@@ -26,14 +26,20 @@ def modulate_size(size, banks):
     else:
         return [0]
 
-def main(path):
+def replace(mapping, key):
+    # Ints are returned unchanged; any other key is looked up in `mapping`.
+    if type(key) != int:
+        return mapping[key]
+    return key
+
+def main(path, all_random):
     template = json.load(path.open())
     mapping = template['key']
     memory = template['memory']
     result = {}
     for key in memory:
-        size = [mapping[key] for key in memory[key]['data']]
-        banks = memory[key]['banks']
+        size = [replace(mapping, key) for key in memory[key]['data']]
+        banks = [replace(mapping, key) for key in memory[key]['banks']]
         variants = [""] # include empty string so that we have the empty variant
         if 'variants' in memory[key]:
             variants += memory[key]['variants']
@@ -42,13 +48,19 @@ def main(path):
         for var in variants:
             # result[f'{key}{var}'] = data # include unbanked for Dahlia
             for b in generate_bank_strings(banks):
-                result[f'{key}{var}{b}'] = data
+                if all_random:
+                    result[f'{key}{var}{b}'] = generate(modulate_size(size, banks), bitwidth)
+                else:
+                    result[f'{key}{var}{b}'] = data
     print(json.dumps(result, indent=2))
 
 if __name__ == "__main__":
     filename = Path(sys.argv[1])
+    all_random = False
+    if len(sys.argv) > 2:
+        all_random = True
     if filename.exists():
-        main(filename)
+        main(filename, all_random)
     else:
         print(f"{filename} doesn't exist.")
         exit(1)
diff --git a/bin/run-benchmark b/bin/run-benchmark
index 0b38b4eb3..b1fe12a63 100755
--- a/bin/run-benchmark
+++ b/bin/run-benchmark
@@ -32,7 +32,7 @@ $script_dir/json_to_dat.py --mode json --output "$meminit" "$data"
 
 $script_dir/find-dahlia $input --lower -b futil -l error > "$benchmark"
 
-$script_dir/../target/debug/futil "$benchmark" -b verilog --verilator -d static-timing -l $script_dir/.. \
+$script_dir/../target/debug/futil "$benchmark" -b verilog --verilator -l $script_dir/.. \
     | DATA="$meminit" $script_dir/gen-vcd - 2> "$tmp/log" > "$tmp/out.vcd"
 
 # Translate the outputs back to a JSON filetmp
diff --git a/runt.toml b/runt.toml
index 87c12a0db..fb9333401 100644
--- a/runt.toml
+++ b/runt.toml
@@ -134,6 +134,5 @@ name = "(systolic array) simulation"
 paths = [ "systolic-lang/tests/verilog/*.expect" ]
 cmd = """
 DATA=./systolic-lang/tests/data ./bin/gen-vcd {} 2>/dev/null | vcdump | jq -f {}.jq
-
 """
 expect_dir = "systolic-lang/tests/simulation/"
diff --git a/systolic-lang/hls_gemm/gemm2.fuse b/systolic-lang/hls_gemm/gemm2.fuse
new file mode 100644
index 000000000..b0ea1b13c
--- /dev/null
+++ b/systolic-lang/hls_gemm/gemm2.fuse
@@ -0,0 +1,50 @@
+// BEGIN macro definitions
+
+
+
+
+
+// END macro definitions
+decl C_int: ubit<32>[2][2];
+decl A_int: ubit<32>[2][2];
+decl B_int: ubit<32>[2][2];
+
+let C: ubit<32>[2 bank 2][2 bank 2];
+let A: ubit<32>[2 bank 2][2];
+let B: ubit<32>[2][2 bank 2];
+
+view C_sh = C[_: bank 1][_: bank 1];
+view A_sh = A[_: bank 1][_: bank 1];
+view B_sh = B[_: bank 1][_: bank 1];
+
+// Input interface.
+for (let i: ubit<4> = 0..2) {
+  for (let j: ubit<4> = 0..2) {
+    A_sh[i][j] := A_int[i][j];
+    B_sh[i][j] := B_int[i][j];
+    C_sh[i][j] := C_int[i][j];
+  }
+}
+
+---
+for (let i: ubit<4> = 0..2) unroll 2 {
+  // Loop order has to change since j is not defined in the combine
+  // block otherwise.
+  for (let j: ubit<4> = 0..2) unroll 2 {
+    for (let k: ubit<4> = 0..2) {
+      let v: ubit<32> = A[i][k] * B[k][j];
+    } combine {
+      C[i][j] += v;
+    }
+  }
+}
+
+---
+
+// Output interface.
+
+for (let i: ubit<4> = 0..2) {
+  for (let j: ubit<4> = 0..2) {
+    C_int[i][j] := C_sh[i][j];
+  }
+}
diff --git a/systolic-lang/hls_gemm/gemm2.header b/systolic-lang/hls_gemm/gemm2.header
new file mode 100644
index 000000000..a047b4848
--- /dev/null
+++ b/systolic-lang/hls_gemm/gemm2.header
@@ -0,0 +1,7 @@
+// BEGIN macro definitions
+
+define(N, 2)
+define(N_ur, 2)
+define(N_bw, ubit<4>)
+
+// END macro definitions
\ No newline at end of file
diff --git a/systolic-lang/hls_gemm/gemm3.fuse b/systolic-lang/hls_gemm/gemm3.fuse
new file mode 100644
index 000000000..40d7df52b
--- /dev/null
+++ b/systolic-lang/hls_gemm/gemm3.fuse
@@ -0,0 +1,50 @@
+// BEGIN macro definitions
+
+
+
+
+
+// END macro definitions
+decl C_int: ubit<32>[3][3];
+decl A_int: ubit<32>[3][3];
+decl B_int: ubit<32>[3][3];
+
+let C: ubit<32>[3 bank 3][3 bank 3];
+let A: ubit<32>[3 bank 3][3];
+let B: ubit<32>[3][3 bank 3];
+
+view C_sh = C[_: bank 1][_: bank 1];
+view A_sh = A[_: bank 1][_: bank 1];
+view B_sh = B[_: bank 1][_: bank 1];
+
+// Input interface.
+for (let i: ubit<4> = 0..3) {
+  for (let j: ubit<4> = 0..3) {
+    A_sh[i][j] := A_int[i][j];
+    B_sh[i][j] := B_int[i][j];
+    C_sh[i][j] := C_int[i][j];
+  }
+}
+
+---
+for (let i: ubit<4> = 0..3) unroll 3 {
+  // Loop order has to change since j is not defined in the combine
+  // block otherwise.
+  for (let j: ubit<4> = 0..3) unroll 3 {
+    for (let k: ubit<4> = 0..3) {
+      let v: ubit<32> = A[i][k] * B[k][j];
+    } combine {
+      C[i][j] += v;
+    }
+  }
+}
+
+---
+
+// Output interface.
+
+for (let i: ubit<4> = 0..3) {
+  for (let j: ubit<4> = 0..3) {
+    C_int[i][j] := C_sh[i][j];
+  }
+}
diff --git a/systolic-lang/hls_gemm/gemm3.header b/systolic-lang/hls_gemm/gemm3.header
new file mode 100644
index 000000000..bb3596e56
--- /dev/null
+++ b/systolic-lang/hls_gemm/gemm3.header
@@ -0,0 +1,7 @@
+// BEGIN macro definitions
+
+define(N, 3)
+define(N_ur, 3)
+define(N_bw, ubit<4>)
+
+// END macro definitions
\ No newline at end of file
diff --git a/systolic-lang/hls_gemm/gemm4.fuse b/systolic-lang/hls_gemm/gemm4.fuse
new file mode 100644
index 000000000..81c99f701
--- /dev/null
+++ b/systolic-lang/hls_gemm/gemm4.fuse
@@ -0,0 +1,50 @@
+// BEGIN macro definitions
+
+
+
+
+
+// END macro definitions
+decl C_int: ubit<32>[4][4];
+decl A_int: ubit<32>[4][4];
+decl B_int: ubit<32>[4][4];
+
+let C: ubit<32>[4 bank 4][4 bank 4];
+let A: ubit<32>[4 bank 4][4];
+let B: ubit<32>[4][4 bank 4];
+
+view C_sh = C[_: bank 1][_: bank 1];
+view A_sh = A[_: bank 1][_: bank 1];
+view B_sh = B[_: bank 1][_: bank 1];
+
+// Input interface.
+for (let i: ubit<4> = 0..4) {
+  for (let j: ubit<4> = 0..4) {
+    A_sh[i][j] := A_int[i][j];
+    B_sh[i][j] := B_int[i][j];
+    C_sh[i][j] := C_int[i][j];
+  }
+}
+
+---
+for (let i: ubit<4> = 0..4) unroll 4 {
+  // Loop order has to change since j is not defined in the combine
+  // block otherwise.
+  for (let j: ubit<4> = 0..4) unroll 4 {
+    for (let k: ubit<4> = 0..4) {
+      let v: ubit<32> = A[i][k] * B[k][j];
+    } combine {
+      C[i][j] += v;
+    }
+  }
+}
+
+---
+
+// Output interface.
+
+for (let i: ubit<4> = 0..4) {
+  for (let j: ubit<4> = 0..4) {
+    C_int[i][j] := C_sh[i][j];
+  }
+}
diff --git a/systolic-lang/hls_gemm/gemm4.header b/systolic-lang/hls_gemm/gemm4.header
new file mode 100644
index 000000000..56415021f
--- /dev/null
+++ b/systolic-lang/hls_gemm/gemm4.header
@@ -0,0 +1,7 @@
+// BEGIN macro definitions
+
+define(N, 4)
+define(N_ur, 4)
+define(N_bw, ubit<4>)
+
+// END macro definitions
\ No newline at end of file
diff --git a/systolic-lang/hls_gemm/gemm6.fuse b/systolic-lang/hls_gemm/gemm6.fuse
new file mode 100644
index 000000000..059b6ff97
--- /dev/null
+++ b/systolic-lang/hls_gemm/gemm6.fuse
@@ -0,0 +1,50 @@
+// BEGIN macro definitions
+
+
+
+
+
+// END macro definitions
+decl C_int: ubit<32>[6][6];
+decl A_int: ubit<32>[6][6];
+decl B_int: ubit<32>[6][6];
+
+let C: ubit<32>[6 bank 6][6 bank 6];
+let A: ubit<32>[6 bank 6][6];
+let B: ubit<32>[6][6 bank 6];
+
+view C_sh = C[_: bank 1][_: bank 1];
+view A_sh = A[_: bank 1][_: bank 1];
+view B_sh = B[_: bank 1][_: bank 1];
+
+// Input interface.
+for (let i: ubit<4> = 0..6) {
+  for (let j: ubit<4> = 0..6) {
+    A_sh[i][j] := A_int[i][j];
+    B_sh[i][j] := B_int[i][j];
+    C_sh[i][j] := C_int[i][j];
+  }
+}
+
+---
+for (let i: ubit<4> = 0..6) unroll 6 {
+  // Loop order has to change since j is not defined in the combine
+  // block otherwise.
+  for (let j: ubit<4> = 0..6) unroll 6 {
+    for (let k: ubit<4> = 0..6) {
+      let v: ubit<32> = A[i][k] * B[k][j];
+    } combine {
+      C[i][j] += v;
+    }
+  }
+}
+
+---
+
+// Output interface.
+
+for (let i: ubit<4> = 0..6) {
+  for (let j: ubit<4> = 0..6) {
+    C_int[i][j] := C_sh[i][j];
+  }
+}
diff --git a/systolic-lang/hls_gemm/gemm6.header b/systolic-lang/hls_gemm/gemm6.header
new file mode 100644
index 000000000..52dc8fcc2
--- /dev/null
+++ b/systolic-lang/hls_gemm/gemm6.header
@@ -0,0 +1,7 @@
+// BEGIN macro definitions
+
+define(N, 6)
+define(N_ur, 6)
+define(N_bw, ubit<4>)
+
+// END macro definitions
\ No newline at end of file
diff --git a/systolic-lang/hls_gemm/gemm8.fuse b/systolic-lang/hls_gemm/gemm8.fuse
new file mode 100644
index 000000000..441d7c0b8
--- /dev/null
+++ b/systolic-lang/hls_gemm/gemm8.fuse
@@ -0,0 +1,50 @@
+// BEGIN macro definitions
+
+
+
+
+
+// END macro definitions
+decl C_int: ubit<32>[8][8];
+decl A_int: ubit<32>[8][8];
+decl B_int: ubit<32>[8][8];
+
+let C: ubit<32>[8 bank 8][8 bank 8];
+let A: ubit<32>[8 bank 8][8];
+let B: ubit<32>[8][8 bank 8];
+
+view C_sh = C[_: bank 1][_: bank 1];
+view A_sh = A[_: bank 1][_: bank 1];
+view B_sh = B[_: bank 1][_: bank 1];
+
+// Input interface.
+for (let i: ubit<4> = 0..8) {
+  for (let j: ubit<4> = 0..8) {
+    A_sh[i][j] := A_int[i][j];
+    B_sh[i][j] := B_int[i][j];
+    C_sh[i][j] := C_int[i][j];
+  }
+}
+
+---
+for (let i: ubit<4> = 0..8) unroll 8 {
+  // Loop order has to change since j is not defined in the combine
+  // block otherwise.
+  for (let j: ubit<4> = 0..8) unroll 8 {
+    for (let k: ubit<4> = 0..8) {
+      let v: ubit<32> = A[i][k] * B[k][j];
+    } combine {
+      C[i][j] += v;
+    }
+  }
+}
+
+---
+
+// Output interface.
+
+for (let i: ubit<4> = 0..8) {
+  for (let j: ubit<4> = 0..8) {
+    C_int[i][j] := C_sh[i][j];
+  }
+}
diff --git a/systolic-lang/hls_gemm/gemm8.header b/systolic-lang/hls_gemm/gemm8.header
new file mode 100644
index 000000000..db2ea1a32
--- /dev/null
+++ b/systolic-lang/hls_gemm/gemm8.header
@@ -0,0 +1,7 @@
+// BEGIN macro definitions
+
+define(N, 8)
+define(N_ur, 8)
+define(N_bw, ubit<4>)
+
+// END macro definitions
\ No newline at end of file
diff --git a/systolic-lang/systolic.csv b/systolic-lang/systolic.csv
new file mode 100644
index 000000000..f0a5a8d49
--- /dev/null
+++ b/systolic-lang/systolic.csv
@@ -0,0 +1,8 @@
+out_1,26
+out_2,77
+out_3,132
+out_4,191
+out_5,254
+out_6,321
+out_7,392
+out_8,467
diff --git a/systolic-lang/templates/s2.data b/systolic-lang/templates/s2.data
new file mode 100644
index 000000000..71f0d054e
--- /dev/null
+++ b/systolic-lang/templates/s2.data
@@ -0,0 +1,34 @@
+{
+  "l0": {
+    "data": [
+      83,
+      40
+    ],
+    "bitwidth": 32
+  },
+  "l1": {
+    "data": [
+      5,
+      57
+    ],
+    "bitwidth": 32
+  },
+  "t0": {
+    "data": [
+      64,
+      91
+    ],
+    "bitwidth": 32
+  },
+  "t1": {
+    "data": [
+      98,
+      86
+    ],
+    "bitwidth": 32
+  },
+  "out_mem": {
+    "data": 95,
+    "bitwidth": 32
+  }
+}
diff --git a/systolic-lang/templates/s2.meminit/l0.dat b/systolic-lang/templates/s2.meminit/l0.dat
new file mode 100644
index 000000000..5179a00d1
--- /dev/null
+++ b/systolic-lang/templates/s2.meminit/l0.dat
@@ -0,0 +1,2 @@
+53
+28
diff --git a/systolic-lang/templates/s2.meminit/l1.dat b/systolic-lang/templates/s2.meminit/l1.dat
new file mode 100644
index 000000000..470183188
--- /dev/null
+++ b/systolic-lang/templates/s2.meminit/l1.dat
@@ -0,0 +1,2 @@
+5
+39
diff --git a/systolic-lang/templates/s2.meminit/out_mem.dat b/systolic-lang/templates/s2.meminit/out_mem.dat
new file mode 100644
index 000000000..c14ff41c3
--- /dev/null
+++ b/systolic-lang/templates/s2.meminit/out_mem.dat
@@ -0,0 +1 @@
+5f
diff --git a/systolic-lang/templates/s2.meminit/out_mem0.dat b/systolic-lang/templates/s2.meminit/out_mem0.dat
new file mode 100644
index 000000000..e69de29bb
diff --git a/systolic-lang/templates/s2.meminit/t0.dat b/systolic-lang/templates/s2.meminit/t0.dat
new file mode 100644
index 000000000..29ae67e2c
--- /dev/null
+++ b/systolic-lang/templates/s2.meminit/t0.dat
@@ -0,0 +1,2 @@
+40
+5b
diff --git a/systolic-lang/templates/s2.meminit/t1.dat b/systolic-lang/templates/s2.meminit/t1.dat
new file mode 100644
index 000000000..c3c24dfce
--- /dev/null
+++ b/systolic-lang/templates/s2.meminit/t1.dat
@@ -0,0 +1,2 @@
+62
+56
diff --git a/systolic-lang/templates/s2.template b/systolic-lang/templates/s2.template
new file mode 100644
index 000000000..3d1b43b40
--- /dev/null
+++ b/systolic-lang/templates/s2.template
@@ -0,0 +1,23 @@
+{
+  "key": {
+    "N": 4,
+    "B": 2
+  },
+  "memory": {
+    "l": {
+      "data": ["N"],
+      "bitwidth": 32,
+      "banks": ["B"]
+    },
+    "t": {
+      "data": ["N"],
+      "bitwidth": 32,
+      "banks": ["B"]
+    },
+    "out_mem": {
+      "data": [],
+      "bitwidth": 32,
+      "banks": []
+    }
+  }
+}
diff --git a/systolic-lang/templates/s3.data b/systolic-lang/templates/s3.data
new file mode 100644
index 000000000..83083a054
--- /dev/null
+++ b/systolic-lang/templates/s3.data
@@ -0,0 +1,54 @@
+{
+  "l0": {
+    "data": [
+      46,
+      19,
+      17
+    ],
+    "bitwidth": 32
+  },
+  "l1": {
+    "data": [
+      99,
+      46,
+      37
+    ],
+    "bitwidth": 32
+  },
+  "l2": {
+    "data": [
+      30,
+      23,
+      26
+    ],
+    "bitwidth": 32
+  },
+  "t0": {
+    "data": [
+      43,
+      36,
+      27
+    ],
+    "bitwidth": 32
+  },
+  "t1": {
+    "data": [
+      87,
+      25,
+      69
+    ],
+    "bitwidth": 32
+  },
+  "t2": {
+    "data": [
+      7,
+      19,
+      48
+    ],
+    "bitwidth": 32
+  },
+  "out_mem": {
+    "data": 35,
+    "bitwidth": 32
+  }
+}
diff --git a/systolic-lang/templates/s3.meminit/l0.dat b/systolic-lang/templates/s3.meminit/l0.dat
new file mode 100644
index 000000000..00d8a15c7
--- /dev/null
+++ b/systolic-lang/templates/s3.meminit/l0.dat
@@ -0,0 +1,3 @@
+2e
+13
+11
diff --git a/systolic-lang/templates/s3.meminit/l1.dat b/systolic-lang/templates/s3.meminit/l1.dat
new file mode 100644
index 000000000..d2ae23df5
--- /dev/null
+++ b/systolic-lang/templates/s3.meminit/l1.dat
@@ -0,0 +1,3 @@
+63
+2e
+25
diff --git a/systolic-lang/templates/s3.meminit/l2.dat b/systolic-lang/templates/s3.meminit/l2.dat
new file mode 100644
index 000000000..c0b604c9c
--- /dev/null
+++ b/systolic-lang/templates/s3.meminit/l2.dat
@@ -0,0 +1,3 @@
+1e
+17
+1a
diff --git a/systolic-lang/templates/s3.meminit/out_mem.dat b/systolic-lang/templates/s3.meminit/out_mem.dat
new file mode 100644
index 000000000..409940768
--- /dev/null
+++ b/systolic-lang/templates/s3.meminit/out_mem.dat
@@ -0,0 +1 @@
+23
diff --git a/systolic-lang/templates/s3.meminit/out_mem0.dat b/systolic-lang/templates/s3.meminit/out_mem0.dat
new file mode 100644
index 000000000..e69de29bb
diff --git a/systolic-lang/templates/s3.meminit/t0.dat b/systolic-lang/templates/s3.meminit/t0.dat
new file mode 100644
index 000000000..afaa990e4
--- /dev/null
+++ b/systolic-lang/templates/s3.meminit/t0.dat
@@ -0,0 +1,3 @@
+2b
+24
+1b
diff --git a/systolic-lang/templates/s3.meminit/t1.dat b/systolic-lang/templates/s3.meminit/t1.dat
new file mode 100644
index 000000000..f489510e9
--- /dev/null
+++ b/systolic-lang/templates/s3.meminit/t1.dat
@@ -0,0 +1,3 @@
+57
+19
+45
diff --git a/systolic-lang/templates/s3.meminit/t2.dat b/systolic-lang/templates/s3.meminit/t2.dat
new file mode 100644
index 000000000..1dcb02a97
--- /dev/null
+++ b/systolic-lang/templates/s3.meminit/t2.dat
@@ -0,0 +1,3 @@
+7
+13
+30
diff --git a/systolic-lang/templates/s3.template b/systolic-lang/templates/s3.template
new file mode 100644
index 000000000..ca1947960
--- /dev/null
+++ b/systolic-lang/templates/s3.template
@@ -0,0 +1,23 @@
+{
+  "key": {
+    "N": 9,
+    "B": 3
+  },
+  "memory": {
+    "l": {
+      "data": ["N"],
+      "bitwidth": 32,
+      "banks": ["B"]
+    },
+    "t": {
+      "data": ["N"],
+      "bitwidth": 32,
+      "banks": ["B"]
+    },
+    "out_mem": {
+      "data": [],
+      "bitwidth": 32,
+      "banks": []
+    }
+  }
+}
diff --git a/systolic-lang/templates/s4.data b/systolic-lang/templates/s4.data
new file mode 100644
index 000000000..62d62a42c
--- /dev/null
+++ b/systolic-lang/templates/s4.data
@@ -0,0 +1,78 @@
+{
+  "l0": {
+    "data": [
+      68,
+      45,
+      4,
+      97
+    ],
+    "bitwidth": 32
+  },
+  "l1": {
+    "data": [
+      72,
+      56,
+      7,
+      73
+    ],
+    "bitwidth": 32
+  },
+  "l2": {
+    "data": [
+      23,
+      72,
+      64,
+      51
+    ],
+    "bitwidth": 32
+  },
+  "l3": {
+    "data": [
+      37,
+      35,
+      85,
+      61
+    ],
+    "bitwidth": 32
+  },
+  "t0": {
+    "data": [
+      35,
+      86,
+      21,
+      16
+    ],
+    "bitwidth": 32
+  },
+  "t1": {
+    "data": [
+      9,
+      92,
+      31,
+      14
+    ],
+    "bitwidth": 32
+  },
+  "t2": {
+    "data": [
+      6,
+      85,
+      86,
+      80
+    ],
+    "bitwidth": 32
+  },
+  "t3": {
+    "data": [
+      96,
+      72,
+      23,
+      61
+    ],
+    "bitwidth": 32
+  },
+  "out_mem": {
+    "data": 11,
+    "bitwidth": 32
+  }
+}
diff --git a/systolic-lang/templates/s4.meminit/l0.dat b/systolic-lang/templates/s4.meminit/l0.dat
new file mode 100644
index 000000000..8b757af8e
--- /dev/null
+++ b/systolic-lang/templates/s4.meminit/l0.dat
@@ -0,0 +1,4 @@
+44
+2d
+4
+61
diff --git a/systolic-lang/templates/s4.meminit/l1.dat b/systolic-lang/templates/s4.meminit/l1.dat
new file mode 100644
index 000000000..ff8fcc3d3
--- /dev/null
+++ b/systolic-lang/templates/s4.meminit/l1.dat
@@ -0,0 +1,4 @@
+48
+38
+7
+49
diff --git a/systolic-lang/templates/s4.meminit/l2.dat b/systolic-lang/templates/s4.meminit/l2.dat
new file mode 100644
index 000000000..9213c5b0f
--- /dev/null
+++ b/systolic-lang/templates/s4.meminit/l2.dat
@@ -0,0 +1,4 @@
+17
+48
+40
+33
diff --git a/systolic-lang/templates/s4.meminit/l3.dat b/systolic-lang/templates/s4.meminit/l3.dat
new file mode 100644
index 000000000..ec3d69c8a
--- /dev/null
+++ b/systolic-lang/templates/s4.meminit/l3.dat
@@ -0,0 +1,4 @@
+25
+23
+55
+3d
diff --git a/systolic-lang/templates/s4.meminit/out_mem.dat b/systolic-lang/templates/s4.meminit/out_mem.dat
new file mode 100644
index 000000000..617807982
--- /dev/null
+++ b/systolic-lang/templates/s4.meminit/out_mem.dat
@@ -0,0 +1 @@
+b
diff --git a/systolic-lang/templates/s4.meminit/out_mem0.dat b/systolic-lang/templates/s4.meminit/out_mem0.dat
new file mode 100644
index 000000000..e69de29bb
diff --git a/systolic-lang/templates/s4.meminit/t0.dat b/systolic-lang/templates/s4.meminit/t0.dat
new file mode 100644
index 000000000..4c8cb52c0
--- /dev/null
+++ b/systolic-lang/templates/s4.meminit/t0.dat
@@ -0,0 +1,4 @@
+23
+56
+15
+10
diff --git a/systolic-lang/templates/s4.meminit/t1.dat b/systolic-lang/templates/s4.meminit/t1.dat
new file mode 100644
index 000000000..4696ce2d6
--- /dev/null
+++ b/systolic-lang/templates/s4.meminit/t1.dat
@@ -0,0 +1,4 @@
+9
+5c
+1f
+e
diff --git a/systolic-lang/templates/s4.meminit/t2.dat b/systolic-lang/templates/s4.meminit/t2.dat
new file mode 100644
index 000000000..8b3c16fc9
--- /dev/null
+++ b/systolic-lang/templates/s4.meminit/t2.dat
@@ -0,0 +1,4 @@
+6
+55
+56
+50
diff --git a/systolic-lang/templates/s4.meminit/t3.dat b/systolic-lang/templates/s4.meminit/t3.dat
new file mode 100644
index 000000000..a93a0b3aa
--- /dev/null
+++ b/systolic-lang/templates/s4.meminit/t3.dat
@@ -0,0 +1,4 @@
+60
+48
+17
+3d
diff --git a/systolic-lang/templates/s4.template b/systolic-lang/templates/s4.template
new file mode 100644
index 000000000..d6cbb7e1b
--- /dev/null
+++ b/systolic-lang/templates/s4.template
@@ -0,0 +1,23 @@
+{
+  "key": {
+    "N": 16,
+    "B": 4
+  },
+  "memory": {
+    "l": {
+      "data": ["N"],
+      "bitwidth": 32,
+      "banks": ["B"]
+    },
+    "t": {
+      "data": ["N"],
+      "bitwidth": 32,
+      "banks": ["B"]
+    },
+    "out_mem": {
+      "data": [],
+      "bitwidth": 32,
+      "banks": []
+    }
+  }
+}
diff --git a/systolic-lang/templates/s6.data b/systolic-lang/templates/s6.data
new file mode 100644
index 000000000..89cb165e1
--- /dev/null
+++ b/systolic-lang/templates/s6.data
@@ -0,0 +1,138 @@
+{
+  "l0": {
+    "data": [
+      64,
+      53,
+      46,
+      66,
+      49,
+      29
+    ],
+    "bitwidth": 32
+  },
+  "l1": {
+    "data": [
+      30,
+      3,
+      11,
+      49,
+      9,
+      80
+    ],
+    "bitwidth": 32
+  },
+  "l2": {
+    "data": [
+      15,
+      89,
+      28,
+      18,
+      10,
+      32
+    ],
+    "bitwidth": 32
+  },
+  "l3": {
+    "data": [
+      8,
+      9,
+      4,
+      6,
+      35,
+      47
+    ],
+    "bitwidth": 32
+  },
+  "l4": {
+    "data": [
+      23,
+      30,
+      19,
+      76,
+      50,
+      15
+    ],
+    "bitwidth": 32
+  },
+  "l5": {
+    "data": [
+      48,
+      79,
+      69,
+      34,
+      24,
+      61
+    ],
+    "bitwidth": 32
+  },
+  "t0": {
+    "data": [
+      90,
+      24,
+      62,
+      7,
+      85,
+      66
+    ],
+    "bitwidth": 32
+  },
+  "t1": {
+    "data": [
+      99,
+      10,
+      78,
+      36,
+      1,
+      41
+    ],
+    "bitwidth": 32
+  },
+  "t2": {
+    "data": [
+      22,
+      26,
+      21,
+      66,
+      92,
+      4
+    ],
+    "bitwidth": 32
+  },
+  "t3": {
+    "data": [
+      9,
+      11,
+      62,
+      0,
+      14,
+      13
+    ],
+    "bitwidth": 32
+  },
+  "t4": {
+    "data": [
+      26,
+      6,
+      6,
+      36,
+      49,
+      58
+    ],
+    "bitwidth": 32
+  },
+  "t5": {
+    "data": [
+      92,
+      77,
+      75,
+      31,
+      33,
+      1
+    ],
+    "bitwidth": 32
+  },
+  "out_mem": {
+    "data": 66,
+    "bitwidth": 32
+  }
+}
diff --git a/systolic-lang/templates/s6.meminit/l0.dat b/systolic-lang/templates/s6.meminit/l0.dat
new file mode 100644
index 000000000..d525a1876
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/l0.dat
@@ -0,0 +1,6 @@
+40
+35
+2e
+42
+31
+1d
diff --git a/systolic-lang/templates/s6.meminit/l1.dat b/systolic-lang/templates/s6.meminit/l1.dat
new file mode 100644
index 000000000..27ddd88a3
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/l1.dat
@@ -0,0 +1,6 @@
+1e
+3
+b
+31
+9
+50
diff --git a/systolic-lang/templates/s6.meminit/l2.dat b/systolic-lang/templates/s6.meminit/l2.dat
new file mode 100644
index 000000000..c7c00149f
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/l2.dat
@@ -0,0 +1,6 @@
+f
+59
+1c
+12
+a
+20
diff --git a/systolic-lang/templates/s6.meminit/l3.dat b/systolic-lang/templates/s6.meminit/l3.dat
new file mode 100644
index 000000000..352e07d22
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/l3.dat
@@ -0,0 +1,6 @@
+8
+9
+4
+6
+23
+2f
diff --git a/systolic-lang/templates/s6.meminit/l4.dat b/systolic-lang/templates/s6.meminit/l4.dat
new file mode 100644
index 000000000..ce9a12c5d
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/l4.dat
@@ -0,0 +1,6 @@
+17
+1e
+13
+4c
+32
+f
diff --git a/systolic-lang/templates/s6.meminit/l5.dat b/systolic-lang/templates/s6.meminit/l5.dat
new file mode 100644
index 000000000..cfc0d44c1
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/l5.dat
@@ -0,0 +1,6 @@
+30
+4f
+45
+22
+18
+3d
diff --git a/systolic-lang/templates/s6.meminit/out_mem.dat b/systolic-lang/templates/s6.meminit/out_mem.dat
new file mode 100644
index 000000000..d81cc0710
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/out_mem.dat
@@ -0,0 +1 @@
+42
diff --git a/systolic-lang/templates/s6.meminit/out_mem0.dat b/systolic-lang/templates/s6.meminit/out_mem0.dat
new file mode 100644
index 000000000..e69de29bb
diff --git a/systolic-lang/templates/s6.meminit/t0.dat b/systolic-lang/templates/s6.meminit/t0.dat
new file mode 100644
index 000000000..7749a99ec
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/t0.dat
@@ -0,0 +1,6 @@
+5a
+18
+3e
+7
+55
+42
diff --git a/systolic-lang/templates/s6.meminit/t1.dat b/systolic-lang/templates/s6.meminit/t1.dat
new file mode 100644
index 000000000..268316334
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/t1.dat
@@ -0,0 +1,6 @@
+63
+a
+4e
+24
+1
+29
diff --git a/systolic-lang/templates/s6.meminit/t2.dat b/systolic-lang/templates/s6.meminit/t2.dat
new file mode 100644
index 000000000..9bb8e2242
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/t2.dat
@@ -0,0 +1,6 @@
+16
+1a
+15
+42
+5c
+4
diff --git a/systolic-lang/templates/s6.meminit/t3.dat b/systolic-lang/templates/s6.meminit/t3.dat
new file mode 100644
index 000000000..d98d36b99
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/t3.dat
@@ -0,0 +1,6 @@
+9
+b
+3e
+0
+e
+d
diff --git a/systolic-lang/templates/s6.meminit/t4.dat b/systolic-lang/templates/s6.meminit/t4.dat
new file mode 100644
index 000000000..0eec85dc9
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/t4.dat
@@ -0,0 +1,6 @@
+1a
+6
+6
+24
+31
+3a
diff --git a/systolic-lang/templates/s6.meminit/t5.dat b/systolic-lang/templates/s6.meminit/t5.dat
new file mode 100644
index 000000000..08d67d30d
--- /dev/null
+++ b/systolic-lang/templates/s6.meminit/t5.dat
@@ -0,0 +1,6 @@
+5c
+4d
+4b
+1f
+21
+1
diff --git a/systolic-lang/templates/s6.template b/systolic-lang/templates/s6.template
new file mode 100644
index 000000000..f16d444d0
--- /dev/null
+++ b/systolic-lang/templates/s6.template
@@ -0,0 +1,23 @@
+{
+  "key": {
+    "N": 36,
+    "B": 6
+  },
+  "memory": {
+    "l": {
+      "data": ["N"],
+      "bitwidth": 32,
+      "banks": ["B"]
+    },
+    "t": {
+      "data": ["N"],
+      "bitwidth": 32,
+      "banks": ["B"]
+    },
+    "out_mem": {
+      "data": [],
+      "bitwidth": 32,
+      "banks": []
+    }
+  }
+}
diff --git a/systolic-lang/templates/s8.data b/systolic-lang/templates/s8.data
new file mode 100644
index 000000000..3fcb53a97
--- /dev/null
+++ b/systolic-lang/templates/s8.data
@@ -0,0 +1,214 @@
+{
+  "l0": {
+    "data": [
+      4,
+      9,
+      17,
+      49,
+      64,
+      70,
+      10,
+      24
+    ],
+    "bitwidth": 32
+  },
+  "l1": {
+    "data": [
+      46,
+      4,
+      98,
+      70,
+      29,
+      59,
+      26,
+      57
+    ],
+    "bitwidth": 32
+  },
+  "l2": {
+    "data": [
+      50,
+      45,
+      21,
+      52,
+      25,
+      24,
+      83,
+      47
+    ],
+    "bitwidth": 32
+  },
+  "l3": {
+    "data": [
+      16,
+      58,
+      76,
+      13,
+      90,
+      70,
+      8,
+      62
+    ],
+    "bitwidth": 32
+  },
+  "l4": {
+    "data": [
+      96,
+      95,
+      28,
+      2,
+      86,
+      70,
+      44,
+      53
+    ],
+    "bitwidth": 32
+  },
+  "l5": {
+    "data": [
+      67,
+      81,
+      10,
+      72,
+      69,
+      67,
+      92,
+      44
+    ],
+    "bitwidth": 32
+  },
+  "l6": {
+    "data": [
+      16,
+      32,
+      30,
+      81,
+      56,
+      17,
+      84,
+      19
+    ],
+    "bitwidth": 32
+  },
+  "l7": {
+    "data": [
+      57,
+      66,
+      60,
+      97,
+      68,
+      55,
+      70,
+      47
+    ],
+    "bitwidth": 32
+  },
+  "t0": {
+    "data": [
+      20,
+      76,
+      62,
+      30,
+      28,
+      91,
+      8,
+      22
+    ],
+    "bitwidth": 32
+  },
+  "t1": {
+    "data": [
+      39,
+      98,
+      72,
+      13,
+      38,
+      28,
+      48,
+      89
+    ],
+    "bitwidth": 32
+  },
+  "t2": {
+    "data": [
+      95,
+      4,
+      50,
+      2,
+      28,
+      10,
+      42,
+      98
+    ],
+    "bitwidth": 32
+  },
+  "t3": {
+    "data": [
+      89,
+      14,
+      8,
+      47,
+      68,
+      18,
+      75,
+      28
+    ],
+    "bitwidth": 32
+  },
+  "t4": {
+    "data": [
+      72,
+      95,
+      10,
+      12,
+      65,
+      53,
+      92,
+      26
+    ],
+    "bitwidth": 32
+  },
+  "t5": {
+    "data": [
+      16,
+      60,
+      9,
+      81,
+      10,
+      77,
+      15,
+      50
+    ],
+    "bitwidth": 32
+  },
+  "t6": {
+    "data": [
+      36,
+      22,
+      8,
+      29,
+      35,
+      15,
+      70,
+      47
+    ],
+    "bitwidth": 32
+  },
+  "t7": {
+    "data": [
+      46,
+      36,
+      64,
+      91,
+      78,
+      84,
+      73,
+      19
+    ],
+    "bitwidth": 32
+  },
+  "out_mem": {
+    "data": 81,
+    "bitwidth": 32
+  }
+}
diff --git a/systolic-lang/templates/s8.meminit/l0.dat b/systolic-lang/templates/s8.meminit/l0.dat
new file mode 100644
index 000000000..d0c31c8e7
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/l0.dat
@@ -0,0 +1,8 @@
+4
+9
+11
+31
+40
+46
+a
+18
diff --git a/systolic-lang/templates/s8.meminit/l1.dat b/systolic-lang/templates/s8.meminit/l1.dat
new file mode 100644
index 000000000..e7db41b4e
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/l1.dat
@@ -0,0 +1,8 @@
+2e
+4
+62
+46
+1d
+3b
+1a
+39
diff --git a/systolic-lang/templates/s8.meminit/l2.dat b/systolic-lang/templates/s8.meminit/l2.dat
new file mode 100644
index 000000000..aaaf96d00
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/l2.dat
@@ -0,0 +1,8 @@
+32
+2d
+15
+34
+19
+18
+53
+2f
diff --git a/systolic-lang/templates/s8.meminit/l3.dat b/systolic-lang/templates/s8.meminit/l3.dat
new file mode 100644
index 000000000..d70071f39
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/l3.dat
@@ -0,0 +1,8 @@
+10
+3a
+4c
+d
+5a
+46
+8
+3e
diff --git a/systolic-lang/templates/s8.meminit/l4.dat b/systolic-lang/templates/s8.meminit/l4.dat
new file mode 100644
index 000000000..d17a6183e
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/l4.dat
@@ -0,0 +1,8 @@
+60
+5f
+1c
+2
+56
+46
+2c
+35
diff --git a/systolic-lang/templates/s8.meminit/l5.dat b/systolic-lang/templates/s8.meminit/l5.dat
new file mode 100644
index 000000000..794d007f1
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/l5.dat
@@ -0,0 +1,8 @@
+43
+51
+a
+48
+45
+43
+5c
+2c
diff --git a/systolic-lang/templates/s8.meminit/l6.dat b/systolic-lang/templates/s8.meminit/l6.dat
new file mode 100644
index 000000000..4883c15ca
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/l6.dat
@@ -0,0 +1,8 @@
+10
+20
+1e
+51
+38
+11
+54
+13
diff --git a/systolic-lang/templates/s8.meminit/l7.dat b/systolic-lang/templates/s8.meminit/l7.dat
new file mode 100644
index 000000000..e2a49d2aa
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/l7.dat
@@ -0,0 +1,8 @@
+39
+42
+3c
+61
+44
+37
+46
+2f
diff --git a/systolic-lang/templates/s8.meminit/out_mem.dat b/systolic-lang/templates/s8.meminit/out_mem.dat
new file mode 100644
index 000000000..82cced27d
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/out_mem.dat
@@ -0,0 +1 @@
+51
diff --git a/systolic-lang/templates/s8.meminit/out_mem0.dat b/systolic-lang/templates/s8.meminit/out_mem0.dat
new file mode 100644
index 000000000..e69de29bb
diff --git a/systolic-lang/templates/s8.meminit/t0.dat b/systolic-lang/templates/s8.meminit/t0.dat
new file mode 100644
index 000000000..db7b4b46a
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/t0.dat
@@ -0,0 +1,8 @@
+14
+4c
+3e
+1e
+1c
+5b
+8
+16
diff --git a/systolic-lang/templates/s8.meminit/t1.dat b/systolic-lang/templates/s8.meminit/t1.dat
new file mode 100644
index 000000000..72d0666af
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/t1.dat
@@ -0,0 +1,8 @@
+27
+62
+48
+d
+26
+1c
+30
+59
diff --git a/systolic-lang/templates/s8.meminit/t2.dat b/systolic-lang/templates/s8.meminit/t2.dat
new file mode 100644
index 000000000..651594dcb
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/t2.dat
@@ -0,0 +1,8 @@
+5f
+4
+32
+2
+1c
+a
+2a
+62
diff --git a/systolic-lang/templates/s8.meminit/t3.dat b/systolic-lang/templates/s8.meminit/t3.dat
new file mode 100644
index 000000000..32629b2b2
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/t3.dat
@@ -0,0 +1,8 @@
+59
+e
+8
+2f
+44
+12
+4b
+1c
diff --git a/systolic-lang/templates/s8.meminit/t4.dat b/systolic-lang/templates/s8.meminit/t4.dat
new file mode 100644
index 000000000..82646c231
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/t4.dat
@@ -0,0 +1,8 @@
+48
+5f
+a
+c
+41
+35
+5c
+1a
diff --git a/systolic-lang/templates/s8.meminit/t5.dat b/systolic-lang/templates/s8.meminit/t5.dat
new file mode 100644
index 000000000..2db018761
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/t5.dat
@@ -0,0 +1,8 @@
+10
+3c
+9
+51
+a
+4d
+f
+32
diff --git a/systolic-lang/templates/s8.meminit/t6.dat b/systolic-lang/templates/s8.meminit/t6.dat
new file mode 100644
index 000000000..dfd0898f2
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/t6.dat
@@ -0,0 +1,8 @@
+24
+16
+8
+1d
+23
+f
+46
+2f
diff --git a/systolic-lang/templates/s8.meminit/t7.dat b/systolic-lang/templates/s8.meminit/t7.dat
new file mode 100644
index 000000000..1fab2bb6f
--- /dev/null
+++ b/systolic-lang/templates/s8.meminit/t7.dat
@@ -0,0 +1,8 @@
+2e
+24
+40
+5b
+4e
+54
+49
+13
diff --git a/systolic-lang/templates/s8.template b/systolic-lang/templates/s8.template
new file mode 100644
index 000000000..c65ec3b8b
--- /dev/null
+++ b/systolic-lang/templates/s8.template
@@ -0,0 +1,23 @@
+{
+  "key": {
+    "N": 64,
+    "B": 8
+  },
+  "memory": {
+    "l": {
+      "data": ["N"],
+      "bitwidth": 32,
+      "banks": ["B"]
+    },
+    "t": {
+      "data": ["N"],
+      "bitwidth": 32,
+      "banks": ["B"]
+    },
+    "out_mem": {
+      "data": [],
+      "bitwidth": 32,
+      "banks": []
+    }
+  }
+}