|
| 1 | +from __future__ import absolute_import |
| 2 | +from __future__ import print_function |
| 3 | +import sys |
| 4 | +import os |
| 5 | +import numpy as np |
| 6 | + |
| 7 | +# the next line can be removed after installation |
| 8 | +sys.path.insert(0, os.path.dirname(os.path.dirname( |
| 9 | + os.path.dirname(os.path.abspath(__file__))))) |
| 10 | + |
| 11 | +from veriloggen import * |
| 12 | +import veriloggen.thread as vthread |
| 13 | +import veriloggen.types.axi as axi |
| 14 | + |
# Bit widths handed to the RAM constructors: width of one data word and
# width of an address.
datawidth = 8
addrwidth = 8

# Operands are square matrix_size x matrix_size matrices.  A starts at
# address 16 (the testbench writes its header fields at addresses 0-13);
# B and C each follow the previous matrix back to back.
matrix_size = 8
a_offset = 16
b_offset = a_offset + matrix_size ** 2
c_offset = b_offset + matrix_size ** 2
| 22 | + |
| 23 | + |
def mkLed():
    """Build the ``user_module`` hardware module that multiplies two matrices.

    The module exposes ``clk``, ``rst`` and ``start`` inputs, a ``busy``
    output register and an external RAM interface named ``ram``.  A vthread
    thread named ``th_matmul`` loops forever: it waits for ``start``, raises
    ``busy``, reads a header from RAM, computes the product, and lowers
    ``busy`` again.

    Header layout read from RAM (16-bit little-endian fields, one byte per
    RAM word):
        addresses 0-1:   matrix size
        addresses 4-5:   address of matrix A
        addresses 8-9:   address of matrix B
        addresses 12-13: address of matrix C (the result)

    Both operands are walked row by row, so the value written to
    ``C[i][j]`` is ``sum_k A[i][k] * B[j][k]``.

    Returns:
        The constructed ``Module``.
    """
    m = Module('user_module')
    clk = m.Input('clk')
    rst = m.Input('rst')
    start = m.Input('start')
    busy = m.OutputReg('busy', initval=0)

    ram = vthread.ExtRAM(m, 'ram', clk, rst, datawidth, addrwidth)

    # NOTE: the nested functions below are handed to vthread.Thread for
    # synthesis, so they are documented with comments only and stick to the
    # constructs the original code already used.

    # Thread entry point: serve one multiplication request per iteration.
    def matmul():
        while True:
            wait()
            matrix_size = read_matrix_size()
            offset_a = read_matrix_a_offset()
            offset_b = read_matrix_b_offset()
            offset_c = read_matrix_c_offset()
            comp(matrix_size, offset_a, offset_b, offset_c)
            done()

    # Spin until `start` is asserted, then flag the module as busy.
    def wait():
        while not start:
            pass
        busy.value = 1

    # Matrix size: 16-bit little-endian value at addresses 0-1.  The bytes
    # are masked exactly like the offset fields below so all header fields
    # are decoded the same way.
    def read_matrix_size():
        size0 = ram.read(0) & 0xff
        size1 = ram.read(1) & 0xff
        size = (size1 << 8) | size0
        return size

    # Address of matrix A: 16-bit little-endian value at addresses 4-5.
    def read_matrix_a_offset():
        offset0 = ram.read(4) & 0xff
        offset1 = ram.read(5) & 0xff
        offset = (offset1 << 8) | offset0
        return offset

    # Address of matrix B: 16-bit little-endian value at addresses 8-9.
    def read_matrix_b_offset():
        offset0 = ram.read(8) & 0xff
        offset1 = ram.read(9) & 0xff
        offset = (offset1 << 8) | offset0
        return offset

    # Address of matrix C: 16-bit little-endian value at addresses 12-13.
    def read_matrix_c_offset():
        offset0 = ram.read(12) & 0xff
        offset1 = ram.read(13) & 0xff
        offset = (offset1 << 8) | offset0
        return offset

    # Triple-loop multiply.  a_addr/c_addr point at the current row of A/C,
    # b_addr at the current row of B; all three advance by one row stride.
    def comp(matrix_size, a_offset, b_offset, c_offset):
        # Address distance between consecutive rows; loop-invariant, so it
        # is computed once instead of on every pointer update.
        row_stride = matrix_size * (datawidth // 8)
        a_addr, c_addr = a_offset, c_offset

        for i in range(matrix_size):
            b_addr = b_offset
            for j in range(matrix_size):
                # `acc` rather than `sum`, to avoid shadowing the builtin.
                acc = 0
                for k in range(matrix_size):
                    x = ram.read(a_addr + k)
                    y = ram.read(b_addr + k)
                    acc += x * y
                ram.write(c_addr + j, acc)

                b_addr += row_stride

            a_addr += row_stride
            c_addr += row_stride

    # Signal completion by clearing `busy`.
    def done():
        busy.value = 0

    th = vthread.Thread(m, 'th_matmul', clk, rst, matmul, datawidth=16)
    th.start()

    return m
| 97 | + |
| 98 | + |
def mkTest(memimg_name=None):
    """Build the simulation testbench around the module from ``mkLed``.

    The testbench:
      * pre-loads a two-port RAM with matrix A (diagonal 1..matrix_size) at
        ``a_offset`` and matrix B (2 on the diagonal) at ``b_offset``;
      * connects RAM port 0 to the design's ``ram_0_*`` ports and drives
        port 1 from a control thread;
      * lets the control thread write the header (size and the three
        offsets, one byte per word), pulse ``start``, wait for ``busy`` to
        drop, report the cycle count and verify the result matrix at
        ``c_offset``;
      * wraps everything in a ``top`` module with waveform dumping enabled.

    Args:
        memimg_name: Accepted for interface compatibility; this testbench
            does not use it.

    Returns:
        The ``top`` module containing the testbench as a submodule.
    """
    # A = diag(1, 2, ..., matrix_size) and B = 2 * I, so the expected result
    # holds (y + 1) * 2 on the diagonal and 0 everywhere else.
    a = np.diag(np.arange(1, matrix_size + 1, dtype=np.int64))
    b = np.diag(np.full(matrix_size, 2, dtype=np.int64))

    # One image entry per RAM word.  The RAM is addressed with `addrwidth`
    # bits, so the image has 2 ** addrwidth entries regardless of datawidth
    # (the previous `* (8 // datawidth)` factor became 0 for datawidth > 8).
    mem = np.zeros([2 ** addrwidth], dtype=np.int64)
    axi.set_memory(mem, a, datawidth, datawidth, a_offset)
    axi.set_memory(mem, b, datawidth, datawidth, b_offset)

    led = mkLed()

    m = Module('test')
    # Called for its effect on `m`; the returned value is not needed here.
    m.copy_params(led)
    ports = m.copy_sim_ports(led)
    clk = ports['clk']
    rst = ports['rst']

    start = ports['start']
    busy = ports['busy']

    start.initval = 0

    memory = vthread.RAM(m, 'memory', clk, rst, datawidth, addrwidth,
                         numports=2, initvals=mem.tolist())
    memory.connect_rtl(0, ports['ram_0_addr'], ports['ram_0_wdata'],
                       ports['ram_0_wenable'], ports['ram_0_rdata'],
                       ports['ram_0_enable'])

    # Timer: free-running counter incremented by the `seq` sequential logic.
    counter = m.Reg('counter', 32, initval=0)
    seq = Seq(m, 'seq', clk, rst)
    seq(
        counter.inc()
    )

    # Control thread (synthesized by vthread, hence comments only).
    def ctrl():
        # Idle loop before touching the memory.
        for i in range(100):
            pass

        # Header: each 16-bit field is written as two bytes, low byte first.
        awaddr = 0
        v = (matrix_size & 0xff)
        print('# matrix_size[7:0] = %d' % v)
        memory.write(awaddr, v, port=1)

        awaddr = 1
        v = ((matrix_size >> 8) & 0xff)
        print('# matrix_size[15:8] = %d' % v)
        memory.write(awaddr, v, port=1)

        awaddr = 4
        v = (a_offset & 0xff)
        print('# a_offset[7:0] = %d' % v)
        memory.write(awaddr, v, port=1)

        awaddr = 5
        v = ((a_offset >> 8) & 0xff)
        print('# a_offset[15:8] = %d' % v)
        memory.write(awaddr, v, port=1)

        awaddr = 8
        v = (b_offset & 0xff)
        print('# b_offset[7:0] = %d' % v)
        memory.write(awaddr, v, port=1)

        awaddr = 9
        v = ((b_offset >> 8) & 0xff)
        print('# b_offset[15:8] = %d' % v)
        memory.write(awaddr, v, port=1)

        awaddr = 12
        v = (c_offset & 0xff)
        print('# c_offset[7:0] = %d' % v)
        memory.write(awaddr, v, port=1)

        awaddr = 13
        v = ((c_offset >> 8) & 0xff)
        print('# c_offset[15:8] = %d' % v)
        memory.write(awaddr, v, port=1)

        # Pulse `start`, holding it high across a short loop.
        start_time = counter
        print('# start time = %d' % start_time)
        start.value = 1

        for _ in range(10):
            pass

        start.value = 0

        # Wait until the design reports completion.
        while True:
            if not busy:
                break

        end_time = counter
        print('# end time = %d' % end_time)
        time = end_time - start_time
        print('# exec time = %d' % time)

        # Check every element of C against the expected diagonal result.
        all_ok = True
        for y in range(matrix_size):
            for x in range(matrix_size):
                v = memory.read(
                    c_offset + (y * matrix_size + x) * datawidth // 8, port=1)
                if y == x and vthread.verilog.NotEql(v, (y + 1) * 2):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (y, x, v))
                if y != x and vthread.verilog.NotEql(v, 0):
                    all_ok = False
                    print("NG [%d,%d] = %d" % (y, x, v))

        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

        vthread.finish()

    th = vthread.Thread(m, 'th_ctrl', clk, rst, ctrl)
    th.start()

    m.Instance(led, 'uut',
               params=m.connect_params(led),
               ports=m.connect_ports(led))

    simulation.setup_clock(m, clk, hperiod=5)
    init = simulation.setup_reset(m, rst, m.make_reset(), period=100)

    # Safety timeout: end the simulation even if the control thread never
    # reaches vthread.finish().
    init.add(
        Delay(1000000),
        Systask('finish'),
    )

    # for VCD dump
    top = Module('top')
    uut = Submodule(top, m, name='test')

    vcd_name = os.path.splitext(os.path.basename(__file__))[0] + '.vcd'
    simulation.setup_waveform(top, uut, dumpfile=vcd_name)

    return top
| 273 | + |
| 274 | + |
def run(filename='tmp.v', simtype='iverilog', outputfile=None):
    """Generate the testbench, optionally dump Verilog, and simulate it.

    Args:
        filename: Path the generated Verilog is written to; pass ``None``
            to skip writing it.
        simtype: Simulator name forwarded to ``simulation.Simulator``.
        outputfile: Name passed to the simulator's ``run``.  Defaults to
            this script's base name with a ``.out`` extension.

    Returns:
        The simulator output as a string.  For ``simtype == 'verilator'``
        a final line starting with ``-`` is removed.
    """
    if outputfile is None:
        outputfile = os.path.splitext(os.path.basename(__file__))[0] + '.out'

    memimg_name = 'memimg_' + outputfile

    test = mkTest(memimg_name=memimg_name)

    if filename is not None:
        test.to_verilog(filename)

    sim = simulation.Simulator(test, sim=simtype)
    rslt = sim.run(outputfile=outputfile)
    lines = rslt.splitlines()
    # `lines` is empty when the simulator printed nothing; check that first
    # so indexing the last line cannot raise IndexError.
    if simtype == 'verilator' and lines and lines[-1].startswith('-'):
        rslt = '\n'.join(lines[:-1])
    return rslt
| 293 | + |
| 294 | + |
if __name__ == '__main__':
    # Script entry point: write tmp.v, run the simulation, show its output.
    print(run(filename='tmp.v'))
0 commit comments