-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathSRAM_controller.sv
More file actions
235 lines (177 loc) · 6.27 KB
/
SRAM_controller.sv
File metadata and controls
235 lines (177 loc) · 6.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/*
SRAM_controller.sv
Finn Sinclair 2018
Pipelined, dual-clock SRAM memory controller for high-speed applications.
This is useful when you have an SRAM module clocked at a significantly higher
clock speed than your main FPGA fabric. This allows for multiple low-speed modules
to put memory requests (both reading and writing) into the FIFO ports of the memory
controller, and then be notified when the information is available from that port.
Each port is both read and write. The nature of the operation is stored in the FIFO
next to the address bits.
This SRAM controller allows the system to take advantage of the faster memory clock.
If the SRAM clock is twice the speed of the board clock, two board-clocked modules
can perform memory operations effectively at the same time, as the SRAM controller
can fulfill their requests twice as fast as they can give them.
Customer modules should watch the appropriate active-high DataReady signal.
*/
// SRAM_controller
//
// Multiplexes `numInputPorts` request ports onto a single 16-bit wide SRAM.
// Each port owns two SRAM_FIFO instances (data and address/command) that are
// written on BOARD_CLK and drained on SRAM_CLK by a round-robin scheduler.
//
// Ports:
//   SRAM_DQ / SRAM_ADDR / SRAM_*_N  - pins of the external SRAM.
//   DataToSRAM[p], AddressToSRAM[p] - payload and address for port p's request.
//   QueueReadReq[p], QueueWriteReq[p] - push a read / write request for port p
//                                       into its FIFOs.
//   DataReady[p]    - active high; starts high, is dropped when a read request
//                     from port p is observed and raised again when the read
//                     data has been latched into DataFromSRAM[p].
//   DataFromSRAM[p] - last read data returned for port p.
//   SRAM_CLK, BOARD_CLK - FIFO read-side / write-side clocks.
module SRAM_controller #(parameter numInputPorts = 4)(
    inout wire[15:0] SRAM_DQ,
    output logic[19:0] SRAM_ADDR,
    output logic SRAM_UB_N, SRAM_LB_N, SRAM_CE_N, SRAM_OE_N, SRAM_WE_N = 1,

    input logic[15:0] DataToSRAM[numInputPorts],
    input logic[19:0] AddressToSRAM[numInputPorts],
    input logic [numInputPorts-1:0] QueueReadReq, QueueWriteReq,
    output logic [numInputPorts-1:0] DataReady = -1,
    output logic [15:0] DataFromSRAM[numInputPorts],

    input logic SRAM_CLK, //100 MHz SRAM clock, allow for "double data rate"
    input logic BOARD_CLK //50 MHz FPGA clock, for incoming requests
);

    // ---------------------------------------------------------------------
    // FIFO read-side signals (SRAM_CLK domain)
    // ---------------------------------------------------------------------
    logic [numInputPorts-1:0]FIFOread = 0; // Read strobe per port, driven by the fetch loop
    logic [numInputPorts-1:0]FIFOempty;    // rdempty of each port's data FIFO
    logic[15:0] FIFOdata[numInputPorts];   // Head-of-queue write data per port
    logic[21:0] FIFOaddr[numInputPorts];   // {isRead, isWrite, address[19:0]} per port

    // ---------------------------------------------------------------------
    // Scheduler state
    // ---------------------------------------------------------------------
    logic[7:0] lastRoundRobin = 8'd0;  // Port serviced on the previous SRAM_CLK edge
    logic[7:0] roundRobin = 8'd1;      // Port being serviced on this edge
    logic[7:0] nextRoundRobin = 8'd2;  // Port chosen by the combinational scheduler

    // A high-priority override for port 0 was sketched but is currently
    // disabled; these signals are kept as placeholders for it. nextOverride is
    // always 0, override merely registers it, and roundRobinCache is unused.
    logic override = 0;
    logic nextOverride = 0;
    logic[7:0] roundRobinCache = 8'd0;

    logic[15:0] DQ_buffer;        // Value driven onto SRAM_DQ while writing
    logic lastOpRead = 0;         // Previous edge issued a read; data is captured on this edge
    logic controllerIdle = 1;     // No request is being serviced on this edge
    logic controllerIdleNext;     // Scheduler found no non-empty FIFO

    // ---------------------------------------------------------------------
    // Static SRAM control pins
    // ---------------------------------------------------------------------
    assign SRAM_CE_N = 0; // Always chip-enable
    assign SRAM_UB_N = 0; // Always access both bytes
    assign SRAM_LB_N = 0;
    assign SRAM_OE_N = 0; // Output enable tied low; SRAM_WE_N alone selects read vs. write

    // ---------------------------------------------------------------------
    // Per-port request FIFOs. Both FIFOs of a port share wrreq and rdreq so
    // their entries stay paired.
    // NOTE(review): FIFOaddr/FIFOdata are consumed on the same SRAM_CLK edge
    // that sees FIFOread high, which presumably requires SRAM_FIFO to be
    // configured as a show-ahead FIFO -- confirm against the IP settings.
    // ---------------------------------------------------------------------
    genvar i;
    generate
        for(i = 0; i < numInputPorts; i++) begin: DATA_FIFO_generate
            SRAM_FIFO data_inputPort(
                .rdclk(SRAM_CLK),
                .wrclk(BOARD_CLK),
                .data({16'b0, DataToSRAM[i]}),
                .wrreq(QueueReadReq[i] || QueueWriteReq[i]),
                .rdreq(FIFOread[i]),
                .rdempty(FIFOempty[i]),
                .q(FIFOdata[i])
            );
        end

        // Encodes whether the request is a read or write in an extra two bits:
        // bit 21 = read request, bit 20 = write request, bits 19:0 = address.
        // Padded to 32 bits, the same width the data FIFO instance above is
        // fed with (the previous 18'b0 padding produced a 40-bit expression;
        // only zero bits are dropped, so the stored value is unchanged).
        for(i = 0; i < numInputPorts; i++) begin: ADDR_FIFO_generate
            SRAM_FIFO addr_inputPort(
                .rdclk(SRAM_CLK),
                .wrclk(BOARD_CLK),
                .data({ 10'b0, QueueReadReq[i], QueueWriteReq[i], AddressToSRAM[i]}),
                .wrreq(QueueReadReq[i] || QueueWriteReq[i]),
                .rdreq(FIFOread[i]),
                .q(FIFOaddr[i])
            );
        end
    endgenerate

    // Drive the data bus only while a write is in progress; otherwise release
    // it so the SRAM can drive read data back.
    assign SRAM_DQ = ~SRAM_WE_N ? DQ_buffer : 16'bz;

    // ---------------------------------------------------------------------
    // Memory fetch loop. Statement order matters here: several signals
    // (DataReady, FIFOread, SRAM_WE_N) are assigned more than once and the
    // LAST nonblocking assignment executed on an edge is the one that sticks.
    // ---------------------------------------------------------------------
    always_ff @(posedge SRAM_CLK) begin: mainblock

        // If some customer has queued a read, drop their DataReady so they can
        // watch for it to rise again. Writes never touch DataReady: it is only
        // re-asserted below once read data has been captured.
        // NOTE(review): QueueReadReq is presumably synchronous to BOARD_CLK but
        // is sampled here on SRAM_CLK -- confirm the two clocks are
        // phase-related.
        for(integer i = 0; i < numInputPorts; i++) begin: readLoop
            if((QueueReadReq[i]) && DataReady[i] == 1) begin
                DataReady[i] <= 0;
            end
        end

        // At clock edge, capture data from the SRAM if the last operation was
        // a read. Placed after the loop above so it wins for the same port.
        if(lastOpRead && ~controllerIdle) begin
            DataReady[lastRoundRobin] <= 1;
            DataFromSRAM[lastRoundRobin] <= SRAM_DQ; // Latch in output
        end

        SRAM_ADDR <= FIFOaddr[roundRobin][19:0]; // Assert 20-bit address on SRAM, always

        if(FIFOaddr[roundRobin][21] == 1 && ~controllerIdle) begin // It's a read request
            lastOpRead <= 1; // So we fetch data next clock
            SRAM_WE_N <= 1;
        end
        else begin
            // Anything that is not flagged as a read is executed as a write.
            if(~controllerIdle) begin
                lastOpRead <= 0; // This op was not a read, so don't read next clock
                DQ_buffer <= FIFOdata[roundRobin][15:0]; // Assert FIFO data on SRAM bus
                SRAM_WE_N <= 0; // Bring WE low to write
            end
        end

        // Advance the scheduler pipeline.
        FIFOread[roundRobin] <= 0;
        lastRoundRobin <= roundRobin;
        roundRobin <= nextRoundRobin;
        controllerIdle <= controllerIdleNext;
        override <= nextOverride;

        if(controllerIdleNext == 0) begin
            // nextRoundRobin indicates a non-empty FIFO: request its head entry.
            // Overrides the clear above when the same port is chosen again.
            FIFOread[nextRoundRobin] <= 1;
        end
        else begin
            // Nothing to do next: park WE high (overrides any write strobe
            // scheduled above). SRAM_OE_N is tied low by a continuous assign.
            SRAM_WE_N <= 1;
        end
    end

    // ---------------------------------------------------------------------
    // Intelligent round-robin queueing. Starting with the port after the
    // current one, cascades to the first non-empty FIFO; the current port
    // itself is the last candidate. If every FIFO is empty the controller
    // idles and the round-robin position is held.
    //
    // Defaults are assigned first so every output is driven on every path
    // (no inferred latches); the loop only overrides them on a hit.
    //
    // Not sure how expensive modulo is, it seems to synthesize a LPM_divide
    // module which seems... odd.
    // ---------------------------------------------------------------------
    always_comb begin
        controllerIdleNext = 1;
        nextOverride = 0;
        nextRoundRobin = roundRobin;

        for(int i = 1; i <= numInputPorts; i++) begin
            if(!FIFOempty[(roundRobin+i)%numInputPorts]) begin
                nextRoundRobin = (roundRobin+i)%numInputPorts;
                controllerIdleNext = 0;
                break;
            end
        end
    end

endmodule