-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit f0f1651
Showing
74 changed files
with
123,155 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
Copyright (c) 2015, SAFARI Research Group at Carnegie Mellon University | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a | ||
copy of this software and associated documentation files (the | ||
"Software"), to deal in the Software without restriction, including | ||
without limitation the rights to use, copy, modify, merge, publish, | ||
distribute, sublicense, and/or sell copies of the Software, and to | ||
permit persons to whom the Software is furnished to do so, subject to | ||
the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included | ||
in all copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | ||
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | ||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | ||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | ||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Root of the SystemC 2.3.1 installation; edit this to match the local install. | ||
SYSTEMC_HOME = /usr/local/systemc-2.3.1 | ||
# Header and library directories derived from SYSTEMC_HOME (used by CFLAGS and LDFLAGS). | ||
SYSTEMC_INC_DIR = $(SYSTEMC_HOME)/include | ||
SYSTEMC_LIB_DIR = $(SYSTEMC_HOME)/lib/x86_64-linux-gnu | ||
|
||
# Directories holding the C++ sources and the generated object files. | ||
SRCDIR := src | ||
OBJDIR := obj | ||
# Main.cpp is compiled directly by the ramulator link rule, so it is kept out of SRCS. | ||
MAIN := $(SRCDIR)/Main.cpp | ||
# Every other .cpp in SRCDIR except Gem5Wrapper.cpp, which is only built for libramulator.a. | ||
SRCS := $(filter-out $(MAIN) $(SRCDIR)/Gem5Wrapper.cpp, $(wildcard $(SRCDIR)/*.cpp)) | ||
# One object file under OBJDIR for each entry in SRCS. | ||
OBJS := $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(SRCS)) | ||
|
||
|
||
# Ramulator currently supports g++ 5.1+ or clang++ 3.4+. It will NOT work with | ||
# g++ 4.x due to an internal compiler error when processing lambda functions. | ||
#CXX := clang++ | ||
CXX := g++-5 | ||
# Flags passed to every $(CXX) invocation below (dependency scan, compile and link), | ||
# including the SystemC include path. | ||
CFLAGS = -std=c++11 -g -Wall -pedantic -Wno-long-long \ | ||
-DSC_INCLUDE_DYNAMIC_PROCESSES -fpermissive \ | ||
-I$(SYSTEMC_INC_DIR) | ||
|
||
# Link against the SystemC library found in SYSTEMC_LIB_DIR, plus libm. | ||
LDFLAGS =-L$(SYSTEMC_LIB_DIR) -lsystemc -lm | ||
|
||
#CXXFLAGS := -O3 -std=c++11 -g -Wall | ||
#CXXFLAGS := --std=c++11 -g -Wall | ||
# Targets that name commands rather than files. exe is listed as well so that a | ||
# file or directory called "exe" can never make `make exe` report "up to date". | ||
.PHONY: all clean depend exe | ||
|
||
# First rule in the file, hence the default goal: refresh the dependency file, | ||
# then build the ramulator executable. | ||
all: depend ramulator | ||
|
||
# Remove the linked executable and the whole object directory (which also holds .depend). | ||
clean: | ||
rm -f ramulator | ||
rm -rf $(OBJDIR) | ||
|
||
# Convenience alias for (re)generating the header-dependency file. | ||
depend: $(OBJDIR)/.depend | ||
|
||
# Run the simulator with the DDR3 config in --mode=acc on dram.trace. | ||
# NOTE(review): there is no prerequisite on ramulator, so the executable must | ||
# already have been built (e.g. by a prior `make`) - confirm this is intended. | ||
exe: | ||
./ramulator configs/DDR3-config.cfg --mode=acc dram.trace | ||
|
||
# Regenerate the header-dependency file: one `$(CXX) -MM` run per source in SRCS, | ||
# each emitting a rule whose target is the matching object under OBJDIR. | ||
# The output is assembled in $@.tmp and only moved into place after every compiler | ||
# run has succeeded. `|| exit 1` stops at the first failure, so a source that fails | ||
# to preprocess can no longer leave behind a truncated .depend that looks up to date | ||
# (previously only the exit status of the last command in the `;` chain counted). | ||
$(OBJDIR)/.depend: $(SRCS) | ||
@mkdir -p $(OBJDIR) | ||
@rm -f $@ | ||
@: > $@.tmp | ||
@$(foreach SRC, $(SRCS), $(CXX) $(CFLAGS) -DRAMULATOR -MM -MT $(patsubst $(SRCDIR)/%.cpp, $(OBJDIR)/%.o, $(SRC)) $(SRC) >> $@.tmp || exit 1 ;) | ||
@mv -f $@.tmp $@ | ||
|
||
# Pull in the generated header dependencies unless the command-line goal is exactly | ||
# `clean`. The leading `-` keeps make quiet when the file does not exist yet. | ||
ifneq ($(MAKECMDGOALS),clean) | ||
-include $(OBJDIR)/.depend | ||
endif | ||
|
||
|
||
# Build the simulator executable: Main.cpp is compiled here and linked with the | ||
# prebuilt objects in one step. Every header in SRCDIR is a prerequisite, so any | ||
# header change re-runs this command. depend is order-only: it is made first but | ||
# does not by itself force a relink. | ||
ramulator: $(MAIN) $(OBJS) $(SRCDIR)/*.h | depend | ||
$(CXX) $(CFLAGS) -DRAMULATOR -o $@ $(MAIN) $(OBJS) $(LDFLAGS) | ||
|
||
# Static library made of all regular objects plus the gem5 wrapper object. | ||
# $^ expands to exactly the prerequisites listed on the rule line. | ||
libramulator.a: $(OBJS) $(OBJDIR)/Gem5Wrapper.o | ||
libtool -static -o $@ $^ | ||
|
||
# Order-only prerequisite: the object directory must exist before any object is | ||
# built, but its timestamp never triggers a rebuild. | ||
$(OBJS): | $(OBJDIR) | ||
|
||
# Create the object directory on demand. | ||
$(OBJDIR): | ||
@mkdir -p $@ | ||
|
||
# Compile one source file from SRCDIR into the matching object under OBJDIR, | ||
# with RAMULATOR defined. | ||
$(OBJDIR)/%.o: $(SRCDIR)/%.cpp | ||
$(CXX) $(CFLAGS) -DRAMULATOR -c -o $@ $< |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# An example of using Ramulator as memory model in a cycle-accurate SystemC Design | ||
SystemC is convenient for developing cycle-accurate hardware accelerators. However, many | ||
hardware accelerators rely on carefully tuned memory access patterns for | ||
performance, while few open-source cycle-accurate DDR models are readily | ||
available for SystemC. Myoungsoo Jung developed a SystemC interface on top of DRAMSim2 \[6\], | ||
but it supports only a limited set of DDR models. Ramulator is a fast and cycle-accurate | ||
DRAM simulator \[1\] that supports a wide array of commercial, as well as | ||
academic, DRAM standards: | ||
|
||
- DDR3 (2007), DDR4 (2012) | ||
- LPDDR3 (2012), LPDDR4 (2014) | ||
- GDDR5 (2009) | ||
- WIO (2011), WIO2 (2014) | ||
- HBM (2013) | ||
- SALP \[2\] | ||
- TL-DRAM \[3\] | ||
- RowClone \[4\] | ||
- DSARP \[5\] | ||
|
||
[\[1\] Kim et al. *Ramulator: A Fast and Extensible DRAM Simulator.* IEEE CAL | ||
2015.](https://users.ece.cmu.edu/~omutlu/pub/ramulator_dram_simulator-ieee-cal15.pdf) | ||
[\[2\] Kim et al. *A Case for Exploiting Subarray-Level Parallelism (SALP) in | ||
DRAM.* ISCA 2012.](https://users.ece.cmu.edu/~omutlu/pub/salp-dram_isca12.pdf) | ||
[\[3\] Lee et al. *Tiered-Latency DRAM: A Low Latency and Low Cost DRAM | ||
Architecture.* HPCA 2013.](https://users.ece.cmu.edu/~omutlu/pub/tldram_hpca13.pdf) | ||
[\[4\] Seshadri et al. *RowClone: Fast and Energy-Efficient In-DRAM Bulk Data | ||
Copy and Initialization.* MICRO | ||
2013.](https://users.ece.cmu.edu/~omutlu/pub/rowclone_micro13.pdf) | ||
[\[5\] Chang et al. *Improving DRAM Performance by Parallelizing Refreshes with | ||
Accesses.* HPCA 2014.](https://users.ece.cmu.edu/~omutlu/pub/dram-access-refresh-parallelization_hpca14.pdf) | ||
[\[6\] Myoungsoo Jung. *SCIC: A System C Interface Converter for DRAMSim.* 2011.](https://github.com/LBNL-CODEX/DRAMSim_SystemC) | ||
|
||
We therefore use Ramulator as the cycle-accurate memory model in our SystemC design. | ||
To integrate Ramulator into SystemC-based hardware accelerator designs, | ||
this project addresses the following problems. | ||
|
||
1) Ramulator and SystemC each have their own timing management, so we wrap Ramulator | ||
in a SystemC thread and keep it synchronized with the SystemC design. | ||
|
||
2) Ramulator provides only latency information and does not manage memory contents, | ||
which many accelerator designs need. In this project, we keep the memory as | ||
a dynamic vector and maintain its contents based on sequential memory consistency. | ||
|
||
3) Ramulator provides only basic memory requests, i.e., each memory request operates on | ||
a fixed-length, aligned block of data, which is 64 bytes in most cases. This is | ||
inconvenient for hardware accelerator designs, which use diverse burst transfers. | ||
In this work, we developed a memory wrapper that provides arbitrary burst memory access. | ||
|
||
4) Finally, we developed a vector addition accelerator as an example. Users | ||
may build on top of it for their own designs. | ||
|
||
Note that we also changed parts of the Ramulator source code (mostly Request.h). | ||
|
||
## Getting Started | ||
Ramulator requires a C++11 compiler (e.g., `clang++`, `g++-5`). | ||
Refer to the [Ramulator git repo](https://github.com/CMU-SAFARI/ramulator) for more details. This project uses [SystemC-2.3.1](http://accellera.org/downloads/standards/systemc) as the library; you need to download and compile it first. Then | ||
change the SystemC library path in the Makefile accordingly. | ||
|
||
$ cd ramulator | ||
$ make | ||
$ make exe | ||
|
||
## Simulation Output | ||
To be added. | ||
|
||
### Contributors | ||
- Cheng Liu (National University of Singapore) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
######################## | ||
# Example config file | ||
# Comments start with # | ||
# There are restrictions for valid channel/rank numbers | ||
standard = ALDRAM | ||
channels = 1 | ||
ranks = 1 | ||
speed = ALDRAM_1600K | ||
org = ALDRAM_4Gb_x8 | ||
# record_cmd_trace: (default is off): on, off | ||
record_cmd_trace = off | ||
# print_cmd_trace: (default is off): on, off | ||
print_cmd_trace = off | ||
|
||
### Below are parameters only for CPU trace | ||
cpu_tick = 4 | ||
mem_tick = 1 | ||
### Below are parameters only for multicore mode | ||
# When early_exit is on, all cores will be terminated when the earliest one finishes. | ||
early_exit = on | ||
# early_exit = on, off (default value is on) | ||
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit. | ||
expected_limit_insts = 200000000 | ||
cache = no | ||
# cache = no, L1L2, L3, all (default value is no) | ||
translation = None | ||
# translation = None, Random (default value is None) | ||
# | ||
######################## |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
######################## | ||
# Example config file | ||
# Comments start with # | ||
# There are restrictions for valid channel/rank numbers | ||
standard = DDR3 | ||
channels = 1 | ||
ranks = 1 | ||
speed = DDR3_1600K | ||
org = DDR3_2Gb_x8 | ||
# record_cmd_trace: (default is off): on, off | ||
record_cmd_trace = off | ||
# print_cmd_trace: (default is off): on, off | ||
print_cmd_trace = off | ||
|
||
### Below are parameters only for CPU trace | ||
cpu_tick = 4 | ||
mem_tick = 1 | ||
### Below are parameters only for multicore mode | ||
# When early_exit is on, all cores will be terminated when the earliest one finishes. | ||
early_exit = on | ||
# early_exit = on, off (default value is on) | ||
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit. | ||
expected_limit_insts = 200000000 | ||
cache = no | ||
# cache = no, L1L2, L3, all (default value is no) | ||
translation = None | ||
# translation = None, Random (default value is None) | ||
# | ||
######################## |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
######################## | ||
# Example config file | ||
# Comments start with # | ||
# There are restrictions for valid channel/rank numbers | ||
standard = DDR4 | ||
channels = 1 | ||
ranks = 1 | ||
speed = DDR4_2400R | ||
org = DDR4_4Gb_x8 | ||
# record_cmd_trace: (default is off): on, off | ||
record_cmd_trace = off | ||
# print_cmd_trace: (default is off): on, off | ||
print_cmd_trace = off | ||
|
||
### Below are parameters only for CPU trace | ||
cpu_tick = 8 | ||
mem_tick = 3 | ||
### Below are parameters only for multicore mode | ||
# When early_exit is on, all cores will be terminated when the earliest one finishes. | ||
early_exit = on | ||
# early_exit = on, off (default value is on) | ||
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit. | ||
expected_limit_insts = 200000000 | ||
cache = no | ||
# cache = no, L1L2, L3, all (default value is no) | ||
translation = None | ||
# translation = None, Random (default value is None) | ||
# | ||
######################## |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
######################## | ||
# Example config file | ||
# Comments start with # | ||
# There are restrictions for valid channel/rank numbers | ||
standard = DSARP | ||
subarrays = 8 | ||
channels = 1 | ||
ranks = 1 | ||
speed = DSARP_1333 | ||
org = DSARP_8Gb_x8 | ||
# record_cmd_trace: (default is off): on, off | ||
record_cmd_trace = off | ||
# print_cmd_trace: (default is off): on, off | ||
print_cmd_trace = off | ||
|
||
### Below are parameters only for CPU trace | ||
cpu_tick = 4 | ||
mem_tick = 1 | ||
### Below are parameters only for multicore mode | ||
# When early_exit is on, all cores will be terminated when the earliest one finishes. | ||
early_exit = on | ||
# early_exit = on, off (default value is on) | ||
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit. | ||
expected_limit_insts = 200000000 | ||
cache = no | ||
# cache = no, L1L2, L3, all (default value is no) | ||
translation = None | ||
# translation = None, Random (default value is None) | ||
# | ||
######################## |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
######################## | ||
# Example config file | ||
# Comments start with # | ||
# There are restrictions for valid channel/rank numbers | ||
standard = GDDR5 | ||
channels = 1 | ||
ranks = 1 | ||
speed = GDDR5_6000 | ||
org = GDDR5_8Gb_x16 | ||
# record_cmd_trace: (default is off): on, off | ||
record_cmd_trace = off | ||
# print_cmd_trace: (default is off): on, off | ||
print_cmd_trace = off | ||
|
||
### Below are parameters only for CPU trace | ||
cpu_tick = 2 | ||
mem_tick = 1 | ||
### Below are parameters only for multicore mode | ||
# When early_exit is on, all cores will be terminated when the earliest one finishes. | ||
early_exit = on | ||
# early_exit = on, off (default value is on) | ||
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit. | ||
expected_limit_insts = 200000000 | ||
cache = no | ||
# cache = no, L1L2, L3, all (default value is no) | ||
translation = None | ||
# translation = None, Random (default value is None) | ||
# | ||
######################## |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
######################## | ||
# Example config file | ||
# Comments start with # | ||
# There are restrictions for valid channel/rank numbers | ||
standard = HBM | ||
channels = 8 | ||
ranks = 1 | ||
speed = HBM_1Gbps | ||
org = HBM_4Gb | ||
# record_cmd_trace: (default is off): on, off | ||
record_cmd_trace = off | ||
# print_cmd_trace: (default is off): on, off | ||
print_cmd_trace = off | ||
|
||
### Below are parameters only for CPU trace | ||
cpu_tick = 32 | ||
mem_tick = 5 | ||
### Below are parameters only for multicore mode | ||
# When early_exit is on, all cores will be terminated when the earliest one finishes. | ||
early_exit = on | ||
# early_exit = on, off (default value is on) | ||
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit. | ||
expected_limit_insts = 200000000 | ||
cache = no | ||
# cache = no, L1L2, L3, all (default value is no) | ||
translation = None | ||
# translation = None, Random (default value is None) | ||
# | ||
######################## |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
######################## | ||
# Example config file | ||
# Comments start with # | ||
# There are restrictions for valid channel/rank numbers | ||
standard = LPDDR3 | ||
channels = 1 | ||
ranks = 1 | ||
speed = LPDDR3_1600 | ||
org = LPDDR3_8Gb_x16 | ||
# record_cmd_trace: (default is off): on, off | ||
record_cmd_trace = off | ||
# print_cmd_trace: (default is off): on, off | ||
print_cmd_trace = off | ||
|
||
### Below are parameters only for CPU trace | ||
cpu_tick = 4 | ||
mem_tick = 1 | ||
### Below are parameters only for multicore mode | ||
# When early_exit is on, all cores will be terminated when the earliest one finishes. | ||
early_exit = on | ||
# early_exit = on, off (default value is on) | ||
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit. | ||
expected_limit_insts = 200000000 | ||
cache = no | ||
# cache = no, L1L2, L3, all (default value is no) | ||
translation = None | ||
# translation = None, Random (default value is None) | ||
# | ||
######################## |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
######################## | ||
# Example config file | ||
# Comments start with # | ||
# There are restrictions for valid channel/rank numbers | ||
standard = LPDDR4 | ||
channels = 2 | ||
ranks = 1 | ||
speed = LPDDR4_2400 | ||
org = LPDDR4_8Gb_x16 | ||
# record_cmd_trace: (default is off): on, off | ||
record_cmd_trace = off | ||
# print_cmd_trace: (default is off): on, off | ||
print_cmd_trace = off | ||
|
||
### Below are parameters only for CPU trace | ||
cpu_tick = 8 | ||
mem_tick = 3 | ||
### Below are parameters only for multicore mode | ||
# When early_exit is on, all cores will be terminated when the earliest one finishes. | ||
early_exit = on | ||
# early_exit = on, off (default value is on) | ||
# If expected_limit_insts is set, some per-core statistics will be recorded when this limit (or the end of the whole trace if it's shorter than specified limit) is reached. The simulation won't stop and will roll back automatically until the last one reaches the limit. | ||
expected_limit_insts = 200000000 | ||
cache = no | ||
# cache = no, L1L2, L3, all (default value is no) | ||
translation = None | ||
# translation = None, Random (default value is None) | ||
# | ||
######################## |
Oops, something went wrong.