From dfe213698934a55f6d5142ff833afe83477b1dc2 Mon Sep 17 00:00:00 2001 From: "Zhenpeng (Jeffrey) Zuo" Date: Thu, 23 Aug 2018 00:24:18 -0700 Subject: [PATCH] Nv large vp --- README.md | 5 +- cl_nvdla/build/constraints/cl_pnr_user.xdc | 24 ++----- cl_nvdla/build/scripts/README.md | 8 ++- cl_nvdla/build/scripts/create_dcp_from_cl.tcl | 63 +++++++++++-------- cl_nvdla/build/scripts/filelist.sh | 48 +++++++++----- cl_nvdla/build/scripts/synth_cl_nvdla.tcl | 6 +- cl_nvdla/design/cl_dma_pcis_slv.sv | 16 +++-- cl_nvdla/design/cl_nvdla.sv | 13 +++- cl_nvdla/design/cl_nvdla_defines.vh | 22 +++++++ cl_nvdla/design/cl_nvdla_wrap.sv | 25 +++++--- cl_nvdla/verif/scripts/top.vcs.f | 25 +++++++- common/software/include/cl_common_utils.h | 15 ++++- common/software/src/cosim_top.cpp | 26 ++++---- .../trace_player/parser/nvdla_trace_parser.py | 19 +++++- common/trace_player/src/trace_player.c | 2 +- 15 files changed, 218 insertions(+), 99 deletions(-) diff --git a/README.md b/README.md index 1276c9d..a0e3f1f 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,9 @@ You can find the latest NVDLA Virtual Platform documentation for AWS FPGA [here] ## NVIDIA Sample AFI -agfi-0373fee35268723ca +nv_small: agfi-0373fee35268723ca +nv_large: agfi-0f541fc533cba53ef ## EC2 FPGA Hardware and Software Development Kits version -be3d41cb1b6b5db6bc24b577ab1af8d2eb4de102 \ No newline at end of file +v1.4.0 diff --git a/cl_nvdla/build/constraints/cl_pnr_user.xdc b/cl_nvdla/build/constraints/cl_pnr_user.xdc index 12a9812..455ae3b 100644 --- a/cl_nvdla/build/constraints/cl_pnr_user.xdc +++ b/cl_nvdla/build/constraints/cl_pnr_user.xdc @@ -1,26 +1,10 @@ # This contains the CL specific constraints for Top level PNR -create_pblock pblock_CL_top -resize_pblock [get_pblocks pblock_CL_top] -add {CLOCKREGION_X0Y10:CLOCKREGION_X5Y14} -set_property PARENT pblock_CL [get_pblocks pblock_CL_top] +# False path between vled on CL clock and Shell asynchronous clock +set_false_path -from [get_cells WRAPPER_INST/CL/vled_q_reg*] -create_pblock pblock_CL_mid -add_cells_to_pblock [get_pblocks pblock_CL_mid] [get_cells -quiet -hierarchical -filter {NAME =~ CL/CL_DMA_PCIS_SLV/AXI_CROSSBAR}] -add_cells_to_pblock [get_pblocks pblock_CL_mid] [get_cells -quiet -hierarchical -filter {NAME =~ CL/CL_PCIM_MSTR/CL_TST_PCI}] -clear_locs -add_cells_to_pblock [get_pblocks pblock_CL_mid] [get_cells [list CL/CL_PCIM_MSTR/PCI_AXI4_REG_SLC]] -resize_pblock [get_pblocks pblock_CL_mid] -add {CLOCKREGION_X0Y5:CLOCKREGION_X3Y9} -set_property PARENT pblock_CL [get_pblocks pblock_CL_mid] - -create_pblock pblock_CL_bot -add_cells_to_pblock [get_pblocks pblock_CL_bot] [get_cells [list CL/CL_DMA_PCIS_SLV/PCI_AXL_REG_SLC CL/CL_OCL_SLV/AXIL_OCL_REG_SLC CL/CL_SDA_SLV/AXIL_SDA_REG_SLC]] -add_cells_to_pblock [get_pblocks pblock_CL_bot] [get_cells -hierarchical -filter { NAME =~ "*CL/CL_OCL_SLV/slv_tst_wdata_reg[*][*]*" && PRIMITIVE_TYPE =~ REGISTER.*.* }] -#Reassign select cells to parent Pblock for better QoR -add_cells_to_pblock [get_pblocks pblock_CL] $pblock_cells -resize_pblock [get_pblocks pblock_CL_bot] -add {CLOCKREGION_X0Y0:CLOCKREGION_X3Y4} -set_property PARENT pblock_CL [get_pblocks pblock_CL_bot] - -set_clock_groups -name TIG_SRAI_1 -asynchronous -group [get_clocks -of_objects [get_pins static_sh/SH_DEBUG_BRIDGE/inst/bsip/inst/USE_SOFTBSCAN.U_TAP_TCKBUFG/O]] -group [get_clocks -of_objects [get_pins SH/kernel_clks_i/clkwiz_sys_clk/inst/CLK_CORE_DRP_I/clk_inst/mmcme3_adv_inst/CLKOUT0]] +# False paths between main clock and tck +set_clock_groups -name TIG_SRAI_1 -asynchronous -group [get_clocks -of_objects [get_pins static_sh/SH_DEBUG_BRIDGE/inst/bsip/inst/USE_SOFTBSCAN.U_TAP_TCKBUFG/O]] -group [get_clocks -of_objects [get_pins WRAPPER_INST/SH/kernel_clks_i/clkwiz_sys_clk/inst/CLK_CORE_DRP_I/clk_inst/mmcme3_adv_inst/CLKOUT0]] set_clock_groups -name TIG_SRAI_2 -asynchronous -group [get_clocks -of_objects [get_pins static_sh/SH_DEBUG_BRIDGE/inst/bsip/inst/USE_SOFTBSCAN.U_TAP_TCKBUFG/O]] -group [get_clocks drck] set_clock_groups -name TIG_SRAI_3 -asynchronous -group [get_clocks -of_objects [get_pins static_sh/SH_DEBUG_BRIDGE/inst/bsip/inst/USE_SOFTBSCAN.U_TAP_TCKBUFG/O]] -group [get_clocks -of_objects [get_pins static_sh/pcie_inst/inst/gt_top_i/diablo_gt.diablo_gt_phy_wrapper/phy_clk_i/bufg_gt_userclk/O]] - diff --git a/cl_nvdla/build/scripts/README.md b/cl_nvdla/build/scripts/README.md index ba52bce..3920f00 100644 --- a/cl_nvdla/build/scripts/README.md +++ b/cl_nvdla/build/scripts/README.md @@ -6,8 +6,10 @@ > git clone https://github.com/aws/aws-fpga.git 3. Souce aws-fpga hdk environment: > source aws-fpga/hdk_setup.sh -4. generate nvdla file list: - > ./filelist.sh -4. Command line to build nvdla in aws fpga: +4. Generate nvdla file list: + > ./filelist.sh [nv_large | nv_medium_1024_full | nv_medium_512 | nv_small_256_full | nv_small_256 | nv_small] +5. Change the verilog define in file synth_cl_nvdla.tcl for different configure + > -verilog_define NV_LARGE | NV_MEDIUM_1024_FULL | NV_MEDIUM_512 | NV_SMALL_256_FULL | NV_SMALL_256 | NV_SMALL +6. Command line to build nvdla in aws fpga: > $HDK_DIR/common/shell_stable/build/scripts/aws_build_dcp_from_cl.sh -foreground -clock_recipe_a A2 diff --git a/cl_nvdla/build/scripts/create_dcp_from_cl.tcl b/cl_nvdla/build/scripts/create_dcp_from_cl.tcl index b412255..242b969 100755 --- a/cl_nvdla/build/scripts/create_dcp_from_cl.tcl +++ b/cl_nvdla/build/scripts/create_dcp_from_cl.tcl @@ -13,16 +13,13 @@ # implied. See the License for the specific language governing permissions and # limitations under the License. -# Copyright (c) 2009-2017, NVIDIA CORPORATION. All rights reserved. -# NVIDIA’s contributions are offered under the Amazon Software License - package require tar ## Do not edit $TOP set TOP top_sp ## Replace with the name of your module -set CL_MODULE cl_nvdla +set CL_MODULE cl_nvdla ################################################# ## Command-line Arguments @@ -113,37 +110,28 @@ set_msg_config -string {AXI_QUAD_SPI} -suppress # may comment them out if they wish to see more information from warning # messages. set_msg_config -id {Common 17-55} -suppress -set_msg_config -id {Designutils 20-1567} -suppress +set_msg_config -id {Vivado 12-4739} -suppress +set_msg_config -id {Constraints 18-4866} -suppress set_msg_config -id {IP_Flow 19-2162} -suppress -set_msg_config -id {Project 1-498} -suppress set_msg_config -id {Route 35-328} -suppress +set_msg_config -id {Vivado 12-1008} -suppress set_msg_config -id {Vivado 12-508} -suppress -set_msg_config -id {Constraints 18-4866} -suppress set_msg_config -id {filemgmt 56-12} -suppress -set_msg_config -id {Constraints 18-4644} -suppress -set_msg_config -id {Coretcl 2-64} -suppress -set_msg_config -id {Vivado 12-4739} -suppress -set_msg_config -id {Vivado 12-5201} -suppress set_msg_config -id {DRC CKLD-1} -suppress +set_msg_config -id {DRC CKLD-2} -suppress set_msg_config -id {IP_Flow 19-2248} -suppress -set_msg_config -id {Opt 31-155} -suppress -set_msg_config -id {Synth 8-115} -suppress -set_msg_config -id {Synth 8-3936} -suppress -set_msg_config -id {Vivado 12-1023} -suppress +set_msg_config -id {Vivado 12-1580} -suppress set_msg_config -id {Constraints 18-550} -suppress set_msg_config -id {Synth 8-3295} -suppress set_msg_config -id {Synth 8-3321} -suppress set_msg_config -id {Synth 8-3331} -suppress set_msg_config -id {Synth 8-3332} -suppress +set_msg_config -id {Synth 8-6014} -suppress +set_msg_config -id {Timing 38-436} -suppress +set_msg_config -id {DRC REQP-1853} -suppress set_msg_config -id {Synth 8-350} -suppress set_msg_config -id {Synth 8-3848} -suppress set_msg_config -id {Synth 8-3917} -suppress -set_msg_config -id {Synth 8-6014} -suppress -set_msg_config -id {Vivado 12-1580} -suppress -set_msg_config -id {Constraints 18-619} -suppress -set_msg_config -id {DRC CKLD-2} -suppress -set_msg_config -id {DRC REQP-1853} -suppress -set_msg_config -id {Timing 38-436} -suppress puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Calling the encrypt.tcl."; @@ -204,12 +192,16 @@ source $HDK_SHELL_DIR/build/scripts/step_user.tcl -notrace puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Calling aws_gen_clk_constraints.tcl to generate clock constraints from developer's specified recipe."; source $HDK_SHELL_DIR/build/scripts/aws_gen_clk_constraints.tcl - +################################################################# +#### Do not remove this setting. Need to workaround bug in 2017.4 +################################################################# +set_param hd.clockRoutingWireReduction false ################################################## ### CL XPR OOC Synthesis ################################################## if {${cl.synth}} { source -notrace ./synth_${CL_MODULE}.tcl + set synth_dcp ${timestamp}.CL.post_synth.dcp } ################################################## @@ -227,7 +219,7 @@ if {$implement} { puts "\nAWS FPGA: ([clock format [clock seconds] -format %T]) - Combining Shell and CL design checkpoints"; add_files $HDK_SHELL_DIR/build/checkpoints/from_aws/SH_CL_BB_routed.dcp add_files $CL_DIR/build/checkpoints/${timestamp}.CL.post_synth.dcp - set_property SCOPED_TO_CELLS {CL} [get_files $CL_DIR/build/checkpoints/${timestamp}.CL.post_synth.dcp] + set_property SCOPED_TO_CELLS {WRAPPER_INST/CL} [get_files $CL_DIR/build/checkpoints/${timestamp}.CL.post_synth.dcp] #Read the constraints, note *DO NOT* read cl_clocks_aws (clocks originating from AWS shell) read_xdc [ list \ @@ -236,7 +228,7 @@ if {$implement} { set_property PROCESSING_ORDER late [get_files cl_pnr_user.xdc] puts "\nAWS FPGA: ([clock format [clock seconds] -format %T]) - Running link_design"; - link_design -top $TOP -part [DEVICE_TYPE] -reconfig_partitions {SH CL} + link_design -top $TOP -part [DEVICE_TYPE] -reconfig_partitions {WRAPPER_INST/SH WRAPPER_INST/CL} puts "\nAWS FPGA: ([clock format [clock seconds] -format %T]) - PLATFORM.IMPL==[get_property PLATFORM.IMPL [current_design]]"; ################################################## @@ -309,7 +301,11 @@ if {$implement} { # This is what will deliver to AWS puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Writing final DCP to to_aws directory."; - write_checkpoint -force $CL_DIR/build/checkpoints/to_aws/${timestamp}.SH_CL_routed.dcp + #writing unencrypted dcp for analysis to checkpoints dir. + write_checkpoint -force $CL_DIR/build/checkpoints/${timestamp}.SH_CL_routed.dcp + + #writing encrypted dcp which can be sent to AWS + write_checkpoint -encrypt -force $CL_DIR/build/checkpoints/to_aws/${timestamp}.SH_CL_routed.dcp # Generate debug probes file write_debug_probes -force -no_partial_ltxfile -file $CL_DIR/build/checkpoints/${timestamp}.debug_probes.ltx @@ -328,8 +324,18 @@ puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Compress files for # Create manifest file set manifest_file [open "$CL_DIR/build/checkpoints/to_aws/${timestamp}.manifest.txt" w] set hash [lindex [split [exec sha256sum $CL_DIR/build/checkpoints/to_aws/${timestamp}.SH_CL_routed.dcp] ] 0] - +set TOOL_VERSION $::env(VIVADO_TOOL_VERSION) +set vivado_version [version -short] +set ver_2017_4 2017.4 +puts "vivado_version is $vivado_version\n" + +if { [string first $ver_2017_4 $vivado_version] == 0 } { +puts $manifest_file "manifest_format_version=2\n" +puts "in 2017.4" +} else { puts $manifest_file "manifest_format_version=1\n" +puts "in 2017.1" +} puts $manifest_file "pci_vendor_id=$vendor_id\n" puts $manifest_file "pci_device_id=$device_id\n" puts $manifest_file "pci_subsystem_id=$subsystem_id\n" @@ -338,6 +344,9 @@ puts $manifest_file "dcp_hash=$hash\n" puts $manifest_file "shell_version=$shell_version\n" puts $manifest_file "dcp_file_name=${timestamp}.SH_CL_routed.dcp\n" puts $manifest_file "hdk_version=$hdk_version\n" +if { [string first $ver_2017_4 $vivado_version] == 0} { +puts $manifest_file "tool_version=v2017.4\n" +} puts $manifest_file "date=$timestamp\n" puts $manifest_file "clock_recipe_a=$clock_recipe_a\n" puts $manifest_file "clock_recipe_b=$clock_recipe_b\n" @@ -359,7 +368,7 @@ puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Finished creating if {[string compare $notify_via_sns "1"] == 0} { puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Calling notification script to send e-mail to $env(EMAIL)"; - exec $env(HDK_COMMON_DIR)/scripts/notify_via_sns.py + exec $env(AWS_FPGA_REPO_DIR)/shared/bin/scripts/notify_via_sns.py } puts "AWS FPGA: ([clock format [clock seconds] -format %T]) - Build complete."; diff --git a/cl_nvdla/build/scripts/filelist.sh b/cl_nvdla/build/scripts/filelist.sh index 7aba860..589a1eb 100755 --- a/cl_nvdla/build/scripts/filelist.sh +++ b/cl_nvdla/build/scripts/filelist.sh @@ -7,6 +7,8 @@ # ================================================================ rm -rf ./nvdla_file.tcl +rm -rf ../src_post_encryption +version=$1 SelecSource(){ local_folder=$1 @@ -29,11 +31,11 @@ sed '/'$local_file'/d' nvdla_file.tcl > nvdla_file.tcl_temp mv nvdla_file.tcl_temp nvdla_file.tcl } -ip_path=outdir/nv_small/spec/manual +ip_path=outdir/$version/spec/manual file_type=v SelecSource $ip_path $file_type -ip_path=outdir/nv_small/vmod/nvdla +ip_path=outdir/$version/vmod/nvdla file_type=v folder_list=`ls ${NV_HW_ROOT}/${ip_path}` echo ${folder_list} @@ -42,33 +44,49 @@ do SelecSource ${ip_path}/$folder $file_type done -ip_path=outdir/nv_small/vmod/include +ip_path=outdir/$version/vmod/include file_type=vh SelecSource ${ip_path} $file_type -ip_path=outdir/nv_small/vmod/vlibs +ip_path=outdir/$version/vmod/vlibs file_type=v SelecSource ${ip_path} $file_type -ip_path=outdir/nv_small/spec/defs +ip_path=outdir/$version/spec/defs file_type=vh SelecSource ${ip_path} $file_type -export PATH=.:$PATH -echo "dla_ramgen -m nv_ram_rwsp_8x65" >> ${NV_HW_ROOT}/vmod/rams/fpga/run_small_ram -echo "dla_ramgen -m nv_ram_rws_256x64" >> ${NV_HW_ROOT}/vmod/rams/fpga/run_small_ram -cur_path=`pwd` -cd ${NV_HW_ROOT}/vmod/rams/fpga -./run_small_ram -mkdir -p ${NV_HW_ROOT}/outdir/nv_small/vmod/rams/fpga/small_rams -mv ${NV_HW_ROOT}/vmod/rams/fpga/*.v ${NV_HW_ROOT}/outdir/nv_small/vmod/rams/fpga/small_rams -cd ${cur_path} +#export PATH=.:$PATH +#echo "dla_ramgen -m nv_ram_rwsp_8x65" >> ${NV_HW_ROOT}/vmod/rams/fpga/run_small_ram +#echo "dla_ramgen -m nv_ram_rws_256x64" >> ${NV_HW_ROOT}/vmod/rams/fpga/run_small_ram +#cur_path=`pwd` +#cd ${NV_HW_ROOT}/vmod/rams/fpga +#./run_small_ram +#mkdir -p ${NV_HW_ROOT}/outdir/$version/vmod/rams/fpga/small_rams +#mv ${NV_HW_ROOT}/vmod/rams/fpga/*.v ${NV_HW_ROOT}/outdir/$version/vmod/rams/fpga/small_rams +#cd ${cur_path} -ip_path=outdir/nv_small/vmod/rams/fpga/small_rams +ip_path=outdir/$version/vmod/rams/fpga/model file_type=v SelecSource ${ip_path} $file_type +ip_path=outdir/$version/vmod/fifos +file_type=v +SelecSource ${ip_path} $file_type + +if [ $version == "nv_large" ] +then +file_remove_list="NV_NVDLA_CDP_DP_bufferin_tp1.v NV_NVDLA_CVIF_WRITE_IG_arb.v" +elif [ $version == "nv_medium_1024_full" ] +then +file_remove_list="NV_NVDLA_CDP_DP_bufferin_tp1.v" +elif [ $version == "nv_medium_512" ] +then file_remove_list="NV_NVDLA_SDP_CORE_Y_lut.v NV_NVDLA_SDP_HLS_Y_cvt_top.v NV_NVDLA_SDP_HLS_Y_idx_top.v NV_NVDLA_SDP_HLS_Y_inp_top.v NV_NVDLA_SDP_HLS_Y_int_core.v" +else +file_remove_list="NV_NVDLA_SDP_CORE_Y_lut.v NV_NVDLA_SDP_HLS_Y_cvt_top.v NV_NVDLA_SDP_HLS_Y_idx_top.v NV_NVDLA_SDP_HLS_Y_inp_top.v NV_NVDLA_SDP_HLS_Y_int_core.v" +fi + for each_file in $file_remove_list do RemoveFile $each_file diff --git a/cl_nvdla/build/scripts/synth_cl_nvdla.tcl b/cl_nvdla/build/scripts/synth_cl_nvdla.tcl index 0e4fe3c..2292832 100644 --- a/cl_nvdla/build/scripts/synth_cl_nvdla.tcl +++ b/cl_nvdla/build/scripts/synth_cl_nvdla.tcl @@ -98,6 +98,9 @@ read_bd [ list \ #Read Xilinx interconnection IP read_ip [ list \ $CL_DIR/../common/design/xilinx_ip/axi_interconnect_nvdla_64b/axi_interconnect_nvdla_64b.xci \ + $CL_DIR/../common/design/xilinx_ip/axi_interconnect_nv_large/axi_interconnect_nv_large.xci \ + $CL_DIR/../common/design/xilinx_ip/axi_interconnect_nvdla_128b/axi_interconnect_nvdla_128b.xci \ + $CL_DIR/../common/design/xilinx_ip/axi_interconnect_nvdla_64b_cvsram/axi_interconnect_nvdla_64b_cvsram.xci \ $CL_DIR/../common/design/xilinx_ip/axi_apb_bridge_0/axi_apb_bridge_0.xci ] @@ -128,8 +131,9 @@ set_property PROCESSING_ORDER EARLY [get_files cl_clocks_aws.xdc] puts "AWS FPGA: ([clock format [clock seconds] -format %T]) Start design synthesis."; update_compile_order -fileset sources_1 +#-verilog_define NV_LARGE | NV_MEDIUM_1024_FULL | NV_MEDIUM_512 | NV_SMALL_256_FULL | NV_SMALL_256 | NV_SMALL puts "\nRunning synth_design for $CL_MODULE $CL_DIR/build/scripts \[[clock format [clock seconds] -format {%a %b %d %H:%M:%S %Y}]\]" -eval [concat synth_design -top $CL_MODULE -verilog_define XSDB_SLV_DIS -verilog_define FPGA -verilog_define SYNTHESIS -verilog_define DESIGNWARE_NOEXIST -verilog_define VLIB_BYPASS_POWER_CG -verilog_define NV_FPGA_SYSTEM -verilog_define NV_FPGA_FIFOGEN -verilog_define NV_FPGA_UNIT -part [DEVICE_TYPE] -mode out_of_context $synth_options -directive $synth_directive] +eval [concat synth_design -top $CL_MODULE -verilog_define XSDB_SLV_DIS -verilog_define FPGA -verilog_define SYNTHESIS -verilog_define DESIGNWARE_NOEXIST -verilog_define VLIB_BYPASS_POWER_CG -verilog_define NV_FPGA_SYSTEM -verilog_define NV_FPGA_FIFOGEN -verilog_define NV_FPGA_UNIT -verilog_define NV_LARGE -part [DEVICE_TYPE] -mode out_of_context $synth_options -directive $synth_directive] set failval [catch {exec grep "FAIL" failfast.csv}] if { $failval==0 } { diff --git a/cl_nvdla/design/cl_dma_pcis_slv.sv b/cl_nvdla/design/cl_dma_pcis_slv.sv index c5ff1c6..ae2e7c1 100644 --- a/cl_nvdla/design/cl_dma_pcis_slv.sv +++ b/cl_nvdla/design/cl_dma_pcis_slv.sv @@ -16,6 +16,8 @@ // Copyright (c) 2009-2017, NVIDIA CORPORATION. All rights reserved. // NVIDIA’s contributions are offered under the Amazon Software License +`include "cl_nvdla_defines.vh" + module cl_dma_pcis_slv #(parameter SCRB_MAX_ADDR = 64'h3FFFFFFFF, parameter SCRB_BURST_LEN_MINUS1 = 15, parameter NO_SCRB_INST = 1) ( @@ -126,10 +128,16 @@ lib_pipe #(.WIDTH(1), .STAGES(4)) SLR2_PIPE_RST_N (.clk(aclk), .rst_n(1'b1), .in //---------------------------- // axi interconnect for DDR address decodes //---------------------------- -`ifdef NVDLA_CVSRAM_PRESENT -(* dont_touch = "true" *) axi_interconnect_nvdla_512b AXI_INTERCONNECT ( -`else -(* dont_touch = "true" *) axi_interconnect_nvdla_64b AXI_INTERCONNECT ( +`ifdef NVDLA_AXI_WIDTH_256 + (* dont_touch = "true" *) axi_interconnect_nv_large AXI_INTERCONNECT ( +`elsif NVDLA_AXI_WIDTH_128 + (* dont_touch = "true" *) axi_interconnect_nvdla_128b AXI_INTERCONNECT ( +`else + `ifdef NVDLA_CVSRAM_PRESENT + (* dont_touch = "true" *) axi_interconnect_nvdla_64b_cvsram AXI_INTERCONNECT ( + `else + (* dont_touch = "true" *) axi_interconnect_nvdla_64b AXI_INTERCONNECT ( + `endif `endif .INTERCONNECT_ACLK (aclk) ,.INTERCONNECT_ARESETN (slr1_sync_aresetn) diff --git a/cl_nvdla/design/cl_nvdla.sv b/cl_nvdla/design/cl_nvdla.sv index baa2906..5e9de64 100644 --- a/cl_nvdla/design/cl_nvdla.sv +++ b/cl_nvdla/design/cl_nvdla.sv @@ -69,8 +69,17 @@ axi_bus_t sh_cl_dma_pcis_bus(); axi_bus_t sh_cl_dma_pcis_q(); // keep below nvdla_axi_0/1 i/f same as nvdla configuration. Note: nvdla, axi_data_width=512,axi_len_width=4 `ifdef NVDLA_CVSRAM_PRESENT -axi_bus_t #(.AWS_FPGA_AXI_BUS_DATA_WIDTH(512), .AWS_FPGA_AXI_BUS_ID_WIDTH(8), .AWS_FPGA_AXI_BUS_ADDR_WIDTH(64), .AWS_FPGA_AXI_BUS_LEN_WIDTH(4)) nvdla_dbb_axi_bus(); -axi_bus_t #(.AWS_FPGA_AXI_BUS_DATA_WIDTH(512), .AWS_FPGA_AXI_BUS_ID_WIDTH(8), .AWS_FPGA_AXI_BUS_ADDR_WIDTH(64), .AWS_FPGA_AXI_BUS_LEN_WIDTH(4)) nvdla_cvsram_axi_bus(); +`ifdef NVDLA_AXI_WIDTH_256 +axi_bus_t #(.AWS_FPGA_AXI_BUS_DATA_WIDTH(256), .AWS_FPGA_AXI_BUS_ID_WIDTH(8), .AWS_FPGA_AXI_BUS_ADDR_WIDTH(64), .AWS_FPGA_AXI_BUS_LEN_WIDTH(4)) nvdla_cvsram_axi_bus(); +`else +axi_bus_t #(.AWS_FPGA_AXI_BUS_DATA_WIDTH(64), .AWS_FPGA_AXI_BUS_ID_WIDTH(8), .AWS_FPGA_AXI_BUS_ADDR_WIDTH(64), .AWS_FPGA_AXI_BUS_LEN_WIDTH(4)) nvdla_cvsram_axi_bus(); +`endif +`endif + +`ifdef NVDLA_AXI_WIDTH_256 +axi_bus_t #(.AWS_FPGA_AXI_BUS_DATA_WIDTH(256), .AWS_FPGA_AXI_BUS_ID_WIDTH(8), .AWS_FPGA_AXI_BUS_ADDR_WIDTH(64), .AWS_FPGA_AXI_BUS_LEN_WIDTH(4)) nvdla_dbb_axi_bus(); +`elsif NVDLA_AXI_WIDTH_128 +axi_bus_t #(.AWS_FPGA_AXI_BUS_DATA_WIDTH(128), .AWS_FPGA_AXI_BUS_ID_WIDTH(8), .AWS_FPGA_AXI_BUS_ADDR_WIDTH(64), .AWS_FPGA_AXI_BUS_LEN_WIDTH(4)) nvdla_dbb_axi_bus(); `else axi_bus_t #(.AWS_FPGA_AXI_BUS_DATA_WIDTH(64), .AWS_FPGA_AXI_BUS_ID_WIDTH(8), .AWS_FPGA_AXI_BUS_ADDR_WIDTH(64), .AWS_FPGA_AXI_BUS_LEN_WIDTH(4)) nvdla_dbb_axi_bus(); `endif diff --git a/cl_nvdla/design/cl_nvdla_defines.vh b/cl_nvdla/design/cl_nvdla_defines.vh index 51f7585..c15843b 100644 --- a/cl_nvdla/design/cl_nvdla_defines.vh +++ b/cl_nvdla/design/cl_nvdla_defines.vh @@ -38,5 +38,27 @@ `define VLIB_BYPASS_POWER_CG `define NV_FPGA_UNIT `define NV_FPGA_FIFOGEN +`ifdef NV_LARGE +`define NVDLA_CVSRAM_PRESENT +`define NVDLA_AXI_WIDTH_256 +`endif +`ifdef NV_MEDIUM_1024_FULL +`define NVDLA_CVSRAM_PRESENT +`define NVDLA_AXI_WIDTH_256 +`endif +`ifdef NV_MEDIUM_512 +`define NVDLA_AXI_WIDTH_128 +`endif +`ifdef NV_SMALL_256_FULL +`define NVDLA_CVSRAM_PRESENT +`define NVDLA_AXI_WIDTH_64 +`endif +`ifdef NV_SMALL_256 +`define NVDLA_AXI_WIDTH_64 +`endif +`ifdef NV_SMALL +`define NVDLA_AXI_WIDTH_64 +`endif + `endif diff --git a/cl_nvdla/design/cl_nvdla_wrap.sv b/cl_nvdla/design/cl_nvdla_wrap.sv index 4f55129..afba2d9 100644 --- a/cl_nvdla/design/cl_nvdla_wrap.sv +++ b/cl_nvdla/design/cl_nvdla_wrap.sv @@ -7,6 +7,7 @@ // ================================================================ // File Name: cl_nvdla_wrap.sv +`include "cl_nvdla_defines.vh" module cl_nvdla_wrap ( @@ -186,17 +187,25 @@ NV_nvdla nvdla_top ( ,.nvdla_pwrbus_ram_a_pd (32'b0) ); // nvdla_top -`ifdef NVDLA_CVSRAM_PRESENT -assign cl_dut_axi_0.awsize = 3'b110; -assign cl_dut_axi_0.arsize = 3'b110; +`ifdef NVDLA_AXI_WIDTH_256 + assign cl_dut_axi_0.awsize = 3'b101; + assign cl_dut_axi_0.arsize = 3'b101; +`elsif NVDLA_AXI_WIDTH_128 + assign cl_dut_axi_0.awsize = 3'b100; + assign cl_dut_axi_0.arsize = 3'b100; `else -assign cl_dut_axi_0.awsize = 3'b011; -assign cl_dut_axi_0.arsize = 3'b011; -`endif + assign cl_dut_axi_0.awsize = 3'b011; + assign cl_dut_axi_0.arsize = 3'b011; +`endif `ifdef NVDLA_CVSRAM_PRESENT -assign cl_dut_axi_1.awsize = 3'b110; -assign cl_dut_axi_1.arsize = 3'b110; + `ifdef NVDLA_AXI_WIDTH_256 + assign cl_dut_axi_1.awsize = 3'b101; + assign cl_dut_axi_1.arsize = 3'b101; + `else + assign cl_dut_axi_1.awsize = 3'b011; + assign cl_dut_axi_1.arsize = 3'b011; + `endif `endif diff --git a/cl_nvdla/verif/scripts/top.vcs.f b/cl_nvdla/verif/scripts/top.vcs.f index e6d119c..12d8574 100644 --- a/cl_nvdla/verif/scripts/top.vcs.f +++ b/cl_nvdla/verif/scripts/top.vcs.f @@ -52,6 +52,7 @@ ${CL_ROOT}/design/cl_nvdla_wrap.sv ${CL_COMMON_ROOT}/design/xilinx_ip/axi_interconnect_nvdla_64b/axi_interconnect_nvdla_64b_sim_netlist.v +${CL_COMMON_ROOT}/design/xilinx_ip/axi_interconnect_nv_large/axi_interconnect_nv_large_sim_netlist.v ${CL_COMMON_ROOT}/design/xilinx_ip/axi_apb_bridge_0/axi_apb_bridge_0_sim_netlist.v ${CL_COMMON_ROOT}/design/xilinx_ip/axi_dwidth_converter_512b_to_64b/axi_dwidth_converter_512b_to_64b_sim_netlist.v ${CL_COMMON_ROOT}/design/xilinx_ip/axi_protocol_converter_axi_to_axil/axi_protocol_converter_axi_to_axil_sim_netlist.v @@ -88,11 +89,33 @@ -y ${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/nvdla/cfgrom -y ${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/vlibs +-y ${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos #-y ${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/rams #-y ${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/rams/model #-y ${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/rams/synth --y ${NVDLA_HW_ROOT}/vmod/rams/fpga/small_rams +-y ${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/rams/fpga/model + + +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_CDMA_WT_8ATMM_fifo.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_CDP_DP_data_fifo.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_CDP_DP_intpinfo_fifo.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_CDP_DP_sumpd_fifo.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_CDP_RDMA_lat_fifo.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_CDP_RDMA_ro_fifo.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_CDP_WDMA_dat_fifo.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_PDP_RDMA_lat_fifo.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_PDP_RDMA_ro_fifo.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_PDP_SDPIN_ro_fifo.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_PDP_WDMA_DAT_fifo.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_SDP_BRDMA_cq_lib.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_SDP_BRDMA_lat_fifo_lib.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_SDP_ERDMA_cq_lib.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_SDP_ERDMA_lat_fifo_lib.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_SDP_MRDMA_EG_lat_fifo_lib.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_SDP_MRDMA_cq_lib.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_SDP_NRDMA_cq_lib.v +${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/fifos/NV_NVDLA_SDP_NRDMA_lat_fifo_lib.v ${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/vlibs/nv_assert_no_x.vlib ${NVDLA_HW_ROOT}/outdir/${NVDLA_HW_PROJECT}/vmod/vlibs/NV_DW02_tree.v diff --git a/common/software/include/cl_common_utils.h b/common/software/include/cl_common_utils.h index 27def41..266835a 100644 --- a/common/software/include/cl_common_utils.h +++ b/common/software/include/cl_common_utils.h @@ -49,6 +49,7 @@ extern pci_bar_handle_t pci_bar_handle_pcis; #define SIZE_UINT32 2 #define SIZE_UINT64 3 +#define SIZE_UINT8 0 #define CFG_RESET_ADDR (0x14) @@ -101,6 +102,13 @@ extern pci_bar_handle_t pci_bar_handle_pcis; fpga_pci_poke(pci_bar_handle_pcis, addr, data); \ } else if (size == SIZE_UINT64) { \ fpga_pci_poke64(pci_bar_handle_pcis, addr, data); \ + } else if (size == SIZE_UINT8) { \ + int addr_offset = addr & 0x3; \ + uint32_t mem_temp; \ + fpga_pci_peek(pci_bar_handle_pcis, addr&0xfffffffc, (uint32_t*)(&mem_temp)); \ + /*log_printf("Addr=0x%x ,Data=0x%x, rw=r",addr, mem_temp);*/ \ + memcpy((uint8_t*)(&mem_temp) + addr_offset, (uint8_t *)(&data), 1); \ + fpga_pci_poke(pci_bar_handle_pcis, addr&0xfffffffc, (uint32_t)mem_temp); \ } else { \ log_printf("Unsupported size: %d, only support %d and %d!\n", size, SIZE_UINT32, SIZE_UINT64); \ assert(0); \ @@ -118,7 +126,12 @@ extern pci_bar_handle_t pci_bar_handle_pcis; fpga_pci_peek(pci_bar_handle_pcis, addr, (uint32_t*)data); \ } else if (size == SIZE_UINT64) { \ fpga_pci_peek64(pci_bar_handle_pcis, addr, data); \ - } else { \ + } else if (size == SIZE_UINT8) { \ + int addr_offset= addr & 0x3; \ + uint32_t mem_temp; \ + fpga_pci_peek(pci_bar_handle_pcis, addr&0xfffffffc, (uint32_t*)(&mem_temp)); \ + *(uint32_t *)data = *(uint8_t *)((uint8_t *)&mem_temp + addr_offset); \ + }else { \ log_printf("Unsupported size: %d, only support %d and %d!\n", size, SIZE_UINT32, SIZE_UINT64); \ assert(0); \ } \ diff --git a/common/software/src/cosim_top.cpp b/common/software/src/cosim_top.cpp index 03af9a5..69189fc 100644 --- a/common/software/src/cosim_top.cpp +++ b/common/software/src/cosim_top.cpp @@ -1,13 +1,13 @@ -// ================================================================ -// NVDLA Open Source Project -// -// Copyright(c) 2016 - 2017 NVIDIA Corporation. Licensed under the -// NVDLA Open Hardware License; Check "LICENSE" which comes with -// this distribution for more information. -// ================================================================ - -// File Name: cosim_top.cpp - +// ================================================================ +// NVDLA Open Source Project +// +// Copyright(c) 2016 - 2017 NVIDIA Corporation. Licensed under the +// NVDLA Open Hardware License; Check "LICENSE" which comes with +// this distribution for more information. +// ================================================================ + +// File Name: cosim_top.cpp + #include #include #include @@ -349,6 +349,7 @@ void cosim_top::ram_ipc_channel() void cosim_top::irq_ipc_channel() { uint32_t data = 0; + uint32_t data_pre = 0; // Make sure the RTL testbench is ready sc_core::wait(start_event); @@ -374,10 +375,11 @@ void cosim_top::irq_ipc_channel() set_ev_trigger(EVENT_IRQ_RD); sc_core::wait(irq_rd_done_event); - + + data_pre = data; data = irq_read_ack(); - if (data != 0) + if (data != data_pre) { struct irq_trans_payload irq_stat; irq_stat.value = data; diff --git a/common/trace_player/parser/nvdla_trace_parser.py b/common/trace_player/parser/nvdla_trace_parser.py index 0d0739b..ccb79ea 100755 --- a/common/trace_player/parser/nvdla_trace_parser.py +++ b/common/trace_player/parser/nvdla_trace_parser.py @@ -29,7 +29,11 @@ class TraceParser(object): re_result_checker_command = re.compile(r'^\s*(?P(check_crc|check_file|check_nothing))\s*\(\s*(?P\w+)\s*(,\s*(?P\w+)\s*,\s*(?P0x[0-9a-fA-F]+)\s*,\s*(?P(0x[0-9a-fA-F]+)|(\d+))\s*,\s*((?P0x[0-9a-fA-F]+)|"(?P[\w\.\/]+)")\s*)?\)\s*;\s*(\/\/.*)*$') re_memory_model_command = re.compile(r'^\s*(?P(mem_load|mem_init))\s*\(\s*(?P\w+)\s*,\s*(?P0x[0-9a-fA-F]+)\s*,\s*("(?P[\w\.\/]+)"|(?P(0x[0-9a-fA-F]+)|(\d+)))\s*(,\s*(?P\w+)\s*)?\)\s*;\s*(\/\/.*)*$') re_sequence_command_poll_reg = re.compile(r'^\s*(?Ppoll_reg_equal)\s*\(\s*(?P[\w\.]+)\s*,\s*(?P(\w+|\d+|0x[0-9a-fA-F]+))\s*\)\s*;\s*(\/\/.*)*$') - + + re_memory_release_command = re.compile (r'^\s*(?P(mem_release))\s*\(\s*(?P\w+)\s*,\s*(?P0x[0-9a-fA-F]+)\s*,\s*(?P\w+)\s*\)\s*;\s*(\/\/.*)*$') + re_memory_reserve_command = re.compile (r'^\s*(?P(mem_reserve))\s*\(\s*(?P\w+)\s*,\s*(?P0x[0-9a-fA-F]+)\s*,\s*(?P0x[0-9a-fA-F]+)\s*(,\s*(?P\w+)\s*\)\s*;|\);)\s*(\/\/.*)*$') + +# re_memory_reserve_command = re.compile (r'^\s*(?P(mem_reserve))\s*\(\s*(?P\w+)\s*,\s*(?P0x[0-9a-fA-F]+)\s*,\s*(?P(0x[0-9a-fA-F]+)\s*,\s*(?P\w+)\s*\)\s*;\s*(\/\/.*)*$') #mem_load ( sec_mem, 0x8000, "python/over/perl.dat"); #mem_init ( pri_mem, 0x2000, "python/over/perl.dat", RANDOM); #mem_init ( sec_mem, 0x5000, 0x2000, ALL_ZERO); @@ -107,7 +111,7 @@ def do_parsing(self): print (m.groupdict()) if m.group('intr_id') is not None: #f_ic_cmd.write("MULTI_SHOT %s %s\n" % (m.group('intr_id').upper(), m.group('sync_id'))) - f_c_cmd.write("\ttrace_player_thread_push_cmd_intr_notify(NVDLA_GLB_S_INTR_STATUS_0_%s_DONE_STATUS%s_FIELD, \"%s\");\n" % ( m.group('intr_id')[0:-2], m.group('intr_id')[-1], m.group('sync_id') ) ) + f_c_cmd.write("\ttrace_player_thread_push_cmd_intr_notify(NVDLA_GLB_S_INTR_STATUS_0_%s_DONE_STATUS%s_FIELD, \"%s\");\n" % ( m.group('intr_id')[0:-2].upper(), m.group('intr_id')[-1], m.group('sync_id') ) ) else: #f_ic_cmd.write("SINGLE_SHOT NA %s\n" % (m.group('sync_id'))) f_c_cmd.write("\ttrace_player_thread_push_cmd_intr_notify(0xFFFFFFFF, \"%s\");\n" % ( m.group('sync_id') ) ) @@ -139,6 +143,8 @@ def do_parsing(self): if "mem_load" == m.group('kind').lower(): #f_mm_cmd.write("%s %s %X 0 NA %s\n" % (m.group('kind').upper(), m.group('memory_type').upper(), int(m.group('base_addr'),0), os.path.join(trace_dir, m.group('file_path')) )) f_c_cmd.write("\ttrace_player_thread_push_cmd_%s(\"%s\", 0x%X, \"%s\");\n" % (m.group('kind'), m.group('memory_type').upper(), int(m.group('base_addr'),0), os.path.join(trace_dir, m.group('file_path')) )) + elif "mem_release" == m.group('kind').lower(): + f_c_cmd.write("\ttrace_player_thread_push_cmd_%s(\"%s\", 0x%X);\n" % (m.group('kind'), m.group('memory_type').upper(), int(m.group('base_addr'),0)) ) elif "mem_init" == m.group('kind').lower(): if m.group('size') is not None: ## Initialized memory by pattern @@ -155,6 +161,15 @@ def do_parsing(self): print (m.groupdict()) #f_seq_cmd.write("%s %s NA %X NA\n" % ( m.group('kind').upper(), ' '.join(m.group('name').split('.')), int(m.group('value'),0) ) ) f_c_cmd.write("\ttrace_player_thread_push_cmd_%s(\"%s\", %s, 0x%X);\n" % ( m.group('kind'), m.group('name').split(".")[0], m.group('name').replace(".", "_"), int(m.group('value'),0) ) ) + + continue + m = self.re_memory_release_command.match(line) + if m: + #print (m.groupdict()) + #f_c_cmd.write("\ttrace_player_thread_push_cmd_%s(\"%s\", 0x%X, \"%s\");\n" % (m.group('kind'), m.group('memory_type').upper(), int(m.group('base_addr'),0),m.group('sync_id')) ) + continue + m = self.re_memory_reserve_command.match(line) + if m: continue raise Exception("Unregconized line:\n%s" % line) #f_seq_cmd.close() diff --git a/common/trace_player/src/trace_player.c b/common/trace_player/src/trace_player.c index 7d58e1f..72750dd 100644 --- a/common/trace_player/src/trace_player.c +++ b/common/trace_player/src/trace_player.c @@ -23,7 +23,7 @@ typedef void (*PREPARE_CMD_FUNC)(void); -#define TRACE_TEST_TIMEOUT_SEC 120 +#define TRACE_TEST_TIMEOUT_SEC 1200 static int stop_on_error = 1; static int wait_interval = 1;