Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
2921036
sw: Add GEMM tests and start AXI dump logic in memory tile
Lore0599 Jul 10, 2025
c4c88a8
sw: Add flow for applications and adapt GEMM with mcast support
Lore0599 Jul 11, 2025
18f57e0
sw: Remove legacy GEMM scripts
colluca Jul 17, 2025
d0116ee
sw: Test GEMM parallelized over K
colluca Jul 17, 2025
94d79a1
sw: Streamline Snitch app integration
colluca Jul 17, 2025
31c66b2
target: Add Make target for Snitch app verification
colluca Jul 17, 2025
a2bd17e
gemm_picobello: Fix regression on parallelize_k config
colluca Jul 18, 2025
b048875
sw: Rename Picobello GEMM to support both GEMM versions
colluca Jul 18, 2025
ffa7c3b
sw: Add FlashAttention kernel
colluca Jul 21, 2025
e268149
sw: Fix Snitch integration
colluca Jul 21, 2025
de81188
sw: Add `fused_concat_linear` app
colluca Jul 21, 2025
5df282c
sw: Separate Snitch HAL build and source directories
colluca Jul 22, 2025
277494c
sw: Add MHA kernel
colluca Jul 23, 2025
1eaa72f
sw: Shrink FusedConcatLinear to only 2 clusters
colluca Jul 23, 2025
f17a3ca
sw: Start performance evaluation GEMM with mcast
Lore0599 Jul 24, 2025
bb2b43a
ci: Add Snitch apps
colluca Jul 17, 2025
30c117b
sw: Change GEMM configuration
colluca Jul 30, 2025
f7a16dc
bender: Bump snitch_cluster
colluca Jul 28, 2025
31fb9ec
sw: Use optimized GEMM in FusedConcatLinear
colluca Jul 31, 2025
5281efd
Start gemm2d optimization for NoC
Lore0599 Aug 19, 2025
2e7c293
sw: Optimize gemm2d for NoC architecture
Lore0599 Aug 26, 2025
12bdd96
sw: Initial Picobello NoC runtime
Lore0599 Sep 2, 2025
39c03d3
bender: Bump common_cells to remove false unstable data assertion errors
colluca Sep 4, 2025
7cc24e0
bender: Bump snitch_cluster to include sequencer bug fix
colluca Sep 4, 2025
2aa5ff5
sw: Align NoC API with systemRDL
Lore0599 Sep 5, 2025
62e288a
requirements.txt: Bump `peakrdl-rawheader`
colluca Sep 5, 2025
5fad5d8
Final runtime improvements
colluca Sep 5, 2025
e4abf0a
Fix `slink_32b_elf_preload`
colluca Sep 5, 2025
0a87358
Bump pd in CI
colluca Sep 8, 2025
cee0d5c
Exclude non-synthesizable code on synthesis
colluca Sep 8, 2025
5fdea71
Move new tb tasks to tb_picobello_tasks
colluca Sep 9, 2025
55aa6b7
Use new tb tasks in pd tb
colluca Sep 9, 2025
b7d3472
bender: Bump snitch_cluster (no hw changes)
colluca Sep 9, 2025
6485ded
Improve picobello runtime fumnctions naming
Lore0599 Sep 15, 2025
bc7d452
Bump PD repo
colluca Sep 15, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,16 @@ sim-vsim:
- vsim-compile
script:
# Run the simulation
- make vsim-run-batch
- make vsim-run-batch-verify
# Check either success or failure for non-zero exit codes
- 'if [ -z "${NZ_EXIT_CODE}" ]; then grep "] SUCCESS" transcript || (exit 1); else grep "] FAILED: return code ${NZ_EXIT_CODE}" transcript || (exit 1); fi'
# Check for UART output
- 'if [ ! -z "${USTR}" ]; then (grep " \[UART\] ${USTR}" transcript); fi'
# Check for any fatal errors
- 'if grep "Fatal:" transcript; then exit 1; fi'
# Check for any errors (except one for non-zero exit codes)
- 'if [ ! -z "${NZ_EXIT_CODE}" ]; then count=$(grep -c "Error:" transcript); if [ "$count" -ne 1 ]; then exit 1; fi; else if grep -q "Error:" transcript; then exit 1; fi; fi'
# Check for any non-fatal errors. One and only one error is expected with a non-zero exit code.
# Ignore all errors when using a separate verification script.
- 'if [ -z "${VERIFY_PY}" ]; then if [ ! -z "${NZ_EXIT_CODE}" ]; then count=$(grep -c "Error:" transcript); if [ "$count" -ne 1 ]; then exit 1; fi; else if grep -q "Error:" transcript; then exit 1; fi; fi; fi'
artifacts:
paths:
- transcript
Expand Down
6 changes: 4 additions & 2 deletions .gitlab/common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
PD_COMMIT: "c22baf1fcce6b0cf76b85d2009e146ba3bbb6807"
PD_COMMIT: "245dd3d4a7c33f6052a913691382871bd2c53fc0"
SPU_COMMIT: "c2e8815487bd713624d74ef3e3e0465196b6d67f"

# Check the cache for bender and python dependencies
Expand Down Expand Up @@ -66,11 +66,13 @@ variables:
- .cache-deps
- .init-env
script:
- make sn-tests
- make sn-tests DEBUG=ON
- make sn-apps DEBUG=ON
- make pb-sn-tests
artifacts:
paths:
- sw/snitch/tests/build/*.elf
- sw/snitch/apps/**/build/*.elf
expire_in: 1 day

# Compile the cheshire software tests
Expand Down
6 changes: 6 additions & 0 deletions .gitlab/sw-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
variables:
CHS_BUILD_DIR: sw/cheshire/tests
SN_BUILD_DIR: sw/snitch/tests/build
SN_ROOT: .deps/snitch_cluster
parallel:
matrix:
- { CHS_BINARY: $CHS_BUILD_DIR/sanity.spm.elf, PRELMODE: 0 }
Expand All @@ -29,3 +30,8 @@
- { CHS_BINARY: $CHS_BUILD_DIR/simple_offload.spm.elf, SN_BINARY: $SN_BUILD_DIR/redmule.elf }
- { CHS_BINARY: $CHS_BUILD_DIR/simple_offload.spm.elf, SN_BINARY: $SN_BUILD_DIR/redmule_quant.elf }
- { CHS_BINARY: $CHS_BUILD_DIR/simple_offload.spm.elf, SN_BINARY: $SN_BUILD_DIR/datamover.elf }
- { CHS_BINARY: $CHS_BUILD_DIR/simple_offload.spm.elf, SN_BINARY: sw/snitch/apps/gemm_2d/build/gemm_2d.elf, VERIFY_PY: $SN_ROOT/sw/blas/gemm/scripts/verify.py, PRELMODE: 3 }
- { CHS_BINARY: $CHS_BUILD_DIR/simple_offload.spm.elf, SN_BINARY: sw/snitch/apps/fused_concat_linear/build/fused_concat_linear.elf, VERIFY_PY: $SN_ROOT/sw/dnn/fused_concat_linear/scripts/verify.py, PRELMODE: 3 }
- { CHS_BINARY: $CHS_BUILD_DIR/simple_offload.spm.elf, SN_BINARY: sw/snitch/apps/mha/build/mha.elf, VERIFY_PY: $SN_ROOT/sw/dnn/mha/scripts/verify.py, PRELMODE: 3 }
- { CHS_BINARY: $CHS_BUILD_DIR/simple_offload.spm.elf, SN_BINARY: sw/snitch/apps/gemm/build/gemm.elf, VERIFY_PY: $SN_ROOT/sw/blas/gemm/scripts/verify.py, PRELMODE: 3 }
- { CHS_BINARY: $CHS_BUILD_DIR/simple_offload.spm.elf, SN_BINARY: sw/snitch/apps/axpy/build/axpy.elf, VERIFY_PY: $SN_ROOT/sw/blas/axpy/scripts/verify.py, PRELMODE: 3 }
4 changes: 2 additions & 2 deletions Bender.lock
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ packages:
dependencies:
- common_cells
common_cells:
revision: bef3d3c5ed0e2cc211e69a6dbd81c4fe3a97025c
revision: e1c09c75775c5f03eb45906d5145dbd2f5bcfb95
version: null
source:
Git: https://github.com/pulp-platform/common_cells.git
Expand Down Expand Up @@ -383,7 +383,7 @@ packages:
- common_cells
- register_interface
snitch_cluster:
revision: e4eaa0fb64767bb8f6b7d1f5fa705928171092b2
revision: ef3ece6c9e119fbfc25b26bb89a429ccdaacb5c6
version: null
source:
Git: https://github.com/pulp-platform/snitch_cluster.git
Expand Down
2 changes: 1 addition & 1 deletion Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dependencies:
axi: { git: "https://github.com/pulp-platform/axi.git", version: "0.39.6" }
common_cells: { git: "https://github.com/pulp-platform/common_cells.git", rev: "snitch" }
cheshire: { git: "https://github.com/pulp-platform/cheshire.git", rev: "picobello" }
snitch_cluster: { git: "https://github.com/pulp-platform/snitch_cluster.git", rev: "e4eaa0fb64767bb8f6b7d1f5fa705928171092b2" }
snitch_cluster: { git: "https://github.com/pulp-platform/snitch_cluster.git", rev: "ef3ece6c9e119fbfc25b26bb89a429ccdaacb5c6" }
floo_noc: { git: "https://github.com/pulp-platform/FlooNoC.git", rev: "develop" }
obi: { git: "https://github.com/pulp-platform/obi.git", rev: "acfcd0f80c7539aa8da7821a66d9acf2074a5b4e" }
redmule: { git: "https://github.com/pulp-platform/redmule.git", rev: "picobello" }
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ python-venv: .venv
python -m pip install --upgrade pip setuptools && \
python -m pip install --cache-dir $(PIP_CACHE_DIR) -r requirements.txt && \
python -m pip install --cache-dir $(PIP_CACHE_DIR) $(shell $(BENDER) path floo_noc) --no-deps && \
python -m pip install --cache-dir $(PIP_CACHE_DIR) $(shell $(BENDER) path snitch_cluster)
python -m pip install --cache-dir $(PIP_CACHE_DIR) "$(shell $(BENDER) path snitch_cluster)[kernels]"

python-venv-clean:
rm -rf .venv
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,14 @@ Use the `vsim-run-batch` command to run tests in batch mode with RTL optimizatio

Use the `PRELMODE=3` flag to enable fast preload of the Snitch binary, and speed up the simulation.

Some applications produce a lot of output data, which would be time-consuming to check in simulation.
Said applications usually come with a Python verification script that can check the results from a dump of the memory contents at the end of the simulation.
For example, a verification script for the GEMM kernel can be found under `$(bender path snitch_cluster)/sw/blas/gemm/scripts/verify.py`.
To run an application on Snitch and verify its results, do:
```bash
make vsim-run-batch-verify VERIFY_PY=$(bender path snitch_cluster)/sw/blas/gemm/scripts/verify.py PRELMODE=3 CHS_BINARY=sw/cheshire/tests/simple_offload.spm.elf SN_BINARY=sw/snitch/apps/gemm/build/gemm.elf
```

### Additional help

Additionally, you can run the following command to get a list of all available commands:
Expand Down
22 changes: 21 additions & 1 deletion hw/mem_tile.sv
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ module mem_tile
#(
parameter bit AxiUserAtop = 1'b1,
parameter int unsigned AxiUserAtopMsb = 3,
parameter int unsigned AxiUserAtopLsb = 0
parameter int unsigned AxiUserAtopLsb = 0,
parameter int unsigned MemTileId = 0
) (
input logic clk_i,
input logic rst_ni,
Expand Down Expand Up @@ -275,6 +276,25 @@ module mem_tile
end
end

`ifndef SYNTHESIS
// AXI monitor dumper to improve debugging. Only instantiated when SYNTHESIS
// is not defined. Logging is enabled for all five AXI channels (AW, AR, W, B, R),
// and the bus name embeds MemTileId.
axi_dumper #(
.BusName ($sformatf("mem_tile_%d", MemTileId)),
.LogAW (1'b1),
.LogAR (1'b1),
.LogW (1'b1),
.LogB (1'b1),
.LogR (1'b1),
.axi_req_t (axi_nw_join_req_t),
.axi_resp_t(axi_nw_join_rsp_t)
) i_axi_monitor (
.clk_i,
.rst_ni,
.axi_req_i (axi_req),
.axi_resp_i(axi_rsp)
);
`endif

axi_to_obi #(
.ObiCfg (MgrObiCfg),
.obi_req_t (mgr_obi_req_t),
Expand Down
4 changes: 3 additions & 1 deletion hw/picobello_top.sv
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,9 @@ module picobello_top
localparam int MemTileX = int'(MemTilePhysicalId.x);
localparam int MemTileY = int'(MemTilePhysicalId.y);

mem_tile i_mem_tile (
mem_tile #(
.MemTileId(int'(m))
) i_mem_tile (
.clk_i,
.rst_ni,
.test_enable_i (test_mode_i),
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,5 @@ hjson # for reggen

# For peakrdl
peakrdl
peakrdl-rawheader @ git+https://github.com/micprog/peakrdl-rawheader.git
peakrdl-rawheader @ git+https://github.com/colluca/PeakRDL-rawheader.git@7b8dbc9ad5854dc1cdaf36d4ea024c29ffb00a4c
peakrdl-markdown
15 changes: 15 additions & 0 deletions sw/snitch/apps/axpy/app.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright 2025 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

# Application name; expanded immediately below to form the per-app variable
# names (axpy_BUILD_DIR, axpy_DATA_CFG, axpy_INCDIRS) and the paths.
APP := axpy
# Build output directory; `?=` keeps any value already provided by the caller.
$(APP)_BUILD_DIR ?= $(PB_SNITCH_SW_DIR)/apps/$(APP)/build
# Data configuration file, located under PB_SNITCH_SW_DIR.
$(APP)_DATA_CFG := $(PB_SNITCH_SW_DIR)/apps/$(APP)/data/params.json
# Sources are taken from the tree rooted at SN_ROOT.
SRC_DIR := $(SN_ROOT)/sw/blas/$(APP)/src
SRCS := $(SRC_DIR)/main.c
# Additional include directories for this app.
$(APP)_INCDIRS := $(SN_ROOT)/sw/blas

# Common app makefiles from SN_ROOT.
# NOTE(review): presumably these consume the variables set above - confirm there.
include $(SN_ROOT)/sw/apps/common.mk
include $(SN_ROOT)/target/snitch_cluster/sw/apps/common.mk
9 changes: 9 additions & 0 deletions sw/snitch/apps/axpy/data/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

{
"n_tiles": 5,
"n": 2560,
"funcptr": "axpy_opt"
}
15 changes: 15 additions & 0 deletions sw/snitch/apps/fused_concat_linear/app.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright 2025 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

# Application name; expanded immediately below to form the per-app variable
# names (fused_concat_linear_BUILD_DIR, ..._DATA_CFG, ..._INCDIRS) and the paths.
APP := fused_concat_linear
# Build output directory; `?=` keeps any value already provided by the caller.
$(APP)_BUILD_DIR ?= $(PB_SNITCH_SW_DIR)/apps/$(APP)/build
# Data configuration file, located under PB_SNITCH_SW_DIR.
$(APP)_DATA_CFG := $(PB_SNITCH_SW_DIR)/apps/$(APP)/data/params.json
# Sources are taken from the tree rooted at SN_ROOT.
SRC_DIR := $(SN_ROOT)/sw/dnn/$(APP)/src
SRCS := $(SRC_DIR)/main.c
# Additional include directories: both the DNN sources and the BLAS tree.
$(APP)_INCDIRS := $(SN_ROOT)/sw/dnn/src $(SN_ROOT)/sw/blas

# Common app makefiles from SN_ROOT.
# NOTE(review): presumably these consume the variables set above - confirm there.
include $(SN_ROOT)/sw/apps/common.mk
include $(SN_ROOT)/target/snitch_cluster/sw/apps/common.mk
11 changes: 11 additions & 0 deletions sw/snitch/apps/fused_concat_linear/data/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51

{
num_inputs: 2,
input_shape: [16, 16],
output_shape: [16, 16],
dtype: "FP64",
gemm_implementation: "gemm_fp64_opt"
}
15 changes: 15 additions & 0 deletions sw/snitch/apps/gemm/app.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright 2025 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Luca Colagrande <[email protected]>

# Application name; expanded immediately below to form the per-app variable
# names (gemm_BUILD_DIR, gemm_DATA_CFG, gemm_INCDIRS) and the paths.
APP := gemm
# Build output directory; `?=` keeps any value already provided by the caller.
$(APP)_BUILD_DIR ?= $(PB_SNITCH_SW_DIR)/apps/$(APP)/build
# Data configuration file, located under PB_SNITCH_SW_DIR.
$(APP)_DATA_CFG := $(PB_SNITCH_SW_DIR)/apps/$(APP)/data/params.json
# Sources are taken from the tree rooted at SN_ROOT.
SRC_DIR := $(SN_ROOT)/sw/blas/$(APP)/src
SRCS := $(SRC_DIR)/main.c
# Additional include directories for this app.
$(APP)_INCDIRS := $(SN_ROOT)/sw/blas

# Common app makefiles from SN_ROOT.
# NOTE(review): presumably these consume the variables set above - confirm there.
include $(SN_ROOT)/sw/apps/common.mk
include $(SN_ROOT)/target/snitch_cluster/sw/apps/common.mk
25 changes: 25 additions & 0 deletions sw/snitch/apps/gemm/data/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

{
setup_ssr: 1,
parallelize_m: 1,
parallelize_k: 0,
m_tiles: 64, // number of tiles in M dimension
n_tiles: 1, // number of tiles in N dimension
k_tiles: 1, // number of tiles in K dimension
load_a: 1,
load_b: 1,
load_c: 1,
double_buffer: 1,
partition_banks: 0,
transa: false,
transb: false, // must be true for SIMD
m: 2048,
n: 16,
k: 16,
alpha: 1,
beta: 0,
gemm_fp: "gemm_fp64_opt"
}
17 changes: 17 additions & 0 deletions sw/snitch/apps/gemm_2d/app.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2025 ETH Zurich and University of Bologna.
# Licensed under the Apache License, Version 2.0, see LICENSE for details.
# SPDX-License-Identifier: Apache-2.0
#
# Lorenzo Leone <[email protected]>

# Application name; expanded immediately below to form the per-app variable
# names (gemm_2d_BUILD_DIR, gemm_2d_INCDIRS, gemm_2d_SCRIPT_DIR) and the paths.
APP := gemm_2d
# Build output directory; `?=` keeps any value already provided by the caller.
$(APP)_BUILD_DIR ?= $(PB_SNITCH_SW_DIR)/apps/$(APP)/build
# Unlike the SN_ROOT-sourced paths below, the source file lives under PB_SNITCH_SW_DIR.
SRC_DIR := $(PB_SNITCH_SW_DIR)/apps/$(APP)/src
SRCS := $(SRC_DIR)/gemm_2d.c
# Include directories: the BLAS tree and the GEMM sources under SN_ROOT.
$(APP)_INCDIRS := $(SN_ROOT)/sw/blas $(SN_ROOT)/sw/blas/gemm/src

# Use the GEMM scripts directory under SN_ROOT for this app.
$(APP)_SCRIPT_DIR := $(SN_ROOT)/sw/blas/gemm/scripts

# Common app makefiles from SN_ROOT.
# NOTE(review): presumably these consume the variables set above - confirm there.
include $(SN_ROOT)/sw/apps/common.mk
include $(SN_ROOT)/target/snitch_cluster/sw/apps/common.mk
25 changes: 25 additions & 0 deletions sw/snitch/apps/gemm_2d/data/params.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright 2024 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

{
setup_ssr: 1,
parallelize_m: 1,
parallelize_k: 0,
m_tiles: 16, // number of tiles in M dimension
n_tiles: 4, // number of tiles in N dimension
k_tiles: 1, // number of tiles in K dimension
load_a: 1,
load_b: 1,
load_c: 1,
double_buffer: 1,
partition_banks: 0,
transa: false,
transb: false, // must be true for SIMD
m: 128,
n: 32,
k: 16,
alpha: 1,
beta: 0,
gemm_fp: "gemm_fp64_opt"
}
30 changes: 30 additions & 0 deletions sw/snitch/apps/gemm_2d/roi.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[
<% N_TILES = 4 %>

% for cluster in range(0,16):
// Compute cores: harts cluster*9+1 .. cluster*9+8, one region per tile at odd indices 1, 3, 5, ...
% for j in range(0, 8):
{
"thread": "${f'hart_{cluster * 9 + j + 1}'}",
"roi": [
% for i in range(0, N_TILES):
{"idx": ${2 * i + 1}, "label": "${f'tile_{i}'}"},
% endfor
]
},
% endfor

// DMA core: hart cluster*9+9. The first tile_in and the last tile_out are listed separately; tile_in and tile_out entries alternate in between.
{
"thread": "${f'hart_{cluster * 9 + 8 + 1}'}",
"roi": [
{"idx": 1, "label": "${f'tile_in_0'}"},
% for i in range(0, N_TILES - 1):
{"idx": ${4*i + 3}, "label": "${f'tile_in_{i+1}'}"},
{"idx": ${4*i + 5}, "label": "${f'tile_out_{i}'}"},
% endfor
{"idx": ${N_TILES * 4 - 1}, "label": "${f'tile_out_{N_TILES-1}'}"},
]
},
% endfor
]
Loading