Official code for the CODI 2025 paper.
Refer to IsaNLP RST for running the trained multilingual parser.
python -m venv .venv && source .venv/bin/activate
pip install -U pip
pip install -r requirements.txt
pip install --index-url https://download.pytorch.org/whl/cu121 torch

List available corpora:
bash src/commands.sh list

# single corpus
bash src/commands.sh mono train eng.erst.gum
# run across all corpora used in the unified setups
bash src/commands.sh mono train all
# filter when running "all"
ONLY='eng|rus' bash src/commands.sh mono evaluate all
# data augmentation toggle
AUG=true bash src/commands.sh mono train eng.rst.rstdt

# UniRST MuH. mseg = multiple segmentation heads, sseg = single head.
bash src/commands.sh unir-muh-mseg train
bash src/commands.sh unir-muh-mseg evaluate
# UniRST UU
bash src/commands.sh unir-uu train
bash src/commands.sh unir-uu evaluate
# UniRST MU (default).
bash src/commands.sh unir-mu-mseg train
bash src/commands.sh unir-mu-mseg evaluate

Environment variables:
RUNS=3 # number of random restarts
CUDA=0 # select GPU (maps to --cuda_device)
AUG=true # enable data augmentation
SSEG=true # single segmentation head (MuH/MU); false = multiple heads
DEBUG=1 # print shell commands