From e0136d9263fa1cecacddf828b75ee5d6f4e00087 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Mon, 21 Oct 2024 13:12:10 +0800 Subject: [PATCH] minor updates --- egs/libritts/CODEC/prepare.sh | 3 +- .../TTS/local/compute_spectrogram_libritts.py | 1 + egs/libritts/TTS/local/prepare_token_file.py | 1 + .../TTS/local/prepare_tokens_libritts.py | 75 ++++++++++ egs/libritts/TTS/local/validate_manifest.py | 1 + egs/libritts/TTS/prepare.sh | 131 ++++++++++++++++++ 6 files changed, 210 insertions(+), 2 deletions(-) create mode 120000 egs/libritts/TTS/local/compute_spectrogram_libritts.py create mode 120000 egs/libritts/TTS/local/prepare_token_file.py create mode 100644 egs/libritts/TTS/local/prepare_tokens_libritts.py create mode 120000 egs/libritts/TTS/local/validate_manifest.py create mode 100644 egs/libritts/TTS/prepare.sh diff --git a/egs/libritts/CODEC/prepare.sh b/egs/libritts/CODEC/prepare.sh index 6a471c3adc..47dcd4138a 100755 --- a/egs/libritts/CODEC/prepare.sh +++ b/egs/libritts/CODEC/prepare.sh @@ -45,12 +45,11 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then # to $dl_dir/LibriTTS mkdir -p data/manifests if [ ! 
-e data/manifests/.libritts.done ]; then - lhotse prepare libritts --num-jobs 32 $dl_dir/LibriTTS data/manifests + lhotse prepare libritts --num-jobs ${nj} $dl_dir/LibriTTS data/manifests touch data/manifests/.libritts.done fi fi - if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then log "Stage 2: Compute Spectrogram for LibriTTS" mkdir -p data/spectrogram diff --git a/egs/libritts/TTS/local/compute_spectrogram_libritts.py b/egs/libritts/TTS/local/compute_spectrogram_libritts.py new file mode 120000 index 0000000000..5a6ebba58c --- /dev/null +++ b/egs/libritts/TTS/local/compute_spectrogram_libritts.py @@ -0,0 +1 @@ +../../CODEC/local/compute_spectrogram_libritts.py \ No newline at end of file diff --git a/egs/libritts/TTS/local/prepare_token_file.py b/egs/libritts/TTS/local/prepare_token_file.py new file mode 120000 index 0000000000..afc29a22ba --- /dev/null +++ b/egs/libritts/TTS/local/prepare_token_file.py @@ -0,0 +1 @@ +../../../ljspeech/TTS/local/prepare_token_file.py \ No newline at end of file diff --git a/egs/libritts/TTS/local/prepare_tokens_libritts.py b/egs/libritts/TTS/local/prepare_tokens_libritts.py new file mode 100644 index 0000000000..6ac42755e1 --- /dev/null +++ b/egs/libritts/TTS/local/prepare_tokens_libritts.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +# Copyright 2023 Xiaomi Corp. (authors: Zengwei Yao, +# Zengrui Jin,) +# 2024 Tsinghua University (authors: Zengrui Jin,) +# +# See ../../../../LICENSE for clarification regarding multiple authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+"""
+This file reads the texts in the given manifests and saves new cuts with phoneme tokens.
+"""
+
+import logging
+from pathlib import Path
+
+import tacotron_cleaner.cleaners
+from lhotse import CutSet, load_manifest
+from piper_phonemize import phonemize_espeak
+from tqdm.auto import tqdm
+
+
+def prepare_tokens_libritts():
+    """Add phoneme tokens to the cuts of every LibriTTS partition.
+
+    Cuts are read from and written back to ``data/spectrogram``.
+    """
+    manifest_dir = Path("data/spectrogram")
+    prefix, suffix = "libritts", "jsonl.gz"
+    partitions = (
+        "dev-clean",
+        "dev-other",
+        "test-clean",
+        "test-other",
+        "train-all-shuf",
+        "train-clean-460",
+    )
+
+    for partition in partitions:
+        src = manifest_dir / f"{prefix}_cuts_{partition}.{suffix}"
+        dst = manifest_dir / f"{prefix}_cuts_with_tokens_{partition}.{suffix}"
+
+        tokenized = []
+        for cut in tqdm(load_manifest(src)):
+            # Exactly one supervision is expected per cut.
+            assert len(cut.supervisions) == 1, (len(cut.supervisions), cut)
+            # Normalize the transcript before phonemization.
+            cleaned = tacotron_cleaner.cleaners.custom_english_cleaners(
+                cut.supervisions[0].text
+            )
+            # phonemize_espeak returns nested lists; flatten into one sequence.
+            cut.tokens = [
+                ph for chunk in phonemize_espeak(cleaned, "en-us") for ph in chunk
+            ]
+            tokenized.append(cut)
+
+        CutSet.from_cuts(tokenized).to_file(dst)
+
+
+if __name__ == "__main__":
+    formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
+    logging.basicConfig(format=formatter, level=logging.INFO)
+
+    prepare_tokens_libritts()
diff --git a/egs/libritts/TTS/local/validate_manifest.py b/egs/libritts/TTS/local/validate_manifest.py
new file mode 120000
index 0000000000..b4d52ebca0
--- /dev/null
+++ b/egs/libritts/TTS/local/validate_manifest.py
@@ -0,0 +1 @@
+../../../ljspeech/TTS/local/validate_manifest.py
\ No newline at end of file
diff --git a/egs/libritts/TTS/prepare.sh b/egs/libritts/TTS/prepare.sh
new file mode 100644
index 0000000000..d18f8a6772
--- /dev/null
+++ 
b/egs/libritts/TTS/prepare.sh
@@ -0,0 +1,131 @@
+#!/usr/bin/env bash
+
+# fix segmentation fault reported in https://github.com/k2-fsa/icefall/issues/674
+export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
+
+set -eou pipefail
+
+stage=0
+stop_stage=100
+sampling_rate=24000
+nj=32
+
+dl_dir=$PWD/download
+
+. shared/parse_options.sh || exit 1
+
+# All files generated by this script are saved in "data".
+# You can safely remove "data" and rerun this script to regenerate it.
+mkdir -p data
+
+log() {
+  # This function is from espnet
+  local fname=${BASH_SOURCE[1]##*/}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+log "dl_dir: $dl_dir"
+
+if [ $stage -le -1 ] && [ $stop_stage -ge -1 ]; then
+  log "Stage -1: build monotonic_align lib"
+  if [ ! -d vits/monotonic_align/build ]; then
+    cd vits/monotonic_align
+    python setup.py build_ext --inplace
+    cd ../../
+  else
+    log "monotonic_align lib already built"
+  fi
+fi
+
+if [ $stage -le 0 ] && [ $stop_stage -ge 0 ]; then
+  log "Stage 0: Download data"
+
+  # If you have pre-downloaded it to /path/to/LibriTTS,
+  # you can create a symlink
+  #
+  #   ln -sfv /path/to/LibriTTS $dl_dir/LibriTTS
+  #
+  if [ ! -d $dl_dir/LibriTTS ]; then
+    lhotse download libritts $dl_dir
+  fi
+
+  if [ ! -d $dl_dir/xvector_nnet_1a_libritts_clean_460 ]; then
+    log "Downloading x-vector"
+
+    git clone https://huggingface.co/datasets/zrjin/xvector_nnet_1a_libritts_clean_460 $dl_dir/xvector_nnet_1a_libritts_clean_460
+  fi
+
+fi
+
+if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
+  log "Stage 1: Prepare LibriTTS manifest"
+  # We assume that you have downloaded the LibriTTS corpus
+  # to $dl_dir/LibriTTS
+  mkdir -p data/manifests
+  if [ ! -e data/manifests/.libritts.done ]; then
+    lhotse prepare libritts --num-jobs ${nj} $dl_dir/LibriTTS data/manifests
+    touch data/manifests/.libritts.done
+  fi
+fi
+
+if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
+  log "Stage 2: Compute Spectrogram for LibriTTS"
+  mkdir -p data/spectrogram
+  if [ ! -e data/spectrogram/.libritts.done ]; then
+    ./local/compute_spectrogram_libritts.py --sampling-rate $sampling_rate
+    touch data/spectrogram/.libritts.done
+  fi
+
+  # Here we shuffle and combine the train-clean-100, train-clean-360 and
+  # train-other-500 together to form the training set.
+  if [ ! -f data/spectrogram/libritts_cuts_train-all-shuf.jsonl.gz ]; then
+    cat <(gunzip -c data/spectrogram/libritts_cuts_train-clean-100.jsonl.gz) \
+      <(gunzip -c data/spectrogram/libritts_cuts_train-clean-360.jsonl.gz) \
+      <(gunzip -c data/spectrogram/libritts_cuts_train-other-500.jsonl.gz) | \
+      shuf | gzip -c > data/spectrogram/libritts_cuts_train-all-shuf.jsonl.gz
+  fi
+
+  # Here we shuffle and combine the train-clean-100 and train-clean-360
+  # together to form the clean training set.
+  if [ ! -f data/spectrogram/libritts_cuts_train-clean-460.jsonl.gz ]; then
+    cat <(gunzip -c data/spectrogram/libritts_cuts_train-clean-100.jsonl.gz) \
+      <(gunzip -c data/spectrogram/libritts_cuts_train-clean-360.jsonl.gz) | \
+      shuf | gzip -c > data/spectrogram/libritts_cuts_train-clean-460.jsonl.gz
+  fi
+
+  if [ ! -e data/spectrogram/.libritts-validated.done ]; then
+    log "Validating data/spectrogram for LibriTTS"
+    ./local/validate_manifest.py \
+      data/spectrogram/libritts_cuts_train-all-shuf.jsonl.gz
+    touch data/spectrogram/.libritts-validated.done
+  fi
+fi
+
+if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then
+  log "Stage 3: Prepare phoneme tokens for LibriTTS"
+  # Requires piper_phonemize and espnet_tts_frontend; if missing, install them with:
+  #   - piper_phonemize:
+  #       refer to https://github.com/rhasspy/piper-phonemize,
+  #       could install the pre-built wheels from https://github.com/csukuangfj/piper-phonemize/releases/tag/2023.12.5
+  #   - espnet_tts_frontend:
+  #       `pip install espnet_tts_frontend`, refer to https://github.com/espnet/espnet_tts_frontend/
+  if [ ! -e data/spectrogram/.libritts_with_token.done ]; then
+    # The script is added with mode 100644 (not executable), so invoke it via python3.
+    python3 ./local/prepare_tokens_libritts.py
+    touch data/spectrogram/.libritts_with_token.done
+  fi
+fi
+
+if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
+  log "Stage 4: Generate token file"
+  # We assume you have installed piper_phonemize and espnet_tts_frontend.
+  # If not, please install them with:
+  #   - piper_phonemize:
+  #       refer to https://github.com/rhasspy/piper-phonemize,
+  #       could install the pre-built wheels from https://github.com/csukuangfj/piper-phonemize/releases/tag/2023.12.5
+  #   - espnet_tts_frontend:
+  #       `pip install espnet_tts_frontend`, refer to https://github.com/espnet/espnet_tts_frontend/
+  if [ ! -e data/tokens.txt ]; then
+    ./local/prepare_token_file.py --tokens data/tokens.txt
+  fi
+fi