Skip to content
This repository was archived by the owner on May 27, 2022. It is now read-only.

Commit 678f28c

Browse files
committed
Update tts
1 parent 6fdac9d commit 678f28c

File tree

7 files changed

+183
-25
lines changed

7 files changed

+183
-25
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"cells": [],
3+
"metadata": {},
4+
"nbformat": 4,
5+
"nbformat_minor": 4
6+
}

examples/Untitled.ipynb

Lines changed: 152 additions & 0 deletions
Large diffs are not rendered by default.

pororo/models/tts/synthesizer.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,11 @@ def _spectrogram_postprocess(self, spectrogram):
110110

111111
def predict(self, text: str, speaker: str):
112112
speakers = speaker.split(',')
113-
114113
spectrogram = synthesize(self.tacotron, f"|{text}", device=self.device)
115114

116115
if len(speakers) > 1:
117116
spectrogram = self._spectrogram_postprocess(spectrogram)
118-
y_g_hat = self.vocoder_en(torch.Tensor(spectrogram).to(self.device).unsqueeze(0))
117+
y_g_hat = self.vocoder_ko(torch.Tensor(spectrogram).to(self.device).unsqueeze(0))
119118
audio = y_g_hat.squeeze()
120119
audio = audio * 32768.0
121120
return audio.cpu().detach().numpy()

pororo/pororo.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
PororoWordTranslationFactory,
4747
PororoZeroShotFactory,
4848
PororoSpeechTranslationFactory,
49+
PororoTtsFactory,
4950
)
5051

5152
SUPPORTED_TASKS = {
@@ -132,6 +133,9 @@
132133
"speech_recognition": PororoAsrFactory,
133134
"st": PororoSpeechTranslationFactory,
134135
"speech_translation": PororoSpeechTranslationFactory,
136+
"tts": PororoTtsFactory,
137+
"text_to_speech": PororoTtsFactory,
138+
"speech_synthesis": PororoTtsFactory,
135139
"ocr": PororoOcrFactory,
136140
"srl": PororoSrlFactory,
137141
"semantic_role_labeling": PororoSrlFactory,

pororo/tasks/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,4 @@
5151
from pororo.tasks.zero_shot_classification import PororoZeroShotFactory
5252
from pororo.tasks.review_scoring import PororoReviewFactory
5353
from pororo.tasks.speech_translation import PororoSpeechTranslationFactory
54+
from pororo.tasks.speech_synthesis import PororoTtsFactory

pororo/tasks/speech_synthesis.py

Lines changed: 19 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,24 @@ class PororoTtsFactory(PororoFactoryBase):
3131
3232
Examples:
3333
>>> import IPython
34-
>>> from IPython.display import Audio
34+
>>> from pororo import Pororo
3535
>>> model = Pororo('tts', lang='multi')
36-
>>> wave = model('how are you?', lang='en', speaker='en')
37-
>>> IPython.display.display(IPython.display.Audio(data=wave, rate=22050))
3836
39-
>>> model = Pororo('tts', lang='multi')
37+
>>> # Typical TTS
38+
>>> wave = model('how are you?', lang='en')
39+
>>> IPython.display.Audio(data=wave, rate=22050)
40+
41+
>>> # Voice Style Transfer
4042
>>> wave = model('저는 미국 사람이에요.', lang='ko', speaker='en')
41-
>>> IPython.display.display(IPython.display.Audio(data=wave, rate=22050))
43+
>>> IPython.display.Audio(data=wave, rate=22050)
44+
45+
>>> # Code-Switching
46+
>>> wave = model('저는 미국 사람이에요.', lang='ko', speaker='en-15,ko')
47+
>>> IPython.display.Audio(data=wave, rate=22050)
4248
4349
Notes:
44-
Currently 10 languages supports.
45-
Supported Languages: English, Korean, Japanese, Chinese, Jejueo, Dutch, German, Spanish, French, Russian
50+
Currently 11 languages are supported.
51+
Supported Languages: English, Korean, Japanese, Chinese, Jejueo, Dutch, German, Spanish, French, Russian, Finnish
4652
A speaker can be designated for this task, e.g. ko, en, or zh.
4753
4854
"""
@@ -72,12 +78,8 @@ def load(self, device: str):
7278
7379
"""
7480
if self.config.n_model == "tacotron":
75-
from pororo.models.tts.synthesizer import (
76-
MultilingualSpeechSynthesizer,
77-
)
78-
from pororo.models.tts.utils.numerical_pinyin_converter import (
79-
convert_from_numerical_pinyin,
80-
)
81+
from pororo.models.tts.synthesizer import MultilingualSpeechSynthesizer
82+
from pororo.models.tts.utils.numerical_pinyin_converter import convert_from_numerical_pinyin
8183
from pororo.models.tts.utils.text import jejueo_romanize, romanize
8284

8385
tacotron_path = download_or_load("misc/tacotron2", self.config.lang)
@@ -142,7 +144,7 @@ def __init__(
142144
self.jejueo_romanize = jejueo_romanize
143145
self.convert_from_numerical_pinyin = convert_from_numerical_pinyin
144146

145-
def load_g2p_ja(self):
147+
def _load_g2p_ja(self):
146148
""" load g2p module for Japanese """
147149
self.g2p_ja = PororoG2pFactory(
148150
task="g2p",
@@ -151,7 +153,7 @@ def load_g2p_ja(self):
151153
)
152154
self.g2p_ja = self.g2p_ja.load(self.device)
153155

154-
def load_g2p_zh(self):
156+
def _load_g2p_zh(self):
155157
""" load g2p module for Chinese """
156158
self.g2p_zh = PororoG2pFactory(
157159
task="g2p",
@@ -182,11 +184,11 @@ def _preprocess(
182184
text = self.romanize(text)
183185
elif lang == "ja":
184186
if self.g2p_ja is None:
185-
self.load_g2p_ja()
187+
self._load_g2p_ja()
186188
text = self.g2p_ja(text)
187189
elif lang == "zh":
188190
if self.g2p_zh is None:
189-
self.load_g2p_zh()
191+
self._load_g2p_zh()
190192
text = self.g2p_zh(text).replace(" ", " ")
191193
text = self.convert_from_numerical_pinyin(text)
192194
elif lang == "je":
@@ -211,11 +213,5 @@ def __call__(self, text: str, lang: str = "en", speaker: str = None):
211213
if speaker is None:
212214
speaker = lang
213215

214-
if speaker == "ja":
215-
speaker = "jp"
216-
217-
assert (
218-
speaker in self.lang_dict.values()
219-
), f"Unsupported speaker: {speaker}\nSupported speaker: {self.lang_dict.keys()}"
220216
text, speaker = self._preprocess(text, lang, speaker)
221217
return self.predict(text, speaker)

tts-install.sh

100644100755
File mode changed.

0 commit comments

Comments
 (0)