Skip to content

Commit

Permalink
translations: make sure we do not re-translate
Browse files Browse the repository at this point in the history
  • Loading branch information
snejus committed Oct 29, 2024
1 parent fc0d45e commit 99de0b1
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 15 deletions.
42 changes: 30 additions & 12 deletions beetsplug/lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,7 @@ def scrape(cls, html: str) -> str | None:
class Translator(RequestHandler):
TRANSLATE_URL = "https://api.cognitive.microsofttranslator.com/translate"
LINE_PARTS_RE = re.compile(r"^(\[\d\d:\d\d.\d\d\]|) *(.*)$")
remove_translations = partial(re.compile(r" / [^\n]+").sub, "")

_log: beets.logging.Logger
api_key: str
Expand Down Expand Up @@ -800,23 +801,45 @@ def append_translations(self, lines: Iterable[str]) -> list[str]:
# only add the space between non-empty timestamps and texts
return [" ".join(filter(None, p)) for p in zip(timestamps, texts)]

def translate(self, lyrics: str) -> str:
def translate(self, new_lyrics: str, old_lyrics: str) -> str:
"""Translate the given lyrics to the target language.
Check old lyrics for existing translations and return them if their
original text matches the new lyrics. This is to avoid translating
the same lyrics multiple times.
If the lyrics are already in the target language or not in any of
the source languages (if configured), they are returned as is.
The footer with the source URL is preserved, if present.
"""
lyrics_language = langdetect.detect(lyrics)
if lyrics_language == self.to_language or (
self.from_languages and lyrics_language not in self.from_languages
if (
" / " in old_lyrics
and self.remove_translations(old_lyrics) == new_lyrics
):
return lyrics
self.info("🔵 Translations already exist")
return old_lyrics

lyrics_language = langdetect.detect(new_lyrics)
if lyrics_language == self.to_language:
self.info(
"🔵 Lyrics are already in the target language {}",
self.to_language,
)
return new_lyrics

if self.from_languages and lyrics_language not in self.from_languages:
self.info(
"🔵 Configuration {} does not permit translating from {}",
self.from_languages,
lyrics_language.upper(),
)
return new_lyrics

lyrics, *url = lyrics.split("\n\nSource: ")
lyrics, *url = new_lyrics.split("\n\nSource: ")
with self.handle_request():
translated_lines = self.append_translations(lyrics.splitlines())
self.info("🟢 Translated lyrics to {}", self.to_language)
return "\n\nSource: ".join(["\n".join(translated_lines), *url])


Expand Down Expand Up @@ -1054,12 +1077,7 @@ def add_item_lyrics(self, item: Item, write: bool) -> None:
if lyrics := self.find_lyrics(item):
self.info("🟢 Found lyrics: {0}", item)
if translator := self.translator:
initial_lyrics = lyrics
if (lyrics := translator.translate(lyrics)) != initial_lyrics:
self.info(
"🟢 Added translation to {}",
self.config["translate_to"].get().upper(),
)
lyrics = translator.translate(lyrics, item.lyrics)
else:
self.info("🔴 Lyrics not found: {}", item)
lyrics = self.config["fallback"].get()
Expand Down
15 changes: 12 additions & 3 deletions test/plugins/test_lyrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def callback(request, _):
requests_mock.post(lyrics.Translator.TRANSLATE_URL, json=callback)

@pytest.mark.parametrize(
"initial_lyrics, expected",
"new_lyrics, old_lyrics, expected",
[
pytest.param(
"""
Expand All @@ -569,6 +569,7 @@ def callback(request, _):
My body wouldn't let me hide it (Hide it)
No matter what, I wouldn't fold (Wouldn't fold, wouldn't fold)
Ridin' through the thunder, lightnin'""",
"",
"""
[Refrain: Doja Cat] / [Refrain : Doja Cat]
Hard for me to let you go (Let you go, let you go) / Difficile pour moi de te laisser partir (Te laisser partir, te laisser partir)
Expand All @@ -584,6 +585,7 @@ def callback(request, _):
[00:01.00] Some more synced lyrics
Source: https://lrclib.net/api/123""",
"",
"""
[00:00.00] Some synced lyrics / Quelques paroles synchronisées
[00:00:50]
Expand All @@ -594,17 +596,24 @@ def callback(request, _):
),
pytest.param(
"Quelques paroles",
"",
"Quelques paroles",
id="already in the target language",
),
pytest.param(
"Some lyrics",
"Some lyrics / Some translation",
"Some lyrics / Some translation",
id="already translated",
),
],
)
def test_translate(self, initial_lyrics, expected):
def test_translate(self, new_lyrics, old_lyrics, expected):
plugin = lyrics.LyricsPlugin()
bing = lyrics.Translator(plugin._log, "123", "fr", ["en"])

assert bing.translate(
textwrap.dedent(initial_lyrics)
textwrap.dedent(new_lyrics), old_lyrics
) == textwrap.dedent(expected)


Expand Down

0 comments on commit 99de0b1

Please sign in to comment.