From 86b6567160e4cf8819d96a854b1d581d8942e415 Mon Sep 17 00:00:00 2001 From: Kareem Shehata Date: Fri, 5 May 2023 21:31:41 +0800 Subject: [PATCH] Add support for Pinyin with tone numbers instead of accents. Uses a separate field PinyinNum for pinyin with tone numbers. --- chinese/behavior.py | 7 +++++++ chinese/config.json | 3 +++ chinese/transcribe.py | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/chinese/behavior.py b/chinese/behavior.py index 5b5832e..412bf35 100644 --- a/chinese/behavior.py +++ b/chinese/behavior.py @@ -28,6 +28,7 @@ sanitize_transcript, split_transcript, transcribe, + tone_marks_to_numbers, ) from .translate import translate from .util import ( @@ -152,6 +153,12 @@ def fill_transcript(hanzi, note): else: reformat_transcript(note, key, target) + if get_first(config['fields']['pinyinNum'], note) == '': + t = transcribe(separated, target, type_) + s = tone_marks_to_numbers(str.join(" ", t)) + set_all(config['fields']['pinyinNum'], note, to=s) + n_filled += 1 + return n_filled diff --git a/chinese/config.json b/chinese/config.json index 5147a67..8bf9c96 100644 --- a/chinese/config.json +++ b/chinese/config.json @@ -41,6 +41,9 @@ "大陸拼音", "拼音" ], + "pinyinNum": [ + "PinyinNum" + ], "pinyinTaiwan": [ "Pinyin (Taiwan)", "台湾拼音", diff --git a/chinese/transcribe.py b/chinese/transcribe.py index 99926ed..2121f75 100644 --- a/chinese/transcribe.py +++ b/chinese/transcribe.py @@ -238,6 +238,10 @@ def _split(pattern, s): return list(filter(lambda s: s.strip(), separated)) +def tone_marks_to_numbers(s): + assert isinstance(s, str) + s2, *_ = replace_tone_marks([cleanup(s)]) + return s2 def tone_number(s): assert isinstance(s, str)