From 86b6567160e4cf8819d96a854b1d581d8942e415 Mon Sep 17 00:00:00 2001
From: Kareem Shehata <kareem@shehata.ca>
Date: Fri, 5 May 2023 21:31:41 +0800
Subject: [PATCH] Add support for Pinyin with tone numbers instead of accents.
 Uses a separate field PinyinNum for pinyin with tone numbers.

---
 chinese/behavior.py   | 7 +++++++
 chinese/config.json   | 3 +++
 chinese/transcribe.py | 4 ++++
 3 files changed, 14 insertions(+)

diff --git a/chinese/behavior.py b/chinese/behavior.py
index 5b5832e..412bf35 100644
--- a/chinese/behavior.py
+++ b/chinese/behavior.py
@@ -28,6 +28,7 @@
     sanitize_transcript,
     split_transcript,
     transcribe,
+    tone_marks_to_numbers,
 )
 from .translate import translate
 from .util import (
@@ -152,6 +153,12 @@ def fill_transcript(hanzi, note):
         else:
             reformat_transcript(note, key, target)
 
+    if get_first(config['fields']['pinyinNum'], note) == '':
+        t = transcribe(separated, target, type_)
+        s = tone_marks_to_numbers(str.join(" ", t))
+        set_all(config['fields']['pinyinNum'], note, to=s)
+        n_filled += 1
+
     return n_filled
 
 
diff --git a/chinese/config.json b/chinese/config.json
index 5147a67..8bf9c96 100644
--- a/chinese/config.json
+++ b/chinese/config.json
@@ -41,6 +41,9 @@
             "大陸拼音",
             "拼音"
         ],
+        "pinyinNum": [
+            "PinyinNum"
+        ],
         "pinyinTaiwan": [
             "Pinyin (Taiwan)",
             "台湾拼音",
diff --git a/chinese/transcribe.py b/chinese/transcribe.py
index 99926ed..2121f75 100644
--- a/chinese/transcribe.py
+++ b/chinese/transcribe.py
@@ -238,6 +238,10 @@ def _split(pattern, s):
 
     return list(filter(lambda s: s.strip(), separated))
 
+def tone_marks_to_numbers(s):  # tone-marked pinyin str -> tone-number form (per the commit message)
+    assert isinstance(s, str)  # rejects non-str input while assertions are enabled
+    s2, *_ = replace_tone_marks([cleanup(s)])  # pass a one-item list; keep only the first result
+    return s2  # NOTE(review): presumably a str; replace_tone_marks is not visible here - confirm
 
 def tone_number(s):
     assert isinstance(s, str)