@@ -192,6 +192,8 @@ def standardize_quotes(text: str) -> str:
192
192
"〝" : "U+301D" , # REVERSED DOUBLE PRIME QUOTATION MARK
193
193
"〞" : "U+301E" , # DOUBLE PRIME QUOTATION MARK
194
194
"〟" : "U+301F" , # LOW DOUBLE PRIME QUOTATION MARK
195
+ """ : "U+FF02" , # FULLWIDTH QUOTATION MARK
196
+ ",," : "U+275E" , # LOW HEAVY DOUBLE COMMA ORNAMENT
195
197
}
196
198
197
199
# Single Quotes Dictionary
@@ -213,7 +215,6 @@ def standardize_quotes(text: str) -> str:
213
215
"﹂" : "U+FE42" , # PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
214
216
"﹃" : "U+FE43" , # PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
215
217
"﹄" : "U+FE44" , # PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
216
- """ : "U+FF02" , # FULLWIDTH QUOTATION MARK
217
218
"'" : "U+FF07" , # FULLWIDTH APOSTROPHE
218
219
"「" : "U+FF62" , # HALFWIDTH LEFT CORNER BRACKET
219
220
"」" : "U+FF63" , # HALFWIDTH RIGHT CORNER BRACKET
@@ -225,14 +226,27 @@ def standardize_quotes(text: str) -> str:
225
226
# Apply double quote replacements
226
227
# Apply double quote replacements
227
228
for unicode_val in double_quotes .values ():
228
- unicode_char = chr ( int ( unicode_val . replace ( "U+" , "" ), 16 ) )
229
+ unicode_char = unicode_to_char ( unicode_val )
229
230
if unicode_char in text :
230
231
text = text .replace (unicode_char , double_quote_standard )
231
232
232
233
# Apply single quote replacements
233
234
for unicode_val in single_quotes .values ():
234
- unicode_char = chr ( int ( unicode_val . replace ( "U+" , "" ), 16 ) )
235
+ unicode_char = unicode_to_char ( unicode_val )
235
236
if unicode_char in text :
236
237
text = text .replace (unicode_char , single_quote_standard )
237
238
238
239
return text
240
+
241
+
242
def unicode_to_char(unicode_val: str) -> str:
    """
    Converts a Unicode code point notation to the character it denotes.

    Args:
        unicode_val (str): The code point written in hexadecimal, optionally
            prefixed with "U+" (case-insensitive), e.g. "U+301D" or "301d".
            Surrounding whitespace is ignored.

    Returns:
        str: The single character corresponding to the code point.

    Raises:
        ValueError: If the value is not valid hexadecimal or lies outside the
            range accepted by ``chr``.
    """
    hex_digits = unicode_val.strip()
    # Drop the marker only when it is a real prefix; a blanket
    # str.replace("U+", "") would also swallow a stray "U+" in the middle of
    # a malformed value and miss the lowercase "u+" spelling.
    if hex_digits[:2].upper() == "U+":
        hex_digits = hex_digits[2:]
    try:
        return chr(int(hex_digits, 16))
    except (ValueError, OverflowError) as err:
        # chr() raises OverflowError for very large ints; surface every bad
        # input as one ValueError that names the offending value.
        raise ValueError(
            f"Invalid Unicode code point notation: {unicode_val!r}"
        ) from err
0 commit comments