Skip to content

Commit a572852

Browse files
committed
gccrs: Type annotation for make-rust-unicode.py
gcc/rust/ChangeLog:

    * util/make-rust-unicode.py: Add type annotation

Signed-off-by: Raiki Tamura <[email protected]>
1 parent 35b67c3 commit a572852

File tree

1 file changed

+36
-32
lines changed

1 file changed

+36
-32
lines changed

gcc/rust/util/make-rust-unicode.py

Lines changed: 36 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,10 @@
2222
# > rust-unicode-data.h
2323

2424
import sys
25+
from typing import Tuple
26+
27+
Codepoint = int
28+
Range = Tuple[Codepoint, Codepoint]
2529

2630
COPYRIGHT = (
2731
"// Copyright (C) 2020-2023 Free Software Foundation, Inc.\n"
@@ -44,25 +48,25 @@
4448
)
4549

4650
# Decomposition_Mapping table
47-
decomposition_map = {}
51+
decomposition_map: dict[Codepoint, list[Codepoint]] = {}
4852
# Canonical_Combining_Class table
49-
ccc_table = {}
53+
ccc_table: dict[Codepoint, int] = {}
5054
# Ranges of codepoints with the Full_Composition_Exclusion property
51-
composition_exclusion_ranges = []
55+
composition_exclusion_ranges: list[Range] = []
5256
# Ranges of codepoints with the Alphabetic property
53-
alphabetic_ranges = []
57+
alphabetic_ranges: list[Range] = []
5458
# Ranges of codepoints with NFC_QC=No
55-
nfc_qc_no_ranges = []
59+
nfc_qc_no_ranges: list[Range] = []
5660
# Ranges of codepoints with NFC_QC=Maybe
57-
nfc_qc_maybe_ranges = []
58-
numeric_codepoints = []
61+
nfc_qc_maybe_ranges: list[Range] = []
62+
numeric_codepoints: list[Codepoint] = []
5963

6064
# Note that a range element `(m, n)` (a tuple in Python) represents the half-open interval [m, n)
6165

6266

63-
def binary_search_ranges(ranges, target):
64-
low = 0
65-
high = len(ranges) - 1
67+
def binary_search_ranges(ranges: list[Range], target: Codepoint) -> int:
68+
low: int = 0
69+
high: int = len(ranges) - 1
6670
while low <= high:
6771
mid = (low + high) // 2
6872
start, end = ranges[mid]
@@ -77,8 +81,8 @@ def binary_search_ranges(ranges, target):
7781

7882

7983
# Utility function to parse '<codepoint>...<codepoint>' or '<codepoint>'
80-
def parse_codepoint_range(range_str):
81-
codepoint_range = range_str.split("..")
84+
def parse_codepoint_range(range_str: str) -> Range:
85+
codepoint_range: list[str] = range_str.split("..")
8286
assert len(codepoint_range) == 1 or len(codepoint_range) == 2, "Invalid format"
8387
start_cp, end_cp = 0, 0
8488
if len(codepoint_range) == 1:
@@ -89,11 +93,11 @@ def parse_codepoint_range(range_str):
8993
# m..n => [m, n+1)
9094
start_cp = int(codepoint_range[0], 16)
9195
end_cp = int(codepoint_range[1], 16) + 1
92-
return [start_cp, end_cp]
96+
return start_cp, end_cp
9397

9498

95-
def read_unicode_data_txt(filepath):
96-
def process_line(line):
99+
def read_unicode_data_txt(filepath: str) -> None:
100+
def process_line(line: str) -> None:
97101
rows = line.split(";")
98102
if len(rows) != 15:
99103
return
@@ -124,13 +128,13 @@ def process_line(line):
124128
if len(decomp_cps) > 0:
125129
decomposition_map[cp] = decomp_cps
126130

127-
with open(sys.argv[1], "r", encoding="UTF-8") as file:
131+
with open(filepath, "r", encoding="UTF-8") as file:
128132
while line := file.readline():
129133
process_line(line.rstrip())
130134

131135

132-
def read_derived_norm_props_txt(filepath):
133-
def process_line(line):
136+
def read_derived_norm_props_txt(filepath: str) -> None:
137+
def process_line(line) -> None:
134138
# Ignore comments
135139
line = line.split("#")[0]
136140
rows = line.split(";")
@@ -157,8 +161,8 @@ def process_line(line):
157161
process_line(line.rstrip())
158162

159163

160-
def read_derived_core_props_txt(filepath):
161-
def process_line(line):
164+
def read_derived_core_props_txt(filepath: str) -> None:
165+
def process_line(line: str) -> None:
162166
# Ignore comments
163167
line = line.split("#")[0]
164168
rows = line.split(";")
@@ -169,15 +173,15 @@ def process_line(line):
169173
rows[1] = rows[1].lstrip().rstrip()
170174
if rows[1] != "Alphabetic":
171175
return
172-
cp_range = parse_codepoint_range(rows[0])
176+
cp_range: Range = parse_codepoint_range(rows[0])
173177
alphabetic_ranges.append(cp_range)
174178

175179
with open(filepath, "r", encoding="UTF-8") as file:
176180
while line := file.readline():
177181
process_line(line.rstrip())
178182

179183

180-
def write_decomposition():
184+
def write_decomposition() -> None:
181185
print("const std::map<uint32_t, std::vector<uint32_t>> DECOMPOSITION_MAP = {")
182186
print(" // clang-format off")
183187
for cp in sorted(decomposition_map):
@@ -190,14 +194,16 @@ def write_decomposition():
190194
print("};")
191195

192196

193-
def write_recomposition():
197+
def write_recomposition() -> None:
194198
print(
195199
"const std::map<std::pair<uint32_t, uint32_t>, uint32_t> RECOMPOSITION_MAP = {{"
196200
)
197201
print(" // clang-format off")
198202
for cp in decomposition_map:
199203
if binary_search_ranges(composition_exclusion_ranges, cp) != -1:
200204
continue
205+
d1: Codepoint
206+
d2: Codepoint
201207
if len(decomposition_map[cp]) == 1:
202208
d1 = decomposition_map[cp][0]
203209
d2 = 0
@@ -209,7 +215,7 @@ def write_recomposition():
209215
print("}};")
210216

211217

212-
def write_ccc():
218+
def write_ccc() -> None:
213219
print("const std::map<uint32_t, int32_t> CCC_TABLE = {")
214220
print(" // clang-format off")
215221
for cp in ccc_table:
@@ -218,7 +224,7 @@ def write_ccc():
218224
print("};")
219225

220226

221-
def write_alphabetic():
227+
def write_alphabetic() -> None:
222228
print(
223229
"const std::array<std::pair<uint32_t, uint32_t>, NUM_ALPHABETIC_RANGES> ALPHABETIC_RANGES = {{"
224230
)
@@ -229,7 +235,7 @@ def write_alphabetic():
229235
print("}};")
230236

231237

232-
def write_numeric():
238+
def write_numeric() -> None:
233239
print("const std::array<uint32_t, NUM_NUMERIC_CODEPOINTS> NUMERIC_CODEPOINTS = {{")
234240
print(" // clang-format off")
235241
for i, cp in enumerate(numeric_codepoints):
@@ -244,13 +250,13 @@ def write_numeric():
244250
print("}};")
245251

246252

247-
def main():
253+
def main() -> None:
248254
if len(sys.argv) != 4:
249255
print("too few arguments", file=sys.stderr)
250256
exit(-1)
251-
unicode_txt_path = sys.argv[1]
252-
norm_props_txt_path = sys.argv[2]
253-
core_props_txt_path = sys.argv[3]
257+
unicode_txt_path: str = sys.argv[1]
258+
norm_props_txt_path: str = sys.argv[2]
259+
core_props_txt_path: str = sys.argv[3]
254260

255261
read_unicode_data_txt(unicode_txt_path)
256262
read_derived_norm_props_txt(norm_props_txt_path)
@@ -271,8 +277,6 @@ def main():
271277
print()
272278
write_recomposition()
273279
print()
274-
# write_composition_exclusion()
275-
# print()
276280
write_ccc()
277281
print()
278282
write_alphabetic()

0 commit comments

Comments
 (0)