Skip to content

Commit cb2969d

Browse files
committed
[uloc] Implement to_language_tag()
Also: implement a Rust macro that generates wrappers for the numerous `uloc` methods that write their string output into a buffer of a fixed size.
1 parent c699eab commit cb2969d

File tree

1 file changed

+185
-80
lines changed

1 file changed

+185
-80
lines changed

rust_icu_uloc/src/lib.rs

+185-80
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,112 @@ impl TryFrom<&ffi::CStr> for ULoc {
6363
}
6464
}
6565

66+
/// Generates a function that wraps an ICU4C `uloc` method which writes its string result into a
/// caller-supplied buffer, retrying once with a larger buffer if the first one is too small.
///
/// The various `uloc` methods of this type have inconsistent signature patterns: some put all
/// of their input arguments _before_ the `buffer` and its `capacity`, and some split the input
/// arguments around them. The macro therefore accepts input arguments in both positions.
///
/// For an invocation of the form
/// ```ignore
/// buffered_string_method_with_retry!(
///     my_method,
///     BUFFER_CAPACITY,
///     [before_arg_a: before_type_a, before_arg_b: before_type_b,],
///     [after_arg_a: after_type_a, after_arg_b: after_type_b,]
/// );
/// ```
/// the generated function has a signature of the form
/// ```ignore
/// fn my_method(
///     uloc_method: unsafe extern "C" fn(
///         before_type_a,
///         before_type_b,
///         *mut raw::c_char,
///         i32,
///         after_type_a,
///         after_type_b,
///         *mut UErrorCode,
///     ) -> i32,
///     before_arg_a: before_type_a,
///     before_arg_b: before_type_b,
///     after_arg_a: after_type_a,
///     after_arg_b: after_type_b
/// ) -> Result<String, common::Error> {}
/// ```
///
/// The generated function:
///
/// 1. calls `uloc_method` with a zeroed buffer of `BUFFER_CAPACITY` bytes;
/// 2. if the call reports `U_BUFFER_OVERFLOW_ERROR` (or succeeds while reporting a length larger
///    than the capacity), grows the buffer to the reported length, clears the status and calls
///    `uloc_method` a second time;
/// 3. converts the final status with `common::Error::ok_or_warning`;
/// 4. trims the buffer to the length reported by the first call and returns it as a `String`.
///    A reported length of zero (or less) yields an empty string.
///
/// Every input argument is passed to `uloc_method` twice when a retry happens, so the argument
/// types must be `Copy` (raw pointers and integers in all current uses). Any pointers passed in
/// must be valid for both calls.
macro_rules! buffered_string_method_with_retry {
    ($method_name:ident, $buffer_capacity:expr,
     [$($before_arg:ident: $before_arg_type:ty,)*],
     [$($after_arg:ident: $after_arg_type:ty,)*]) => {
        fn $method_name(
            uloc_method: unsafe extern "C" fn(
                $($before_arg_type,)*
                *mut raw::c_char,
                i32,
                $($after_arg_type,)*
                *mut UErrorCode,
            ) -> i32,
            $($before_arg: $before_arg_type,)*
            $($after_arg: $after_arg_type,)*
        ) -> Result<String, common::Error> {
            // Evaluate the capacity expression once, and convert it with a checked conversion
            // so that an oversized capacity is reported instead of silently truncated.
            let capacity: usize = $buffer_capacity;
            let capacity_i32: i32 = capacity
                .try_into()
                .map_err(|e| common::Error::wrapper(format!("{:?}", e)))?;

            let mut status = common::Error::OK_CODE;
            let mut buf: Vec<u8> = vec![0; capacity];

            // SAFETY: `buf` owns exactly `capacity` writable bytes and `status` is a live local.
            // The caller guarantees that any pointers among the input arguments are valid.
            let full_len: i32 = unsafe {
                uloc_method(
                    $($before_arg,)*
                    buf.as_mut_ptr() as *mut raw::c_char,
                    capacity_i32,
                    $($after_arg,)*
                    &mut status,
                )
            };

            let needs_retry = status == UErrorCode::U_BUFFER_OVERFLOW_ERROR
                || (common::Error::is_ok(status) && full_len > capacity_i32);
            if needs_retry {
                assert!(full_len > 0);
                let needed: usize = full_len
                    .try_into()
                    .map_err(|e| common::Error::wrapper(format!("{:?}", e)))?;
                buf.resize(needed, 0);

                // The overflow reported by the first call must be cleared before retrying:
                // ICU4C functions do nothing when handed a status that already signals failure.
                status = common::Error::OK_CODE;

                // SAFETY: same requirements as above; `buf` now owns exactly `full_len`
                // writable bytes, which is the capacity passed to the callee.
                unsafe {
                    uloc_method(
                        $($before_arg,)*
                        buf.as_mut_ptr() as *mut raw::c_char,
                        full_len,
                        $($after_arg,)*
                        &mut status,
                    )
                };
            }

            common::Error::ok_or_warning(status)?;

            // Trim the buffer to the reported length. A non-positive length means there is no
            // output, so the result is an empty string rather than a buffer full of NULs.
            let final_len: usize = full_len
                .max(0)
                .try_into()
                .map_err(|e| common::Error::wrapper(format!("{:?}", e)))?;
            buf.resize(final_len, 0);
            String::from_utf8(buf).map_err(|_| common::Error::string_with_interior_nul())
        }
    }
}
171+
66172
impl ULoc {
67173
/// Implements `uloc_getLanguage`.
68174
pub fn language(&self) -> Result<String, common::Error> {
@@ -102,6 +208,25 @@ impl ULoc {
102208
.map(|repr| ULoc { repr })
103209
}
104210

211+
// Implements `uloc_toLanguageTag` from ICU4C.
212+
pub fn to_language_tag(&self, strict: bool) -> Result<String, common::Error> {
213+
buffered_string_method_with_retry!(
214+
buffered_string_to_language_tag,
215+
LOCALE_CAPACITY,
216+
[locale_id: *const raw::c_char,],
217+
[strict: rust_icu_sys::UBool,]
218+
);
219+
220+
let locale_id = self.as_c_str();
221+
// No `UBool` constants available in rust_icu_sys, unfortunately.
222+
let strict = if strict { 1 } else { 0 };
223+
buffered_string_to_language_tag(
224+
versioned_function!(uloc_toLanguageTag),
225+
locale_id.as_ptr(),
226+
strict,
227+
)
228+
}
229+
105230
/// Returns the current label of this locale.
106231
pub fn label(&self) -> &str {
107232
&self.repr
@@ -112,14 +237,24 @@ impl ULoc {
112237
ffi::CString::new(self.repr.clone()).expect("ULoc contained interior NUL bytes")
113238
}
114239

240+
/// Implements `uloc_acceptLanguage` from ICU4C.
115241
pub fn accept_language(
116242
accept_list: impl IntoIterator<Item = impl Into<ULoc>>,
117243
available_locales: impl IntoIterator<Item = impl Into<ULoc>>,
118244
) -> Result<(Option<ULoc>, UAcceptResult), common::Error> {
119-
let mut buf: Vec<u8> = vec![0; LOCALE_CAPACITY];
120-
let mut accept_result: UAcceptResult = UAcceptResult::ULOC_ACCEPT_FAILED;
121-
let mut status = common::Error::OK_CODE;
245+
buffered_string_method_with_retry!(
246+
buffered_string_uloc_accept_language,
247+
LOCALE_CAPACITY,
248+
[],
249+
[
250+
out_result: *mut UAcceptResult,
251+
accept_list: *mut *const ::std::os::raw::c_char,
252+
accept_list_count: i32,
253+
available_locales: *mut UEnumeration,
254+
]
255+
);
122256

257+
let mut accept_result: UAcceptResult = UAcceptResult::ULOC_ACCEPT_FAILED;
123258
let mut accept_list_cstrings: Vec<ffi::CString> = vec![];
124259
// This is mutable only to satisfy the missing `const`s in the ICU4C API.
125260
let mut accept_list: Vec<*const raw::c_char> = accept_list
@@ -142,55 +277,25 @@ impl ULoc {
142277
available_locales.iter().map(|uloc| uloc.label()).collect();
143278
let mut available_locales = Enumeration::try_from(&available_locales[..])?;
144279

145-
let full_len = unsafe {
146-
versioned_function!(uloc_acceptLanguage)(
147-
buf.as_mut_ptr() as *mut raw::c_char,
148-
buf.len() as i32,
149-
&mut accept_result,
150-
accept_list.as_mut_ptr(),
151-
accept_list.len() as i32,
152-
available_locales.repr(),
153-
&mut status,
154-
)
155-
};
156-
157-
if status == UErrorCode::U_BUFFER_OVERFLOW_ERROR {
158-
assert!(full_len > 0);
159-
let full_len: usize = full_len
160-
.try_into()
161-
.map_err(|e| common::Error::wrapper(format!("{:?}", e)))?;
162-
buf.resize(full_len, 0);
163-
unsafe {
164-
versioned_function!(uloc_acceptLanguage)(
165-
buf.as_mut_ptr() as *mut raw::c_char,
166-
buf.len() as i32,
167-
&mut accept_result,
168-
accept_list.as_mut_ptr(),
169-
accept_list.len() as i32,
170-
available_locales.repr(),
171-
&mut status,
172-
);
173-
}
174-
}
280+
let matched_locale = buffered_string_uloc_accept_language(
281+
versioned_function!(uloc_acceptLanguage),
282+
&mut accept_result,
283+
accept_list.as_mut_ptr(),
284+
accept_list.len() as i32,
285+
available_locales.repr(),
286+
);
175287

176-
common::Error::ok_or_warning(status)?;
177288
// Having no match is a valid if disappointing result.
178289
if accept_result == UAcceptResult::ULOC_ACCEPT_FAILED {
179290
return Ok((None, accept_result));
180291
}
181292

182-
// Adjust the size of the buffer here.
183-
assert!(full_len > 0);
184-
buf.resize(full_len as usize, 0);
185-
186-
String::from_utf8(buf)
187-
.map_err(|_| common::Error::string_with_interior_nul())
293+
matched_locale
188294
.and_then(|s| ULoc::try_from(s.as_str()))
189295
.map(|uloc| (Some(uloc), accept_result))
190296
}
191297

192-
/// Call a `uloc_*` method with a particular signature (that clones and modifies the internal
193-
/// representation of the locale ID and requires a resizable buffer).
298+
/// Call a `uloc` method that takes this locale's ID and returns a string.
194299
fn call_buffered_string_method(
195300
&self,
196301
uloc_method: unsafe extern "C" fn(
@@ -200,40 +305,14 @@ impl ULoc {
200305
*mut UErrorCode,
201306
) -> i32,
202307
) -> Result<String, common::Error> {
203-
let mut status = common::Error::OK_CODE;
204-
let repr = ffi::CString::new(self.repr.clone())
205-
.map_err(|_| common::Error::string_with_interior_nul())?;
206-
let mut buf: Vec<u8> = vec![0; LOCALE_CAPACITY];
207-
208-
// Requires that repr is a valid pointer
209-
let full_len = unsafe {
210-
assert!(common::Error::is_ok(status));
211-
uloc_method(
212-
repr.as_ptr(),
213-
buf.as_mut_ptr() as *mut raw::c_char,
214-
LOCALE_CAPACITY as i32,
215-
&mut status,
216-
)
217-
} as usize;
218-
common::Error::ok_or_warning(status)?;
219-
if full_len > LOCALE_CAPACITY {
220-
buf.resize(full_len, 0);
221-
// Same unsafe requirements as above, plus full_len must be exactly
222-
// the output buffer size.
223-
unsafe {
224-
assert!(common::Error::is_ok(status));
225-
uloc_method(
226-
repr.as_ptr(),
227-
buf.as_mut_ptr() as *mut raw::c_char,
228-
full_len as i32,
229-
&mut status,
230-
)
231-
};
232-
common::Error::ok_or_warning(status)?;
233-
}
234-
// Adjust the size of the buffer here.
235-
buf.resize(full_len, 0);
236-
String::from_utf8(buf).map_err(|_| common::Error::string_with_interior_nul())
308+
buffered_string_method_with_retry!(
309+
buffered_string_char_star,
310+
LOCALE_CAPACITY,
311+
[char_star: *const raw::c_char,],
312+
[]
313+
);
314+
let asciiz = self.as_c_str();
315+
buffered_string_char_star(uloc_method, asciiz.as_ptr())
237316
}
238317
}
239318

@@ -316,18 +395,27 @@ mod tests {
316395
assert_eq!(minimized_subtags.label(), expected.label());
317396
}
318397

398+
#[test]
fn test_to_language_tag() {
    // Strict conversion of an underscore-separated locale ID should produce the
    // hyphen-separated language tag.
    let locale = ULoc::try_from("sr_Cyrl_RS").expect("get sr_Cyrl_RS locale");
    let tag = locale
        .to_language_tag(true)
        .expect("should convert to language tag");
    assert_eq!(tag, "sr-Cyrl-RS".to_string());
}
406+
319407
#[test]
320408
fn test_accept_language_fallback() {
321409
let accept_list: Result<Vec<_>, _> = vec!["es_MX", "ar_EG", "fr_FR"]
322410
.into_iter()
323-
.map(|s| ULoc::try_from(s))
411+
.map(ULoc::try_from)
324412
.collect();
325413
let accept_list = accept_list.expect("make accept_list");
326414

327415
let available_locales: Result<Vec<_>, _> =
328416
vec!["de_DE", "en_US", "es", "nl_NL", "sr_RS_Cyrl"]
329417
.into_iter()
330-
.map(|s| ULoc::try_from(s))
418+
.map(ULoc::try_from)
331419
.collect();
332420
let available_locales = available_locales.expect("make available_locales");
333421

@@ -346,13 +434,13 @@ mod tests {
346434
fn test_accept_language_exact_match() {
347435
let accept_list: Result<Vec<_>, _> = vec!["es_ES", "ar_EG", "fr_FR"]
348436
.into_iter()
349-
.map(|s| ULoc::try_from(s))
437+
.map(ULoc::try_from)
350438
.collect();
351439
let accept_list = accept_list.expect("make accept_list");
352440

353441
let available_locales: Result<Vec<_>, _> = vec!["de_DE", "en_US", "es_MX", "ar_EG"]
354442
.into_iter()
355-
.map(|s| ULoc::try_from(s))
443+
.map(ULoc::try_from)
356444
.collect();
357445
let available_locales = available_locales.expect("make available_locales");
358446

@@ -366,4 +454,21 @@ mod tests {
366454
)
367455
);
368456
}
457+
458+
#[test]
fn test_accept_language_no_match() {
    // None of the accepted locales is available, so no locale should be selected.
    let accept_list = vec!["es_ES", "ar_EG", "fr_FR"]
        .into_iter()
        .map(ULoc::try_from)
        .collect::<Result<Vec<_>, _>>()
        .expect("make accept_list");

    let available_locales = vec!["el_GR"]
        .into_iter()
        .map(ULoc::try_from)
        .collect::<Result<Vec<_>, _>>()
        .expect("make available_locales");

    let outcome =
        ULoc::accept_language(accept_list, available_locales).expect("call accept_language");
    assert_eq!(outcome, (None, UAcceptResult::ULOC_ACCEPT_FAILED));
}
369474
}

0 commit comments

Comments
 (0)