Skip to content

Commit 6872653

Browse files
committed
String encoding: change Option<T> to Result<T, StringError>
1 parent c777bc6 commit 6872653

6 files changed

Lines changed: 108 additions & 28 deletions

File tree

godot-core/src/builtin/string/gstring.rs

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use sys::{ffi_methods, interface_fn, GodotFfi};
1515

1616
use crate::builtin::string::Encoding;
1717
use crate::builtin::{inner, NodePath, StringName, Variant};
18+
use crate::meta::error::StringError;
1819
use crate::meta::AsArg;
1920
use crate::{impl_shared_string_api, meta};
2021

@@ -88,21 +89,23 @@ impl GString {
8889
/// - **UTF-8**: The input is validated to be UTF-8.
8990
///
9091
/// Specifying incorrect encoding is safe, but may result in unintended string values.
91-
pub fn try_from_bytes(bytes: &[u8], encoding: Encoding) -> Option<Self> {
92+
pub fn try_from_bytes(bytes: &[u8], encoding: Encoding) -> Result<Self, StringError> {
9293
match encoding {
9394
Encoding::Ascii => {
9495
// If the bytes are ASCII, we can fall back to Latin-1, which is always valid (except for NUL).
9596
// is_ascii() does *not* check for the NUL byte, so the check in the Latin-1 branch is still necessary.
9697
if bytes.is_ascii() {
9798
Self::try_from_bytes(bytes, Encoding::Latin1)
99+
.map_err(|_e| StringError::new("intermediate NUL byte in ASCII string"))
98100
} else {
99-
None
101+
Err(StringError::new("invalid ASCII"))
100102
}
101103
}
102104
Encoding::Latin1 => {
103105
// Intermediate NUL bytes are not accepted in Godot. Both ASCII + Latin-1 encodings need to explicitly check for this.
104106
if bytes.contains(&0) {
105-
return None;
107+
// Error overwritten when called from ASCII branch.
108+
return Err(StringError::new("intermediate NUL byte in Latin-1 string"));
106109
}
107110

108111
let s = unsafe {
@@ -115,20 +118,22 @@ impl GString {
115118
);
116119
})
117120
};
118-
Some(s)
121+
Ok(s)
119122
}
120123
Encoding::Utf8 => {
121124
// from_utf8() also checks for intermediate NUL bytes.
122125
let utf8 = std::str::from_utf8(bytes);
123-
utf8.ok().map(GString::from)
126+
127+
utf8.map(GString::from)
128+
.map_err(|e| StringError::with_source("invalid UTF-8", e))
124129
}
125130
}
126131
}
127132

128-
/// Convert string from C-string with given encoding, returning `None` on validation errors.
133+
/// Convert string from C-string with given encoding, returning `Err` on validation errors.
129134
///
130135
/// Convenience function for [`try_from_bytes()`](Self::try_from_bytes); see its docs for more information.
131-
pub fn try_from_cstr(cstr: &std::ffi::CStr, encoding: Encoding) -> Option<Self> {
136+
pub fn try_from_cstr(cstr: &std::ffi::CStr, encoding: Encoding) -> Result<Self, StringError> {
132137
Self::try_from_bytes(cstr.to_bytes(), encoding)
133138
}
134139

godot-core/src/builtin/string/string_name.rs

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
* License, v. 2.0. If a copy of the MPL was not distributed with this
55
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
66
*/
7-
87
use std::fmt;
98

109
use godot_ffi as sys;
1110
use godot_ffi::interface_fn;
1211
use sys::{ffi_methods, GodotFfi};
1312

1413
use crate::builtin::{inner, Encoding, GString, NodePath, Variant};
14+
use crate::meta::error::StringError;
1515
use crate::meta::AsArg;
1616
use crate::{impl_shared_string_api, meta};
1717

@@ -72,16 +72,18 @@ impl StringName {
7272
/// - **UTF-8**: The input is validated to be UTF-8.
7373
///
7474
/// Specifying incorrect encoding is safe, but may result in unintended string values.
75-
pub fn try_from_bytes(bytes: &[u8], encoding: Encoding) -> Option<Self> {
75+
pub fn try_from_bytes(bytes: &[u8], encoding: Encoding) -> Result<Self, StringError> {
7676
match encoding {
7777
Encoding::Ascii => {
7878
// ASCII is a subset of UTF-8, and UTF-8 has a more direct implementation than Latin-1; thus use UTF-8 via `From<&str>`.
79-
if bytes.is_ascii() && !bytes.contains(&0) {
80-
// SAFETY: ASCII is a subset of UTF-8 and was just verified.
81-
let ascii = unsafe { std::str::from_utf8_unchecked(bytes) };
82-
Some(Self::from(ascii))
79+
if !bytes.is_ascii() {
80+
Err(StringError::new("invalid ASCII"))
81+
} else if bytes.contains(&0) {
82+
Err(StringError::new("intermediate NUL byte in ASCII string"))
8383
} else {
84-
None
84+
// SAFETY: ASCII is a subset of UTF-8 and was verified above.
85+
let ascii = unsafe { std::str::from_utf8_unchecked(bytes) };
86+
Ok(Self::from(ascii))
8587
}
8688
}
8789
Encoding::Latin1 => {
@@ -92,17 +94,19 @@ impl StringName {
9294
Encoding::Utf8 => {
9395
// from_utf8() also checks for intermediate NUL bytes.
9496
let utf8 = std::str::from_utf8(bytes);
95-
utf8.ok().map(StringName::from)
97+
98+
utf8.map(StringName::from)
99+
.map_err(|e| StringError::with_source("invalid UTF-8", e))
96100
}
97101
}
98102
}
99103

100-
/// Convert string from bytes with given encoding, returning `None` on validation errors.
104+
/// Convert string from bytes with given encoding, returning `Err` on validation errors.
101105
///
102106
/// Convenience function for [`try_from_bytes()`](Self::try_from_bytes); see its docs for more information.
103107
///
104108
/// When called with `Encoding::Latin1`, this can be slightly more efficient than `try_from_bytes()`.
105-
pub fn try_from_cstr(cstr: &std::ffi::CStr, encoding: Encoding) -> Option<Self> {
109+
pub fn try_from_cstr(cstr: &std::ffi::CStr, encoding: Encoding) -> Result<Self, StringError> {
106110
// Short-circuit the direct Godot 4.2 function for Latin-1, which takes a null-terminated C string.
107111
#[cfg(since_api = "4.2")]
108112
if encoding == Encoding::Latin1 {
@@ -119,7 +123,7 @@ impl StringName {
119123
);
120124
})
121125
};
122-
return Some(s);
126+
return Ok(s);
123127
}
124128

125129
Self::try_from_bytes(cstr.to_bytes(), encoding)

godot-core/src/meta/error/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
mod call_error;
1111
mod convert_error;
1212
mod io_error;
13+
mod string_error;
1314

1415
pub use call_error::*;
1516
pub use convert_error::*;
1617
pub use io_error::*;
18+
pub use string_error::*;
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright (c) godot-rust; Bromeon and contributors.
3+
* This Source Code Form is subject to the terms of the Mozilla Public
4+
* License, v. 2.0. If a copy of the MPL was not distributed with this
5+
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
6+
*/
7+
8+
use std::error::Error;
9+
use std::fmt;
10+
11+
/// Error related to string encoding/decoding.
12+
#[derive(Debug)]
13+
pub struct StringError {
14+
message: String,
15+
source: Option<Box<(dyn Error + 'static)>>,
16+
}
17+
18+
impl fmt::Display for StringError {
19+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
20+
if let Some(source) = self.source() {
21+
write!(f, "{}: {}", self.message, source)
22+
} else {
23+
write!(f, "{}", self.message)
24+
}
25+
}
26+
}
27+
28+
impl Error for StringError {
29+
fn source(&self) -> Option<&(dyn Error + 'static)> {
30+
self.source.as_deref()
31+
}
32+
}
33+
34+
impl StringError {
35+
pub(crate) fn new(message: impl Into<String>) -> Self {
36+
Self {
37+
message: message.into(),
38+
source: None,
39+
}
40+
}
41+
42+
pub(crate) fn with_source(
43+
message: impl Into<String>,
44+
source: impl Into<Box<(dyn Error + 'static)>>,
45+
) -> Self {
46+
Self {
47+
message: message.into(),
48+
source: Some(source.into()),
49+
}
50+
}
51+
}

itest/rust/src/builtin_tests/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,10 @@ mod containers {
3535
}
3636

3737
mod string {
38-
mod string_test_macros;
3938
mod gstring_test;
4039
mod node_path_test;
4140
mod string_name_test;
41+
mod string_test_macros;
4242
}
4343

4444
mod script {

itest/rust/src/builtin_tests/string/string_test_macros.rs

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,20 @@ macro_rules! generate_string_bytes_and_cstr_tests {
2727
assert_eq!(ascii.len(), 5);
2828

2929
let ascii_nul = <$T>::try_from_bytes(b"Hello\0", Encoding::Ascii);
30-
assert_eq!(ascii_nul, None, "intermediate NUL byte is not valid ASCII"); // at end, but still not NUL terminator.
30+
let ascii_nul = ascii_nul.expect_err("intermediate NUL byte is not valid ASCII"); // at end, but still not NUL terminator.
31+
assert_eq!(
32+
ascii_nul.to_string(),
33+
"intermediate NUL byte in ASCII string"
34+
);
3135

3236
let latin1 = <$T>::try_from_bytes(b"/\xF0\xF5\xBE", Encoding::Ascii);
33-
assert_eq!(latin1, None, "Latin-1 is *not* valid ASCII");
37+
let latin1 = latin1.expect_err("Latin-1 is *not* valid ASCII");
38+
assert_eq!(latin1.to_string(), "invalid ASCII");
3439

3540
let utf8 =
3641
<$T>::try_from_bytes(b"\xF6\xF0\x9F\x8D\x8E\xF0\x9F\x92\xA1", Encoding::Ascii);
37-
assert_eq!(utf8, None, "UTF-8 is *not* valid ASCII");
42+
let utf8 = utf8.expect_err("UTF-8 is *not* valid ASCII");
43+
assert_eq!(utf8.to_string(), "invalid ASCII");
3844
}
3945

4046
#[itest]
@@ -45,10 +51,12 @@ macro_rules! generate_string_bytes_and_cstr_tests {
4551
assert_eq!(ascii.len(), 5);
4652

4753
let latin1 = <$T>::try_from_cstr(c"/ðõ¾", Encoding::Ascii);
48-
assert_eq!(latin1, None, "Latin-1 is *not* valid ASCII");
54+
let latin1 = latin1.expect_err("Latin-1 is *not* valid ASCII");
55+
assert_eq!(latin1.to_string(), "invalid ASCII");
4956

5057
let utf8 = <$T>::try_from_cstr(c"ö🍎A💡", Encoding::Ascii);
51-
assert_eq!(utf8, None, "UTF-8 is *not* valid ASCII");
58+
let utf8 = utf8.expect_err("UTF-8 is *not* valid ASCII");
59+
assert_eq!(utf8.to_string(), "invalid ASCII");
5260
}
5361

5462
#[itest]
@@ -64,9 +72,10 @@ macro_rules! generate_string_bytes_and_cstr_tests {
6472
assert_eq!(latin1.len(), 4);
6573

6674
let latin1_nul = <$T>::try_from_bytes(b"/\0\xF0\xF5\xBE", Encoding::Latin1);
75+
let latin1_nul = latin1_nul.expect_err("intermediate NUL byte is not valid Latin-1");
6776
assert_eq!(
68-
latin1_nul, None,
69-
"intermediate NUL byte is not valid Latin-1"
77+
latin1_nul.to_string(),
78+
"intermediate NUL byte in Latin-1 string"
7079
);
7180

7281
// UTF-8 -> Latin-1: always succeeds, even if result is garbage, since every byte is a valid Latin-1 character.
@@ -107,7 +116,12 @@ macro_rules! generate_string_bytes_and_cstr_tests {
107116
assert_eq!(ascii.len(), 5);
108117

109118
let latin1 = <$T>::try_from_bytes(b"/\xF0\xF5\xBE", Encoding::Utf8);
110-
assert_eq!(latin1, None, "Latin-1 is *not* valid UTF-8");
119+
let latin1 = latin1.expect_err("Latin-1 is *not* valid UTF-8");
120+
// Note: depends on exact output of std's Utf8Error; might need format!() if that changes.
121+
assert_eq!(
122+
latin1.to_string(),
123+
"invalid UTF-8: invalid utf-8 sequence of 1 bytes from index 1"
124+
);
111125

112126
let utf8 = <$T>::try_from_bytes(
113127
b"\xC3\xB6\xF0\x9F\x8D\x8E\x41\xF0\x9F\x92\xA1",
@@ -118,7 +132,11 @@ macro_rules! generate_string_bytes_and_cstr_tests {
118132
assert_eq!(utf8.len(), 4);
119133

120134
let utf8_nul = <$T>::try_from_bytes(b"\xC3\0A", Encoding::Utf8);
121-
assert_eq!(utf8_nul, None, "intermediate NUL byte is not valid UTF-8");
135+
let utf8_nul = utf8_nul.expect_err("intermediate NUL byte is not valid UTF-8");
136+
assert_eq!(
137+
utf8_nul.to_string(),
138+
"invalid UTF-8: invalid utf-8 sequence of 1 bytes from index 0"
139+
);
122140
}
123141

124142
#[itest]

0 commit comments

Comments
 (0)