Skip to content

Commit 508983e

Browse files
committed
chore: test ignored_chars with U+FEFF (ZWNBSP/BOM)
https://en.wikipedia.org/wiki/Byte_order_mark
1 parent e3d51cc commit 508983e

File tree

4 files changed

+27
-17
lines changed

4 files changed

+27
-17
lines changed

sqlx-core/src/config/reference.toml

+5-1
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,11 @@ migrations_dir = "foo/migrations"
155155
# ignored_chars = ["\r"]
156156

157157
# Ignore common whitespace characters (beware syntactically significant whitespace!)
158-
ignored_chars = [" ", "\t", "\r", "\n"] # Space, tab, CR, LF
158+
# Space, tab, CR, LF, zero-width non-breaking space (U+FEFF)
159+
#
160+
# U+FEFF is added by some editors as a magic number at the beginning of a text file indicating it is UTF-8 encoded,
161+
# where it is known as a byte-order mark (BOM): https://en.wikipedia.org/wiki/Byte_order_mark
162+
ignored_chars = [" ", "\t", "\r", "\n", "\uFEFF"]
159163

160164
# Specify reversible migrations by default (for `sqlx migrate create`).
161165
#

sqlx-core/src/config/tests.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ fn assert_migrate_config(config: &config::migrate::Config) {
8181
assert_eq!(config.table_name.as_deref(), Some("foo._sqlx_migrations"));
8282
assert_eq!(config.migrations_dir.as_deref(), Some("foo/migrations"));
8383

84-
let ignored_chars = BTreeSet::from([' ', '\t', '\r', '\n']);
84+
let ignored_chars = BTreeSet::from([' ', '\t', '\r', '\n', '\u{FEFF}']);
8585

8686
assert_eq!(config.ignored_chars, ignored_chars);
8787

sqlx-core/src/migrate/migration.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ pub fn checksum_fragments<'a>(fragments: impl Iterator<Item = &'a str>) -> Vec<u
7676
fn fragments_checksum_equals_full_checksum() {
7777
// Copied from `examples/postgres/axum-social-with-tests/migrations/3_comment.sql`
7878
let sql = "\
79-
create table comment (\r\n\
79+
\u{FEFF}create table comment (\r\n\
8080
\tcomment_id uuid primary key default gen_random_uuid(),\r\n\
8181
\tpost_id uuid not null references post(post_id),\r\n\
8282
\tuser_id uuid not null references \"user\"(user_id),\r\n\

sqlx-core/src/migrate/source.rs

+20-14
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ impl MigrationSource<'static> for PathBuf {
5252
}
5353

5454
/// A [`MigrationSource`] implementation with configurable resolution.
55-
///
55+
///
5656
/// `S` may be `PathBuf`, `&Path` or any type that implements `Into<PathBuf>`.
57-
///
57+
///
5858
/// See [`ResolveConfig`] for details.
5959
#[derive(Debug)]
6060
pub struct ResolveWith<S>(pub S, pub ResolveConfig);
@@ -97,20 +97,20 @@ impl ResolveConfig {
9797
}
9898

9999
/// Ignore a character when hashing migrations.
100-
///
100+
///
101101
/// The migration SQL string itself will still contain the character,
102102
/// but it will not be included when calculating the checksum.
103-
///
103+
///
104104
/// This can be used to ignore whitespace characters so changing formatting
105105
/// does not change the checksum.
106-
///
106+
///
107107
/// Adding the same `char` more than once is a no-op.
108-
///
108+
///
109109
/// ### Note: Changes Migration Checksum
110-
/// This will change the checksum of resolved migrations,
110+
/// This will change the checksum of resolved migrations,
111111
/// which may cause problems with existing deployments.
112112
///
113-
/// **Use at your own risk.**
113+
/// **Use at your own risk.**
114114
pub fn ignore_char(&mut self, c: char) -> &mut Self {
115115
self.ignored_chars.insert(c);
116116
self
@@ -123,21 +123,21 @@ impl ResolveConfig {
123123
///
124124
/// This can be used to ignore whitespace characters so changing formatting
125125
/// does not change the checksum.
126-
///
126+
///
127127
/// Adding the same `char` more than once is a no-op.
128128
///
129129
/// ### Note: Changes Migration Checksum
130-
/// This will change the checksum of resolved migrations,
130+
/// This will change the checksum of resolved migrations,
131131
/// which may cause problems with existing deployments.
132132
///
133-
/// **Use at your own risk.**
133+
/// **Use at your own risk.**
134134
pub fn ignore_chars(&mut self, chars: impl IntoIterator<Item = char>) -> &mut Self {
135135
self.ignored_chars.extend(chars);
136136
self
137137
}
138138

139139
/// Iterate over the set of ignored characters.
140-
///
140+
///
141141
/// Duplicate `char`s are not included.
142142
pub fn ignored_chars(&self) -> impl Iterator<Item = char> + '_ {
143143
self.ignored_chars.iter().copied()
@@ -266,11 +266,17 @@ fn checksum_with(sql: &str, ignored_chars: &BTreeSet<char>) -> Vec<u8> {
266266
fn checksum_with_ignored_chars() {
267267
// Ensure that `checksum_with` returns the same digest for a given set of ignored chars
268268
// as the equivalent string with the characters removed.
269-
let ignored_chars = [' ', '\t', '\r', '\n'];
269+
let ignored_chars = [
270+
' ', '\t', '\r', '\n',
271+
// Zero-width non-breaking space (ZWNBSP), often added as a magic number at the beginning
272+
// of UTF-8 encoded files as a byte-order mark (BOM):
273+
// https://en.wikipedia.org/wiki/Byte_order_mark
274+
'\u{FEFF}',
275+
];
270276

271277
// Copied from `examples/postgres/axum-social-with-tests/migrations/3_comment.sql`
272278
let sql = "\
273-
create table comment (\r\n\
279+
\u{FEFF}create table comment (\r\n\
274280
\tcomment_id uuid primary key default gen_random_uuid(),\r\n\
275281
\tpost_id uuid not null references post(post_id),\r\n\
276282
\tuser_id uuid not null references \"user\"(user_id),\r\n\

0 commit comments

Comments
 (0)