Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
536 changes: 450 additions & 86 deletions sqlx-mysql/src/collation.rs
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these changes are all a result of checking against SELECT ID, COLLATION_NAME FROM INFORMATION_SCHEMA.COLLATIONS ORDER BY ID; locally - let me know if any of them should be thrown out.

  • There was an additional ~100 collations on my machine, which I added and then commented out, rather than implementing all the other pieces necessary. let me know if you'd like me to finish that out, I'd be happy to, if that's something we want. otherwise, we can remove the comments of those extra collations.
  • uft8_* was displayed as utf8mb3_* on my machine. let me know if those changes should be reverted
  • also added impl TryFrom<u16> for Collation for use here

Large diffs are not rendered by default.

19 changes: 14 additions & 5 deletions sqlx-mysql/src/protocol/text/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::str::from_utf8;
use bitflags::bitflags;
use bytes::{Buf, Bytes};

use crate::collation::Collation;
use crate::error::Error;
use crate::io::Decode;
use crate::io::MySqlBufExt;
Expand Down Expand Up @@ -62,6 +63,10 @@ bitflags! {
}

// https://dev.mysql.com/doc/internals/en/com-query-response.html#column-type
// dead link ^
// https://dev.mysql.com/doc/dev/mysql-server/9.0.0/page_protocol_com_query_response_text_resultset_column_definition.html
// the "type of the column as defined in enum_field_types"
// https://dev.mysql.com/doc/dev/mysql-server/9.0.0/field__types_8h.html
Comment on lines 65 to +69
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

feel free to suggest which of these links we should keep - the original link here was dead when i tried it.


#[derive(Debug, Copy, Clone, PartialEq)]
#[cfg_attr(feature = "offline", derive(serde::Serialize, serde::Deserialize))]
Expand Down Expand Up @@ -112,8 +117,7 @@ pub(crate) struct ColumnDefinition {
table: Bytes,
alias: Bytes,
name: Bytes,
#[allow(unused)]
pub(crate) collation: u16,
pub(crate) collation: Collation,
pub(crate) max_size: u32,
pub(crate) r#type: ColumnType,
pub(crate) flags: ColumnFlags,
Expand Down Expand Up @@ -156,7 +160,7 @@ impl Decode<'_, Capabilities> for ColumnDefinition {
table,
alias,
name,
collation,
collation: Collation::try_from(collation)?,
max_size,
r#type: ColumnType::try_from_u16(type_id)?,
flags: ColumnFlags::from_bits_truncate(flags),
Expand All @@ -166,8 +170,13 @@ impl Decode<'_, Capabilities> for ColumnDefinition {
}

impl ColumnType {
pub(crate) fn name(self, flags: ColumnFlags, max_size: Option<u32>) -> &'static str {
let is_binary = flags.contains(ColumnFlags::BINARY);
pub(crate) fn name(
self,
collation: Collation,
flags: ColumnFlags,
max_size: Option<u32>,
) -> &'static str {
let is_binary = collation == Collation::binary;
let is_unsigned = flags.contains(ColumnFlags::UNSIGNED);
let is_enum = flags.contains(ColumnFlags::ENUM);

Expand Down
11 changes: 9 additions & 2 deletions sqlx-mysql/src/type_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@ use std::fmt::{self, Display, Formatter};

pub(crate) use sqlx_core::type_info::*;

use crate::protocol::text::{ColumnDefinition, ColumnFlags, ColumnType};
use crate::{
collation::Collation,
protocol::text::{ColumnDefinition, ColumnFlags, ColumnType},
};

/// Type information for a MySql type.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "offline", derive(serde::Serialize, serde::Deserialize))]
pub struct MySqlTypeInfo {
pub(crate) r#type: ColumnType,
pub(crate) flags: ColumnFlags,
pub(crate) collation: Collation,

// [max_size] for integer types, this is (M) in BIT(M) or TINYINT(M)
#[cfg_attr(feature = "offline", serde(default))]
Expand All @@ -20,6 +24,7 @@ impl MySqlTypeInfo {
pub(crate) const fn binary(ty: ColumnType) -> Self {
Self {
r#type: ty,
collation: Collation::binary,
flags: ColumnFlags::BINARY,
max_size: None,
}
Expand All @@ -38,6 +43,7 @@ impl MySqlTypeInfo {
Self {
r#type: ColumnType::String,
flags: ColumnFlags::ENUM,
collation: Collation::binary,
max_size: None,
}
}
Expand All @@ -60,6 +66,7 @@ impl MySqlTypeInfo {
Self {
r#type: column.r#type,
flags: column.flags,
collation: column.collation,
max_size: Some(column.max_size),
}
}
Expand All @@ -77,7 +84,7 @@ impl TypeInfo for MySqlTypeInfo {
}

fn name(&self) -> &str {
self.r#type.name(self.flags, self.max_size)
self.r#type.name(self.collation, self.flags, self.max_size)
}
}

Expand Down
2 changes: 2 additions & 0 deletions sqlx-mysql/src/types/bool.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::collation::Collation;
use crate::decode::Decode;
use crate::encode::{Encode, IsNull};
use crate::error::BoxDynError;
Expand All @@ -12,6 +13,7 @@ impl Type<MySql> for bool {
// MySQL has no actual `BOOLEAN` type, the type is an alias of `TINYINT(1)`
MySqlTypeInfo {
flags: ColumnFlags::BINARY | ColumnFlags::UNSIGNED,
collation: Collation::binary,
max_size: Some(1),
r#type: ColumnType::Tiny,
}
Expand Down
4 changes: 3 additions & 1 deletion sqlx-mysql/src/types/str.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::collation::Collation;
use crate::decode::Decode;
use crate::encode::{Encode, IsNull};
use crate::error::BoxDynError;
Expand All @@ -12,6 +13,7 @@ impl Type<MySql> for str {
MySqlTypeInfo {
r#type: ColumnType::VarString, // VARCHAR
flags: ColumnFlags::empty(),
collation: Collation::utf8mb3_bin,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@DrewMcArthur

I think using utf8mb3_bin regardless of session state is misleading.
In my opinion, the only difference needed in this context is whether it is binary or string.
How about using an enum like this one?

CollationType {
    Binary,
    String
}

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was one of the pieces I wasn't sure about, what the default should be for some of these. I think you're right, _bin probably isn't right & I'll update this to utf8mb3_general_ci if you think that'd be correct.

As for changing the type of MySqlTypeInfo's collation field to an enum, I think the source of the issue on #3387 is that there are three cases being compressed into the ColumnFlag::binary flag:

  • a VARCHAR has a collation like utf8_general_ci, and the ColumnFlag::binary is false, so this is compatible with a Rust String
  • a BLOB has a collation of binary, ColumnFlag::binary is true, so it's not compatible with a Rust String, which is also correct
  • a VARCHAR has a collation of *_bin, so ColumnFlag::binary is true, so it's marked incompatible when we should be able to decode this into a String. This is the case that needs to be fixed.

This could be handled with that enum, if the value is String for both VARCHAR cases, but I feel like that could also be confusing to have a CollationType::String (as opposed to binary) while simultaneously having ColumnFlag::binary = true.

I think my preference is to keep track of each unique collation here, since it's only a byte, but I'll defer to you and @abonander to decide what makes more sense.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
collation: Collation::utf8mb3_bin,
collation: Collation::utf8mb3_general_ci,

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

^ @alu (sorry, I'm terrible at remembering to directly @ people in replies here)

max_size: None,
}
}
Expand All @@ -28,7 +30,7 @@ impl Type<MySql> for str {
| ColumnType::String
| ColumnType::VarString
| ColumnType::Enum
) && !ty.flags.contains(ColumnFlags::BINARY)
) && ty.collation != Collation::binary
}
}

Expand Down
2 changes: 2 additions & 0 deletions sqlx-mysql/src/types/uint.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::collation::Collation;
use crate::decode::Decode;
use crate::encode::{Encode, IsNull};
use crate::error::BoxDynError;
Expand All @@ -10,6 +11,7 @@ fn uint_type_info(ty: ColumnType) -> MySqlTypeInfo {
MySqlTypeInfo {
r#type: ty,
flags: ColumnFlags::BINARY | ColumnFlags::UNSIGNED,
collation: Collation::binary,
max_size: None,
}
}
Expand Down
46 changes: 46 additions & 0 deletions tests/mysql/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -384,3 +384,49 @@ CREATE TEMPORARY TABLE user_login (

Ok(())
}

mod string_collation_tests {
use super::*;

#[sqlx_macros::test]
async fn test_binary_collation() -> anyhow::Result<()> {
let mut conn = new::<MySql>().await?;

conn.execute(
r#"
CREATE TEMPORARY TABLE strings (
id INT PRIMARY KEY AUTO_INCREMENT,
char_bin CHAR(50) COLLATE utf8mb4_bin,
varchar_bin VARCHAR(255) COLLATE utf8mb4_bin,
text_bin TEXT COLLATE utf8mb4_bin
);
"#,
)
.await?;

// Insert sample data
conn.execute(
r#"
INSERT INTO strings (char_bin, varchar_bin, text_bin) VALUES
('char_binary', 'varchar_binary', 'text_binary');
"#,
)
.await?;

// Query the data back
let row: (String, String, String) = sqlx::query_as(
r#"
SELECT char_bin, varchar_bin, text_bin FROM strings
"#,
)
.fetch_one(&mut conn)
.await?;

// Assert the values
assert_eq!(row.0, "char_binary");
assert_eq!(row.1, "varchar_binary");
assert_eq!(row.2, "text_binary");

Ok(())
}
}