Automattic · adamziel · May 7, 2025 · May 6, 2025 · May 6, 2025 · May 6, 2025
diff --git a/tests/WP_SQLite_Driver_Tests.php b/tests/WP_SQLite_Driver_Tests.php
diff --git a/tests/WP_SQLite_Driver_Translation_Tests.php b/tests/WP_SQLite_Driver_Translation_Tests.php
diff --git a/wp-includes/mysql/class-wp-mysql-lexer.php b/wp-includes/mysql/class-wp-mysql-lexer.php
@@ -2130,7 +2130,7 @@ class WP_MySQL_Lexer {
 	 *
 	 * @var int
 	 */
-	private $sql_modes;
+	private $sql_modes = 0;
 
 	/**
 	 * How many bytes from the original SQL payload have been read and tokenized.
@@ -2181,16 +2181,28 @@ class WP_MySQL_Lexer {
 	/**
 	 * @param string $sql The SQL payload to tokenize.
 	 * @param int $mysql_version The version of the MySQL server that the SQL payload is intended for.
-	 * @param int $sql_modes The SQL modes that should be considered active during tokenization.
+	 * @param string[] $sql_modes The SQL modes that should be considered active during tokenization.
 	 */
 	public function __construct(
 		string $sql,
 		int $mysql_version = 80038,
-		int $sql_modes = 0
+		array $sql_modes = array()
 	) {
 		$this->sql           = $sql;
 		$this->mysql_version = $mysql_version;
-		$this->sql_modes     = $sql_modes;
+
+		foreach ( $sql_modes as $sql_mode ) {
+			$sql_mode = strtoupper( $sql_mode );
+			if ( 'HIGH_NOT_PRECEDENCE' === $sql_mode ) {
+				$this->sql_modes |= self::SQL_MODE_HIGH_NOT_PRECEDENCE;
+			} elseif ( 'PIPES_AS_CONCAT' === $sql_mode ) {
+				$this->sql_modes |= self::SQL_MODE_PIPES_AS_CONCAT;
+			} elseif ( 'IGNORE_SPACE' === $sql_mode ) {
+				$this->sql_modes |= self::SQL_MODE_IGNORE_SPACE;
+			} elseif ( 'NO_BACKSLASH_ESCAPES' === $sql_mode ) {
+				$this->sql_modes |= self::SQL_MODE_NO_BACKSLASH_ESCAPES;
+			}
+		}
 	}
 
 	/**
@@ -2251,7 +2263,8 @@ public function get_token(): ?WP_MySQL_Token {
 			$this->token_type,
 			$this->token_starts_at,
 			$this->bytes_already_read - $this->token_starts_at,
-			$this->sql
+			$this->sql,
+			$this->is_sql_mode_active( self::SQL_MODE_NO_BACKSLASH_ESCAPES )
 		);
 	}
 

diff --git a/wp-includes/mysql/class-wp-mysql-token.php b/wp-includes/mysql/class-wp-mysql-token.php
@@ -7,6 +7,33 @@
  * and consumed by WP_MySQL_Parser during the parsing process.
  */
 class WP_MySQL_Token extends WP_Parser_Token {
+	/**
+	 * Whether the NO_BACKSLASH_ESCAPES SQL mode is enabled.
+	 *
+	 * @var bool
+	 */
+	private $sql_mode_no_backslash_escapes_enabled;
+
+	/**
+	 * Constructor.
+	 *
+	 * @param int    $id                                    Token type.
+	 * @param int    $start                                 Byte offset in the input where the token begins.
+	 * @param int    $length                                Byte length of the token in the input.
+	 * @param string $input                                 Input bytes from which the token was parsed.
+	 * @param bool   $sql_mode_no_backslash_escapes_enabled Whether the NO_BACKSLASH_ESCAPES SQL mode is enabled.
+	 */
+	public function __construct(
+		int $id,
+		int $start,
+		int $length,
+		string $input,
+		bool $sql_mode_no_backslash_escapes_enabled
+	) {
+		parent::__construct( $id, $start, $length, $input );
+		$this->sql_mode_no_backslash_escapes_enabled = $sql_mode_no_backslash_escapes_enabled;
+	}
+
 	/**
 	 * Get the name of the token.
 	 *
@@ -24,6 +51,123 @@ public function get_name(): string {
 		return $name;
 	}
 
+	/**
+	 * Get the real unquoted value of the token.
+	 *
+	 * @return string The token value.
+	 */
+	public function get_value(): string {
+		$value = $this->get_bytes();
+		if (
+			WP_MySQL_Lexer::SINGLE_QUOTED_TEXT === $this->id
+			|| WP_MySQL_Lexer::DOUBLE_QUOTED_TEXT === $this->id
+			|| WP_MySQL_Lexer::BACK_TICK_QUOTED_ID === $this->id
+		) {
+			// Remove bounding quotes.
+			$quote = $value[0];
+			$value = substr( $value, 1, -1 );
+
+			/*
+			 * When the NO_BACKSLASH_ESCAPES SQL mode is enabled, we only need to
+			 * handle escaped bounding quotes, as the other characters preserve
+			 * their literal values.
+			 */
+			if ( $this->sql_mode_no_backslash_escapes_enabled ) {
+				return str_replace( $quote . $quote, $quote, $value );
+			}
+
+			/**
+			 * Unescape MySQL escape sequences.
+			 *
+			 * MySQL string literals use backslash as an escape character, and
+			 * the string bounding quotes can also be escaped by being doubled.
+			 *
+			 * The escaping is done according to the following rules:
+			 *
+			 *   1. Some special character escape sequences are recognized.
+			 *      For example, "\n" is a newline character, "\0" is ASCII NULL.
+			 *   2. A specific treatment is applied to "\%" and "\_" sequences.
+			 *      This is due to their special meaning for pattern matching.
+			 *   3. Other backslash-prefixed characters resolve to their literal
+			 *      values. For example, "\x" represents "x", "\\" represents "\".
+			 *
+			 * Despite looking similar, these rules are different from the C-style
+			 * string escaping, so we cannot use "strip(c)slashes()" in this case.
+			 *
+			 * See: https://dev.mysql.com/doc/refman/8.4/en/string-literals.html
+			 */
+			$backslash    = chr( 92 );
+			$replacements = array(
+				/*
+				 * MySQL special character escape sequences.
+				 */
+				( $backslash . '0' ) => chr( 0 ),  // An ASCII NULL character (\0).
+				( $backslash . "'" ) => chr( 39 ), // A single quote character (').
+				( $backslash . '"' ) => chr( 34 ), // A double quote character (").
+				( $backslash . 'b' ) => chr( 8 ),  // A backspace character.
+				( $backslash . 'n' ) => chr( 10 ), // A newline (linefeed) character (\n).
+				( $backslash . 'r' ) => chr( 13 ), // A carriage return character (\r).
+				( $backslash . 't' ) => chr( 9 ),  // A tab character (\t).
+				( $backslash . 'Z' ) => chr( 26 ), // An ASCII 26 (Control+Z) character.
+
+				/*
+				 * Normalize escaping of "%" and "_" characters.
+				 *
+				 * MySQL has unusual handling for "\%" and "\_" in all string literals.
+				 * While other sequences follow the C-style escaping ("\?" is "?", etc.),
+				 * "\%" resolves to "\%" and "\_" resolves to "\_" (unlike in C strings).
+				 *
+				 * This means that "\%" behaves like "\\%", and "\_" behaves like "\\_".
+				 * To preserve this behavior, we need to add a second backslash here.
+				 *
+				 * From https://dev.mysql.com/doc/refman/8.4/en/string-literals.html:
+				 *   > The \% and \_ sequences are used to search for literal instances
+				 *   > of % and _ in pattern-matching contexts where they would otherwise
+				 *   > be interpreted as wildcard characters. If you use \% or \_ outside
+				 *   > of pattern-matching contexts, they evaluate to the strings \% and
+				 *   > \_, not to % and _.
+				 */
+				( $backslash . '%' ) => $backslash . $backslash . '%',
+				( $backslash . '_' ) => $backslash . $backslash . '_',
+
+				/*
+				 * Preserve a double backslash as-is, so that the trailing backslash
+				 * is not consumed as the beginning of an escape sequence like "\n".
+				 *
+				 * Resolving "\\" to "\" will be handled in the next step, where all
+				 * other backslash-prefixed characters resolve to their literal values.
+				 */
+				( $backslash . $backslash )
+					=> $backslash . $backslash,
+
+				/*
+				 * The bounding quotes can also be escaped by being doubled.
+				 */
+				( $quote . $quote )  => $quote,
+			);
+
+			/*
+			 * Apply the replacements.
+			 *
+			 * It is important to use "strtr()" and not "str_replace()", because
+			 * "str_replace()" applies replacements one after another, modifying
+			 * intermediate changes rather than just the original string:
+			 *
+			 *   - str_replace( [ 'a', 'b' ], [ 'b', 'c' ], 'ab' ); // 'cc' (bad)
+			 *   - strtr( 'ab', [ 'a' => 'b', 'b' => 'c' ] );       // 'bc' (good)
+			 */
+			$value = strtr( $value, $replacements );
+
+			/*
+			 * A backslash with any other character represents the character itself.
+			 * That is, \x evaluates to x, \\ evaluates to \, and \🙂 evaluates to 🙂.
+			 */
+			$preg_quoted_backslash = preg_quote( $backslash );
+			$value                 = preg_replace( "/$preg_quoted_backslash(.)/u", '$1', $value );
+		}
+		return $value;
+	}
+
 	/**
 	 * Get the token representation as a string.
 	 *

diff --git a/wp-includes/parser/class-wp-parser-token.php b/wp-includes/parser/class-wp-parser-token.php
@@ -58,11 +58,20 @@ public function __construct(
 	}
 
 	/**
-	 * Get the token value as raw bytes from the input.
+	 * Get the raw bytes of the token from the input.
+	 *
+	 * @return string The token bytes.
+	 */
+	public function get_bytes(): string {
+		return substr( $this->input, $this->start, $this->length );
+	}
+
+	/**
+	 * Get the real unquoted value of the token.
 	 *
 	 * @return string The token value.
 	 */
 	public function get_value(): string {
-		return substr( $this->input, $this->start, $this->length );
+		return $this->get_bytes();
 	}
 }