Skip to content

Commit 6e9abed

Browse files
committed
Fix timezone parsing to support 3-component IANA names and hyphens
This commit fixes a parsing error in the MySQL SET statement parser that occurred when processing `SET time_zone` statements with: 1. Three-component IANA timezone names (e.g., America/Argentina/Buenos_Aires) 2. Timezone names containing hyphens (e.g., America/Port-au-Prince) Previously, the regex pattern `(?:\w+/\w+)` only matched 2-component timezone names and did not support hyphens. This caused parsing errors logged as: "[ERROR] Unable to parse query. If correct, report it as a bug: SET time_zone=\"America/Argentina/Buenos_Aires\";" When multiplexing is enabled, this bug causes timestamps to be incorrectly written to the database. Changes: - Updated timezone regex from `(?:\w+/\w+)` to `(?:[\w-]+(?:/[\w-]+){1,2})` - Supports 2-3 components: Area/Location or Area/Country/Location - Supports hyphens in component names (e.g., Port-au-Prince) - Added comprehensive Doxygen documentation for timezone parsing - Extended TAP test cases with new timezone formats Note: Bare words like 'SYSTEM' and 'UTC' were already supported via other patterns in the parser (vp2 pattern for word matching). Fixes: #4993 Related: gemini-code-assist review comments
1 parent a83bba1 commit 6e9abed

2 files changed

Lines changed: 69 additions & 8 deletions

File tree

lib/MySQL_Set_Stmt_Parser.cpp

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -244,16 +244,58 @@ void MySQL_Set_Stmt_Parser::generateRE_parse1v2() {
244244
vp = "(?:| *(?:\\+|\\-) *)\\d+(?:|\\.\\d+)"; // a signed or unsigned integer or decimal , N7 = merge of N3 and N6
245245
var_patterns.push_back(vp);
246246

247+
/**
248+
* @page time_zone_parsing MySQL time_zone Variable Parsing
249+
*
250+
* The MySQL @c time_zone system variable accepts two formats:
251+
*
252+
* @section tz_numeric_format Numeric Offset Format
253+
* Specifies the timezone as an offset from UTC in the format +HH:MM or -HH:MM. The sign is mandatory, and a single-digit hour (e.g. @c '+8:00' ) is also accepted.
254+
* Examples: @c '+08:00' , @c '-05:30' , @c '+00:00' (UTC)
255+
*
256+
* @section tz_iana_format IANA Timezone Name Format
257+
* Specifies timezone using IANA timezone database names.
258+
* The naming convention follows the pattern @c Area/Location or
259+
* @c Area/Country/Location.
260+
*
261+
* @subsection tz_iana_components IANA Timezone Name Components
262+
* - **Area**: Continent, ocean, or special region (e.g., America, Europe, Asia, Etc)
263+
* - **Location**: City, island, or country name
264+
*
265+
* IANA timezone names may contain:
266+
* - Word characters (letters, digits, underscores): @c [a-zA-Z0-9_]
267+
* - Hyphens, where the place name itself is hyphenated: @c Port-au-Prince , @c Blanc-Sablon (spaces are written as underscores, e.g. @c Rio_Gallegos )
268+
*
269+
* @subsection tz_iana_examples IANA Timezone Examples
270+
* - 2 components: @c Europe/London , @c America/New_York , @c Asia/Tokyo
271+
* - 3 components: @c America/Argentina/Buenos_Aires , @c America/Indiana/Indianapolis
272+
* - With hyphens: @c America/Port-au-Prince , @c America/Blanc-Sablon
273+
*
274+
* @subsection tz_special_values Special Values
275+
* The following special values are also supported (matched by other patterns):
276+
* - @c SYSTEM : Use the system's timezone
277+
* - @c UTC : Coordinated Universal Time
278+
*
279+
* @subsection tz_limitations Limitations
280+
* The regex pattern limits matching to 2-3 components (e.g., @c Area/Location or
281+
* @c Area/Country/Location). While IANA timezone names with 4+ components are
282+
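* theoretically possible, they are extremely rare and not currently supported. Names containing characters other than word characters and hyphens (e.g., @c Etc/GMT+5 , which contains a plus sign) are not matched by this pattern either.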
283+
*
284+
* @see https://dev.mysql.com/doc/refman/8.0/en/time-zone-support.html
285+
* @see https://www.iana.org/time-zones
286+
*/
247287
{
248-
// time_zone in numeric format:
249-
// - +/- sign
250-
// 1 or 2 digits
251-
// :
252-
// 2 digits
288+
// time_zone in numeric format: mandatory +/- sign, 1 or 2 hour digits, ':', 2 minute digits
289+
// Examples: '+08:00', '-05:30', '+00:00'
253290
string tzd = "(?:(?:\\+|\\-)(?:|\\d)\\d:\\d\\d)";
254-
// time_zone in string format:
255-
// word / word
256-
string tzw = "(?:\\w+/\\w+)";
291+
// time_zone in string format: IANA timezone names
292+
// Supports 2-3 components with optional hyphens:
293+
// - 2 components: Area/Location (e.g., Europe/London, America/New_York)
294+
// - 3 components: Area/Country/Location (e.g., America/Argentina/Buenos_Aires)
295+
// - With hyphens: America/Port-au-Prince, America/Blanc-Sablon
296+
// Note: Does not match bare words like 'SYSTEM' or 'UTC' - these are matched
297+
// by other patterns in var_patterns (e.g., vp2)
298+
string tzw = "(?:[\\w-]+(?:/[\\w-]+){1,2})";
257299
vp = "(?:" + tzd + "|" + tzw + ")"; // time_zone in numeric and string format
258300
}
259301
for (auto it = quote_symbol.begin(); it != quote_symbol.end(); it++) {

test/tap/tests/setparser_test_common.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ static Test syntax_errors[] = {
136136
};
137137

138138
static Test time_zone[] = {
139+
// Original tests - 2 component timezone names
139140
{ "SET @@time_zone = 'Europe/Paris'", { Expected("time_zone", {"Europe/Paris"}) } },
140141
{ "SET @@time_zone = '+00:00'", { Expected("time_zone", {"+00:00"}) } },
141142
{ "SET @@time_zone = \"Europe/Paris\"", { Expected("time_zone", {"Europe/Paris"}) } },
@@ -144,6 +145,24 @@ static Test time_zone[] = {
144145
{ "SET @@TIME_ZONE = @OLD_TIME_ZONE", { Expected("time_zone", {"@OLD_TIME_ZONE"}) } },
145146
{ "SET @@TIME_ZONE := 'SYSTEM'", { Expected("time_zone", {"SYSTEM"}) } },
146147
{ "SET time_zone := 'SYSTEM'", { Expected("time_zone", {"SYSTEM"}) } },
148+
// Special values - UTC and SYSTEM
149+
{ "SET time_zone = 'UTC'", { Expected("time_zone", {"UTC"}) } },
150+
{ "SET time_zone = SYSTEM", { Expected("time_zone", {"SYSTEM"}) } },
151+
{ "SET time_zone = UTC", { Expected("time_zone", {"UTC"}) } },
152+
// 3 component timezone names (bug fix for GitHub issue #4993)
153+
{ "SET time_zone = 'America/Argentina/Buenos_Aires'", { Expected("time_zone", {"America/Argentina/Buenos_Aires"}) } },
154+
{ "SET time_zone = 'America/Indiana/Indianapolis'", { Expected("time_zone", {"America/Indiana/Indianapolis"}) } },
155+
{ "SET time_zone = \"America/Kentucky/Louisville\"", { Expected("time_zone", {"America/Kentucky/Louisville"}) } },
156+
// Timezone names with hyphens (additional fix)
157+
{ "SET time_zone = 'America/Port-au-Prince'", { Expected("time_zone", {"America/Port-au-Prince"}) } },
158+
{ "SET time_zone = 'America/Blanc-Sablon'", { Expected("time_zone", {"America/Blanc-Sablon"}) } },
159+
{ "SET time_zone = \"Atlantic/Canary\"", { Expected("time_zone", {"Atlantic/Canary"}) } },
160+
// Various numeric offsets
161+
{ "SET time_zone = '+08:00'", { Expected("time_zone", {"+08:00"}) } },
162+
{ "SET time_zone = '-05:30'", { Expected("time_zone", {"-05:30"}) } },
163+
{ "SET time_zone = '-10:00'", { Expected("time_zone", {"-10:00"}) } },
164+
// Combined with other SET variables
165+
{ "SET time_zone = 'America/Argentina/Buenos_Aires', sql_mode = 'TRADITIONAL'", { Expected("time_zone", {"America/Argentina/Buenos_Aires"}), Expected("sql_mode", {"TRADITIONAL"}) } },
147166
};
148167

149168
static Test session_track_gtids[] = {

0 commit comments

Comments
 (0)