@@ -2107,8 +2107,9 @@ impl<'a> Tokenizer<'a> {
// NOTE(review): this span is a diff fragment (old/new diff line numbers are fused
// onto each line) of the multi-line-comment tokenizer loop; the enclosing fn
// signature begins outside this view. Code left byte-identical; comments only.
21072107 ) -> Result < Option < Token > , TokenizerError > {
// `s` accumulates the comment body text; `nested` tracks `/* */` nesting depth
// (we enter this loop already inside one opening `/*`).
21082108 let mut s = String :: new ( ) ;
21092109 let mut nested = 1 ;
// Flipped to true once a `!` (MySQL executable-comment marker) is seen, when the
// dialect supports C-style `/*! ... */` comments.
2110+ let mut c_style_comments = false ;
21102111 let supports_nested_comments = self . dialect . supports_nested_comments ( ) ;
2111-
2112+ let supports_c_style_comments = self . dialect . supports_c_style_comments ( ) ;
21122113 loop {
21132114 match chars. next ( ) {
// A nested `/*` opener: keep the `*` in the body and bump the nesting depth.
21142115 Some ( '/' ) if matches ! ( chars. peek( ) , Some ( '*' ) ) && supports_nested_comments => {
@@ -2117,10 +2118,37 @@ impl<'a> Tokenizer<'a> {
21172118 s. push ( '*' ) ;
21182119 nested += 1 ;
21192120 }
// NOTE(review): this arm fires on ANY '!' anywhere inside the comment, not only
// immediately after the opening `/*` — confirm that is the intended trigger for
// MySQL `/*! ... */` handling.
2121+ Some ( '!' ) if supports_c_style_comments => {
2122+ c_style_comments = true ;
// Swallow an optional version number (e.g. `/*!8000000 ... */`); the '!' and the
// digits are consumed and deliberately NOT pushed into `s`.
2123+ while let Some ( '0' ) | Some ( '1' ) | Some ( '2' ) | Some ( '3' ) | Some ( '4' )
2124+ | Some ( '5' ) | Some ( '6' ) | Some ( '7' ) | Some ( '8' ) | Some ( '9' ) = chars. peek ( )
2125+ {
2126+ chars. next ( ) ; // consume the digit
2127+ }
2128+ }
2129+ // consume all leading whitespaces until the '*/' character if in a C-style comment
// NOTE(review): three problems in the arm below. (1) The consumed whitespace `ch`
// is never pushed to `s`, so comment-body whitespace is silently dropped.
// (2) The inner `while let` only ever peek()s — it never next()s on the
// whitespace path — so it re-inspects the SAME character and `ws_count` grows
// without bound (infinite-loop risk on any run of 2+ whitespace chars).
// (3) `c` IS the peeked character, so `c == '*' && chars.peek() == Some(&'/')`
// requires one char to be both '*' and '/' and can never be true, making the
// `for _ in 0..ws_count` consumption unreachable. Needs a fix before merge.
2130+ Some ( ch) if ch. is_whitespace ( ) && c_style_comments => {
2131+ let mut ws_count = 0 ;
2132+ while let Some ( & c) = chars. peek ( ) {
2133+ if c. is_whitespace ( ) {
2134+ ws_count += 1 ;
2135+ } else if c == '*' && chars. peek ( ) == Some ( & '/' ) {
2136+ for _ in 0 ..ws_count {
2137+ chars. next ( ) ;
2138+ }
2139+ break ;
2140+ } else {
2141+ break ;
2142+ }
2143+ }
2144+ }
// Possible closing `*/`: consume the '/' and pop one nesting level.
21202145 Some ( '*' ) if matches ! ( chars. peek( ) , Some ( '/' ) ) => {
21212146 chars. next ( ) ; // consume the '/'
21222147 nested -= 1 ;
21232148 if nested == 0 {
// An executable comment yields its accumulated body as a single word token
// instead of comment whitespace. NOTE(review): `make_word(&s, None)` on the
// whole body presumably relies on the whitespace-skipping arm above to leave
// `s` holding exactly the bare word — confirm once that arm is fixed.
2149+ if c_style_comments {
2150+ break Ok ( Some ( Token :: make_word ( & s, None ) ) ) ;
2151+ }
21242152 break Ok ( Some ( Token :: Whitespace ( Whitespace :: MultiLineComment ( s) ) ) ) ;
21252153 }
// Not the final close: keep the '*' as ordinary comment text.
21262154 s. push ( '*' ) ;
21272155 }
@@ -4070,4 +4098,39 @@ mod tests {
40704098 panic ! ( "Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}" ) ;
40714099 }
40724100 }
#[test]
fn tokenize_multiline_comment_with_c_style_comment() {
    // `/*! ... */` is a MySQL executable comment: its body must surface as
    // real tokens (a bare word), not as comment whitespace.
    let input = String::from("0/*! word */1");
    let mysql = MySqlDialect {};

    let actual = Tokenizer::new(&mysql, &input).tokenize().unwrap();

    let word_token = Token::Word(Word {
        value: "word".to_string(),
        quote_style: None,
        keyword: Keyword::NoKeyword,
    });
    let expected = vec![
        Token::Number("0".to_string(), false),
        word_token,
        Token::Number("1".to_string(), false),
    ];
    compare(expected, actual);
}
4118+
#[test]
fn tokenize_multiline_comment_with_c_style_comment_and_version() {
    // Same as the plain executable-comment case, but with a MySQL version
    // number after `/*!`: the digits must be swallowed, leaving only `word`.
    let input = String::from("0/*!8000000 word */1");
    let mysql = MySqlDialect {};

    let actual = Tokenizer::new(&mysql, &input).tokenize().unwrap();

    let word_token = Token::Word(Word {
        value: "word".to_string(),
        quote_style: None,
        keyword: Keyword::NoKeyword,
    });
    let expected = vec![
        Token::Number("0".to_string(), false),
        word_token,
        Token::Number("1".to_string(), false),
    ];
    compare(expected, actual);
}
40734136}
0 commit comments