Skip to content

Commit 183a4fb

Browse files
wkeese authored and scriptcoded committed
fix: improve operator detection
This fixes the "special" segments to include some missing operators from https://www.w3schools.com/sql/sql_operators.asp and other sites. I took the conservative approach of listing all the operators explicitly, as opposed to using a general regexp such as /(?<special>[^\w\s"'`]+)/, because the conservative approach works better in certain cases such as "x>-5". The downside is that every operator has to be listed, so it's a bit fragile, especially since the exact set of operators varies by SQL dialect (MySQL, Transact-SQL, PostgreSQL, etc.). I kept all the symbols previously specified as "special" even though some of them aren't operators, specifically the comma (,), semicolon (;), colon (:), and dot (.). Fixes #150
1 parent 02d459a commit 183a4fb

File tree

2 files changed

+222
-2
lines changed

2 files changed

+222
-2
lines changed

lib/index.js

+6-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,12 @@ const highlighters = [
3333

3434
/(?<bracket>[()])/,
3535

36-
/(?<special>!=|[=%*/\-+,;:<>.])/,
36+
// Arithmetic, bitwise, comparison, and compound operators as listed in
37+
// https://www.w3schools.com/sql/sql_operators.asp, https://www.tutorialspoint.com/sql/sql-operators.htm,
38+
// https://data-flair.training/blogs/sql-operators/.
39+
// Plus a few other symbols used in SQL statements: ,;:.
40+
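// Multi-character operators (e.g. ">=") are listed before the single-character ones so that the alternation matches them as one operator rather than two.
// NOTE(review): "^-=" and "|*=" are taken from the w3schools table; Transact-SQL documents these compound operators as "^=" and "|=", which this pattern splits into two segments. Confirm and add them if intended.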
41+
/(?<special>\^-=|\|\*=|\+=|-=|\*=|\/=|%=|&=|>=|<=|<>|!=|!<|!>|>>|<<|[+\-*/%&|^=><]|[,;:.])/,
3742

3843
/(?<identifier>\b\w+\b|`(?:[^`\\]|\\.)*`)/,
3944

test/index.test.js

+216-1
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,9 @@ describe('html', () => {
258258
})
259259

260260
describe('getSegments', () => {
261-
it('numbers and operators', () => {
261+
// Test that we can parse all forms of numbers.
262+
// See https://dev.mysql.com/doc/refman/8.0/en/number-literals.html for the syntax (at least for MySQL).
263+
it('numbers', () => {
262264
expect(getSegments('34 - -.5 + +0.5 * 1.23E45 / 4E-3'))
263265
.toStrictEqual([
264266
{ name: 'number', content: '34' },
@@ -281,6 +283,219 @@ describe('getSegments', () => {
281283
])
282284
})
283285

286+
// Test that we can parse the non-logical operators (e.g. +, -, <>), as opposed to logical operators such as AND and OR.
287+
//
288+
// All the non-logical operators are parsed into "special" segments, although the converse isn't true,
289+
// because ",", ";", ":", and "." are also parsed as "special" segments.
290+
// The logical operators like AND and BETWEEN are parsed as "keyword" segments.
291+
//
292+
// In particular, this describe() block tests that:
293+
//
294+
// * All non-logical operators listed at https://www.w3schools.com/sql/sql_operators.asp etc. are recognized.
295+
// * Multi-character operators like >= are parsed as a single segment, even though > and = are both operators too.
296+
// * A leading minus or dot is treated as part of a number when it directly precedes the number's digits, e.g. "x > -5", "- -.2", or even "x>-5".
297+
// * Minus is treated as a binary operator when there are spaces around it, e.g. "x - 5".
298+
//
299+
// Conversely, it avoids testing strings like "x-5" because our regex-lexer architecture isn't
300+
// sophisticated enough to realize the minus must be a binary operator.
301+
describe('non-logical operators', () => {
302+
it('arithmetic', () => {
303+
expect(getSegments('a + 1 - -.2 * 34 /.56 % 7'))
304+
.toStrictEqual([
305+
{ name: 'identifier', content: 'a' },
306+
{ name: 'whitespace', content: ' ' },
307+
{ name: 'special', content: '+' },
308+
{ name: 'whitespace', content: ' ' },
309+
{ name: 'number', content: '1' },
310+
{ name: 'whitespace', content: ' ' },
311+
{ name: 'special', content: '-' },
312+
{ name: 'whitespace', content: ' ' },
313+
{ name: 'number', content: '-.2' },
314+
{ name: 'whitespace', content: ' ' },
315+
{ name: 'special', content: '*' },
316+
{ name: 'whitespace', content: ' ' },
317+
{ name: 'number', content: '34' },
318+
{ name: 'whitespace', content: ' ' },
319+
{ name: 'special', content: '/' },
320+
{ name: 'number', content: '.56' },
321+
{ name: 'whitespace', content: ' ' },
322+
{ name: 'special', content: '%' },
323+
{ name: 'whitespace', content: ' ' },
324+
{ name: 'number', content: '7' }
325+
])
326+
})
327+
328+
it('bitwise', () => {
329+
expect(getSegments('a & 8 | 9 ^b>>c<<d'))
330+
.toStrictEqual([
331+
{ name: 'identifier', content: 'a' },
332+
{ name: 'whitespace', content: ' ' },
333+
{ name: 'special', content: '&' },
334+
{ name: 'whitespace', content: ' ' },
335+
{ name: 'number', content: '8' },
336+
{ name: 'whitespace', content: ' ' },
337+
{ name: 'special', content: '|' },
338+
{ name: 'whitespace', content: ' ' },
339+
{ name: 'number', content: '9' },
340+
{ name: 'whitespace', content: ' ' },
341+
{ name: 'special', content: '^' },
342+
{ name: 'identifier', content: 'b' },
343+
{ name: 'special', content: '>>' },
344+
{ name: 'identifier', content: 'c' },
345+
{ name: 'special', content: '<<' },
346+
{ name: 'identifier', content: 'd' }
347+
])
348+
})
349+
350+
it('single character comparison', () => {
351+
expect(getSegments('a = b'))
352+
.toStrictEqual([
353+
{ name: 'identifier', content: 'a' },
354+
{ name: 'whitespace', content: ' ' },
355+
{ name: 'special', content: '=' },
356+
{ name: 'whitespace', content: ' ' },
357+
{ name: 'identifier', content: 'b' }
358+
])
359+
expect(getSegments('a > b'))
360+
.toStrictEqual([
361+
{ name: 'identifier', content: 'a' },
362+
{ name: 'whitespace', content: ' ' },
363+
{ name: 'special', content: '>' },
364+
{ name: 'whitespace', content: ' ' },
365+
{ name: 'identifier', content: 'b' }
366+
])
367+
expect(getSegments('a<b'))
368+
.toStrictEqual([
369+
{ name: 'identifier', content: 'a' },
370+
{ name: 'special', content: '<' },
371+
{ name: 'identifier', content: 'b' }
372+
])
373+
})
374+
375+
it('multi character comparison', () => {
376+
expect(getSegments('a>=-5'))
377+
.toStrictEqual([
378+
{ name: 'identifier', content: 'a' },
379+
{ name: 'special', content: '>=' },
380+
{ name: 'number', content: '-5' }
381+
])
382+
expect(getSegments('a <= b'))
383+
.toStrictEqual([
384+
{ name: 'identifier', content: 'a' },
385+
{ name: 'whitespace', content: ' ' },
386+
{ name: 'special', content: '<=' },
387+
{ name: 'whitespace', content: ' ' },
388+
{ name: 'identifier', content: 'b' }
389+
])
390+
expect(getSegments('a!=.5'))
391+
.toStrictEqual([
392+
{ name: 'identifier', content: 'a' },
393+
{ name: 'special', content: '!=' },
394+
{ name: 'number', content: '.5' }
395+
])
396+
expect(getSegments('a!<b'))
397+
.toStrictEqual([
398+
{ name: 'identifier', content: 'a' },
399+
{ name: 'special', content: '!<' },
400+
{ name: 'identifier', content: 'b' }
401+
])
402+
expect(getSegments('a!>b'))
403+
.toStrictEqual([
404+
{ name: 'identifier', content: 'a' },
405+
{ name: 'special', content: '!>' },
406+
{ name: 'identifier', content: 'b' }
407+
])
408+
})
409+
410+
it('compound operators', () => {
411+
expect(getSegments('UPDATE STUDENTS SET MARKS+=10,A-=5,B*=6,C/=7,D%=8,E&=F,G^-=H,I|*=J WHERE MARKS<85;'))
412+
.toStrictEqual([
413+
{ name: 'keyword', content: 'UPDATE' },
414+
{ name: 'whitespace', content: ' ' },
415+
{ name: 'identifier', content: 'STUDENTS' },
416+
{ name: 'whitespace', content: ' ' },
417+
{ name: 'keyword', content: 'SET' },
418+
{ name: 'whitespace', content: ' ' },
419+
{ name: 'identifier', content: 'MARKS' },
420+
{ name: 'special', content: '+=' },
421+
{ name: 'number', content: '10' },
422+
{ name: 'special', content: ',' },
423+
{ name: 'identifier', content: 'A' },
424+
{ name: 'special', content: '-=' },
425+
{ name: 'number', content: '5' },
426+
{ name: 'special', content: ',' },
427+
{ name: 'identifier', content: 'B' },
428+
{ name: 'special', content: '*=' },
429+
{ name: 'number', content: '6' },
430+
{ name: 'special', content: ',' },
431+
{ name: 'identifier', content: 'C' },
432+
{ name: 'special', content: '/=' },
433+
{ name: 'number', content: '7' },
434+
{ name: 'special', content: ',' },
435+
{ name: 'identifier', content: 'D' },
436+
{ name: 'special', content: '%=' },
437+
{ name: 'number', content: '8' },
438+
{ name: 'special', content: ',' },
439+
{ name: 'identifier', content: 'E' },
440+
{ name: 'special', content: '&=' },
441+
{ name: 'identifier', content: 'F' },
442+
{ name: 'special', content: ',' },
443+
{ name: 'identifier', content: 'G' },
444+
{ name: 'special', content: '^-=' },
445+
{ name: 'identifier', content: 'H' },
446+
{ name: 'special', content: ',' },
447+
{ name: 'identifier', content: 'I' },
448+
{ name: 'special', content: '|*=' },
449+
{ name: 'identifier', content: 'J' },
450+
{ name: 'whitespace', content: ' ' },
451+
{ name: 'keyword', content: 'WHERE' },
452+
{ name: 'whitespace', content: ' ' },
453+
{ name: 'identifier', content: 'MARKS' },
454+
{ name: 'special', content: '<' },
455+
{ name: 'number', content: '85' },
456+
{ name: 'special', content: ';' }
457+
])
458+
})
459+
})
460+
461+
it('other special characters', () => {
462+
expect(getSegments('select foo.a, foo.b from foo;'))
463+
.toStrictEqual([
464+
{ name: 'keyword', content: 'select' },
465+
{ name: 'whitespace', content: ' ' },
466+
{ name: 'identifier', content: 'foo' },
467+
{ name: 'special', content: '.' },
468+
{ name: 'identifier', content: 'a' },
469+
{ name: 'special', content: ',' },
470+
{ name: 'whitespace', content: ' ' },
471+
{ name: 'identifier', content: 'foo' },
472+
{ name: 'special', content: '.' },
473+
{ name: 'identifier', content: 'b' },
474+
{ name: 'whitespace', content: ' ' },
475+
{ name: 'keyword', content: 'from' },
476+
{ name: 'whitespace', content: ' ' },
477+
{ name: 'identifier', content: 'foo' },
478+
{ name: 'special', content: ';' }
479+
])
480+
expect(getSegments('INSERT INTO MyTable (ID) VALUES (:myId)'))
481+
.toStrictEqual([
482+
{ name: 'keyword', content: 'INSERT INTO' },
483+
{ name: 'whitespace', content: ' ' },
484+
{ name: 'function', content: 'MyTable' },
485+
{ name: 'whitespace', content: ' ' },
486+
{ name: 'bracket', content: '(' },
487+
{ name: 'identifier', content: 'ID' },
488+
{ name: 'bracket', content: ')' },
489+
{ name: 'whitespace', content: ' ' },
490+
{ name: 'keyword', content: 'VALUES' },
491+
{ name: 'whitespace', content: ' ' },
492+
{ name: 'bracket', content: '(' },
493+
{ name: 'special', content: ':' },
494+
{ name: 'identifier', content: 'myId' },
495+
{ name: 'bracket', content: ')' }
496+
])
497+
})
498+
284499
it('complex query', () => {
285500
expect(getSegments("SELECT COUNT(id), `id`, `username` FROM `users` WHERE `email` = '[email protected]' AND `foo` = 'BAR' OR 1=1"))
286501
.toStrictEqual([

0 commit comments

Comments
 (0)