Skip to content

Commit d7be8da

Browse files
authored
Speed up parser token scanning (#279)
Enter the 8x-unrolled space/tab scan only when the byte at the cursor is a space or tab, cache the bare-key and value character lookup tables in local constants during tokenization, and advance past array string elements with `nextToken` instead of the kind-checking `eatToken`.
1 parent ffef514 commit d7be8da

File tree

1 file changed

+41
-31
lines changed

1 file changed

+41
-31
lines changed

Sources/TOMLDecoder/ParserImplementation.swift

Lines changed: 41 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ extension Parser {
121121
}
122122

123123
func scanString(range: Range<Int>, lineNumber: Int) throws(TOMLError) {
124+
let isBareKeyChar = CodeUnits.isBareKeyChar
125+
let isValueChar = CodeUnits.isValueChar
124126
let start = range.lowerBound
125127
let head = bytes[start]
126128
if (head >= CodeUnits.lowerA && head <= CodeUnits.lowerZ) ||
@@ -130,7 +132,7 @@ extension Parser {
130132
var index = start + 1
131133
while index < range.upperBound {
132134
let ch = bytes[index]
133-
if CodeUnits.isBareKeyChar[Int(ch)] {
135+
if isBareKeyChar[Int(ch)] {
134136
index += 1
135137
continue
136138
}
@@ -396,7 +398,7 @@ extension Parser {
396398
if ch == CodeUnits.lf || ch == CodeUnits.dot {
397399
break
398400
}
399-
if CodeUnits.isBareKeyChar[Int(ch)] {
401+
if isBareKeyChar[Int(ch)] {
400402
index += 1
401403
continue
402404
}
@@ -415,7 +417,7 @@ extension Parser {
415417
if ch == CodeUnits.lf {
416418
break
417419
}
418-
if CodeUnits.isValueChar[Int(ch)] {
420+
if isValueChar[Int(ch)] {
419421
index += 1
420422
continue
421423
}
@@ -522,33 +524,43 @@ extension Parser {
522524
let count = bytes.count
523525

524526
while cursor < count {
525-
// 8x unrolling for space/tab skipping
526-
while cursor + 8 <= count {
527-
let c0 = bytes[cursor]
528-
let c1 = bytes[cursor + 1]
529-
let c2 = bytes[cursor + 2]
530-
let c3 = bytes[cursor + 3]
531-
let c4 = bytes[cursor + 4]
532-
let c5 = bytes[cursor + 5]
533-
let c6 = bytes[cursor + 6]
534-
let c7 = bytes[cursor + 7]
535-
536-
if c0 == CodeUnits.space || c0 == CodeUnits.tab,
537-
c1 == CodeUnits.space || c1 == CodeUnits.tab,
538-
c2 == CodeUnits.space || c2 == CodeUnits.tab,
539-
c3 == CodeUnits.space || c3 == CodeUnits.tab,
540-
c4 == CodeUnits.space || c4 == CodeUnits.tab,
541-
c5 == CodeUnits.space || c5 == CodeUnits.tab,
542-
c6 == CodeUnits.space || c6 == CodeUnits.tab,
543-
c7 == CodeUnits.space || c7 == CodeUnits.tab
544-
{
545-
cursor += 8
546-
} else {
547-
break
527+
let ch = bytes[cursor]
528+
if ch == CodeUnits.space || ch == CodeUnits.tab {
529+
// 8x unrolling for space/tab skipping
530+
while cursor + 8 <= count {
531+
let c0 = bytes[cursor]
532+
let c1 = bytes[cursor + 1]
533+
let c2 = bytes[cursor + 2]
534+
let c3 = bytes[cursor + 3]
535+
let c4 = bytes[cursor + 4]
536+
let c5 = bytes[cursor + 5]
537+
let c6 = bytes[cursor + 6]
538+
let c7 = bytes[cursor + 7]
539+
540+
if c0 == CodeUnits.space || c0 == CodeUnits.tab,
541+
c1 == CodeUnits.space || c1 == CodeUnits.tab,
542+
c2 == CodeUnits.space || c2 == CodeUnits.tab,
543+
c3 == CodeUnits.space || c3 == CodeUnits.tab,
544+
c4 == CodeUnits.space || c4 == CodeUnits.tab,
545+
c5 == CodeUnits.space || c5 == CodeUnits.tab,
546+
c6 == CodeUnits.space || c6 == CodeUnits.tab,
547+
c7 == CodeUnits.space || c7 == CodeUnits.tab
548+
{
549+
cursor += 8
550+
} else {
551+
break
552+
}
548553
}
549-
}
550554

551-
let ch = bytes[cursor]
555+
while cursor < count {
556+
let ws = bytes[cursor]
557+
if ws != CodeUnits.space, ws != CodeUnits.tab {
558+
break
559+
}
560+
cursor += 1
561+
}
562+
continue
563+
}
552564
switch ch {
553565
case CodeUnits.lf:
554566
currentLineNumber += 1
@@ -562,8 +574,6 @@ extension Parser {
562574
try nextToken(bytes: bytes, isDotSpecial: isDotSpecial)
563575
return
564576
}
565-
case CodeUnits.space, CodeUnits.tab:
566-
cursor += 1
567577
case CodeUnits.pound:
568578
// Comment
569579
cursor += 1
@@ -765,7 +775,7 @@ extension Parser {
765775

766776
arrays[arrayIndex].elements.append(.leaf(token))
767777

768-
try eatToken(bytes: bytes, kind: .string, isDotSpecial: true)
778+
try nextToken(bytes: bytes, isDotSpecial: true)
769779

770780
case .lbracket: // Nested array
771781
if arrays[arrayIndex].kind == nil {

0 commit comments

Comments
 (0)