Skip to content

Commit 7a9dc5c

Browse files
milsemanMichael Ilseman
authored and committed
Move to UTF8Span from BufferView
1 parent 983c099 commit 7a9dc5c

File tree

3 files changed

+129
-71
lines changed

3 files changed

+129
-71
lines changed

Sources/FoundationEssentials/Formatting/Date+HTTPFormatStyle.swift

Lines changed: 24 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -60,32 +60,23 @@ extension Date {
6060
}
6161

6262
fileprivate func parse(_ value: String, in range: Range<String.Index>) -> (String.Index, Date)? {
63-
var v = value[range]
63+
let v = value[range]
6464
guard !v.isEmpty else {
6565
return nil
6666
}
67-
68-
let result = v.withUTF8 { buffer -> (Int, Date)? in
69-
let view = BufferView(unsafeBufferPointer: buffer)!
7067

71-
guard let comps = try? componentsStyle.components(in: view) else {
72-
return nil
73-
}
74-
75-
// HTTP dates are always GMT
76-
guard let date = Calendar(identifier: .gregorian).date(from: comps.components) else {
77-
return nil
78-
}
79-
80-
return (comps.consumed, date)
68+
guard #available(FoundationSpan 6.2, *) else {
69+
fatalError("Span unavailable")
8170
}
82-
83-
guard let result else {
71+
72+
guard let comps = try? componentsStyle.components(in: v.utf8Span),
73+
let date = Calendar(identifier: .gregorian).date(from: comps.components)
74+
else {
8475
return nil
8576
}
86-
87-
let endIndex = value.utf8.index(v.startIndex, offsetBy: result.0)
88-
return (endIndex, result.1)
77+
78+
let endIndex = value.utf8.index(v.startIndex, offsetBy: comps.consumed)
79+
return (endIndex, date)
8980
}
9081
}
9182
}
@@ -316,30 +307,27 @@ extension DateComponents {
316307
}
317308

318309
private func parse(_ value: String, in range: Range<String.Index>) -> (String.Index, DateComponents)? {
319-
var v = value[range]
310+
let v = value[range]
320311
guard !v.isEmpty else {
321312
return nil
322313
}
323-
324-
let result = v.withUTF8 { buffer -> (Int, DateComponents)? in
325-
let view = BufferView(unsafeBufferPointer: buffer)!
326314

327-
guard let comps = try? components(in: view) else {
328-
return nil
329-
}
330-
331-
return (comps.consumed, comps.components)
315+
guard #available(FoundationSpan 6.2, *) else {
316+
fatalError("Span unavailable")
332317
}
333-
334-
guard let result else {
318+
319+
guard let comps = try? components(in: v.utf8Span) else {
335320
return nil
336321
}
337-
338-
let endIndex = value.utf8.index(v.startIndex, offsetBy: result.0)
339-
return (endIndex, result.1)
322+
323+
let endIndex = value.utf8.index(v.startIndex, offsetBy: comps.consumed)
324+
return (endIndex, comps.components)
340325
}
341326

342-
fileprivate func components(in view: borrowing BufferView<UInt8>) throws -> ComponentsParseResult {
327+
@available(FoundationSpan 6.2, *)
328+
fileprivate func components(
329+
in view: UTF8Span
330+
) throws -> ComponentsParseResult {
343331
// https://www.rfc-editor.org/rfc/rfc9110.html#http.date
344332
// <day-name>, <day> <month> <year> <hour>:<minute>:<second> GMT
345333

@@ -348,7 +336,7 @@ extension DateComponents {
348336
parseError(view, exampleFormattedString: Date.HTTPFormatStyle().format(Date.now), extendedDescription: extendedDescription)
349337
}
350338

351-
var it = view.makeIterator()
339+
var it = view.makeCursor()
352340
var dc = DateComponents()
353341

354342
// Despite the spec, we allow the weekday name to be optional.
@@ -500,7 +488,7 @@ extension DateComponents {
500488
dc.calendar = Calendar(identifier: .gregorian)
501489

502490
// BufferView lacked this functionality, so it used to be calculated by hand; the cursor now tracks the number of UTF-8 code units consumed directly.
503-
let utf8CharactersRead = it.curPointer - view.startIndex._rawValue
491+
let utf8CharactersRead = it.currentOffset
504492

505493
return ComponentsParseResult(consumed: utf8CharactersRead, components: dc)
506494
}

Sources/FoundationEssentials/Formatting/DateComponents+ISO8601FormatStyle.swift

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -407,15 +407,16 @@ extension DateComponents.ISO8601FormatStyle {
407407
var components: DateComponents
408408
}
409409

410-
private func components(fillMissingUnits: Bool, defaultTimeZone: TimeZone, in view: borrowing BufferView<UInt8>) throws -> ComponentsParseResult {
410+
@available(FoundationSpan 6.2, *)
411+
private func components(fillMissingUnits: Bool, defaultTimeZone: TimeZone, in view: UTF8Span) throws -> ComponentsParseResult {
411412
let fields = formatFields
412413

413414
// Produce an error message to throw
414415
func error(_ extendedDescription: String? = nil) -> CocoaError {
415416
parseError(view, exampleFormattedString: Date.ISO8601FormatStyle(self).format(Date.now), extendedDescription: extendedDescription)
416417
}
417418

418-
var it = view.makeIterator()
419+
var it = view.makeCursor()
419420
var needsSeparator = false
420421

421422
// Keep these fields local and set them in the DateComponents once for improved performance
@@ -618,7 +619,7 @@ extension DateComponents.ISO8601FormatStyle {
618619
if let next = it.peek(), (next == UInt8(ascii: "+") || next == UInt8(ascii: "-")) {
619620
if next == UInt8(ascii: "+") { positive = true }
620621
else { positive = false }
621-
it.advance()
622+
it.uncheckedAdvance()
622623
} else {
623624
positive = true
624625
tzOffset = 0
@@ -631,7 +632,7 @@ extension DateComponents.ISO8601FormatStyle {
631632
if let next = it.peek(), (next == UInt8(ascii: "+") || next == UInt8(ascii: "-")) {
632633
if next == UInt8(ascii: "+") { positive = true }
633634
else { positive = false }
634-
it.advance()
635+
it.uncheckedAdvance()
635636
} else {
636637
positive = true
637638
tzOffset = 0
@@ -660,8 +661,8 @@ extension DateComponents.ISO8601FormatStyle {
660661
if let next = it.peek() {
661662
if next == UInt8(ascii: ":") {
662663
// Throw it away
663-
it.advance()
664-
664+
it.uncheckedAdvance()
665+
665666
// But we should have minutes after this
666667
expectMinutes = true
667668
} else if isASCIIDigit(next) {
@@ -732,7 +733,7 @@ extension DateComponents.ISO8601FormatStyle {
732733
rawDayOfYear: dayOfYear)
733734

734735
// BufferView lacked this functionality, so it used to be calculated by hand; the cursor now tracks the number of UTF-8 code units consumed directly.
735-
let utf8CharactersRead = it.curPointer - view.startIndex._rawValue
736+
let utf8CharactersRead = it.currentOffset
736737
return ComponentsParseResult(consumed: utf8CharactersRead, components: dc)
737738
}
738739
}
@@ -772,27 +773,21 @@ extension DateComponents.ISO8601FormatStyle : ParseStrategy {
772773
}
773774

774775
internal func parse(_ value: String, fillMissingUnits: Bool, in range: Range<String.Index>) -> (String.Index, DateComponents)? {
775-
var v = value[range]
776+
let v = value[range]
776777
guard !v.isEmpty else {
777778
return nil
778779
}
779-
780-
let result = v.withUTF8 { buffer -> (Int, DateComponents)? in
781-
let view = BufferView(unsafeBufferPointer: buffer)!
782780

783-
guard let comps = try? components(fillMissingUnits: fillMissingUnits, defaultTimeZone: timeZone, in: view) else {
784-
return nil
785-
}
786-
787-
return (comps.consumed, comps.components)
781+
guard #available(FoundationSpan 6.2, *) else {
782+
fatalError("Span unavailable")
788783
}
789-
790-
guard let result else {
784+
785+
guard let comps = try? components(fillMissingUnits: fillMissingUnits, defaultTimeZone: timeZone, in: v.utf8Span) else {
791786
return nil
792787
}
793-
794-
let endIndex = value.utf8.index(v.startIndex, offsetBy: result.0)
795-
return (endIndex, result.1)
788+
789+
let endIndex = value.utf8.index(v.startIndex, offsetBy: comps.consumed)
790+
return (endIndex, comps.components)
796791
}
797792
}
798793

Sources/FoundationEssentials/Formatting/FormatParsingUtilities.swift

Lines changed: 89 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,15 @@ func parseError(
1818
parseError(String(decoding: value, as: UTF8.self), exampleFormattedString: exampleFormattedString, extendedDescription: extendedDescription)
1919
}
2020

21+
@available(FoundationSpan 6.2, *)
22+
func parseError(
23+
_ value: UTF8Span, exampleFormattedString: String?, extendedDescription: String? = nil
24+
) -> CocoaError {
25+
// TODO: change to UTF8Span, and prototype string append and interpolation taking UTF8Span
26+
parseError(String(copying: value), exampleFormattedString: exampleFormattedString, extendedDescription: extendedDescription)
27+
}
28+
29+
2130
package func parseError(_ value: String, exampleFormattedString: String?, extendedDescription: String? = nil) -> CocoaError {
2231
let errorStr: String
2332
if let exampleFormattedString = exampleFormattedString {
@@ -32,19 +41,66 @@ func isASCIIDigit(_ x: UInt8) -> Bool {
3241
x >= UInt8(ascii: "0") && x <= UInt8(ascii: "9")
3342
}
3443

35-
/**
3644

37-
Fundamental operations:
38-
- Peek: return the next portion of input, if it exists and matches the given criteria
39-
- Match: like peek, but also consumes the portion of input
40-
- Parse: like match, but produces a value by interpreting the portion of input
45+
@available(FoundationSpan 6.2, *)
46+
extension UTF8Span {
47+
// This is just an iterator style type, though for UTF8 we can
48+
// load scalars and Characters, presumably.
49+
//
50+
// NOTE: I'm calling this "Cursor" temporarily as "Iterator" might
51+
// end up being taken for other reasons.
52+
struct Cursor: ~Escapable {
53+
var span: UTF8Span
54+
var currentOffset: Int
55+
56+
@lifetime(copy span)
57+
init(_ span: UTF8Span) {
58+
self.span = span
59+
self.currentOffset = 0
60+
}
61+
}
62+
63+
@lifetime(copy self) // copy or borrow?
64+
func makeCursor() -> Cursor {
65+
.init(self)
66+
}
67+
}
68+
69+
@available(FoundationSpan 6.2, *)
70+
extension UTF8Span.Cursor {
71+
@lifetime(self: copy self)
72+
mutating func uncheckedAdvance() {
73+
assert(self.currentOffset < span.count)
74+
self.currentOffset += 1
75+
}
76+
77+
func peek() -> UInt8? {
78+
guard !isEmpty else { return nil }
79+
return span.span[unchecked: self.currentOffset]
80+
}
4181

42-
Notes on return types:
43-
`peek(_:(UInt8) -> Bool) -> UInt8?` is more descriptive than returning a `Bool`, but slightly less ergonomic if you only care about the `Bool`. If we don't return the `UInt8`, some callers may need to store it from the function somehow or else double-load it.
44-
Match functions have different return types, depending on whether they always succeed, whether they match a variable length, etc. Since they also advance as part of matching, the return lengths are discardable. They can also be retroactively calculated by the caller; we just return them because we can.
45-
Finally, the parse functions just return the value, as there's no way to have a discardable return value alongside a non-discardable one. Again, lengths can be retroactively calculated by the caller based on the iterator's new offset.
46-
*/
47-
extension BufferViewIterator<UInt8> {
82+
@lifetime(self: copy self)
83+
mutating func next() -> UInt8? {
84+
guard !isEmpty else { return nil }
85+
defer { uncheckedAdvance() }
86+
return peek()
87+
}
88+
89+
var isEmpty: Bool { self.currentOffset >= span.count }
90+
91+
@lifetime(self: copy self)
92+
mutating func consume(_ byte: UInt8) -> Bool {
93+
guard peek() == byte else {
94+
return false
95+
}
96+
uncheckedAdvance()
97+
return true
98+
}
99+
100+
}
101+
102+
@available(FoundationSpan 6.2, *)
103+
extension UTF8Span.Cursor {
48104
// Returns the next byte if there is one and it
49105
// matches the predicate, otherwise nil
50106
func peek(_ f: (UInt8) -> Bool) -> UInt8? {
@@ -54,22 +110,38 @@ extension BufferViewIterator<UInt8> {
54110
return b
55111
}
56112

113+
@lifetime(self: copy self)
57114
mutating func matchByte(_ expected: UInt8) -> Bool {
58115
if peek() == expected {
59-
_uncheckedAdvance()
116+
uncheckedAdvance()
60117
return true
61118
}
62119
return false
63120
}
64121

122+
@lifetime(self: copy self)
65123
mutating func matchPredicate(_ f: (UInt8) -> Bool) -> UInt8? {
66124
guard let b = peek(f) else {
67125
return nil
68126
}
69-
_uncheckedAdvance()
127+
uncheckedAdvance()
70128
return b
71129
}
72130

131+
/**
132+
NOTE: We want a `match(anyOf:)` operation that takes an Array or Set
133+
literal (or String literal, clearly delineated to mean ASCII), but is guaranteed not to actually materialize a runtime managed object.
134+
135+
For example, that would handle this pattern from ISO8601:
136+
```
137+
if let next = it.peek(), (next == UInt8(ascii: "+") || next == UInt8(ascii: "-")) {
138+
if next == UInt8(ascii: "+") { positive = true }
139+
else { positive = false }
140+
it.uncheckedAdvance()
141+
```
142+
*/
143+
144+
@lifetime(self: copy self)
73145
@discardableResult
74146
mutating func matchZeroOrMore(_ expected: UInt8) -> Int {
75147
var count = 0
@@ -79,6 +151,7 @@ extension BufferViewIterator<UInt8> {
79151
return count
80152
}
81153

154+
@lifetime(self: copy self)
82155
@discardableResult
83156
mutating func matchOneOrMore(_ expected: UInt8) -> Int? {
84157
let c = matchZeroOrMore(expected)
@@ -87,16 +160,18 @@ extension BufferViewIterator<UInt8> {
87160

88161
// TODO: I think it would be cleaner to separate out
89162
// nanosecond handling here...
163+
@lifetime(self: copy self)
90164
mutating func parseNumber(minDigits: Int? = nil, maxDigits: Int? = nil, nanoseconds: Bool = false) -> Int? {
91165
// Consume all leading zeros, parse until we no longer see a digit
92166
var result = 0
93167
var count = 0
94168
// Cap at 10 digits max to avoid overflow
95169
let max = min(maxDigits ?? 10, 10)
96-
while let next = matchPredicate(isASCIIDigit) {
170+
while let next = peek(), isASCIIDigit(next) {
97171
let digit = Int(next - UInt8(ascii: "0"))
98172
result *= 10
99173
result += digit
174+
uncheckedAdvance()
100175
count += 1
101176
if count >= max { break }
102177
}

0 commit comments

Comments
 (0)