Skip to content

Commit 5bebbf5

Browse files
weissi authored and normanmaurer committed
don't crash if close is called when close fails (#387)
Motivation: @vlm hit an interesting situation which is very likely the sign of another bug that we have yet to find: during a close, @vlm hit an error in close, which led to a user callout, which then called close again. The immediate fix (this PR) is simple: as always, do not call out to the user before reconciling our own state. But hitting this bug also means that either a `socket.close` or a `selectableEventLoop.deregister` failed, as we only called out in those situations. That definitely hides a condition that is hard to imagine without another bug that we still need to find. Modifications: in `BaseSocketChannel.close0`, follow rule 0 and don't call out before reconciling state. Result: fewer crashes, less potential to hide other bugs.
1 parent 359f359 commit 5bebbf5

File tree

4 files changed

+79
-6
lines changed

4 files changed

+79
-6
lines changed

Sources/NIO/BaseSocket.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ class BaseSocket: Selectable {
394394
/// After the socket was closed all other methods will throw an `IOError` when called.
395395
///
396396
/// - throws: An `IOError` if the operation failed.
397-
final func close() throws {
397+
func close() throws {
398398
try withUnsafeFileDescriptor { fd in
399399
try Posix.close(descriptor: fd)
400400
}

Sources/NIO/BaseSocketChannel.swift

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -664,33 +664,51 @@ class BaseSocketChannel<T: BaseSocket>: SelectableChannel, ChannelCore {
664664
return
665665
}
666666

667+
// === BEGIN: No user callouts ===
668+
669+
// this is to register all error callouts as all the callouts must happen after we transition out state
670+
var errorCallouts: [(ChannelPipeline) -> Void] = []
671+
667672
self.interestedEvent = .reset
668673
do {
669674
try selectableEventLoop.deregister(channel: self)
670675
} catch let err {
671-
pipeline.fireErrorCaught0(error: err)
676+
errorCallouts.append { pipeline in
677+
pipeline.fireErrorCaught0(error: err)
678+
}
672679
}
673680

674681
let p: EventLoopPromise<Void>?
675682
do {
676683
try socket.close()
677684
p = promise
678685
} catch {
679-
promise?.fail(error: error)
680-
// Set p to nil as we want to ensure we pass nil to becomeInactive0(...) so we not try to notify the promise again.
686+
errorCallouts.append { (_: ChannelPipeline) in
687+
promise?.fail(error: error)
688+
// Set p to nil as we want to ensure we pass nil to becomeInactive0(...) so we not try to notify the promise again.
689+
}
681690
p = nil
682691
}
683692

684693
// Transition our internal state.
685694
let callouts = self.lifecycleManager.close(promise: p)
686695

696+
// === END: No user callouts (now that our state is reconciled, we can call out to user code.) ===
697+
698+
// this must be the first to call out as it transitions the PendingWritesManager into the closed state
699+
// and we assert elsewhere that the PendingWritesManager has the same idea of 'open' as we have in here.
700+
self.cancelWritesOnClose(error: error)
701+
702+
// this should be a no-op as we shouldn't have any
703+
errorCallouts.forEach {
704+
$0(self.pipeline)
705+
}
706+
687707
if let connectPromise = self.pendingConnect {
688708
self.pendingConnect = nil
689709
connectPromise.fail(error: error)
690710
}
691711

692-
// Now that our state is sensible, we can call out to user code.
693-
self.cancelWritesOnClose(error: error)
694712
callouts(self.pipeline)
695713

696714
eventLoop.execute {

Tests/NIOTests/ChannelTests+XCTest.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ extension ChannelTests {
6868
("testSocketFailingAsyncCorrectlyTearsTheChannelDownAndDoesntCrash", testSocketFailingAsyncCorrectlyTearsTheChannelDownAndDoesntCrash),
6969
("testSocketErroringSynchronouslyCorrectlyTearsTheChannelDown", testSocketErroringSynchronouslyCorrectlyTearsTheChannelDown),
7070
("testConnectWithECONNREFUSEDGetsTheRightError", testConnectWithECONNREFUSEDGetsTheRightError),
71+
("testCloseInUnregister", testCloseInUnregister),
7172
]
7273
}
7374
}

Tests/NIOTests/ChannelTests.swift

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2263,6 +2263,60 @@ public class ChannelTests: XCTestCase {
22632263
}
22642264
}
22652265

2266+
/// Regression test for closing a channel again from within the failure callback of a
/// previous close.
///
/// The channel is backed by a socket whose `close()` always throws, so the promise handed to
/// the first `close` is failed with `DummyError`. The error callback then calls `close()` a
/// second time; that second close is expected to fail with `ChannelError.alreadyClosed`.
func testCloseInUnregister() throws {
2267+
enum DummyError: Error { case dummy }
2268+
// A socket whose `close()` always throws `DummyError.dummy`.
class SocketFailingClose: Socket {
2269+
init() throws {
2270+
try super.init(protocolFamily: PF_INET, type: Posix.SOCK_STREAM, setNonBlocking: true)
2271+
}
2272+
2273+
override func close() throws {
2274+
// still attempt the real close, but discard whatever error it may throw ...
_ = try? super.close()
2275+
// ... and then report a failure unconditionally
throw DummyError.dummy
2276+
}
2277+
}
2278+
2279+
let group = MultiThreadedEventLoopGroup(numThreads: 2)
2280+
defer {
2281+
XCTAssertNoThrow(try group.syncShutdownGracefully())
2282+
}
// the channel under test, built on top of the socket that fails to close
let sc = try SocketChannel(socket: SocketFailingClose(), eventLoop: group.next() as! SelectableEventLoop)
2283+
2284+
// a local server for `sc` to connect to (port 0 is passed, the actual address is read back below)
let serverChannel = try ServerBootstrap(group: group.next())
2285+
.bind(host: "127.0.0.1", port: 0)
2286+
.wait()
2287+
defer {
2288+
XCTAssertNoThrow(try serverChannel.syncCloseAcceptingAlreadyClosed())
2289+
}
2290+
2291+
// register and connect from the channel's own event loop
XCTAssertNoThrow(try sc.eventLoop.submit {
2292+
sc.register().then {
2293+
sc.connect(to: serverChannel.localAddress!)
2294+
}
2295+
// first `wait()` is for the submitted closure, second for the register/connect future it returned
}.wait().wait() as Void)
2296+
2297+
do {
2298+
try sc.eventLoop.submit { () -> EventLoopFuture<Void> in
2299+
let p: EventLoopPromise<Void> = sc.eventLoop.newPromise()
2300+
// this callback must be attached before we call the close
2301+
let f = p.futureResult.map {
2302+
XCTFail("shouldn't be reached")
2303+
}.thenIfError { err in
2304+
// the first close must have failed with the socket's `DummyError` ...
XCTAssertNotNil(err as? DummyError)
2305+
// ... and closing again from inside this error callback is the scenario under test
return sc.close()
2306+
}
2307+
sc.close(promise: p)
2308+
return f
2309+
}.wait().wait()
2310+
XCTFail("shouldn't be reached")
2311+
} catch ChannelError.alreadyClosed {
2312+
// ok
2313+
} catch {
2314+
XCTFail("wrong error: \(error)")
2315+
}
2316+
2317+
2318+
}
2319+
22662320
}
22672321

22682322
fileprivate class VerifyConnectionFailureHandler: ChannelInboundHandler {

0 commit comments

Comments
 (0)