Skip to content

Commit e60aafe

Browse files
Merge pull request #876 from LinuxJedi/sftp-hang
Fix SFTP server hang on WS_WANT_WRITE with non-blocking sockets
2 parents 61d1094 + 67c7b9c commit e60aafe

File tree

6 files changed

+188
-9
lines changed

6 files changed

+188
-9
lines changed

.github/workflows/paramiko-sftp-test.yml

Lines changed: 62 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -132,45 +132,99 @@ jobs:
132132
import os
133133
import time
134134
import sys
135-
135+
import hashlib
136+
import signal
137+
138+
# Timeout handler for detecting hangs
139+
class TimeoutError(Exception):
140+
pass
141+
142+
def timeout_handler(signum, frame):
143+
raise TimeoutError("SFTP operation timed out - possible hang detected!")
144+
145+
def get_file_hash(filepath):
146+
"""Calculate MD5 hash of a file."""
147+
hash_md5 = hashlib.md5()
148+
with open(filepath, "rb") as f:
149+
for chunk in iter(lambda: f.read(8192), b""):
150+
hash_md5.update(chunk)
151+
return hash_md5.hexdigest()
152+
136153
def run_sftp_test():
137154
# Create SSH client
138155
ssh = paramiko.SSHClient()
139156
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
140-
157+
141158
# Connect to server using password authentication with testuser
142159
print("Connecting to wolfSSHd server...")
143160
try:
144161
ssh.connect('127.0.0.1', port=22222, username='testuser', password='testpassword')
145162
except Exception as e:
146163
print(f"Connection error: {e}")
147164
raise
148-
165+
149166
# Open SFTP session
150167
print("Opening SFTP session...")
151168
sftp = ssh.open_sftp()
152-
169+
153170
# Upload test
154171
print("Uploading 20MB test file...")
155172
start_time = time.time()
156173
sftp.put('/tmp/sftp_upload/test_upload.dat', '/tmp/test_upload.dat')
157174
upload_time = time.time() - start_time
158175
print(f"Upload completed in {upload_time:.2f} seconds")
159-
176+
160177
# Download test
161178
print("Downloading 20MB test file...")
162179
start_time = time.time()
163180
sftp.get('/tmp/test_upload.dat', '/tmp/sftp_download/test_download.dat')
164181
download_time = time.time() - start_time
165182
print(f"Download completed in {download_time:.2f} seconds")
166-
183+
184+
# Stress test: Repeated GET operations with prefetch
185+
# This tests the WS_WANT_WRITE handling during repeated transfers
186+
# which was the original bug trigger (SFTP hang on non-blocking sockets)
187+
print("\n=== Starting repeated GET stress test (prefetch enabled) ===")
188+
num_iterations = 10
189+
timeout_seconds = 30 # Per-operation timeout to detect hangs
190+
191+
orig_hash = get_file_hash('/tmp/sftp_upload/test_upload.dat')
192+
193+
for i in range(num_iterations):
194+
signal.signal(signal.SIGALRM, timeout_handler)
195+
signal.alarm(timeout_seconds)
196+
try:
197+
download_path = f'/tmp/sftp_download/stress_test_{i}.dat'
198+
start_time = time.time()
199+
# Paramiko uses prefetch by default for get()
200+
sftp.get('/tmp/test_upload.dat', download_path)
201+
elapsed = time.time() - start_time
202+
203+
# Verify integrity
204+
down_hash = get_file_hash(download_path)
205+
if orig_hash != down_hash:
206+
print(f" Iteration {i+1}: FAILED - hash mismatch!")
207+
return False
208+
209+
print(f" Iteration {i+1}/{num_iterations}: OK ({elapsed:.2f}s)")
210+
os.remove(download_path) # Cleanup
211+
212+
except TimeoutError as e:
213+
print(f" Iteration {i+1}: FAILED - {e}")
214+
print(" This may indicate the WS_WANT_WRITE hang bug!")
215+
return False
216+
finally:
217+
signal.alarm(0) # Always cancel alarm
218+
219+
print(f"=== Stress test completed: {num_iterations} iterations OK ===\n")
220+
167221
# Close connections
168222
sftp.close()
169223
ssh.close()
170-
224+
171225
print("SFTP session closed")
172226
return True
173-
227+
174228
if __name__ == "__main__":
175229
try:
176230
success = run_sftp_test()

src/internal.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3462,6 +3462,12 @@ int wolfSSH_SendPacket(WOLFSSH* ssh)
34623462
}
34633463

34643464

3465+
int wolfSSH_OutputPending(WOLFSSH* ssh)
3466+
{
3467+
return (ssh != NULL && ssh->outputBuffer.length > ssh->outputBuffer.idx);
3468+
}
3469+
3470+
34653471
static int GetInputData(WOLFSSH* ssh, word32 size)
34663472
{
34673473
int in;

src/wolfsftp.c

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ enum WS_SFTP_LSTAT_STATE_ID {
145145
STATE_LSTAT_CLEANUP
146146
};
147147

148-
/* This structure is to help with nonblocking and keeping track of state.
148+
/* SFTP buffer for nonblocking state tracking.
149149
* If adding any read/writes use the wolfSSH_SFTP_buffer_read/send functions */
150150
typedef struct WS_SFTP_BUFFER {
151151
byte* data;
@@ -526,6 +526,19 @@ static int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer)
526526
return WS_BUFFER_E;
527527
}
528528

529+
/* Flush any pending data in SSH output buffer first.
530+
* Handles case where previous send returned WS_WANT_WRITE
531+
* and data is still buffered at the SSH layer. */
532+
if (wolfSSH_OutputPending(ssh)) {
533+
ret = wolfSSH_SendPacket(ssh);
534+
if (ret == WS_WANT_WRITE) {
535+
return ret;
536+
}
537+
if (ret < 0) {
538+
return ret;
539+
}
540+
}
541+
529542
/* Call wolfSSH worker if rekeying or adjusting window size */
530543
err = wolfSSH_get_error(ssh);
531544
if (err == WS_WINDOW_FULL || err == WS_REKEYING) {
@@ -546,6 +559,20 @@ static int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer)
546559
}
547560

548561

562+
#ifdef WOLFSSH_TEST_INTERNAL
563+
int wolfSSH_TestSftpBufferSend(WOLFSSH* ssh,
564+
byte* data, word32 sz, word32 idx)
565+
{
566+
WS_SFTP_BUFFER buffer;
567+
568+
buffer.data = data;
569+
buffer.sz = sz;
570+
buffer.idx = idx;
571+
return wolfSSH_SFTP_buffer_send(ssh, &buffer);
572+
}
573+
#endif
574+
575+
549576
/* returns the amount read on success */
550577
static int wolfSSH_SFTP_buffer_read(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer,
551578
int readSz)
@@ -1603,6 +1630,13 @@ int wolfSSH_SFTP_read(WOLFSSH* ssh)
16031630
ssh->error = WS_WANT_WRITE;
16041631
return WS_FATAL_ERROR;
16051632
}
1633+
/* Check if SSH layer still has pending data from WS_WANT_WRITE.
1634+
* Even if SFTP buffer is fully consumed, the data may still be
1635+
* sitting in the SSH output buffer waiting to be sent. */
1636+
if (wolfSSH_OutputPending(ssh)) {
1637+
ssh->error = WS_WANT_WRITE;
1638+
return WS_FATAL_ERROR;
1639+
}
16061640
ret = WS_SUCCESS;
16071641
state->toSend = 0;
16081642
}

tests/regress.c

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@
4040
#include <wolfssh/port.h>
4141
#include <wolfssh/ssh.h>
4242
#include <wolfssh/internal.h>
43+
#ifdef WOLFSSH_SFTP
44+
#include <wolfssh/wolfsftp.h>
45+
#endif
4346
#include "apps/wolfssh/common.h"
4447

4548
#ifndef WOLFSSH_NO_ABORT
@@ -482,6 +485,78 @@ static void TestWorkerReadsWhenSendWouldBlock(void)
482485
#endif
483486

484487

488+
#ifdef WOLFSSH_SFTP
489+
/* Test that wolfSSH_SFTP_buffer_send() properly handles WS_WANT_WRITE when
490+
* SSH output buffer has pending data. This is a regression test for
491+
* the SFTP hang issue with non-blocking sockets.
492+
*
493+
* The fix checks for pending data in ssh->outputBuffer at the start of
494+
* wolfSSH_SFTP_buffer_send() and returns WS_WANT_WRITE if the flush fails. */
495+
static int sftpWantWriteCallCount = 0;
496+
497+
static int SftpWantWriteSendCb(WOLFSSH* ssh, void* buf, word32 sz, void* ctx)
498+
{
499+
(void)ssh; (void)buf; (void)ctx;
500+
sftpWantWriteCallCount++;
501+
/* First call returns WANT_WRITE, subsequent calls succeed */
502+
if (sftpWantWriteCallCount == 1) {
503+
return WS_CBIO_ERR_WANT_WRITE;
504+
}
505+
return (int)sz;
506+
}
507+
508+
static int SftpDummyRecv(WOLFSSH* ssh, void* buf, word32 sz, void* ctx)
509+
{
510+
(void)ssh; (void)buf; (void)sz; (void)ctx;
511+
return WS_CBIO_ERR_WANT_READ;
512+
}
513+
514+
static void TestSftpBufferSendPendingOutput(void)
515+
{
516+
WOLFSSH_CTX* ctx;
517+
WOLFSSH* ssh;
518+
byte testData[16];
519+
int ret;
520+
521+
ctx = wolfSSH_CTX_new(WOLFSSH_ENDPOINT_SERVER, NULL);
522+
AssertNotNull(ctx);
523+
524+
wolfSSH_SetIOSend(ctx, SftpWantWriteSendCb);
525+
wolfSSH_SetIORecv(ctx, SftpDummyRecv);
526+
527+
ssh = wolfSSH_new(ctx);
528+
AssertNotNull(ssh);
529+
530+
WMEMSET(testData, 0x42, sizeof(testData));
531+
532+
/* Simulate pending data in SSH output buffer (as if previous send
533+
* returned WS_WANT_WRITE and data was buffered).
534+
* Note: outputBuffer is initialized by BufferInit() with bufferSz set
535+
* to at least STATIC_BUFFER_LEN (16 bytes), so we use a smaller value. */
536+
ssh->outputBuffer.length = 8; /* 8 bytes pending */
537+
ssh->outputBuffer.idx = 0; /* none sent yet */
538+
539+
sftpWantWriteCallCount = 0;
540+
541+
/* Call wolfSSH_TestSftpBufferSend - should return WS_WANT_WRITE because
542+
* the fix detects pending data in outputBuffer and tries to flush it,
543+
* which fails with WS_WANT_WRITE from our callback.
544+
*
545+
* Before the fix, the function would ignore the pending SSH output buffer
546+
* data and proceed to send new SFTP data, leading to a hang because the
547+
* pending data was never flushed. */
548+
ret = wolfSSH_TestSftpBufferSend(ssh, testData, sizeof(testData), 0);
549+
AssertIntEQ(ret, WS_WANT_WRITE);
550+
551+
/* Verify the SSH output buffer still has pending data */
552+
AssertTrue(ssh->outputBuffer.length > ssh->outputBuffer.idx);
553+
554+
wolfSSH_free(ssh);
555+
wolfSSH_CTX_free(ctx);
556+
}
557+
#endif /* WOLFSSH_SFTP */
558+
559+
485560
int main(int argc, char** argv)
486561
{
487562
WOLFSSH_CTX* ctx;
@@ -515,6 +590,10 @@ int main(int argc, char** argv)
515590
TestWorkerReadsWhenSendWouldBlock();
516591
#endif
517592

593+
#ifdef WOLFSSH_SFTP
594+
TestSftpBufferSendPendingOutput();
595+
#endif
596+
518597
/* TODO: add app-level regressions that simulate stdin EOF/password
519598
* prompts and mid-session socket closes once the test harness can
520599
* drive the wolfssh client without real sockets/tty. */

wolfssh/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,6 +1072,7 @@ enum ChannelOpenFailReasons {
10721072
WOLFSSH_LOCAL int DoReceive(WOLFSSH*);
10731073
WOLFSSH_LOCAL int DoProtoId(WOLFSSH*);
10741074
WOLFSSH_LOCAL int wolfSSH_SendPacket(WOLFSSH*);
1075+
WOLFSSH_LOCAL int wolfSSH_OutputPending(WOLFSSH*);
10751076
WOLFSSH_LOCAL int SendProtoId(WOLFSSH*);
10761077
WOLFSSH_LOCAL int SendKexInit(WOLFSSH*);
10771078
WOLFSSH_LOCAL int SendKexDhInit(WOLFSSH*);

wolfssh/wolfsftp.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,11 @@ WOLFSSL_LOCAL int SFTP_RemoveHandleNode(WOLFSSH* ssh, byte* handle,
282282

283283
WOLFSSH_LOCAL void wolfSSH_SFTP_ShowSizes(void);
284284

285+
#ifdef WOLFSSH_TEST_INTERNAL
286+
WOLFSSH_API int wolfSSH_TestSftpBufferSend(WOLFSSH* ssh,
287+
byte* data, word32 sz, word32 idx);
288+
#endif
289+
285290
#ifdef __cplusplus
286291
}
287292
#endif

0 commit comments

Comments
 (0)