Skip to content

Commit 2ebdfbb

Browse files
authored
Merge pull request #11915 from bosilca/topic/avoid_writev
Replace writev by sendmsg
2 parents a86c131 + 7074e59 commit 2ebdfbb

File tree

4 files changed

+32
-22
lines changed

4 files changed

+32
-22
lines changed

ompi/group/group.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2020 The University of Tennessee and The University
6+
* Copyright (c) 2004-2023 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -459,19 +459,17 @@ static inline struct ompi_proc_t *ompi_group_peer_lookup_existing (ompi_group_t
459459
*/
460460
static inline int ompi_group_proc_lookup_rank (ompi_group_t* group, ompi_proc_t* proc)
461461
{
462-
int i, np, v;
462+
int i, np, rank;
463+
opal_vpid_t v;
463464
assert( NULL != proc );
464465
assert( !ompi_proc_is_sentinel(proc) );
465466
np = ompi_group_size(group);
466467
if( 0 == np ) return MPI_PROC_NULL;
467468
/* heuristic: On comm_world, start the lookup from v=vpid, so that
468-
* when working on comm_world, the search is O(1);
469-
* Otherwise, wild guess: start from a proportional position
470-
* compared to comm_world position. */
469+
* when working on comm_world, on average, the search remains O(1). */
471470
v = proc->super.proc_name.vpid;
472-
v = (v<np)? v: v*ompi_proc_world_size()/np;
473471
for( i = 0; i < np; i++ ) {
474-
int rank = (i+v)%np;
472+
rank = (i+v)%np;
475473
/* procs are lazy initialized and may be a sentinel. Handle both cases. */
476474
ompi_proc_t* p = ompi_group_get_proc_ptr_raw(group, rank);
477475
if(OPAL_LIKELY(!ompi_proc_is_sentinel(p))) {

opal/mca/btl/tcp/btl_tcp_frag.c

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2020 The University of Tennessee and The University
6+
* Copyright (c) 2004-2023 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -105,18 +105,30 @@ bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t *frag, int sd)
105105
{
106106
ssize_t cnt;
107107
size_t i, num_vecs;
108+
struct msghdr msg;
109+
int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
108110

109-
/* non-blocking write, but continue if interrupted */
111+
msg.msg_name = NULL;
112+
msg.msg_namelen = 0;
113+
msg.msg_iov = frag->iov_ptr;
114+
msg.msg_iovlen = frag->iov_cnt;
115+
msg.msg_control = NULL;
116+
msg.msg_controllen = 0;
117+
118+
/* non-blocking write, continue if interrupted */
110119
do {
111-
cnt = writev(sd, frag->iov_ptr, frag->iov_cnt);
120+
/* Use sendmsg to avoid issues with SIGPIPE as described in
121+
* https://blog.erratasec.com/2018/10/tcpip-sockets-and-sigpipe.html#
122+
*/
123+
cnt = sendmsg(sd, &msg, msg_flags);
112124
if (cnt < 0) {
113125
switch (opal_socket_errno) {
114126
case EINTR:
115127
continue;
116128
case EWOULDBLOCK:
117129
return false;
118130
case EFAULT:
119-
BTL_ERROR(("mca_btl_tcp_frag_send: writev error (%p, %lu)\n\t%s(%lu)\n",
131+
BTL_ERROR(("mca_btl_tcp_frag_send: sendmsg error (%p, %lu)\n\t%s(%lu)\n",
120132
frag->iov_ptr[0].iov_base, (unsigned long) frag->iov_ptr[0].iov_len,
121133
strerror(opal_socket_errno), (unsigned long) frag->iov_cnt));
122134
/* send_lock held by caller */
@@ -125,7 +137,7 @@ bool mca_btl_tcp_frag_send(mca_btl_tcp_frag_t *frag, int sd)
125137
return false;
126138
default:
127139
BTL_PEER_ERROR(frag->endpoint->endpoint_proc->proc_opal,
128-
("mca_btl_tcp_frag_send: writev failed: %s (%d)",
140+
("mca_btl_tcp_frag_send: sendmsg failed: %s (%d)",
129141
strerror(opal_socket_errno), opal_socket_errno));
130142
/* send_lock held by caller */
131143
frag->endpoint->endpoint_state = MCA_BTL_TCP_FAILED;

opal/win32/opal_uio.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
University Research and Technology
44
Corporation. All rights reserved.
5-
Copyright (c) 2004-2005 The University of Tennessee and The University
5+
Copyright (c) 2004-2023 The University of Tennessee and The University
66
of Tennessee Research Foundation. All rights
77
reserved.
88
Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -26,12 +26,12 @@
2626
of code to handle the windows error flags
2727
*/
2828

29-
int writev(int fd, struct iovec *iov, int cnt)
29+
ssize_t sendmsg(int fd, const struct msghdr *message, int flags)
3030
{
3131
int err;
3232
DWORD sendlen;
3333

34-
err = WSASend((SOCKET) fd, &(iov->data), cnt, &sendlen, 0, NULL, NULL);
34+
err = WSASendMsg((SOCKET) fd, message, flags, &sendlen, NULL, NULL);
3535

3636
if (err < 0) {
3737
return err;

opal/win32/opal_uio.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
33
* University Research and Technology
44
* Corporation. All rights reserved.
5-
* Copyright (c) 2004-2014 The University of Tennessee and The University
5+
* Copyright (c) 2004-2023 The University of Tennessee and The University
66
* of Tennessee Research Foundation. All rights
77
* reserved.
88
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
@@ -33,14 +33,14 @@ struct iovec {
3333
#define iov_len data.len
3434

3535
BEGIN_C_DECLS
36+
3637
/*
37-
* writev:
38-
writev writes data to file descriptor fd, and from the buffers
39-
described by iov. The number of buffers is specified by cnt. The
40-
buffers are used in the order specified. Operates just like write
41-
except that data is taken from iov instead of a contiguous buffer.
38+
* sendmsg:
39+
* writes data to a file descriptor. This is a convenience function to allow
40+
* the TCP BTL to support Windows. Overall it should behave similarly to the
41+
* POSIX sendmsg function.
4242
*/
43-
OPAL_DECLSPEC int writev(int fd, struct iovec *iov, int cnt);
43+
OPAL_DECLSPEC ssize_t sendmsg(int socket, const struct msghdr *message, int flags);
4444

4545
/*
4646
readv reads data from file descriptor fd, and puts the result in the

0 commit comments

Comments
 (0)