Commit 2e27ed5

use exponential backoff in wait_for_partition
Signed-off-by: Axel Schneewind <[email protected]>
1 parent e0ee1c8 · commit 2e27ed5

2 files changed: +42 -35 lines


threaded/tst_threaded_ring_partitioned.c

Lines changed: 28 additions & 18 deletions
@@ -21,6 +21,8 @@
 
 static pthread_barrier_t thread_barrier;
 
+static int ratio_send_to_receive = 1;
+
 int tst_threaded_ring_partitioned_init(struct tst_env *env)
 {
   int comm_rank;
@@ -66,15 +68,16 @@ int tst_threaded_ring_partitioned_init(struct tst_env *env)
   return 0;
 }
 
-// busy wait until partition arrived
+
+// busy wait until partition arrived, using exponential backoff with initial backoff time given.
 // returns 1 if the partition has arrived and 0 if waiting was interupted
-static int wait_for_partition(MPI_Request *recv_request, int partition_num)
+static int wait_for_partition(MPI_Request *recv_request, int partition_num, useconds_t backoff_time)
 {
   int flag = 0;
   do
   {
     MPI_CHECK(MPI_Parrived(*recv_request, partition_num, &flag));
-  } while (flag == 0 && usleep(2000) == 0);
+  } while (flag == 0 && usleep((backoff_time = (backoff_time * 3) / 2)) == 0);
 
   return flag;
 }
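For context, the loop above now sleeps after every unsuccessful MPI_Parrived poll and grows the sleep interval by a factor of 3/2, so an initial backoff of 128 µs yields sleeps of 192, 288, 432, 648 µs and so on instead of the previous fixed 2000 µs; the usleep(...) == 0 condition also ends the wait early if the sleep is interrupted by a signal, which is the "waiting was interupted" case the comment refers to. A self-contained sketch of the same pattern (illustration only, not part of the commit; poll_ready() and wait_with_backoff() are made-up stand-ins for MPI_Parrived and wait_for_partition):

#include <stdio.h>
#include <unistd.h>

/* stand-in for MPI_Parrived: pretend the partition arrives after 6 polls */
static int polls_left = 6;
static int poll_ready(void) { return --polls_left <= 0; }

/* same loop shape as wait_for_partition above: poll, and after every
 * unsuccessful poll sleep with a 3/2 growth factor (192 -> 288 -> 432 us ...) */
static int wait_with_backoff(useconds_t backoff_time)
{
  int flag = 0;
  do
  {
    flag = poll_ready();
  } while (flag == 0 && usleep((backoff_time = (backoff_time * 3) / 2)) == 0);
  return flag;
}

int main(void)
{
  printf("arrived: %d\n", wait_with_backoff(128)); /* 128 us initial backoff, as for the worker threads */
  return 0;
}

Compared with a fixed 2 ms poll, the geometric growth keeps latency low when the partition arrives quickly while still backing off when it does not.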
@@ -129,26 +132,31 @@ int tst_threaded_ring_partitioned_run(struct tst_env *env)
                     send_to, recv_from, env->tag);
 
   // number of partitions and values per partition
-  int num_partitions = num_worker_threads;
-  int partition_size = env->values_num; // number of elements
+  int num_send_partitions = num_worker_threads;
+  int num_recv_partitions = num_send_partitions / ratio_send_to_receive;
+  int partition_size = env->values_num; // number of elements per send partition
+
+  // partition numbers for this thread
+  int send_partition_num = thread_num;
+  int recv_partition_num = (thread_num % ratio_send_to_receive == 0) ? thread_num / ratio_send_to_receive : -1;
 
   // init send and recv and start both
   if (thread_num == TST_THREAD_MASTER)
   {
-    tst_output_printf(DEBUG_LOG, TST_REPORT_MAX, "(Rank:%i, Thread:%i) initializing send to %i and recv from %i with %i partitions of size %i*%i bytes\n",
+    tst_output_printf(DEBUG_LOG, TST_REPORT_MAX,"(Rank:%i, Thread:%i) initializing send to %i and recv from %i with %i partitions of size %i*%i bytes\n",
                       comm_rank, thread_num,
-                      send_to, recv_from, num_partitions, partition_size, type_extent);
+                      send_to, recv_from, num_send_partitions, partition_size, type_extent);
 
-    MPI_CHECK(MPI_Psend_init(env->send_buffer, num_partitions, partition_size, type, send_to,
+    MPI_CHECK(MPI_Psend_init(env->send_buffer, num_send_partitions, partition_size, type, send_to,
                              0, comm, MPI_INFO_NULL, send_request));
-    MPI_CHECK(MPI_Precv_init(env->recv_buffer, num_partitions, partition_size, type, recv_from,
+    MPI_CHECK(MPI_Precv_init(env->recv_buffer, num_recv_partitions, partition_size * ratio_send_to_receive, type, recv_from,
                              0, comm, MPI_INFO_NULL, recv_request));
 
     MPI_CHECK(MPI_Startall(2, env->req_buffer));
 
     // wait for all ranks to become ready
     MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
-  };
+  }
 
   pthread_barrier_wait(&thread_barrier);
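As an aside on the changed MPI_Precv_init call: MPI-4 partitioned communication lets the receiver partition a transfer differently from the sender as long as the total element count matches, which is what allows num_recv_partitions partitions of partition_size * ratio_send_to_receive elements to match num_send_partitions partitions of partition_size elements. A minimal two-rank sketch of that idea with made-up sizes (not from the test; 4 send partitions of 256 ints against 2 receive partitions of 512 ints), assuming an MPI implementation with MPI 4.0 partitioned-communication support:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define TOTAL 1024 /* total elements; must match on both sides */

int main(int argc, char **argv)
{
  int rank;
  MPI_Init(&argc, &argv); /* run with at least 2 ranks */
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  int *buf = malloc(TOTAL * sizeof(int));
  MPI_Request req;

  if (rank == 0)
  {
    for (int i = 0; i < TOTAL; i++)
      buf[i] = i;

    /* sender side: 4 partitions of 256 ints each */
    MPI_Psend_init(buf, 4, 256, MPI_INT, 1, 0, MPI_COMM_WORLD, MPI_INFO_NULL, &req);
    MPI_Start(&req);
    for (int p = 0; p < 4; p++)
      MPI_Pready(p, req); /* each partition can be marked ready independently */
    MPI_Wait(&req, MPI_STATUS_IGNORE);
    MPI_Request_free(&req);
  }
  else if (rank == 1)
  {
    /* receiver side: 2 partitions of 512 ints each -- a different
       partitioning of the same 1024 elements */
    MPI_Precv_init(buf, 2, 512, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_INFO_NULL, &req);
    MPI_Start(&req);
    for (int p = 0; p < 2; p++)
    {
      int flag = 0;
      while (!flag)
        MPI_Parrived(req, p, &flag); /* poll until this receive partition is complete */
    }
    MPI_Wait(&req, MPI_STATUS_IGNORE);
    MPI_Request_free(&req);
    printf("last element received: %d\n", buf[TOTAL - 1]);
  }

  free(buf);
  MPI_Finalize();
  return 0;
}

MPI_Pready marks one send partition as ready and MPI_Parrived polls completion of one receive partition, which is exactly the pair of calls the test threads use below.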

@@ -157,30 +165,32 @@ int tst_threaded_ring_partitioned_run(struct tst_env *env)
     if (thread_num == TST_THREAD_MASTER)
       time_init = MPI_Wtime();
 
-    if (thread_num >= 0 && thread_num < num_partitions)
+    if (send_partition_num >= 0 && send_partition_num < num_send_partitions)
     {
       // allow this partition to be sent
-      MPI_CHECK(MPI_Pready(thread_num, *send_request));
+      MPI_CHECK(MPI_Pready(send_partition_num, *send_request));
     }
 
-    if (thread_num >= 0 && thread_num < num_partitions)
+    if (recv_partition_num >= 0 && recv_partition_num < num_recv_partitions)
    {
-      wait_for_partition(recv_request, thread_num);
+      wait_for_partition(recv_request, recv_partition_num, 512);
     }
   }
   else
   {
-    if (thread_num >= 0 && thread_num < num_partitions)
+    if (send_partition_num >= 0 && send_partition_num < num_send_partitions)
     {
-      wait_for_partition(recv_request, thread_num);
+      if (recv_partition_num >= 0 && recv_partition_num < num_recv_partitions) {
+        wait_for_partition(recv_request, recv_partition_num, 128);
+      }
 
       // simply copy data from input to output buffer
-      int begin_index = partition_size * thread_num * type_extent;
+      int begin_index = partition_size * send_partition_num * type_extent;
       int size = partition_size * type_extent;
       memcpy(&env->send_buffer[begin_index], &env->recv_buffer[begin_index], size);
 
       // allow sending of this partition
-      MPI_CHECK(MPI_Pready(thread_num, *send_request));
+      MPI_CHECK(MPI_Pready(send_partition_num, *send_request));
     }
   }

threaded/tst_threaded_ring_partitioned_many_to_one.c

Lines changed: 14 additions & 17 deletions
@@ -72,15 +72,16 @@ int tst_threaded_ring_partitioned_many_to_one_init(struct tst_env *env)
   return 0;
 }
 
-// busy wait until partition arrived
+
+// busy wait until partition arrived, using exponential backoff with initial backoff time given.
 // returns 1 if the partition has arrived and 0 if waiting was interupted
-static int wait_for_partition(MPI_Request *recv_request, int partition_num)
+static int wait_for_partition(MPI_Request *recv_request, int partition_num, useconds_t backoff_time)
 {
   int flag = 0;
   do
   {
     MPI_CHECK(MPI_Parrived(*recv_request, partition_num, &flag));
-  } while (flag == 0 && usleep(2000) == 0);
+  } while (flag == 0 && usleep((backoff_time = (backoff_time * 3) / 2)) == 0);
 
   return flag;
 }
@@ -137,7 +138,7 @@ int tst_threaded_ring_partitioned_many_to_one_run(struct tst_env *env)
   // number of partitions and values per partition
   int num_send_partitions = num_worker_threads;
   int num_recv_partitions = num_send_partitions / ratio_send_to_receive;
-  int partition_size = env->values_num; // number of elements
+  int partition_size = env->values_num; // number of elements per send partition
 
   // partition numbers for this thread
   int send_partition_num = thread_num;
@@ -161,7 +162,7 @@ int tst_threaded_ring_partitioned_many_to_one_run(struct tst_env *env)
 
     // wait for all ranks to become ready
     MPI_CHECK(MPI_Barrier(MPI_COMM_WORLD));
-  };
+  }
 
   pthread_barrier_wait(&thread_barrier);

@@ -178,24 +179,20 @@ int tst_threaded_ring_partitioned_many_to_one_run(struct tst_env *env)
 
     if (recv_partition_num >= 0 && recv_partition_num < num_recv_partitions)
     {
-      wait_for_partition(recv_request, recv_partition_num);
+      wait_for_partition(recv_request, recv_partition_num, 512);
     }
   }
   else
   {
     if (send_partition_num >= 0 && send_partition_num < num_send_partitions)
     {
-      if (recv_partition_num >= 0 && recv_partition_num < num_recv_partitions)
-      {
-        wait_for_partition(recv_request, recv_partition_num);
-
-        for (int i = 1; i < ratio_send_to_receive; i++)
-          tst_thread_signal_send(thread_num + i);
-      }
-      else
-      {
-        tst_thread_signal_wait(thread_num);
-      }
+      if (recv_partition_num >= 0 && recv_partition_num < num_recv_partitions) {
+        wait_for_partition(recv_request, recv_partition_num, 128);
+        for (int i = 1; i < ratio_send_to_receive; i++)
+          tst_thread_signal_send(send_partition_num + i);
+      } else {
+        tst_thread_signal_wait(send_partition_num);
+      }
 
       // simply copy data from input to output buffer
       int begin_index = partition_size * send_partition_num * type_extent;
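To make the thread-to-partition mapping concrete (illustration only; ratio_send_to_receive = 2, 8 worker threads and 1000 elements are made-up values, not the test defaults): every worker thread owns one send partition, but only every ratio-th thread owns a receive partition, which is ratio times as large; in the many_to_one variant above, the remaining threads wait for a signal from the thread that received their data before copying and marking their own send partition ready. A small sketch that just prints the mapping produced by the same formulas as in the diff:

#include <stdio.h>

int main(void)
{
  /* illustration values, not taken from the test */
  int ratio_send_to_receive = 2;
  int num_worker_threads = 8;
  int partition_size = 1000; /* elements per send partition */

  int num_send_partitions = num_worker_threads;
  int num_recv_partitions = num_send_partitions / ratio_send_to_receive;

  printf("%d send partitions of %d elements, %d recv partitions of %d elements\n",
         num_send_partitions, partition_size,
         num_recv_partitions, partition_size * ratio_send_to_receive);

  for (int thread_num = 0; thread_num < num_worker_threads; thread_num++)
  {
    /* same formulas as in the changed code above */
    int send_partition_num = thread_num;
    int recv_partition_num =
        (thread_num % ratio_send_to_receive == 0) ? thread_num / ratio_send_to_receive : -1;

    if (recv_partition_num >= 0)
      printf("thread %d: sends partition %d, waits for recv partition %d\n",
             thread_num, send_partition_num, recv_partition_num);
    else
      printf("thread %d: sends partition %d, waits for a signal from its receiving thread\n",
             thread_num, send_partition_num);
  }
  return 0;
}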
