Skip to content

Commit dfc2419

Browse files
asorbiniclalancette
authored andcommitted
Optimize QoS to improve responsiveness of reliable endpoints
Signed-off-by: Andrea Sorbini <[email protected]>
1 parent fff0668 commit dfc2419

File tree

5 files changed

+123
-0
lines changed

5 files changed

+123
-0
lines changed

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ variables.
151151
- [RMW_CONNEXT_CYCLONE_COMPATIBILITY_MODE](#RMW_CONNEXT_CYCLONE_COMPATIBILITY_MODE)
152152
- [RMW_CONNEXT_DISABLE_LARGE_DATA_OPTIMIZATIONS](#RMW_CONNEXT_DISABLE_LARGE_DATA_OPTIMIZATIONS)
153153
- [RMW_CONNEXT_DISABLE_FAST_ENDPOINT_DISCOVERY](#RMW_CONNEXT_DISABLE_FAST_ENDPOINT_DISCOVERY)
154+
- [RMW_CONNEXT_DISABLE_RELIABILITY_OPTIMIZATIONS](#RMW_CONNEXT_DISABLE_RELIABILITY_OPTIMIZATIONS)
154155
- [RMW_CONNEXT_ENDPOINT_QOS_OVERRIDE_POLICY](#RMW_CONNEXT_ENDPOINT_QOS_OVERRIDE_POLICY)
155156
- [RMW_CONNEXT_INITIAL_PEERS](#RMW_CONNEXT_INITIAL_PEERS)
156157
- [RMW_CONNEXT_LEGACY_RMW_COMPATIBILITY_MODE](#RMW_CONNEXT_LEGACY_RMW_COMPATIBILITY_MODE)
@@ -207,6 +208,17 @@ Variable `RMW_CONNEXT_DISABLE_FAST_ENDPOINT_DISCOVERY` may be used to disable
207208
these automatic optimizations, and to leave the DomainParticipant's QoS to
208209
its defaults.
209210
211+
### RMW_CONNEXT_DISABLE_RELIABILITY_OPTIMIZATIONS
212+
213+
By default, `rmw_connextdds` will modify the QoS of each reliable DataWriter
214+
and DataReader to improve the responsiveness of the RTPS [reliability protocol](https://community.rti.com/static/documentation/connext-dds/6.0.1/doc/manuals/connext_dds/html_files/RTI_ConnextDDS_CoreLibraries_UsersManual/Content/UsersManual/Using_QosPolicies_to_Tune_the_Reliable_P.htm?tocpath=Part%203%3A%20Advanced%20Concepts%7C11.%20Reliable%20Communications%7C11.3%20Using%20QosPolicies%20to%20Tune%20the%20Reliable%20Protocol%7C_____0#reliable_1394042328_776265).
215+
216+
For example, the ["heartbeat period"](https://community.rti.com/static/documentation/connext-dds/6.0.1/doc/manuals/connext_dds/html_files/RTI_ConnextDDS_CoreLibraries_UsersManual/Content/UsersManual/Controlling_Heartbeats_and_Retries.htm#reliable_1394042328_785637)
217+
is shortened from 3 seconds to 100 milliseconds.
218+
219+
These optimizations may be disabled using variable
220+
`RMW_CONNEXT_DISABLE_RELIABILITY_OPTIMIZATIONS`.
221+
210222
### RMW_CONNEXT_ENDPOINT_QOS_OVERRIDE_POLICY
211223
212224
When this variable is not set or set to `always`, the QoS settings specified in

rmw_connextdds_common/include/rmw_connextdds/context.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ struct rmw_context_impl_s
100100
#if RMW_CONNEXT_DEFAULT_LARGE_DATA_OPTIMIZATIONS
101101
bool optimize_large_data{true};
102102
#endif /* RMW_CONNEXT_DEFAULT_LARGE_DATA_OPTIMIZATIONS */
103+
#if RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS
104+
bool optimize_reliability{true};
105+
#endif /* RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS */
103106

104107
enum class participant_qos_override_policy_t
105108
{

rmw_connextdds_common/include/rmw_connextdds/static_config.hpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@
8585
"RMW_CONNEXT_DISABLE_LARGE_DATA_OPTIMIZATIONS"
8686
#endif /* RMW_CONNEXT_ENV_DISABLE_LARGE_DATA_OPTIMIZATIONS */
8787

88+
#ifndef RMW_CONNEXT_ENV_DISABLE_RELIABILITY_OPTIMIZATIONS
89+
#define RMW_CONNEXT_ENV_DISABLE_RELIABILITY_OPTIMIZATIONS \
90+
"RMW_CONNEXT_DISABLE_RELIABILITY_OPTIMIZATIONS"
91+
#endif /* RMW_CONNEXT_ENV_DISABLE_RELIABILITY_OPTIMIZATIONS */
92+
8893
// TODO(security-wg): These are intended to be temporary, and need to be
8994
// refactored into a proper abstraction.
9095
#ifndef RMW_CONNEXT_ENV_SECURITY_LOG_FILE
@@ -226,6 +231,58 @@
226231
#define RMW_CONNEXT_TYPE_OBJECT_MAX_SERIALIZED_SIZE 65000
227232
#endif /* RMW_CONNEXT_TYPE_OBJECT_MAX_SERIALIZED_SIZE */
228233

234+
/******************************************************************************
235+
* Customize the RTPS reliability protocol to speed up its responsiveness.
236+
******************************************************************************/
237+
#ifndef RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS
238+
#define RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS 1
239+
#endif /* RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS */
240+
241+
/******************************************************************************
242+
* Regular heartbeat period used by any reliable RTPS Writer.
243+
* This is an initializer for an instance of type DDS_Duration_t.
244+
******************************************************************************/
245+
#ifndef RMW_CONNEXT_DEFAULT_HEARTBEAT_PERIOD
246+
#define RMW_CONNEXT_DEFAULT_HEARTBEAT_PERIOD {0, 100000000} /* 100ms */
247+
#endif /* RMW_CONNEXT_DEFAULT_HEARTBEAT_PERIOD */
248+
249+
/******************************************************************************
250+
* Fast heartbeat period used by any reliable RTPS Writer to allow
251+
* late joiners and out of sync readers to catch up.
252+
* This is an initializer for an instance of type DDS_Duration_t.
253+
******************************************************************************/
254+
#ifndef RMW_CONNEXT_DEFAULT_HEARTBEAT_PERIOD_FAST
255+
#define RMW_CONNEXT_DEFAULT_HEARTBEAT_PERIOD_FAST {0, 20000000} /* 20ms */
256+
#endif /* RMW_CONNEXT_DEFAULT_HEARTBEAT_PERIOD_FAST */
257+
258+
/******************************************************************************
259+
* When a DataWriter receives a request for missing DDS samples from a
260+
* DataReader and responds by resending the requested DDS samples, it will
261+
* ignore additional requests for the same DDS samples during the time period
262+
* max_nack_response_delay. We decrease this to be less than the HB period.
263+
******************************************************************************/
264+
#ifndef RMW_CONNEXT_DEFAULT_MAX_NACK_RESPONSE_DELAY
265+
#define RMW_CONNEXT_DEFAULT_MAX_NACK_RESPONSE_DELAY {0, 10000000} /* 10ms */
266+
#endif /* RMW_CONNEXT_DEFAULT_MAX_NACK_RESPONSE_DELAY */
267+
268+
/******************************************************************************
269+
* Maximum number of periodic heartbeats gone unanswered after which a
270+
* DataWriter will consider a DataReader as inactive.
271+
*
272+
******************************************************************************/
273+
#ifndef RMW_CONNEXT_DEFAULT_MAX_HEARTBEATS
274+
#define RMW_CONNEXT_DEFAULT_MAX_HEARTBEATS (10 * 60) /* 1m @ 10hz */
275+
#endif /* RMW_CONNEXT_DEFAULT_MAX_HEARTBEATS */
276+
277+
/******************************************************************************
278+
* When a reliable reader receives a heartbeat from a remote writer and finds
279+
* out that it needs to send back an ACK/NACK message, the reader can choose to
280+
* delay a while. We set this delay to be compatible with the HB period.
281+
******************************************************************************/
282+
#ifndef RMW_CONNEXT_DEFAULT_MAX_HEARTBEAT_RESPONSE_DELAY
283+
#define RMW_CONNEXT_DEFAULT_MAX_HEARTBEAT_RESPONSE_DELAY {0, 10000000} /* 10ms */
284+
#endif /* RMW_CONNEXT_DEFAULT_MAX_HEARTBEAT_RESPONSE_DELAY */
285+
229286
/******************************************************************************
230287
* Automatically tune DataWriterQos to better handle reliable "large data".
231288
******************************************************************************/

rmw_connextdds_common/src/common/rmw_context.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,6 +1281,25 @@ rmw_api_connextdds_init(
12811281
RMW_CONNEXT_LOG_DEBUG_A("initial DDS peers: %s", initial_peers)
12821282
}
12831283

1284+
#if RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS
1285+
// Check if we should disable the optimizations for the RTPS reliability protocol
1286+
const char * disable_optimize_reliability_env = nullptr;
1287+
lookup_rc = rcutils_get_env(
1288+
RMW_CONNEXT_ENV_DISABLE_RELIABILITY_OPTIMIZATIONS,
1289+
&disable_optimize_reliability_env);
1290+
1291+
if (nullptr != lookup_rc || nullptr == disable_optimize_reliability_env) {
1292+
RMW_CONNEXT_LOG_ERROR_A_SET(
1293+
"failed to lookup from environment: "
1294+
"var=%s, "
1295+
"rc=%s ",
1296+
RMW_CONNEXT_ENV_DISABLE_RELIABILITY_OPTIMIZATIONS,
1297+
lookup_rc)
1298+
return RMW_RET_ERROR;
1299+
}
1300+
ctx->optimize_reliability = '\0' == disable_optimize_reliability_env[0];
1301+
#endif /* RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS */
1302+
12841303
if (nullptr == RMW_Connext_gv_DomainParticipantFactory) {
12851304
RMW_CONNEXT_ASSERT(1 == RMW_Connext_gv_ContextCount)
12861305
RMW_CONNEXT_LOG_DEBUG("initializing DDS DomainParticipantFactory")

rmw_connextdds_common/src/ndds/dds_api_ndds.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,27 @@ rmw_connextdds_get_datawriter_qos(
501501
qos->publish_mode.kind = DDS_ASYNCHRONOUS_PUBLISH_MODE_QOS;
502502
}
503503

504+
#if RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS
505+
// The default settings for the RTPS reliability protocol are not very
506+
// responsive, and they cause some unit tests to fail. These optimizations
507+
// shorten the Heartbeat period from 3s (default) to 100ms. Other vendors
508+
// seem to be in a similar range. We also lower the period for "late joiners"
509+
// and the "fast" period (used to speed up recovery of readers that reached
510+
// the "high watermark" of unacked samples -- 1 by default) to 20ms.
511+
if (ctx->optimize_reliability) {
512+
qos->protocol.rtps_reliable_writer.heartbeat_period =
513+
RMW_CONNEXT_DEFAULT_HEARTBEAT_PERIOD;
514+
qos->protocol.rtps_reliable_writer.late_joiner_heartbeat_period =
515+
RMW_CONNEXT_DEFAULT_HEARTBEAT_PERIOD_FAST;
516+
qos->protocol.rtps_reliable_writer.fast_heartbeat_period =
517+
RMW_CONNEXT_DEFAULT_HEARTBEAT_PERIOD_FAST;
518+
qos->protocol.rtps_reliable_writer.max_heartbeat_retries =
519+
RMW_CONNEXT_DEFAULT_MAX_HEARTBEATS;
520+
qos->protocol.rtps_reliable_writer.max_nack_response_delay =
521+
RMW_CONNEXT_DEFAULT_MAX_NACK_RESPONSE_DELAY;
522+
}
523+
#endif /* RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS */
524+
504525
#if RMW_CONNEXT_DEFAULT_LARGE_DATA_OPTIMIZATIONS
505526
// Unless disabled, optimize the DataWriter's reliability protocol to
506527
// better handle large data samples. These are *bounded* types whose
@@ -587,6 +608,17 @@ rmw_connextdds_get_datareader_qos(
587608
}
588609
}
589610

611+
#if RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS
612+
// The default settings for the RTPS reliability protocol are not very
613+
// responsive, and they cause some unit tests to fail. These optimizations
614+
// are dual to those applied in rmw_connextdds_get_datawriter_qos().
615+
if (ctx->optimize_reliability) {
616+
qos->protocol.rtps_reliable_reader.min_heartbeat_response_delay = DDS_DURATION_ZERO;
617+
qos->protocol.rtps_reliable_reader.max_heartbeat_response_delay =
618+
RMW_CONNEXT_DEFAULT_MAX_HEARTBEAT_RESPONSE_DELAY;
619+
}
620+
#endif /* RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS */
621+
590622
#if RMW_CONNEXT_DEFAULT_LARGE_DATA_OPTIMIZATIONS
591623
// Unless disabled, optimize the DataReader's reliability protocol to
592624
// better handle large data samples. These are *bounded* types whose

0 commit comments

Comments
 (0)