Skip to content

Commit b57d032

Browse files
authored
Optimize QoS to improve responsiveness of reliable endpoints (#26)
* Optimize QoS to improve responsiveness of reliable endpoints * Replace reliability optimizations with built-in profile Optimization.ReliabilityProtocol.Common Signed-off-by: Andrea Sorbini <[email protected]>
1 parent 1206113 commit b57d032

File tree

5 files changed

+83
-0
lines changed

5 files changed

+83
-0
lines changed

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ variables.
151151
- [RMW_CONNEXT_CYCLONE_COMPATIBILITY_MODE](#RMW_CONNEXT_CYCLONE_COMPATIBILITY_MODE)
152152
- [RMW_CONNEXT_DISABLE_LARGE_DATA_OPTIMIZATIONS](#RMW_CONNEXT_DISABLE_LARGE_DATA_OPTIMIZATIONS)
153153
- [RMW_CONNEXT_DISABLE_FAST_ENDPOINT_DISCOVERY](#RMW_CONNEXT_DISABLE_FAST_ENDPOINT_DISCOVERY)
154+
- [RMW_CONNEXT_DISABLE_RELIABILITY_OPTIMIZATIONS](#RMW_CONNEXT_DISABLE_RELIABILITY_OPTIMIZATIONS)
154155
- [RMW_CONNEXT_ENDPOINT_QOS_OVERRIDE_POLICY](#RMW_CONNEXT_ENDPOINT_QOS_OVERRIDE_POLICY)
155156
- [RMW_CONNEXT_INITIAL_PEERS](#RMW_CONNEXT_INITIAL_PEERS)
156157
- [RMW_CONNEXT_LEGACY_RMW_COMPATIBILITY_MODE](#RMW_CONNEXT_LEGACY_RMW_COMPATIBILITY_MODE)
@@ -207,6 +208,17 @@ Variable `RMW_CONNEXT_DISABLE_FAST_ENDPOINT_DISCOVERY` may be used to disable
207208
these automatic optimizations, and to leave the DomainParticipant's QoS to
208209
its defaults.
209210
211+
### RMW_CONNEXT_DISABLE_RELIABILITY_OPTIMIZATIONS
212+
213+
By default, `rmw_connextdds` will modify the QoS of each reliable DataWriter
214+
and DataReader to improve the responsiveness of the RTPS [reliability protocol](https://community.rti.com/static/documentation/connext-dds/6.0.1/doc/manuals/connext_dds/html_files/RTI_ConnextDDS_CoreLibraries_UsersManual/Content/UsersManual/Using_QosPolicies_to_Tune_the_Reliable_P.htm?tocpath=Part%203%3A%20Advanced%20Concepts%7C11.%20Reliable%20Communications%7C11.3%20Using%20QosPolicies%20to%20Tune%20the%20Reliable%20Protocol%7C_____0#reliable_1394042328_776265).
215+
216+
For example, the ["heartbeat period"](https://community.rti.com/static/documentation/connext-dds/6.0.1/doc/manuals/connext_dds/html_files/RTI_ConnextDDS_CoreLibraries_UsersManual/Content/UsersManual/Controlling_Heartbeats_and_Retries.htm#reliable_1394042328_785637)
217+
is shortened from 3 seconds to 200 milliseconds.
218+
219+
These optimizations may be disabled using variable
220+
`RMW_CONNEXT_DISABLE_RELIABILITY_OPTIMIZATIONS` (set it to any non-empty value).
221+
210222
### RMW_CONNEXT_ENDPOINT_QOS_OVERRIDE_POLICY
211223
212224
When this variable is not set or set to `always`, the QoS settings specified in

rmw_connextdds_common/include/rmw_connextdds/context.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ struct rmw_context_impl_s
100100
#if RMW_CONNEXT_DEFAULT_LARGE_DATA_OPTIMIZATIONS
101101
bool optimize_large_data{true};
102102
#endif /* RMW_CONNEXT_DEFAULT_LARGE_DATA_OPTIMIZATIONS */
103+
#if RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS
104+
bool optimize_reliability{true};
105+
#endif /* RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS */
103106

104107
enum class participant_qos_override_policy_t
105108
{

rmw_connextdds_common/include/rmw_connextdds/static_config.hpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@
8585
"RMW_CONNEXT_DISABLE_LARGE_DATA_OPTIMIZATIONS"
8686
#endif /* RMW_CONNEXT_ENV_DISABLE_LARGE_DATA_OPTIMIZATIONS */
8787

88+
#ifndef RMW_CONNEXT_ENV_DISABLE_RELIABILITY_OPTIMIZATIONS
89+
#define RMW_CONNEXT_ENV_DISABLE_RELIABILITY_OPTIMIZATIONS \
90+
"RMW_CONNEXT_DISABLE_RELIABILITY_OPTIMIZATIONS"
91+
#endif /* RMW_CONNEXT_ENV_DISABLE_RELIABILITY_OPTIMIZATIONS */
92+
8893
// TODO(security-wg): These are intended to be temporary, and need to be
8994
// refactored into a proper abstraction.
9095
#ifndef RMW_CONNEXT_ENV_SECURITY_LOG_FILE
@@ -226,6 +231,13 @@
226231
#define RMW_CONNEXT_TYPE_OBJECT_MAX_SERIALIZED_SIZE 65000
227232
#endif /* RMW_CONNEXT_TYPE_OBJECT_MAX_SERIALIZED_SIZE */
228233

234+
/******************************************************************************
235+
* Customize the RTPS reliability protocol to improve its responsiveness.
236+
******************************************************************************/
237+
#ifndef RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS
238+
#define RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS 1
239+
#endif /* RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS */
240+
229241
/******************************************************************************
230242
* Automatically tune DataWriterQos to better handle reliable "large data".
231243
******************************************************************************/

rmw_connextdds_common/src/common/rmw_context.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1281,6 +1281,25 @@ rmw_api_connextdds_init(
12811281
RMW_CONNEXT_LOG_DEBUG_A("initial DDS peers: %s", initial_peers)
12821282
}
12831283

1284+
#if RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS
1285+
// Check if we should disable the optimizations for the RTPS reliability protocol
1286+
const char * disable_optimize_reliability_env = nullptr;
1287+
lookup_rc = rcutils_get_env(
1288+
RMW_CONNEXT_ENV_DISABLE_RELIABILITY_OPTIMIZATIONS,
1289+
&disable_optimize_reliability_env);
1290+
1291+
if (nullptr != lookup_rc || nullptr == disable_optimize_reliability_env) {
1292+
RMW_CONNEXT_LOG_ERROR_A_SET(
1293+
"failed to lookup from environment: "
1294+
"var=%s, "
1295+
"rc=%s ",
1296+
RMW_CONNEXT_ENV_DISABLE_RELIABILITY_OPTIMIZATIONS,
1297+
lookup_rc)
1298+
return RMW_RET_ERROR;
1299+
}
1300+
ctx->optimize_reliability = '\0' == disable_optimize_reliability_env[0];
1301+
#endif /* RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS */
1302+
12841303
if (nullptr == RMW_Connext_gv_DomainParticipantFactory) {
12851304
RMW_CONNEXT_ASSERT(1 == RMW_Connext_gv_ContextCount)
12861305
RMW_CONNEXT_LOG_DEBUG("initializing DDS DomainParticipantFactory")

rmw_connextdds_common/src/ndds/dds_api_ndds.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,32 @@ rmw_connextdds_get_datawriter_qos(
501501
qos->publish_mode.kind = DDS_ASYNCHRONOUS_PUBLISH_MODE_QOS;
502502
}
503503

504+
#if RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS
505+
// The default settings for the RTPS reliability protocol are not very
506+
// responsive, and they cause some unit tests to fail. These optimizations
507+
// have been derived from profile `Optimization.ReliabilityProtocol.Common`
508+
// available in Connext 6+. `Generic.StrictReliable` is the equivalent
509+
// profile in 5.3.1. Changes are limited to `DDS_RtpsReliableWriterProtocol_t`.
510+
if (ctx->optimize_reliability) {
511+
// All write() calls will block (for at most max_blocking_time) once the send_window
512+
// is filled with samples that haven't yet been acknowledged by all active readers.
513+
qos->protocol.rtps_reliable_writer.min_send_window_size = 40;
514+
qos->protocol.rtps_reliable_writer.max_send_window_size = 40; // fixed size window
515+
qos->protocol.rtps_reliable_writer.heartbeats_per_max_samples = 10; // 1 every 4
516+
qos->protocol.rtps_reliable_writer.heartbeat_period = {0, 200000000}; // 200ms
517+
qos->protocol.rtps_reliable_writer.late_joiner_heartbeat_period = {0, 20000000}; // 20ms
518+
qos->protocol.rtps_reliable_writer.fast_heartbeat_period = {0, 20000000}; // 20ms
519+
qos->protocol.rtps_reliable_writer.max_heartbeat_retries = 500; // 10s @ 50hz
520+
// Force the writer to reply immediately to ACKNACKs received from a reader.
521+
qos->protocol.rtps_reliable_writer.max_nack_response_delay = DDS_DURATION_ZERO;
522+
// When the number of unack'd samples reaches the high_watermark the fast_heartbeat_period
523+
// is used. When the number dips below the low_watermark, the heartbeat_period is used.
524+
// These numbers are tied to the send_window size.
525+
qos->protocol.rtps_reliable_writer.high_watermark = 25;
526+
qos->protocol.rtps_reliable_writer.low_watermark = 10;
527+
}
528+
#endif /* RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS */
529+
504530
#if RMW_CONNEXT_DEFAULT_LARGE_DATA_OPTIMIZATIONS
505531
// Unless disabled, optimize the DataWriter's reliability protocol to
506532
// better handle large data samples. These are *bounded* types whose
@@ -587,6 +613,17 @@ rmw_connextdds_get_datareader_qos(
587613
}
588614
}
589615

616+
#if RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS
617+
// The default settings for the RTPS reliability protocol are not very
618+
// responsive, and they cause some unit tests to fail. These optimizations
619+
// are dual to those applied in rmw_connextdds_get_datawriter_qos().
620+
// Changes are limited to `DDS_RtpsReliableReaderProtocol_t`.
621+
if (ctx->optimize_reliability) {
622+
qos->protocol.rtps_reliable_reader.min_heartbeat_response_delay = DDS_DURATION_ZERO;
623+
qos->protocol.rtps_reliable_reader.max_heartbeat_response_delay = DDS_DURATION_ZERO;
624+
}
625+
#endif /* RMW_CONNEXT_DEFAULT_RELIABILITY_OPTIMIZATIONS */
626+
590627
#if RMW_CONNEXT_DEFAULT_LARGE_DATA_OPTIMIZATIONS
591628
// Unless disabled, optimize the DataReader's reliability protocol to
592629
// better handle large data samples. These are *bounded* types whose

0 commit comments

Comments
 (0)