Skip to content

Commit

Permalink
Merge pull request #365 from BSWANG/add-tcp-rebalance
Browse files Browse the repository at this point in the history
add tcp rebalance
  • Loading branch information
l1b0k authored Jun 1, 2022
2 parents b081a22 + a5df24c commit 0439efc
Showing 1 changed file with 110 additions and 0 deletions.
110 changes: 110 additions & 0 deletions policy/cilium/0009-tcp-rebalance.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
From 9d9ee57aed9b09109d3c08e2da077d6a6a8fa03b Mon Sep 17 00:00:00 2001
From: Amol Ambekar <[email protected]>
Date: Thu, 14 Apr 2022 15:23:25 -0700
Subject: [PATCH] rebalance

---
bpf/lib/conntrack.h | 34 +++++++++++++++++++++++++++++
bpf/node_config.h | 1 +
bpf/tests/bpf_ct_tests.c | 1 +
pkg/datapath/linux/config/config.go | 1 +
4 files changed, 37 insertions(+)

diff --git a/bpf/lib/conntrack.h b/bpf/lib/conntrack.h
index 6c303dbbb5..c663733055 100644
--- a/bpf/lib/conntrack.h
+++ b/bpf/lib/conntrack.h
@@ -183,6 +183,23 @@ static __always_inline bool ct_entry_alive(const struct ct_entry *entry)
return !entry->rx_closing || !entry->tx_closing;
}

+static __always_inline bool ct_entry_closing(const struct ct_entry *entry)
+{
+ return entry->tx_closing || entry->rx_closing;
+}
+
+static __always_inline bool
+ct_entry_closing_wait_before_rebalance(const struct ct_entry *entry)
+{
+ __u32 now = bpf_mono_now();
+ __u32 wait_time = bpf_sec_to_mono(CT_SERVICE_CLOSE_REBALANCE);
+
+ /* This doesn't check last_rx_report because we don't see closing
+ * in RX direction for CT_SERVICE.
+ */
+ return READ_ONCE(entry->last_tx_report) + wait_time >= now;
+}
+
/* Helper for holding 2nd service entry alive in nodeport case. */
static __always_inline bool __ct_entry_keep_alive(const void *map,
const void *tuple)
@@ -222,6 +239,22 @@ static __always_inline __u8 __ct_lookup(const void *map, struct __ctx_buff *ctx,
entry = map_lookup_elem(map, tuple);
if (entry) {
cilium_dbg(ctx, DBG_CT_MATCH, entry->lifetime, entry->rev_nat_index);
+ #ifdef HAVE_LARGE_INSN_LIMIT
+ if (dir == CT_SERVICE &&
+ ct_entry_closing(entry) &&
+ (seen_flags.value & TCP_FLAG_SYN) &&
+ !ct_entry_closing_wait_before_rebalance(entry)) {
+ /* There is an existing entry for this service. However,
+ * the old connection was already closed in the past.
+ * Since this is a new connection, we want it to pick a
+ * new backend. Hence don't reopen this entry if it's
+ * been longer than CT_SERVICE_CLOSE_REBALANCE seconds.
+ * (CT_SERVICE_CLOSE_REBALANCE is a grace period for any
+ * in-flight packets related to the old connection).
+ */
+ goto ct_new;
+ }
+ #endif
if (ct_entry_alive(entry))
*monitor = ct_update_timeout(entry, is_tcp, dir, seen_flags);
if (ct_state) {
@@ -290,6 +323,7 @@ static __always_inline __u8 __ct_lookup(const void *map, struct __ctx_buff *ctx,
return CT_ESTABLISHED;
}

+ct_new: __maybe_unused
*monitor = TRACE_PAYLOAD_LEN;
return CT_NEW;
}
diff --git a/bpf/node_config.h b/bpf/node_config.h
index 2f22721841..d8c12d4815 100644
--- a/bpf/node_config.h
+++ b/bpf/node_config.h
@@ -39,6 +39,7 @@ DEFINE_IPV6(HOST_IP, 0xbe, 0xef, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0xa, 0x
#define CT_CONNECTION_LIFETIME_NONTCP 60
#define CT_SERVICE_LIFETIME_TCP 21600
#define CT_SERVICE_LIFETIME_NONTCP 60
+#define CT_SERVICE_CLOSE_REBALANCE 30
#define CT_SYN_TIMEOUT 60
#define CT_CLOSE_TIMEOUT 10
#define CT_REPORT_INTERVAL 5
diff --git a/bpf/tests/bpf_ct_tests.c b/bpf/tests/bpf_ct_tests.c
index 9211c3067a..d28d002cd8 100644
--- a/bpf/tests/bpf_ct_tests.c
+++ b/bpf/tests/bpf_ct_tests.c
@@ -29,6 +29,7 @@
#define CT_CONNECTION_LIFETIME_NONTCP 60
#define CT_SERVICE_LIFETIME_TCP 21600
#define CT_SERVICE_LIFETIME_NONTCP 60
+#define CT_SERVICE_CLOSE_REBALANCE 30
#define CT_SYN_TIMEOUT 60
#define CT_CLOSE_TIMEOUT 10
#define CT_REPORT_INTERVAL 5
diff --git a/pkg/datapath/linux/config/config.go b/pkg/datapath/linux/config/config.go
index 0b4ae87c51..d433a6f52e 100644
--- a/pkg/datapath/linux/config/config.go
+++ b/pkg/datapath/linux/config/config.go
@@ -159,6 +159,7 @@ func (h *HeaderfileWriter) WriteNodeConfig(w io.Writer, cfg *datapath.LocalNodeC
cDefinesMap["CT_CONNECTION_LIFETIME_NONTCP"] = fmt.Sprintf("%d", int64(option.Config.CTMapEntriesTimeoutAny.Seconds()))
cDefinesMap["CT_SERVICE_LIFETIME_TCP"] = fmt.Sprintf("%d", int64(option.Config.CTMapEntriesTimeoutSVCTCP.Seconds()))
cDefinesMap["CT_SERVICE_LIFETIME_NONTCP"] = fmt.Sprintf("%d", int64(option.Config.CTMapEntriesTimeoutSVCAny.Seconds()))
+ cDefinesMap["CT_SERVICE_CLOSE_REBALANCE"] = fmt.Sprintf("%d", int64(option.Config.CTMapEntriesTimeoutSVCAny.Seconds()))
cDefinesMap["CT_SYN_TIMEOUT"] = fmt.Sprintf("%d", int64(option.Config.CTMapEntriesTimeoutSYN.Seconds()))
cDefinesMap["CT_CLOSE_TIMEOUT"] = fmt.Sprintf("%d", int64(option.Config.CTMapEntriesTimeoutFIN.Seconds()))
cDefinesMap["CT_REPORT_INTERVAL"] = fmt.Sprintf("%d", int64(option.Config.MonitorAggregationInterval.Seconds()))
--
2.32.0 (Apple Git-132)

0 comments on commit 0439efc

Please sign in to comment.