-
Notifications
You must be signed in to change notification settings - Fork 156
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #365 from BSWANG/add-tcp-rebalance
add tcp rebalance
- Loading branch information
Showing
1 changed file
with
110 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
From 9d9ee57aed9b09109d3c08e2da077d6a6a8fa03b Mon Sep 17 00:00:00 2001 | ||
From: Amol Ambekar <[email protected]> | ||
Date: Thu, 14 Apr 2022 15:23:25 -0700 | ||
Subject: [PATCH] rebalance | ||
|
||
--- | ||
bpf/lib/conntrack.h | 34 +++++++++++++++++++++++++++++ | ||
bpf/node_config.h | 1 + | ||
bpf/tests/bpf_ct_tests.c | 1 + | ||
pkg/datapath/linux/config/config.go | 1 + | ||
4 files changed, 37 insertions(+) | ||
|
||
diff --git a/bpf/lib/conntrack.h b/bpf/lib/conntrack.h | ||
index 6c303dbbb5..c663733055 100644 | ||
--- a/bpf/lib/conntrack.h | ||
+++ b/bpf/lib/conntrack.h | ||
@@ -183,6 +183,23 @@ static __always_inline bool ct_entry_alive(const struct ct_entry *entry) | ||
return !entry->rx_closing || !entry->tx_closing; | ||
} | ||
|
||
+static __always_inline bool ct_entry_closing(const struct ct_entry *entry) | ||
+{ | ||
+ return entry->tx_closing || entry->rx_closing; | ||
+} | ||
+ | ||
+static __always_inline bool | ||
+ct_entry_closing_wait_before_rebalance(const struct ct_entry *entry) | ||
+{ | ||
+ __u32 now = bpf_mono_now(); | ||
+ __u32 wait_time = bpf_sec_to_mono(CT_SERVICE_CLOSE_REBALANCE); | ||
+ | ||
+ /* This doesn't check last_rx_report because we don't see closing | ||
+ * in RX direction for CT_SERVICE. | ||
+ */ | ||
+ return READ_ONCE(entry->last_tx_report) + wait_time >= now; | ||
+} | ||
+ | ||
/* Helper for holding 2nd service entry alive in nodeport case. */ | ||
static __always_inline bool __ct_entry_keep_alive(const void *map, | ||
const void *tuple) | ||
@@ -222,6 +239,22 @@ static __always_inline __u8 __ct_lookup(const void *map, struct __ctx_buff *ctx, | ||
entry = map_lookup_elem(map, tuple); | ||
if (entry) { | ||
cilium_dbg(ctx, DBG_CT_MATCH, entry->lifetime, entry->rev_nat_index); | ||
+ #ifdef HAVE_LARGE_INSN_LIMIT | ||
+ if (dir == CT_SERVICE && | ||
+ ct_entry_closing(entry) && | ||
+ (seen_flags.value & TCP_FLAG_SYN) && | ||
+ !ct_entry_closing_wait_before_rebalance(entry)) { | ||
+ /* There is an existing entry for this service. However, | ||
+ * the old connection was already closed in the past. | ||
+ * Since this is a new connection, we want it to pick a | ||
+ * new backend. Hence don't reopen this entry if it's | ||
+ * been longer than CT_SERVICE_CLOSE_REBALANCE seconds. | ||
+ * (CT_SERVICE_CLOSE_REBALANCE is a grace period for any | ||
+ * in-flight packets related to the old connection). | ||
+ */ | ||
+ goto ct_new; | ||
+ } | ||
+ #endif | ||
if (ct_entry_alive(entry)) | ||
*monitor = ct_update_timeout(entry, is_tcp, dir, seen_flags); | ||
if (ct_state) { | ||
@@ -290,6 +323,7 @@ static __always_inline __u8 __ct_lookup(const void *map, struct __ctx_buff *ctx, | ||
return CT_ESTABLISHED; | ||
} | ||
|
||
+ct_new: __maybe_unused | ||
*monitor = TRACE_PAYLOAD_LEN; | ||
return CT_NEW; | ||
} | ||
diff --git a/bpf/node_config.h b/bpf/node_config.h | ||
index 2f22721841..d8c12d4815 100644 | ||
--- a/bpf/node_config.h | ||
+++ b/bpf/node_config.h | ||
@@ -39,6 +39,7 @@ DEFINE_IPV6(HOST_IP, 0xbe, 0xef, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0xa, 0x | ||
#define CT_CONNECTION_LIFETIME_NONTCP 60 | ||
#define CT_SERVICE_LIFETIME_TCP 21600 | ||
#define CT_SERVICE_LIFETIME_NONTCP 60 | ||
+#define CT_SERVICE_CLOSE_REBALANCE 30 | ||
#define CT_SYN_TIMEOUT 60 | ||
#define CT_CLOSE_TIMEOUT 10 | ||
#define CT_REPORT_INTERVAL 5 | ||
diff --git a/bpf/tests/bpf_ct_tests.c b/bpf/tests/bpf_ct_tests.c | ||
index 9211c3067a..d28d002cd8 100644 | ||
--- a/bpf/tests/bpf_ct_tests.c | ||
+++ b/bpf/tests/bpf_ct_tests.c | ||
@@ -29,6 +29,7 @@ | ||
#define CT_CONNECTION_LIFETIME_NONTCP 60 | ||
#define CT_SERVICE_LIFETIME_TCP 21600 | ||
#define CT_SERVICE_LIFETIME_NONTCP 60 | ||
+#define CT_SERVICE_CLOSE_REBALANCE 30 | ||
#define CT_SYN_TIMEOUT 60 | ||
#define CT_CLOSE_TIMEOUT 10 | ||
#define CT_REPORT_INTERVAL 5 | ||
diff --git a/pkg/datapath/linux/config/config.go b/pkg/datapath/linux/config/config.go | ||
index 0b4ae87c51..d433a6f52e 100644 | ||
--- a/pkg/datapath/linux/config/config.go | ||
+++ b/pkg/datapath/linux/config/config.go | ||
@@ -159,6 +159,7 @@ func (h *HeaderfileWriter) WriteNodeConfig(w io.Writer, cfg *datapath.LocalNodeC | ||
cDefinesMap["CT_CONNECTION_LIFETIME_NONTCP"] = fmt.Sprintf("%d", int64(option.Config.CTMapEntriesTimeoutAny.Seconds())) | ||
cDefinesMap["CT_SERVICE_LIFETIME_TCP"] = fmt.Sprintf("%d", int64(option.Config.CTMapEntriesTimeoutSVCTCP.Seconds())) | ||
cDefinesMap["CT_SERVICE_LIFETIME_NONTCP"] = fmt.Sprintf("%d", int64(option.Config.CTMapEntriesTimeoutSVCAny.Seconds())) | ||
+ cDefinesMap["CT_SERVICE_CLOSE_REBALANCE"] = fmt.Sprintf("%d", int64(option.Config.CTMapEntriesTimeoutSVCAny.Seconds())) | ||
cDefinesMap["CT_SYN_TIMEOUT"] = fmt.Sprintf("%d", int64(option.Config.CTMapEntriesTimeoutSYN.Seconds())) | ||
cDefinesMap["CT_CLOSE_TIMEOUT"] = fmt.Sprintf("%d", int64(option.Config.CTMapEntriesTimeoutFIN.Seconds())) | ||
cDefinesMap["CT_REPORT_INTERVAL"] = fmt.Sprintf("%d", int64(option.Config.MonitorAggregationInterval.Seconds())) | ||
-- | ||
2.32.0 (Apple Git-132) | ||
|