-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
handle unresponsive nodes #474
Changes from 6 commits
e11d09e
be070fb
901df8a
1b1d204
c0dd103
e536898
13c5c17
344a595
f7abdbe
b2ea119
b3ee8f2
f3171cc
ea2d3cb
20f85c3
74aa125
31221c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
use crate::eip::v2::Eip; | ||
use crate::Error; | ||
use k8s_openapi::api::core::v1::Node; | ||
use kube::api::{Api, ListParams, Patch, PatchParams}; | ||
use std::collections::{BTreeMap, BTreeSet}; | ||
use tracing::{info, instrument}; | ||
|
||
use crate::EGRESS_GATEWAY_NODE_SELECTOR_LABEL_KEY; | ||
use crate::EGRESS_GATEWAY_NODE_SELECTOR_LABEL_VALUE; | ||
|
||
/// Applies label to node specifying the status of the egress gateway node. | ||
#[instrument(skip(api), err)] | ||
async fn add_gateway_status_label( | ||
api: &Api<Node>, | ||
name: &str, | ||
status: &str, | ||
) -> Result<Node, kube::Error> { | ||
info!( | ||
"Adding gateway status label {} value {} to node {}", | ||
crate::EGRESS_NODE_STATUS_LABEL, | ||
status, | ||
name | ||
); | ||
let patch = serde_json::json!({ | ||
"apiVersion": "v1", | ||
"kind": "Node", | ||
"metadata": { | ||
"labels": { | ||
// Ensure the status is lowercase to match conventions | ||
crate::EGRESS_NODE_STATUS_LABEL: status.to_lowercase(), | ||
} | ||
} | ||
}); | ||
let patch = Patch::Apply(&patch); | ||
let params = PatchParams::apply(crate::FIELD_MANAGER); | ||
api.patch(name, ¶ms, &patch).await | ||
} | ||
|
||
/// Retrieve all egress nodes in the cluster. | ||
async fn get_egress_nodes(api: &Api<Node>) -> Result<Vec<Node>, kube::Error> { | ||
let params = ListParams::default().labels( | ||
format!( | ||
"{}={}", | ||
EGRESS_GATEWAY_NODE_SELECTOR_LABEL_KEY, EGRESS_GATEWAY_NODE_SELECTOR_LABEL_VALUE | ||
) | ||
.as_str(), | ||
); | ||
|
||
match api.list(¶ms).await { | ||
Ok(node_list) => Ok(node_list.items), | ||
Err(e) => Err(e), | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We probably don't need this if we're using the node selector on the eip. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Much cleaner and removed the need for some CONST labels in egress improvements |
||
|
||
/// Update state label on egress nodes. | ||
/// Note: Egress traffic will be immediately dropped when the status label is changed away from "true". | ||
#[instrument(skip(), err)] | ||
pub(crate) async fn label_egress_nodes( | ||
eip_api: &Api<Eip>, | ||
node_api: &Api<Node>, | ||
) -> Result<(), Error> { | ||
info!("Updating egress node status labels."); | ||
let node_list = get_egress_nodes(&Api::all(node_api.clone().into())).await?; | ||
if node_list.is_empty() { | ||
info!("No egress nodes found. Skipping egress cleanup."); | ||
return Ok(()); | ||
} | ||
|
||
let node_names_and_status: BTreeMap<String, String> = | ||
crate::node::get_nodes_ready_status(node_list)?; | ||
let (nodes_status_ready, nodes_status_unknown): (BTreeSet<String>, BTreeSet<String>) = | ||
node_names_and_status.iter().fold( | ||
evanharmon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
(BTreeSet::new(), BTreeSet::new()), | ||
|(mut ready, mut unknown), (name, status)| { | ||
match status.as_str() { | ||
"True" => { | ||
ready.insert(name.clone()); | ||
} | ||
"Unknown" => { | ||
unknown.insert(name.clone()); | ||
} | ||
// Ignore nodes in other states. | ||
&_ => { | ||
info!("Ignoring node {} with status {}", name, status); | ||
} | ||
} | ||
(ready, unknown) | ||
}, | ||
); | ||
|
||
// Ensure an egress node exists with an EIP and Ready state of `true`. | ||
let eip_resource_ids: BTreeSet<String> = eip_api | ||
.list(&ListParams::default()) | ||
.await? | ||
.items | ||
.into_iter() | ||
.filter_map(|eip| eip.status.and_then(|s| s.resource_id)) | ||
.collect(); | ||
let matched_ready_nodes_with_eip: BTreeSet<String> = nodes_status_ready | ||
.intersection(&eip_resource_ids) | ||
.cloned() | ||
.collect(); | ||
|
||
if matched_ready_nodes_with_eip.is_empty() { | ||
info!("No ready egress nodes found with EIPs. Skipping egress labeling."); | ||
return Ok(()); | ||
} | ||
|
||
info!( | ||
"Found ready egress nodes with EIPs: {:?}", | ||
matched_ready_nodes_with_eip | ||
); | ||
// Set egress status for nodes ready with an EIP attached. | ||
for node_name in nodes_status_ready { | ||
evanharmon marked this conversation as resolved.
Show resolved
Hide resolved
|
||
add_gateway_status_label(node_api, &node_name, "ready").await?; | ||
} | ||
// Attempt cleanup of nodes in a ready state of `Unknown` if another node is ready with an EIP. | ||
for node_name in nodes_status_unknown { | ||
add_gateway_status_label(node_api, &node_name, "unknown").await?; | ||
} | ||
|
||
Ok(()) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,16 @@ | ||
use std::collections::{HashMap, HashSet}; | ||
use std::pin::pin; | ||
use std::time::Duration; | ||
|
||
use aws_sdk_ec2::types::Filter; | ||
use aws_sdk_ec2::Client as Ec2Client; | ||
use aws_sdk_servicequotas::types::ServiceQuota; | ||
use aws_sdk_servicequotas::Client as ServiceQuotaClient; | ||
use futures::future::join_all; | ||
use futures::TryStreamExt; | ||
use json_patch::{PatchOperation, RemoveOperation, TestOperation}; | ||
use k8s_controller::Controller; | ||
use k8s_openapi::api::core::v1::Pod; | ||
use k8s_openapi::api::core::v1::{Node, Pod}; | ||
use kube::api::{Api, ListParams, Patch, PatchParams}; | ||
use kube::{Client, ResourceExt}; | ||
use tokio::task; | ||
|
@@ -20,8 +22,10 @@ use eip::v2::Eip; | |
|
||
mod aws; | ||
mod controller; | ||
mod egress; | ||
mod eip; | ||
mod kube_ext; | ||
mod node; | ||
|
||
const LEGACY_MANAGE_EIP_LABEL: &str = "eip.aws.materialize.com/manage"; | ||
const LEGACY_POD_FINALIZER_NAME: &str = "eip.aws.materialize.com/disassociate"; | ||
|
@@ -30,6 +34,9 @@ const FIELD_MANAGER: &str = "eip.materialize.cloud"; | |
const AUTOCREATE_EIP_LABEL: &str = "eip.materialize.cloud/autocreate_eip"; | ||
const EIP_ALLOCATION_ID_ANNOTATION: &str = "eip.materialize.cloud/allocation_id"; | ||
const EXTERNAL_DNS_TARGET_ANNOTATION: &str = "external-dns.alpha.kubernetes.io/target"; | ||
const EGRESS_GATEWAY_NODE_SELECTOR_LABEL_KEY: &str = "workload"; | ||
const EGRESS_GATEWAY_NODE_SELECTOR_LABEL_VALUE: &str = "materialize-egress"; | ||
const EGRESS_NODE_STATUS_LABEL: &str = "egress-gateway.materialize.cloud/status"; | ||
|
||
// See https://us-east-1.console.aws.amazon.com/servicequotas/home/services/ec2/quotas | ||
// and filter in the UI for EC2 quotas like this, or use the CLI: | ||
|
@@ -89,6 +96,9 @@ async fn run() -> Result<(), Error> { | |
None => Api::<Eip>::all(k8s_client.clone()), | ||
}; | ||
|
||
debug!("Getting node api"); | ||
let node_api = Api::<Node>::all(k8s_client.clone()); | ||
|
||
debug!("Cleaning up any orphaned EIPs"); | ||
cleanup_orphan_eips( | ||
&ec2_client, | ||
|
@@ -149,6 +159,30 @@ async fn run() -> Result<(), Error> { | |
task::spawn(eip_controller.run()) | ||
}); | ||
|
||
tasks.push({ | ||
let eip_api = eip_api.clone(); | ||
let node_api = node_api.clone(); | ||
let watch_config = | ||
kube_runtime::watcher::Config::default().labels(EGRESS_GATEWAY_NODE_SELECTOR_LABEL_KEY); | ||
|
||
task::spawn(async move { | ||
let mut watcher = pin!(kube_runtime::watcher(node_api.clone(), watch_config)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hm I'd think we'd want to watch separately, this watcher is also self-triggering, where setting the egress gateway status will trigger another reconciliation. not a correctness issue if it converges, but could create some read/write amplification or hard-to-debug issues if it does anything unexpected There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good find! The watcher is now correctly configured on Eip resources. |
||
|
||
while let Some(node) = watcher.try_next().await.unwrap_or_else(|e| { | ||
event!(Level::ERROR, err = %e, "Error watching nodes"); | ||
None | ||
}) { | ||
if let kube_runtime::watcher::Event::Applied(_) | ||
| kube_runtime::watcher::Event::Deleted(_) = node | ||
{ | ||
if let Err(err) = crate::egress::label_egress_nodes(&eip_api, &node_api).await { | ||
event!(Level::ERROR, err = %err, "Node egress labeling reporting error"); | ||
} | ||
} | ||
} | ||
}) | ||
}); | ||
|
||
join_all(tasks).await; | ||
|
||
debug!("exiting"); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
use std::collections::BTreeMap; | ||
|
||
use eip_operator_shared::Error; | ||
use k8s_openapi::api::core::v1::Node; | ||
|
||
/// Get Ready status from the node status field. | ||
pub(crate) fn get_ready_status_from_node(node: &Node) -> Option<String> { | ||
node.status | ||
.as_ref()? | ||
.conditions | ||
.as_ref()? | ||
.iter() | ||
.find(|c| c.type_ == "Ready") | ||
.map(|condition| condition.status.clone()) | ||
} | ||
|
||
/// Retrieve node names and ready status given a list of nodes. | ||
pub(crate) fn get_nodes_ready_status( | ||
node_list: Vec<Node>, | ||
) -> Result<BTreeMap<String, String>, Error> { | ||
let mut node_ready_status_map = BTreeMap::new(); | ||
|
||
for node in node_list { | ||
if let Some(ref node_name) = node.metadata.name { | ||
let ready_status = | ||
get_ready_status_from_node(&node).ok_or(Error::MissingNodeReadyCondition)?; | ||
|
||
node_ready_status_map.insert(node_name.to_string(), ready_status); | ||
} | ||
} | ||
|
||
Ok(node_ready_status_map) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably enough to say that it will soon be archived or
will soon be archived and is no longer under active development.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cleaned up the wording per node improvements. I left in the warning portion as I found that in another MZ public archive as a practice.