Skip to content

Commit d5b4428

Browse files
committed
Redo counting of active_resources
1 parent 3f4a4ab commit d5b4428

File tree

1 file changed

+44
-2
lines changed

1 file changed

+44
-2
lines changed

heartbeat/podman-etcd

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,48 @@ get_peer_node_name() {
10291029
crm_node -l | awk '{print $2}' | grep -v "$NODENAME"
10301030
}
10311031

1032+
# Print the count of truly active resources, i.e. the resources listed as
# active that are not also listed as being stopped.
#
# According to Pacemaker documentation, during "Post-notification (stop) /
# Pre-notification (start)" transitions, the true active resource count is:
#   Active resources = $OCF_RESKEY_CRM_meta_notify_active_resource
#                      minus $OCF_RESKEY_CRM_meta_notify_stop_resource
# This handles the case where a resource appears in both the active and stop
# lists during rapid restart scenarios (e.g., process crash recovery).
#
# Globals:
#   OCF_RESKEY_CRM_meta_notify_active_resource (read) - whitespace-separated list
#   OCF_RESKEY_CRM_meta_notify_stop_resource   (read) - whitespace-separated list
# Outputs:
#   The count on stdout as a bare decimal integer (no padding), so callers can
#   match it directly in a `case` statement.
get_truly_active_resources_count() {
	local active_list="$OCF_RESKEY_CRM_meta_notify_active_resource"
	local stop_list="$OCF_RESKEY_CRM_meta_notify_stop_resource"
	# Loop variables are declared local so they do not leak into the agent's
	# global scope.
	local resource stop_resource is_stopping
	local count=0

	# Both lists are whitespace-separated, so the expansions are left unquoted
	# on purpose to get word splitting. An empty active list yields 0 and an
	# empty stop list leaves every active resource counted, so no special
	# cases are needed.
	for resource in $active_list; do
		is_stopping=0
		for stop_resource in $stop_list; do
			if [ "$resource" = "$stop_resource" ]; then
				is_stopping=1
				break
			fi
		done
		if [ "$is_stopping" -eq 0 ]; then
			count=$((count + 1))
		fi
	done

	# Counting with shell arithmetic avoids forking `wc -w`, whose output is
	# padded with blanks on some platforms.
	echo "$count"
}
1073+
10321074
get_all_etcd_endpoints() {
10331075
for node in $(echo "$OCF_RESKEY_node_ip_map" | sed "s/\s//g;s/;/ /g"); do
10341076
name=$(echo "$node" | cut -d: -f1)
@@ -1529,8 +1571,8 @@ podman_start()
15291571
# - 0 active agents, 1 starting: we are starting; the peer is not starting
15301572
# - 0 active agents, 2 starting: both agents are starting simultaneously
15311573
local active_resources_count
1532-
active_resources_count=$(echo "$OCF_RESKEY_CRM_meta_notify_active_resource" | wc -w)
1533-
ocf_log info "found '$active_resources_count' active etcd resources (meta notify environment variable: '$OCF_RESKEY_CRM_meta_notify_active_resource')"
1574+
active_resources_count=$(get_truly_active_resources_count)
1575+
ocf_log info "found '$active_resources_count' active etcd resources (active: '$OCF_RESKEY_CRM_meta_notify_active_resource', stop: '$OCF_RESKEY_CRM_meta_notify_stop_resource')"
15341576
case "$active_resources_count" in
15351577
1)
15361578
if [ "$(attribute_learner_node get)" = "$(get_peer_node_name)" ]; then

0 commit comments

Comments
 (0)