Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(WiP) feat: Support multiple destinations for a healthcheck #2704

Open
wants to merge 11 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci-lite.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
with:
semantic_version: 18
extra_plugins: |
@semantic-release/exec
@semantic-release/exec@v6.0.3
@semantic-release/git
semantic-release-helm
@google/[email protected]
Expand Down Expand Up @@ -430,7 +430,7 @@ jobs:
with:
semantic_version: 18
extra_plugins: |
@semantic-release/exec
@semantic-release/exec@v6.0.3
@semantic-release/git
semantic-release-helm
@google/[email protected]
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ci-main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
with:
semantic_version: 18
extra_plugins: |
@semantic-release/exec
@semantic-release/exec@v6.0.3
@semantic-release/git
semantic-release-helm
@google/[email protected]
Expand Down Expand Up @@ -456,7 +456,7 @@ jobs:
with:
semantic_version: 18
extra_plugins: |
@semantic-release/exec
@semantic-release/exec@v6.0.3
@semantic-release/git
semantic-release-helm
@google/[email protected]
Expand Down
41 changes: 31 additions & 10 deletions package/sbin/healthcheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import subprocess
import re

app = Flask(__name__)

Expand All @@ -14,11 +15,24 @@ def str_to_bool(value):
'yes'
}

def get_list_of_destinations() -> list:
    """Return the URLs of all configured Splunk HEC destinations.

    Scans the process environment for variables whose name matches
    ``SC4S_DEST_SPLUNK_HEC_<NAME>_URL`` and collects their values.

    The values (URLs), not the variable names, are returned because the
    queue-size check looks each destination up as a substring of the
    ``syslog-ng-ctl stats`` output, which is how the single default URL was
    matched before multiple destinations were supported.

    Returns:
        A list of non-empty destination URL strings, in environment
        iteration order. Empty when no matching variable is set.
    """
    pattern = re.compile(r"^SC4S_DEST_SPLUNK_HEC_(.*)_URL$")

    # Skip empty values: an empty string is a substring of every stats line
    # and would therefore match unrelated queue counters.
    return [
        url
        for env_var_key, url in os.environ.items()
        if url and pattern.search(env_var_key)
    ]

class Config:
    """Healthcheck settings, read from the environment when the class body runs."""

    # URL of the default Splunk HEC destination; None when the variable is unset.
    SC4S_DEST_SPLUNK_HEC_DEFAULT_URL = os.environ.get('SC4S_DEST_SPLUNK_HEC_DEFAULT_URL')

    # Port taken from SC4S_LISTEN_STATUS_PORT (defaults to 8080).
    HEALTHCHECK_PORT = int(os.environ.get('SC4S_LISTEN_STATUS_PORT', '8080'))

    # Queue-size checking is off unless HEALTHCHECK_CHECK_QUEUE_SIZE is truthy.
    CHECK_QUEUE_SIZE = str_to_bool(os.environ.get('HEALTHCHECK_CHECK_QUEUE_SIZE', "false"))

    # Upper bound the observed queue size is compared against.
    MAX_QUEUE_SIZE = int(os.environ.get('HEALTHCHECK_MAX_QUEUE_SIZE', '10000'))

    # Every SC4S_DEST_SPLUNK_HEC_*_URL destination found in the environment.
    DESTINATIONS = get_list_of_destinations()

logging.basicConfig(
format=f"%(asctime)s - healthcheck.py - %(levelname)s - %(message)s",
Expand Down Expand Up @@ -48,11 +62,11 @@ def check_syslog_ng_health() -> bool:
return False

def check_queue_size(
sc4s_dest_splunk_hec_default=Config.SC4S_DEST_SPLUNK_HEC_DEFAULT_URL,
sc4s_dest_splunk_hec_destinations=Config.DESTINATIONS,
max_queue_size=Config.MAX_QUEUE_SIZE
) -> bool:
"""Check syslog-ng queue size and compare it against the configured maximum limit."""
if not sc4s_dest_splunk_hec_default:
if not sc4s_dest_splunk_hec_destinations:
logger.error(
"SC4S_DEST_SPLUNK_HEC_DEFAULT_URL not configured. "
"Ensure the default HEC destination is set, or disable HEALTHCHECK_CHECK_QUEUE_SIZE."
Expand All @@ -71,15 +85,22 @@ def check_queue_size(
return False

stats = result.stdout.splitlines()
destination_stat = next(
(s for s in stats if ";queued;" in s and sc4s_dest_splunk_hec_default in s),
None
)
if not destination_stat:
logger.error("No matching queue stats found for the destination URL.")
return False

queue_size = int(destination_stat.split(";")[-1])
# destination_stat = []
queue_sizes = []
for destination in sc4s_dest_splunk_hec_destinations:
destination_found = next(
(s for s in stats if ";queued;" in s and destination in s),
None
)
if destination_found:
queue_sizes.append(int(destination_found.split(";")[-1]))

# if not destination_stat:
# logger.error("No matching queue stats found for the destination URL.")
# return False

queue_size = max(queue_sizes)
if queue_size > max_queue_size:
logger.warning(
f"Queue size {queue_size} exceeds the maximum limit of {max_queue_size}."
Expand Down
24 changes: 23 additions & 1 deletion tests/test_healthcheck_unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
check_syslog_ng_health,
subprocess,
check_queue_size,
get_list_of_destinations,
)

# str_to_bool
Expand Down Expand Up @@ -139,4 +140,25 @@ def test_health_endpoint_no_queue_check(mock_run, client):

response = client.get("/health")
assert response.status_code == 200
assert response.json["status"] == "healthy"
assert response.json["status"] == "healthy"

@patch.dict(
    os.environ,
    {
        "SC4S_DEST_SPLUNK_HEC_DEFAULT_URL": "http://test1:1234",
        "SC4S_DEST_SPLUNK_HEC_OTHER_URL": "http://test2:1234",
        "SOME_OTHER_URL": "http://test3:1234",
        "SC4S_LISTEN_STATUS_PORT": "1234",
    },
    clear=True
)
@patch("subprocess.run")
def test_list_destinations(mock_run):
    """
    get_list_of_destinations should report exactly the destinations configured
    through SC4S_DEST_SPLUNK_HEC_*_URL variables and ignore unrelated ones.
    """
    destinations = get_list_of_destinations()

    # Two of the four patched variables follow the HEC destination pattern.
    assert len(destinations) == 2

    # Neither the name nor the value of a non-matching variable may leak in.
    assert "SOME_OTHER_URL" not in destinations
    assert "http://test3:1234" not in destinations
    assert "SC4S_LISTEN_STATUS_PORT" not in destinations
    assert "1234" not in destinations
Loading