Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 60 additions & 48 deletions openwisp-monitoring/files/monitoring.agent
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,41 @@ show_version() {

# Prints all arguments to stderr as a single line, then terminates the
# script with exit status 1.
#
# Arguments: $@ - the words of the error message
# Returns:   never returns; always exits with status 1
echoerr() {
	# printf keeps the text literal even if it starts with "-n" or
	# contains backslashes
	printf '%s\n' "$*" 1>&2
	# exit unconditionally: the script must stop even when the write to
	# stderr fails (e.g. stderr is closed)
	exit 1
}

# Sends a message through logger (with -s, so it is also written to
# stderr), tagged "openwisp-monitoring".
#
# Arguments:
#   $1   - message level: -i (daemon.info), -w (daemon.warn) or
#          -e (daemon.err); any other value starting with "-" aborts the
#          script through echoerr, anything else is handed to logger
#          unchanged as the priority
#   $2   - message type: -v marks the message as verbose-only; any other
#          value (callers use -n) means the message is always logged
#   $3.. - the message text
# Globals:
#   VERBOSE_MODE (read) - verbose-only messages are logged only when it is 1
# Returns: 0 when a verbose-only message is skipped, otherwise the exit
#          status of logger
log() {
	level="$1"
	type="$2"
	shift 2

	# default to 0 so that an unset or empty VERBOSE_MODE silences verbose
	# messages instead of making the numeric comparison error out
	if [ "$type" = "-v" ] && [ "${VERBOSE_MODE:-0}" -ne "1" ]; then
		return 0
	fi

	case "$level" in
		-i) level=daemon.info ;;
		-w) level=daemon.warn ;;
		-e) level=daemon.err ;;
		-*)
			echoerr "Invalid message level : $level"
			;;
	esac

	# options come before the message and "--" ends option parsing, so a
	# message that starts with "-" is never mistaken for a logger flag
	logger -s -p "$level" -t openwisp-monitoring -- "$@"
}

# Converts a time value to a number of seconds and prints it on stdout.
#
# Accepted forms are a plain integer (already seconds) or an integer
# followed by one of the suffixes s, m, h or d (seconds, minutes, hours,
# days). The numeric part must be at least 1.
#
# Arguments: $1 - the time value, e.g. "10", "30s", "2m", "3h", "1d"
# Outputs:   the number of seconds, or an empty line when the value is
#            not valid
time_to_seconds() {
	time=$1
	# start from a clean slate so that a "seconds" value inherited from the
	# caller or the environment is never echoed for an invalid input
	seconds=""

	# each attempt strips one possible suffix; the numeric test fails
	# (errors silenced) unless what remains is an integer >= 1
	{ [ "$time" -ge 1 ] 2>/dev/null && seconds="$time"; } \
		|| { [ "${time%s}" -ge 1 ] 2>/dev/null && seconds="${time%s}"; } \
		|| { [ "${time%m}" -ge 1 ] 2>/dev/null && seconds=$((${time%m} * 60)); } \
		|| { [ "${time%h}" -ge 1 ] 2>/dev/null && seconds=$((${time%h} * 3600)); } \
		|| { [ "${time%d}" -ge 1 ] 2>/dev/null && seconds=$((${time%d} * 86400)); }

	echo "$seconds"
	unset seconds
	unset time
}

check_available_memory() {
while true; do
total=$(ubus call system info | jsonfilter -e '@.memory.total')
Expand All @@ -50,9 +85,7 @@ check_available_memory() {
if [ -f "$file" ]; then
rm "$file"
else
[ "$VERBOSE_MODE" -eq "1" ] \
&& logger -s "Not enough memory available, skipping collect data." \
-p daemon.warn
log -w -n "Not enough memory available, skipping collect data."
return 1
fi
fi
Expand All @@ -61,14 +94,11 @@ check_available_memory() {

collect_data() {
n=0
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "Collecting NetJSON Monitoring data." \
-p daemon.info
log -i -v "Collecting NetJSON Monitoring data."
until [ "$n" -ge 5 ]; do
/usr/sbin/netjson-monitoring --dump "$MONITORED_INTERFACES" && break

if [ "$n" -eq 5 ]; then
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "Collecting data failed!" -p daemon.err
fi
[ "$n" -eq 5 ] && log -e -v "Collecting data failed!"
n=$((n + 1))
sleep 5
done
Expand Down Expand Up @@ -101,8 +131,7 @@ save_data() {
echo "$data" >"$TMP_DIR/$filename"
# compress data
gzip "$TMP_DIR/$filename"
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "Data saved temporarily." \
-p daemon.info
log -i -v "Data saved temporarily."
fi
# get process id of the process sending data
pid=$(pgrep -f "openwisp-monitoring.*--mode send")
Expand All @@ -112,17 +141,15 @@ save_data() {
}

handle_sigusr1() {
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "SIGUSR1 received! Sending data." \
-p daemon.info
log -i -v "SIGUSR1 received! Sending data."
return 0
}

send_data() {
while true; do
for file in "$TMP_DIR"/*; do
if [ ! -f "$file" ]; then
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "No data file found to send." \
-p daemon.info
log -i -v "No data file found to send. Checking again in $INTERVAL seconds."
trap handle_sigusr1 USR1
# SIGUSR1 signal received, interrupt sleep and continue sending data
sleep "$INTERVAL" &
Expand All @@ -149,46 +176,36 @@ send_data() {
while true; do
if [ "$failures" -eq "$MAX_RETRIES" ]; then
[ -f "$RESPONSE_FILE" ] && error_message="\"$(cat "$RESPONSE_FILE")\"" || error_message='"".'
if [ "$VERBOSE_MODE" -eq "1" ]; then
logger -s "Data not sent successfully. Response code is \"$response_code\"." \
"Error message is $error_message" \
-p daemon.err
elif [ "$FAILING" -eq "0" ]; then
log -e -v "Data not sent successfully. Response code is \"$response_code\"." \
"Error message is $error_message"
# check if agent was already passing or not to avoid repeating log messages
if [ "$FAILING" -eq "0" ]; then
FAILING=1
logger -s "Data not sent successfully. Response code is \"$response_code\"." \
"Error message is $error_message" \
"Run with verbose mode to find more." \
-t openwisp-monitoring \
-p daemon.err
[ "$VERBOSE_MODE" -ne "1" ] && log -e -n "Data not sent successfully. Response code is \"$response_code\"." \
"Run with verbose mode to find more."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's still repetition here which makes the code hard to read.
I think we need to edit the log function to avoid this.

Ideally we could do something like:

log -e -n "Data not sent successfully. Response code is \"$response_code\"." \
    -v "Error message is $error_message"

The non-verbose output should also be printed in verbose mode, while the verbose output is appended to the non-verbose output only when in verbose mode. So in non-verbose mode we would have:

Data not sent successfully. Response code is "400".

While in verbose mode we would have:

Data not sent successfully. Response code is "400". Error message is ERROR_MESSAGE_HERE.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The condition is here to ensure that logs are not added unnecessarily when the connection stays interrupted for a long time due to some issue.

Discussed here in "Non-verbose mode" #42 (review)

fi
break
fi
# send data
response_code=$($CURL_COMMAND -H "Content-Type: application/json" -d "$data" "$url")
if [ "$response_code" = "200" ]; then
if [ "$VERBOSE_MODE" -eq "1" ]; then
logger -s "Data sent successfully." \
-p daemon.info
elif [ "$FAILING" -eq "1" ]; then
logger -s "Data sent successfully." \
-t openwisp-monitoring \
-p daemon.info
log -i -v "Data sent successfully."
# check if agent was already failing or not to avoid repeating log messages
if [ "$FAILING" -eq "1" ]; then
FAILING=0
rm -f "$RESPONSE_FILE"
[ "$VERBOSE_MODE" -ne "1" ] && log -i -n "Data sent successfully."
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here: "Data sent successfully." is repeated twice and there's another unnecessary if here.

Copy link
Member Author

@devkapilbansal devkapilbansal Jan 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this so that we don't fill the logs unnecessarily when data is transferred normally. In non-verbose mode, it will add the log "Data sent successfully" only once, and the line will appear again only if the connection is interrupted.
This line is logged only when the variable FAILING is set to 1.

fi
# remove saved data
rm -f "$filename"
break
elif [ "$response_code" = "400" ]; then
logger -s "Data not sent successfully (HTTP status $response_code), discarding data." \
-t openwisp-monitoring \
-p daemon.err
log -i -n "Data not sent successfully (HTTP status $response_code), discarding data."
rm -f "$filename"
break
else
timeout=$(/usr/sbin/openwisp-get-random-number 2 15)
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "Data not sent successfully. Retrying in $timeout seconds." \
-p daemon.warn
log -w -v "Data not sent successfully. Retrying in $timeout seconds."
failures=$((failures + 1))
sleep "$timeout"
fi
Expand All @@ -203,17 +220,13 @@ wait_until_registered() {
if [ -n "$UUID" ] && [ -n "$KEY" ]; then
return 0
fi
logger -s "Waiting for device to register." \
-t openwisp-monitoring \
-p daemon.info
log -i -n "Waiting for device to register."
UUID=$(uci get openwisp.http.uuid 2>/dev/null)
KEY=$(uci get openwisp.http.key 2>/dev/null)
if [ -z "$UUID" ] || [ -z "$KEY" ]; then
return 1
fi
logger -s "Setting uuid and key." \
-t openwisp-monitoring \
-p daemon.info
log -i -n "Setting uuid and key."
export UUID KEY
}

Expand All @@ -223,9 +236,7 @@ bootup_delay() {
if [ "$BOOTUP_DELAY" -ne "0" ]; then
# get a random number between zero and $BOOTUP_DELAY
DELAY=$(/usr/sbin/openwisp-get-random-number 0 "$BOOTUP_DELAY")
logger "Delaying initialization of the monitoring agent for $DELAY seconds." \
-t openwisp-monitoring \
-p daemon.info
log -i -n "Delaying initialization of the monitoring agent for $DELAY seconds."
sleep "$DELAY"
fi
# send bootup hotplug event
Expand Down Expand Up @@ -288,8 +299,7 @@ main() {
shift
;;
-*)
echo "Invalid option: $1"
exit 1
echoerr "Invalid option: $1."
;;
*) break ;;
esac
Expand All @@ -298,6 +308,8 @@ main() {

INTERVAL=${INTERVAL:-300}
REGISTRATION_INTERVAL=$((INTERVAL / 10))
INTERVAL="$(time_to_seconds "$INTERVAL")"
[ -z "$INTERVAL" ] && echoerr "Interval is invalid. Use time value(eg: '10', '2m', '3h', '1d')"
VERBOSE_MODE=${VERBOSE_MODE:-0}
BOOTUP_DELAY=${BOOTUP_DELAY:-10}
TMP_DIR="/tmp/openwisp/monitoring"
Expand Down Expand Up @@ -325,7 +337,7 @@ main() {
RESPONSE_FILE="$TMP_DIR"/response.txt
set_url_and_curl && send_data
else
echoerr "The supplied mode is invalid. Only send and collect are allowed"
echoerr "The supplied mode is invalid. Only send and collect are allowed."
fi
}

Expand Down
107 changes: 41 additions & 66 deletions openwisp-monitoring/files/monitoring.init
Original file line number Diff line number Diff line change
Expand Up @@ -4,93 +4,68 @@

# shellcheck disable=SC2034
START=99
STOP=15

USE_PROCD=1
PROG="/usr/sbin/openwisp-monitoring"
PROG_NAME="OpenWISP monitoring daemon"

time_to_seconds() {
time=$1

{ [ "$time" -ge 1 ] 2>/dev/null && seconds="$time"; } \
|| { [ "${time%s}" -ge 1 ] 2>/dev/null && seconds="${time%s}"; } \
|| { [ "${time%m}" -ge 1 ] 2>/dev/null && seconds=$((${time%m} * 60)); } \
|| { [ "${time%h}" -ge 1 ] 2>/dev/null && seconds=$((${time%h} * 3600)); } \
|| { [ "${time%d}" -ge 1 ] 2>/dev/null && seconds=$((${time%d} * 86400)); }
add_option() {
# shellcheck disable=SC3043
{
local cfg="$1"
local flag="$2"
local option="$3"
local default="$4"
local value
}

echo $seconds
unset seconds
unset time
config_get value "$cfg" "$option" "$default"
[ -n "$value" ] && procd_append_param command "$flag" "$value"
}

start_service() {
# for openwisp-config
config_load openwisp
config_get base_url http url
config_get uuid http uuid
config_get key http key
config_get_bool verify_ssl http verify_ssl "1"
config_get respawn_threshold http respawn_threshold
config_get respawn_timeout http respawn_timeout
config_get respawn_retry http respawn_retry

[ -n "$base_url" ] && base_url="--url $base_url"
[ -n "$uuid" ] && uuid="--uuid $uuid"
[ -n "$key" ] && key="--key $key"
[ -n "$verify_ssl" ] && verify_ssl="--verify_ssl $verify_ssl"

if [ -z "$base_url" ]; then
logger -s "url is not set, please add it to /etc/config/openwisp" \
-t openwisp-monitoring \
-p daemon.err
exit 1
fi

# for openwisp-monitoring

respawn_threshold=$(config_get http respawn_threshold)
respawn_timeout=$(config_get http respawn_timeout)
respawn_retry=$(config_get http respawn_retry)

procd_open_instance "openwisp-monitoring_send_data"
procd_set_param command $PROG

add_option "http" "--url" url
add_option "http" "--uuid" uuid
add_option "http" "--key" key
add_option "http" "--verify_ssl" verify_ssl "1"

config_load openwisp-monitoring
config_get monitored_interfaces monitoring monitored_interfaces "*"
config_get interval monitoring interval "300"
config_get_bool verbose_mode monitoring verbose_mode "0"
config_get required_memory monitoring required_memory "0.05"
config_get max_retries monitoring max_retries "5"
config_get bootup_delay monitoring bootup_delay "10"

interval="$(time_to_seconds "$interval")"
if [ "$interval" -lt 1 ]; then
logger -s "Interval is invalid. Use time value(eg: '10', '2m', '3h', '1d')" \
-t openwisp-monitoring \
-p daemon.err
exit 1
fi
interval="--interval $interval"
verbose="--verbose_mode ${verbose_mode:-0}"
required_memory="--required_memory $required_memory"
max_retries="--max_retries $max_retries"
bootup_delay="--bootup_delay $bootup_delay"

procd_open_instance "openwisp-monitoring_collect_data"
# shellcheck disable=SC2086,SC2154
procd_set_param command $PROG $interval $verbose $required_memory --mode collect --monitored_interfaces "$monitored_interfaces"
add_option "monitoring" "--verbose_mode" verbose_mode "0"
add_option "monitoring" "--max_retries" max_retries "5"
add_option "monitoring" "--interval" interval "300"
procd_append_param command "--mode" "send"

procd_set_param respawn "${respawn_threshold:-3600}" "${respawn_timeout:-5}" "${respawn_retry:-5}"
[ "$verbose_mode" -eq "1" ] && procd_set_param stdout 1 && procd_set_param stderr 1
procd_close_instance

procd_open_instance "openwisp-monitoring_send_data"
# shellcheck disable=SC2086
procd_set_param command $PROG $base_url $uuid $key $verify_ssl $interval $verbose $max_retries $bootup_delay --mode send
procd_open_instance "openwisp-monitoring_collect_data"
procd_set_param command $PROG

add_option "monitoring" "--monitored_interfaces" monitored_interfaces "*"
add_option "monitoring" "--required_memory" required_memory "0.05"
add_option "monitoring" "--verbose_mode" verbose_mode "0"
add_option "monitoring" "--interval" interval "300"
procd_append_param command "--mode" "collect"

procd_set_param respawn "${respawn_threshold:-3600}" "${respawn_timeout:-5}" "${respawn_retry:-5}"
[ "$verbose_mode" -eq "1" ] && procd_set_param stdout 1 && procd_set_param stderr 1
procd_close_instance

logger -s "$PROG_NAME started" \
-t openwisp-monitoring \
-p daemon.info
logger -s "$PROG_NAME started." -t openwisp-monitoring -p daemon.info
}

stop_service() {
logger -s "$PROG_NAME stopping" \
-t openwisp-monitoring \
-p daemon.info
logger -s "$PROG_NAME stopping." -t openwisp-monitoring -p daemon.info
}

service_triggers() {
Expand Down