Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 62 additions & 53 deletions openwisp-monitoring/files/monitoring.agent
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,41 @@ show_version() {

echoerr() { echo "$@" 1>&2 && exit 1; }

log() {
level="$1"
type="$2"
shift 2

if [ "$type" = "-v" ] && [ "$VERBOSE_MODE" -ne "1" ]; then
return 0
fi

case "$level" in
-i) level=daemon.info ;;
-w) level=daemon.warn ;;
-e) level=daemon.err ;;
-*)
echoerr "Invalid message level : $level"
;;
esac

logger -s "$@" -p "$level" -t openwisp-monitoring
}

time_to_seconds() {
time=$1

{ [ "$time" -ge 1 ] 2>/dev/null && seconds="$time"; } \
|| { [ "${time%s}" -ge 1 ] 2>/dev/null && seconds="${time%s}"; } \
|| { [ "${time%m}" -ge 1 ] 2>/dev/null && seconds=$((${time%m} * 60)); } \
|| { [ "${time%h}" -ge 1 ] 2>/dev/null && seconds=$((${time%h} * 3600)); } \
|| { [ "${time%d}" -ge 1 ] 2>/dev/null && seconds=$((${time%d} * 86400)); }

echo "$seconds"
unset seconds
unset time
}

check_available_memory() {
while true; do
total=$(ubus call system info | jsonfilter -e '@.memory.total')
Expand All @@ -52,9 +87,7 @@ check_available_memory() {
if [ -f "$file" ]; then
rm "$file"
else
[ "$VERBOSE_MODE" -eq "1" ] \
&& logger -s "Not enough memory available, skipping collect data." \
-p daemon.warn
log -w -n "Not enough memory available, skipping collect data."
return 1
fi
fi
Expand All @@ -63,14 +96,11 @@ check_available_memory() {

collect_data() {
n=0
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "Collecting NetJSON Monitoring data." \
-p daemon.info
log -i -v "Collecting NetJSON Monitoring data."
until [ "$n" -ge 5 ]; do
/usr/sbin/netjson-monitoring --dump "$MONITORED_INTERFACES" && break

if [ "$n" -eq 5 ]; then
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "Collecting data failed!" -p daemon.err
fi
[ "$n" -eq 4 ] && log -e -v "Collecting data failed!"
n=$((n + 1))
sleep 5
done
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Expand Down Expand Up @@ -105,8 +135,7 @@ save_data() {
echo "$data" >"$TMP_DIR/$filename"
# compress data
gzip "$TMP_DIR/$filename"
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "Data saved temporarily." \
-p daemon.info
log -i -v "Data saved temporarily."
fi
# get process id of the process sending data
pid=$(pgrep -f "openwisp-monitoring.*--mode send")
Expand All @@ -116,8 +145,7 @@ save_data() {
}

handle_sigusr1() {
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "SIGUSR1 received! Sending data." \
-p daemon.info
log -i -v "SIGUSR1 received! Sending data."
return 0
}

Expand All @@ -126,8 +154,7 @@ send_data() {
while true; do
for file in "$TMP_DIR"/*; do
if [ ! -f "$file" ]; then
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "No data file found to send." \
-p daemon.info
log -i -v "No data file found to send. Checking again in $INTERVAL seconds."
trap handle_sigusr1 USR1
# SIGUSR1 signal received, interrupt sleep and continue sending data
sleep "$INTERVAL" &
Expand All @@ -154,63 +181,50 @@ send_data() {
if [ -f "$filename" ]; then
data=$(cat "$filename")
else
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "data file $filename not found." \
-p daemon.info
log -i -v "data file $filename not found."
continue
fi
while true; do
if [ "$failures" -eq "$MAX_RETRIES" ]; then
[ -f "$RESPONSE_FILE" ] && error_message="\"$(cat "$RESPONSE_FILE")\"" || error_message='"".'
if [ "$VERBOSE_MODE" -eq "1" ]; then
logger -s "Data not sent successfully. Response code is \"$response_code\"." \
"Error message is $error_message" \
-p daemon.err
elif [ "$FAILING" -eq "0" ]; then
log -e -v "Data not sent successfully. Response code is \"$response_code\"." \
"Error message is $error_message"
# check if agent was already passing or not to avoid repeating log messages
if [ "$FAILING" -eq "0" ]; then
FAILING=1
logger -s "Data not sent successfully. Response code is \"$response_code\"." \
"Error message is $error_message" \
"Run with verbose mode to find more." \
-t openwisp-monitoring \
-p daemon.err
[ "$VERBOSE_MODE" -ne "1" ] && log -e -n "Data not sent successfully. Response code is \"$response_code\"." \
"Run with verbose mode to find more."
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's still repetition here which makes the code hard to read.
I think we need to edit the log function to avoid this.

Ideally we could do something like:

log -e -n "Data not sent successfully. Response code is \"$response_code\"." \
    -v "Error message is $error_message"

The non verbose output should be printed also in verbose mode, while the verbose output is appended to the non verbose output but only when in verbose mode, so in non verbose mode we would have:

Data not sent successfully. Response code is "400".`

While in verbose mode we would have:

Data not sent successfully. Response code is "400". Error message is ERROR_MESSAGE_HERE.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The condition is here to ensure that the logs are not added unnecessarily when the connection is interrupted due to some issue for a long time.

Discussed here in "Non-verbose mode" #42 (review)

fi
break
fi
# send data
response_code=$($CURL_COMMAND -H "Content-Type: application/json" -d "$data" "$url")
if [ "$response_code" = "200" ]; then
success=$((success + 1))
if [ "$VERBOSE_MODE" -eq "1" ]; then
logger -s "Data sent successfully." \
-p daemon.info
elif [ "$FAILING" -eq "1" ]; then
logger -s "Data sent successfully." \
-t openwisp-monitoring \
-p daemon.info
log -i -v "Data sent successfully."
# check if agent was already failing or not to avoid repeating log messages
if [ "$FAILING" -eq "1" ]; then
FAILING=0
rm -f "$RESPONSE_FILE"
[ "$VERBOSE_MODE" -ne "1" ] && log -i -n "Data sent successfully."
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here: Data sent successfully. is repeated twice and there's another unnecessary if here.

Copy link
Copy Markdown
Member Author

@devkapilbansal devkapilbansal Jan 29, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this so that we don't fill logs uncessarily when data is transferred normally. In non-verbose mode, it will add this log "Data sent successfully" only once and the line will appear again if the connection is interrupted.
If the variable FAILING is set to 1, then only this line will be logged.

fi
# remove saved data
rm -f "$filename"
break
elif [ "$response_code" = "400" ]; then
logger -s "Data not sent successfully (HTTP status $response_code), discarding data." \
-t openwisp-monitoring \
-p daemon.err
log -i -n "Data not sent successfully (HTTP status $response_code), discarding data."
rm -f "$filename"
break
else
timeout=$(/usr/sbin/openwisp-get-random-number 2 15)
[ "$VERBOSE_MODE" -eq "1" ] && logger -s "Data not sent successfully. Retrying in $timeout seconds." \
-p daemon.warn
log -w -v "Data not sent successfully. Retrying in $timeout seconds."
failures=$((failures + 1))
if [ "$response_code" = "404" ]; then
# If we get a HTTP 404 response, it could mean that the device has been deleted from OpenWISP
# Controller. We check if openwisp-config agent is running to determine if the device has been
# deleted. If openwisp-config agent is not running, the monitoring agent will also exit.
if ! pgrep -x "openwisp-config" >/dev/null; then
logger -s "Giving up and shutting down: the device may have been deleted from OpenWISP Controller" \
-t openwisp-monitoring \
-p daemon.err
log -e -n "Giving up and shutting down: the device may have been deleted from OpenWISP Controller"
# get process id of the process collecting data
pid=$(pgrep -f "openwisp-monitoring.*--mode collect")
kill -SIGKILL "$pid"
Expand All @@ -236,17 +250,13 @@ wait_until_registered() {
if [ -n "$UUID" ] && [ -n "$KEY" ]; then
return 0
fi
logger -s "Waiting for device to register." \
-t openwisp-monitoring \
-p daemon.info
log -i -n "Waiting for device to register."
UUID=$(uci get openwisp.http.uuid 2>/dev/null)
KEY=$(uci get openwisp.http.key 2>/dev/null)
if [ -z "$UUID" ] || [ -z "$KEY" ]; then
return 1
fi
logger -s "Setting uuid and key." \
-t openwisp-monitoring \
-p daemon.info
log -i -n "Setting uuid and key."
export UUID KEY
}

Expand All @@ -256,9 +266,7 @@ bootup_delay() {
if [ "$BOOTUP_DELAY" -ne "0" ]; then
# get a random number between zero and $BOOTUP_DELAY
DELAY=$(/usr/sbin/openwisp-get-random-number 0 "$BOOTUP_DELAY")
logger "Delaying initialization of the monitoring agent for $DELAY seconds." \
-t openwisp-monitoring \
-p daemon.info
log -i -n "Delaying initialization of the monitoring agent for $DELAY seconds."
sleep "$DELAY"
fi
# send bootup hotplug event
Expand Down Expand Up @@ -329,15 +337,16 @@ main() {
shift
;;
-*)
echo "Invalid option: $1"
exit 1
echoerr "Invalid option: $1."
;;
*) break ;;
esac
shift
done

INTERVAL=${INTERVAL:-300}
INTERVAL="$(time_to_seconds "$INTERVAL")"
[ -z "$INTERVAL" ] && echoerr "Interval is invalid. Use time value(eg: '10', '2m', '3h', '1d')"
Comment thread
coderabbitai[bot] marked this conversation as resolved.
REGISTRATION_INTERVAL=$((INTERVAL / 10))
VERBOSE_MODE=${VERBOSE_MODE:-0}
BOOTUP_DELAY=${BOOTUP_DELAY:-10}
Expand Down Expand Up @@ -366,7 +375,7 @@ main() {
RESPONSE_FILE="$TMP_DIR"/response.txt
set_url_and_curl && send_data
else
echoerr "The supplied mode is invalid. Only send and collect are allowed"
echoerr "The supplied mode is invalid. Only send and collect are allowed."
fi
}

Expand Down
113 changes: 43 additions & 70 deletions openwisp-monitoring/files/monitoring.init
Original file line number Diff line number Diff line change
Expand Up @@ -4,97 +4,70 @@

# shellcheck disable=SC2034
START=99
STOP=15

USE_PROCD=1
PROG="/usr/sbin/openwisp-monitoring"
PROG_NAME="OpenWISP monitoring daemon"

time_to_seconds() {
time=$1

{ [ "$time" -ge 1 ] 2>/dev/null && seconds="$time"; } \
|| { [ "${time%s}" -ge 1 ] 2>/dev/null && seconds="${time%s}"; } \
|| { [ "${time%m}" -ge 1 ] 2>/dev/null && seconds=$((${time%m} * 60)); } \
|| { [ "${time%h}" -ge 1 ] 2>/dev/null && seconds=$((${time%h} * 3600)); } \
|| { [ "${time%d}" -ge 1 ] 2>/dev/null && seconds=$((${time%d} * 86400)); }
add_option() {
# shellcheck disable=SC3043
{
local cfg="$1"
local flag="$2"
local option="$3"
local default="$4"
local value
}

echo "$seconds"
unset seconds
unset time
config_get value "$cfg" "$option" "$default"
[ -n "$value" ] && procd_append_param command "$flag" "$value"
}

start_service() {
# for openwisp-config
config_load openwisp
config_get base_url http url
config_get uuid http uuid
config_get key http key
config_get cacert http cacert
config_get capath http capath
config_get_bool verify_ssl http verify_ssl "1"
config_get respawn_threshold http respawn_threshold
config_get respawn_timeout http respawn_timeout
config_get respawn_retry http respawn_retry

[ -n "$base_url" ] && base_url="--url $base_url"
[ -n "$uuid" ] && uuid="--uuid $uuid"
[ -n "$key" ] && key="--key $key"
[ -n "$cacert" ] && cacert="--cacert $cacert"
[ -n "$capath" ] && capath="--capath $capath"
[ -n "$verify_ssl" ] && verify_ssl="--verify_ssl $verify_ssl"

if [ -z "$base_url" ]; then
logger -s "url is not set, please add it to /etc/config/openwisp" \
-t openwisp-monitoring \
-p daemon.err
exit 1
fi

# for openwisp-monitoring

respawn_threshold=$(config_get http respawn_threshold)
respawn_timeout=$(config_get http respawn_timeout)
respawn_retry=$(config_get http respawn_retry)
Comment on lines +30 to +32
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

find . -type f -name "monitoring.init" | head -5

Repository: openwisp/openwrt-openwisp-monitoring

Length of output: 123


🏁 Script executed:

# Search for config_get function definition
rg "config_get" -B 3 -A 5 | head -100

Repository: openwisp/openwrt-openwisp-monitoring

Length of output: 1345


🏁 Script executed:

cat -n ./openwisp-monitoring/files/monitoring.init

Repository: openwisp/openwrt-openwisp-monitoring

Length of output: 2700


🌐 Web query:

OpenWrt config_get function behavior libuci.sh

💡 Result:

Summary — config_get (libuci / functions.sh)

  • What it does: config_get

    [default] reads the option value for from the in-memory UCI config loaded by config_load and stores it in the shell variable named by . It does not call the uci binary each time — it reads the data exported to the environment by config_load. [1][2]

  • Lists: If the option is a UCI list, config_get returns the list items joined by spaces (e.g. "item1 item2"). This can break if list items contain spaces; use config_list_foreach to iterate list elements safely. [3]

  • Boolean helper: config_get_bool parses common truthy/falsey strings ("on","off","yes","no","true","false","enabled","disabled") and returns 1 or 0. [1]

  • Key difference vs uci_get: config_get operates on the config already loaded by config_load (faster, no disk I/O); uci_get queries the config files each call. Use config_get when you call config_load once and access multiple options. [2]

References:

  • OpenWrt scripting docs (config/list behavior). [1]
  • Explanation of config_/uci_ differences and implementation details. [2]
  • Notes about list handling and config_list_foreach. [3]

If you want, I can show the exact function implementation lines from your OpenWrt version (need target release or a copy of /lib/functions.sh).


config_get in command substitution drops respawn values.

config_get assigns to a variable and does not output; $(config_get …) leaves respawn_threshold, respawn_timeout, and respawn_retry empty. The values are then always replaced by defaults in lines 51 and 63: "${respawn_threshold:-3600}".

🛠️ Proposed fix
-respawn_threshold=$(config_get http respawn_threshold)
-respawn_timeout=$(config_get http respawn_timeout)
-respawn_retry=$(config_get http respawn_retry)
+config_get respawn_threshold http respawn_threshold
+config_get respawn_timeout http respawn_timeout
+config_get respawn_retry http respawn_retry
🤖 Prompt for AI Agents
In `@openwisp-monitoring/files/monitoring.init` around lines 30 - 32, The three
variables are being set with command substitution which yields nothing because
config_get assigns variables rather than printing them; replace the
"$(config_get ...)" usages with direct calls to config_get so that config_get
assigns respawn_threshold, respawn_timeout, and respawn_retry (i.e., call
config_get http respawn_threshold, config_get http respawn_timeout, config_get
http respawn_retry) and keep the existing default fallbacks
("${respawn_threshold:-3600}", etc.) intact.


procd_open_instance "openwisp-monitoring_send_data"
procd_set_param command $PROG

add_option "http" "--url" url
add_option "http" "--uuid" uuid
add_option "http" "--key" key
add_option "http" "--cacert" cacert
add_option "http" "--capath" capath
add_option "http" "--verify_ssl" verify_ssl "1"

config_load openwisp-monitoring
config_get monitored_interfaces monitoring monitored_interfaces "*"
config_get interval monitoring interval "300"
config_get_bool verbose_mode monitoring verbose_mode "0"
config_get required_memory monitoring required_memory "0.05"
config_get max_retries monitoring max_retries "5"
config_get bootup_delay monitoring bootup_delay "10"

interval="$(time_to_seconds "$interval")"
if [ "$interval" -lt 1 ]; then
logger -s "Interval is invalid. Use time value(eg: '10', '2m', '3h', '1d')" \
-t openwisp-monitoring \
-p daemon.err
exit 1
fi
interval="--interval $interval"
verbose="--verbose_mode ${verbose_mode:-0}"
required_memory="--required_memory $required_memory"
max_retries="--max_retries $max_retries"
bootup_delay="--bootup_delay $bootup_delay"

procd_open_instance "openwisp-monitoring_collect_data"
# shellcheck disable=SC2086,SC2154
procd_set_param command $PROG $interval $verbose $required_memory --mode collect --monitored_interfaces "$monitored_interfaces"
add_option "monitoring" "--verbose_mode" verbose_mode "0"
add_option "monitoring" "--max_retries" max_retries "5"
add_option "monitoring" "--interval" interval "300"
procd_append_param command "--mode" "send"

procd_set_param respawn "${respawn_threshold:-3600}" "${respawn_timeout:-5}" "${respawn_retry:-5}"
[ "$verbose_mode" -eq "1" ] && procd_set_param stdout 1 && procd_set_param stderr 1
procd_close_instance

procd_open_instance "openwisp-monitoring_send_data"
# shellcheck disable=SC2086
procd_set_param command $PROG $base_url $uuid $key $cacert $capath $verify_ssl $interval $verbose $max_retries $bootup_delay --mode send
procd_open_instance "openwisp-monitoring_collect_data"
procd_set_param command $PROG

add_option "monitoring" "--monitored_interfaces" monitored_interfaces "*"
add_option "monitoring" "--required_memory" required_memory "0.05"
add_option "monitoring" "--verbose_mode" verbose_mode "0"
add_option "monitoring" "--interval" interval "300"
procd_append_param command "--mode" "collect"

procd_set_param respawn "${respawn_threshold:-3600}" "${respawn_timeout:-5}" "${respawn_retry:-5}"
[ "$verbose_mode" -eq "1" ] && procd_set_param stdout 1 && procd_set_param stderr 1
procd_close_instance

logger -s "$PROG_NAME started" \
-t openwisp-monitoring \
-p daemon.info
logger -s "$PROG_NAME started." -t openwisp-monitoring -p daemon.info
}

stop_service() {
logger -s "$PROG_NAME stopping" \
-t openwisp-monitoring \
-p daemon.info
logger -s "$PROG_NAME stopping." -t openwisp-monitoring -p daemon.info
}

service_triggers() {
Expand Down