Skip to content

SAGE-749 locates and saves wifi-adapter loc on bus to reset on network stack reset #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ROOTFS/etc/waggle/nw/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ check_successive_seconds = 5.0
rssh_addrs = [ ('beehive', 'beehive', 20022), ('beekeeper', 'beehive.honeyhouse.one', 49190) ]
network_services = [ "NetworkManager", "ModemManager", "waggle-reverse-tunnel", "waggle-bk-reverse-tunnel" ]
sd_card_storage_loc = /media/scratch
wifi_adapter_loc_file = /etc/waggle/nw/adapter_loc_file
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it's only valid for this boot, you can use a tmpfs path like /run/waggle/nw.


[network-reboot]
num_resets = 9
Expand Down
62 changes: 61 additions & 1 deletion ROOTFS/usr/bin/waggle_network_watchdog.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import NamedTuple, Callable
import json
import ast
import os

MEDIA_MMC = 0
MEDIA_SD = 1
Expand Down Expand Up @@ -66,6 +67,7 @@ class NetworkWatchdogConfig(NamedTuple):
current_media: int
rssh_addrs: list
network_services: list
adapter_loc_file: str
network_resets: list
network_num_resets: int
network_reset_file: str
Expand Down Expand Up @@ -127,6 +129,7 @@ def read_network_watchdog_config(filename):
hard_num_resets=int(hard_reset_settings.get("num_resets", 0)),
hard_reset_file=sd_card_storage_loc+hard_reset_settings.get("current_reset_file", None),

adapter_loc_file=sd_card_storage_loc+all_settings.get("wifi_adapter_loc_file",None),
rssh_addrs=list(ast.literal_eval(all_settings.get("rssh_addrs",None))),
network_services=json.loads(all_settings.get("network_services",None)),
check_seconds=float(all_settings.get("check_seconds", 15.0)),
Expand Down Expand Up @@ -182,13 +185,68 @@ def fix_modem_port_settings():
subprocess.run(["chown", "root:root"] + ports)
subprocess.run(["chmod", "660"] + ports)

def write_wifi_adapter_loc_safe(wifi_adapt_save_file, adapter_loc):
    """Store ``adapter_loc`` as the entire content of ``wifi_adapt_save_file``.

    The file is opened in write mode, so previous content is replaced and
    no trailing newline is added. The operation is best-effort: any
    exception is caught and reported as a warning rather than propagated.
    """
    try:
        with open(wifi_adapt_save_file, 'w') as out:
            text = '%s' % adapter_loc
            out.write(text)
    except Exception:
        logging.warning("Unable to write to file: %s", wifi_adapt_save_file)

def read_wifi_adapter_loc_safe(wifi_adapt_save_file):
    """Return the adapter location saved in ``wifi_adapt_save_file``.

    Only the first line of the file is used. Leading/trailing whitespace
    (including a trailing newline) is stripped so the value can be compared
    against a freshly detected address and passed on without a stray
    line break.

    Returns an empty string when the file is empty or cannot be read; a
    read failure is logged as a warning instead of being raised.
    """
    try:
        with open(wifi_adapt_save_file, 'r') as f:
            return f.readline().strip()
    except Exception:
        logging.warning("Unable to read from file: %s", wifi_adapt_save_file)
        return ''

def locate_wifi_adapter(nwwd_config, wifi_driver_path='/sys/class/net/wifi0/device/driver'):
    """Detect the WiFi adapter's USB bus address and save it if it changed.

    Args:
        nwwd_config: watchdog configuration; only ``adapter_loc_file`` (path
            of the file holding the last known address) is read.
        wifi_driver_path: sysfs driver directory of the WiFi interface.
            Defaults to the ``wifi0`` interface.

    Returns:
        None. The function is best-effort: if no adapter is present, no USB
        entry is found, or the save file cannot be created, it logs and
        returns without raising.
    """
    if not os.path.isdir(wifi_driver_path):
        logging.info("No WiFi adapter found")
        return

    # creating adapter loc file to read/write from
    loc_file = Path(nwwd_config.adapter_loc_file)
    try:
        if not loc_file.exists():
            loc_file.parent.mkdir(parents=True, exist_ok=True)
            loc_file.touch()
    except OSError:
        logging.warning("Unable to create file: %s", nwwd_config.adapter_loc_file)
        return

    try:
        entries = os.listdir(wifi_driver_path)
    except OSError:
        # the directory disappeared between the isdir() check and the listing
        logging.info("No WiFi adapter found")
        return

    # Keep only entries shaped like a USB interface ("<bus>-<port>:<cfg>.<if>"):
    # the part before ':' starts with a digit and contains '-'. This skips the
    # driver's own control files (bind, unbind, ...) and entries without a '-'.
    # NOTE(review): the first match in sorted order is used, as before; this
    # presumes the driver has a single device bound -- confirm.
    usb_entries = sorted(
        entry for entry in entries
        if entry[:1].isdigit() and '-' in entry.split(':')[0]
    )
    if not usb_entries:
        logging.info("No USB WiFi adapter found under %s", wifi_driver_path)
        return

    usb_addr = usb_entries[0].split(':')[0]

    if usb_addr != read_wifi_adapter_loc_safe(nwwd_config.adapter_loc_file):
        write_wifi_adapter_loc_safe(nwwd_config.adapter_loc_file, usb_addr)
        logging.info("New WiFi Adapter found @ %s on usb bus", usb_addr)

def _write_usb_driver_attr(attr, usb_addr):
    """Write ``usb_addr`` to /sys/bus/usb/drivers/usb/<attr>; return True on success."""
    attr_path = '/sys/bus/usb/drivers/usb/' + attr
    try:
        with open(attr_path, 'w') as f:
            # trailing newline mirrors what `echo <addr> > <attr>` would write
            f.write(usb_addr + '\n')
    except OSError as err:
        logging.warning("Unable to write %s to %s: %s", usb_addr, attr_path, err)
        return False
    return True


def fix_wifi_adapter(nwwd_config):
    """Reset the saved WiFi adapter by unbinding and rebinding it on the USB bus.

    The address is read from ``nwwd_config.adapter_loc_file``. When no address
    is saved the reset is skipped. The address is written directly to the USB
    driver's ``unbind`` and ``bind`` files (no shell is involved, so the file
    content is never interpreted as a command), with a one second pause in
    between. Failures are logged as warnings and never raised; success is only
    logged when both writes went through.
    """
    # strip so a stray newline/space in the saved file is not passed on
    usb_addr = read_wifi_adapter_loc_safe(nwwd_config.adapter_loc_file).strip()

    if usb_addr == '':
        logging.info("No WiFi adapter known skipping adapter reset")
        return

    unbound = _write_usb_driver_attr('unbind', usb_addr)
    time.sleep(1)
    # bind is attempted even if unbind failed: the device may already be unbound
    bound = _write_usb_driver_attr('bind', usb_addr)

    if unbound and bound:
        logging.info("WiFi adapter virtually reset")
    else:
        logging.warning("WiFi adapter reset incomplete for %s", usb_addr)

# NOTE Revisit how much of the network stack we should restart. For now, I want to cover all
# cases of wifi and modems and ssh tunnel issues.
def restart_network_services(nwwd_config):
logging.warning("restarting network services")

fix_modem_port_settings()
fix_wifi_adapter(nwwd_config)

# restart network services
subprocess.run( ['systemctl', 'restart'] + nwwd_config.network_services )
Expand Down Expand Up @@ -323,12 +381,14 @@ def health_check():

health = health or curServerHealth
logging.debug(f"Reporting ssh connection of {alias} as {curServerHealth}")

try:
subprocess.check_call(["waggle-publish-metric", "sys.rssh_up", str(int(curServerHealth)), "--meta", "server=" + alias])
except Exception:
logging.warning("waggle-publish-metric not found. no metrics will be published")

locate_wifi_adapter(nwwd_config)

return health

def health_check_passed(timer):
Expand Down