Following Sean's suggestions, here's my updated script, which will log some juicy details the next time the bug hits. Forgive the Perl everywhere; I'm waaay more comfortable in Perl than in bash.
I'll report back when I get a hit.
#!/bin/bash
#
# internet-keep-up
#
# hack b/c Shaw's dhcp is farked up and hoses me
#
# Every 30 seconds, ping two hosts.  If neither answers, append diagnostics
# (routing table, next-hop ping, ARP entry, interface state and a short
# packet capture) to $log, bounce the interface and send a notification mail.

log=/var/log/internet-keep-up.log
iface=eth0
primary_host=anothershawhost.likeyourfriendshouse.ca
fallback_host=130.179.16.8

# Succeed if host $1 answers a single ping within the 15 second deadline.
reachable() {
  ping -c 1 -i 3 -w 15 "$1" >/dev/null 2>&1
}

# Print the gateway of the first default route listed by `netstat -rn`,
# or 127.0.0.1 when there is no default route, so the caller always has
# something to ping.
default_gateway() {
  # Dots are escaped so only the literal 0.0.0.0 destination matches, and
  # only the first match is printed so several default routes cannot be
  # glued together into one bogus address.
  netstat -rn |
    perl -ne 'if (!$f && /^0\.0\.0\.0\s+(\S+)/) { print $1; $f = 1 }
              END { print "127.0.0.1" unless $f }'
}

# Append a snapshot of the network state to $log.
collect_diagnostics() {
  local nexthop capture_pid
  {
    date

    # dump routing table
    netstat -rn

    # can we ping our next hop?
    nexthop=$(default_gateway)
    ping -c 3 -i 3 -w 10 "$nexthop"

    # -n keeps arp numeric: no DNS lookups while the link is down, and the
    # address is what gets printed, so the fixed-string grep can match it.
    arp -n | grep -F -- "$nexthop"

    ifconfig "$iface"

    # Capture up to 5 packets, but give up after 30 seconds of silence.
    tcpdump -c 5 -i "$iface" &
    capture_pid=$!
    sleep 30
    kill "$capture_pid"
    # Reap the child; the shell's own "Terminated" notice is not wanted.
    wait "$capture_pid" 2>/dev/null
  } >>"$log" 2>&1
}

# Take the interface down and bring it back up, pausing after each step.
restart_interface() {
  ifdown "$iface" >/dev/null 2>&1
  sleep 2
  ifup "$iface" >/dev/null 2>&1
  sleep 2
}

# don't thrash on initial boot, easiest to just sleep a long time
sleep 300

while :; do
  # Only act when BOTH hosts are unreachable; a single dead host is not
  # treated as an outage.
  if ! reachable "$primary_host" && ! reachable "$fallback_host"; then
    collect_diagnostics
    restart_interface
    mail -s 'internet-keep-up: had to restart internet' you@yourhost.ca </dev/null >/dev/null 2>&1
  fi

  # Pause on every iteration, including the healthy path, so the probe
  # hosts are not pinged back-to-back.
  sleep 30
done