Initial commit: Network monitoring script for OPNsense/TP-Link diagnostics
This commit is contained in:
259
network_monitor.py
Normal file
259
network_monitor.py
Normal file
@@ -0,0 +1,259 @@
|
|||||||
|
#!/usr/bin/env python3
"""
Network Monitoring Script for OPNsense/TP-Link Diagnostics.

Monitors multiple IPs for connectivity issues over a specified duration.
"""
|
||||||
|
|
||||||
|
import datetime
import subprocess
import sys
import time
from collections import defaultdict
|
||||||
|
|
||||||
|
# Configuration

# Devices to probe: IP address -> label shown in the status table and reports.
MONITOR_IPS = {
    "10.0.0.254": "OPNsense Firewall",
    "10.0.0.253": "TP-Link AX5400 (253)",
    "10.0.0.252": "TP-Link Device (252)",
    "10.0.0.55": "DNS/Other Service",
}

# Timing settings (all values are in seconds).
PING_INTERVAL = 5  # pause between two rounds of ping checks
MONITOR_DURATION = 60 * 60  # total monitoring time: 1 hour
PING_TIMEOUT = 2  # how long to wait for a ping response

PING_COUNT = 1  # number of pings sent per check
|
||||||
|
|
||||||
|
|
||||||
|
class NetworkMonitor:
    """Ping a set of hosts on a schedule and summarise their availability.

    Args:
        ips: Mapping of IP address -> human-readable device name.
        duration: Total monitoring time in seconds.
        interval: Pause in seconds between two rounds of checks.

    Attributes:
        stats: Per-IP counters ('total_checks', 'successes', 'failures',
            'consecutive_failures', 'max_consecutive_failures'), the list of
            'failure_times' and the 'last_status' ('UP', 'DOWN' or None).
        start_time / end_time: ``datetime`` stamps set by :meth:`run`.
    """

    # Failures recorded at most this many seconds apart are reported as one
    # incident in the final report.
    INCIDENT_GAP_SECONDS = 30

    def __init__(self, ips, duration, interval):
        self.ips = ips
        self.duration = duration
        self.interval = interval
        self.start_time = None
        self.end_time = None

        # Statistics tracking
        self.stats = {
            ip: {
                'total_checks': 0,
                'successes': 0,
                'failures': 0,
                'failure_times': [],
                'consecutive_failures': 0,
                'max_consecutive_failures': 0,
                'last_status': None,
            }
            for ip in ips
        }

    @staticmethod
    def _percent(part, total):
        """Return ``part`` as a percentage of ``total`` (0.0 when ``total`` is not positive)."""
        return part / total * 100 if total > 0 else 0.0

    @staticmethod
    def _group_incidents(failure_times, max_gap_seconds):
        """Split chronologically ordered failure stamps into incidents.

        A stamp joins the current incident when it is at most
        ``max_gap_seconds`` after the previous stamp; otherwise it opens a
        new incident. Returns a list of lists (empty for empty input).
        """
        incidents = []
        for moment in failure_times:
            if incidents and (moment - incidents[-1][-1]).total_seconds() <= max_gap_seconds:
                incidents[-1].append(moment)
            else:
                incidents.append([moment])
        return incidents

    def ping(self, ip):
        """Ping an IP address and return True if successful.

        Returns False when ``ping`` exits non-zero, exceeds the subprocess
        timeout, or cannot be started at all.
        """
        # NOTE(review): `-W` is read as seconds by Linux iputils ping, while
        # BSD/macOS ping treats it as milliseconds -- confirm the platform
        # this script is run on.
        command = ['ping', '-c', str(PING_COUNT), '-W', str(PING_TIMEOUT), ip]
        try:
            result = subprocess.run(
                command,
                # The output is never inspected, so discard it instead of
                # buffering it in pipes.
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                # One second of slack so ping's own timeout fires first.
                timeout=PING_TIMEOUT + 1,
            )
        except (subprocess.SubprocessError, OSError, ValueError):
            # Timeout, missing/unrunnable ping binary, or an unusable
            # address string: all count as "host not reachable".
            return False
        return result.returncode == 0

    def check_all_ips(self):
        """Ping every monitored IP once and update its statistics.

        Returns:
            Dict mapping each IP to True (reachable) or False.
        """
        results = {}
        for ip in self.ips:
            success = self.ping(ip)
            results[ip] = success

            # Update statistics
            entry = self.stats[ip]
            entry['total_checks'] += 1

            if success:
                entry['successes'] += 1
                entry['consecutive_failures'] = 0
                entry['last_status'] = 'UP'
            else:
                entry['failures'] += 1
                entry['consecutive_failures'] += 1
                entry['failure_times'].append(datetime.datetime.now())
                entry['last_status'] = 'DOWN'

                # Track max consecutive failures
                entry['max_consecutive_failures'] = max(
                    entry['max_consecutive_failures'],
                    entry['consecutive_failures'],
                )

        return results

    def print_status(self, elapsed_time, results):
        """Clear the screen and print the live status table.

        Args:
            elapsed_time: Seconds elapsed since monitoring started.
            results: Latest round of results as returned by check_all_ips().
        """
        clear_screen()
        progress = self._percent(elapsed_time, self.duration)
        print("=" * 80)
        print(f"Network Connectivity Monitor - {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("=" * 80)
        print(f"\nMonitoring Duration: {elapsed_time:.0f}s / {self.duration}s ({progress:.1f}%)")
        print(f"Next check in: {self.interval}s\n")

        print("-" * 80)
        print(f"{'IP Address':<15} {'Device':<25} {'Status':<10} {'Success Rate':<15}")
        print("-" * 80)

        for ip, name in self.ips.items():
            status = "🟢 UP" if results[ip] else "🔴 DOWN"
            total = self.stats[ip]['total_checks']
            successes = self.stats[ip]['successes']
            rate = self._percent(successes, total)

            print(f"{ip:<15} {name:<25} {status:<10} {rate:>6.2f}% ({successes}/{total})")

        print("-" * 80)

        # Show any current issues
        issues = [ip for ip, result in results.items() if not result]
        if issues:
            print("\n⚠️ CURRENT ISSUES:")
            for ip in issues:
                consecutive = self.stats[ip]['consecutive_failures']
                print(f" • {ip} ({self.ips[ip]}): {consecutive} consecutive failures")
        else:
            print("\n✅ All monitored devices are UP")

        print()

    def _summary_lines(self):
        """Build the per-device summary table of the final report."""
        lines = [
            "DEVICE SUMMARY:",
            "-" * 80,
            f"{'IP Address':<15} {'Device':<25} {'Success Rate':<15} {'Total Failures':<15}",
            "-" * 80,
        ]
        for ip, name in self.ips.items():
            stats = self.stats[ip]
            rate = self._percent(stats['successes'], stats['total_checks'])
            status_icon = "✅" if stats['failures'] == 0 else ("⚠️" if rate >= 95 else "❌")
            lines.append(f"{ip:<15} {name:<25} {status_icon} {rate:>6.2f}% {stats['failures']:>14}")
        lines += ["-" * 80, ""]
        return lines

    def _detail_lines(self, ip, name):
        """Build the detailed statistics section for one device."""
        stats = self.stats[ip]
        total = stats['total_checks']
        # _percent() guards the zero-check case: a device that was never
        # probed (e.g. Ctrl+C during the first round) must not crash the report.
        ok_pct = self._percent(stats['successes'], total)
        failed_pct = self._percent(stats['failures'], total)
        lines = [
            f"\n{name} ({ip}):",
            f" Total Checks: {total}",
            f" Successful: {stats['successes']} ({ok_pct:.2f}%)",
            f" Failed: {stats['failures']} ({failed_pct:.2f}%)",
            f" Max Consecutive Failures: {stats['max_consecutive_failures']}",
        ]

        failure_times = stats['failure_times']
        if not failure_times:
            return lines

        lines.append(f" First Failure: {failure_times[0].strftime('%H:%M:%S')}")
        lines.append(f" Last Failure: {failure_times[-1].strftime('%H:%M:%S')}")

        # Group failures into incidents (larger gaps = separate incident)
        incidents = self._group_incidents(failure_times, self.INCIDENT_GAP_SECONDS)
        lines.append(f" Failure Incidents: {len(incidents)}")

        if len(incidents) <= 10:  # Only show details if not too many
            lines.append("\n Failure Timeline:")
            for idx, incident in enumerate(incidents, 1):
                first, last = incident[0], incident[-1]
                span = (last - first).total_seconds()
                lines.append(
                    f" Incident {idx}: {first.strftime('%H:%M:%S')} - {last.strftime('%H:%M:%S')}"
                    f" ({len(incident)} failures, {span:.0f}s)"
                )
        return lines

    def _assessment_lines(self):
        """Build the overall health assessment of the final report."""
        lines = ["\nOVERALL ASSESSMENT:"]
        problem_devices = [ip for ip in self.ips if self.stats[ip]['failures'] > 0]

        if not problem_devices:
            lines.append("✅ EXCELLENT: All devices maintained 100% uptime during monitoring period!")
        elif len(problem_devices) == 1:
            only = problem_devices[0]
            lines.append(f"⚠️ ISSUE ISOLATED: Only {self.ips[only]} ({only}) had connectivity issues.")
            lines.append(" This suggests a device-specific problem rather than network-wide issue.")
        else:
            lines.append(f"❌ MULTIPLE ISSUES: {len(problem_devices)} devices experienced connectivity problems.")
            lines.append(" This may indicate a broader network issue.")
        return lines

    def print_final_report(self):
        """Clear the screen and print the final summary report.

        Requires ``start_time`` and ``end_time`` to be set (run() does this).
        """
        clear_screen()
        lines = [
            "=" * 80,
            "FINAL MONITORING REPORT",
            "=" * 80,
            f"Start Time: {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}",
            f"End Time: {self.end_time.strftime('%Y-%m-%d %H:%M:%S')}",
            f"Duration: {(self.end_time - self.start_time).total_seconds():.0f} seconds",
            "=" * 80,
            "",
        ]

        # Overall summary
        lines += self._summary_lines()

        # Detailed statistics
        lines += ["DETAILED STATISTICS:", "=" * 80]
        for ip, name in self.ips.items():
            lines += self._detail_lines(ip, name)
        lines.append("\n" + "=" * 80)

        # Overall health assessment
        lines += self._assessment_lines()
        lines.append("=" * 80)

        print("\n".join(lines))

    def run(self):
        """Run the monitoring loop, then print the final report.

        Checks all IPs, prints the status table and sleeps ``interval``
        seconds, repeating until ``duration`` seconds have elapsed or the
        user presses Ctrl+C.
        """
        print("Starting network monitoring...")
        print(f"Monitoring {len(self.ips)} devices for {self.duration} seconds ({self.duration/60:.0f} minutes)")
        print("Press Ctrl+C to stop early\n")

        try:
            # The start-up pause is inside the try so that Ctrl+C is handled
            # from the moment the hint above is shown.
            time.sleep(2)

            self.start_time = datetime.datetime.now()
            # Elapsed time uses the monotonic clock so wall-clock adjustments
            # cannot shorten or extend the run.
            began = time.monotonic()
            elapsed = 0

            while elapsed < self.duration:
                results = self.check_all_ips()
                self.print_status(elapsed, results)

                time.sleep(self.interval)
                elapsed = time.monotonic() - began
        except KeyboardInterrupt:
            print("\n\nMonitoring stopped by user.")
            if self.start_time is None:
                # Interrupted during the start-up pause: nothing to report.
                return

        self.end_time = datetime.datetime.now()

        # Print final report
        self.print_final_report()
|
||||||
|
|
||||||
|
|
||||||
|
def clear_screen():
    """Clear the terminal screen using ANSI escape sequences."""
    # "\033[H" moves the cursor to the top-left corner, "\033[J" erases from
    # the cursor to the end of the screen; no trailing newline is written.
    reset_sequence = "\033[H\033[J"
    print(reset_sequence, end="")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Print the configured targets and settings, pause briefly, then run the monitor."""
    rule = "=" * 80
    banner = [
        rule,
        "Network Monitoring Script for OPNsense/TP-Link Diagnostics",
        rule,
        "\nConfigured to monitor:",
    ]
    banner.extend(f" • {ip:<15} - {name}" for ip, name in MONITOR_IPS.items())
    banner.extend([
        f"\nMonitoring interval: {PING_INTERVAL} seconds",
        f"Total duration: {MONITOR_DURATION} seconds ({MONITOR_DURATION/60:.0f} minutes)",
        f"Ping timeout: {PING_TIMEOUT} seconds",
        "\nStarting in 3 seconds...",
    ])
    print("\n".join(banner))

    # Give the operator a moment to read the settings before the screen is cleared.
    time.sleep(3)

    NetworkMonitor(MONITOR_IPS, MONITOR_DURATION, PING_INTERVAL).run()
|
||||||
|
|
||||||
|
|
||||||
|
# Start monitoring only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user