Add background domain reachability cache for service tile health

Agent-Logs-Url: https://github.com/naturallaw777/staging_alpha/sessions/038b6d9a-0298-41d7-949f-40069cd3320f

Co-authored-by: naturallaw777 <99053422+naturallaw777@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-04-15 16:07:00 +00:00
committed by GitHub
parent 4119a4ef61
commit da0c79d479

View File

@@ -20,6 +20,7 @@ import time
import urllib.error import urllib.error
import urllib.parse import urllib.parse
import urllib.request import urllib.request
from threading import Lock
from fastapi import FastAPI, HTTPException from fastapi import FastAPI, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
@@ -54,6 +55,10 @@ REBUILD_UNIT = "sovran-hub-rebuild.service"
# even when the frontend's offset is pointing past the pre-restart content. # even when the frontend's offset is pointing past the pre-restart content.
_update_recovery_happened: bool = False _update_recovery_happened: bool = False
_cached_external_ip: str = "unavailable" _cached_external_ip: str = "unavailable"
# Most recent reachability result per domain name. Entries are written by
# _background_domain_reachability_checker() (the dict returned by
# _check_domain_reachable plus a "checked_at" time.time() stamp) and read
# through _is_domain_reachable_cached().
_domain_reachability_cache: dict[str, dict] = {}
# Held while reading or updating _domain_reachability_cache.
_domain_reachability_cache_lock = Lock()
# Seconds the background checker sleeps between refresh passes.
_DOMAIN_REACHABILITY_TTL = 60
# Handle to the background checker task; None until the startup hook creates it.
_domain_reachability_task: asyncio.Task | None = None
BACKUP_LOG = "/var/log/sovran-hub-backup.log" BACKUP_LOG = "/var/log/sovran-hub-backup.log"
BACKUP_STATUS = "/var/log/sovran-hub-backup.status" BACKUP_STATUS = "/var/log/sovran-hub-backup.status"
@@ -970,6 +975,15 @@ def _check_domain_health_fast(domain: str | None, external_ip: str) -> bool:
return resolved_ip != external_ip return resolved_ip != external_ip
def _is_domain_reachable_cached(domain: str) -> bool | None:
    """Look up the last background reachability result for *domain*.

    Returns ``None`` when the cache holds no entry for the domain yet.
    Otherwise returns the truthiness of the entry's ``"reachable"`` field,
    treating a missing field as ``False``.
    """
    with _domain_reachability_cache_lock:
        cached = _domain_reachability_cache.get(domain)
        return None if cached is None else bool(cached.get("reachable", False))
def _evaluate_domain_checklist(domain: str | None, external_ip: str, internal_ip: str | None = None) -> dict: def _evaluate_domain_checklist(domain: str | None, external_ip: str, internal_ip: str | None = None) -> dict:
"""Evaluate sequential domain diagnostics and return UI-ready checklist data.""" """Evaluate sequential domain diagnostics and return UI-ready checklist data."""
steps: list[dict] = [] steps: list[dict] = []
@@ -2391,6 +2405,10 @@ async def api_services():
domain, domain,
_cached_external_ip, _cached_external_ip,
) )
if not has_domain_issues and domain:
cached_reachable = _is_domain_reachable_cached(domain)
if cached_reachable is False:
has_domain_issues = True
health = "needs_attention" if (has_port_issues or has_domain_issues) else "healthy" health = "needs_attention" if (has_port_issues or has_domain_issues) else "healthy"
# Check Bitcoin IBD state # Check Bitcoin IBD state
if unit == "bitcoind.service" and enabled: if unit == "bitcoind.service" and enabled:
@@ -4333,3 +4351,71 @@ async def _startup_recover_stale_status():
if corrected: if corrected:
_update_recovery_happened = True _update_recovery_happened = True
await loop.run_in_executor(None, _recover_stale_status, REBUILD_STATUS, REBUILD_LOG, REBUILD_UNIT) await loop.run_in_executor(None, _recover_stale_status, REBUILD_STATUS, REBUILD_LOG, REBUILD_UNIT)
async def _background_domain_reachability_checker():
    """Periodically probe configured domains and cache reachability results.

    After an initial 5-second delay this coroutine loops forever.  Each pass:

    1. Loads the config and the hub overrides to decide which services are
       enabled (an override for a service's feature wins over the config
       entry's ``enabled`` flag).
    2. Reads the domain for each enabled service from its file under
       ``DOMAINS_DIR``; services without a domain mapping, with an
       unreadable file, or with an empty file are skipped.
    3. Runs ``_check_domain_reachable`` once per distinct domain in the
       default executor, concurrently.
    4. Stores a copy of each successful result, stamped with
       ``checked_at``, in ``_domain_reachability_cache``.

    A check that raises for one domain is logged and leaves that domain's
    previous cache entry untouched; the other domains' results are still
    stored.  Any other error during a pass is logged and the loop carries
    on after sleeping ``_DOMAIN_REACHABILITY_TTL`` seconds.
    """
    await asyncio.sleep(5)  # initial delay before the first pass
    while True:
        try:
            cfg = load_config()
            services = cfg.get("services", [])
            unit_to_feature = {
                unit: feat_id
                for feat_id, unit in FEATURE_SERVICE_MAP.items()
                if unit is not None
            }
            # We are inside a coroutine, so the running loop is the right one.
            loop = asyncio.get_running_loop()
            overrides, *_ = await loop.run_in_executor(None, _read_hub_overrides)

            domains_to_check: list[str] = []
            for entry in services:
                unit = entry.get("unit", "")
                icon = entry.get("icon", "")
                enabled = entry.get("enabled", True)
                # Resolve the feature by unit first, falling back to the icon.
                feat_id = unit_to_feature.get(unit)
                if feat_id is None:
                    feat_id = FEATURE_ICON_MAP.get(icon)
                if feat_id is not None and feat_id in overrides:
                    enabled = overrides[feat_id]
                if not enabled:
                    continue
                domain_key = SERVICE_DOMAIN_MAP.get(unit)
                if not domain_key:
                    continue
                domain_path = os.path.join(DOMAINS_DIR, domain_key)
                try:
                    with open(domain_path, "r") as f:
                        domain = f.read(512).strip()
                except OSError:
                    # Missing/unreadable domain file: nothing to check.
                    continue
                if domain:
                    domains_to_check.append(domain)

            if domains_to_check:
                # De-duplicate while keeping first-seen order.
                unique_domains = list(dict.fromkeys(domains_to_check))
                # return_exceptions=True keeps one failing probe from
                # discarding every other domain's result for this pass.
                results = await asyncio.gather(
                    *(
                        loop.run_in_executor(None, _check_domain_reachable, domain)
                        for domain in unique_domains
                    ),
                    return_exceptions=True,
                )
                checked_at = time.time()
                fresh: dict[str, dict] = {}
                for domain, result in zip(unique_domains, results):
                    if isinstance(result, BaseException):
                        logger.warning(
                            "Domain reachability check failed for %s: %r",
                            domain,
                            result,
                        )
                        continue
                    # Store a stamped copy rather than mutating the
                    # checker's return value in place.
                    fresh[domain] = {**result, "checked_at": checked_at}
                with _domain_reachability_cache_lock:
                    _domain_reachability_cache.update(fresh)
        except Exception:
            logger.exception("Background domain reachability checker error")
        await asyncio.sleep(_DOMAIN_REACHABILITY_TTL)
@app.on_event("startup")
async def _startup_domain_reachability():
    """Launch the background reachability checker unless one is still running."""
    global _domain_reachability_task
    current = _domain_reachability_task
    if current is not None and not current.done():
        # A checker task is already alive; do not start a second one.
        return
    _domain_reachability_task = asyncio.create_task(_background_domain_reachability_checker())