Add background domain reachability cache for service tile health
Agent-Logs-Url: https://github.com/naturallaw777/staging_alpha/sessions/038b6d9a-0298-41d7-949f-40069cd3320f
Co-authored-by: naturallaw777 <99053422+naturallaw777@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
4119a4ef61
commit
da0c79d479
@@ -20,6 +20,7 @@ import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from threading import Lock
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse
|
||||
@@ -54,6 +55,10 @@ REBUILD_UNIT = "sovran-hub-rebuild.service"
|
||||
# even when the frontend's offset is pointing past the pre-restart content.
|
||||
_update_recovery_happened: bool = False
|
||||
_cached_external_ip: str = "unavailable"
|
||||
_domain_reachability_cache: dict[str, dict] = {}
|
||||
_domain_reachability_cache_lock = Lock()
|
||||
_DOMAIN_REACHABILITY_TTL = 60
|
||||
_domain_reachability_task: asyncio.Task | None = None
|
||||
|
||||
BACKUP_LOG = "/var/log/sovran-hub-backup.log"
|
||||
BACKUP_STATUS = "/var/log/sovran-hub-backup.status"
|
||||
@@ -970,6 +975,15 @@ def _check_domain_health_fast(domain: str | None, external_ip: str) -> bool:
|
||||
return resolved_ip != external_ip
|
||||
|
||||
|
||||
def _is_domain_reachable_cached(domain: str) -> bool | None:
    """Look up the most recently cached reachability verdict for ``domain``.

    Returns ``None`` when the cache holds no entry for ``domain``; otherwise
    the truthiness of that entry's ``"reachable"`` field, with a missing
    field counted as ``False``.
    """
    # The lock guards only the dictionary lookup itself.
    with _domain_reachability_cache_lock:
        cached = _domain_reachability_cache.get(domain)
    return None if cached is None else bool(cached.get("reachable", False))
|
||||
|
||||
|
||||
def _evaluate_domain_checklist(domain: str | None, external_ip: str, internal_ip: str | None = None) -> dict:
|
||||
"""Evaluate sequential domain diagnostics and return UI-ready checklist data."""
|
||||
steps: list[dict] = []
|
||||
@@ -2391,6 +2405,10 @@ async def api_services():
|
||||
domain,
|
||||
_cached_external_ip,
|
||||
)
|
||||
if not has_domain_issues and domain:
|
||||
cached_reachable = _is_domain_reachable_cached(domain)
|
||||
if cached_reachable is False:
|
||||
has_domain_issues = True
|
||||
health = "needs_attention" if (has_port_issues or has_domain_issues) else "healthy"
|
||||
# Check Bitcoin IBD state
|
||||
if unit == "bitcoind.service" and enabled:
|
||||
@@ -4333,3 +4351,71 @@ async def _startup_recover_stale_status():
|
||||
if corrected:
|
||||
_update_recovery_happened = True
|
||||
await loop.run_in_executor(None, _recover_stale_status, REBUILD_STATUS, REBUILD_LOG, REBUILD_UNIT)
|
||||
|
||||
|
||||
def _collect_enabled_service_domains(services: list[dict], overrides: dict) -> list[str]:
    """Return the de-duplicated domains of every enabled, domain-backed service.

    ``services`` is the ``"services"`` list from the loaded config and
    ``overrides`` maps feature ids to an enabled flag that takes precedence
    over the per-service ``"enabled"`` value. Domains are read from files
    under ``DOMAINS_DIR``; unreadable or empty files are skipped. First-seen
    order is preserved.

    This performs blocking file reads, so call it from a worker thread.
    """
    unit_to_feature = {
        unit: feat_id
        for feat_id, unit in FEATURE_SERVICE_MAP.items()
        if unit is not None
    }

    # A dict is used as an insertion-ordered set.
    domains: dict[str, None] = {}
    for entry in services:
        unit = entry.get("unit", "")

        # Resolve the feature by unit first, falling back to the icon.
        feat_id = unit_to_feature.get(unit)
        if feat_id is None:
            feat_id = FEATURE_ICON_MAP.get(entry.get("icon", ""))

        enabled = entry.get("enabled", True)
        if feat_id is not None and feat_id in overrides:
            enabled = overrides[feat_id]
        if not enabled:
            continue

        domain_key = SERVICE_DOMAIN_MAP.get(unit)
        if not domain_key:
            continue

        try:
            with open(os.path.join(DOMAINS_DIR, domain_key), "r") as f:
                domain = f.read(512).strip()
        except OSError:
            # Missing/unreadable domain file: nothing to probe for this service.
            continue
        if domain:
            domains[domain] = None

    return list(domains)


async def _background_domain_reachability_checker():
    """Periodically probe configured domains and cache the reachability results.

    After an initial 5 second delay, each cycle:

    1. loads the config and hub overrides,
    2. collects the domains of all enabled services that have a domain file,
    3. runs ``_check_domain_reachable`` for each domain in the default
       executor, concurrently,
    4. stamps each result with ``checked_at`` and stores it in
       ``_domain_reachability_cache`` under the cache lock,

    then sleeps ``_DOMAIN_REACHABILITY_TTL`` seconds. A failure while probing
    one domain is logged and leaves that domain's previous cache entry
    untouched; it does not discard the results for the other domains. Any
    other error in a cycle is logged and the loop continues.

    NOTE(review): entries are only ever added or overwritten here; domains
    that are no longer configured are never evicted from the cache.
    """
    await asyncio.sleep(5)
    loop = asyncio.get_running_loop()

    while True:
        try:
            cfg = load_config()
            services = cfg.get("services", [])

            overrides, *_ = await loop.run_in_executor(None, _read_hub_overrides)

            # Domain files are read off the event loop, like the overrides.
            domains = await loop.run_in_executor(
                None, _collect_enabled_service_domains, services, overrides
            )

            if domains:
                # return_exceptions=True keeps one bad probe from throwing
                # away every other domain's result for this cycle.
                results = await asyncio.gather(
                    *(
                        loop.run_in_executor(None, _check_domain_reachable, domain)
                        for domain in domains
                    ),
                    return_exceptions=True,
                )
                checked_at = time.time()

                fresh: dict[str, dict] = {}
                for domain, result in zip(domains, results):
                    if isinstance(result, BaseException):
                        logger.error(
                            "Domain reachability check failed for %s",
                            domain,
                            exc_info=result,
                        )
                        continue
                    result["checked_at"] = checked_at
                    fresh[domain] = result

                with _domain_reachability_cache_lock:
                    _domain_reachability_cache.update(fresh)
        except Exception:
            logger.exception("Background domain reachability checker error")

        await asyncio.sleep(_DOMAIN_REACHABILITY_TTL)
|
||||
|
||||
|
||||
@app.on_event("startup")
async def _startup_domain_reachability():
    """Launch the background reachability checker unless one is still running."""
    global _domain_reachability_task

    existing = _domain_reachability_task
    if existing is not None and not existing.done():
        # A live checker task already exists; do not start a second one.
        return

    _domain_reachability_task = asyncio.create_task(
        _background_domain_reachability_checker()
    )
|
||||
|
||||
Reference in New Issue
Block a user