From c7005c93b53351250ae8b64f81e3ce31026ab7d6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 12 Apr 2026 12:16:22 +0000 Subject: [PATCH] fix: user-friendly stale recovery messages and complete log on reconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - _recover_stale_status(): returns True when corrected; changes message from internal '[Hub] Stale RUNNING...' to user-friendly text - _startup_recover_stale_status(): sets _update_recovery_happened flag when update recovery happens at startup - api_updates_status(): uses offset=0 when recovery happened so frontend receives the full log, not just a stale delta - pollUpdateStatus(): when reconnecting after server-down with update done, resets offset to 0, re-fetches full log, shows '[Server restarted — update completed successfully.]' instead of '[Server reconnected]' Agent-Logs-Url: https://github.com/naturallaw777/staging_alpha/sessions/90b535d1-bc3b-4147-9d62-3c7a93b1c8e4 Co-authored-by: naturallaw777 <99053422+naturallaw777@users.noreply.github.com> --- app/sovran_systemsos_web/server.py | 53 ++++++++++++++++---- app/sovran_systemsos_web/static/js/update.js | 31 +++++++++++- 2 files changed, 72 insertions(+), 12 deletions(-) diff --git a/app/sovran_systemsos_web/server.py b/app/sovran_systemsos_web/server.py index eb19711..0afc007 100644 --- a/app/sovran_systemsos_web/server.py +++ b/app/sovran_systemsos_web/server.py @@ -43,6 +43,12 @@ REBUILD_LOG = "/var/log/sovran-hub-rebuild.log" REBUILD_STATUS = "/var/log/sovran-hub-rebuild.status" REBUILD_UNIT = "sovran-hub-rebuild.service" +# Set to True by _startup_recover_stale_status() when it corrects a stale +# RUNNING → SUCCESS/FAILED for the update unit. Consumed by the first call +# to api_updates_status() so that the full log is returned to the frontend +# even when the frontend's offset is pointing past the pre-restart content. +_update_recovery_happened: bool = False + BACKUP_LOG = "/var/log/sovran-hub-backup.log" BACKUP_STATUS = "/var/log/sovran-hub-backup.status" BACKUP_SCRIPT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "scripts", "sovran-hub-backup.sh") @@ -2432,19 +2438,34 @@ async def api_updates_status(offset: int = 0): If the status file says RUNNING but the systemd unit is no longer active (e.g. the hub was restarted mid-update), correct the stale state before returning so the frontend is never permanently stuck. + + When recovery is detected (either during this call or at startup), the log + is returned from offset 0 so the frontend receives the complete output. """ + global _update_recovery_happened loop = asyncio.get_event_loop() status = await loop.run_in_executor(None, _read_update_status) + use_full_log = False + # Detect and correct stale RUNNING state on every poll. if status == "RUNNING": - await loop.run_in_executor( + corrected = await loop.run_in_executor( None, _recover_stale_status, UPDATE_STATUS, UPDATE_LOG, UPDATE_UNIT ) + if corrected: + use_full_log = True status = await loop.run_in_executor(None, _read_update_status) - new_log, new_offset = await loop.run_in_executor(None, _read_log, offset) + # Honour a recovery that happened at server startup (stale RUNNING corrected + # before the frontend had a chance to reconnect). + if _update_recovery_happened: + use_full_log = True + _update_recovery_happened = False + + effective_offset = 0 if use_full_log else offset + new_log, new_offset = await loop.run_in_executor(None, _read_log, effective_offset) running = (status == "RUNNING") result = "pending" if running else status.lower() @@ -3611,23 +3632,25 @@ async def _startup_save_ip(): _SAFE_UNIT_RE = re.compile(r'^[a-zA-Z0-9@._\-]+\.service$') -def _recover_stale_status(status_file: str, log_file: str, unit_name: str): +def _recover_stale_status(status_file: str, log_file: str, unit_name: str) -> bool: """If status_file says RUNNING but the systemd unit is not active, correct the status. Queries the unit's Result property to distinguish SUCCESS from FAILED so that a completed-but-interrupted update is not wrongly marked as failed. + + Returns True if a correction was made, False otherwise. """ if not _SAFE_UNIT_RE.match(unit_name): - return + return False try: with open(status_file, "r") as f: status = f.read().strip() except FileNotFoundError: - return + return False if status != "RUNNING": - return + return False try: result = subprocess.run( @@ -3658,19 +3681,27 @@ def _recover_stale_status(status_file: str, log_file: str, unit_name: str): f.write(new_status) except OSError: pass + msg = ( + "\n[Update completed successfully while the server was restarting.]\n" + if new_status == "SUCCESS" + else "\n[Update encountered an error. See log above for details.]\n" + ) try: with open(log_file, "a") as f: - f.write( - f"\n[Hub] Stale RUNNING status detected; unit is not active." - f" Correcting to {new_status}.\n" - ) + f.write(msg) except OSError: pass + return True + + return False @app.on_event("startup") async def _startup_recover_stale_status(): """Reset stale RUNNING status files left by interrupted update/rebuild jobs.""" + global _update_recovery_happened loop = asyncio.get_event_loop() - await loop.run_in_executor(None, _recover_stale_status, UPDATE_STATUS, UPDATE_LOG, UPDATE_UNIT) + corrected = await loop.run_in_executor(None, _recover_stale_status, UPDATE_STATUS, UPDATE_LOG, UPDATE_UNIT) + if corrected: + _update_recovery_happened = True await loop.run_in_executor(None, _recover_stale_status, REBUILD_STATUS, REBUILD_LOG, REBUILD_UNIT) diff --git a/app/sovran_systemsos_web/static/js/update.js b/app/sovran_systemsos_web/static/js/update.js index 815756e..4e21d9a 100644 --- a/app/sovran_systemsos_web/static/js/update.js +++ b/app/sovran_systemsos_web/static/js/update.js @@ -94,7 +94,36 @@ async function pollUpdateStatus() { if (_updateFinished) return; try { var data = await apiFetch("/api/updates/status?offset=" + _updateLogOffset); - if (_serverWasDown) { _serverWasDown = false; appendLog("[Server reconnected]\n"); if ($modalStatus) $modalStatus.textContent = "Updating…"; } + if (_serverWasDown) { + _serverWasDown = false; + if (!data.running) { + // The update finished while the server was restarting. Reset to + // offset 0 and re-fetch so the complete log is shown from the top. + _updateLog = ""; + _updateLogOffset = 0; + if ($modalLog) $modalLog.textContent = ""; + try { + var fullData = await apiFetch("/api/updates/status?offset=0"); + if (fullData.log) appendLog(fullData.log); + _updateLogOffset = fullData.offset; + } catch (e) { + // If the re-fetch fails, fall through with whatever we have. + if (data.log) appendLog(data.log); + _updateLogOffset = data.offset; + } + if (data.result === "success") { + appendLog("[Server restarted — update completed successfully.]\n"); + } else { + appendLog("[Server restarted — update encountered an error.]\n"); + } + _updateFinished = true; + stopUpdatePoll(); + onUpdateDone(data.result === "success"); + return; + } + appendLog("[Server reconnected]\n"); + if ($modalStatus) $modalStatus.textContent = "Updating…"; + } if (data.log) appendLog(data.log); _updateLogOffset = data.offset; if (data.running) return;