updated logging
This commit is contained in:
@@ -7,7 +7,6 @@ import json
|
||||
import os
|
||||
import socket
|
||||
import subprocess
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
@@ -27,6 +26,7 @@ GITEA_API_BASE = "https://git.sovransystems.com/api/v1/repos/Sovran_Systems/Sovr
|
||||
|
||||
UPDATE_UNIT = "sovran-hub-update.service"
|
||||
UPDATE_LOG = "/var/log/sovran-hub-update.log"
|
||||
UPDATE_LOCK = "/run/sovran-hub-update.lock"
|
||||
|
||||
REBOOT_COMMAND = ["reboot"]
|
||||
|
||||
@@ -70,12 +70,6 @@ if os.path.isdir(_ICONS_DIR):
|
||||
|
||||
templates = Jinja2Templates(directory=os.path.join(_BASE_DIR, "templates"))
|
||||
|
||||
# ── Track when we started an update ──────────────────────────────
|
||||
# This timestamp lets us know that an update was recently kicked off,
|
||||
# so we don't prematurely declare it finished if the unit hasn't
|
||||
# transitioned to "active" yet.
|
||||
_update_started_at: float = 0.0
|
||||
|
||||
# ── Update check helpers ─────────────────────────────────────────
|
||||
|
||||
def _get_locked_info():
|
||||
@@ -173,15 +167,6 @@ def _update_is_active() -> bool:
|
||||
return r.returncode == 0
|
||||
|
||||
|
||||
def _update_state() -> str:
    """Query systemd for the update unit's ``ActiveState`` property.

    Runs ``systemctl show -p ActiveState --value`` against ``UPDATE_UNIT``
    and returns whatever it printed on standard output, with surrounding
    whitespace stripped. The exit status is not inspected, so the result
    may be an empty string if nothing was printed.
    """
    command = ["systemctl", "show", "-p", "ActiveState", "--value", UPDATE_UNIT]
    completed = subprocess.run(command, capture_output=True, text=True)
    return completed.stdout.strip()
|
||||
|
||||
|
||||
def _update_result() -> str:
|
||||
"""Return 'success', 'failed', or 'unknown'."""
|
||||
r = subprocess.run(
|
||||
@@ -196,6 +181,28 @@ def _update_result() -> str:
|
||||
return "unknown"
|
||||
|
||||
|
||||
def _update_lock_exists() -> bool:
    """Report whether the update lock file at ``UPDATE_LOCK`` is present.

    The lock lives on disk rather than in process memory, so its presence
    can still be observed after this server process has been restarted.
    """
    lock_present = os.path.exists(UPDATE_LOCK)
    return lock_present
|
||||
|
||||
|
||||
def _create_update_lock():
    """Create the lock file that marks an update as in progress.

    Writes the current process ID to ``UPDATE_LOCK``. Creation is
    best-effort: an ``OSError`` (missing directory, permissions, read-only
    filesystem, ...) is never raised to the caller, but it is logged as a
    warning so that a missing lock can be diagnosed instead of vanishing
    silently.
    """
    import logging  # local import: the file's top-level imports are not fully visible here

    try:
        with open(UPDATE_LOCK, "w", encoding="utf-8") as f:
            f.write(str(os.getpid()))
    except OSError as exc:
        # Without the lock the update can still run; only the
        # "survives a server restart" tracking is lost, so do not fail.
        logging.getLogger(__name__).warning(
            "Could not create update lock %s: %s", UPDATE_LOCK, exc
        )
|
||||
|
||||
|
||||
def _remove_update_lock():
    """Remove the update lock file at ``UPDATE_LOCK``.

    A lock that is already gone is not an error. Any other ``OSError``
    (for example a permission problem) is logged as a warning rather than
    raised, matching the best-effort behaviour of ``_create_update_lock``
    so that a stuck lock file cannot make the caller fail.
    """
    import logging  # local import: the file's top-level imports are not fully visible here

    try:
        os.unlink(UPDATE_LOCK)
    except FileNotFoundError:
        # Already removed (or never created) - nothing to do.
        pass
    except OSError as exc:
        logging.getLogger(__name__).warning(
            "Could not remove update lock %s: %s", UPDATE_LOCK, exc
        )
|
||||
|
||||
|
||||
def _read_log(offset: int = 0) -> tuple[str, int]:
|
||||
"""Read the update log file from the given byte offset.
|
||||
Returns (new_text, new_offset)."""
|
||||
@@ -342,7 +349,6 @@ async def api_reboot():
|
||||
@app.post("/api/updates/run")
|
||||
async def api_updates_run():
|
||||
"""Kick off the detached update systemd unit."""
|
||||
global _update_started_at
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
running = await loop.run_in_executor(None, _update_is_active)
|
||||
@@ -356,8 +362,8 @@ async def api_updates_run():
|
||||
stderr=asyncio.subprocess.DEVNULL,
|
||||
)
|
||||
|
||||
# Record the start time so we can handle the race condition
|
||||
_update_started_at = time.monotonic()
|
||||
# Create a file-based lock that survives server restarts
|
||||
_create_update_lock()
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
"systemctl", "start", "--no-block", UPDATE_UNIT,
|
||||
@@ -372,33 +378,37 @@ async def api_updates_run():
|
||||
@app.get("/api/updates/status")
|
||||
async def api_updates_status(offset: int = 0):
|
||||
"""Poll endpoint: returns running state, result, and new log content."""
|
||||
global _update_started_at
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
active = await loop.run_in_executor(None, _update_is_active)
|
||||
state = await loop.run_in_executor(None, _update_state)
|
||||
result = await loop.run_in_executor(None, _update_result)
|
||||
lock_exists = _update_lock_exists()
|
||||
new_log, new_offset = await loop.run_in_executor(None, _read_log, offset)
|
||||
|
||||
# Race condition guard: if we just started the unit and it hasn't
|
||||
# transitioned to "activating"/"active" yet, report it as still running.
|
||||
# Give it up to 10 seconds to appear as active.
|
||||
if not active and _update_started_at > 0:
|
||||
elapsed = time.monotonic() - _update_started_at
|
||||
if elapsed < 10 and state in ("inactive", ""):
|
||||
# Unit hasn't started yet — tell the frontend it's still running
|
||||
return {
|
||||
"running": True,
|
||||
"result": "pending",
|
||||
"log": new_log,
|
||||
"offset": new_offset,
|
||||
}
|
||||
else:
|
||||
# Either it finished or the grace period expired
|
||||
_update_started_at = 0.0
|
||||
# If the unit is active, it's definitely still running
|
||||
if active:
|
||||
return {
|
||||
"running": True,
|
||||
"result": "pending",
|
||||
"log": new_log,
|
||||
"offset": new_offset,
|
||||
}
|
||||
|
||||
# If the lock file exists but the unit is not active, the update
|
||||
# finished (or the server just restarted after nixos-rebuild switch).
|
||||
# The lock file persists across server restarts because it's on disk.
|
||||
if lock_exists:
|
||||
_remove_update_lock()
|
||||
return {
|
||||
"running": False,
|
||||
"result": result,
|
||||
"log": new_log,
|
||||
"offset": new_offset,
|
||||
}
|
||||
|
||||
# No lock, not active — nothing happening
|
||||
return {
|
||||
"running": active,
|
||||
"running": False,
|
||||
"result": result,
|
||||
"log": new_log,
|
||||
"offset": new_offset,
|
||||
|
||||
Reference in New Issue
Block a user