updated logging
This commit is contained in:
@@ -7,7 +7,6 @@ import json
|
|||||||
import os
|
import os
|
||||||
import socket
|
import socket
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, HTTPException
|
||||||
@@ -27,6 +26,7 @@ GITEA_API_BASE = "https://git.sovransystems.com/api/v1/repos/Sovran_Systems/Sovr
|
|||||||
|
|
||||||
UPDATE_UNIT = "sovran-hub-update.service"
|
UPDATE_UNIT = "sovran-hub-update.service"
|
||||||
UPDATE_LOG = "/var/log/sovran-hub-update.log"
|
UPDATE_LOG = "/var/log/sovran-hub-update.log"
|
||||||
|
UPDATE_LOCK = "/run/sovran-hub-update.lock"
|
||||||
|
|
||||||
REBOOT_COMMAND = ["reboot"]
|
REBOOT_COMMAND = ["reboot"]
|
||||||
|
|
||||||
@@ -70,12 +70,6 @@ if os.path.isdir(_ICONS_DIR):
|
|||||||
|
|
||||||
templates = Jinja2Templates(directory=os.path.join(_BASE_DIR, "templates"))
|
templates = Jinja2Templates(directory=os.path.join(_BASE_DIR, "templates"))
|
||||||
|
|
||||||
# ── Track when we started an update ──────────────────────────────
|
|
||||||
# This timestamp lets us know that an update was recently kicked off,
|
|
||||||
# so we don't prematurely declare it finished if the unit hasn't
|
|
||||||
# transitioned to "active" yet.
|
|
||||||
_update_started_at: float = 0.0
|
|
||||||
|
|
||||||
# ── Update check helpers ─────────────────────────────────────────
|
# ── Update check helpers ─────────────────────────────────────────
|
||||||
|
|
||||||
def _get_locked_info():
|
def _get_locked_info():
|
||||||
@@ -173,15 +167,6 @@ def _update_is_active() -> bool:
|
|||||||
return r.returncode == 0
|
return r.returncode == 0
|
||||||
|
|
||||||
|
|
||||||
def _update_state() -> str:
|
|
||||||
"""Return the ActiveState of the update unit."""
|
|
||||||
r = subprocess.run(
|
|
||||||
["systemctl", "show", "-p", "ActiveState", "--value", UPDATE_UNIT],
|
|
||||||
capture_output=True, text=True,
|
|
||||||
)
|
|
||||||
return r.stdout.strip()
|
|
||||||
|
|
||||||
|
|
||||||
def _update_result() -> str:
|
def _update_result() -> str:
|
||||||
"""Return 'success', 'failed', or 'unknown'."""
|
"""Return 'success', 'failed', or 'unknown'."""
|
||||||
r = subprocess.run(
|
r = subprocess.run(
|
||||||
@@ -196,6 +181,28 @@ def _update_result() -> str:
|
|||||||
return "unknown"
|
return "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def _update_lock_exists() -> bool:
|
||||||
|
"""Check if the file-based update lock exists (survives server restart)."""
|
||||||
|
return os.path.exists(UPDATE_LOCK)
|
||||||
|
|
||||||
|
|
||||||
|
def _create_update_lock():
|
||||||
|
"""Create the lock file to indicate an update is in progress."""
|
||||||
|
try:
|
||||||
|
with open(UPDATE_LOCK, "w") as f:
|
||||||
|
f.write(str(os.getpid()))
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _remove_update_lock():
|
||||||
|
"""Remove the lock file."""
|
||||||
|
try:
|
||||||
|
os.unlink(UPDATE_LOCK)
|
||||||
|
except FileNotFoundError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def _read_log(offset: int = 0) -> tuple[str, int]:
|
def _read_log(offset: int = 0) -> tuple[str, int]:
|
||||||
"""Read the update log file from the given byte offset.
|
"""Read the update log file from the given byte offset.
|
||||||
Returns (new_text, new_offset)."""
|
Returns (new_text, new_offset)."""
|
||||||
@@ -342,7 +349,6 @@ async def api_reboot():
|
|||||||
@app.post("/api/updates/run")
|
@app.post("/api/updates/run")
|
||||||
async def api_updates_run():
|
async def api_updates_run():
|
||||||
"""Kick off the detached update systemd unit."""
|
"""Kick off the detached update systemd unit."""
|
||||||
global _update_started_at
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
|
|
||||||
running = await loop.run_in_executor(None, _update_is_active)
|
running = await loop.run_in_executor(None, _update_is_active)
|
||||||
@@ -356,8 +362,8 @@ async def api_updates_run():
|
|||||||
stderr=asyncio.subprocess.DEVNULL,
|
stderr=asyncio.subprocess.DEVNULL,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Record the start time so we can handle the race condition
|
# Create a file-based lock that survives server restarts
|
||||||
_update_started_at = time.monotonic()
|
_create_update_lock()
|
||||||
|
|
||||||
proc = await asyncio.create_subprocess_exec(
|
proc = await asyncio.create_subprocess_exec(
|
||||||
"systemctl", "start", "--no-block", UPDATE_UNIT,
|
"systemctl", "start", "--no-block", UPDATE_UNIT,
|
||||||
@@ -372,33 +378,37 @@ async def api_updates_run():
|
|||||||
@app.get("/api/updates/status")
|
@app.get("/api/updates/status")
|
||||||
async def api_updates_status(offset: int = 0):
|
async def api_updates_status(offset: int = 0):
|
||||||
"""Poll endpoint: returns running state, result, and new log content."""
|
"""Poll endpoint: returns running state, result, and new log content."""
|
||||||
global _update_started_at
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
|
|
||||||
active = await loop.run_in_executor(None, _update_is_active)
|
active = await loop.run_in_executor(None, _update_is_active)
|
||||||
state = await loop.run_in_executor(None, _update_state)
|
|
||||||
result = await loop.run_in_executor(None, _update_result)
|
result = await loop.run_in_executor(None, _update_result)
|
||||||
|
lock_exists = _update_lock_exists()
|
||||||
new_log, new_offset = await loop.run_in_executor(None, _read_log, offset)
|
new_log, new_offset = await loop.run_in_executor(None, _read_log, offset)
|
||||||
|
|
||||||
# Race condition guard: if we just started the unit and it hasn't
|
# If the unit is active, it's definitely still running
|
||||||
# transitioned to "activating"/"active" yet, report it as still running.
|
if active:
|
||||||
# Give it up to 10 seconds to appear as active.
|
return {
|
||||||
if not active and _update_started_at > 0:
|
"running": True,
|
||||||
elapsed = time.monotonic() - _update_started_at
|
"result": "pending",
|
||||||
if elapsed < 10 and state in ("inactive", ""):
|
"log": new_log,
|
||||||
# Unit hasn't started yet — tell the frontend it's still running
|
"offset": new_offset,
|
||||||
return {
|
}
|
||||||
"running": True,
|
|
||||||
"result": "pending",
|
|
||||||
"log": new_log,
|
|
||||||
"offset": new_offset,
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
# Either it finished or the grace period expired
|
|
||||||
_update_started_at = 0.0
|
|
||||||
|
|
||||||
|
# If the lock file exists but the unit is not active, the update
|
||||||
|
# finished (or the server just restarted after nixos-rebuild switch).
|
||||||
|
# The lock file persists across server restarts because it's on disk.
|
||||||
|
if lock_exists:
|
||||||
|
_remove_update_lock()
|
||||||
|
return {
|
||||||
|
"running": False,
|
||||||
|
"result": result,
|
||||||
|
"log": new_log,
|
||||||
|
"offset": new_offset,
|
||||||
|
}
|
||||||
|
|
||||||
|
# No lock, not active — nothing happening
|
||||||
return {
|
return {
|
||||||
"running": active,
|
"running": False,
|
||||||
"result": result,
|
"result": result,
|
||||||
"log": new_log,
|
"log": new_log,
|
||||||
"offset": new_offset,
|
"offset": new_offset,
|
||||||
|
|||||||
@@ -4,8 +4,7 @@
|
|||||||
const POLL_INTERVAL_SERVICES = 5000; // 5 s
|
const POLL_INTERVAL_SERVICES = 5000; // 5 s
|
||||||
const POLL_INTERVAL_UPDATES = 1800000; // 30 min
|
const POLL_INTERVAL_UPDATES = 1800000; // 30 min
|
||||||
const ACTION_REFRESH_DELAY = 1500; // 1.5 s after start/stop/restart
|
const ACTION_REFRESH_DELAY = 1500; // 1.5 s after start/stop/restart
|
||||||
const UPDATE_POLL_INTERVAL = 1500; // 1.5 s while update is running
|
const UPDATE_POLL_INTERVAL = 2000; // 2 s while update is running
|
||||||
const UPDATE_POLL_DELAY = 3000; // 3 s before first poll (let unit start)
|
|
||||||
|
|
||||||
const CATEGORY_ORDER = [
|
const CATEGORY_ORDER = [
|
||||||
"infrastructure",
|
"infrastructure",
|
||||||
@@ -29,7 +28,6 @@ let _updatePollTimer = null;
|
|||||||
let _updateLogOffset = 0;
|
let _updateLogOffset = 0;
|
||||||
let _serverWasDown = false;
|
let _serverWasDown = false;
|
||||||
let _updateFinished = false;
|
let _updateFinished = false;
|
||||||
let _sawRunning = false;
|
|
||||||
|
|
||||||
// ── DOM refs ──────────────────────────────────────────────────────
|
// ── DOM refs ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -74,7 +72,7 @@ async function apiFetch(path, options = {}) {
|
|||||||
return res.json();
|
return res.json();
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Render: initial build ─────────────────────────────────────────
|
// ── Render: initial build ────────────────────────<EFBFBD><EFBFBD>────────────────
|
||||||
|
|
||||||
function buildTiles(services, categoryLabels) {
|
function buildTiles(services, categoryLabels) {
|
||||||
_servicesCache = services;
|
_servicesCache = services;
|
||||||
@@ -271,7 +269,6 @@ function openUpdateModal() {
|
|||||||
_updateLogOffset = 0;
|
_updateLogOffset = 0;
|
||||||
_serverWasDown = false;
|
_serverWasDown = false;
|
||||||
_updateFinished = false;
|
_updateFinished = false;
|
||||||
_sawRunning = false;
|
|
||||||
if ($modalLog) $modalLog.textContent = "";
|
if ($modalLog) $modalLog.textContent = "";
|
||||||
if ($modalStatus) $modalStatus.textContent = "Starting update…";
|
if ($modalStatus) $modalStatus.textContent = "Starting update…";
|
||||||
if ($modalSpinner) $modalSpinner.classList.add("spinning");
|
if ($modalSpinner) $modalSpinner.classList.add("spinning");
|
||||||
@@ -309,11 +306,9 @@ function startUpdate() {
|
|||||||
.then(data => {
|
.then(data => {
|
||||||
if (data.status === "already_running") {
|
if (data.status === "already_running") {
|
||||||
appendLog("[Update already in progress, attaching…]\n\n");
|
appendLog("[Update already in progress, attaching…]\n\n");
|
||||||
_sawRunning = true;
|
|
||||||
}
|
}
|
||||||
if ($modalStatus) $modalStatus.textContent = "Updating…";
|
if ($modalStatus) $modalStatus.textContent = "Updating…";
|
||||||
// Delay the first poll to give the systemd unit time to start
|
startUpdatePoll();
|
||||||
setTimeout(startUpdatePoll, UPDATE_POLL_DELAY);
|
|
||||||
})
|
})
|
||||||
.catch(err => {
|
.catch(err => {
|
||||||
appendLog(`[Error: failed to start update — ${err}]\n`);
|
appendLog(`[Error: failed to start update — ${err}]\n`);
|
||||||
@@ -342,6 +337,7 @@ async function pollUpdateStatus() {
|
|||||||
// Server came back after being down
|
// Server came back after being down
|
||||||
if (_serverWasDown) {
|
if (_serverWasDown) {
|
||||||
_serverWasDown = false;
|
_serverWasDown = false;
|
||||||
|
appendLog("[Server reconnected]\n");
|
||||||
if ($modalStatus) $modalStatus.textContent = "Updating…";
|
if ($modalStatus) $modalStatus.textContent = "Updating…";
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -351,14 +347,8 @@ async function pollUpdateStatus() {
|
|||||||
}
|
}
|
||||||
_updateLogOffset = data.offset;
|
_updateLogOffset = data.offset;
|
||||||
|
|
||||||
// Track if we ever saw the unit as running
|
// Check if finished
|
||||||
if (data.running) {
|
if (!data.running) {
|
||||||
_sawRunning = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only declare finished if we previously saw it running (or server says so)
|
|
||||||
// This prevents the race where the unit hasn't started yet
|
|
||||||
if (!data.running && _sawRunning) {
|
|
||||||
_updateFinished = true;
|
_updateFinished = true;
|
||||||
stopUpdatePoll();
|
stopUpdatePoll();
|
||||||
if (data.result === "success") {
|
if (data.result === "success") {
|
||||||
@@ -368,12 +358,10 @@ async function pollUpdateStatus() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
// Server is likely restarting during nixos-rebuild switch
|
// Server is likely restarting during nixos-rebuild switch — keep polling
|
||||||
// This counts as "saw running" since it was running before it died
|
|
||||||
_sawRunning = true;
|
|
||||||
if (!_serverWasDown) {
|
if (!_serverWasDown) {
|
||||||
_serverWasDown = true;
|
_serverWasDown = true;
|
||||||
appendLog("\n[Server restarting — waiting for it to come back…]\n\n");
|
appendLog("\n[Server restarting — waiting for it to come back…]\n");
|
||||||
if ($modalStatus) $modalStatus.textContent = "Server restarting…";
|
if ($modalStatus) $modalStatus.textContent = "Server restarting…";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ let
|
|||||||
{ name = "Matrix-Synapse"; unit = "matrix-synapse.service"; type = "system"; icon = "synapse"; enabled = cfg.services.synapse; category = "communication"; }
|
{ name = "Matrix-Synapse"; unit = "matrix-synapse.service"; type = "system"; icon = "synapse"; enabled = cfg.services.synapse; category = "communication"; }
|
||||||
{ name = "Element-Call"; unit = "livekit.service"; type = "system"; icon = "livekit"; enabled = cfg.features.element-calling; category = "communication"; }
|
{ name = "Element-Call"; unit = "livekit.service"; type = "system"; icon = "livekit"; enabled = cfg.features.element-calling; category = "communication"; }
|
||||||
]
|
]
|
||||||
# ── Self-Hosted Apps ───────────────────────────────────────
|
# ── Self-Hosted Apps ────────────────<EFBFBD><EFBFBD>──────────────────────
|
||||||
++ [
|
++ [
|
||||||
{ name = "VaultWarden"; unit = "vaultwarden.service"; type = "system"; icon = "vaultwarden"; enabled = cfg.services.vaultwarden; category = "apps"; }
|
{ name = "VaultWarden"; unit = "vaultwarden.service"; type = "system"; icon = "vaultwarden"; enabled = cfg.services.vaultwarden; category = "apps"; }
|
||||||
{ name = "Nextcloud"; unit = "phpfpm-nextcloud.service"; type = "system"; icon = "nextcloud"; enabled = cfg.services.nextcloud; category = "apps"; }
|
{ name = "Nextcloud"; unit = "phpfpm-nextcloud.service"; type = "system"; icon = "nextcloud"; enabled = cfg.services.nextcloud; category = "apps"; }
|
||||||
@@ -58,11 +58,18 @@ let
|
|||||||
export PATH="${lib.makeBinPath [ pkgs.nix pkgs.nixos-rebuild pkgs.git pkgs.flatpak pkgs.coreutils ]}:$PATH"
|
export PATH="${lib.makeBinPath [ pkgs.nix pkgs.nixos-rebuild pkgs.git pkgs.flatpak pkgs.coreutils ]}:$PATH"
|
||||||
|
|
||||||
LOG="/var/log/sovran-hub-update.log"
|
LOG="/var/log/sovran-hub-update.log"
|
||||||
|
LOCK="/run/sovran-hub-update.lock"
|
||||||
|
|
||||||
|
# Create lock file (survives server restarts, cleared on reboot since /run is tmpfs)
|
||||||
|
echo $$ > "$LOCK"
|
||||||
|
|
||||||
# Truncate the log and redirect ALL output (stdout + stderr) into it
|
# Truncate the log and redirect ALL output (stdout + stderr) into it
|
||||||
: > "$LOG"
|
: > "$LOG"
|
||||||
exec > >(tee -a "$LOG") 2>&1
|
exec > >(tee -a "$LOG") 2>&1
|
||||||
|
|
||||||
|
# Ensure lock is removed on exit (success or failure)
|
||||||
|
trap 'rm -f "$LOCK"' EXIT
|
||||||
|
|
||||||
echo "══════════════════════════════════════════════════"
|
echo "══════════════════════════════════════════════════"
|
||||||
echo " Sovran_SystemsOS Update — $(date)"
|
echo " Sovran_SystemsOS Update — $(date)"
|
||||||
echo "══════════════════════════════════════════════════"
|
echo "══════════════════════════════════════════════════"
|
||||||
|
|||||||
Reference in New Issue
Block a user