Merge pull request #211 from naturallaw777/copilot/fix-headscale-cli-syntax

[WIP] Fix Headscale CLI syntax issues in documentation
This commit is contained in:
Sovran_Systems
2026-04-12 15:54:01 -05:00
committed by GitHub
2 changed files with 523 additions and 6 deletions

View File

@@ -130,17 +130,23 @@ services.tailscale.enable = true;
### Join the Tailnet ### Join the Tailnet
```bash ```bash
sudo tailscale up --login-server https://hs.yourdomain.com sudo tailscale up --login-server https://hs.yourdomain.com --accept-dns=false
``` ```
> **Note:** The `--accept-dns=false` flag prevents Tailscale from taking over your system DNS resolver. This is important if you are behind a VPN (see [Troubleshooting](#troubleshooting) below).
Tailscale prints a URL. Open it and copy the node key (starts with `mkey:`). Tailscale prints a URL. Open it and copy the node key (starts with `mkey:`).
### Approve the Node in Headscale ### Approve the Node in Headscale
On the VPS: On the VPS, first find the numeric user ID for the `admin` user, then register the node:
```bash ```bash
headscale nodes register --user admin --key mkey:xxxxxxxxxxxxxxxx # Look up the numeric ID for the admin user (Headscale 0.28.0 requires -u <id>)
headscale users list -o json
# Register the node using the numeric user ID
headscale nodes register -u <admin-user-id> --key mkey:xxxxxxxxxxxxxxxx
``` ```
Your workstation is now on the Tailnet. You can list nodes: Your workstation is now on the Tailnet. You can list nodes:
@@ -219,7 +225,7 @@ The resulting ISO is in `./result/iso/`.
--role server \ --role server \
--deploy-key "$(cat ~/.ssh/sovran-deploy.pub)" \ --deploy-key "$(cat ~/.ssh/sovran-deploy.pub)" \
--headscale-server "https://hs.yourdomain.com" \ --headscale-server "https://hs.yourdomain.com" \
--headscale-key "$(headscale preauthkeys create --user sovran-deploy --expiration 2h --output json | jq -r '.key')" --headscale-key "$(headscale preauthkeys create -u $(headscale users list -o json | jq -r '.[] | select(.name=="sovran-deploy") | .id') -e 2h -o json | jq -r '.key')"
``` ```
6. **Machine reboots into Sovran_SystemsOS** — `deploy-tailscale-connect.service` runs: 6. **Machine reboots into Sovran_SystemsOS** — `deploy-tailscale-connect.service` runs:
@@ -360,8 +366,96 @@ This stops the Tailscale connect service.
### Revoke All Active Pre-Auth Keys ### Revoke All Active Pre-Auth Keys
```bash ```bash
headscale preauthkeys list --user sovran-deploy # List pre-auth keys (Headscale 0.28.0: no --user flag on list)
headscale preauthkeys expire --user sovran-deploy --key <key> headscale preauthkeys list
# Expire a specific key — use numeric user ID (-u <id>)
# First find the user ID:
headscale users list -o json
# Then expire the key:
headscale preauthkeys expire -u <user-id> --key <key>
```
---
## Troubleshooting
### VPN Conflicts (Mullvad, WireGuard, etc.)
**Symptom:** `tailscale up` hangs or fails with `connection refused` on port 443, even though `curl https://hs.yourdomain.com/health` works fine.
**Cause:** VPNs like Mullvad route all traffic — including Tailscale's control-plane connections — through the VPN tunnel. Additionally, Tailscale's DNS handler (`--accept-dns=true` by default) hijacks DNS resolution and may prevent correct resolution of your Headscale server even when logged out.
**Solution:**
1. Disconnect your VPN temporarily and retry `tailscale up`.
2. If you need the VPN active, use split tunneling to exclude `tailscaled`:
```bash
# Mullvad CLI
mullvad split-tunnel add $(pidof tailscaled)
```
Or in the Mullvad GUI: **Settings → Split tunneling → Add tailscaled**.
3. Always pass `--accept-dns=false` when enrolling to avoid DNS hijacking:
```bash
sudo tailscale up --login-server https://hs.yourdomain.com --authkey <key> --accept-dns=false
```
---
### "RATELIMIT" in tailscaled Logs
**Symptom:** `journalctl -u tailscaled` shows lines like:
```
[RATELIMIT] format("Received error: %v")
```
**Cause:** This is **NOT** a server-side rate limit from Headscale. It is tailscaled's internal log suppressor de-duplicating repeated connection-refused error messages. The real underlying error is `connection refused`.
**What to check:**
1. Is Headscale actually running? `curl https://hs.yourdomain.com/health`
2. Is your VPN blocking the connection? (see VPN Conflicts above)
3. Is there a firewall blocking port 443?
---
### "connection refused" on Port 443
If `tailscale up` fails but `curl` works, the issue is usually DNS or VPN:
```bash
# Does curl reach Headscale successfully?
curl -v https://hs.yourdomain.com/health
# Force IPv4 vs IPv6 to identify if it's an address-family issue
curl -4 https://hs.yourdomain.com/health
curl -6 https://hs.yourdomain.com/health
# Check what IP headscale resolves to
dig +short hs.yourdomain.com
# What resolver is the system using?
cat /etc/resolv.conf
```
If curl works but tailscale doesn't, tailscaled may be using a different DNS resolver (e.g. its own `100.100.100.100` stub resolver). Fix: pass `--accept-dns=false`.
---
### Headscale User ID Lookup (0.28.0)
Headscale 0.28.0 removed `--user <name>` in favour of `-u <numeric-id>`. To find the numeric ID for a user:
```bash
headscale users list -o json
# Output: [{"id": "1", "name": "sovran-deploy", ...}, ...]
# One-liner to get the ID for a specific user
headscale users list -o json | jq -r '.[] | select(.name=="sovran-deploy") | .id'
```
Then use the numeric ID in subsequent commands:
```bash
headscale preauthkeys create -u 1 -e 1h -o json
headscale nodes register -u 1 --key mkey:xxxx
``` ```
--- ---

View File

@@ -0,0 +1,423 @@
{ config, lib, pkgs, ... }:
# ── sovran-provisioner.nix ────────────────────────────────────────────────────
# NixOS module for the Sovran Systems VPS provisioning server.
#
# Deploys:
# - Headscale (coordination server, listening on 127.0.0.1:8080)
# - Python Flask provisioning API (port 9090)
# - Caddy reverse proxy (80/443 with automatic TLS)
# - Bootstrap service (creates Headscale users + enrollment token on first boot)
#
# Headscale 0.28.0 compatible — uses numeric user IDs (-u <id>) throughout.
# ─────────────────────────────────────────────────────────────────────────────
let
cfg = config.sovranProvisioner;
# ── Python Flask provisioner script ────────────────────────────────────────
provisionerScript = pkgs.writeText "sovran-provisioner.py" ''
#!/usr/bin/env python3
"""
Sovran Systems provisioning API (Headscale 0.28.0 compatible).

Endpoints:
    POST /register   Register a new machine and return a Headscale pre-auth key
                     (requires Bearer token).
    GET  /machines   List registered machines (requires Bearer token).
    GET  /health     Liveness check (no authentication).
"""
import json
import os
import subprocess
import time
from collections import defaultdict
from functools import wraps
from pathlib import Path
from flask import Flask, request, jsonify, abort
app = Flask(__name__)
# Configuration: every setting can be overridden through the environment.
DATA_DIR = Path(os.getenv("PROVISIONER_DATA_DIR", "/var/lib/sovran-provisioner"))
TOKEN_FILE = DATA_DIR.joinpath("enroll-token")
MACHINES_FILE = DATA_DIR.joinpath("machines.json")
HEADSCALE_USER = os.getenv("HEADSCALE_USER", "sovran-deploy")
KEY_EXPIRY = os.getenv("KEY_EXPIRY", "1h")
RATE_LIMIT_MAX = int(os.getenv("RATE_LIMIT_MAX", "10"))
RATE_LIMIT_WIN = int(os.getenv("RATE_LIMIT_WINDOW", "60"))

# In-memory rate limiter state: bucket key -> timestamps of accepted requests.
_rate_buckets: dict = defaultdict(list)
def _rate_limit_check(key: str) -> bool:
    """Sliding-window limiter: True when `key` may proceed, False when throttled.

    Timestamps older than RATE_LIMIT_WIN seconds are discarded first; the
    request is then accepted (and recorded) only while fewer than
    RATE_LIMIT_MAX timestamps remain in the bucket.
    """
    current = time.monotonic()
    recent = [stamp for stamp in _rate_buckets[key] if current - stamp < RATE_LIMIT_WIN]
    _rate_buckets[key] = recent
    if len(recent) < RATE_LIMIT_MAX:
        recent.append(current)
        return True
    return False
# Helper: read enrollment token
def _get_token() -> str:
    """Return the enrollment token stored in TOKEN_FILE, stripped of whitespace.

    Returns "" when the file does not exist or cannot be read. Callers treat
    an empty token as "no token configured" and reject the request, so an
    unreadable file (wrong owner/mode) fails closed with a logged error
    instead of surfacing as an unhandled exception.
    """
    try:
        return TOKEN_FILE.read_text().strip()
    except FileNotFoundError:
        return ""
    except OSError as exc:
        # e.g. PermissionError when the file is owned by another user.
        app.logger.error("cannot read enrollment token %s: %s", TOKEN_FILE, exc)
        return ""
# Helper: require Bearer token
def require_token(f):
    """Decorator that rejects requests lacking the correct Bearer token.

    The wrapped view runs only when the Authorization header has the form
    "Bearer <token>" and <token> equals the enrollment token returned by
    _get_token(). Every other case, including a missing or empty server-side
    token, aborts with HTTP 401.
    """
    import hmac  # local import keeps this helper self-contained

    @wraps(f)
    def decorated(*args, **kwargs):
        auth = request.headers.get("Authorization", "")
        if not auth.startswith("Bearer "):
            abort(401)
        token = auth[len("Bearer "):].strip()
        expected = _get_token()
        # Constant-time comparison so response timing does not leak how much
        # of the token matched. Compare bytes: compare_digest() raises
        # TypeError for non-ASCII str arguments.
        if not expected or not hmac.compare_digest(
            token.encode("utf-8"), expected.encode("utf-8")
        ):
            abort(401)
        return f(*args, **kwargs)
    return decorated
# Helper: persist machine record
def _save_machine(hostname: str, mac: str, tailscale_ip: str = ""):
    """Insert or replace the record for `mac` in MACHINES_FILE.

    Args:
        hostname: Host name reported by the machine.
        mac: MAC address; used as the record key, so a repeated registration
            overwrites the earlier entry.
        tailscale_ip: Optional Tailscale address; stored as given.

    The file is replaced atomically (write to a temporary file in the same
    directory, then os.replace). An interrupted write therefore cannot leave
    truncated JSON behind, which _load_machines() would read as an empty
    registry. The temporary file is created by mkstemp, so the resulting
    file is readable by the service user only.
    """
    import tempfile  # local import keeps this helper self-contained

    machines = _load_machines()
    machines[mac] = {
        "hostname": hostname,
        "mac": mac,
        "registered_at": time.time(),
        "tailscale_ip": tailscale_ip,
    }
    payload = json.dumps(machines, indent=2)

    fd, tmp_path = tempfile.mkstemp(
        dir=str(MACHINES_FILE.parent),
        prefix=MACHINES_FILE.name + ".",
        suffix=".tmp",
    )
    try:
        with os.fdopen(fd, "w") as handle:
            handle.write(payload)
        os.replace(tmp_path, MACHINES_FILE)
    except BaseException:
        # Do not leave stray temp files behind on failure.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
def _load_machines() -> dict:
    """Return the parsed contents of MACHINES_FILE.

    A missing file or one that is not valid JSON yields an empty dict.
    """
    try:
        raw = MACHINES_FILE.read_text()
        records = json.loads(raw)
    except (FileNotFoundError, json.JSONDecodeError):
        records = {}
    return records
# Headscale helpers (0.28.0 compatible)
def get_user_id(username: str):
    """Look up the numeric Headscale user ID for `username`.

    Runs `headscale users list -o json` and returns the "id" field of the
    first entry whose "name" equals `username`.

    Returns None (after logging) when the CLI cannot be started, times out,
    exits non-zero, prints invalid JSON, or no entry matches.
    """
    try:
        result = subprocess.run(
            ["headscale", "users", "list", "-o", "json"],
            capture_output=True, text=True, timeout=30,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        # OSError covers a headscale binary that is missing from PATH.
        app.logger.error("headscale users list could not be run: %s", exc)
        return None
    if result.returncode != 0:
        app.logger.error("headscale users list failed: %s", result.stderr)
        return None
    try:
        users = json.loads(result.stdout)
    except json.JSONDecodeError:
        app.logger.error("headscale users list returned invalid JSON: %s", result.stdout)
        return None
    # Presumably an empty user table can be printed as `null` instead of `[]`;
    # treat anything that is not a list as "no users".
    if not isinstance(users, list):
        return None
    for user in users:
        if isinstance(user, dict) and user.get("name") == username:
            return user.get("id")
    return None
def create_preauthkey(user_id, expiry: str = "1h") -> str | None:
    """Create a pre-auth key for the numeric user ID (Headscale 0.28.0).

    Args:
        user_id: Numeric Headscale user ID; passed to `-u` as a string.
        expiry: Key lifetime passed to `-e` (for example "1h").

    Returns:
        The "key" field of the CLI's JSON output, or None (after logging)
        when the CLI cannot be started, times out, exits non-zero, or does
        not print a JSON object.
    """
    try:
        result = subprocess.run(
            ["headscale", "preauthkeys", "create",
             "-u", str(user_id),
             "-e", expiry,
             "-o", "json"],
            capture_output=True, text=True, timeout=30,
        )
    except (OSError, subprocess.TimeoutExpired) as exc:
        # OSError covers a headscale binary that is missing from PATH.
        app.logger.error("headscale preauthkeys create could not be run: %s", exc)
        return None
    if result.returncode != 0:
        app.logger.error("headscale preauthkeys create failed: %s", result.stderr)
        return None
    try:
        key_data = json.loads(result.stdout)
    except json.JSONDecodeError:
        app.logger.error("preauthkeys create returned invalid JSON: %s", result.stdout)
        return None
    if not isinstance(key_data, dict):
        app.logger.error("preauthkeys create returned unexpected JSON: %s", result.stdout)
        return None
    return key_data.get("key")
# Routes
@app.route("/health")
def health():
    """Liveness probe: answers with a fixed JSON status document."""
    status = {"status": "ok"}
    return jsonify(status)
@app.route("/register", methods=["POST"])
@require_token
def register():
    """Register a machine and hand back a Headscale pre-auth key.

    Expects a JSON object with non-empty string fields "hostname" and "mac".

    Responses:
        200  {"headscale_key", "login_server", "hostname"}
        400  body is not a JSON object, or hostname/mac missing or invalid
        429  rate limit exceeded for the calling client
        500  Headscale user lookup or key creation failed
    """
    # The app listens on loopback and is reached through a reverse proxy, so
    # request.remote_addr is the proxy's address for every caller. Use the
    # right-most X-Forwarded-For entry (the one added by the nearest proxy)
    # so the limit is applied per client rather than globally.
    forwarded_for = request.headers.get("X-Forwarded-For", "")
    client_ip = forwarded_for.split(",")[-1].strip() or request.remote_addr or "unknown"
    if not _rate_limit_check(client_ip):
        return jsonify({"error": "rate limit exceeded"}), 429

    data = request.get_json(silent=True)
    # A JSON array or scalar has no .get(); reject it instead of crashing.
    if not data or not isinstance(data, dict):
        return jsonify({"error": "JSON body required"}), 400

    hostname = data.get("hostname")
    mac = data.get("mac")
    if not isinstance(hostname, str) or not isinstance(mac, str):
        return jsonify({"error": "hostname and mac are required"}), 400
    hostname = hostname.strip()
    mac = mac.strip()
    if not hostname or not mac:
        return jsonify({"error": "hostname and mac are required"}), 400

    # Look up the numeric user ID (Headscale 0.28.0 requires -u <id>)
    user_id = get_user_id(HEADSCALE_USER)
    if user_id is None:
        app.logger.error("Headscale user '%s' not found", HEADSCALE_USER)
        return jsonify({"error": "provisioning user not found on Headscale server"}), 500

    # Create a pre-auth key for that user
    key = create_preauthkey(user_id, expiry=KEY_EXPIRY)
    if key is None:
        return jsonify({"error": "failed to create pre-auth key"}), 500

    # Persist the registration record
    _save_machine(hostname, mac)

    login_server = os.environ.get("HEADSCALE_URL", "")
    return jsonify({
        "headscale_key": key,
        "login_server": login_server,
        "hostname": hostname,
    })
@app.route("/machines")
@require_token
def machines():
    """Return every stored machine record as a JSON array."""
    records = _load_machines()
    return jsonify([record for record in records.values()])
# Entry point
if __name__ == "__main__":
    # Make sure the state directory exists, then serve on loopback only.
    DATA_DIR.mkdir(exist_ok=True, parents=True)
    app.run(port=9090, host="127.0.0.1")
'';
# ── Headscale YAML config ──────────────────────────────────────────────────
# Store file "headscale.yaml" whose server_url is derived from
# cfg.headscaleDomain.
# NOTE(review): this binding does not appear to be referenced anywhere in the
# visible module; Headscale itself is configured via services.headscale.settings.
# It also sets metrics_listen_addr to 127.0.0.1:9090, the same port the Flask
# API binds. Confirm whether this file is still needed before relying on it.
headscaleConfig = pkgs.writeText "headscale.yaml" ''
server_url: https://${cfg.headscaleDomain}
listen_addr: 127.0.0.1:8080
metrics_listen_addr: 127.0.0.1:9090
# Logging
log:
level: info
# Database
database:
type: sqlite
sqlite:
path: /var/lib/headscale/db.sqlite
# DERP (relay/STUN)
derp:
server:
enabled: false
urls:
- https://controlplane.tailscale.com/derpmap/default
auto_update_enabled: true
update_frequency: 24h
# Disable magic DNS by default (clients opt in)
dns:
magic_dns: false
base_domain: sovran.internal
# Node expiry
node_update_check_interval: 10s
'';
in
{
# ── Module options ─────────────────────────────────────────────────────────
options.sovranProvisioner =
  let
    inherit (lib) mkEnableOption mkOption types;

    # String-typed option; `attrs` supplies description and optional default.
    strOption = attrs: mkOption ({ type = types.str; } // attrs);
    # Integer-typed option; `attrs` supplies description and default.
    intOption = attrs: mkOption ({ type = types.int; } // attrs);
  in
  {
    enable = mkEnableOption "Sovran Systems provisioning server (Headscale + Flask API + Caddy)";

    # Public host names (no defaults: both must be set by the user).
    domain = strOption {
      description = "Public FQDN for the provisioning API (e.g. prov.yourdomain.com)";
    };
    headscaleDomain = strOption {
      description = "Public FQDN for the Headscale coordination server (e.g. hs.yourdomain.com)";
    };

    # Headscale users created by the bootstrap service.
    headscaleUser = strOption {
      default = "sovran-deploy";
      description = "Headscale user namespace for deployed machines";
    };
    adminUser = strOption {
      default = "admin";
      description = "Headscale user namespace for admin workstations";
    };

    # Pre-auth key lifetime handed to the provisioning API.
    keyExpiry = strOption {
      default = "1h";
      description = "Lifetime of generated pre-auth keys (e.g. 1h, 2h, 24h)";
    };

    # Rate limiting of the /register endpoint.
    rateLimitMax = intOption {
      default = 10;
      description = "Maximum number of /register calls per rateLimitWindow seconds per IP";
    };
    rateLimitWindow = intOption {
      default = 60;
      description = "Rate-limit sliding window in seconds";
    };
  };
# ── Module implementation ──────────────────────────────────────────────────
config = lib.mkIf cfg.enable {
# ── Headscale ─────────────────────────────────────────────────────────────
# Coordination server, bound to loopback only and published through Caddy.
services.headscale = {
  enable = true;
  address = "127.0.0.1";
  port = 8080;
  settings = {
    server_url = "https://${cfg.headscaleDomain}";
    listen_addr = "127.0.0.1:8080";
    # Pin the metrics listener to its own port. Headscale's stock default
    # for metrics_listen_addr is 127.0.0.1:9090, which is the port the Flask
    # provisioning API binds; leaving it unset lets the two services race for
    # the same socket. (Verify the default against your nixpkgs revision.)
    metrics_listen_addr = "127.0.0.1:9091";
    database = {
      type = "sqlite";
      sqlite = { path = "/var/lib/headscale/db.sqlite"; };
    };
    dns = {
      magic_dns = false;
      base_domain = "sovran.internal";
    };
    derp = {
      server.enabled = false;
      urls = [ "https://controlplane.tailscale.com/derpmap/default" ];
      auto_update_enabled = true;
      update_frequency = "24h";
    };
    log.level = "info";
  };
};
# ── Python / Flask dependencies ────────────────────────────────────────────
# Installs the headscale CLI and a Python interpreter bundled with Flask.
environment.systemPackages =
  let
    flaskPython = pkgs.python3.withPackages (pyPkgs: [ pyPkgs.flask ]);
  in
  [ pkgs.headscale flaskPython ];
# ── Provisioner systemd service ────────────────────────────────────────────
# Runs the Flask provisioning API as the unprivileged sovran-provisioner user.
systemd.services.sovran-provisioner = {
  description = "Sovran provisioning API";
  # Start after the bootstrap unit so the enrollment token and the Headscale
  # users already exist when the first request arrives.
  after = [
    "network-online.target"
    "headscale.service"
    "sovran-provisioner-bootstrap.service"
  ];
  wants = [ "network-online.target" "sovran-provisioner-bootstrap.service" ];
  wantedBy = [ "multi-user.target" ];
  # The script executes `headscale` by bare name. systemd units do not
  # inherit the system profile's PATH, so the CLI must be added explicitly.
  path = [ pkgs.headscale ];
  environment = {
    PROVISIONER_DATA_DIR = "/var/lib/sovran-provisioner";
    HEADSCALE_USER = cfg.headscaleUser;
    KEY_EXPIRY = cfg.keyExpiry;
    RATE_LIMIT_MAX = toString cfg.rateLimitMax;
    RATE_LIMIT_WINDOW = toString cfg.rateLimitWindow;
    HEADSCALE_URL = "https://${cfg.headscaleDomain}";
  };
  serviceConfig = {
    Type = "simple";
    Restart = "on-failure";
    RestartSec = "5s";
    DynamicUser = false;
    User = "sovran-provisioner";
    Group = "sovran-provisioner";
    # The headscale CLI talks to the daemon over its unix control socket;
    # membership in the daemon's group is assumed to grant access to it
    # (verify the socket mode of your Headscale configuration).
    SupplementaryGroups = [ config.services.headscale.group ];
    StateDirectory = "sovran-provisioner";
    RuntimeDirectory = "sovran-provisioner";
    ExecStart = "${pkgs.python3.withPackages (ps: [ ps.flask ])}/bin/python3 ${provisionerScript}";
  };
};
# ── Dedicated system user for the provisioner ──────────────────────────────
# System account and matching group under which the provisioning API runs.
users = {
  groups.sovran-provisioner = { };
  users.sovran-provisioner = {
    description = "Sovran provisioning API service user";
    group = "sovran-provisioner";
    isSystemUser = true;
  };
};
# Allow the provisioner user to call headscale CLI
# (passwordless sudo for the headscale binary from pkgs.headscale).
# NOTE(review): the Python helpers in provisionerScript execute `headscale`
# directly, without `sudo`, so this rule does not look like it is exercised by
# the API. The rule lists no arguments, which presumably permits every
# headscale subcommand as root. Confirm whether it is still needed, and
# narrow or drop it if not.
security.sudo.extraRules = [{
users = [ "sovran-provisioner" ];
commands = [{
command = "${pkgs.headscale}/bin/headscale";
options = [ "NOPASSWD" ];
}];
}];
# ── Bootstrap service (first-boot: create Headscale users + enroll token) ──
# Runs as root (needed to reach Headscale and to chown), but leaves every
# file under /var/lib/sovran-provisioner owned by the API's service user.
systemd.services.sovran-provisioner-bootstrap = {
  description = "Bootstrap Headscale users and enrollment token";
  after = [ "headscale.service" ];
  wants = [ "headscale.service" ];
  # The API must not start before the enrollment token exists.
  before = [ "sovran-provisioner.service" ];
  wantedBy = [ "multi-user.target" ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    # No StateDirectory here on purpose: this unit runs as root, and systemd
    # would re-own the shared state directory to root, locking the
    # unprivileged API out. The script creates the directory itself.
  };
  path = [ pkgs.headscale pkgs.coreutils pkgs.openssl pkgs.jq ];
  script = ''
    DATA_DIR="/var/lib/sovran-provisioner"
    TOKEN_FILE="$DATA_DIR/enroll-token"
    STAMP="$DATA_DIR/.bootstrap-done"
    OWNER_USER="sovran-provisioner"
    OWNER_GROUP="sovran-provisioner"

    # State directory and token must belong to the API's service user,
    # otherwise the API cannot read the token or write machines.json.
    # Done on every start so hosts bootstrapped earlier are repaired too.
    install -d -m 0755 -o "$OWNER_USER" -g "$OWNER_GROUP" "$DATA_DIR"
    if [ -f "$TOKEN_FILE" ]; then
      chown "$OWNER_USER:$OWNER_GROUP" "$TOKEN_FILE"
    fi

    # Idempotent: only run the provisioning steps once.
    if [ -f "$STAMP" ]; then
      exit 0
    fi

    # Wait (up to about 60 s) for headscale to answer; fail loudly if it never does.
    ready=0
    for _ in $(seq 1 30); do
      if headscale users list -o json >/dev/null 2>&1; then
        ready=1
        break
      fi
      sleep 2
    done
    if [ "$ready" -ne 1 ]; then
      echo "headscale did not become ready in time" >&2
      exit 1
    fi

    # Create a headscale user unless one with that exact name already exists.
    # The JSON is parsed with jq: matching it with grep depends on the exact
    # whitespace of the CLI output.
    ensure_user() {
      if headscale users list -o json \
        | jq -e --arg name "$1" 'any(.[]?; .name == $name)' >/dev/null; then
        return 0
      fi
      headscale users create "$1"
    }
    ensure_user ${lib.escapeShellArg cfg.headscaleUser}
    ensure_user ${lib.escapeShellArg cfg.adminUser}

    # Generate enrollment token if not already present (never world-readable).
    if [ ! -s "$TOKEN_FILE" ]; then
      (umask 077; openssl rand -hex 32 > "$TOKEN_FILE")
    fi
    chmod 600 "$TOKEN_FILE"
    chown "$OWNER_USER:$OWNER_GROUP" "$TOKEN_FILE"

    touch "$STAMP"
    echo "Bootstrap complete."
  '';
};
# ── Caddy reverse proxy ────────────────────────────────────────────────────
# One virtual host per public name, each forwarding to a loopback backend:
# Headscale on 8080 and the provisioning API on 9090.
services.caddy.enable = true;
services.caddy.virtualHosts = {
  "${cfg.headscaleDomain}".extraConfig = ''
    reverse_proxy 127.0.0.1:8080
  '';
  "${cfg.domain}".extraConfig = ''
    reverse_proxy 127.0.0.1:9090
  '';
};
# ── Firewall ────────────────────────────────────────────────────────────────
# TCP 80/443 are the ports Caddy serves.
# NOTE(review): UDP 3478 is conventionally STUN, but derp.server.enabled is
# false in the Headscale settings of this module; confirm it is still needed.
networking.firewall.allowedTCPPorts = [ 80 443 ];
networking.firewall.allowedUDPPorts = [ 3478 ];
};
}