#!/bin/bash
# svcrotate — Hardened WireGuard key rotator with atomic peer updates
# Strict mode: abort on unhandled command failure, on use of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# Not every bash knows inherit_errexit, so a failure to enable it is ignored.
shopt -s inherit_errexit 2>/dev/null || :
# Word-split on newline and tab only, never on spaces.
IFS=$'\n\t'

# ── Settings ────────────────────────────────────────────────────────────────
# NODE_ID, ENGINE_IP, VAULT_KEY_FILE and ROT_INTERVAL may be supplied through
# the environment; every other value is fixed. All of them are frozen below.
NODE_ID="${NODE_ID:-}"                                # tag suffix for logger, node field of reports
ENGINE_IP="${ENGINE_IP:-10.69.69.6}"                  # host that receives security events (port 8301)
VAULT_KEY_FILE="${VAULT_KEY_FILE:-/dev/shm/vault.key}" # HMAC key used to sign reports
ROT_STATE="/run/se/state/rotkey"                      # prefix of the .pub / .last / .pid state files
LOG_DIR="/run/se/logs"                                # directory holding rotkey.log
CONF="/run/se/.nodeconf"                              # optional file sourced by t004_loadconf
ROT_INTERVAL="${ROT_INTERVAL:-86400}"                 # seconds between two rotations
LOCK_FILE="/run/se/.svcrotate.lock"                   # single-instance lock
WG_IFACE="wg0"                                        # WireGuard interface to rotate
readonly NODE_ID ENGINE_IP VAULT_KEY_FILE ROT_STATE LOG_DIR CONF \
    ROT_INTERVAL LOCK_FILE WG_IFACE

# NODE_ID defaults to the empty string; refuse to run without a real value.
[[ -n "${NODE_ID}" ]] || {
    printf 'ERR: NODE_ID not set\n' >&2
    exit 1
}

# Single-instance guard: hold an exclusive, non-blocking flock on fd 300 for
# the lifetime of this invocation.
# The lock directory is created first: this code runs before any sub-command
# is dispatched, so without it even `init` would fail on a missing directory.
mkdir -p "$(dirname "${LOCK_FILE}")"
exec 300>"${LOCK_FILE}"
if ! flock -n 300; then
    printf 'ERR: svcrotate already running\n' >&2
    exit 1
fi

# Source the optional node configuration file.
# Globals:  CONF (read)
# Outputs:  a warning on stderr when the file exists but is skipped
# Returns:  0 when the file is absent or skipped; otherwise the status of
#           sourcing it.
# The file is sourced only when its mode is exactly 600 or 400. A skipped
# file must not produce a non-zero status: callers invoke this function as a
# plain command under errexit, so a failure here would abort the script.
t004_loadconf() {
    [[ -f "${CONF}" ]] || return 0

    local perms
    # An unreadable mode is treated as 000, i.e. "do not trust".
    perms="$(stat -c '%a' "${CONF}" 2>/dev/null || printf '%s' '000')"
    if [[ "${perms}" != "600" && "${perms}" != "400" ]]; then
        printf 'WARN: ignoring %s (mode %s, expected 600 or 400)\n' \
            "${CONF}" "${perms}" >&2
        return 0
    fi

    # shellcheck source=/dev/null
    source "${CONF}"
}

# Record one message in syslog (tag "rotkey-<NODE_ID>") and append it, with a
# UTC timestamp prefix, to rotkey.log inside LOG_DIR.
# Arguments: $1 - message text (empty when omitted)
t004_log() {
    local msg="${1:-}"

    # The syslog copy is optional; a missing or failing logger is ignored.
    logger -t "rotkey-${NODE_ID}" "${msg}" 2>/dev/null || :

    # The log directory is created on demand before every append.
    mkdir -p "${LOG_DIR}"
    printf '%s %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "${msg}" \
        >>"${LOG_DIR}/rotkey.log"
}

# Send one security event to the engine; the whole report is best-effort.
# Arguments: $1 - event text
# Globals:   NODE_ID, ENGINE_IP, VAULT_KEY_FILE (read)
# Returns:   0 in every handled case; delivery failures are ignored.
#
# The "sig" field is "<utc-timestamp>:<nonce>:<hmac>", where the HMAC-SHA256
# covers "<utc-timestamp>:<nonce>:<NODE_ID>" (the event text is not part of
# the signed string). When no usable key is available the hmac part is the
# literal "none".
t004_report() {
    local ev="${1:-}"
    local ts nonce key='' sig='none' node_js ev_js

    ts="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
    # A reporting problem must not abort the caller, so give up quietly-ish.
    if ! nonce="$(openssl rand -hex 16)"; then
        printf 'WARN: security event not sent (nonce generation failed)\n' >&2
        return 0
    fi

    # Read the key up front so an unreadable or empty file results in
    # sig=none instead of an HMAC computed with an empty key.
    if [[ -f "${VAULT_KEY_FILE}" && -r "${VAULT_KEY_FILE}" ]]; then
        key="$(cat "${VAULT_KEY_FILE}" 2>/dev/null)" || key=''
    fi
    if [[ -n "${key}" ]]; then
        # NOTE(review): the key is handed to openssl as a command-line
        # argument, so it is visible in the process list while openssl runs.
        sig="$(printf '%s' "${ts}:${nonce}:${NODE_ID}" \
            | openssl dgst -sha256 -hmac "${key}" \
            | awk '{print $2}')" || sig='none'
        [[ -n "${sig}" ]] || sig='none'
    fi

    # Escape backslashes first, then double quotes, so neither value can
    # break out of its JSON string.
    node_js=${NODE_ID//\\/\\\\}
    node_js=${node_js//\"/\\\"}
    ev_js=${ev//\\/\\\\}
    ev_js=${ev_js//\"/\\\"}

    curl -sf --connect-timeout 3 --max-time 10 \
        -X POST "http://${ENGINE_IP}:8301/a/security/event" \
        -H 'Content-Type: application/json' \
        -d "{\"node\":\"${node_js}\",\"event\":\"${ev_js}\",\"sig\":\"${ts}:${nonce}:${sig}\"}" \
        2>/dev/null || true
}

# ── Key rotation with best-effort peer restoration (not atomic) ─────────────
# Generate a new private key for WG_IFACE, apply it, re-apply the peers that
# were configured beforehand, then persist, log and report the new public key.
# Globals:  WG_IFACE, ROT_STATE (read)
# Returns:  1 when the interface does not exist; 0 on success. Any other
#           failing step aborts through errexit.
#
# Peer handling: endpoints and allowed-ips are snapshotted before the key is
# changed and re-applied afterwards, one peer at a time. A peer that cannot be
# re-applied is logged and skipped. Peers without allowed-ips are not
# re-applied.
t004_rotate() {
    t004_loadconf
    local iface newpk newpub oldpub
    local ep_table aip_table snapshot='' peer ep aips
    iface="${WG_IFACE}"

    if ! ip link show "${iface}" &>/dev/null; then
        t004_log "iface ${iface} not found"
        return 1
    fi

    oldpub="$(wg show "${iface}" public-key 2>/dev/null || printf '%s' '')"
    newpk="$(wg genkey)"
    newpub="$(printf '%s' "${newpk}" | wg pubkey)"

    if [[ -n "${oldpub}" ]]; then
        # Snapshot BEFORE changing the key: one wg call per table, merged into
        # newline-separated "pubkey|endpoint|ip1,ip2,..." records.
        ep_table="$(wg show "${iface}" endpoints 2>/dev/null || printf '%s' '')"
        aip_table="$(wg show "${iface}" allowed-ips 2>/dev/null || printf '%s' '')"
        snapshot="$(
            awk '
                # First input: endpoint per peer; "(none)" means no endpoint.
                NR == FNR {
                    if (NF >= 2 && $2 != "(none)") ep[$1] = $2
                    next
                }
                # Second input: join every allowed-ip of the peer with commas,
                # which is the list syntax "wg set" expects.
                NF >= 1 {
                    ips = ""
                    if ($2 != "(none)")
                        for (i = 2; i <= NF; i++)
                            ips = ips (i > 2 ? "," : "") $i
                    printf "%s|%s|%s\n", $1, ep[$1], ips
                }
            ' <(printf '%s\n' "${ep_table}") <(printf '%s\n' "${aip_table}")
        )"
    fi

    # The private key is passed through a file descriptor, never via argv.
    wg set "${iface}" private-key <(printf '%s' "${newpk}")

    # IFS is overridden for `read` only, so nothing after the loop sees '|'.
    while IFS='|' read -r peer ep aips; do
        [[ -n "${peer}" && -n "${aips}" ]] || continue
        if [[ -n "${ep}" ]]; then
            wg set "${iface}" peer "${peer}" endpoint "${ep}" \
                allowed-ips "${aips}" 2>/dev/null \
                || t004_log "WARN: could not restore peer ${peer}" || true
        else
            wg set "${iface}" peer "${peer}" allowed-ips "${aips}" 2>/dev/null \
                || t004_log "WARN: could not restore peer ${peer}" || true
        fi
    done <<< "${snapshot}"

    printf '%s\n' "${newpub}" > "${ROT_STATE}.pub"
    printf '%s' "$(date +%s)" > "${ROT_STATE}.last"
    t004_log "key rotated old=${oldpub} new=${newpub}"
    t004_report "key-rotated:${newpub}"
}

# Rotate the key when at least ROT_INTERVAL seconds have passed since the
# epoch timestamp stored in "${ROT_STATE}.last".
# Globals:  ROT_STATE, ROT_INTERVAL (read)
# Returns:  0 when no rotation is due; otherwise the status of t004_rotate.
# A missing, empty or non-numeric timestamp counts as "never rotated".
t004_check() {
    t004_loadconf
    local last now
    last="$(cat "${ROT_STATE}.last" 2>/dev/null || printf '%s' '0')"
    # The file content ends up in an arithmetic expression, so accept plain
    # digits only; anything else would raise an arithmetic error.
    if [[ ! "${last}" =~ ^[0-9]+$ ]]; then
        if [[ -n "${last}" ]]; then
            t004_log 'WARN: invalid last-rotation stamp, treating as never rotated' || true
        fi
        last=0
    fi
    now="$(date +%s)"
    # 10# forces base 10 so a stamp with leading zeros is not read as octal.
    if (( now - 10#${last} >= ROT_INTERVAL )); then
        t004_rotate
    fi
}

# Prepare the runtime directories and reset the last-rotation stamp to 0.
# Globals: LOG_DIR, ROT_STATE (read)
t004_init() {
    local stamp_file="${ROT_STATE}.last"

    # Both the log directory and the directory holding the state files.
    mkdir -p "${LOG_DIR}" "$(dirname "${ROT_STATE}")"

    # The stamp is written without a trailing newline.
    printf '0' > "${stamp_file}"

    t004_log 'rotkey init'
}

# Start the background loop that runs t004_check and then sleeps for an hour,
# and record the loop's PID in "${ROT_STATE}.pid".
# Globals:  ROT_STATE (read)
# Returns:  0, also when a loop recorded in the pid file is still alive (in
#           which case nothing new is started).
t004_start() {
    local pidfile="${ROT_STATE}.pid" old_pid='' rc

    # Refuse to spawn a second loop: it would run unsupervised once the pid
    # file is overwritten with the newer PID.
    if [[ -f "${pidfile}" ]]; then
        old_pid="$(cat "${pidfile}" 2>/dev/null || printf '%s' '')"
        if [[ "${old_pid}" =~ ^[0-9]+$ ]] && kill -0 "${old_pid}" 2>/dev/null; then
            t004_log "rotkey already running pid=${old_pid}"
            return 0
        fi
    fi

    (
        while true; do
            # Run the check in its own subshell with errexit on, so a failure
            # inside it stops that check immediately but only costs this one
            # iteration instead of terminating the loop.
            set +e
            ( set -e; t004_check )
            rc=$?
            set -e
            if (( rc != 0 )); then
                t004_log "rotkey check failed rc=${rc}" || true
            fi
            sleep 3600
        done
    ) &
    printf '%s' "$!" > "${pidfile}"
    t004_log 'rotkey started'
}

# Terminate the process recorded in "${ROT_STATE}.pid" (if it is alive),
# remove the pid file and log the stop.
# Globals: ROT_STATE (read)
t004_stop() {
    local pidfile="${ROT_STATE}.pid"
    local daemon_pid

    if [[ -f "${pidfile}" ]]; then
        daemon_pid="$(<"${pidfile}")"
        # Signal only a process that currently exists; errors from kill and
        # wait are deliberately ignored.
        if kill -0 "${daemon_pid}" 2>/dev/null; then
            kill "${daemon_pid}" 2>/dev/null || :
            wait "${daemon_pid}" 2>/dev/null || :
        fi
        # The pid file goes away whether or not the process was alive.
        rm -f "${pidfile}"
    fi

    t004_log 'rotkey stopped'
}

# Print three items on stdout: the loop state ("running", "stopped" or
# "stopped (stale pid)"), the stored public key (or "none"), and the stored
# last-rotation stamp (or "0").
# Globals: ROT_STATE (read)
t004_status() {
    local pidfile="${ROT_STATE}.pid"
    local state='stopped'
    local daemon_pid

    if [[ -f "${pidfile}" ]]; then
        daemon_pid="$(<"${pidfile}")"
        # kill -0 only probes for existence; no signal is delivered.
        if kill -0 "${daemon_pid}" 2>/dev/null; then
            state='running'
        else
            state='stopped (stale pid)'
        fi
    fi
    printf '%s\n' "${state}"

    # State files are printed verbatim; the fallbacks cover missing files.
    cat "${ROT_STATE}.pub" 2>/dev/null || printf '%s\n' 'none'
    cat "${ROT_STATE}.last" 2>/dev/null || printf '%s\n' '0'
}

# One-shot entry point: perform a single rotation check in the foreground.
# Takes no arguments; its status is that of t004_check.
t004_run() { t004_check; }

# EXIT handler: release the flock held on fd 300. Any error from flock is
# ignored.
cleanup_rot() {
    flock -u 300 2>/dev/null || :
}
trap 'cleanup_rot' EXIT

# Command dispatch: each accepted sub-command maps onto the function named
# t004_<sub-command>; anything else prints the usage line and exits with 1.
case "${1:-}" in
    init | start | stop | status | run)
        "t004_${1}"
        ;;
    *)
        printf 'usage: %s init|start|stop|status|run\n' "$0"
        exit 1
        ;;
esac
