#!/bin/bash
# svcbeat — Hardened heartbeat sender
#
# Usage: <script> init|start|stop|status|run
# NODE_ID must be provided in the environment; the other settings below
# fall back to built-in defaults.
#
# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Ask for errexit to be inherited by command substitutions. The error is
# silenced and ignored if the running shell does not know this option.
shopt -s inherit_errexit 2>/dev/null || true
# Word-split on newline and tab only, never on spaces.
IFS=$'\n\t'

# Settings taken from the environment; the text after ":-" is the default
# used when the variable is unset or empty.
NODE_ID="${NODE_ID:-}"
ENGINE_IP="${ENGINE_IP:-10.69.69.6}"
VAULT_KEY_FILE="${VAULT_KEY_FILE:-/dev/shm/vault.key}"
HB_PORT="${HB_PORT:-8301}"
HB_INTERVAL="${HB_INTERVAL:-30}"
readonly NODE_ID ENGINE_IP VAULT_KEY_FILE HB_PORT HB_INTERVAL

# Fixed runtime paths (not overridable from the environment).
declare -r HB_STATE='/run/se/state/hb.pid'
declare -r LOG_DIR='/run/se/logs'
declare -r CONF='/run/se/.nodeconf'
declare -r LOCK_FILE='/run/se/.svcbeat.lock'

# ── Validate node_id ───────────────────────────────────────────────────────
# NODE_ID is mandatory: it identifies this node in every heartbeat.
if [[ -z "${NODE_ID}" ]]; then
    printf 'ERR: NODE_ID not set\n' >&2
    exit 1
fi
# NODE_ID is copied verbatim into a JSON string and an HTTP header further
# down in this script, so refuse values that would corrupt either one:
# control characters (including CR/LF), double quotes and backslashes.
if [[ "${NODE_ID}" == *[[:cntrl:]]* || "${NODE_ID}" == *'"'* || "${NODE_ID}" == *'\'* ]]; then
    printf 'ERR: NODE_ID contains characters unsafe for JSON/HTTP headers\n' >&2
    exit 1
fi

# ── Lock ───────────────────────────────────────────────────────────────────
# Every subcommand, init included, takes this lock before doing anything
# else, so the lock directory cannot rely on init having created it.
mkdir -p -- "$(dirname -- "${LOCK_FILE}")"
# Hold the lock file open on descriptor 300 for the life of this invocation.
exec 300>"${LOCK_FILE}"
# Non-blocking: give up at once if another invocation holds the lock.
if ! flock -n 300; then
    printf 'ERR: svcbeat already running\n' >&2
    exit 1
fi

# ── Config loader ──────────────────────────────────────────────────────────
# Source the node config file, but only when its mode is 600 or 400.
# Globals: CONF (read)
# Outputs: nothing when CONF is absent; a warning via t001_log when the
#          file exists with any other mode (the file is then skipped).
t001_loadconf() {
    [[ -f "${CONF}" ]] || return 0

    local perms
    # A failing stat is mapped to 000 so the file is treated as insecure.
    perms="$(stat -c '%a' "${CONF}" 2>/dev/null || printf '%s' '000')"
    case "${perms}" in
        600 | 400)
            # NOTE(review): only the mode bits are checked, not the owner —
            # confirm that is sufficient for how this script is deployed.
            # shellcheck source=/dev/null
            source "${CONF}"
            ;;
        *)
            # t001_log records only its first argument, so the level and the
            # detail must travel in one string or the detail is lost.
            t001_log "WARN: config file has insecure permissions: ${perms}"
            ;;
    esac
}

# Print the hex HMAC-SHA256 of a payload.
#   $1  payload (fed to openssl on stdin, without a trailing newline)
#   $2  key
# The result is the last whitespace-separated field of openssl's output.
# NOTE(review): the key is handed to openssl as a command-line argument,
# which may expose it to other local users through the process list —
# confirm this is acceptable for the deployment.
t001_hmac() {
    local payload key
    payload="${1:-}"
    key="${2:-}"
    printf '%s' "${payload}" \
        | openssl dgst -sha256 -hex -hmac "${key}" \
        | awk '{ print $NF }'
}

# Build one signed heartbeat record.
# Globals: NODE_ID, VAULT_KEY_FILE (read)
# Outputs: one line "ts nonce sig body", space-separated for the caller.
#          sig is the literal 'none' when no usable key is available.
t001_sign() {
    local ts n body sig_input key s
    ts="$(date +%s)"
    n="$(openssl rand -hex 16)"
    body="{\"node\":\"${NODE_ID}\",\"ts\":\"$(date -u +%Y-%m-%dT%H:%M:%SZ)\"}"
    sig_input="${ts}.${n}.POST./a/hb.${body}"
    # Read the key in its own step. Nested inside the t001_hmac call, a
    # failing cat would go unnoticed and the record would be signed with an
    # empty key. An unreadable or empty key file is treated like a missing one.
    key=''
    if [[ -f "${VAULT_KEY_FILE}" ]]; then
        key="$(cat "${VAULT_KEY_FILE}" 2>/dev/null)" || key=''
    fi
    if [[ -n "${key}" ]]; then
        s="$(t001_hmac "${sig_input}" "${key}")"
    else
        s='none'
    fi
    printf '%s %s %s %s\n' "${ts}" "${n}" "${s}" "${body}"
}

# POST one signed heartbeat to the engine.
# Globals: ENGINE_IP, HB_PORT, NODE_ID (read)
# Outputs: 'ok' or 'fail' on stdout, without a trailing newline
# Returns: 0 in both cases; the caller inspects the printed word
t001_send() {
    local signed ts nonce sig body
    # A heartbeat that cannot be signed is reported as a failed beat rather
    # than aborting the caller through errexit.
    if ! signed="$(t001_sign)" || [[ -z "${signed}" ]]; then
        printf '%s' 'fail'
        return 0
    fi
    # The script-wide IFS excludes the space character, so split on space
    # for this read only. The last variable receives the rest of the line,
    # which keeps any spaces inside the body intact.
    IFS=' ' read -r ts nonce sig body <<< "${signed}"
    if curl -sf --connect-timeout 3 --max-time 10 \
        -o /dev/null -X POST "http://${ENGINE_IP}:${HB_PORT}/a/hb" \
        -H 'Content-Type: application/json' \
        -H "x-engine-ts: ${ts}" \
        -H "x-engine-nonce: ${nonce}" \
        -H "x-engine-sig: ${sig}" \
        -H "x-engine-node: ${NODE_ID}" \
        -d "${body}" \
        2>/dev/null; then
        printf '%s' 'ok'
    else
        printf '%s' 'fail'
    fi
}

# Record a message in syslog and in the heartbeat log file.
#   $1  message text (any further arguments are ignored)
# Globals: NODE_ID, LOG_DIR (read)
t001_log() {
    local text="${1:-}"
    local logfile="${LOG_DIR}/hb.log"
    # Syslog delivery is best-effort: a failing logger is ignored.
    if ! logger -t "hb-${NODE_ID}" "${text}" 2>/dev/null; then
        :
    fi
    mkdir -p "${LOG_DIR}"
    # File entries are prefixed with a UTC timestamp.
    printf '%s %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "${text}" >> "${logfile}"
}

# Send one heartbeat and update the consecutive-miss counter.
# Globals: HB_STATE (read); the counter lives in "${HB_STATE}.miss"
# Effects: logs 'hb ok' and resets the counter to 0 on success;
#          logs 'hb fail' and increments the counter otherwise.
t001_run() {
    t001_loadconf
    local r c
    local miss_file="${HB_STATE}.miss"
    # Make sure the counter's directory exists so this also works when no
    # earlier subcommand has created it.
    mkdir -p -- "$(dirname -- "${miss_file}")"
    r="$(t001_send)"
    if [[ "${r}" == 'ok' ]]; then
        t001_log "hb ok"
        printf '%s' '0' > "${miss_file}"
    else
        t001_log "hb fail"
        c="$(cat "${miss_file}" 2>/dev/null || printf '%s' '0')"
        # Arithmetic expansion evaluates its operand as an expression, so
        # only plain digits from the file are accepted. Anything else
        # restarts the count. 10# forces decimal so "08" is not read as octal.
        [[ "${c}" =~ ^[0-9]+$ ]] || c=0
        printf '%s' "$((10#${c} + 1))" > "${miss_file}"
    fi
}

# Launch the background heartbeat loop and record its PID in HB_STATE.
# Globals: HB_STATE, LOG_DIR, HB_INTERVAL (read)
# Returns: 1 (with a message on stderr) when HB_STATE names a live process,
#          so a second start cannot spawn a second loop.
t001_start() {
    t001_loadconf
    local running_pid=''
    if [[ -f "${HB_STATE}" ]]; then
        running_pid="$(cat "${HB_STATE}" 2>/dev/null || true)"
    fi
    # The invocation lock only serialises concurrent commands; whether a
    # sender is already running has to be read from the pid file.
    if [[ "${running_pid}" =~ ^[1-9][0-9]*$ ]] && kill -0 "${running_pid}" 2>/dev/null; then
        printf 'ERR: hb sender already running (pid %s)\n' "${running_pid}" >&2
        return 1
    fi
    mkdir -p "${LOG_DIR}" "$(dirname "${HB_STATE}")"
    printf '%s' '0' > "${HB_STATE}.miss"
    # Descriptor 300 is closed for the loop so the long-lived background
    # process does not keep the invocation lock file open.
    (
        while true; do
            t001_run
            sleep "${HB_INTERVAL}"
        done
    ) 300>&- &
    printf '%s' "$!" > "${HB_STATE}"
    t001_log 'hb sender started'
}

# Terminate the heartbeat loop named in HB_STATE and remove the pid file.
# Globals: HB_STATE (read; file removed)
# Effects: always logs 'hb sender stopped', even when nothing was running.
t001_stop() {
    if [[ -f "${HB_STATE}" ]]; then
        local pid tries=0
        pid="$(cat "${HB_STATE}")"
        # Accept only a plain positive decimal PID: kill treats 0 and
        # negative values as process-group targets, not as one process.
        if [[ "${pid}" =~ ^[1-9][0-9]*$ ]] && kill -0 "${pid}" 2>/dev/null; then
            kill "${pid}" 2>/dev/null || true
            # wait only works for children of this shell; it reaps the
            # process in that case and fails harmlessly otherwise.
            wait "${pid}" 2>/dev/null || true
            # For a non-child, poll (about 5 s at most) until it is gone.
            while kill -0 "${pid}" 2>/dev/null && (( tries < 50 )); do
                sleep 0.1
                tries=$((tries + 1))
            done
        fi
        rm -f "${HB_STATE}"
    fi
    t001_log 'hb sender stopped'
}

# Report the sender state and the current miss counter.
# Globals: HB_STATE (read)
# Outputs: line 1 is 'running', 'stopped (stale pid)' or 'stopped';
#          line 2 is the miss count ('0' when no counter is available).
#          Both lines end with a newline.
t001_status() {
    local pid misses
    if [[ -f "${HB_STATE}" ]]; then
        pid="$(cat "${HB_STATE}")"
        # Only a plain positive decimal PID can name the sender; anything
        # else (for example 0, which kill maps to a process group) is stale.
        if [[ "${pid}" =~ ^[1-9][0-9]*$ ]] && kill -0 "${pid}" 2>/dev/null; then
            printf '%s\n' 'running'
        else
            printf '%s\n' 'stopped (stale pid)'
        fi
    else
        printf '%s\n' 'stopped'
    fi
    # The counter file is written without a trailing newline, so print its
    # value explicitly to terminate the line in every case.
    misses="$(cat "${HB_STATE}.miss" 2>/dev/null || printf '%s' '0')"
    printf '%s\n' "${misses:-0}"
}

# Prepare the runtime directories and reset the miss counter.
# Globals: LOG_DIR, HB_STATE (read)
# Effects: creates the log directory and the pid file's directory, writes
#          '0' to "${HB_STATE}.miss", logs 'hb init'.
t001_init() {
    local state_dir
    state_dir="$(dirname "${HB_STATE}")"
    mkdir -p "${LOG_DIR}" "${state_dir}"
    printf '%s' '0' > "${HB_STATE}.miss"
    t001_log 'hb init'
}

# ── Cleanup ────────────────────────────────────────────────────────────────
# Release the invocation lock held on descriptor 300.
# An unlock failure is ignored, so this always returns 0.
cleanup_beat() {
    flock -u 300 2>/dev/null || :
}
# Run the unlock on every exit path of the script.
trap 'cleanup_beat' EXIT

# ── Main ───────────────────────────────────────────────────────────────────
# Dispatch on the first positional argument.
case "${1:-}" in
    init)   t001_init ;;
    start)  t001_start ;;
    stop)   t001_stop ;;
    status) t001_status ;;
    run)    t001_run ;;
    *)
        # Reaching this arm is an error (exit 1), so the usage text goes to
        # stderr like the other error messages in this script.
        printf 'usage: %s init|start|stop|status|run\n' "$0" >&2
        exit 1
        ;;
esac
