#!/bin/sh
# dbgaze-agent installer (Faz 2, B-2 minimal).
#
# Run as root on a Linux host. The dbgaze backend renders the invocation:
#   curl -fsSL $BACKEND/install.sh | DBGAZE_BACKEND_URL=... DBGAZE_ENROLL_CODE=... sudo -E sh
# (DBGAZE_AGENT_TOKEN=... may be passed directly instead of an enrollment code.)
#
# It lays down a dedicated unprivileged user, the /opt/dbgaze layout, a systemd
# unit + owner-only (0600, owned by the agent user) EnvironmentFile, and starts
# the agent. The agent self-derives its machine-bound key + X25519 keypair on
# first start (it writes agent.key / agent.privkey at 0600 itself), so this
# script only prepares the directories.
#
# SCOPE (Faz 2, visible tradeoff): minimal install. .deb/.rpm packaging, signed
# binaries, an uninstall path, and CDN distribution are Faz 6/7. A downloaded
# binary is checked against a same-origin SHA256 sidecar when the backend serves one.
# Binary delivery: if $DBGAZE_AGENT_BIN points at a present binary (or one is
# already staged at the install path) it is used as-is; otherwise the script
# downloads $DBGAZE_BACKEND_URL/dl/dbgaze-agent-linux-<arch> (the backend serves
# /dl with an arch whitelist) — this is the real onboarding path the UI's
# install command runs, verified end-to-end by tests/agent_install_e2e.sh.
set -eu

# ---- configuration (env, with defaults) -------------------------------------
# Each overridable knob uses ":=", which assigns the default only when the
# variable is unset or empty; anything preset in the environment wins.

# Service account name and install root.
: "${DBGAZE_USER:=dbgaze}"
: "${DBGAZE_HOME:=/opt/dbgaze}"

# Fixed layout derived from the install root (always recomputed, never taken
# from the environment).
DBGAZE_BIN_DIR="$DBGAZE_HOME/bin"
DBGAZE_ETC_DIR="$DBGAZE_HOME/etc"
DBGAZE_BIN="$DBGAZE_BIN_DIR/dbgaze-agent"
DBGAZE_ENV_FILE="$DBGAZE_ETC_DIR/agent.env"
DBGAZE_UNIT="/etc/systemd/system/dbgaze-agent.service"

# Backend endpoint and credentials; empty by default, validated later.
: "${DBGAZE_BACKEND_URL:=}"
: "${DBGAZE_AGENT_TOKEN:=}"
# H9: the install one-liner passes a short-lived, single-use ENROLLMENT CODE
# rather than the raw token, keeping the long-lived token out of shell history
# and argv. The code is traded for the real token further down (POST
# /v1/agent/enroll-token). Supplying DBGAZE_AGENT_TOKEN directly still works.
: "${DBGAZE_ENROLL_CODE:=}"
: "${DBGAZE_AGENT_ID:=}"

# DBGAZE_AGENT_BIN: optional path to a pre-staged binary (container E2E or any
# out-of-band delivery). DBGAZE_NO_SYSTEMD=1 skips starting the service, for
# environments without a running systemd (e.g. a plain container smoke test).
: "${DBGAZE_AGENT_BIN:=}"
: "${DBGAZE_NO_SYSTEMD:=0}"

# DBGAZE_UPGRADE (any non-empty value) upgrades an EXISTING install in place:
# the latest binary is force-downloaded and the service restarted while the
# existing env/keys/token are reused, so no token is required. A plain re-run
# instead reuses the staged binary and would never fetch a new version.
: "${DBGAZE_UPGRADE:=}"

# log MESSAGE...
# Writes one progress line to stdout, prefixed with the installer tag.
# printf '%s\n' is used rather than echo so that backslashes in the message
# (paths, URLs) are emitted verbatim; some /bin/sh implementations (e.g. dash)
# expand escape sequences in echo arguments.
log() { printf '%s\n' "[dbgaze-install] $*"; }
# die MESSAGE...
# Writes an error line to stderr, prefixed with the installer tag, and
# terminates the script with exit status 1.
# printf '%s\n' is used rather than echo so that backslashes in the message
# are emitted verbatim; some /bin/sh implementations (e.g. dash) expand escape
# sequences in echo arguments.
die() { printf '%s\n' "[dbgaze-install] ERROR: $*" >&2; exit 1; }

# ---- preconditions ----------------------------------------------------------
# Checks privileges and resolves the inputs the rest of the script relies on:
#   - upgrade mode: an existing env file must be present; the backend URL is
#     taken from it when none was supplied.
#   - fresh install: a backend URL is mandatory, plus either an agent token or
#     an enrollment code (exchanged for the token here).
[ "$(id -u)" = "0" ] || die "must run as root"
if [ -n "$DBGAZE_UPGRADE" ]; then
  # Upgrade an existing install: token not needed (reuse the stored one), and the
  # backend URL is read from the existing env file when not supplied.
  [ -f "$DBGAZE_ENV_FILE" ] || die "DBGAZE_UPGRADE set but no existing install at $DBGAZE_ENV_FILE — run a full install first"
  if [ -z "$DBGAZE_BACKEND_URL" ]; then
    # cut -f2- keeps any '=' characters that appear inside the URL itself.
    DBGAZE_BACKEND_URL="$(grep '^DBGAZE_BACKEND_URL=' "$DBGAZE_ENV_FILE" | cut -d= -f2-)"
  fi
  [ -n "$DBGAZE_BACKEND_URL" ] || die "could not determine DBGAZE_BACKEND_URL (not in env, none supplied)"
else
  [ -n "$DBGAZE_BACKEND_URL" ] || die "DBGAZE_BACKEND_URL is required"
  # H9: exchange a short-lived enrollment code for the real agent token (so the
  # token is never in argv/shell-history). Only when a code was supplied and no
  # token was passed directly. Response is {"agent_token":"..."}.
  if [ -z "$DBGAZE_AGENT_TOKEN" ] && [ -n "$DBGAZE_ENROLL_CODE" ]; then
    log "exchanging enrollment code for agent token"
    # Escape backslashes and double quotes so the code is always sent as a
    # well-formed JSON string and cannot inject extra fields into the body.
    _code_json="$(printf '%s' "$DBGAZE_ENROLL_CODE" | sed 's/\\/\\\\/g; s/"/\\"/g')"
    # The request body is fed to curl on stdin (--data-binary @-) instead of as
    # a command-line argument, so the code is not visible in the process list.
    # No -f: a 422 (bad/expired code) should fall through to the empty-token check
    # below for a precise "rejected" message; -f would mask it as a curl failure.
    # A genuine network/DNS error still makes curl exit non-zero → die here
    # (the pipeline's status is curl's, as it is the last stage).
    _resp="$(printf '{"code":"%s"}' "$_code_json" \
      | curl -sS -X POST "$DBGAZE_BACKEND_URL/v1/agent/enroll-token" \
        -H 'Content-Type: application/json' \
        --data-binary @-)" \
      || die "enrollment code exchange failed (network or backend error)"
    # Pull the agent_token string value out of the JSON response; prints nothing
    # (leaving the token empty) when the field is absent.
    DBGAZE_AGENT_TOKEN="$(printf '%s' "$_resp" | sed -n 's/.*"agent_token"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p')"
    [ -n "$DBGAZE_AGENT_TOKEN" ] || die "enrollment code rejected (invalid, expired, or already used)"
  fi
  [ -n "$DBGAZE_AGENT_TOKEN" ] || die "DBGAZE_AGENT_TOKEN or DBGAZE_ENROLL_CODE is required"
fi

# The agent derives its at-rest key from /etc/machine-id (HKDF). Hosts set up by
# systemd always carry one, but a bare container image may have none (or an
# empty file). Populate it once so key derivation stays stable rather than
# failing fast on an empty id.
_machine_id_file=/etc/machine-id
if [ ! -s "$_machine_id_file" ]; then
  # Preferred: let systemd's own tool create it (best effort, failures ignored).
  if command -v systemd-machine-id-setup >/dev/null 2>&1; then
    systemd-machine-id-setup >/dev/null 2>&1 || true
  fi
  # Last resort when the file is still missing/empty: 16 random bytes rendered
  # as 32 hex characters.
  [ -s "$_machine_id_file" ] \
    || od -An -tx1 -N16 /dev/urandom | tr -d ' \n' > "$_machine_id_file"
  log "seeded /etc/machine-id"
fi

# ---- user + directories -----------------------------------------------------
# Create the service account only when it does not exist yet. It is a system
# account with no login shell and no home directory of its own beyond
# $DBGAZE_HOME (least privilege). useradd is tried first (its stderr is
# silenced); adduser is the fallback when useradd fails or is unavailable.
if id "$DBGAZE_USER" >/dev/null 2>&1; then
  : # account already present — nothing to create
else
  if ! useradd --system --no-create-home --home-dir "$DBGAZE_HOME" --shell /usr/sbin/nologin "$DBGAZE_USER" 2>/dev/null; then
    adduser --system --no-create-home --home "$DBGAZE_HOME" --shell /usr/sbin/nologin "$DBGAZE_USER"
  fi
  log "created system user $DBGAZE_USER"
fi

# Lay out the tree and hand all of it to the service account. bin/ and the
# root stay world-readable; etc/ holds secrets (agent.key salt, encrypted
# privkey, env file) and is therefore restricted to its owner.
mkdir -p "$DBGAZE_BIN_DIR" "$DBGAZE_ETC_DIR"
chown -R "$DBGAZE_USER:$DBGAZE_USER" "$DBGAZE_HOME"
chmod 0755 "$DBGAZE_HOME" "$DBGAZE_BIN_DIR"
chmod 0700 "$DBGAZE_ETC_DIR"

# ---- binary -----------------------------------------------------------------
# Source precedence for the agent executable:
#   1. $DBGAZE_AGENT_BIN, when set and pointing at an existing file;
#   2. a binary already executable at $DBGAZE_BIN, unless upgrading;
#   3. a download from $DBGAZE_BACKEND_URL/dl/dbgaze-agent-linux-<arch>.
# Whatever the path taken, the script aborts if $DBGAZE_BIN is not executable
# at the end.
if [ -n "$DBGAZE_AGENT_BIN" ] && [ -f "$DBGAZE_AGENT_BIN" ]; then
  install -m 0755 -o "$DBGAZE_USER" -g "$DBGAZE_USER" "$DBGAZE_AGENT_BIN" "$DBGAZE_BIN"
  log "installed staged binary from $DBGAZE_AGENT_BIN"
elif [ -x "$DBGAZE_BIN" ] && [ -z "$DBGAZE_UPGRADE" ]; then
  # Reuse keeps a plain re-run idempotent (reconnect without re-download). An
  # upgrade (DBGAZE_UPGRADE=1) deliberately skips this so the latest is fetched.
  log "using already-staged binary at $DBGAZE_BIN"
else
  # Map the host machine arch to the artifact name the backend serves
  # (/dl/dbgaze-agent-linux-<arch>, whitelisted server-side). uname -m reports the
  # kernel arch; translate the common aliases to Go's GOARCH naming.
  case "$(uname -m)" in
    x86_64 | amd64) arch=amd64 ;;
    aarch64 | arm64) arch=arm64 ;;
    *) die "unsupported architecture $(uname -m) (supported: amd64, arm64)" ;;
  esac
  url="$DBGAZE_BACKEND_URL/dl/dbgaze-agent-linux-$arch"
  log "downloading agent binary from $url"
  # Download to a temp path then rename over the target: replacing a RUNNING
  # executable in place fails with ETXTBSY, but rename swaps the directory entry
  # atomically (the live process keeps its old inode until the restart below).
  tmpbin="$DBGAZE_BIN.new"
  if ! curl -fsSL "$url" -o "$tmpbin"; then
    # Do not leave a partial download next to the live binary.
    rm -f "$tmpbin"
    die "binary download failed (set DBGAZE_AGENT_BIN to a staged binary, or check the backend serves $url)"
  fi

  # ---- integrity: SHA256 checksum verification (sec-audit 2026-06-17, CWE-494) ----
  # The backend serves a "<artifact>.sha256" sidecar (single hex digest, optionally
  # "<digest>  <name>" sha256sum format). We verify the download against it BEFORE
  # installing so an in-transit truncation/corruption never lands as the live binary.
  # NOTE (deferral): this is a SAME-ORIGIN checksum — it does NOT defend against a
  # compromised backend (the attacker would serve a matching digest). Out-of-band
  # signature verification (cosign/minisign with a key shipped in this script) is the
  # real origin-trust control and is deferred to Faz 7 (needs key infrastructure;
  # tracked in DEFERRALS). Verification is best-effort: if the backend does not
  # serve a sidecar yet (404), the sidecar is malformed, or this host has no
  # SHA-256 tool, we warn loudly and proceed so current onboarding keeps working;
  # set DBGAZE_REQUIRE_CHECKSUM=1 to make any of those cases FATAL. A digest that
  # was computed and does not match is always fatal.
  sumfile="$tmpbin.sha256"
  if curl -fsSL "$url.sha256" -o "$sumfile" 2>/dev/null && [ -s "$sumfile" ]; then
    # First whitespace-separated field of the first line, CRs stripped, lowercased.
    want="$(tr -d '\r' < "$sumfile" | awk '{print $1; exit}' | tr 'A-F' 'a-f')"
    rm -f "$sumfile" # the digest now lives in $want
    if printf '%s' "$want" | grep -Eq '^[0-9a-f]{64}$'; then
      # Pick whichever SHA-256 tool is installed; an absent tool must not be
      # reported as a checksum mismatch.
      if command -v sha256sum >/dev/null 2>&1; then
        hasher=sha256sum
      elif command -v shasum >/dev/null 2>&1; then
        hasher=shasum # macOS / BSD fallback
      else
        hasher=""
      fi
      if [ -z "$hasher" ]; then
        if [ -n "${DBGAZE_REQUIRE_CHECKSUM:-}" ]; then
          rm -f "$tmpbin"; die "no sha256sum/shasum available to verify $url and DBGAZE_REQUIRE_CHECKSUM is set"
        fi
        log "WARNING: no sha256sum/shasum available to verify $url — proceeding unverified"
      else
        if [ "$hasher" = "sha256sum" ]; then
          got="$(sha256sum "$tmpbin" | awk '{print $1}')"
        else
          got="$(shasum -a 256 "$tmpbin" | awk '{print $1}')"
        fi
        if [ "$got" != "$want" ]; then
          rm -f "$tmpbin"
          die "checksum MISMATCH for $url (expected $want, got $got) — refusing to install"
        fi
        log "checksum verified (sha256 $got)"
      fi
    else
      if [ -n "${DBGAZE_REQUIRE_CHECKSUM:-}" ]; then
        rm -f "$tmpbin"; die "malformed checksum sidecar at $url.sha256 and DBGAZE_REQUIRE_CHECKSUM is set"
      fi
      log "WARNING: malformed checksum sidecar at $url.sha256 — proceeding unverified"
    fi
  else
    rm -f "$sumfile"
    if [ -n "${DBGAZE_REQUIRE_CHECKSUM:-}" ]; then
      rm -f "$tmpbin"; die "no checksum sidecar at $url.sha256 and DBGAZE_REQUIRE_CHECKSUM is set"
    fi
    log "WARNING: backend served no checksum at $url.sha256 — proceeding unverified (relying on HTTPS)"
  fi

  chown "$DBGAZE_USER":"$DBGAZE_USER" "$tmpbin"
  chmod 0755 "$tmpbin"
  mv -f "$tmpbin" "$DBGAZE_BIN"
fi
[ -x "$DBGAZE_BIN" ] || die "agent binary is not executable at $DBGAZE_BIN"

# ---- env file (owner-only 0600, J-2) ----------------------------------------
# Writes the systemd EnvironmentFile holding the backend URL, the agent token
# and (when supplied) the agent id. The file ends up owned by $DBGAZE_USER with
# mode 0600.
# An upgrade reuses the existing env (token + backend url + keys stay put): never
# rewrite it, so an upgrade can be run token-free without clobbering the token.
if [ -z "$DBGAZE_UPGRADE" ]; then
  # Tighten the umask only while the secret is written, then restore it, so a
  # fresh file is never created readable by others and later steps are unaffected.
  _prev_umask="$(umask)"
  umask 077
  {
    # printf '%s' emits values verbatim; echo may expand backslash sequences in
    # some /bin/sh implementations and would corrupt such a token.
    printf 'DBGAZE_BACKEND_URL=%s\n' "$DBGAZE_BACKEND_URL"
    printf 'DBGAZE_AGENT_TOKEN=%s\n' "$DBGAZE_AGENT_TOKEN"
    # Explicit `if` (not `[ ... ] && ...`) so the group's exit status stays 0
    # when no agent id was given.
    if [ -n "$DBGAZE_AGENT_ID" ]; then
      printf 'DBGAZE_AGENT_ID=%s\n' "$DBGAZE_AGENT_ID"
    fi
  } > "$DBGAZE_ENV_FILE"
  umask "$_prev_umask"
  chown "$DBGAZE_USER":"$DBGAZE_USER" "$DBGAZE_ENV_FILE"
  chmod 0600 "$DBGAZE_ENV_FILE"
  log "wrote $DBGAZE_ENV_FILE (0600)"
else
  log "upgrade: keeping existing $DBGAZE_ENV_FILE (token/keys untouched)"
fi

# ---- systemd unit -----------------------------------------------------------
# Renders the service unit to $DBGAZE_UNIT (mode 0644). The unit runs the agent
# as $DBGAZE_USER, loads $DBGAZE_ENV_FILE, restarts on failure, and grants write
# access only to $DBGAZE_ETC_DIR. It is written even when the service will not
# be started by this script.
#
# The unit directory may be absent on hosts/containers without systemd; create
# it first so the redirect below cannot fail with a bare shell error. The
# subshell pins the umask so the directory is 0755 regardless of the umask in
# effect for the rest of the script.
( umask 022; mkdir -p "${DBGAZE_UNIT%/*}" ) \
  || die "cannot create unit directory ${DBGAZE_UNIT%/*}"
cat > "$DBGAZE_UNIT" <<EOF
[Unit]
Description=dbgaze monitoring agent
Documentation=https://dbgaze.io/docs/agent
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=$DBGAZE_USER
Group=$DBGAZE_USER
EnvironmentFile=$DBGAZE_ENV_FILE
ExecStart=$DBGAZE_BIN
Restart=on-failure
RestartSec=5
# Hardening: the agent is outbound-only and needs no elevated capabilities.
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
PrivateTmp=true
ReadWritePaths=$DBGAZE_ETC_DIR

[Install]
WantedBy=multi-user.target
EOF
# Explicit mode: the file may have been created under a restrictive umask.
chmod 0644 "$DBGAZE_UNIT"
log "wrote $DBGAZE_UNIT"

# ---- start ------------------------------------------------------------------
# Final step: bring the service up, unless the caller opted out or no running
# systemd is detected on this host.
case "$DBGAZE_NO_SYSTEMD" in
  1)
    log "DBGAZE_NO_SYSTEMD=1 — unit written but not started (verify-only mode)"
    exit 0
    ;;
esac

# A running systemd is detected by systemctl being on PATH together with the
# /run/systemd/system directory existing.
if ! command -v systemctl >/dev/null 2>&1 || [ ! -d /run/systemd/system ]; then
  log "systemd not running here; unit installed. Start manually or via your init."
else
  systemctl daemon-reload
  # Enabling is best effort; a failure here does not abort the install.
  systemctl enable dbgaze-agent.service >/dev/null 2>&1 || true
  # Use restart rather than `enable --now`: starting an already-running service
  # is a no-op, so an upgrade or re-run has to restart to load the new binary.
  systemctl restart dbgaze-agent.service
  log "dbgaze-agent.service enabled and (re)started"
fi
