#!/usr/bin/env bash
# Cold-restore script for Port Nimara CRM.
#
# Two modes:
#   --drill      Restore to a sandbox DB ($DRILL_DATABASE_URL) + a tagged
#                sandbox path on the live MinIO bucket. Used by the weekly
#                cron drill so the runbook stays accurate.
#   (no --drill) Interactive production restore. Prompts before each
#                destructive step; refuses to run if the live DB has
#                non-empty tables (caller is expected to drop first).
#
# Common args:
#   --snapshot YYYY-MM-DD/HH   Specific dump to restore. Defaults to "latest".

set -euo pipefail

DRILL=0
SNAPSHOT="latest"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --drill) DRILL=1; shift ;;
    # ${2:?...} fails with a named message instead of set -u's generic
    # "unbound variable" when --snapshot is passed without a value.
    --snapshot) SNAPSHOT="${2:?--snapshot requires a YYYY-MM-DD/HH value}"; shift 2 ;;
    *) echo "unknown arg: $1" >&2; exit 2 ;;
  esac
done

# Required MinIO credentials/location — fail fast, one named error each.
: "${BACKUP_S3_BUCKET:?BACKUP_S3_BUCKET not set}"
: "${BACKUP_S3_ENDPOINT:?BACKUP_S3_ENDPOINT not set}"
: "${BACKUP_S3_ACCESS_KEY:?BACKUP_S3_ACCESS_KEY not set}"
: "${BACKUP_S3_SECRET_KEY:?BACKUP_S3_SECRET_KEY not set}"

if [[ "$DRILL" -eq 1 ]]; then
  : "${DRILL_DATABASE_URL:?DRILL_DATABASE_URL not set}"
  TARGET_DB="$DRILL_DATABASE_URL"
  echo "[drill] target DB = $TARGET_DB"
else
  : "${DATABASE_URL:?DATABASE_URL not set}"
  TARGET_DB="$DATABASE_URL"
  # Interactive guard for the destructive production path.
  read -rp "About to overwrite $TARGET_DB. Type 'restore' to continue: " confirm
  [[ "$confirm" == "restore" ]] || { echo "aborted"; exit 1; }
fi

HOST="${BACKUP_HOST_OVERRIDE:-$(hostname -s)}"
WORKDIR="$(mktemp -d)"
MC_ALIAS="bk-$$"   # per-run alias so concurrent invocations don't collide

# Single cleanup path for every exit. Previously the trap was installed
# twice (the second definition replaced the first); one function is easier
# to keep correct. Alias removal is best-effort — it may not exist yet if
# we die before `mc alias set`.
cleanup() {
  rm -rf -- "$WORKDIR"
  mc alias remove "$MC_ALIAS" 2>/dev/null || true
}
trap cleanup EXIT

mc alias set "$MC_ALIAS" "$BACKUP_S3_ENDPOINT" \
  "$BACKUP_S3_ACCESS_KEY" "$BACKUP_S3_SECRET_KEY" --api S3v4 >/dev/null

# Resolve the snapshot path.
if [[ "$SNAPSHOT" == "latest" ]]; then
  # Newest key under this host's prefix; keys embed YYYY-MM-DD/HH so a
  # lexical sort is chronological.
  REMOTE=$(mc ls --recursive "${MC_ALIAS}/${BACKUP_S3_BUCKET}/pg/${HOST}/" \
    | awk '{print $NF}' | sort | tail -1)
  if [[ -z "$REMOTE" ]]; then
    echo "no snapshots found under ${BACKUP_S3_BUCKET}/pg/${HOST}/" >&2
    exit 1
  fi
  REMOTE="${MC_ALIAS}/${BACKUP_S3_BUCKET}/pg/${HOST}/${REMOTE}"
else
  REMOTE="${MC_ALIAS}/${BACKUP_S3_BUCKET}/pg/${HOST}/${SNAPSHOT}.dump.gz"
  # If GPG was used, the file lives at .dump.gz.gpg. Try both.
  if ! mc stat "$REMOTE" >/dev/null 2>&1; then
    REMOTE="${REMOTE}.gpg"
  fi
fi

echo "[$(date -u +%FT%TZ)] Pulling $REMOTE"
LOCAL="$WORKDIR/$(basename "$REMOTE")"
mc cp --quiet "$REMOTE" "$LOCAL"

# Decrypt if needed (encrypted dumps carry a .gpg suffix).
if [[ "$LOCAL" == *.gpg ]]; then
  echo "[$(date -u +%FT%TZ)] Decrypting"
  gpg --batch --yes --decrypt --output "${LOCAL%.gpg}" "$LOCAL"
  rm "$LOCAL"
  LOCAL="${LOCAL%.gpg}"
fi

# Decompress in place; gunzip drops the .gz suffix.
gunzip "$LOCAL"
LOCAL="${LOCAL%.gz}"

echo "[$(date -u +%FT%TZ)] Restoring into $TARGET_DB"

# Drop & recreate to guarantee no half-state from a prior run.
# DB_NAME = last path segment of the URL, minus any ?query-params;
# ADMIN_URL = same URL pointed at the maintenance DB, since you cannot
# DROP the database you are connected to.
DB_NAME=$(echo "$TARGET_DB" | sed -E 's|.*/([^?]+).*|\1|')
ADMIN_URL=$(echo "$TARGET_DB" | sed -E "s|/${DB_NAME}|/postgres|")
# BUG FIX: this statement block was garbled into an invalid redirection
# ('< pg_backend_pid(); DROP DATABASE ...') — the <<SQL heredoc introducer
# and the session-termination SELECT had been lost. DROP DATABASE fails
# while other sessions are connected, so terminate them first.
psql "$ADMIN_URL" -v ON_ERROR_STOP=1 <<SQL
SELECT pg_terminate_backend(pid)
  FROM pg_stat_activity
 WHERE datname = '${DB_NAME}' AND pid <> pg_backend_pid();
DROP DATABASE IF EXISTS "${DB_NAME}";
CREATE DATABASE "${DB_NAME}";
SQL

pg_restore --no-owner --no-privileges --dbname "$TARGET_DB" "$LOCAL"

# Drill mode: compare row counts vs the live producer for parity.
if [[ "$DRILL" -eq 1 ]]; then
  # BUG FIX: drill mode only requires DRILL_DATABASE_URL, so under `set -u`
  # the old `${LIVE_DATABASE_URL:-$DATABASE_URL}` aborted with an "unbound
  # variable" error whenever DATABASE_URL was not exported — i.e. in the
  # normal cron-drill configuration. Resolve the live URL once with safe
  # fallbacks (also hoists it out of the per-table loop) and degrade to a
  # warning when no live URL is available.
  LIVE_URL="${LIVE_DATABASE_URL:-${DATABASE_URL:-}}"
  if [[ -z "$LIVE_URL" ]]; then
    echo "skipping drill row-count diff: neither LIVE_DATABASE_URL nor DATABASE_URL is set" >&2
  else
    echo "[$(date -u +%FT%TZ)] Drill row-count diff (live vs restored):"
    TABLES=$(psql -At "$TARGET_DB" -c \
      "SELECT tablename FROM pg_tables WHERE schemaname='public' ORDER BY tablename;")
    diff_count=0
    while IFS= read -r tbl; do
      [[ -z "$tbl" ]] && continue
      live=$(psql -At "$LIVE_URL" -c "SELECT count(*) FROM \"$tbl\";")
      restored=$(psql -At "$TARGET_DB" -c "SELECT count(*) FROM \"$tbl\";")
      # Live keeps moving after the snapshot, so a nonzero delta is a
      # warning, not a failure.
      delta=$((live - restored))
      if [[ "$delta" -ne 0 ]]; then
        echo "  ⚠ $tbl: live=$live restored=$restored delta=$delta"
        diff_count=$((diff_count + 1))
      fi
    done <<< "$TABLES"
    if [[ "$diff_count" -eq 0 ]]; then
      echo "  ✓ row counts match across all tables"
    fi
  fi
fi

echo "[$(date -u +%FT%TZ)] Restore complete."