#!/bin/bash
# test-pxe-deploy.sh — Validate PXE imaging pipeline after WIM deploy
# Run this after every WIM deploy to catch issues before a device tries to boot

set -e

# Running tallies, bumped by the reporters below and printed in the summary.
PASS=0
FAIL=0
WARN=0

# pass MESSAGE — print a success line and count it.
pass() {
    printf '  ✓ %s\n' "$1"
    PASS=$((PASS + 1))
}

# fail MESSAGE — print a failure line and count it.
fail() {
    printf '  ✗ %s\n' "$1"
    FAIL=$((FAIL + 1))
}

# warn MESSAGE — print a warning line and count it.
warn() {
    printf '  ! %s\n' "$1"
    WARN=$((WARN + 1))
}

# Banner followed by one blank line.
printf '%s\n\n' "=== PXE Deploy Validation ==="

# 1. Verify boot.wim exists and is valid
echo "[1/8] Boot WIM integrity"
# Size in bytes of the boot.wim on Beast's TFTP root (BSD/macOS `stat -f%z`).
# The trailing `|| WIM_SIZE=0` keeps an SSH connection failure from aborting
# the whole run under `set -e`; it is reported as a failed check instead.
WIM_SIZE=$(ssh beast 'stat -f%z /usr/local/var/tftpboot/Boot/boot.wim 2>/dev/null || echo 0') || WIM_SIZE=0
# Anything that is not a plain non-negative integer is treated as "missing".
case "$WIM_SIZE" in
    '' | *[!0-9]*) WIM_SIZE=0 ;;
esac
# 300000000 bytes is the minimum plausible size for the boot image.
if [ "$WIM_SIZE" -gt 300000000 ]; then
    pass "boot.wim exists ($((WIM_SIZE / 1048576))MB)"
else
    fail "boot.wim missing or too small ($WIM_SIZE bytes)"
fi

# Verify WIM structure locally
# NOTE(review): this inspects the local file /tmp/boot-pxe-new.wim, not the
# copy on Beast — presumably the staging file that was deployed; confirm.
if wimlib-imagex info /tmp/boot-pxe-new.wim >/dev/null 2>&1; then
    pass "WIM structure valid"
else
    fail "WIM structure invalid"
fi

# Verify deploy scripts inside WIM (image 1, under /deploy)
for f in deploy.bat extract-hash.ps1 run-extract.cmd diskpart.txt; do
    if wimlib-imagex dir /tmp/boot-pxe-new.wim 1 --path="/deploy/$f" >/dev/null 2>&1; then
        pass "WIM contains /deploy/$f"
    else
        fail "WIM missing /deploy/$f"
    fi
done

# 2. Verify TFTP service
printf '\n%s\n' "[2/8] TFTP service"
# The check is simply "is a dnsmasq process alive on Beast"; the remote
# pgrep's exit status is what ssh returns.
# NOTE(review): presumably dnsmasq is what serves TFTP here — confirm.
if ! ssh beast 'pgrep -q dnsmasq'; then
    fail "dnsmasq NOT running"
else
    pass "dnsmasq running"
fi

# 3. Verify boot chain files on TFTP
printf '\n%s\n' "[3/8] TFTP boot chain"
# Paths are relative to the TFTP root on Beast; each one is tested with a
# separate remote `test -f`.
boot_chain=(
    EFI/Microsoft/Boot/bootmgfw.efi
    Boot/BCD
    Boot/boot.sdi
    Boot/boot.wim
)
for rel in "${boot_chain[@]}"; do
    if ! ssh beast "test -f /usr/local/var/tftpboot/$rel"; then
        fail "TFTP missing $rel"
    else
        pass "TFTP has $rel"
    fi
done

# 4. Flush PF states and verify
echo ""
echo "[4/8] PF state flush"
# Run the flush and keep its combined output. Testing the exit status inside
# `if` means an SSH/sudo/pfctl failure is reported as a failed check rather
# than aborting the script under `set -e` or being reported as a success.
if PF_OUT=$(ssh beast 'sudo pfctl -k 10.10.0.0/24 2>&1'); then
    # Pull "<N> states" out of pfctl's "killed ..." line; `head -1` also makes
    # the pipeline's status 0 when nothing matched.
    KILLED=$(printf '%s\n' "$PF_OUT" | grep killed | grep -o "[0-9]* states" | head -1)
    pass "Flushed ${KILLED:-0 states} for imaging VLAN"
else
    fail "PF state flush failed for imaging VLAN (ssh, sudo or pfctl error)"
fi

# 5. Deploy image validation (XPRESS WIM on Beast)
echo ""
echo "[5/8] Deploy image validation"
# Paths on Beast: the active deploy image and its LZX-compressed backup.
DEPLOY_IMG="/usr/local/var/deploy/images/win11-dell-3450-homewood.wim"
DEPLOY_LZX="/usr/local/var/deploy/images/win11-dell-3450-homewood-lzx.wim"

# Check active deploy image exists and size
# `|| IMG_SIZE=0` turns an SSH connection failure into a failed check instead
# of a silent `set -e` abort; non-numeric output is likewise treated as 0.
IMG_SIZE=$(ssh beast "stat -f%z $DEPLOY_IMG 2>/dev/null || echo 0") || IMG_SIZE=0
case "$IMG_SIZE" in
    '' | *[!0-9]*) IMG_SIZE=0 ;;
esac
# Lower bound: 3000000000 bytes.
if [ "$IMG_SIZE" -gt 3000000000 ]; then
    pass "Deploy image exists ($((IMG_SIZE / 1073741824))GB)"
else
    fail "Deploy image missing or too small ($IMG_SIZE bytes)"
fi
# Upper bound (warning only): 30000000000 bytes.
if [ "$IMG_SIZE" -gt 30000000000 ]; then
    warn "Deploy image unusually large ($((IMG_SIZE / 1073741824))GB) — verify compression"
fi

# Validate WIM structure and compression type
# Try wimlib-imagex on Beast first, fall back to local copy at /tmp/win11-xpress.wim

# wim_field KEY — print the last whitespace-separated field of the first line
# of $WIM_INFO that contains KEY. KEY must be lowercase; the match ignores case.
wim_field() {
    printf '%s\n' "$WIM_INFO" |
        awk -v key="$1" '!found && index(tolower($0), key) { print $NF; found = 1 }'
}

WIM_INFO=$(ssh beast "wimlib-imagex info $DEPLOY_IMG 2>&1" || true)
# Set to 1 when no usable `wimlib-imagex info` output could be obtained at
# all. The matching warning has then already been issued, and the checks
# below are skipped rather than reported as failures of the image itself.
WIM_SKIPPED=0
if ! printf '%s\n' "$WIM_INFO" | grep -qi 'image count'; then
    # wimlib not on Beast — validate local copy and verify sizes match
    # NOTE(review): an invalid WIM on a Beast that does have wimlib also lands
    # here, because only the absence of "image count" is tested.
    LOCAL_WIM="/tmp/win11-xpress.wim"
    if [ -f "$LOCAL_WIM" ]; then
        LOCAL_SIZE=$(stat -f%z "$LOCAL_WIM" 2>/dev/null || echo 0)
        if [ "$LOCAL_SIZE" = "$IMG_SIZE" ]; then
            pass "Local copy matches Beast (both $IMG_SIZE bytes)"
            WIM_INFO=$(wimlib-imagex info "$LOCAL_WIM" 2>&1 || true)
        else
            warn "Local copy size mismatch (local=$LOCAL_SIZE, beast=$IMG_SIZE) — skipping WIM validation"
            WIM_SKIPPED=1
        fi
    else
        warn "wimlib-imagex not on Beast and no local copy — skipping WIM validation"
        WIM_SKIPPED=1
    fi
fi

if [ "$WIM_SKIPPED" -eq 1 ]; then
    : # validation skipped; already reported as a warning above
elif printf '%s\n' "$WIM_INFO" | grep -qi 'image count'; then
    pass "Deploy WIM structure valid"

    # Check compression type
    COMPRESS_TYPE=$(wim_field 'compression')
    if echo "$COMPRESS_TYPE" | grep -qi 'xpress'; then
        pass "Compression type: XPRESS (expected)"
    elif echo "$COMPRESS_TYPE" | grep -qi 'lzx'; then
        warn "Compression type: LZX (expected XPRESS — recompression may not be deployed yet)"
    else
        warn "Compression type: $COMPRESS_TYPE (unexpected)"
    fi

    # Check image count
    IMG_COUNT=$(wim_field 'image count')
    if [ "$IMG_COUNT" = "1" ]; then
        pass "Image count: 1"
    else
        fail "Image count: $IMG_COUNT (expected 1)"
    fi

    # Report file count and total bytes (informational; only when present)
    FILE_COUNT=$(wim_field 'file count')
    TOTAL_BYTES=$(wim_field 'total bytes')
    if [ -n "$FILE_COUNT" ]; then
        pass "Image file count: $FILE_COUNT"
    fi
    if [ -n "$TOTAL_BYTES" ]; then
        pass "Image total bytes: $TOTAL_BYTES"
    fi
else
    fail "Deploy WIM structure invalid or wimlib-imagex not available on Beast"
fi

# Check LZX backup exists
# `|| LZX_SIZE=0` keeps an SSH connection failure from aborting the script
# under `set -e`; non-numeric output is treated as "not found".
LZX_SIZE=$(ssh beast "stat -f%z $DEPLOY_LZX 2>/dev/null || echo 0") || LZX_SIZE=0
case "$LZX_SIZE" in
    '' | *[!0-9]*) LZX_SIZE=0 ;;
esac
# Same 3000000000-byte floor as the active image; a missing backup is only a
# warning because it affects rollback, not the deploy itself.
if [ "$LZX_SIZE" -gt 3000000000 ]; then
    pass "LZX backup exists ($((LZX_SIZE / 1073741824))GB)"
else
    warn "LZX backup not found — no rollback available"
fi

# 6. Beast deploy share (SMB)
printf '\n%s\n' "[6/8] Beast deploy share (SMB)"

# Is an smbd process alive on Beast?
if ! ssh beast 'pgrep -q smbd'; then
    fail "smbd NOT running on Beast"
else
    pass "smbd running on Beast"
fi

# Share configuration: take the `sharing -l` listing once, cut out the lines
# around the share whose name contains "deploy" (the matching line plus the
# 10 after it), then look for the flags inside that excerpt.
SHARE_INFO=$(ssh beast 'sharing -l 2>/dev/null' || true)
DEPLOY_STANZA=$(echo "$SHARE_INFO" | grep -A10 'name:.*deploy' || true)

if grep -q 'shared:.*1' <<<"$DEPLOY_STANZA"; then
    pass "Deploy share configured in macOS sharing"
else
    fail "Deploy share NOT configured in macOS sharing"
fi

if grep -q 'guest access:.*1' <<<"$DEPLOY_STANZA"; then
    pass "Deploy share has guest access enabled"
else
    warn "Deploy share guest access not confirmed"
fi

# The image file itself must exist as a regular file on Beast.
if ! ssh beast "test -f $DEPLOY_IMG"; then
    fail "Deploy image NOT found on Beast"
else
    pass "Deploy image accessible on Beast filesystem"
fi

# Any loaded PF rule mentioning "port = 445" counts as SMB being allowed.
PF_SMB=$(ssh beast 'sudo pfctl -sr 2>/dev/null | grep "port = 445"' || true)
case "$PF_SMB" in
    '')
        warn "No PF rule found for port 445 — imaging clients may not reach SMB"
        ;;
    *)
        pass "PF rule allows SMB (port 445) from imaging VLANs"
        ;;
esac

# Flush PF states after SMB test
# The result is checked explicitly: success is only reported when the remote
# pfctl actually succeeded, and a failure becomes a counted failed check
# instead of a `set -e` abort that would skip the final summary.
if ssh beast 'sudo pfctl -k 10.10.0.0/24 >/dev/null 2>&1'; then
    pass "PF states re-flushed after SMB test"
else
    fail "PF state re-flush failed after SMB test"
fi

# 7. Verify MKL hash upload API
echo ""
echo "[7/8] MKL hash upload API"
# Reachability probe first, so an unreachable server is a warning only.
if ssh -o ConnectTimeout=10 mkl 'echo ok' >/dev/null 2>&1; then
    # POST a throw-away record (serial PXETEST) and capture only the HTTP
    # status code. `|| true` matters: when curl cannot connect it still prints
    # a code (000) but exits non-zero, which would otherwise abort the script
    # under `set -e` before the failure could be reported.
    API_RESP=$(ssh mkl 'curl -s -o /dev/null -w "%{http_code}" -X POST http://localhost:3001/api/tech/intune/hash -H "Content-Type: application/json" -d "{\"serial_number\":\"PXETEST\",\"hardware_hash\":\"test\",\"device_model\":\"Test\"}" 2>/dev/null') || true
    if [ "$API_RESP" = "200" ]; then
        pass "Hash upload API responding (HTTP $API_RESP)"
        # Clean up test entry; only claim success if the remote command
        # succeeded. A leftover test row is a warning, not a deploy blocker.
        if ssh mkl 'cd /var/www/html/mkl && node -e "require(\"dotenv\").config();const{query}=require(\"./core/db/connection\");(async()=>{await query(\"DELETE FROM autopilot_hashes WHERE serial_number=?\", [\"PXETEST\"]);process.exit();})()" 2>/dev/null'; then
            pass "Test hash cleaned up"
        else
            warn "Test hash cleanup failed — remove serial PXETEST from autopilot_hashes manually"
        fi
    else
        fail "Hash upload API returned HTTP $API_RESP"
    fi
else
    warn "Cannot reach MKL server (SSH timeout) — check routing if ethernet is connected"
fi

# 8. Verify deploy.bat content (check for NUL corruption and key references)
echo ""
echo "[8/8] Script content validation"

# expect_text TEXT NEEDLE OK_MSG BAD_MSG
# Report OK_MSG via pass if TEXT contains the literal string NEEDLE
# (fixed-string match, case-sensitive), otherwise BAD_MSG via fail.
expect_text() {
    if printf '%s\n' "$1" | grep -qF -- "$2"; then
        pass "$3"
    else
        fail "$4"
    fi
}

# Extract deploy.bat from image 1 of the local boot WIM. If extraction fails
# the variable is left empty so every check below reports a failure, instead
# of `set -e` aborting the script before the summary.
DEPLOY_BAT=$(wimlib-imagex extract /tmp/boot-pxe-new.wim 1 '/deploy/deploy.bat' --to-stdout 2>/dev/null) || DEPLOY_BAT=""

expect_text "$DEPLOY_BAT" '>NUL' \
    "deploy.bat NUL redirects intact" \
    "deploy.bat NUL redirects corrupted"

# The Beast IP is matched as a whole address: dots are literal and the match
# may not be part of a longer number, so 10.10.0.10 or 110.10.0.1 do not count.
if printf '%s\n' "$DEPLOY_BAT" | grep -qE '(^|[^0-9.])10\.10\.0\.1([^0-9]|$)'; then
    pass "deploy.bat has correct Beast IP (10.10.0.1)"
else
    fail "deploy.bat missing Beast IP (10.10.0.1)"
fi

expect_text "$DEPLOY_BAT" 'SetupType' \
    "deploy.bat sets SetupType (CmdLine/SetupType method)" \
    "deploy.bat missing SetupType injection"
expect_text "$DEPLOY_BAT" 'CmdLine' \
    "deploy.bat sets CmdLine for first-boot extraction" \
    "deploy.bat missing CmdLine injection"
expect_text "$DEPLOY_BAT" 'run-extract.cmd' \
    "deploy.bat copies run-extract.cmd" \
    "deploy.bat missing run-extract.cmd reference"
expect_text "$DEPLOY_BAT" 'extract-hash.ps1' \
    "deploy.bat copies extract-hash.ps1" \
    "deploy.bat missing extract-hash.ps1 reference"

# Verify run-extract.cmd content (same extraction and failure handling)
RUN_EXTRACT=$(wimlib-imagex extract /tmp/boot-pxe-new.wim 1 '/deploy/run-extract.cmd' --to-stdout 2>/dev/null) || RUN_EXTRACT=""

expect_text "$RUN_EXTRACT" 'original-cmdline' \
    "run-extract.cmd restores original CmdLine" \
    "run-extract.cmd missing CmdLine restore"
expect_text "$RUN_EXTRACT" 'extract-hash' \
    "run-extract.cmd calls extract-hash" \
    "run-extract.cmd missing extract-hash call"

# Summary
# Print the tallies, then exit 1 if any check failed and 0 otherwise.
# Warnings alone do not change the exit status.
printf '\n%s\n' "=== Results: $PASS passed, $FAIL failed, $WARN warnings ==="
if ((FAIL > 0)); then
    echo "*** DEPLOY HAS ISSUES — DO NOT PXE BOOT ***"
    exit 1
fi
echo "All clear — safe to PXE boot."
exit 0
