Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ concurrency:
permissions:
contents: read
issues: write
pull-requests: write

jobs:
build-windows-kernel:
Expand Down Expand Up @@ -197,6 +198,22 @@ jobs:
$ManifestPath = ".\src\manifest\MsQuicEtw.man"
wevtutil.exe um $ManifestPath
wevtutil.exe im $ManifestPath /rf:$($MsQuicDll) /mf:$($MsQuicDll)
# Diagnostic step, run only for Linux matrix entries that use XDP.
# Prints a snapshot of memory, disk usage, kernel version, the last 30 dmesg
# lines, the core-dump pattern, ulimits and CPU count/load to the job log.
# The dmesg and core_pattern reads are best-effort ("|| true"); the other
# commands are not guarded.
- name: System Diagnostics (pre-test)
if: matrix.vec.plat == 'linux' && matrix.vec.xdp == '-UseXdp'
run: |
echo "=== Memory ==="
free -h
echo "=== Disk ==="
df -h / /tmp
echo "=== Kernel / XDP ==="
uname -r
sudo dmesg -T 2>/dev/null | tail -30 || true
echo "=== Core pattern ==="
cat /proc/sys/kernel/core_pattern || true
echo "=== ulimits ==="
ulimit -a
echo "=== CPU ==="
nproc && cat /proc/loadavg
- name: Test
if: matrix.vec.os == 'WinServerPrerelease'
shell: pwsh
Expand All @@ -206,6 +223,8 @@ jobs:
if: matrix.vec.os != 'WinServerPrerelease'
shell: pwsh
timeout-minutes: 120
env:
GITHUB_TOKEN: ${{ github.token }}
run: scripts/test.ps1 -Config ${{ matrix.vec.config }} -Arch ${{ matrix.vec.arch }} -Tls ${{ matrix.vec.tls }} -OsRunner ${{ matrix.vec.os }} -GHA -LogProfile ${{ matrix.vec.log || (inputs.log_level || 'Full.Light') }} -GenerateXmlResults ${{ matrix.vec.xdp }} ${{ matrix.vec.qtip }} ${{ inputs.filter && '-Filter' }} ${{ inputs.filter || '' }}
- name: Fix log permissions for Linux XDP
if: failure() && matrix.vec.plat == 'linux' # (matrix.vec.plat == 'linux' && matrix.vec.xdp == '-UseXdp') doesn't work for some reason
Expand Down
146 changes: 145 additions & 1 deletion scripts/test.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,151 @@ for ($iteration = 1; $iteration -le $NumIterations; $iteration++) {
foreach ($TestPath in $TestPaths) {
if ($IsLinux -and $UseXdp) {
$NOFILE = Invoke-Expression "bash -c 'ulimit -n'"
Invoke-Expression ('/usr/bin/sudo bash -c "ulimit -n $NOFILE && pwsh $RunTest -Path $TestPath $TestArguments"')
# XDP diagnostics setup for the current test binary. All diagnostic files go
# into <RootDir>/artifacts/xdp_diagnostics; -Force lets New-Item succeed when
# the directory already exists.
$DiagDir = Join-Path $RootDir "artifacts" "xdp_diagnostics"
New-Item -ItemType Directory -Path $DiagDir -Force | Out-Null
# Log file handed to the background resource monitor script.
$DiagFile = Join-Path $DiagDir "resource_monitor.log"
# Leaf (file-name) part of the test path; used in messages and compared
# against "msquictest" to decide whether to start the heartbeat.
$BinaryName = Split-Path $TestPath -Leaf

# Helper: post a diagnostic comment to the PR via the GitHub REST API.
#
# Parameters:
#   $Title - short heading, rendered as "### XDP Diag: <Title>".
#   $Body  - markdown text placed on the lines below the heading.
#
# Best-effort: logs and returns when GITHUB_TOKEN / GITHUB_REPOSITORY are not
# set or GITHUB_REF is not a pull-request ref, and never throws on HTTP or
# network failures. Reads $DiagDir from the enclosing scope.
function Post-XdpDiag($Title, $Body) {
    if (-not $env:GITHUB_TOKEN -or -not $env:GITHUB_REPOSITORY) {
        Write-Host ">>> [XDP Diag] Missing GITHUB_TOKEN or GITHUB_REPOSITORY"
        return
    }
    if (-not ($env:GITHUB_REF -match 'refs/pull/(\d+)')) {
        Write-Host ">>> [XDP Diag] Not a PR (REF=$($env:GITHUB_REF))"
        return
    }
    $PrNum = $Matches[1]
    $Payload = @{ body = "### XDP Diag: $Title`n$Body" } | ConvertTo-Json -Depth 2

    # Keep a copy of the most recent payload next to the other diagnostics.
    $TmpFile = Join-Path $DiagDir "comment.json"
    Set-Content -Path $TmpFile -Value $Payload

    # Send the request from PowerShell rather than through `bash -c "curl ..."`.
    # Interpolating the token into a shell command line exposes it in the
    # process list (which the heartbeat script publishes via `ps aux`), breaks
    # on paths containing spaces, and overwrites $LASTEXITCODE.
    $Request = @{
        Method      = 'Post'
        Uri         = "https://api.github.com/repos/$($env:GITHUB_REPOSITORY)/issues/$PrNum/comments"
        Headers     = @{ Authorization = "Bearer $($env:GITHUB_TOKEN)" }
        ContentType = 'application/json; charset=utf-8'
        # Explicit UTF-8 bytes so non-ASCII diagnostics are not re-encoded.
        Body        = [System.Text.Encoding]::UTF8.GetBytes($Payload)
        # A stalled API call must not hold up the test run.
        TimeoutSec  = 30
    }
    try {
        $result = [int](Invoke-WebRequest @Request).StatusCode
    } catch {
        # Non-2xx responses throw; report their status code when available.
        # The property lookup goes through PSObject so it is safe under
        # Set-StrictMode when the exception type has no Response member.
        $Failure = $_.Exception
        if ($Failure.PSObject.Properties['Response'] -and $Failure.Response) {
            $result = [int]$Failure.Response.StatusCode
        } else {
            $result = "request failed: $($Failure.Message)"
        }
    }
    Write-Host ">>> [XDP Diag] Post '$Title' to PR #$PrNum -> HTTP $result"
}

# Post pre-flight diagnostics BEFORE the test binary starts.
# $PreDiag is the array of output lines from the bash snapshot (memory, root
# disk, load average, core count, kernel version).
$PreDiag = bash -c "echo 'mem:'; free -h | head -2; echo 'disk:'; df -h / | tail -1; echo 'load:'; cat /proc/loadavg; echo 'cores:'; nproc; echo 'kernel:'; uname -r"
# Markdown code fence kept in a single-quoted string. Writing it as escaped
# backticks inside a double-quoted string is easy to miscount: six backticks
# followed by "n" expand to three backticks plus a literal "n", not a newline.
$Fence = '```'
Post-XdpDiag "Starting $BinaryName" "$Fence`n$($PreDiag -join "`n")`n$Fence"

# Start background resource monitor that writes to a log file.
# The bash script is emitted from a single-quoted here-string, so PowerShell
# expands nothing inside it. Every 30 seconds it appends one line to the file
# given as its first argument:
#   [HH:MM:SS] mem=<used>/<total>MB disk=<used>/<size> load=<first three fields of /proc/loadavg>
# The here-string body and its closing marker must stay at column 0.
$MonitorScript = Join-Path $DiagDir "monitor.sh"
@'
#!/bin/bash
# Usage: monitor.sh <log-file>
# Fail fast with a message instead of looping on a redirect to an empty path.
log_file="${1:?usage: monitor.sh <log-file>}"
while true; do
  echo "[$(date +%H:%M:%S)] mem=$(free -m | awk 'NR==2{print $3"/"$2"MB"}') disk=$(df -h / | awk 'NR==2{print $3"/"$2}') load=$(cut -d' ' -f1-3 /proc/loadavg)" >> "$log_file"
  sleep 30
done
'@ | Set-Content -Path $MonitorScript -NoNewline
# Call chmod directly so the path is passed as one argument instead of being
# re-parsed (and word-split) inside a `bash -c` command string.
chmod +x $MonitorScript
# Stays $null when the monitor could not be started; the cleanup code checks
# this before trying to stop the process.
$MonitorPid = $null
try {
    $MonitorPid = (Start-Process -FilePath "bash" -ArgumentList $MonitorScript, $DiagFile -PassThru -NoNewWindow).Id
} catch {
    Write-Host "Warning: Could not start resource monitor: $_"
}

# Start background heartbeat only for msquictest (where the crash
# happens). Posts a PR comment about every 60 seconds so we capture the
# system state just before the runner crash.
# $HeartbeatPid stays $null when no heartbeat is running.
$HeartbeatPid = $null
if ($BinaryName -eq "msquictest") {
    $HeartbeatScript = Join-Path $DiagDir "heartbeat.sh"
    # Part 1 (expandable here-string): bake the PowerShell values into bash
    # variables. Here-string bodies and closing markers stay at column 0.
    @"
#!/bin/bash
BINARY_NAME="$BinaryName"
DIAG_DIR="$DiagDir"
DIAG_FILE="$DiagFile"
"@ | Set-Content -Path $HeartbeatScript -NoNewline
    # Part 2 (literal here-string): the heartbeat loop. It begins with an empty
    # line because part 1 was written without a trailing newline.
    @'

COUNTER=0
while true; do
  COUNTER=$((COUNTER + 1))
  # SECONDS is bash's built-in count of seconds since this script started.
  # Using it keeps the "+N min" label accurate: the first heartbeat is sent
  # immediately (+0 min), and each iteration takes longer than the 60 s sleep.
  ELAPSED_MIN=$((SECONDS / 60))
  # Collect system state
  MEM=$(free -h | head -2)
  DISK=$(df -h / | tail -1)
  LOAD=$(cat /proc/loadavg)
  # Broad dmesg check: kernel oops, BUG, OOM, XDP, segfault, panic, hung_task, slab
  DMESG=$(sudo dmesg -T --since '2 minutes ago' 2>/dev/null | grep -iE 'oom|kill|xdp|bpf|segfault|oops|BUG|panic|Call Trace|RIP:|WARNING|hung_task|page allocation|slab|out of memory' | tail -20)
  if [ -z "$DMESG" ]; then
    DMESG="(no relevant kernel messages)"
  fi
  # Get last 5 lines of resource monitor
  MONITOR_TAIL=""
  if [ -f "$DIAG_FILE" ]; then
    MONITOR_TAIL=$(tail -5 "$DIAG_FILE")
  fi
  # Top of the process list sorted by memory (header line + 9 processes)
  PROCS=$(ps aux --sort=-%mem | head -10)
  # Check kernel memory (slab + page cache details)
  KMEM=$(grep -E 'Slab|SReclaimable|SUnreclaim|Committed_AS|VmallocUsed|AnonPages|Mapped|PageTables' /proc/meminfo)
  # Build the comment body (kept at column 0 so the markdown is not indented)
  BODY="### XDP Heartbeat #${COUNTER}: ${BINARY_NAME} (+${ELAPSED_MIN} min)
\`\`\`
mem:
${MEM}
kernel mem:
${KMEM}
disk:
${DISK}
load:
${LOAD}
dmesg (last 2 min):
${DMESG}
top processes by memory:
${PROCS}
resource monitor:
${MONITOR_TAIL}
\`\`\`"
  # Post to PR if possible
  if [ -n "$GITHUB_TOKEN" ] && [ -n "$GITHUB_REPOSITORY" ] && [ -n "$GITHUB_REF" ]; then
    PR_NUM=$(echo "$GITHUB_REF" | grep -oP 'refs/pull/\K\d+')
    if [ -n "$PR_NUM" ]; then
      TMPFILE="${DIAG_DIR}/heartbeat_comment.json"
      python3 -c "import json,sys; print(json.dumps({'body': sys.stdin.read()}))" <<< "$BODY" > "$TMPFILE"
      # --max-time: a hung request must not stall the heartbeat loop.
      # --config -: the Authorization header is read from stdin so the token
      # never appears on curl's command line (process listings are posted
      # publicly by this very script).
      curl -sS --max-time 30 -w '%{http_code}' -X POST \
        --config - \
        -H "Content-Type: application/json" \
        -d @"$TMPFILE" \
        "https://api.github.com/repos/$GITHUB_REPOSITORY/issues/$PR_NUM/comments" \
        -o /dev/null 2>&1 <<< "header = \"Authorization: Bearer ${GITHUB_TOKEN}\""
    fi
  fi
  sleep 60 # pause 60 seconds between heartbeats
done
'@ | Add-Content -Path $HeartbeatScript -NoNewline
    # Call chmod directly so the path is passed as one argument instead of
    # being re-parsed inside a `bash -c` command string.
    chmod +x $HeartbeatScript
    try {
        $HeartbeatPid = (Start-Process -FilePath "bash" -ArgumentList $HeartbeatScript -PassThru -NoNewWindow).Id
        Write-Host ">>> [XDP Diag] Heartbeat monitor started (PID=$HeartbeatPid) for $BinaryName"
    } catch {
        Write-Host "Warning: Could not start heartbeat monitor: $_"
    }
} # end if msquictest

# Log a local snapshot (memory, disk, load) right before the test binary runs.
Write-Host ">>> [XDP Diag] Before ${BinaryName}:"
bash -c "free -h; echo '---'; df -h / /tmp; echo '---'; cat /proc/loadavg"
# Run the test under sudo with the caller's open-file limit. Core dumps are
# disabled entirely via the hard limit, and `timeout` acts as a safety net:
# it sends SIGKILL after 6000 seconds.
Invoke-Expression ('/usr/bin/sudo bash -c "ulimit -n $NOFILE && ulimit -Hc 0 && timeout --signal=KILL --foreground 6000 pwsh $RunTest -Path $TestPath $TestArguments"')
# Capture the test's exit status now; every native command below overwrites
# $LASTEXITCODE.
$TestExitCode = $LASTEXITCODE

# Post post-test diagnostics.
# The trailing `grep .` makes the dmesg pipeline fail when it produced no
# lines, so the `|| echo 'none'` fallback fires (after `tail` alone it never
# did, because tail exits 0 on empty input).
$PostDiag = bash -c "echo 'mem:'; free -h | head -2; echo 'disk:'; df -h / | tail -1; echo 'load:'; cat /proc/loadavg; echo 'dmesg:'; sudo dmesg -T --since '2 hours ago' 2>/dev/null | grep -iE 'oom|kill|xdp|bpf|segfault|oops|BUG|panic|Call Trace|RIP:|WARNING|hung_task|page allocation|slab|out of memory' | tail -20 | grep . || echo 'none'"
$MonitorLog = if (Test-Path $DiagFile) { Get-Content $DiagFile -Raw } else { "no data" }
# Markdown code fence kept in a single-quoted string. Six escaped backticks
# followed by "n" inside a double-quoted string expand to three backticks plus
# a literal "n", which breaks both the opening and the closing fences.
$Fence = '```'
Post-XdpDiag "Finished $BinaryName (exit=$TestExitCode)" "$Fence`n$($PostDiag -join "`n")`n$Fence`n`nResource monitor:`n$Fence`n$MonitorLog`n$Fence"

Write-Host ">>> [XDP Diag] After ${BinaryName} (exit=$TestExitCode):"
$PostDiag | ForEach-Object { Write-Host $_ }

# Stop the background monitors ($null means the process was never started).
if ($MonitorPid) {
    Stop-Process -Id $MonitorPid -ErrorAction SilentlyContinue
}
if ($HeartbeatPid) {
    Stop-Process -Id $HeartbeatPid -ErrorAction SilentlyContinue
}

# Restore the test's exit status. Without this, anything that reads
# $LASTEXITCODE afterwards sees the status of the diagnostic commands instead
# of the test run. NOTE(review): the code after this block is not visible
# here; the GitHub Actions pwsh wrapper is documented to exit with
# $LASTEXITCODE, so a failing test run could otherwise be reported as success.
$global:LASTEXITCODE = $TestExitCode
} else {
Invoke-Expression ($RunTest + " -Path $TestPath " + $TestArguments)
}
Expand Down
Loading