Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/triage-tuning.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"schema_version": 1,
"last_updated": null,
"last_run": null,
"recommended_confidence_cutoff": null,
"calibrated_at": null,
"sample_size": 0
}
48 changes: 48 additions & 0 deletions .github/visual-triage-config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"schema_version": 1,
"thresholds": {
"pixel_channel_threshold": 16,
"noise_changed_area_ratio": 0.001,
"full_page_changed_area_ratio": 0.6,
"confidence_cutoff": 0.6,
"auto_accept_min_confidence": 0.8,
"crop_padding_px": 16,
"max_regions": 3,
"max_full_image_width": 1200,
"target_regression_precision": 0.95,
"min_samples": 50,
"eval_min_accuracy": 0.8
},
"routing": {
"high_risk_globs": [
"web/src/components/auth/**",
"web/src/lib/auth.tsx",
"web/src/lib/api.ts",
"web/e2e/auth-drift/**",
"web/src/**/*security*",
"web/src/**/*billing*",
"pkg/api/**/auth*",
"cmd/console/**/auth*"
],
"auto_update_baselines": true
},
"model": {
"provider": "openai-compatible",
"api_url_env": "VISUAL_TRIAGE_API_URL",
"api_key_env": "VISUAL_TRIAGE_API_KEY",
"model_env": "VISUAL_TRIAGE_MODEL",
"default_api_url": "https://api.openai.com/v1/chat/completions",
"default_model": "gpt-4.1-mini",
"timeout_seconds": 60,
"temperature": 0,
"max_tokens": 500,
"max_model_calls_per_run": 50,
"max_total_tokens_per_run": 200000
},
"optional_baseline_free_check": {
"enabled_env": "VISUAL_TRIAGE_BASELINE_FREE_CHECK",
"default_enabled": false
},
"tuning_file": ".github/triage-tuning.json",
"ledger_file": ".github/triage-ledger.jsonl"
}
280 changes: 280 additions & 0 deletions .github/workflows/visual-regression-close-issue.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
name: Visual Regression Close Issue

# Closes the open visual-regression-failure issue for a branch once Visual Regression goes green
# again (close-on-green), posts a recovery comment, and — for the learning loop (Phase 5) — derives a
# resolution-based verdict and writes it back to the in-repo triage ledger via `ingest-verdict`.
#
# MVP = one-issue-per-branch: the failure issue carries a machine-readable `<!-- triage-autofix -->`
# block whose `branch` field we match against this run's head branch.

on:
workflow_run:
workflows:
- Visual Regression
types:
- completed
workflow_dispatch:
inputs:
run_id:
description: Successful Visual Regression workflow run ID to process.
required: true
type: string

# Token scopes used by the steps in this workflow.
permissions:
# write: the ingest step commits the ledger and pushes it to the head branch.
contents: write
# read: the script reads workflow runs and a later step downloads a run artifact.
actions: read
# write: the final step comments on and closes the failure issue.
issues: write
# read: the script lists pull requests and their changed files.
pull-requests: read

jobs:
close-on-green:
name: Close Visual Regression Failure Issue On Green
runs-on: ubuntu-latest
timeout-minutes: 10
# Manual dispatches always start the job (the first step re-checks the run's name and
# conclusion itself); workflow_run triggers only proceed when the triggering run succeeded.
if: github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success'
env:
# Run to process: the triggering workflow_run's ID, or the `run_id` input on manual dispatch.
SOURCE_RUN_ID: ${{ github.event.workflow_run.id || inputs.run_id }}
steps:
- name: Find matching failure issue and derive verdict
id: find
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
env:
SOURCE_RUN_ID: ${{ env.SOURCE_RUN_ID }}
with:
script: |
const owner = context.repo.owner;
const repo = context.repo.repo;
const runId = Number(process.env.SOURCE_RUN_ID);

const { data: run } = await github.rest.actions.getWorkflowRun({ owner, repo, run_id: runId });
if (run.name !== 'Visual Regression') {
core.info(`Run ${runId} is "${run.name}", not "Visual Regression"; skipping.`);
return;
}
if (run.conclusion !== 'success') {
core.info(`Run ${runId} concluded with ${run.conclusion}; only green runs close issues.`);
return;
}
const branch = run.head_branch || '';
if (!branch) {
core.info('No head branch on the run; nothing to match.');
return;
}

/**
 * Extract the JSON payload of the machine-readable `<!-- triage-autofix {...} -->` HTML comment
 * emitted by the failure-issue workflow.
 *
 * @param {string|null|undefined} text - Issue body; falsy values are treated as an empty string.
 * @returns {object|null} The parsed payload, or null when the block is absent or its JSON is
 *   invalid (the latter also logs a warning through `core.warning`).
 */
function parseAutofix(text) {
  const autofixBlock = /<!-- triage-autofix\s*(\{[\s\S]*?\})\s*-->/;
  const found = autofixBlock.exec(text || '');
  if (found === null) {
    return null;
  }

  const [, payload] = found;
  let parsed = null;
  try {
    parsed = JSON.parse(payload);
  } catch (parseError) {
    core.warning(`Could not parse triage-autofix block: ${parseError.message}`);
  }
  return parsed;
}

const issues = await github.paginate(github.rest.issues.listForRepo, {
owner,
repo,
state: 'open',
labels: 'visual-regression-failure',
per_page: 100,
});

const branchTableMarker = `| Branch | \`${branch}\` |`;
let matched = null;
let autofix = null;
for (const issue of issues) {
const fromBody = parseAutofix(issue.body);
if (fromBody && fromBody.branch === branch) {
matched = issue;
autofix = fromBody;
break;
}
}
// Fallback: older issues without a branch in the block but with the run-context branch row.
if (!matched) {
for (const issue of issues) {
if (issue.body && issue.body.includes(branchTableMarker)) {
matched = issue;
autofix = parseAutofix(issue.body);
break;
}
}
}

if (!matched) {
core.info(`No open visual-regression-failure issue for branch ${branch}.`);
return;
}

// Derive a resolution-based verdict from the files touched by the PR behind this run. This
// inspects the PR's complete file list, not only the commits between the failing and green runs:
//   baseline PNG updated -> intended_change
//   web/src code changed -> regression (a real fix landed)
//   neither              -> noise (flake / quarantined / unrelated green)
const changedFiles = [];
for (const prRef of run.pull_requests || []) {
  try {
    const files = await github.paginate(github.rest.pulls.listFiles, {
      owner,
      repo,
      pull_number: prRef.number,
      per_page: 100,
    });
    changedFiles.push(...files.map((file) => file.filename));
  } catch (error) {
    core.warning(`Could not list files for PR #${prRef.number}: ${error.message}`);
  }
}

// Forks often omit run.pull_requests — fall back to the open/merged PR for this head branch.
if (changedFiles.length === 0) {
  // The `head` filter has the form `<user>:<branch>`, where <user> owns the HEAD repository.
  // For a fork PR that is the fork's owner, not this repository's owner, so prefer the owner
  // reported on the run and only fall back to the base owner when it is unavailable.
  const headRepository = run.head_repository;
  const headOwner = (headRepository && headRepository.owner && headRepository.owner.login) || owner;
  try {
    const prs = await github.paginate(github.rest.pulls.list, {
      owner,
      repo,
      state: 'all',
      head: `${headOwner}:${branch}`,
      per_page: 20,
    });
    // Most recently updated PR first; sort a copy so the API result is not mutated.
    const [pr] = [...prs].sort((a, b) => new Date(b.updated_at) - new Date(a.updated_at));
    if (pr) {
      const files = await github.paginate(github.rest.pulls.listFiles, {
        owner,
        repo,
        pull_number: pr.number,
        per_page: 100,
      });
      changedFiles.push(...files.map((file) => file.filename));
    }
  } catch (error) {
    core.warning(`Could not resolve PR for branch ${branch}: ${error.message}`);
  }
}

// A baseline update takes precedence over source changes when both are present.
const baselineChanged = changedFiles.some((file) => /web\/e2e\/visual\/.*-snapshots\/.*\.png$/.test(file));
const sourceChanged = changedFiles.some((file) => file.startsWith('web/src/'));
let verdict = 'noise';
if (baselineChanged) verdict = 'intended_change';
else if (sourceChanged) verdict = 'regression';

// Find the most recent FAILED Visual Regression run on this branch so we can recover the
// ledger rows it emitted (decision_ids alone cannot be ingested without their base rows).
let failingRunId = '';
try {
  const runs = await github.paginate(github.rest.actions.listWorkflowRunsForRepo, {
    owner,
    repo,
    branch,
    event: 'pull_request',
    // Filter server-side so long-lived branches do not page through every historical run.
    status: 'failure',
    per_page: 100,
  });
  const [failed] = runs
    .filter((candidate) => candidate.name === 'Visual Regression' && candidate.conclusion === 'failure')
    .sort((a, b) => new Date(b.created_at) - new Date(a.created_at));
  if (failed) failingRunId = String(failed.id);
} catch (error) {
  core.warning(`Could not list prior failed runs: ${error.message}`);
}

let decisionIds = (autofix && Array.isArray(autofix.decision_ids)) ? autofix.decision_ids : [];

// The later checkout/push steps address the head branch by NAME in this repository. For a run
// that came from a fork, a same-named branch here (e.g. `main`) is a different branch, and the
// ledger commit must not be pushed to it. Emitting no decision ids makes those steps skip
// (their `if:` requires a non-empty `decision_ids`), while the issue is still closed.
const headRepoName = run.head_repository && run.head_repository.full_name;
if (headRepoName !== `${owner}/${repo}` && decisionIds.length > 0) {
  core.info(`Run head repository is ${headRepoName || 'unknown'}, not ${owner}/${repo}; skipping ledger ingestion.`);
  decisionIds = [];
}

// Step outputs consumed by the checkout, download, ingest and close steps.
core.setOutput('issue_number', String(matched.number));
core.setOutput('branch', branch);
core.setOutput('verdict', verdict);
core.setOutput('decision_ids', decisionIds.join(' '));
core.setOutput('failing_run_id', failingRunId);
core.info(`Matched issue #${matched.number} (branch ${branch}); verdict=${verdict}; decisions=${decisionIds.length}.`);

- name: Checkout the resolved head branch
# Check out the PR head branch (not the default branch) so the verdict commit rides into the
# default branch when the PR merges. Best-effort: a deleted/merged branch simply skips ingestion.
if: steps.find.outputs.issue_number != '' && steps.find.outputs.decision_ids != ''
id: checkout
continue-on-error: true
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ steps.find.outputs.branch }}
fetch-depth: 0

- name: Download failing-run ledger artifact
# Best-effort: only runs after a successful checkout and when a prior failed run was found;
# a failure here does not fail the job.
if: steps.checkout.outcome == 'success' && steps.find.outputs.failing_run_id != ''
continue-on-error: true
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
REPOSITORY: ${{ github.repository }}
FAILING_RUN_ID: ${{ steps.find.outputs.failing_run_id }}
run: |
# Fetch the failing run's `app-visual-diff` artifact into ./failing-artifact; a missing
# artifact only prints a message instead of failing the step.
mkdir -p failing-artifact
gh run download "$FAILING_RUN_ID" \
--repo "$REPOSITORY" \
--name app-visual-diff \
--dir failing-artifact || echo "No app-visual-diff artifact found for run $FAILING_RUN_ID."

- name: Ingest resolution verdict into the ledger
if: steps.checkout.outcome == 'success' && steps.find.outputs.decision_ids != ''
continue-on-error: true
env:
VERDICT: ${{ steps.find.outputs.verdict }}
DECISION_IDS: ${{ steps.find.outputs.decision_ids }}
HEAD_REF: ${{ steps.find.outputs.branch }}
run: |
set -euo pipefail
export LEDGER=".github/triage-ledger.jsonl"
# Seed the canonical ledger with the rows the failing run emitted (append-only, dedup by id).
# Assign and export separately so the command substitution's status is not masked by `export`.
ARTIFACT_LEDGER="$(find failing-artifact -name 'triage-ledger.jsonl' 2>/dev/null | head -n1 || true)"
export ARTIFACT_LEDGER
python3 scripts/merge_ledger.py
# DECISION_IDS is a space-separated list; each id is ingested independently and a failure for
# one id does not stop the others.
for did in $DECISION_IDS; do
  python3 scripts/visual-diff-triage.py ingest-verdict \
    --ledger "$LEDGER" \
    --decision-id "$did" \
    --outcome "$VERDICT" \
    --source resolution || echo "ingest-verdict failed for $did (non-fatal)."
done
if [ ! -e "$LEDGER" ]; then
  echo "No ledger file present; nothing to commit."
  exit 0
fi
# Stage first and compare the index against HEAD: a plain `git diff` ignores untracked files,
# so a ledger created for the first time would otherwise be reported as unchanged.
git add -- "$LEDGER"
if git diff --cached --quiet -- "$LEDGER"; then
  echo "No ledger changes to commit."
  exit 0
fi
git -c user.name="github-actions[bot]" \
  -c user.email="41898282+github-actions[bot]@users.noreply.github.com" \
  commit -m "Record resolution verdict (${VERDICT}) in triage ledger"
# Persist verdicts on the branch so they ride into the default branch when the PR merges.
git push origin "HEAD:${HEAD_REF}" || echo "Ledger push failed (non-fatal); verdict not persisted."

- name: Close the failure issue
if: steps.find.outputs.issue_number != ''
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
env:
ISSUE_NUMBER: ${{ steps.find.outputs.issue_number }}
BRANCH: ${{ steps.find.outputs.branch }}
VERDICT: ${{ steps.find.outputs.verdict }}
SOURCE_RUN_ID: ${{ env.SOURCE_RUN_ID }}
with:
script: |
// Post a recovery comment on the matched failure issue, then close it as completed.
// Inputs arrive through the step's env: ISSUE_NUMBER, BRANCH, VERDICT and SOURCE_RUN_ID.
const { owner, repo } = context.repo;
const issueNumber = Number(process.env.ISSUE_NUMBER);
const branch = process.env.BRANCH;
const verdict = process.env.VERDICT;
const runId = Number(process.env.SOURCE_RUN_ID);

// Re-fetch the run for its URL and head commit, which are not passed between steps.
const { data: run } = await github.rest.actions.getWorkflowRun({ owner, repo, run_id: runId });

// The ledger ingestion steps are conditional and best-effort, so this step cannot know whether
// the verdict was actually persisted; the comment therefore reports only the derived verdict.
const comment = [
  `✅ Visual Regression is green again on \`${branch}\`. Auto-closing this issue.`,
  '',
  `- Recovery run: [#${runId}](${run.html_url})`,
  `- Commit: \`${run.head_sha}\``,
  `- Resolution verdict (derived from the PR's changed files): \`${verdict}\``,
].join('\n');

await github.rest.issues.createComment({ owner, repo, issue_number: issueNumber, body: comment });
await github.rest.issues.update({
  owner,
  repo,
  issue_number: issueNumber,
  state: 'closed',
  state_reason: 'completed',
});
core.info(`Closed visual-regression-failure issue #${issueNumber} (verdict=${verdict}).`);
Loading
Loading