Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 61 additions & 21 deletions .github/scripts/pr_labeler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
"""PR labeler: compute size, risk, and checkbox labels for one or more PRs.
"""PR labeler: compute size, risk, and template-field labels for one or more PRs.

Inputs come from environment variables set by the calling workflow:
GITHUB_REPOSITORY e.g. "owner/repo" (always set on Actions)
Expand All @@ -11,7 +11,13 @@
1. Fetching additions, deletions, body, and current labels from the GitHub API.
2. Computing the size bucket from additions+deletions.
3. Parsing the Bugbot CURSOR_SUMMARY block for a risk level.
4. Parsing the PR template checkboxes for `urgent` and `high complexity`.
4. Parsing the PR template fields for `urgent` and `high complexity`.
Two formats are supported:
- Current: ``- **Urgent** (...): yes`` / ``: no``
- Legacy: ``- [x] **Urgent** ...`` / ``- [ ] **Urgent** ...``
The current format is preferred; the legacy format is matched as a
fallback so PRs opened before the template change keep working until
the queue rolls over.
5. Reconciling with current labels and applying adds/removes via `gh pr edit`.

For backfill mode (PR_NUMBER == "all"), per-PR failures are logged but do not
Expand All @@ -32,6 +38,9 @@
URGENT_LABEL = "review/urgent"
COMPLEXITY_LABEL = "complexity/high"

URGENT_KEYWORD = "urgent"
COMPLEXITY_KEYWORD = "high complexity"

CURSOR_SUMMARY_MARKER = "<!-- CURSOR_SUMMARY -->"
RISK_REGEX = re.compile(r"\*\*(\w+)\s+Risk\*\*", re.IGNORECASE)
RISK_MAP = {
Expand All @@ -43,21 +52,38 @@
RISK_FALLBACK = "risk/high"


# Match a markdown checkbox followed (with whitespace and optional bold/markdown
# punctuation) by a target keyword. The `[xX ]` part captures the state.
def yesno_regex(keyword: str) -> re.Pattern[str]:
"""Match the current template format and capture ``yes`` or ``no``.

Examples that match (state captured):
- **Urgent** (needs same-day review): yes
- **High complexity** (non-obvious logic, careful review): no
* **urgent**: YES
"""
return re.compile(
rf"[-*]\s*[*_`]*\s*{re.escape(keyword)}\b[^:\n]*:\s*(yes|no)\b",
re.IGNORECASE,
)
Comment thread
cursor[bot] marked this conversation as resolved.


def checkbox_regex(keyword: str) -> re.Pattern[str]:
# Examples that should match (state captured):
# - [x] **Urgent**: needs same-day review
# - [ ] **High complexity**: ...
# * [X] urgent
"""Match the legacy template format and capture the checkbox state.

Examples that match (state captured):
- [x] **Urgent**: needs same-day review
- [ ] **High complexity**: ...
* [X] urgent
"""
return re.compile(
rf"[-*]\s*\[\s*([xX ])\s*\]\s*[*_`]*\s*{re.escape(keyword)}",
re.IGNORECASE,
)


URGENT_REGEX = checkbox_regex("urgent")
COMPLEXITY_REGEX = checkbox_regex("high complexity")
URGENT_YESNO_REGEX = yesno_regex(URGENT_KEYWORD)
COMPLEXITY_YESNO_REGEX = yesno_regex(COMPLEXITY_KEYWORD)
URGENT_CHECKBOX_REGEX = checkbox_regex(URGENT_KEYWORD)
COMPLEXITY_CHECKBOX_REGEX = checkbox_regex(COMPLEXITY_KEYWORD)


@dataclass
Expand Down Expand Up @@ -154,12 +180,26 @@ def risk_from_body(body: str, plan: LabelPlan) -> str | None:
return label


def checkbox_state(body: str, regex: re.Pattern[str]) -> str | None:
"""Return 'on', 'off', or None (not present)."""
match = regex.search(body)
if not match:
return None
return "on" if match.group(1).lower() == "x" else "off"
def field_state(
body: str,
*,
yesno: re.Pattern[str],
checkbox: re.Pattern[str],
) -> str | None:
"""Return ``'on'``, ``'off'``, or ``None`` for a template field.

Tries the current ``**Field**: yes/no`` syntax first and falls back to the
legacy ``- [x] **Field**`` syntax. The legacy regex is retained so PRs
opened before the template change keep being labeled correctly until the
queue rolls over (~2 weeks). It will be removed in a follow-up.
"""
match = yesno.search(body)
if match:
return "on" if match.group(1).lower() == "yes" else "off"
match = checkbox.search(body)
if match:
return "on" if match.group(1).lower() == "x" else "off"
return None


def reconcile(
Expand Down Expand Up @@ -192,12 +232,12 @@ def reconcile(
# strip a manually-set risk label just because Bugbot didn't comment.
plan.remove.append(label)

# Checkboxes: three-state (on/off/absent).
for regex, label in [
(URGENT_REGEX, URGENT_LABEL),
(COMPLEXITY_REGEX, COMPLEXITY_LABEL),
# Template fields: three-state (on/off/absent).
for yesno_re, checkbox_re, label in [
(URGENT_YESNO_REGEX, URGENT_CHECKBOX_REGEX, URGENT_LABEL),
(COMPLEXITY_YESNO_REGEX, COMPLEXITY_CHECKBOX_REGEX, COMPLEXITY_LABEL),
]:
state = checkbox_state(body, regex)
state = field_state(body, yesno=yesno_re, checkbox=checkbox_re)
if state == "on" and label not in current_labels:
plan.add.append(label)
elif state == "off" and label in current_labels:
Expand Down
142 changes: 128 additions & 14 deletions .github/scripts/test_pr_labeler.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import sys
from pathlib import Path

import pytest

sys.path.insert(0, str(Path(__file__).parent))

import pr_labeler # noqa: E402
Expand Down Expand Up @@ -88,45 +90,136 @@ def test_text_before_marker_ignored(self):
assert pr_labeler.risk_from_body(body, _plan()) == "risk/high"


# ---- checkbox_state --------------------------------------------------------
# ---- field_state -----------------------------------------------------------


def _urgent_state(body: str) -> str | None:
return pr_labeler.field_state(
body,
yesno=pr_labeler.URGENT_YESNO_REGEX,
checkbox=pr_labeler.URGENT_CHECKBOX_REGEX,
)


def _complexity_state(body: str) -> str | None:
return pr_labeler.field_state(
body,
yesno=pr_labeler.COMPLEXITY_YESNO_REGEX,
checkbox=pr_labeler.COMPLEXITY_CHECKBOX_REGEX,
)


class TestFieldStateYesNo:
"""Current template format: ``- **Field** (...): yes|no``."""

@pytest.mark.parametrize(
"body",
[
"- **Urgent** (needs same-day review): yes",
"- **Urgent**: yes",
"* **urgent**: YES",
"- **Urgent** (needs same-day review): yes, plus extra context",
"- **Urgent** (needs same-day review) : yes",
],
)
def test_urgent_yes_variants(self, body):
assert _urgent_state(body) == "on"

@pytest.mark.parametrize(
"body",
[
"- **Urgent** (needs same-day review): no",
"- **Urgent**: no",
"* **urgent**: NO",
],
)
def test_urgent_no_variants(self, body):
assert _urgent_state(body) == "off"

def test_complexity_yes(self):
body = "- **High complexity** (non-obvious logic, careful review): yes"
assert _complexity_state(body) == "on"

def test_complexity_no(self):
body = "- **High complexity** (non-obvious logic, careful review): no"
assert _complexity_state(body) == "off"

def test_value_must_be_yes_or_no(self):
# "maybe" is not yes/no; field is treated as absent.
body = "- **Urgent**: maybe"
assert _urgent_state(body) is None

def test_value_word_boundary(self):
# "nothing" must not be parsed as "no".
body = "- **Urgent**: nothing here"
assert _urgent_state(body) is None

def test_yes_or_no_inside_parenthetical_is_ignored(self):
# The inline "yes or no" hint inside the parenthetical is descriptive;
# only the value after the colon counts.
body = "- **Urgent** (answer yes or no): no"
assert _urgent_state(body) == "off"

def test_absent_returns_none(self):
assert _urgent_state("body with no template") is None


class TestFieldStateLegacyCheckbox:
"""Legacy template format: ``- [x] **Field**`` (in-flight PRs)."""

class TestCheckboxState:
def test_urgent_checked(self):
body = "- [x] **Urgent**: needs same-day review"
assert pr_labeler.checkbox_state(body, pr_labeler.URGENT_REGEX) == "on"
assert _urgent_state(body) == "on"

def test_urgent_unchecked(self):
body = "- [ ] **Urgent**: needs same-day review"
assert pr_labeler.checkbox_state(body, pr_labeler.URGENT_REGEX) == "off"
assert _urgent_state(body) == "off"

def test_urgent_capital_x(self):
body = "- [X] **Urgent**"
assert pr_labeler.checkbox_state(body, pr_labeler.URGENT_REGEX) == "on"
assert _urgent_state(body) == "on"

def test_urgent_absent(self):
body = "no template here"
assert pr_labeler.checkbox_state(body, pr_labeler.URGENT_REGEX) is None
assert _urgent_state(body) is None

def test_urgent_without_bold(self):
body = "- [x] urgent: needs same-day review"
assert pr_labeler.checkbox_state(body, pr_labeler.URGENT_REGEX) == "on"
assert _urgent_state(body) == "on"

def test_complexity_checked(self):
body = "- [x] **High complexity**: non-obvious logic"
assert pr_labeler.checkbox_state(body, pr_labeler.COMPLEXITY_REGEX) == "on"
assert _complexity_state(body) == "on"

def test_complexity_unchecked(self):
body = "- [ ] **High complexity**: non-obvious logic"
assert pr_labeler.checkbox_state(body, pr_labeler.COMPLEXITY_REGEX) == "off"
assert _complexity_state(body) == "off"

def test_extra_whitespace(self):
body = "- [ x ] **Urgent**: needs same-day review"
assert pr_labeler.checkbox_state(body, pr_labeler.URGENT_REGEX) == "on"
assert _urgent_state(body) == "on"

def test_asterisk_bullet(self):
body = "* [x] urgent"
assert pr_labeler.checkbox_state(body, pr_labeler.URGENT_REGEX) == "on"
assert _urgent_state(body) == "on"


class TestFieldStatePrecedence:
"""When both formats appear in the same body, yes/no wins."""

def test_yesno_wins_over_legacy_checkbox(self):
body = (
"- [ ] **Urgent**: stale legacy line\n"
"- **Urgent** (needs same-day review): yes"
)
assert _urgent_state(body) == "on"

def test_yesno_no_wins_over_legacy_checked(self):
body = (
"- [x] **Urgent**: stale legacy line\n"
"- **Urgent** (needs same-day review): no"
)
assert _urgent_state(body) == "off"


# ---- reconcile -------------------------------------------------------------
Expand Down Expand Up @@ -185,13 +278,28 @@ def test_swaps_risk_label_when_bugbot_changes(self):
assert "risk/low" in plan.add
assert "risk/high" in plan.remove

def test_urgent_checkbox_on_adds_label(self):
def test_urgent_yesno_yes_adds_label(self):
plan = _plan()
body = "- **Urgent** (needs same-day review): yes"
pr_labeler.reconcile(_pr(additions=5, body=body), plan=plan)
assert pr_labeler.URGENT_LABEL in plan.add

def test_urgent_yesno_no_removes_label(self):
plan = _plan()
body = "- **Urgent** (needs same-day review): no"
pr_labeler.reconcile(
_pr(additions=5, body=body, labels=(pr_labeler.URGENT_LABEL,)),
plan=plan,
)
assert pr_labeler.URGENT_LABEL in plan.remove

def test_urgent_legacy_checked_adds_label(self):
plan = _plan()
body = "- [x] **Urgent**: needs same-day review"
pr_labeler.reconcile(_pr(additions=5, body=body), plan=plan)
assert pr_labeler.URGENT_LABEL in plan.add

def test_urgent_checkbox_off_removes_label(self):
def test_urgent_legacy_unchecked_removes_label(self):
plan = _plan()
body = "- [ ] **Urgent**: needs same-day review"
pr_labeler.reconcile(
Expand All @@ -200,14 +308,20 @@ def test_urgent_checkbox_off_removes_label(self):
)
assert pr_labeler.URGENT_LABEL in plan.remove

def test_urgent_checkbox_absent_leaves_manual_label(self):
def test_urgent_absent_leaves_manual_label(self):
plan = _plan()
pr_labeler.reconcile(
_pr(additions=5, body="no template", labels=(pr_labeler.URGENT_LABEL,)),
plan=plan,
)
assert pr_labeler.URGENT_LABEL not in plan.remove

def test_complexity_yesno_yes_adds_label(self):
plan = _plan()
body = "- **High complexity** (non-obvious logic, careful review): yes"
pr_labeler.reconcile(_pr(additions=5, body=body), plan=plan)
assert pr_labeler.COMPLEXITY_LABEL in plan.add


# ---- determine_targets ------------------------------------------------------

Expand Down
Loading