diff --git a/src/dependamerge/cli.py b/src/dependamerge/cli.py index e62fb58..0e29207 100644 --- a/src/dependamerge/cli.py +++ b/src/dependamerge/cli.py @@ -334,6 +334,7 @@ class _MergeContext: netrc_optional: bool github2gerrit_mode: str include_human_prs: bool = False + rebase_local: bool = True # Derived / mutable state github_client: GitHubClient | None = None @@ -728,8 +729,11 @@ def _run_parallel_merge( concurrency: Maximum number of concurrent merge workers. For org-wide merges (PRs spread across repos) the default of 10 is fine. For repo-scoped merges (all PRs in the - same repo) use 1 to serialise operations and give GitHub - time to propagate approvals between merges. + same repo) ``AsyncMergeManager`` serialises the actual + ``merge_pull_request`` API call via a per-repo dispatch + lock, so a value > 1 is safe and lets the worker pool + keep processing other PRs while one PR sits in Step 5.5's + auto-merge wait loop — see ``_get_merge_dispatch_lock``. """ async def _do_merge(): @@ -747,6 +751,7 @@ async def _do_merge(): github2gerrit_mode=ctx.github2gerrit_mode, no_netrc=ctx.no_netrc, netrc_file=ctx.netrc_file, + rebase_local=ctx.rebase_local, ) as merge_manager: if not preview: console.print( @@ -1095,12 +1100,23 @@ def _is_auto(author: str | None) -> bool: ctx.progress_tracker.start() try: - # Serialise merges for repo-scoped operations: all PRs target - # the same repository, so parallel approve+merge would race - # against GitHub's branch-protection propagation and cause - # spurious "branch protection" failures. + # Per-repo merge dispatch is serialised inside + # ``AsyncMergeManager`` (see ``_get_merge_dispatch_lock``), + # so it is now safe to run multiple workers against PRs + # that target the same repository — only the actual + # ``merge_pull_request`` call queues, while approve, + # rebase, and the Step 5.5 auto-merge wait run in + # parallel. 
merge_results = _run_parallel_merge( - ctx, all_prs_to_merge, preview=not ctx.no_confirm, concurrency=1 + ctx, + all_prs_to_merge, + preview=not ctx.no_confirm, + # Allow parallel workers; the merge dispatch itself is + # serialised per repo by ``AsyncMergeManager`` so PRs + # parked in Step 5.5's wait loop no longer block other + # PRs in the batch. Cap by PR count so we don't spawn + # more workers than there is work. + concurrency=min(5, len(all_prs_to_merge)) or 1, ) finally: if ctx.show_progress and ctx.progress_tracker: @@ -1213,7 +1229,12 @@ def _execute_repo_confirmed_merge( try: real_results = _run_parallel_merge( - ctx, mergeable_prs, preview=False, concurrency=1 + ctx, + mergeable_prs, + preview=False, + # Per-repo merge dispatch lock makes parallel workers + # safe; cap by PR count. + concurrency=min(5, len(mergeable_prs)) or 1, ) finally: if ctx.show_progress and ctx.progress_tracker: @@ -1482,6 +1503,19 @@ def merge( "--no-fix", help="Do not attempt to automatically fix out-of-date branches", ), + rebase_local: bool = typer.Option( + True, + "--rebase-local/--no-rebase-local", + help=( + "When rebasing a behind PR, prefer a local ``git`` clone + " + "rebase + force-push-with-lease over the GitHub REST " + "``update-branch`` endpoint when the base branch requires " + "verified signatures or the PR is from pre-commit-ci[bot]. " + "The local path inherits ``~/.gitconfig`` so commits stay " + "signed; the REST path is faster but produces unsigned " + "commits that break verification. Default: enabled." 
+ ), + ), merge_timeout: float = typer.Option( DEFAULT_MERGE_TIMEOUT, "--merge-timeout", @@ -1699,6 +1733,7 @@ def merge( netrc_optional=netrc_optional, github2gerrit_mode=github2gerrit_mode, include_human_prs=include_human_prs, + rebase_local=rebase_local, ) try: _handle_repo_merge(parsed_repo, repo_ctx) @@ -1765,6 +1800,7 @@ def merge( netrc_optional=netrc_optional, github2gerrit_mode=github2gerrit_mode, include_human_prs=include_human_prs, + rebase_local=rebase_local, ) try: diff --git a/src/dependamerge/github_async.py b/src/dependamerge/github_async.py index f110704..59ef8bf 100644 --- a/src/dependamerge/github_async.py +++ b/src/dependamerge/github_async.py @@ -930,57 +930,60 @@ async def post_issue_comment( async def check_pr_commit_signatures( self, owner: str, repo: str, number: int ) -> tuple[bool, list[str]]: - """ - Check whether all commits on a pull request have verified signatures. + """Check whether all commits on a pull request have verified signatures. REST: GET /repos/{owner}/{repo}/pulls/{pull_number}/commits Returns: - Tuple of (all_verified: bool, unverified_shas: list[str]). - ``all_verified`` is True when every commit carries a valid - signature according to GitHub. ``unverified_shas`` contains - the abbreviated SHAs of any commits whose verification failed. + Tuple of ``(all_verified, unverified_shas)``. + ``all_verified`` is True when every commit carries a + valid signature according to GitHub. + ``unverified_shas`` contains the abbreviated SHAs of + any commits whose verification failed. + + Raises: + Exception: surfaces the underlying API/network error + on failure rather than silently returning a default. + Callers that want fail-open or fail-closed semantics + should wrap the call in ``try``/``except`` and decide + for themselves — the previous fail-open default + (returning ``(True, [])``) collided with the + signature-preservation gate in ``rebase.py``, which + documents "verified" as a positive confirmation. 
""" unverified: list[str] = [] - try: - # Iterate over all pages of commits to ensure we don't miss - # unverified commits on pull requests with >100 commits. - async for commits in self.get_paginated( - f"/repos/{owner}/{repo}/pulls/{number}/commits", - per_page=100, - ): - if not isinstance(commits, list): - # Unexpected response shape – assume OK to avoid false positives - return True, [] - - for commit_data in commits: - if not isinstance(commit_data, dict): - continue - raw_sha = commit_data.get("sha") - sha = str(raw_sha)[:8] if isinstance(raw_sha, str) else "unknown" - commit_obj = commit_data.get("commit") - if not isinstance(commit_obj, dict): - unverified.append(sha) - continue - verification = commit_obj.get("verification") - if not isinstance(verification, dict): - unverified.append(sha) - continue - if not verification.get("verified", False): - unverified.append(sha) + # Iterate over all pages of commits to ensure we don't miss + # unverified commits on pull requests with >100 commits. + async for commits in self.get_paginated( + f"/repos/{owner}/{repo}/pulls/{number}/commits", + per_page=100, + ): + if not isinstance(commits, list): + # Unexpected response shape — assume OK to avoid + # false positives. The API returned 200 OK but in + # an unexpected shape, which is distinct from a + # network/HTTP error and arguably means the page + # is empty. 
+ return True, [] + + for commit_data in commits: + if not isinstance(commit_data, dict): + continue + raw_sha = commit_data.get("sha") + sha = str(raw_sha)[:8] if isinstance(raw_sha, str) else "unknown" + commit_obj = commit_data.get("commit") + if not isinstance(commit_obj, dict): + unverified.append(sha) + continue + verification = commit_obj.get("verification") + if not isinstance(verification, dict): + unverified.append(sha) + continue + if not verification.get("verified", False): + unverified.append(sha) - all_verified = len(unverified) == 0 - return all_verified, unverified - except Exception as e: - self.log.debug( - "Failed to check commit signatures for PR %s/%s#%s: %s", - owner, - repo, - number, - e, - ) - # On error, assume verified to avoid blocking merges unnecessarily - return True, [] + all_verified = len(unverified) == 0 + return all_verified, unverified async def requires_commit_signatures( self, owner: str, repo: str, branch: str = "main" diff --git a/src/dependamerge/github_client.py b/src/dependamerge/github_client.py index 5a4d6f1..1d0956d 100644 --- a/src/dependamerge/github_client.py +++ b/src/dependamerge/github_client.py @@ -151,6 +151,27 @@ async def _run() -> PullRequestInfo: repository_full_name=f"{owner}/{repo}", html_url=pr.get("html_url") or "", reviews=reviews, + # Populate head/base repo identity so the + # signature-preserving local-rebase path can + # tell whether the PR is from a fork (and + # which remote to push to). Without these, + # ``rebase.local_rebase_pr()`` fails closed + # to avoid pushing to the wrong repository. 
+ head_repo_full_name=( + ((pr.get("head") or {}).get("repo") or {}).get("full_name") + ), + head_repo_clone_url=( + ((pr.get("head") or {}).get("repo") or {}).get("clone_url") + ), + base_repo_full_name=( + ((pr.get("base") or {}).get("repo") or {}).get("full_name") + ), + base_repo_clone_url=( + ((pr.get("base") or {}).get("repo") or {}).get("clone_url") + ), + is_fork=( + ((pr.get("head") or {}).get("repo") or {}).get("fork") + ), ) return asyncio.run(_run()) # type: ignore[no-any-return] diff --git a/src/dependamerge/github_graphql.py b/src/dependamerge/github_graphql.py index a264089..c1a7bef 100644 --- a/src/dependamerge/github_graphql.py +++ b/src/dependamerge/github_graphql.py @@ -81,6 +81,8 @@ baseRefName headRefName headRefOid + headRepository { nameWithOwner url isFork } + baseRepository { nameWithOwner url } createdAt updatedAt files(first: 50) { @@ -169,6 +171,8 @@ baseRefName headRefName headRefOid + headRepository { nameWithOwner url isFork } + baseRepository { nameWithOwner url } createdAt updatedAt files(first: $filesPageSize) { diff --git a/src/dependamerge/github_service.py b/src/dependamerge/github_service.py index bb933a0..ea0931b 100644 --- a/src/dependamerge/github_service.py +++ b/src/dependamerge/github_service.py @@ -40,6 +40,47 @@ ] +def _str_or_none(value: Any) -> str | None: + """Return ``value`` as a string when truthy, else None. + + Used when populating optional ``PullRequestInfo`` fields from + GraphQL responses where the field may be missing entirely or + explicitly null. + """ + if isinstance(value, str) and value: + return value + return None + + +def _bool_or_none(value: Any) -> bool | None: + """Coerce ``value`` to bool when present, else None. + + Mirrors :func:`_str_or_none` for boolean GraphQL fields + (``isFork``). + """ + if isinstance(value, bool): + return value + return None + + +def _clone_url_with_git_suffix(url: Any) -> str | None: + """Synthesise a canonical ``.git`` clone URL from a GraphQL ``url``. 
+ + GraphQL's ``Repository.url`` returns the HTTPS URL without the + ``.git`` suffix that REST's ``clone_url`` includes. This + helper appends ``.git`` so both code paths produce the same + string and downstream consumers (notably + :func:`rebase.local_rebase_pr`) can treat them uniformly. + + Returns None when the input is missing or empty so the + PullRequestInfo field stays unset rather than holding a + bogus ``".git"`` string. + """ + if isinstance(url, str) and url: + return f"{url}.git" + return None + + class GitHubService: """ Asynchronous service orchestrating GraphQL paging and mapping results @@ -515,6 +556,30 @@ def to_pull_request_info( repository_full_name=repo_full_name, html_url=pr.get("url") or "", reviews=reviews, + # Populate head/base repo identity from the GraphQL + # ``headRepository`` / ``baseRepository`` fields so the + # signature-preserving local-rebase path can tell + # whether the PR is from a fork (and which remote to + # push to). Without these, ``rebase.local_rebase_pr()`` + # fails closed to avoid pushing to the wrong repository. + # GraphQL returns the HTTPS URL via ``url`` (without the + # ``.git`` suffix), so we synthesise the canonical + # ``clone_url`` form for parity with REST. + head_repo_full_name=_str_or_none( + (pr.get("headRepository") or {}).get("nameWithOwner") + ), + head_repo_clone_url=_clone_url_with_git_suffix( + (pr.get("headRepository") or {}).get("url") + ), + base_repo_full_name=_str_or_none( + (pr.get("baseRepository") or {}).get("nameWithOwner") + ), + base_repo_clone_url=_clone_url_with_git_suffix( + (pr.get("baseRepository") or {}).get("url") + ), + is_fork=_bool_or_none( + (pr.get("headRepository") or {}).get("isFork") + ), ) async def find_similar_prs( diff --git a/src/dependamerge/merge_manager.py b/src/dependamerge/merge_manager.py index afdc42a..6a66afe 100644 --- a/src/dependamerge/merge_manager.py +++ b/src/dependamerge/merge_manager.py @@ -16,6 +16,7 @@ from rich.console import Console +from . 
import rebase from .copilot_handler import CopilotCommentHandler from .gerrit import ( GerritAuthError, @@ -52,6 +53,13 @@ DEFAULT_MERGE_TIMEOUT: float = 300.0 # seconds (5 minutes) DEFAULT_MERGE_RECHECK_INTERVAL: float = 10.0 # seconds between polls +# A DCO check normally completes within a few seconds. When it has +# been pending for longer than this on a PR that itself was created +# / last updated more than this many seconds ago, the check is +# treated as stuck. Used by ``_detect_stuck_dco`` to decide whether +# to ask dependabot to recreate the PR. +STUCK_DCO_THRESHOLD_SECONDS: float = 60.0 + class MergeStatus(Enum): """Status of a PR merge operation.""" @@ -104,6 +112,7 @@ def __init__( github2gerrit_mode: str = "submit", no_netrc: bool = False, netrc_file: Path | None = None, + rebase_local: bool = True, ): self.token = token self.default_merge_method = merge_method @@ -117,6 +126,16 @@ def __init__( self.github2gerrit_mode = github2gerrit_mode self.no_netrc = no_netrc self.netrc_file = netrc_file + # When True (the default), Step 5's rebase path uses a local + # ``git`` clone + rebase + force-push-with-lease workflow + # for PRs whose verification status would otherwise be lost + # by the GitHub REST ``update-branch`` endpoint. The local + # workflow inherits the user's ``~/.gitconfig`` and so + # respects ``commit.gpgsign`` / ``gpg.format`` / + # ``user.signingkey`` automatically. Set to False to force + # the legacy REST-only path (simpler but loses signature + # verification on signed branches). + self.rebase_local = rebase_local self.log = logging.getLogger(__name__) # Centralised merge-operation timing @@ -191,6 +210,19 @@ def __init__( self._waiting_prs: dict[str, float] = {} self._waiting_lock = asyncio.Lock() + # Per-repo locks that serialise the actual ``merge_pull_request`` + # API call. 
Multiple workers can run in parallel through approve, + # rebase polling, and Step 5.5's auto-merge wait loop — only the + # final dispatch is serialised, and only between PRs that target + # the same repository. This avoids the head-of-line blocking we + # used to get from forcing ``concurrency=1`` for repo-scoped + # runs (where a single PR parked in the wait loop could block + # every other PR in the batch for the full ``merge_timeout``) + # while still preventing back-to-back merges on the same repo + # from racing GitHub's branch-protection propagation. + self._merge_dispatch_locks: dict[str, asyncio.Lock] = {} + self._merge_dispatch_locks_lock = asyncio.Lock() + # Delay (seconds) after submitting a new approval before attempting merge. # GitHub needs time to propagate the approval to branch-protection evaluation. default_post_approval_delay = 3.0 @@ -347,14 +379,10 @@ async def _merge_single_pr_with_semaphore( # or have auto-merge pending (where neither merge_success # nor merge_failure is called because the merge hasn't # actually happened yet — GitHub will merge it later). - if ( - self.progress_tracker - and result.status - in ( - MergeStatus.BLOCKED, - MergeStatus.SKIPPED, - MergeStatus.AUTO_MERGE_PENDING, - ) + if self.progress_tracker and result.status in ( + MergeStatus.BLOCKED, + MergeStatus.SKIPPED, + MergeStatus.AUTO_MERGE_PENDING, ): self.progress_tracker.pr_completed() return result @@ -391,9 +419,7 @@ async def _wait_status_ticker(self) -> None: # ``print()`` calls. Updating every second would spam the # user's terminal/logs, so use the slower plain ticker # cadence instead. 
- rich_available = bool( - getattr(self.progress_tracker, "rich_available", False) - ) + rich_available = bool(getattr(self.progress_tracker, "rich_available", False)) if not rich_available: await self._wait_status_ticker_plain() return @@ -456,9 +482,7 @@ async def _wait_status_ticker_plain(self) -> None: if snapshot: now = asyncio.get_running_loop().time() if now - last_emit >= 15.0: - remaining = max( - 0.0, max(snapshot.values()) - now - ) + remaining = max(0.0, max(snapshot.values()) - now) count = len(snapshot) noun = "PR" if count == 1 else "PRs" try: @@ -1093,215 +1117,37 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: await asyncio.sleep(self._post_approval_delay) result.status = MergeStatus.APPROVED - # Step 5: Handle rebase if needed before merge + # Step 5: Handle rebase if needed before merge. + # + # Dispatched to the dedicated ``rebase`` module so the + # local-vs-REST decision tree, the local-git workflow, + # and the post-rebase polling loop all live in one + # place where they can be tested in isolation. if pr_info.mergeable_state == "behind" and self.fix_out_of_date: - if self.preview_mode: - # NOTE: In preview mode, we should NOT print here as it breaks single-line reporting - # The preview output should only be a single line per PR in the evaluation section - pass - else: - log_and_print( - self.log, - self._console, - f"🔄 Rebasing: {pr_info.html_url} [behind base branch]", - level="debug", - ) - - try: - if self._github_client is None: - raise RuntimeError("GitHub client not initialized") - await self._github_client.update_branch( - repo_owner, repo_name, pr_info.number - ) - - # Enable auto-merge so the PR merges even if we - # time out waiting for status checks. 
- auto_merge_ok = await self._enable_auto_merge_for_pr( - pr_info, repo_owner, repo_name - ) - if auto_merge_ok: - self.log.debug( - "Auto-merge enabled after rebase for %s/%s#%s", - repo_owner, - repo_name, - pr_info.number, - ) - - # Wait for GitHub to process the update and run checks - self._console.print( - f"⏳ Waiting: {pr_info.html_url}" - ) - await asyncio.sleep(self._merge_recheck_interval) - - # Re-check PR status after rebase with extended retry logic - # Initialize variables before the loop - updated_mergeable: bool | None = pr_info.mergeable - updated_mergeable_state: str | None = pr_info.mergeable_state - - for check_attempt in range(self._merge_poll_max_attempts): - updated_pr_data = await self._github_client.get( - f"/repos/{repo_owner}/{repo_name}/pulls/{pr_info.number}" - ) - - if isinstance(updated_pr_data, dict): - updated_mergeable = updated_pr_data.get("mergeable") - updated_mergeable_state = updated_pr_data.get( - "mergeable_state" - ) - # Capture the new head SHA so later - # block-reason analysis queries the - # rebased commit, not the pre-rebase - # one. update_branch() advances - # head.sha, and analyze_block_reason() - # uses head_sha to query check runs. - updated_head = ( - updated_pr_data.get("head") or {} - ).get("sha") - if updated_head: - pr_info.head_sha = updated_head - else: - updated_mergeable = None - updated_mergeable_state = None - - if updated_mergeable_state == "clean": - break - elif updated_mergeable_state == "behind": - if check_attempt < self._merge_poll_max_attempts - 1: - self.log.debug( - "PR still processing rebase, waiting... " - "(attempt %d/%d)", - check_attempt + 1, - self._merge_poll_max_attempts, - ) - await asyncio.sleep(self._merge_recheck_interval) - elif updated_mergeable_state == "blocked": - if check_attempt < self._merge_poll_max_attempts - 1: - self.log.debug( - "PR status checks running after rebase, " - "waiting... 
(attempt %d/%d)", - check_attempt + 1, - self._merge_poll_max_attempts, - ) - await asyncio.sleep(self._merge_recheck_interval) - else: - if auto_merge_ok: - log_and_print( - self.log, - self._console, - f"⏳ Auto-merge will complete: " - f"{pr_info.html_url} " - "[timeout waiting for checks]", - level="warning", - ) - else: - log_and_print( - self.log, - self._console, - f"⚠️ Proceeding without checks: " - f"{pr_info.html_url} " - "[timeout waiting for checks]", - level="warning", - ) - break - elif updated_mergeable_state is None: - # GitHub is still computing mergeability - # (typically right after update_branch). - # Treat as transient and keep polling - # until the deadline or a concrete state - # arrives — breaking here would otherwise - # exit prematurely and (if auto-merge - # enablement failed) fall through to a - # manual merge attempt against the - # still-resolving PR state. - if check_attempt < self._merge_poll_max_attempts - 1: - self.log.debug( - "PR mergeable_state still computing " - "after rebase, waiting... " - "(attempt %d/%d)", - check_attempt + 1, - self._merge_poll_max_attempts, - ) - await asyncio.sleep(self._merge_recheck_interval) - else: - break - else: - break - - # Update our PR info with the latest state. - # Preserve the previous non-None mergeable - # value when the refresh returns ``null`` - # (GitHub is still computing). The Step 6 - # auto-merge skip gate now accepts both - # ``True`` and ``None`` (it excludes only - # the explicit ``False`` case), so a - # transient null no longer blocks the - # auto-merge path on its own. We still - # preserve the prior known ``True`` here - # so downstream logging and any future - # tightening of that predicate get an - # accurate state to work with. 
- if updated_mergeable is not None: - pr_info.mergeable = updated_mergeable - # Preserve the previous non-None mergeable_state - # for the same reason as ``mergeable`` above: - # GitHub returns ``null`` while still computing, - # and the post-rebase reporting / Step 5.5 logic - # branches on this value (e.g. "clean" vs - # "blocked" vs "behind"). A transient ``None`` - # would otherwise be classified as the catch-all - # "other state" branch. - if updated_mergeable_state is not None: - pr_info.mergeable_state = updated_mergeable_state - - # Mark this PR as having gone through the - # Step 5 rebase + poll path. Step 5.5 will - # consult ``_rebased_prs`` to avoid doubling - # the merge_timeout when the rebase exits - # in ``blocked`` or ``behind`` state. - self._rebased_prs.add( - f"{repo_owner}/{repo_name}#{pr_info.number}" - ) - - # Report post-rebase status - if pr_info.mergeable_state == "clean": - log_and_print( - self.log, - self._console, - f"✅ Rebased: {pr_info.html_url}", - level="debug", - ) - elif pr_info.mergeable_state == "behind": - log_and_print( - self.log, - self._console, - f"⚠️ Rebased: {pr_info.html_url} [still behind after rebase]", - level="debug", - ) - elif pr_info.mergeable_state == "blocked": - log_and_print( - self.log, - self._console, - f"⬆️ Rebased: {pr_info.html_url} [waiting for status checks]", - level="debug", - ) - else: - log_and_print( - self.log, - self._console, - f"ℹ️ Rebased: {pr_info.html_url}", - level="debug", - ) - - except Exception as e: - result.status = MergeStatus.FAILED - result.error = f"Failed to rebase PR: {e}" - - if self.progress_tracker: - self.progress_tracker.merge_failure() - self._console.print( - f"❌ Failed: {pr_info.html_url} [rebase error: {e}]" - ) - return result + rebase_ctx = rebase.RebaseContext( + github_client=self._github_client, + token=self.token, + rebase_local=self.rebase_local, + preview_mode=self.preview_mode, + merge_recheck_interval=self._merge_recheck_interval, + 
merge_poll_max_attempts=self._merge_poll_max_attempts, + log=self.log, + console=self._console, + rebased_prs=self._rebased_prs, + enable_auto_merge=self._enable_auto_merge_for_pr, + ) + outcome = await rebase.perform_step5_rebase( + ctx=rebase_ctx, + pr_info=pr_info, + owner=repo_owner, + repo=repo_name, + ) + if outcome.failed: + result.status = MergeStatus.FAILED + result.error = outcome.error_message + if self.progress_tracker: + self.progress_tracker.merge_failure() + return result # Step 5.5: If the PR is still blocked (e.g. by a pending # required status check such as pre-commit.ci), behind @@ -1336,9 +1182,7 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: # below still weeds out genuinely-stuck cases (missing # approvals, etc.) so we don't burn ``merge_timeout`` on # them. - pr_key_for_wait = ( - f"{repo_owner}/{repo_name}#{pr_info.number}" - ) + pr_key_for_wait = f"{repo_owner}/{repo_name}#{pr_info.number}" already_rebased = pr_key_for_wait in self._rebased_prs base_should_wait = ( not self.preview_mode @@ -1360,13 +1204,11 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: and self._github_client is not None ): try: - pre_block_reason = ( - await self._github_client.analyze_block_reason( - repo_owner, - repo_name, - pr_info.number, - pr_info.head_sha, - ) + pre_block_reason = await self._github_client.analyze_block_reason( + repo_owner, + repo_name, + pr_info.number, + pr_info.head_sha, ) except Exception as exc: self.log.debug( @@ -1414,10 +1256,7 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: # deadline so the total wait is bounded by # ``merge_timeout`` even if a single iteration # over-sleeps slightly. - wait_deadline = ( - asyncio.get_running_loop().time() - + self._merge_timeout - ) + wait_deadline = asyncio.get_running_loop().time() + self._merge_timeout # Local alias so the type checker can narrow # ``self._github_client`` across the await boundary. 
wait_client = self._github_client @@ -1434,16 +1273,20 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: closed_during_wait = False merged_during_wait = False try: - while ( - asyncio.get_running_loop().time() < wait_deadline - ): + while asyncio.get_running_loop().time() < wait_deadline: if pr_info.mergeable_state == "clean": break # Sleep no longer than the time remaining # so we don't overshoot ``wait_deadline``. - remaining = ( - wait_deadline - - asyncio.get_running_loop().time() + # Clamp to a non-negative value: the loop's + # ``while ... < wait_deadline`` check and the + # ``time()`` call here are not atomic, so a + # near-deadline crossing can produce a tiny + # negative ``remaining`` which would raise + # ``ValueError`` from ``asyncio.sleep``. + remaining = max( + 0.0, + wait_deadline - asyncio.get_running_loop().time(), ) await asyncio.sleep( min(self._merge_recheck_interval, remaining) @@ -1477,9 +1320,7 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: # informative for downstream logging and any # future tightening of that predicate. if "mergeable" in refreshed_wait: - refreshed_mergeable = refreshed_wait.get( - "mergeable" - ) + refreshed_mergeable = refreshed_wait.get("mergeable") if refreshed_mergeable is not None: pr_info.mergeable = refreshed_mergeable if "mergeable_state" in refreshed_wait: @@ -1491,9 +1332,7 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: # otherwise break the wait loop early # on a transient ``null`` returned # while GitHub is still computing. - refreshed_state = refreshed_wait.get( - "mergeable_state" - ) + refreshed_state = refreshed_wait.get("mergeable_state") if refreshed_state is not None: pr_info.mergeable_state = refreshed_state # Capture the current head SHA so any @@ -1501,9 +1340,9 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: # call queries the right commit. 
The # head can change while we wait # (rebase, dependabot force-push, etc.). - refreshed_head = ( - refreshed_wait.get("head") or {} - ).get("sha") + refreshed_head = (refreshed_wait.get("head") or {}).get( + "sha" + ) if refreshed_head: pr_info.head_sha = refreshed_head if refreshed_wait.get("state") == "closed": @@ -1554,12 +1393,13 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: ) else: result.status = MergeStatus.FAILED - result.error = "PR closed without merging during auto-merge wait" + result.error = ( + "PR closed without merging during auto-merge wait" + ) if self.progress_tracker: self.progress_tracker.merge_failure() self._console.print( - f"🛑 Closed without merging: " - f"{pr_info.html_url}" + f"🛑 Closed without merging: {pr_info.html_url}" ) return result @@ -1654,8 +1494,7 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: auto_merge_pending_checks = False if ( pr_key in self._auto_merge_enabled - and pr_info.mergeable_state - in ("blocked", "behind", "unstable") + and pr_info.mergeable_state in ("blocked", "behind", "unstable") and self.force_level != "all" ): if pr_info.mergeable_state in ("behind", "unstable"): @@ -1695,17 +1534,24 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: # checks are outstanding. The same predicate is # used by the Step 5.5 pre-check. auto_merge_pending_checks = ( - self._block_reason_indicates_pending_checks( - block_reason - ) + self._block_reason_indicates_pending_checks(block_reason) ) if auto_merge_pending_checks: merged = None # Sentinel: auto-merge pending else: - merged = await self._merge_pr_with_retry( - pr_info, repo_owner, repo_name + # Serialise the actual merge dispatch per repo so + # back-to-back merges don't race GitHub's branch + # protection propagation. Workers on the same + # repo queue here; workers on different repos run + # in parallel. See ``_get_merge_dispatch_lock``. 
+ dispatch_lock = await self._get_merge_dispatch_lock( + repo_owner, repo_name ) + async with dispatch_lock: + merged = await self._merge_pr_with_retry( + pr_info, repo_owner, repo_name + ) if merged is None: # Auto-merge is active — PR will merge asynchronously. @@ -1752,9 +1598,55 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: # Before giving up, check if this is a dependabot PR # that failed due to unsigned commits. If so, ask # dependabot to recreate the PR and merge the new one. + # + # Two recreate triggers are considered: + # 1. Branch-protection failures (the original + # unsigned-commit case). + # 2. A DCO check that has been stuck (queued / + # in_progress / pending) for longer than + # ``STUCK_DCO_THRESHOLD_SECONDS`` on a PR + # that itself was created and last updated + # that long ago. DCO normally completes in + # seconds; when it does not, the only + # reliable recovery for dependabot PRs is + # to recreate the PR so the check fires + # again on a fresh head SHA. recreated_pr = None if pr_info.author == "dependabot[bot]" and not self.preview_mode: - if "branch protection" in failure_reason.lower(): + should_recreate = ( + "branch protection" in failure_reason.lower() + ) + if not should_recreate: + try: + ( + is_stuck, + stuck_check, + stuck_age, + ) = await self._detect_stuck_dco(pr_info) + except Exception as exc: + self.log.debug( + "_detect_stuck_dco failed for %s#%s: %s", + pr_info.repository_full_name, + pr_info.number, + exc, + ) + is_stuck = False + stuck_check = None + stuck_age = 0.0 + if is_stuck: + # Use markup=False so a check name + # containing characters that look like + # Rich markup (e.g. ``[required]``) does + # not get silently swallowed by the + # console parser. 
+ self._console.print( + f"⏳ Stuck DCO detected: {pr_info.html_url} " + f"[{stuck_check} pending for " + f"{stuck_age:.0f}s, requesting recreate]", + markup=False, + ) + should_recreate = True + if should_recreate: recreated_pr = await self._trigger_dependabot_recreate( pr_info ) @@ -1772,12 +1664,23 @@ async def _merge_single_pr(self, pr_info: PullRequestInfo) -> MergeResult: try: if self._github_client is None: raise RuntimeError("GitHub client not initialized") - new_merged = await self._github_client.merge_pull_request( - new_owner, - new_repo, - recreated_pr.number, - new_merge_method, + # Same per-repo dispatch serialisation as + # the main merge path — see + # ``_get_merge_dispatch_lock``. + new_dispatch_lock = ( + await self._get_merge_dispatch_lock( + new_owner, new_repo + ) ) + async with new_dispatch_lock: + new_merged = ( + await self._github_client.merge_pull_request( + new_owner, + new_repo, + recreated_pr.number, + new_merge_method, + ) + ) except Exception as merge_err: self.log.error( "Failed to merge recreated PR %s#%s: %s", @@ -2119,9 +2022,7 @@ async def _post_pr_comment_with_retry( # Both attempts failed — surface a single line so the # user knows the PR-side audit trail is incomplete. 
try: - self._console.print( - f"⚠️ Unable to add pull request comment: {html_url}" - ) + self._console.print(f"⚠️ Unable to add pull request comment: {html_url}") except Exception: pass return False @@ -2185,9 +2086,7 @@ async def _enable_auto_merge_for_pr( return True cache_key = f"{owner}/{repo}" - merge_method = self._pr_merge_methods.get( - cache_key, self.default_merge_method - ) + merge_method = self._pr_merge_methods.get(cache_key, self.default_merge_method) enabled = await self._github_client.enable_auto_merge( pr_info.node_id, merge_method @@ -2207,8 +2106,7 @@ async def _enable_auto_merge_for_pr( ) except Exception as exc: self.log.debug( - "Could not refresh PR %s to check existing " - "auto-merge state: %s", + "Could not refresh PR %s to check existing auto-merge state: %s", pr_key, exc, ) @@ -2242,8 +2140,7 @@ async def _enable_auto_merge_for_pr( # see at a glance that dependamerge enabled auto-merge # on the PR. audit_comment = ( - "🤖 Dependamerge\n" - "Enabled auto-merge due to pending updates/checks ⏳" + "🤖 Dependamerge\nEnabled auto-merge due to pending updates/checks ⏳" ) await self._post_pr_comment_with_retry( owner, repo, pr_info.number, pr_info.html_url, audit_comment @@ -2602,6 +2499,174 @@ async def _trigger_stale_precommit_ci(self, pr_info: PullRequestInfo) -> bool: ) return False + async def _detect_stuck_dco( + self, + pr_info: PullRequestInfo, + ) -> tuple[bool, str | None, float]: + """Detect whether a DCO check on ``pr_info`` is stuck. + + The DCO check normally finishes within a handful of seconds. + When it has been queued / in-progress for longer than + :data:`STUCK_DCO_THRESHOLD_SECONDS` on a PR that itself was + created and last updated more than that long ago, treat it + as stuck so the caller can decide whether to ask dependabot + to recreate the PR. 
+ + The age floor on PR ``created_at`` / ``updated_at`` avoids + false positives on PRs that were touched seconds before we + observed them — in those cases the DCO check is simply + running normally and should be allowed to finish. + + Args: + pr_info: The pull request being evaluated. + + Returns: + A 3-tuple ``(is_stuck, check_name, age_seconds)``. + ``check_name`` is the GitHub check / status name of the + stuck check (or ``None`` when no stuck check was found). + ``age_seconds`` is the time the check has been pending + (or ``0.0`` when no candidate check was found). + """ + if not self._github_client: + return False, None, 0.0 + + repo_owner, repo_name = pr_info.repository_full_name.split("/", 1) + threshold = STUCK_DCO_THRESHOLD_SECONDS + + from datetime import datetime, timezone + + def _parse_ts(value: Any) -> datetime | None: + if not isinstance(value, str) or not value: + return None + try: + # GitHub returns RFC 3339 with a trailing ``Z``. + return datetime.fromisoformat(value.replace("Z", "+00:00")) + except ValueError: + return None + + def _is_dco_name(name: str) -> bool: + """Return True when ``name`` looks like a DCO check. + + Matches the common variants emitted by the GitHub DCO + App and similar bots: ``DCO``, ``dco/dco``, ``dcobot``, + and any name containing ``signoff`` / ``sign-off`` / + ``signed-off`` (case-insensitive). + """ + n = (name or "").strip().lower() + if not n: + return False + if n in {"dco", "dco/dco", "dcobot"} or n.startswith("dco/"): + return True + return ( + "signoff" in n + or "sign-off" in n + or "signed-off" in n + ) + + # 1. PR-level age floor — don't fire on PRs we caught right + # after they were opened or force-pushed; the DCO check + # on those is simply running normally. 
+ now = datetime.now(timezone.utc) + try: + pr_data = await self._github_client.get( + f"/repos/{repo_owner}/{repo_name}/pulls/{pr_info.number}" + ) + except Exception as exc: + self.log.debug( + "_detect_stuck_dco: pr fetch failed for %s#%s: %s", + pr_info.repository_full_name, + pr_info.number, + exc, + ) + return False, None, 0.0 + + if not isinstance(pr_data, dict): + return False, None, 0.0 + + pr_created = _parse_ts(pr_data.get("created_at")) + pr_updated = _parse_ts(pr_data.get("updated_at")) + if pr_created is None or pr_updated is None: + # Without timing data we cannot safely judge stuckness; + # fail closed. + return False, None, 0.0 + + pr_age = (now - pr_created).total_seconds() + pr_idle = (now - pr_updated).total_seconds() + if pr_age < threshold or pr_idle < threshold: + return False, None, 0.0 + + # 2. Examine check-runs and status contexts on the head SHA. + candidate_name: str | None = None + candidate_age: float = 0.0 + + try: + runs = await self._github_client.get( + f"/repos/{repo_owner}/{repo_name}/commits/" + f"{pr_info.head_sha}/check-runs" + ) + except Exception as exc: + self.log.debug( + "_detect_stuck_dco: check-runs fetch failed for %s#%s: %s", + pr_info.repository_full_name, + pr_info.number, + exc, + ) + runs = None + + if isinstance(runs, dict): + for run in runs.get("check_runs") or []: + if not isinstance(run, dict): + continue + name = run.get("name", "") + if not _is_dco_name(name): + continue + status = run.get("status") + if status not in ("queued", "in_progress"): + continue + started = _parse_ts(run.get("started_at")) + # Use the *latest* of started_at and PR updated_at + # as the reference so a stale started_at left over + # from a prior head SHA does not inflate the age. 
+ ref = max(started, pr_updated) if started else pr_updated + age = (now - ref).total_seconds() + if age >= threshold and age > candidate_age: + candidate_name = name + candidate_age = age + + try: + statuses = await self._github_client.get( + f"/repos/{repo_owner}/{repo_name}/commits/" + f"{pr_info.head_sha}/status" + ) + except Exception as exc: + self.log.debug( + "_detect_stuck_dco: status fetch failed for %s#%s: %s", + pr_info.repository_full_name, + pr_info.number, + exc, + ) + statuses = None + + if isinstance(statuses, dict): + for s in statuses.get("statuses") or []: + if not isinstance(s, dict): + continue + ctx = s.get("context", "") + if not _is_dco_name(ctx): + continue + if s.get("state") != "pending": + continue + updated = _parse_ts(s.get("updated_at")) or pr_updated + ref = max(updated, pr_updated) + age = (now - ref).total_seconds() + if age >= threshold and age > candidate_age: + candidate_name = ctx + candidate_age = age + + if candidate_name is None: + return False, None, 0.0 + return True, candidate_name, candidate_age + async def _trigger_dependabot_recreate( self, pr_info: PullRequestInfo ) -> PullRequestInfo | None: @@ -3507,6 +3572,31 @@ async def _handle_merge_failure( # For other failure types, don't retry return False + async def _get_merge_dispatch_lock( + self, owner: str, repo: str + ) -> asyncio.Lock: + """Return the ``asyncio.Lock`` that serialises merge dispatch for ``owner/repo``. + + The lock is created lazily on first request and shared by + every worker targeting the same repository. Workers + targeting different repositories receive distinct locks and + can dispatch in parallel. + + Holding this lock around the actual ``merge_pull_request`` + API call (and its retry loop) prevents back-to-back merges + on the same repo from racing GitHub's branch-protection + propagation, while leaving every other phase of the merge + flow — approve, rebase polling, Step 5.5's auto-merge wait — + free to run in parallel across workers. 
+ """ + key = f"{owner}/{repo}" + async with self._merge_dispatch_locks_lock: + lock = self._merge_dispatch_locks.get(key) + if lock is None: + lock = asyncio.Lock() + self._merge_dispatch_locks[key] = lock + return lock + async def _get_org_settings(self, owner: str) -> dict[str, Any] | None: """ Get organization-level settings, with caching. @@ -3551,15 +3641,15 @@ async def _get_org_settings(self, owner: str) -> dict[str, Any] | None: "web_commit_signoff_required", False ) if web_commit_signoff: - self.log.debug( - f"Organization {owner} requires commit signoff" - ) + self.log.debug(f"Organization {owner} requires commit signoff") return org_data else: self._org_settings_cache[owner] = None return None except Exception as e: - self.log.debug(f"Could not check organization settings for {owner}: {e}") + self.log.debug( + f"Could not check organization settings for {owner}: {e}" + ) self._org_settings_cache[owner] = None return None diff --git a/src/dependamerge/rebase.py b/src/dependamerge/rebase.py new file mode 100644 index 0000000..28227ec --- /dev/null +++ b/src/dependamerge/rebase.py @@ -0,0 +1,957 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: 2026 The Linux Foundation +"""Rebase strategies for behind pull requests. + +This module centralises every code path dependamerge uses to bring +a PR up to date with its base branch before merging: + +- :func:`should_use_local_rebase` decides between the local-git + workflow (preserves verified commit signatures via the user's + ``~/.gitconfig``) and the GitHub REST ``update-branch`` + endpoint (fast but produces unsigned commits). +- :func:`local_rebase_pr` runs the local clone + rebase + + force-push-with-lease against a secure temp workspace. +- :func:`perform_step5_rebase` is the top-level dispatcher used by + :class:`AsyncMergeManager._merge_single_pr` Step 5. 
It calls + :func:`should_use_local_rebase` to decide between paths, then + delegates to either ``_run_local_path`` or ``_run_rest_path`` + (private helpers). ``_run_rest_path`` runs the REST + ``update-branch`` call and the post-rebase polling loop + (``_poll_post_rebase``) that waits for GitHub to recompute + mergeability. +- :func:`authed_clone_url` injects a token into an HTTPS clone URL + for non-interactive ``git clone`` auth. + +The dispatcher takes a :class:`RebaseContext` rather than a full +``AsyncMergeManager`` reference, which keeps the rebase logic +testable in isolation (no need to construct a manager + GitHub +client + progress tracker just to exercise a decision tree). + +The local-rebase path is the headline reason this module exists: +``PUT /repos/{owner}/{repo}/pulls/{n}/update-branch`` creates a +server-side merge commit whose committer is the calling token's +GitHub user, which is *not* signed with the user's local SSH/GPG +key. On repos whose branch protection requires verified +signatures, the resulting commit loses its ``Verified`` badge and +becomes un-mergeable. ``pre-commit-ci[bot]`` PRs are particularly +affected because that bot has no comment macro for recreating a PR +with a re-signed commit (https://github.com/pre-commit-ci/issues/issues/41). +The local path solves this by shelling out to ``git`` so the +user's signing config is honoured. +""" + +from __future__ import annotations + +import asyncio +import logging +from collections.abc import Awaitable, Callable +from dataclasses import dataclass +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from rich.console import Console + +from . 
import git_ops +from .git_ops import ( + GitError, + add_remote, + checkout, + clone, + ensure_git_available, + fetch, + push_force_with_lease, + rebase, + rebase_abort, + secure_rmtree, +) +from .models import PullRequestInfo +from .output_utils import log_and_print + +if TYPE_CHECKING: + from .github_async import GitHubAsync + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +@dataclass +class RebaseContext: + """Bundle of dependencies the rebase orchestrator needs. + + Passed in lieu of a full ``AsyncMergeManager`` reference so the + rebase logic stays decoupled from manager internals and can be + tested without standing up the whole merge pipeline. + """ + + github_client: GitHubAsync | None + token: str + rebase_local: bool + preview_mode: bool + merge_recheck_interval: float + merge_poll_max_attempts: int + log: logging.Logger + console: Console + # Mutable set on the manager that records PR keys (``owner/repo#N``) + # which have already been through Step 5. Step 5.5 consults this + # to avoid doubling the configured ``merge_timeout``. We keep the + # raw set reference rather than a callback so the existing + # invariant (Step 5 always adds, Step 5.5 always reads) stays + # obvious at the call site. + rebased_prs: set[str] + # Async callable equivalent to ``manager._enable_auto_merge_for_pr``. + # Passed in to avoid a circular import. + enable_auto_merge: Callable[[PullRequestInfo, str, str], Awaitable[bool]] + + +@dataclass +class Step5Outcome: + """Result of :func:`perform_step5_rebase`. + + ``failed`` indicates the caller should mark the PR as ``FAILED`` + and bail out of the merge attempt. ``error_message`` is the + user-visible reason in that case. 
+ """ + + failed: bool = False + error_message: str | None = None + + +def authed_clone_url(clone_url: str, token: str) -> str: + """Return an HTTPS clone URL with the token injected for auth. + + The token *is* passed to ``git`` as part of the URL command- + line argument, which means it can be visible to process- + listing tools (``ps``, ``/proc//cmdline``) on the local + machine for the duration of the git invocation. Log output is + separately redacted by :func:`git_ops._redact`, but no + equivalent protection exists for ``ps``-style introspection. + Callers needing stronger guarantees should use a different + auth mechanism (e.g. SSH keys or a credential helper) and + pass an unmodified ``ssh://`` / ``git@`` URL through + unchanged. + + Non-HTTPS URLs (SSH, ``git://``) are returned unchanged. + """ + if clone_url.startswith("https://"): + return clone_url.replace("https://", f"https://x-access-token:{token}@") + return clone_url + + +async def should_use_local_rebase( + *, + github_client: GitHubAsync | None, + pr_info: PullRequestInfo, + owner: str, + repo: str, + base_branch: str, + rebase_local: bool, + log: logging.Logger, +) -> tuple[bool, str]: + """Decide whether Step 5 should rebase locally instead of via REST. + + Returns ``(use_local, reason)``. ``reason`` is a short + human-readable string suitable for debug logging or a + user-visible note when ``use_local`` is True. + + The gate activates when ``rebase_local`` is True AND either: + + - the PR is from ``pre-commit-ci[bot]`` (always — that bot has + no comment macro for recreating a PR with a re-signed + commit), OR + - the base branch requires verified signatures AND the current + PR head commit is itself verified (so REST update-branch + *would* break verification). + + Strict ``is True`` comparison is used on the + ``requires_commit_signatures`` return so ``AsyncMock`` defaults + don't accidentally route real PRs into the local-rebase path + in tests that haven't explicitly set ``return_value``. 
If the + requirement check raises, we fail safely to the REST path. + """ + if not rebase_local: + return False, "--no-rebase-local set" + + # Always use local rebase for pre-commit.ci PRs. The bot has + # no comment macro to recover from a verification break, so + # we treat it as opt-in regardless of branch protection. + if pr_info.author == "pre-commit-ci[bot]": + return True, "pre-commit-ci[bot] has no recreate/rebase macro" + + if github_client is None: + return False, "no GitHub client" + + # Branch-protection signature requirement (classic + rulesets) + try: + requires_signatures = await github_client.requires_commit_signatures( + owner, repo, base_branch + ) + except Exception as exc: + log.debug( + "Could not determine signature requirement for %s/%s:%s: %s", + owner, + repo, + base_branch, + exc, + ) + return False, "signature requirement check failed" + + # Strict ``is True`` rather than truthy check: ``AsyncMock`` + # default returns evaluate as truthy, and we explicitly do + # not want to enter the network-touching local-rebase path + # in test mocks that haven't been set up to handle it. + if requires_signatures is not True: + return False, "base branch does not require signatures" + + # Base does require verified signatures. Check whether the + # current PR head is itself verified — if it isn't, REST + # update-branch can't make things worse, so we don't need + # the local-rebase machinery. + try: + all_verified, _unverified = await github_client.check_pr_commit_signatures( + owner, repo, pr_info.number + ) + except Exception as exc: + log.debug( + "Could not check PR commit signatures for %s/%s#%s: %s", + owner, + repo, + pr_info.number, + exc, + ) + # Fail closed: if we can't confirm the PR head is + # verified, route to the REST path. 
The opposite + # (assuming verification and using the local path) + # would mean transient API failures could trigger + # network-touching local clones, and would conflict + # with the documented gate ("base requires signatures + # AND PR head is verified"). When verification isn't + # established, REST update-branch can't make things + # any worse than they already are. + return False, "signature check failed" + + if all_verified: + return True, "base requires signatures and PR head is verified" + return False, "PR head is not currently verified" + + +async def local_rebase_pr( + *, + pr_info: PullRequestInfo, + owner: str, + repo: str, + token: str, + log: logging.Logger, +) -> bool: + """Rebase a PR locally and force-push the result. + + Clones the head repo into a secure temp workspace, fetches the + base branch (from upstream when the PR is from a fork), runs + ``git rebase``, and force-pushes with lease back to the head + repo. All git invocations inherit the user's ``~/.gitconfig``, + so signing config is respected. + + Returns True only if every step succeeds. On any failure (no + ``git`` on PATH, conflict during rebase, network error, push + rejected) the workspace is cleaned up and False is returned; + the caller should fall through to the auto-merge path so we + never leave a half-applied state. + """ + # Ensure ``git`` is on PATH before we start. ``GitError`` is + # also raised when git is missing entirely. + try: + ensure_git_available() + except Exception as exc: + log.debug("Local rebase unavailable (no git on PATH?): %s", exc) + return False + + # We need the head/base clone URLs. They are populated for PRs + # surfaced by recent versions of the find-similar / merge flows; + # if missing we synthesise them from the repository names. 
+ head_clone_url = pr_info.head_repo_clone_url + base_clone_url = pr_info.base_repo_clone_url + head_full = pr_info.head_repo_full_name + base_full = pr_info.base_repo_full_name or f"{owner}/{repo}" + + # Fail closed when head repo identity is ambiguous. Without a + # confirmed ``head_repo_full_name`` or ``head_repo_clone_url`` + # we cannot tell whether the PR is from a fork. Synthesising a + # clone URL from the base repo would push to the wrong remote + # for fork PRs (creating or overwriting a branch on the base + # repo). The caller falls through to the auto-merge path on + # False, which is always safe. + if not head_full and not head_clone_url: + log.debug( + "Local rebase: PR %s/%s#%s missing head_repo identity " + "(head_repo_full_name and head_repo_clone_url are both unset); " + "failing closed to avoid pushing to the wrong remote.", + owner, + repo, + pr_info.number, + ) + return False + + # Both fields populated, or one of them — synthesise the + # missing URL from the known full_name. Safe because + # ``head_full`` is now confirmed to refer to the head repo, + # not the base. + if not head_clone_url: + head_clone_url = f"https://github.com/{head_full}.git" + if not base_clone_url: + base_clone_url = f"https://github.com/{base_full}.git" + + # Decide whether the PR is from a fork *before* we collapse + # ``head_full`` to ``base_full`` for clone-URL fallback. + # Treating fork PRs as non-fork would cause us to fetch the + # base branch from ``origin`` (the fork) instead of + # ``upstream`` (the canonical base repo) — either failing + # to fetch (the fork doesn't have the latest base commits) + # or, worse, fetching stale state and pushing back to the + # wrong remote. + # + # Preference order, all defensive: + # 1. The explicit ``pr_info.is_fork`` flag from the API. + # 2. Direct comparison of head/base full_names. + # 3. Direct comparison of head/base clone URLs. 
+ if pr_info.is_fork is not None: + is_fork = bool(pr_info.is_fork) + elif head_full and base_full and head_full != base_full: + is_fork = True + elif ( + head_clone_url + and base_clone_url + and head_clone_url != base_clone_url + ): + is_fork = True + else: + is_fork = False + + # Now safe to collapse ``head_full`` for the clone-URL + # synthesis fallback. ``is_fork`` has already been computed + # above and won't be misled by this assignment. + head_full = head_full or base_full + + head_branch = pr_info.head_branch + base_branch = pr_info.base_branch or "main" + if not head_branch: + log.debug( + "Local rebase: PR %s/%s#%s missing head_branch", + owner, + repo, + pr_info.number, + ) + return False + + origin_url = authed_clone_url(head_clone_url, token) + upstream_url = authed_clone_url(base_clone_url, token) + + # Use a per-PR workspace under a secure temp parent so + # concurrent rebases (--concurrency=N) don't collide. + workspace_parent = Path( + git_ops.create_secure_tempdir(prefix="dependamerge-localrebase-") + ) + workspace = ( + workspace_parent + / f"{(head_full or base_full).replace('/', '__')}__pr_{pr_info.number}" + ) + workspace.mkdir(parents=True, exist_ok=True) + + try: + # Clone the head repo at the PR's head branch. Shallow + # clone keeps disk + network footprint low for what are + # typically tiny dependency-update PRs. + try: + clone( + origin_url, + workspace, + branch=head_branch, + depth=50, + single_branch=True, + no_tags=True, + filter_blobs=True, + logger=log.debug, + ) + except GitError as exc: + log.debug("Local rebase: clone failed for %s: %s", pr_info.html_url, exc) + return False + + # Fetch the base branch — from upstream when the PR is + # from a fork, from origin otherwise. We need it + # available locally before we can rebase onto it. 
+ try: + if is_fork: + add_remote("upstream", upstream_url, cwd=workspace, logger=log.debug) + fetch( + "upstream", + base_branch, + cwd=workspace, + depth=50, + logger=log.debug, + ) + rebase_onto = f"upstream/{base_branch}" + else: + fetch( + "origin", + base_branch, + cwd=workspace, + depth=50, + logger=log.debug, + ) + rebase_onto = f"origin/{base_branch}" + except GitError as exc: + log.debug("Local rebase: fetch failed for %s: %s", pr_info.html_url, exc) + return False + + # Make sure we are on the head branch (defensive against + # detached HEAD after clone --branch). + try: + checkout(head_branch, cwd=workspace, create=False, logger=log.debug) + except GitError: + # Already on the branch, or branch missing locally; + # rebase will surface the real problem if any. + pass + + # Rebase. ``git rebase`` runs with ``check=False`` (see + # ``git_ops.rebase``), so a non-zero exit does *not* raise + # ``GitError``; we have to inspect ``returncode`` + # ourselves. Conflicts are the most common cause of a + # non-zero exit here, but other failures (corrupt index, + # invalid base ref, etc.) hit the same path — surface + # stderr/stdout in debug output so the cause is visible + # to anyone investigating, then abort the rebase to leave + # the workspace in a clean state before cleanup. + rebase_result = rebase( + rebase_onto, + cwd=workspace, + autostash=False, + interactive=False, + logger=log.debug, + ) + + if rebase_result.returncode != 0: + # Shallow clones (depth=50) can miss the merge base + # for PRs whose branch point is older than 50 commits. + # ``git rebase`` reports this as a generic non-zero + # exit; the diagnostic is visible in stderr but we + # can't reliably distinguish it without parsing + # locale-dependent text. + # + # Recovery: abort the failed rebase, unshallow both + # remotes, and retry the rebase. If that also fails, + # the cause is genuine (conflicts, corrupt index, + # etc.) 
and we return False as before so the caller + # falls through to the auto-merge path. + log.debug( + "Local rebase: rebase exited non-zero for %s " + "(rc=%d, stderr=%r, stdout=%r); attempting " + "unshallow + retry.", + pr_info.html_url, + rebase_result.returncode, + rebase_result.stderr, + rebase_result.stdout, + ) + try: + rebase_abort(cwd=workspace, logger=log.debug) + except Exception: + pass + + try: + fetch( + "origin", + cwd=workspace, + unshallow=True, + logger=log.debug, + ) + if is_fork: + fetch( + "upstream", + cwd=workspace, + unshallow=True, + logger=log.debug, + ) + except GitError as exc: + log.debug( + "Local rebase: unshallow failed for %s: %s", + pr_info.html_url, + exc, + ) + return False + + rebase_result = rebase( + rebase_onto, + cwd=workspace, + autostash=False, + interactive=False, + logger=log.debug, + ) + if rebase_result.returncode != 0: + log.debug( + "Local rebase: rebase still failing after unshallow " + "for %s (rc=%d, stderr=%r); aborting.", + pr_info.html_url, + rebase_result.returncode, + rebase_result.stderr, + ) + try: + rebase_abort(cwd=workspace, logger=log.debug) + except Exception: + pass + return False + log.debug( + "Local rebase: succeeded after unshallow for %s", + pr_info.html_url, + ) + + # Force-push with lease to the head repo. We push back to + # ``origin`` because the head ref always lives there (even + # for forks, the head repo *is* the fork). + try: + push_force_with_lease( + "origin", + head_branch, + head_branch, + cwd=workspace, + logger=log.debug, + ) + except GitError as exc: + log.debug( + "Local rebase: force-push failed for %s: %s", + pr_info.html_url, + exc, + ) + return False + + log.debug("Local rebase succeeded for %s", pr_info.html_url) + return True + + finally: + # Always clean up. The workspace contains a clone of the + # user's repository, so we want it gone even on success. 
+ try: + secure_rmtree(workspace_parent) + except Exception as exc: + log.debug( + "Local rebase: failed to clean up workspace %s: %s", + workspace_parent, + exc, + ) + + +async def perform_step5_rebase( + *, + ctx: RebaseContext, + pr_info: PullRequestInfo, + owner: str, + repo: str, +) -> Step5Outcome: + """Run Step 5 of the merge flow: bring the PR up to date with its base. + + Dispatches between the local-git path (signature-preserving) + and the legacy REST ``update-branch`` path based on + :func:`should_use_local_rebase`. When the local path is + selected, REST ``update-branch`` is **never** called — even on + local-rebase failure — so we never destroy a verified + signature. In the failure case we mark the PR as having been + through Step 5 (so Step 5.5 doesn't double the configured + ``merge_timeout``) and let auto-merge take over server-side. + + Returns a :class:`Step5Outcome`. ``failed=True`` indicates the + caller should set ``MergeStatus.FAILED`` and bail; the legacy + REST path is the only path that can produce this outcome (a + raised exception during ``update_branch`` or the polling loop). + """ + if ctx.preview_mode: + # NOTE: In preview mode, we should NOT print here as it + # breaks single-line reporting. The preview output + # should only be a single line per PR in the evaluation + # section. 
+ return Step5Outcome() + + log_and_print( + ctx.log, + ctx.console, + f"🔄 Rebasing: {pr_info.html_url} [behind base branch]", + level="debug", + ) + + use_local, local_reason = await should_use_local_rebase( + github_client=ctx.github_client, + pr_info=pr_info, + owner=owner, + repo=repo, + base_branch=pr_info.base_branch or "main", + rebase_local=ctx.rebase_local, + log=ctx.log, + ) + + if use_local: + await _run_local_path( + ctx=ctx, + pr_info=pr_info, + owner=owner, + repo=repo, + local_reason=local_reason, + ) + return Step5Outcome() + + return await _run_rest_path( + ctx=ctx, + pr_info=pr_info, + owner=owner, + repo=repo, + ) + + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + + +async def _run_local_path( + *, + ctx: RebaseContext, + pr_info: PullRequestInfo, + owner: str, + repo: str, + local_reason: str, +) -> None: + """Local-rebase path. Always succeeds from the caller's POV. + + Whether the underlying ``git`` workflow succeeds or fails, we + never fall back to REST ``update-branch`` (doing so would + defeat the whole point of the local path). + + Auto-merge enablement and ``_rebased_prs`` marking are linked: + + - If auto-merge gets enabled, mark ``_rebased_prs`` so Step + 5.5 skips this PR; auto-merge will handle the bounded wait + server-side and Step 6's skip gate routes to + ``AUTO_MERGE_PENDING``. + - If auto-merge cannot be enabled (repo doesn't allow it, + branch protection blocks it, etc.), do **not** mark + ``_rebased_prs`` so Step 5.5 still runs its bounded poll + loop. Without this, GitHub may still be recomputing + mergeability after the force-push when Step 6 fires and a + manual merge attempt would 405 transiently. Letting Step + 5.5 wait gives GitHub time to settle before Step 6 acts. 
+ """ + log_and_print( + ctx.log, + ctx.console, + f"🛡️ Local rebase: {pr_info.html_url} [{local_reason}]", + level="debug", + ) + try: + local_rebase_ok = await local_rebase_pr( + pr_info=pr_info, + owner=owner, + repo=repo, + token=ctx.token, + log=ctx.log, + ) + except Exception as exc: + ctx.log.debug( + "Local rebase raised unexpectedly for %s: %s", + pr_info.html_url, + exc, + ) + local_rebase_ok = False + + # Try to enable auto-merge. We capture the return value so we + # can decide whether Step 5.5 should also run (see comment + # below). + try: + auto_merge_ok = await ctx.enable_auto_merge(pr_info, owner, repo) + except Exception as exc: + ctx.log.debug( + "Could not enable auto-merge after local rebase for %s: %s", + pr_info.html_url, + exc, + ) + auto_merge_ok = False + + # Only mark ``_rebased_prs`` when auto-merge is active. + # Otherwise leave it unset so Step 5.5 still runs its bounded + # poll loop — GitHub may still be recomputing mergeability + # after the force-push, and a manual merge attempt in Step 6 + # without that wait would 405 transiently. When auto-merge + # *is* active, Step 6's skip gate routes the PR to + # ``AUTO_MERGE_PENDING`` directly, so the Step 5.5 wait would + # only double the merge_timeout. + if auto_merge_ok: + ctx.rebased_prs.add(f"{owner}/{repo}#{pr_info.number}") + + if local_rebase_ok: + log_and_print( + ctx.log, + ctx.console, + f"✅ Rebased (local): {pr_info.html_url}", + level="debug", + ) + else: + log_and_print( + ctx.log, + ctx.console, + f"🛡️ Local rebase failed; deferring to auto-merge: {pr_info.html_url}", + level="debug", + ) + + + +async def _run_rest_path( + *, + ctx: RebaseContext, + pr_info: PullRequestInfo, + owner: str, + repo: str, +) -> Step5Outcome: + """Legacy REST ``update-branch`` path with post-rebase polling. 
+ + Uses the GitHub REST API to bring the PR up to date, enables + auto-merge so the PR merges even if we time out waiting for + status checks, then polls until checks complete or + ``merge_timeout`` elapses. Updates ``pr_info`` in place with + the post-rebase state. + + Returns a :class:`Step5Outcome` whose ``failed`` field is True + when ``update_branch`` (or the polling apparatus) raises an + exception — the caller should mark the merge as ``FAILED`` in + that case. + """ + if ctx.github_client is None: + return Step5Outcome(failed=True, error_message="GitHub client not initialized") + client = ctx.github_client + + try: + await client.update_branch(owner, repo, pr_info.number) + + # Enable auto-merge so the PR merges even if we time out + # waiting for status checks. + auto_merge_ok = await ctx.enable_auto_merge(pr_info, owner, repo) + if auto_merge_ok: + ctx.log.debug( + "Auto-merge enabled after rebase for %s/%s#%s", + owner, + repo, + pr_info.number, + ) + + # Wait for GitHub to process the update and run checks + ctx.console.print(f"⏳ Waiting: {pr_info.html_url}") + await asyncio.sleep(ctx.merge_recheck_interval) + + updated_mergeable, updated_mergeable_state = await _poll_post_rebase( + ctx=ctx, + pr_info=pr_info, + owner=owner, + repo=repo, + auto_merge_ok=auto_merge_ok, + ) + + # Update our PR info with the latest state. Preserve the + # previous non-None values when the refresh returns + # ``null`` (GitHub is still computing). The Step 6 + # auto-merge skip gate accepts both ``True`` and ``None`` + # (it excludes only the explicit ``False`` case), so a + # transient null no longer blocks the auto-merge path on + # its own. We still preserve the prior known ``True`` so + # downstream logging and any future tightening of that + # predicate get an accurate state to work with. 
The same + # rationale applies to ``mergeable_state``: GitHub returns + # ``null`` while still computing, and the post-rebase + # reporting / Step 5.5 logic branches on this value (e.g. + # "clean" vs "blocked" vs "behind"); a transient ``None`` + # would otherwise be classified as the catch-all "other + # state" branch. + if updated_mergeable is not None: + pr_info.mergeable = updated_mergeable + if updated_mergeable_state is not None: + pr_info.mergeable_state = updated_mergeable_state + + # Mark this PR as having gone through the Step 5 rebase + # + poll path. Step 5.5 will consult ``_rebased_prs`` to + # avoid doubling the merge_timeout when the rebase exits + # in ``blocked`` or ``behind`` state. + ctx.rebased_prs.add(f"{owner}/{repo}#{pr_info.number}") + + _log_post_rebase_status(ctx=ctx, pr_info=pr_info) + return Step5Outcome() + + except Exception as exc: + ctx.console.print(f"❌ Failed: {pr_info.html_url} [rebase error: {exc}]") + return Step5Outcome(failed=True, error_message=f"Failed to rebase PR: {exc}") + + +async def _poll_post_rebase( + *, + ctx: RebaseContext, + pr_info: PullRequestInfo, + owner: str, + repo: str, + auto_merge_ok: bool, +) -> tuple[bool | None, str | None]: + """Poll the PR after ``update_branch`` until it stabilises. + + Returns the latest ``(mergeable, mergeable_state)`` observed. + Updates ``pr_info.head_sha`` in place when the refresh shows a + new head commit (so any subsequent ``analyze_block_reason()`` + call queries the rebased commit, not the pre-rebase one). 
+ """ + if ctx.github_client is None: + return pr_info.mergeable, pr_info.mergeable_state + client = ctx.github_client + + updated_mergeable: bool | None = pr_info.mergeable + updated_mergeable_state: str | None = pr_info.mergeable_state + + for check_attempt in range(ctx.merge_poll_max_attempts): + updated_pr_data: Any = await client.get( + f"/repos/{owner}/{repo}/pulls/{pr_info.number}" + ) + + if isinstance(updated_pr_data, dict): + updated_mergeable = updated_pr_data.get("mergeable") + updated_mergeable_state = updated_pr_data.get("mergeable_state") + updated_head = (updated_pr_data.get("head") or {}).get("sha") + if updated_head: + pr_info.head_sha = updated_head + else: + updated_mergeable = None + updated_mergeable_state = None + + if _poll_should_continue( + ctx=ctx, + pr_info=pr_info, + attempt=check_attempt, + mergeable_state=updated_mergeable_state, + auto_merge_ok=auto_merge_ok, + ): + await asyncio.sleep(ctx.merge_recheck_interval) + continue + break + + return updated_mergeable, updated_mergeable_state + + +def _poll_should_continue( + *, + ctx: RebaseContext, + pr_info: PullRequestInfo, + attempt: int, + mergeable_state: str | None, + auto_merge_ok: bool, +) -> bool: + """Return True when the post-rebase poll loop should keep waiting. + + Centralising the per-state decisions here keeps + :func:`_poll_post_rebase` short and readable. + """ + if mergeable_state == "clean": + return False + + last_attempt = attempt >= ctx.merge_poll_max_attempts - 1 + + if mergeable_state == "behind": + if last_attempt: + return False + ctx.log.debug( + "PR still processing rebase, waiting... (attempt %d/%d)", + attempt + 1, + ctx.merge_poll_max_attempts, + ) + return True + + if mergeable_state == "blocked": + if last_attempt: + _log_blocked_timeout(ctx=ctx, pr_info=pr_info, auto_merge_ok=auto_merge_ok) + return False + ctx.log.debug( + "PR status checks running after rebase, waiting... 
(attempt %d/%d)", + attempt + 1, + ctx.merge_poll_max_attempts, + ) + return True + + if mergeable_state is None: + # GitHub is still computing mergeability (typically right + # after update_branch). Treat as transient and keep + # polling until the deadline or a concrete state arrives — + # breaking here would otherwise exit prematurely and (if + # auto-merge enablement failed) fall through to a manual + # merge attempt against the still-resolving PR state. + if last_attempt: + return False + ctx.log.debug( + "PR mergeable_state still computing after rebase, " + "waiting... (attempt %d/%d)", + attempt + 1, + ctx.merge_poll_max_attempts, + ) + return True + + # Any other concrete state ("dirty", "draft", "unstable", + # "unknown", ...) ends the poll loop immediately. + return False + + +def _log_blocked_timeout( + *, + ctx: RebaseContext, + pr_info: PullRequestInfo, + auto_merge_ok: bool, +) -> None: + """Emit the user-facing line when the post-rebase poll times out blocked.""" + if auto_merge_ok: + log_and_print( + ctx.log, + ctx.console, + f"⏳ Auto-merge will complete: {pr_info.html_url} " + "[timeout waiting for checks]", + level="warning", + ) + else: + log_and_print( + ctx.log, + ctx.console, + f"⚠️ Proceeding without checks: {pr_info.html_url} " + "[timeout waiting for checks]", + level="warning", + ) + + +def _log_post_rebase_status( + *, + ctx: RebaseContext, + pr_info: PullRequestInfo, +) -> None: + """Emit the post-rebase status line based on the final mergeable_state.""" + state = pr_info.mergeable_state + if state == "clean": + log_and_print( + ctx.log, + ctx.console, + f"✅ Rebased: {pr_info.html_url}", + level="debug", + ) + elif state == "behind": + log_and_print( + ctx.log, + ctx.console, + f"⚠️ Rebased: {pr_info.html_url} [still behind after rebase]", + level="debug", + ) + elif state == "blocked": + log_and_print( + ctx.log, + ctx.console, + f"⬆️ Rebased: {pr_info.html_url} [waiting for status checks]", + level="debug", + ) + else: + log_and_print( 
+ ctx.log, + ctx.console, + f"ℹ️ Rebased: {pr_info.html_url}", + level="debug", + ) diff --git a/tests/test_dependabot_recreate.py b/tests/test_dependabot_recreate.py index bb30cd2..dddd68a 100644 --- a/tests/test_dependabot_recreate.py +++ b/tests/test_dependabot_recreate.py @@ -148,19 +148,26 @@ async def test_missing_verification_field(self): assert len(unverified) == 1 @pytest.mark.asyncio - async def test_api_error_returns_verified(self): - """On API errors, assume verified to avoid false positives.""" + async def test_api_error_propagates(self): + """On API errors, the exception now propagates to callers. + + The previous fail-open default (returning ``(True, [])`` on + error) collided with the signature-preservation gate in + ``rebase.py``, which interprets ``all_verified=True`` as a + positive confirmation. Surfacing the error lets each caller + choose its own fail-open / fail-closed semantics. + """ api = AsyncMock(spec=GitHubAsync) async def _raise_gen(*a, **kw): - raise Exception("API error") + raise RuntimeError("API error") yield # pragma: no cover – makes this an async generator api.get_paginated = _raise_gen api.log = AsyncMock() api.log.debug = lambda *a, **kw: None - result = await GitHubAsync.check_pr_commit_signatures(api, "owner", "repo", 42) - assert result == (True, []) + with pytest.raises(RuntimeError, match="API error"): + await GitHubAsync.check_pr_commit_signatures(api, "owner", "repo", 42) @pytest.mark.asyncio async def test_unexpected_response_shape(self): diff --git a/tests/test_local_rebase_signing.py b/tests/test_local_rebase_signing.py new file mode 100644 index 0000000..ecafcfe --- /dev/null +++ b/tests/test_local_rebase_signing.py @@ -0,0 +1,809 @@ +# SPDX-FileCopyrightText: 2026 The Linux Foundation +# SPDX-License-Identifier: Apache-2.0 +"""Tests for the signature-preserving local-rebase path. 
+ +Covers: + +- ``should_use_local_rebase`` decision tree (pre-commit-ci, signed + base + signed head, signed base + unsigned head, unsigned base, + ``--no-rebase-local`` opt-out). +- End-to-end Step 5 dispatch: when the gate says ``use_local``, we + do **not** call the REST ``update-branch`` endpoint regardless of + whether the local rebase succeeds or fails. +- ``_rebased_prs`` is populated in both local-success and + local-failure cases so Step 5.5 doesn't double the configured + ``merge_timeout``. +""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from dependamerge import rebase as rebase_module +from dependamerge.github2gerrit_detector import GitHub2GerritDetectionResult +from dependamerge.merge_manager import AsyncMergeManager, MergeStatus +from dependamerge.models import PullRequestInfo + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_pr( + *, + author: str = "dependabot[bot]", + mergeable_state: str = "behind", + head_repo_clone_url: str = "https://github.com/owner/repo.git", + base_repo_clone_url: str = "https://github.com/owner/repo.git", +) -> PullRequestInfo: + return PullRequestInfo( + number=42, + node_id="PR_node42", + title="Test PR", + body="", + author=author, + head_sha="abc123", + base_branch="main", + head_branch="feature", + state="open", + mergeable=True, + mergeable_state=mergeable_state, + behind_by=2, + files_changed=[], + repository_full_name="owner/repo", + html_url="https://github.com/owner/repo/pull/42", + reviews=[], + review_comments=[], + head_repo_full_name="owner/repo", + head_repo_clone_url=head_repo_clone_url, + base_repo_full_name="owner/repo", + base_repo_clone_url=base_repo_clone_url, + is_fork=False, + ) + + +def _make_mgr(**overrides) -> tuple[AsyncMergeManager, AsyncMock]: + """Build a manager 
with an AsyncMock GitHub client. + + Thin wrapper around the shared ``tests.conftest.make_merge_manager`` + so typing/lint fixes and defaults stay centralised. Local + additions: defaults ``rebase_local=True`` (the production + default — this module's tests focus on the rebase path). + """ + from tests.conftest import make_merge_manager + + defaults: dict[str, Any] = {"rebase_local": True} + defaults.update(overrides) + return make_merge_manager(**defaults) + + +# --------------------------------------------------------------------------- +# 1. should_use_local_rebase decision tree +# --------------------------------------------------------------------------- + + +class TestShouldUseLocalRebaseGate: + """Pin the gating decisions for the local-rebase path.""" + + @pytest.mark.asyncio + async def test_disabled_by_no_rebase_local_flag(self) -> None: + """``--no-rebase-local`` short-circuits the gate to False.""" + mgr, client = _make_mgr(rebase_local=False) + pr = _make_pr(author="pre-commit-ci[bot]") # would otherwise match + use_local, reason = await rebase_module.should_use_local_rebase( + github_client=client, + pr_info=pr, + owner="owner", + repo="repo", + base_branch="main", + rebase_local=mgr.rebase_local, + log=mgr.log, + ) + assert use_local is False + assert "--no-rebase-local" in reason + + @pytest.mark.asyncio + async def test_pre_commit_ci_always_local(self) -> None: + """``pre-commit-ci[bot]`` PRs always take the local path. + + The bot has no comment macro for recreating a PR with a + re-signed commit, so we never route it through REST + update-branch (which would break verification). + """ + mgr, client = _make_mgr() + pr = _make_pr(author="pre-commit-ci[bot]") + # No signature mocks needed: pre-commit-ci shortcut fires + # before requires_commit_signatures is consulted. 
+ use_local, reason = await rebase_module.should_use_local_rebase( + github_client=client, + pr_info=pr, + owner="owner", + repo="repo", + base_branch="main", + rebase_local=mgr.rebase_local, + log=mgr.log, + ) + assert use_local is True + assert "pre-commit-ci" in reason + client.requires_commit_signatures.assert_not_awaited() + + @pytest.mark.asyncio + async def test_signed_base_signed_head_uses_local(self) -> None: + """Signed branch + verified head → local path.""" + mgr, client = _make_mgr() + pr = _make_pr(author="dependabot[bot]") + client.requires_commit_signatures = AsyncMock(return_value=True) + client.check_pr_commit_signatures = AsyncMock(return_value=(True, [])) + use_local, reason = await rebase_module.should_use_local_rebase( + github_client=client, + pr_info=pr, + owner="owner", + repo="repo", + base_branch="main", + rebase_local=mgr.rebase_local, + log=mgr.log, + ) + assert use_local is True + assert "PR head is verified" in reason + + @pytest.mark.asyncio + async def test_signed_base_unsigned_head_uses_rest(self) -> None: + """Signed branch + unsigned PR head → REST path. + + Signature is already broken on the PR head, so REST + update-branch can't make verification any worse. Use the + cheaper REST path. 
+ """ + mgr, client = _make_mgr() + pr = _make_pr(author="dependabot[bot]") + client.requires_commit_signatures = AsyncMock(return_value=True) + client.check_pr_commit_signatures = AsyncMock(return_value=(False, ["abc123"])) + use_local, reason = await rebase_module.should_use_local_rebase( + github_client=client, + pr_info=pr, + owner="owner", + repo="repo", + base_branch="main", + rebase_local=mgr.rebase_local, + log=mgr.log, + ) + assert use_local is False + assert "not currently verified" in reason + + @pytest.mark.asyncio + async def test_unsigned_base_uses_rest(self) -> None: + """Base branch with no signature requirement → REST path.""" + mgr, client = _make_mgr() + pr = _make_pr(author="dependabot[bot]") + client.requires_commit_signatures = AsyncMock(return_value=False) + use_local, reason = await rebase_module.should_use_local_rebase( + github_client=client, + pr_info=pr, + owner="owner", + repo="repo", + base_branch="main", + rebase_local=mgr.rebase_local, + log=mgr.log, + ) + assert use_local is False + assert "does not require signatures" in reason + + @pytest.mark.asyncio + async def test_signature_check_failure_uses_rest(self) -> None: + """If the requirement check raises, fail safely to REST. + + We don't want to risk an unbounded local-rebase attempt + when we can't determine whether signatures are required. 
+ """ + mgr, client = _make_mgr() + pr = _make_pr(author="dependabot[bot]") + client.requires_commit_signatures = AsyncMock(side_effect=RuntimeError("boom")) + use_local, reason = await rebase_module.should_use_local_rebase( + github_client=client, + pr_info=pr, + owner="owner", + repo="repo", + base_branch="main", + rebase_local=mgr.rebase_local, + log=mgr.log, + ) + assert use_local is False + assert "signature requirement check failed" in reason + + @pytest.mark.asyncio + async def test_no_github_client_returns_false(self) -> None: + """Without a client we cannot consult branch protection.""" + mgr, _client = _make_mgr() + pr = _make_pr(author="dependabot[bot]") + use_local, reason = await rebase_module.should_use_local_rebase( + github_client=None, + pr_info=pr, + owner="owner", + repo="repo", + base_branch="main", + rebase_local=mgr.rebase_local, + log=mgr.log, + ) + assert use_local is False + assert "no GitHub client" in reason + + @pytest.mark.asyncio + async def test_signature_check_truthy_non_true_treated_as_false( + self, + ) -> None: + """Non-strict ``True`` from requires_commit_signatures must not match. + + ``AsyncMock`` defaults return a truthy ``MagicMock`` + instance; if we accepted any truthy value the production + gate would route real PRs into the local-rebase path on + mocks that hadn't explicitly set ``return_value``. + """ + mgr, client = _make_mgr() + pr = _make_pr(author="dependabot[bot]") + client.requires_commit_signatures = AsyncMock(return_value=MagicMock()) + use_local, _reason = await rebase_module.should_use_local_rebase( + github_client=client, + pr_info=pr, + owner="owner", + repo="repo", + base_branch="main", + rebase_local=mgr.rebase_local, + log=mgr.log, + ) + assert use_local is False + + @pytest.mark.asyncio + async def test_pr_signature_check_failure_uses_rest(self) -> None: + """If ``check_pr_commit_signatures()`` raises, fail safely to REST. 
+ + Distinct from ``test_signature_check_failure_uses_rest`` + which covers ``requires_commit_signatures()`` raising. + Here the base requirement check succeeds (signatures are + required) but the PR-head verification check raises. + Failing closed avoids triggering network-touching local + clones on transient API failures, and keeps the gate + consistent with its documented invariant ("base requires + signatures AND PR head is verified"). + """ + mgr, client = _make_mgr() + pr = _make_pr(author="dependabot[bot]") + client.requires_commit_signatures = AsyncMock(return_value=True) + client.check_pr_commit_signatures = AsyncMock( + side_effect=RuntimeError("transient API error") + ) + use_local, reason = await rebase_module.should_use_local_rebase( + github_client=client, + pr_info=pr, + owner="owner", + repo="repo", + base_branch="main", + rebase_local=mgr.rebase_local, + log=mgr.log, + ) + assert use_local is False + assert "signature check failed" in reason + + +# --------------------------------------------------------------------------- +# 2. Step 5 dispatch: local-rebase path skips REST update-branch +# --------------------------------------------------------------------------- + + +class TestStep5DispatchLocalRebase: + """Step 5 must not call REST update-branch when use_local is True. + + These tests verify the integration: regardless of whether the + local rebase succeeds or fails, ``update_branch()`` must not + fire. This is the whole point of the feature — a REST + update-branch would replace the verified commit with an + unsigned one and break branch protection. 
+ """ + + @pytest.mark.asyncio + async def test_pre_commit_ci_pr_skips_rest_update_branch_on_success( + self, + ) -> None: + """Pre-commit-ci PR + local rebase succeeds → no REST call.""" + mgr, client = _make_mgr(merge_timeout=0.1, fix_out_of_date=True) + pr = _make_pr(author="pre-commit-ci[bot]", mergeable_state="behind") + + client.update_branch = AsyncMock() + client.enable_auto_merge = AsyncMock(return_value=True) + client.post_issue_comment = AsyncMock() + client.get = AsyncMock( + return_value={ + "mergeable": True, + "mergeable_state": "clean", + "state": "open", + } + ) + client.analyze_block_reason = AsyncMock(return_value=None) + client.get_required_status_checks = AsyncMock(return_value=[]) + client.requires_commit_signatures = AsyncMock(return_value=True) + + with ( + patch( + "dependamerge.rebase.local_rebase_pr", + new_callable=AsyncMock, + return_value=True, + ) as mock_local_rebase, + patch.object( + mgr, + "_detect_github2gerrit", + new_callable=AsyncMock, + return_value=GitHub2GerritDetectionResult(), + ), + patch.object( + mgr, + "_get_merge_method_for_repo", + new_callable=AsyncMock, + return_value="merge", + ), + patch.object( + mgr, + "_trigger_stale_precommit_ci", + new_callable=AsyncMock, + return_value=False, + ), + patch.object( + mgr, + "_check_merge_requirements", + new_callable=AsyncMock, + return_value=(True, ""), + ), + patch.object( + mgr, + "_approve_pr", + new_callable=AsyncMock, + return_value=True, + ), + patch.object( + mgr, + "_merge_pr_with_retry", + new_callable=AsyncMock, + return_value=True, + ), + ): + await mgr._merge_single_pr(pr) + + # Local rebase was attempted exactly once. + mock_local_rebase.assert_awaited_once() + # REST update-branch was NEVER called — this is the key + # assertion: the verification-preserving path did not + # fall through to the verification-breaking endpoint. + client.update_branch.assert_not_awaited() + # The PR was marked as rebased so Step 5.5 doesn't double + # the merge timeout. 
+ assert "owner/repo#42" in mgr._rebased_prs + + @pytest.mark.asyncio + async def test_pre_commit_ci_pr_skips_rest_update_branch_on_failure( + self, + ) -> None: + """Pre-commit-ci PR + local rebase FAILS → still no REST call. + + This is the most important invariant: when local rebase + fails (no git, conflicts, network) we must NOT fall back + to REST update-branch, because doing so would break + verification — exactly the bug this feature exists to + prevent. + """ + mgr, client = _make_mgr(merge_timeout=0.1, fix_out_of_date=True) + pr = _make_pr(author="pre-commit-ci[bot]", mergeable_state="behind") + + client.update_branch = AsyncMock() + client.enable_auto_merge = AsyncMock(return_value=True) + client.post_issue_comment = AsyncMock() + client.get = AsyncMock( + return_value={ + "mergeable": True, + "mergeable_state": "behind", + "state": "open", + } + ) + client.analyze_block_reason = AsyncMock(return_value=None) + client.get_required_status_checks = AsyncMock(return_value=[]) + client.requires_commit_signatures = AsyncMock(return_value=True) + + with ( + patch( + "dependamerge.rebase.local_rebase_pr", + new_callable=AsyncMock, + return_value=False, + ) as mock_local_rebase, + patch.object( + mgr, + "_detect_github2gerrit", + new_callable=AsyncMock, + return_value=GitHub2GerritDetectionResult(), + ), + patch.object( + mgr, + "_get_merge_method_for_repo", + new_callable=AsyncMock, + return_value="merge", + ), + patch.object( + mgr, + "_trigger_stale_precommit_ci", + new_callable=AsyncMock, + return_value=False, + ), + patch.object( + mgr, + "_check_merge_requirements", + new_callable=AsyncMock, + return_value=(True, ""), + ), + patch.object( + mgr, + "_approve_pr", + new_callable=AsyncMock, + return_value=True, + ), + patch.object( + mgr, + "_merge_pr_with_retry", + new_callable=AsyncMock, + return_value=True, + ), + ): + result = await mgr._merge_single_pr(pr) + + mock_local_rebase.assert_awaited_once() + # REST update-branch must still NOT fire on local 
failure. + client.update_branch.assert_not_awaited() + # PR is marked rebased so Step 5.5 doesn't double-wait. + assert "owner/repo#42" in mgr._rebased_prs + # Auto-merge gets enabled by the local-rebase orchestrator + # (regardless of whether the local rebase itself succeeded + # or failed) so Step 6's skip gate routes the PR to + # AUTO_MERGE_PENDING. Without this, marking ``_rebased_prs`` + # would skip Step 5.5 too, and Step 6 would attempt a + # manual merge that 405s against pending checks — exactly + # the failure mode this feature exists to prevent. + assert "owner/repo#42" in mgr._auto_merge_enabled + assert result.status == MergeStatus.AUTO_MERGE_PENDING + + @pytest.mark.asyncio + async def test_unsigned_base_uses_rest_update_branch(self) -> None: + """Unsigned base + non-pre-commit-ci → REST path runs. + + Backward compatibility: when the gate says don't go local, + Step 5 must still call the REST endpoint (the existing + behaviour for repos without signature requirements). + """ + mgr, client = _make_mgr(merge_timeout=0.1, fix_out_of_date=True) + pr = _make_pr(author="dependabot[bot]", mergeable_state="behind") + + client.update_branch = AsyncMock() + client.enable_auto_merge = AsyncMock(return_value=True) + client.post_issue_comment = AsyncMock() + client.get = AsyncMock( + return_value={ + "mergeable": True, + "mergeable_state": "clean", + "state": "open", + } + ) + client.analyze_block_reason = AsyncMock(return_value=None) + client.get_required_status_checks = AsyncMock(return_value=[]) + # Unsigned base → gate returns False → REST path. 
+ client.requires_commit_signatures = AsyncMock(return_value=False) + + with ( + patch( + "dependamerge.rebase.local_rebase_pr", + new_callable=AsyncMock, + return_value=True, + ) as mock_local_rebase, + patch.object( + mgr, + "_detect_github2gerrit", + new_callable=AsyncMock, + return_value=GitHub2GerritDetectionResult(), + ), + patch.object( + mgr, + "_get_merge_method_for_repo", + new_callable=AsyncMock, + return_value="merge", + ), + patch.object( + mgr, + "_trigger_stale_precommit_ci", + new_callable=AsyncMock, + return_value=False, + ), + patch.object( + mgr, + "_check_merge_requirements", + new_callable=AsyncMock, + return_value=(True, ""), + ), + patch.object( + mgr, + "_approve_pr", + new_callable=AsyncMock, + return_value=True, + ), + patch.object( + mgr, + "_merge_pr_with_retry", + new_callable=AsyncMock, + return_value=True, + ), + ): + await mgr._merge_single_pr(pr) + + # Local rebase was NOT attempted. + mock_local_rebase.assert_not_awaited() + # REST update-branch WAS called (the legacy path). + client.update_branch.assert_awaited_once_with("owner", "repo", 42) + + @pytest.mark.asyncio + async def test_no_rebase_local_flag_uses_rest(self) -> None: + """``--no-rebase-local`` forces the REST path even for pre-commit-ci. + + Provides a user-visible escape hatch when the local path + is unavailable (e.g. running in a constrained environment + with no ``git`` or no network). 
+ """ + mgr, client = _make_mgr( + merge_timeout=0.1, fix_out_of_date=True, rebase_local=False + ) + pr = _make_pr(author="pre-commit-ci[bot]", mergeable_state="behind") + + client.update_branch = AsyncMock() + client.enable_auto_merge = AsyncMock(return_value=True) + client.post_issue_comment = AsyncMock() + client.get = AsyncMock( + return_value={ + "mergeable": True, + "mergeable_state": "clean", + "state": "open", + } + ) + client.analyze_block_reason = AsyncMock(return_value=None) + client.get_required_status_checks = AsyncMock(return_value=[]) + client.requires_commit_signatures = AsyncMock(return_value=True) + + with ( + patch( + "dependamerge.rebase.local_rebase_pr", + new_callable=AsyncMock, + return_value=True, + ) as mock_local_rebase, + patch.object( + mgr, + "_detect_github2gerrit", + new_callable=AsyncMock, + return_value=GitHub2GerritDetectionResult(), + ), + patch.object( + mgr, + "_get_merge_method_for_repo", + new_callable=AsyncMock, + return_value="merge", + ), + patch.object( + mgr, + "_trigger_stale_precommit_ci", + new_callable=AsyncMock, + return_value=False, + ), + patch.object( + mgr, + "_check_merge_requirements", + new_callable=AsyncMock, + return_value=(True, ""), + ), + patch.object( + mgr, + "_approve_pr", + new_callable=AsyncMock, + return_value=True, + ), + patch.object( + mgr, + "_merge_pr_with_retry", + new_callable=AsyncMock, + return_value=True, + ), + ): + await mgr._merge_single_pr(pr) + + mock_local_rebase.assert_not_awaited() + client.update_branch.assert_awaited_once_with("owner", "repo", 42) + + +# --------------------------------------------------------------------------- +# 3. 
authed_clone_url helper
+# ---------------------------------------------------------------------------
+
+
+class TestAuthedCloneUrl:
+    """Token injection mirrors ``FixOrchestrator._authed_url``."""
+
+    def test_https_url_gets_token(self) -> None:
+        url = rebase_module.authed_clone_url(
+            "https://github.com/owner/repo.git", "abc123"
+        )
+        assert url == "https://x-access-token:abc123@github.com/owner/repo.git"
+
+    def test_ssh_url_unchanged(self) -> None:
+        ssh = "git@github.com:owner/repo.git"
+        assert rebase_module.authed_clone_url(ssh, "abc123") == ssh
+
+    def test_git_protocol_url_unchanged(self) -> None:
+        url = "git://github.com/owner/repo.git"
+        assert rebase_module.authed_clone_url(url, "abc123") == url
+
+
+# ---------------------------------------------------------------------------
+# 4. Local-rebase path conditionally skips _rebased_prs marking
+# ---------------------------------------------------------------------------
+
+
+class TestLocalRebaseAutoMergeUnavailable:
+    """Verify the conditional ``_rebased_prs`` invariant.
+
+    When local rebase succeeds but auto-merge cannot be enabled
+    (repo doesn't allow it, branch protection blocks it, etc.),
+    we must **not** mark ``_rebased_prs`` — doing so would skip
+    Step 5.5 and let Step 6 attempt a manual merge while GitHub
+    is still recomputing mergeability after the force-push,
+    which would 405 transiently.
+
+    When auto-merge *is* enabled, marking ``_rebased_prs`` is
+    correct: Step 5.5's wait would be redundant since auto-merge
+    handles the wait server-side, and the marker keeps Step 6's
+    skip gate firing for ``AUTO_MERGE_PENDING``.
+ """ + + @pytest.mark.asyncio + async def test_auto_merge_unavailable_leaves_pr_unmarked(self) -> None: + """enable_auto_merge False → PR not in ``_rebased_prs``.""" + mgr, client = _make_mgr(merge_timeout=0.1, fix_out_of_date=True) + pr = _make_pr(author="pre-commit-ci[bot]", mergeable_state="behind") + + client.update_branch = AsyncMock() + # Repo doesn't allow auto-merge — enable_auto_merge returns False. + client.enable_auto_merge = AsyncMock(return_value=False) + # Refresh keeps the PR ``behind`` (transient post-push state). + client.get = AsyncMock( + return_value={ + "mergeable": True, + "mergeable_state": "behind", + "state": "open", + } + ) + client.analyze_block_reason = AsyncMock(return_value=None) + client.get_required_status_checks = AsyncMock(return_value=[]) + client.requires_commit_signatures = AsyncMock(return_value=True) + client.post_issue_comment = AsyncMock() + + with ( + patch( + "dependamerge.rebase.local_rebase_pr", + new_callable=AsyncMock, + return_value=True, + ), + patch.object( + mgr, + "_detect_github2gerrit", + new_callable=AsyncMock, + return_value=GitHub2GerritDetectionResult(), + ), + patch.object( + mgr, + "_get_merge_method_for_repo", + new_callable=AsyncMock, + return_value="merge", + ), + patch.object( + mgr, + "_trigger_stale_precommit_ci", + new_callable=AsyncMock, + return_value=False, + ), + patch.object( + mgr, + "_check_merge_requirements", + new_callable=AsyncMock, + return_value=(True, ""), + ), + patch.object( + mgr, + "_approve_pr", + new_callable=AsyncMock, + return_value=True, + ), + patch.object( + mgr, + "_merge_pr_with_retry", + new_callable=AsyncMock, + return_value=True, + ), + ): + await mgr._merge_single_pr(pr) + + # REST update-branch must still NOT fire (signature + # preservation invariant). + client.update_branch.assert_not_awaited() + # _rebased_prs must NOT be populated, so Step 5.5 still + # runs its bounded wait. 
This gives GitHub time to + # recompute mergeability after the force-push before any + # manual merge attempt in Step 6. + assert "owner/repo#42" not in mgr._rebased_prs + + +# --------------------------------------------------------------------------- +# 5. local_rebase_pr fails closed when head repo identity is unknown +# --------------------------------------------------------------------------- + + +class TestLocalRebaseFailClosed: + """Verify local_rebase_pr() refuses to push when head repo is unknown. + + For fork PRs, synthesising a clone URL from the base repo name + would push to the upstream repo instead of the fork (creating + or overwriting a branch on someone else's repository). When + ``head_repo_full_name`` and ``head_repo_clone_url`` are both + unset we cannot tell whether the PR is from a fork, so we + fail closed to avoid the dangerous mis-target. + """ + + @pytest.mark.asyncio + async def test_missing_head_repo_identity_returns_false( + self, + ) -> None: + """head_repo_full_name + head_repo_clone_url both None → False.""" + import logging + + from dependamerge.models import PullRequestInfo + + # Construct a PR with NEITHER head_repo identifier set. + # This mimics the production state where ``PullRequestInfo`` + # objects from the merge workflow don't populate the + # optional head/base repo fields (they're only set in the + # interactive fix flow). 
+ pr = PullRequestInfo( + number=42, + node_id="PR_node42", + title="Test PR", + body="", + author="dependabot[bot]", + head_sha="abc123", + base_branch="main", + head_branch="feature", + state="open", + mergeable=True, + mergeable_state="behind", + behind_by=2, + files_changed=[], + repository_full_name="owner/repo", + html_url="https://github.com/owner/repo/pull/42", + reviews=[], + review_comments=[], + head_repo_full_name=None, + head_repo_clone_url=None, + base_repo_full_name=None, + base_repo_clone_url=None, + is_fork=None, + ) + + result = await rebase_module.local_rebase_pr( + pr_info=pr, + owner="owner", + repo="repo", + token="fake-token", + log=logging.getLogger("test"), + ) + # Must NOT attempt the rebase — we'd risk pushing to the + # base repo for a fork PR. The caller falls through to + # auto-merge, which is always safe. + assert result is False diff --git a/tests/test_merge_dispatch_lock.py b/tests/test_merge_dispatch_lock.py new file mode 100644 index 0000000..38df8d8 --- /dev/null +++ b/tests/test_merge_dispatch_lock.py @@ -0,0 +1,128 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: 2025 The Linux Foundation + +""" +Unit tests for ``AsyncMergeManager._get_merge_dispatch_lock``. + +The dispatch lock decouples the actual ``merge_pull_request`` API +call from the Step 5.5 auto-merge wait loop: + +* Workers targeting the same repository serialise on the lock so + back-to-back merges don't race GitHub's branch-protection + propagation. +* Workers targeting different repositories receive distinct locks + and can dispatch in parallel. +* The wait loops in Step 5.5 do not hold the lock, so a PR parked + waiting for required checks no longer head-of-line blocks the + rest of the worker pool. 
+""" + +from __future__ import annotations + +import asyncio +from typing import Any + +import pytest + + +def _make_manager(**overrides: Any): + """Build an AsyncMergeManager using the project's typed mock helper.""" + from tests.conftest import make_merge_manager + + defaults: dict[str, Any] = {"preview_mode": False} + defaults.update(overrides) + return make_merge_manager(**defaults) + + +class TestMergeDispatchLockIdentity: + """The lock map is keyed by ``owner/repo`` and shared correctly.""" + + @pytest.mark.asyncio + async def test_same_repo_returns_same_lock(self) -> None: + mgr, _client = _make_manager() + a = await mgr._get_merge_dispatch_lock("acme", "widgets") + b = await mgr._get_merge_dispatch_lock("acme", "widgets") + assert a is b + + @pytest.mark.asyncio + async def test_different_repos_return_distinct_locks(self) -> None: + mgr, _client = _make_manager() + a = await mgr._get_merge_dispatch_lock("acme", "widgets") + b = await mgr._get_merge_dispatch_lock("acme", "gizmos") + c = await mgr._get_merge_dispatch_lock("other-org", "widgets") + assert a is not b + assert a is not c + assert b is not c + + @pytest.mark.asyncio + async def test_concurrent_first_acquire_does_not_duplicate_lock(self) -> None: + """Two coroutines requesting the same lock at the same time + must receive the *same* ``asyncio.Lock`` instance. + + Without the outer ``_merge_dispatch_locks_lock`` guard this + would race and produce two distinct locks for the same repo, + which would defeat the serialisation guarantee. 
+ """ + mgr, _client = _make_manager() + results = await asyncio.gather( + mgr._get_merge_dispatch_lock("acme", "widgets"), + mgr._get_merge_dispatch_lock("acme", "widgets"), + mgr._get_merge_dispatch_lock("acme", "widgets"), + ) + assert results[0] is results[1] is results[2] + + +class TestMergeDispatchLockSerialisation: + """The lock genuinely serialises critical sections per repo.""" + + @pytest.mark.asyncio + async def test_same_repo_critical_sections_serialise(self) -> None: + """Two workers entering the lock for the same repo must run + their critical sections strictly one-after-the-other.""" + mgr, _client = _make_manager() + active = 0 + peak = 0 + + async def critical(owner: str, repo: str) -> None: + nonlocal active, peak + lock = await mgr._get_merge_dispatch_lock(owner, repo) + async with lock: + active += 1 + peak = max(peak, active) + # Yield to the event loop so a competing coroutine + # would have a chance to enter the section if the + # lock failed to serialise. + await asyncio.sleep(0.01) + active -= 1 + + await asyncio.gather( + critical("acme", "widgets"), + critical("acme", "widgets"), + critical("acme", "widgets"), + ) + assert peak == 1 + + @pytest.mark.asyncio + async def test_different_repos_run_in_parallel(self) -> None: + """Workers on distinct repos hold distinct locks, so their + critical sections must overlap when run concurrently.""" + mgr, _client = _make_manager() + active = 0 + peak = 0 + + async def critical(owner: str, repo: str) -> None: + nonlocal active, peak + lock = await mgr._get_merge_dispatch_lock(owner, repo) + async with lock: + active += 1 + peak = max(peak, active) + await asyncio.sleep(0.01) + active -= 1 + + await asyncio.gather( + critical("acme", "alpha"), + critical("acme", "beta"), + critical("acme", "gamma"), + ) + # All three should be in their critical sections at once. 
+ assert peak == 3 diff --git a/tests/test_stuck_dco_detection.py b/tests/test_stuck_dco_detection.py new file mode 100644 index 0000000..b8d68ae --- /dev/null +++ b/tests/test_stuck_dco_detection.py @@ -0,0 +1,379 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: 2025 The Linux Foundation + +""" +Unit tests for ``AsyncMergeManager._detect_stuck_dco``. + +Covers: +- DCO check stuck longer than the threshold on a sufficiently old + PR -> detected (returns ``(True, name, age)``). +- Sub-threshold stuck DCO check -> not detected. +- Stuck pending check whose name is not DCO-shaped -> not detected. +- DCO check stuck but PR was just touched (updated_at younger than + threshold) -> not detected (age floor protects against false + positives on freshly-pushed PRs). +- DCO check completed -> not detected. +- Stuck status-context (older API) DCO entry -> detected. +- API failures degrade to ``(False, None, 0.0)``. +- ``_merge_single_pr`` triggers ``_trigger_dependabot_recreate`` when + ``_detect_stuck_dco`` returns True for a dependabot PR. 
+""" + +from datetime import datetime, timedelta, timezone +from typing import Any +from unittest.mock import AsyncMock + +import pytest + +from dependamerge.merge_manager import STUCK_DCO_THRESHOLD_SECONDS +from dependamerge.models import PullRequestInfo + + +def _iso(dt: datetime) -> str: + """Return ``dt`` in the RFC 3339 ``...Z`` form GitHub emits.""" + return dt.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + +def _make_pr_info(**overrides: Any) -> PullRequestInfo: + """Helper to build a PullRequestInfo with sensible defaults.""" + defaults: dict[str, Any] = { + "number": 342, + "title": "Chore: Bump foo from 1 to 2", + "body": "Dependabot PR", + "author": "dependabot[bot]", + "head_sha": "abc123", + "base_branch": "main", + "head_branch": "dependabot/foo", + "state": "open", + "mergeable": True, + "mergeable_state": "blocked", + "behind_by": 0, + "files_changed": [], + "repository_full_name": "lfreleng-actions/lftools-uv", + "html_url": "https://github.com/lfreleng-actions/lftools-uv/pull/342", + } + defaults.update(overrides) + return PullRequestInfo(**defaults) + + +def _make_manager(**overrides: Any): + """Build an AsyncMergeManager with a mocked GitHub client. + + Returns ``(manager, client)`` — see ``tests/conftest.py`` for the + typed-mock-client pattern. + """ + from tests.conftest import make_merge_manager + + defaults: dict[str, Any] = {"preview_mode": False} + defaults.update(overrides) + return make_merge_manager(**defaults) + + +def _build_get_responder( + pr_response: dict[str, Any] | None = None, + check_runs_response: dict[str, Any] | None = None, + status_response: dict[str, Any] | None = None, +): + """Return an ``AsyncMock``-compatible side effect dispatcher. + + Routes ``GET`` requests to the right canned response based on the + URL path — this mirrors how ``_detect_stuck_dco`` actually calls + the client. 
+ """ + + async def _get(path: str, *args: Any, **kwargs: Any) -> Any: + if path.endswith("/check-runs"): + return check_runs_response + if path.endswith("/status"): + return status_response + # Anything else is the PR detail fetch. + return pr_response + + return _get + + +# --------------------------------------------------------------------------- +# _detect_stuck_dco — positive and negative cases +# --------------------------------------------------------------------------- + + +class TestDetectStuckDcoCheckRuns: + """Detection via the modern ``check-runs`` API.""" + + @pytest.mark.asyncio + async def test_detects_stuck_dco_check_run(self) -> None: + """A queued/in_progress DCO check older than the threshold + on an old-enough PR is detected.""" + mgr, client = _make_manager() + now = datetime.now(timezone.utc) + old = now - timedelta(seconds=STUCK_DCO_THRESHOLD_SECONDS + 30) + pr = _make_pr_info() + + client.get = AsyncMock( + side_effect=_build_get_responder( + pr_response={ + "created_at": _iso(old), + "updated_at": _iso(old), + }, + check_runs_response={ + "check_runs": [ + { + "name": "DCO", + "status": "in_progress", + "conclusion": None, + "started_at": _iso(old), + } + ], + }, + status_response={"statuses": []}, + ) + ) + + is_stuck, name, age = await mgr._detect_stuck_dco(pr) + assert is_stuck is True + assert name == "DCO" + assert age >= STUCK_DCO_THRESHOLD_SECONDS + + @pytest.mark.asyncio + async def test_sub_threshold_age_is_not_stuck(self) -> None: + """A DCO check pending for less than the threshold is not stuck.""" + mgr, client = _make_manager() + now = datetime.now(timezone.utc) + # PR is old enough; the check itself is too young. 
+ pr_age = now - timedelta(seconds=STUCK_DCO_THRESHOLD_SECONDS + 30) + check_started = now - timedelta(seconds=10) + pr = _make_pr_info() + + client.get = AsyncMock( + side_effect=_build_get_responder( + pr_response={ + "created_at": _iso(pr_age), + "updated_at": _iso(check_started), + }, + check_runs_response={ + "check_runs": [ + { + "name": "DCO", + "status": "in_progress", + "started_at": _iso(check_started), + } + ], + }, + status_response={"statuses": []}, + ) + ) + + # The PR-level idle floor (updated_at) catches this case + # before we even look at check timing. + is_stuck, name, age = await mgr._detect_stuck_dco(pr) + assert is_stuck is False + assert name is None + assert age == 0.0 + + @pytest.mark.asyncio + async def test_completed_dco_is_not_stuck(self) -> None: + """A completed DCO check (success or failure) is not stuck.""" + mgr, client = _make_manager() + now = datetime.now(timezone.utc) + old = now - timedelta(seconds=STUCK_DCO_THRESHOLD_SECONDS + 30) + pr = _make_pr_info() + + client.get = AsyncMock( + side_effect=_build_get_responder( + pr_response={ + "created_at": _iso(old), + "updated_at": _iso(old), + }, + check_runs_response={ + "check_runs": [ + { + "name": "DCO", + "status": "completed", + "conclusion": "success", + "started_at": _iso(old), + } + ], + }, + status_response={"statuses": []}, + ) + ) + + is_stuck, name, age = await mgr._detect_stuck_dco(pr) + assert is_stuck is False + assert name is None + + @pytest.mark.asyncio + async def test_stuck_non_dco_check_is_ignored(self) -> None: + """A stuck check whose name is not DCO-shaped is not detected.""" + mgr, client = _make_manager() + now = datetime.now(timezone.utc) + old = now - timedelta(seconds=STUCK_DCO_THRESHOLD_SECONDS + 30) + pr = _make_pr_info() + + client.get = AsyncMock( + side_effect=_build_get_responder( + pr_response={ + "created_at": _iso(old), + "updated_at": _iso(old), + }, + check_runs_response={ + "check_runs": [ + { + "name": "build", + "status": "in_progress", + 
"started_at": _iso(old), + } + ], + }, + status_response={"statuses": []}, + ) + ) + + is_stuck, name, age = await mgr._detect_stuck_dco(pr) + assert is_stuck is False + assert name is None + + @pytest.mark.asyncio + async def test_pr_age_floor_blocks_detection(self) -> None: + """A young PR (created < threshold ago) is never flagged.""" + mgr, client = _make_manager() + now = datetime.now(timezone.utc) + young = now - timedelta(seconds=10) + pr = _make_pr_info() + + client.get = AsyncMock( + side_effect=_build_get_responder( + pr_response={ + "created_at": _iso(young), + "updated_at": _iso(young), + }, + check_runs_response={ + "check_runs": [ + { + "name": "DCO", + "status": "in_progress", + "started_at": _iso(young), + } + ], + }, + status_response={"statuses": []}, + ) + ) + + is_stuck, name, age = await mgr._detect_stuck_dco(pr) + assert is_stuck is False + assert name is None + + @pytest.mark.asyncio + async def test_dco_slash_dco_name_variant_matches(self) -> None: + """The ``dco/dco`` status-context naming is matched.""" + mgr, client = _make_manager() + now = datetime.now(timezone.utc) + old = now - timedelta(seconds=STUCK_DCO_THRESHOLD_SECONDS + 30) + pr = _make_pr_info() + + client.get = AsyncMock( + side_effect=_build_get_responder( + pr_response={ + "created_at": _iso(old), + "updated_at": _iso(old), + }, + check_runs_response={ + "check_runs": [ + { + "name": "dco/dco", + "status": "queued", + "started_at": _iso(old), + } + ], + }, + status_response={"statuses": []}, + ) + ) + + is_stuck, name, _age = await mgr._detect_stuck_dco(pr) + assert is_stuck is True + assert name == "dco/dco" + + +class TestDetectStuckDcoStatusContexts: + """Detection via the older ``commits/{sha}/status`` API.""" + + @pytest.mark.asyncio + async def test_detects_stuck_dco_status_context(self) -> None: + mgr, client = _make_manager() + now = datetime.now(timezone.utc) + old = now - timedelta(seconds=STUCK_DCO_THRESHOLD_SECONDS + 30) + pr = _make_pr_info() + + client.get = 
AsyncMock( + side_effect=_build_get_responder( + pr_response={ + "created_at": _iso(old), + "updated_at": _iso(old), + }, + check_runs_response={"check_runs": []}, + status_response={ + "statuses": [ + { + "context": "DCO", + "state": "pending", + "updated_at": _iso(old), + } + ], + }, + ) + ) + + is_stuck, name, age = await mgr._detect_stuck_dco(pr) + assert is_stuck is True + assert name == "DCO" + assert age >= STUCK_DCO_THRESHOLD_SECONDS + + +class TestDetectStuckDcoRobustness: + """Defensive behaviour when the GitHub API misbehaves.""" + + @pytest.mark.asyncio + async def test_pr_fetch_failure_returns_safe_default(self) -> None: + mgr, client = _make_manager() + client.get = AsyncMock(side_effect=RuntimeError("boom")) + pr = _make_pr_info() + + is_stuck, name, age = await mgr._detect_stuck_dco(pr) + assert is_stuck is False + assert name is None + assert age == 0.0 + + @pytest.mark.asyncio + async def test_unparseable_timestamps_return_safe_default(self) -> None: + mgr, client = _make_manager() + pr = _make_pr_info() + + client.get = AsyncMock( + side_effect=_build_get_responder( + pr_response={ + "created_at": "not-a-real-date", + "updated_at": None, + }, + check_runs_response={"check_runs": []}, + status_response={"statuses": []}, + ) + ) + + is_stuck, name, age = await mgr._detect_stuck_dco(pr) + assert is_stuck is False + assert name is None + assert age == 0.0 + + @pytest.mark.asyncio + async def test_no_github_client_returns_safe_default(self) -> None: + mgr, _client = _make_manager() + # Simulate the manager being used outside the async context. + mgr._github_client = None + pr = _make_pr_info() + + is_stuck, name, age = await mgr._detect_stuck_dco(pr) + assert is_stuck is False + assert name is None + assert age == 0.0