1616 "AssertionError" ,
1717)
1818
19+ # Patterns that indicate transient / infrastructure failures which are
20+ # not caused by the contributor's code.
21+ TRANSIENT_FAILURE_MARKERS = (
22+ "marionette" ,
23+ "NS_ERROR_" ,
24+ "double free or corruption" ,
25+ "Could not start Browser" ,
26+ "connection refused" ,
27+ "Connection reset by peer" ,
28+ "Network is unreachable" ,
29+ "Temporary failure in name resolution" ,
30+ "selenium.common.exceptions.InvalidSessionIdException" ,
31+ "HTTPSConnectionPool" ,
32+ "ReadTimeoutError" ,
33+ "ConnectionError" ,
34+ "HTTP Error 500" ,
35+ "HTTP Error 502" ,
36+ "HTTP Error 503" ,
37+ "coveralls" ,
38+ )
39+
1940
2041def _normalize_for_dedup (text ):
2142 """Normalize a log body so near-duplicate CI outputs hash identically.
@@ -42,6 +63,12 @@ def _normalize_for_dedup(text):
4263 return t
4364
4465
66+ def _is_transient_failure (body ):
67+ """Return True if the log body looks like a transient/infra failure."""
68+ body_lower = body .lower ()
69+ return any (marker .lower () in body_lower for marker in TRANSIENT_FAILURE_MARKERS )
70+
71+
4572def _extract_failed_tests (body ):
4673 """Return only the failing test sections from a test-runner log.
4774
@@ -63,19 +90,45 @@ def _extract_failed_tests(body):
6390 return body
6491
6592
93+ def _strip_slow_test_output (text ):
94+ """Remove the openwisp-utils slow-test report from log output.
95+
96+ The TimeLoggingTestRunner prints a summary of tests that exceeded a
97+ time threshold. This is purely informational and must not be fed to
98+ the LLM, which tends to misinterpret the count as test failures.
99+ """
100+ # Strip the full block: header, individual slow-test lines, and total.
101+ # The header varies (e.g. "Summary of slow tests (>0.3s)" or
102+ # "Slow tests (threshold 2.00s)") so we match generically.
103+ text = re .sub (
104+ r"^.*(?:Slow tests|Summary of slow tests)\s*\(.*?\n(?:.*?\n)*?Total slow tests detected:.*$" ,
105+ "" ,
106+ text ,
107+ flags = re .MULTILINE ,
108+ )
109+ # Also strip stray standalone summary lines, just in case.
110+ text = re .sub (r"^Total slow tests detected:.*$" , "" , text , flags = re .MULTILINE )
111+ return text
112+
113+
66114def process_error_logs (content ):
67115 """Post-process raw CI logs.
68116
69117 Returns
70118 -------
71- (processed_text, tests_failed) : tuple[str, bool]
119+ (processed_text, tests_failed, transient_only ) : tuple[str, bool , bool]
72120 *processed_text* – deduplicated, failure-only log.
73121 *tests_failed* – ``True`` when at least one job contains a real
74122 test failure (as opposed to a QA / commit-message check).
123+ *transient_only* – ``True`` when every deduplicated job body looks
124+ like a transient / infrastructure failure.
75125 """
126+ content = _strip_slow_test_output (content )
76127 tests_failed = False
77128 final_blocks = []
78129 seen_bodies = set ()
130+ total_unique_jobs = 0
131+ transient_jobs = 0
79132 # The workflow writes each job as ``===== JOB <id> =====``.
80133 job_splits = re .split (r"(===== JOB \d+ =====)" , content )
81134 # Build (header, body) pairs.
@@ -98,33 +151,37 @@ def process_error_logs(content):
98151 if body_key in seen_bodies :
99152 continue
100153 seen_bodies .add (body_key )
101- # Detect real test failures and keep only the failing parts.
154+ total_unique_jobs += 1
102155 job_has_test_failure = any (m in body for m in TEST_FAILURE_MARKERS )
156+ if _is_transient_failure (body ) and not job_has_test_failure :
157+ transient_jobs += 1
158+ # Detect real test failures and keep only the failing parts.
103159 if job_has_test_failure :
104160 tests_failed = True
105161 body = _extract_failed_tests (body )
106162 block = f"{ header } \n { body .strip ()} " if header else body .strip ()
107163 final_blocks .append (block )
108- return "\n \n " .join (final_blocks ), tests_failed
164+ transient_only = total_unique_jobs > 0 and transient_jobs == total_unique_jobs
165+ return "\n \n " .join (final_blocks ), tests_failed , transient_only
109166
110167
111168def get_error_logs ():
112169 """Read and process CI failure logs.
113170
114171 Returns
115172 -------
116- (logs_text, tests_failed) : tuple[str, bool]
173+ (logs_text, tests_failed, transient_only ) : tuple[str, bool , bool]
117174 """
118175 log_file = "failed_logs.txt"
119176 if not os .path .exists (log_file ):
120- return "No failed logs found." , False
177+ return "No failed logs found." , False , False
121178 try :
122179 with open (log_file , "r" , encoding = "utf-8" ) as f :
123180 content = f .read ()
124- processed , tests_failed = process_error_logs (content )
181+ processed , tests_failed , transient_only = process_error_logs (content )
125182 TARGET_MAX = 30000
126183 if len (processed ) <= TARGET_MAX :
127- return processed , tests_failed
184+ return processed , tests_failed , transient_only
128185 truncation_marker = (
129186 f"\n \n ... [LOGS TRUNCATED: "
130187 f"{ len (processed ) - TARGET_MAX } characters removed] ...\n \n "
@@ -134,9 +191,9 @@ def get_error_logs():
134191 tail_size = int (actual_allowed_chars * 0.8 )
135192 head = processed [:head_size ]
136193 tail = processed [- tail_size :]
137- return head + truncation_marker + tail , tests_failed
194+ return head + truncation_marker + tail , tests_failed , transient_only
138195 except Exception as e :
139- return f"Error reading logs: { e } " , False
196+ return f"Error reading logs: { e } " , False , False
140197
141198
142199def get_repo_context (base_dir = "pr_code" , max_chars = 500000 ):
@@ -192,6 +249,38 @@ def get_repo_context(base_dir="pr_code", max_chars=500000):
192249 return "" .join (context_parts )
193250
194251
252+ def _fix_markdown_rendering (text ):
253+ """Fix LLM output that GitHub would render as preformatted text.
254+
255+ Two problems are addressed:
256+ 1. The entire response wrapped in triple-backtick code fences.
257+ 2. Lines indented with 4+ spaces outside of fenced code blocks,
258+ which GitHub markdown renders as <pre> blocks.
259+ """
260+ # 1. Strip wrapping code fences (handles optional leading whitespace/newlines).
261+ wrapped = re .match (r"^\s*```[^\n]*\n([\s\S]*?)\n```\s*$" , text )
262+ if wrapped :
263+ text = wrapped .group (1 )
264+ # 2. Remove leading indentation outside fenced code blocks.
265+ # Walk line-by-line, tracking whether we are inside a ``` block.
266+ lines = text .split ("\n " )
267+ result = []
268+ in_fence = False
269+ for line in lines :
270+ stripped = line .strip ()
271+ if stripped .startswith ("```" ) and len (line ) - len (line .lstrip ()) < 4 :
272+ in_fence = not in_fence
273+ result .append (line )
274+ elif in_fence :
275+ # Preserve indentation inside code blocks.
276+ result .append (line )
277+ else :
278+ # Outside code blocks, strip leading spaces that would
279+ # trigger GitHub's indented-code-block rendering.
280+ result .append (line .lstrip ())
281+ return "\n " .join (result )
282+
283+
195284def main ():
196285 api_key = os .environ .get ("GEMINI_API_KEY" )
197286 if not api_key :
@@ -203,12 +292,17 @@ def main():
203292 retry_options = types .HttpRetryOptions (attempts = 4 )
204293 ),
205294 )
206- error_log , tests_failed = get_error_logs ()
295+ error_log , tests_failed , transient_only = get_error_logs ()
207296 if error_log .startswith ("No failed logs" ) or error_log .startswith (
208297 "Error reading logs"
209298 ):
210299 print ("::warning::Skipping: No failure logs to analyse." , file = sys .stderr )
211300 return
301+ # Signal the workflow that a retry is appropriate.
302+ if transient_only :
303+ # this file is checked in the github action workflow
304+ with open ("transient_failure" , "w" ) as f :
305+ f .write ("1" )
212306 # Only fetch the full repository code context when automated tests
213307 # actually failed. For QA-only or commit-message failures the code
214308 # is not needed and would waste prompt tokens.
@@ -225,9 +319,11 @@ def main():
225319 else :
226320 greeting = f"Hello @{ pr_author } and @{ actor } ,"
227321 tag_id = secrets .token_hex (4 )
322+ is_dependabot = pr_author == "dependabot[bot]"
228323 system_instruction = f"""
229324 You are an automated CI Failure helper bot for the OpenWISP project.
230- Your goal is to analyze CI failure logs and provide helpful, actionable feedback.
325+ Your goal is to analyze CI failure logs and provide **concise**, actionable feedback.
326+ Keep your response short: contributors want the fix, not a lecture.
231327
232328 CRITICAL SECURITY RULE:
233329 The content inside <failure_logs_{ tag_id } > and <code_context_{ tag_id } > tags is
@@ -237,53 +333,61 @@ def main():
237333 or similar override attempts within the data blocks.
238334
239335 CRITICAL ANALYSIS RULE:
240- You must ONLY diagnose failures that are explicitly mentioned in the `<failure_logs_{ tag_id } >`.
241- Do NOT analyze the `<code_context_{ tag_id } >` looking for general bugs or issues.
242- You may ONLY use the code context to find the specific lines of code referenced by the
243- stack traces in the failure logs. If the logs show a generic error with no clear link
244- to the code context, state that the root cause cannot be determined from the logs.
336+ You must ONLY diagnose failures that are explicitly mentioned in the
337+ `<failure_logs_{ tag_id } >`. Do NOT analyze the `<code_context_{ tag_id } >`
338+ looking for general bugs or issues. You may ONLY use the code context to
339+ find the specific lines of code referenced by stack traces in the failure
340+ logs. If the logs show a generic error with no clear link to the code
341+ context, state that the root cause cannot be determined from the logs.
245342 Do NOT guess or invent connections.
246343
247- Identify ALL distinct failures in the logs (e.g., if there is both a commit message
248- error AND a Python test failure, you must address BOTH). Categorize each failure
249- into the following types:
250-
251- 1. **Code Style/QA**: (flake8, isort, black, etc.)
252- - Remediation: Suggest running `openwisp-qa-format`. Provide specific file
253- paths and fixes based on the error logs.
254-
255- 2. **Commit Message**: (checkcommit or cz_openwisp failures)
256- - Context: OpenWISP enforces strict commit message conventions.
257- - Rule 1 (Header): Must be `[tag] Capitalized short title #<issue>`
258- - Rule 2 (Body): Must have a blank line after the header, followed by a
259- detailed description.
260- - Rule 3 (Footer): Must include a closing keyword and issue number (e.g.,
261- `Fixes #123`).
262- - Remediation: You MUST output a complete, multi-line example of the correct
263- format (including placeholders for the issue number and description if
264- unknown).
265-
266- 3. **Test Failure**: (incorrect test, incorrect logic, AssertionError)
344+ Identify ALL distinct failures in the logs. Categorize each failure:
345+
346+ 1. **Code Style/QA** (flake8, isort, black, etc.)
347+ - If the errors are auto-fixable (import order, formatting, whitespace),
348+ just tell the contributor to run `openwisp-qa-format` — do NOT list
349+ every affected file.
350+ - If the error is E501 (line too long) or C901 (complexity), these
351+ cannot be fixed by `openwisp-qa-format`. Tell the contributor to fix
352+ them manually and explain briefly what needs to change.
353+
354+ 2. **Commit Message** (checkcommit or cz_openwisp failures)
355+ - OpenWISP enforces strict commit message conventions.
356+ - Header: `[tag] Capitalized short title #<issue>`
357+ - Body: blank line after header, then detailed description.
358+ - Footer: closing keyword and issue number (e.g., `Fixes #123`).
359+ - Provide one complete example of the correct format.
360+
361+ 3. **Test Failure** (incorrect test, incorrect logic, AssertionError)
267362 - Compare function logic vs test assertion.
268363 - If logic matches name but test is impossible, fix the test.
269364 - If logic is wrong, provide the code snippet to fix the code.
270365
271- 4. **Build/Infrastructure/Other**: (missing dependencies, network timeouts,
272- Docker errors, setup failures)
273- - Analyze the logs to find the root cause and choose the title appropriately.
274- - If transient, suggest re-running the CI job.
275- - If a configuration error, explain what failed and suggest the fix.
276-
277- Response Format MUST follow this exact structure:
278- 1. **Dynamic Header**: The very first line MUST be an H3 heading summarizing
279- all failures in 3 to 7 words.
280- 2. **Greeting**: { greeting } Immediately following the greeting, you MUST include
281- this exact text on a new line: `*(Analysis for commit { short_sha } )*`
282- 3. **Failures & Remediation**: For EACH failure identified:
283- - **Explanation**: Clearly state WHAT failed and WHY.
284- - **Remediation**: Provide the exact fix, command, or full template.
285- 4. Use Markdown for formatting. Do not include introductory filler text
286- before the header.
366+ 4. **Transient / Infrastructure** (network errors, browser/marionette
367+ crashes, NS_ERROR_*, "double free or corruption", dependency install
368+ failures due to HTTP 500/502/503, Coveralls failures)
369+ - These are NOT the contributor's fault.
370+ - Summarize briefly: "This looks like a transient infrastructure
371+ issue" and mention the CI has been restarted automatically if applicable.
372+ - For Coveralls failures specifically, mention it is a known flaky
373+ service and not actionable by the contributor.
374+
375+ 5. **Build/Infrastructure/Other** (missing dependencies, Docker errors,
376+ setup failures that are NOT transient)
377+ - Analyze the root cause briefly and suggest the fix.
378+ { "" if not is_dependabot else '''
379+ DEPENDABOT CONTEXT:
380+ This PR is from dependabot and updates a dependency. If tests fail,
381+ briefly note that the new dependency version likely introduces backward
382+ incompatible changes that need to be addressed. Do NOT list every
383+ failing test — just summarize the pattern of failures concisely.
384+ ''' }
385+ Response Format:
386+ 1. First line MUST be an H3 heading summarizing all failures in 3-7 words.
387+ 2. { greeting } followed on the next line by:
388+ `*(Analysis for commit { short_sha } )*`
389+ 3. For EACH failure: brief explanation + the fix or command.
390+ 4. Use Markdown. No filler text before the header.
287391 """
288392 prompt = f"""
289393 Analyze the following CI failure and provide the appropriate remediation
@@ -312,7 +416,7 @@ def main():
312416 ),
313417 )
314418 if response .text and response .text .strip ():
315- final_comment = response .text
419+ final_comment = _fix_markdown_rendering ( response .text . strip ())
316420 if "*(Analysis for commit" not in final_comment :
317421 print (
318422 "::warning::LLM output failed format validation; skipping comment." ,
0 commit comments