-
Notifications
You must be signed in to change notification settings - Fork 55
[CI] Use failure anchor for Stage/CODEPATH #2299
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -260,24 +260,85 @@ Map<String,String> classifyBuildFailure(String logText) { | |
| } | ||
|
|
||
| // Scenario 7: MIGraphX CMake configuration failed. | ||
| // Guard with stage position to avoid misclassifying CMake failures from other stages. | ||
| def migraphxStagePos = logText.lastIndexOf('Build and Verify MIGraphX with MLIR') | ||
| // Match by context around "Configuring incomplete" (MIGraphX path or composable_kernel_host) so we don't rely on stage order in interleaved logs. | ||
| def cmakeConfigErrorPos = logText.lastIndexOf('Configuring incomplete, errors occurred!') | ||
| if (!reason && migraphxStagePos >= 0 && cmakeConfigErrorPos > migraphxStagePos) { | ||
| reason = 'MIGraphX: CMake configuration failed (check CMakeError.log / CMakeOutput.log)' | ||
| if (!reason && cmakeConfigErrorPos >= 0) { | ||
| def ctxStart = Math.max(0, cmakeConfigErrorPos - 4000) | ||
| def ctxAround = logText.substring(ctxStart, Math.min(logText.length(), cmakeConfigErrorPos + 500)) | ||
| if (ctxAround.contains('MIGraphX') || ctxAround.contains('composable_kernel_host') || ctxAround.contains('Findcomposable_kernel_host')) { | ||
| reason = 'MIGraphX: CMake configuration failed (check CMakeError.log / CMakeOutput.log)' | ||
| } | ||
| } | ||
|
|
||
| if (!reason) reason = 'Could not match a known error pattern. See build log for details.' | ||
|
|
||
| // Extract CODEPATH (e.g. "Matrix - CODEPATH = 'navi4x'" or "Running navi4x on") | ||
| def cpMatch = logText =~ /CODEPATH\s*=\s*['"]?(\w+)['"]?|Running\s+(\w+)\s+on\s+\S+/ | ||
| if (cpMatch.find()) codepath = cpMatch[0][1] ?: cpMatch[0][2] ?: '' | ||
| // Failure anchor: position in log where this failure was detected (used to extract stage/CODEPATH from the failing branch, not from later branches). | ||
| def failureAnchor = -1 | ||
|
|
||
| // Prefer detecting the anchor directly from log patterns instead of the human-facing reason text. | ||
| def scmAnchor = Math.max(logText.lastIndexOf('Maximum checkout retry attempts reached'), | ||
| logText.lastIndexOf('[SCM] Checkout failed on')) | ||
| if (scmAnchor < 0) scmAnchor = logText.lastIndexOf("ERROR: Error cloning remote repo") | ||
| if (scmAnchor < 0) scmAnchor = logText.lastIndexOf('ERROR: Checkout failed') | ||
|
|
||
| if (scmAnchor >= 0) { | ||
| failureAnchor = scmAnchor | ||
| } else { | ||
| def tuneAnchor = logText.lastIndexOf('Tuning failed: Detected errors in tuning log') | ||
| if (tuneAnchor >= 0) { | ||
| failureAnchor = tuneAnchor | ||
| } else { | ||
| def sweepsAnchor = logText.indexOf('*** Summary of failures ***') | ||
| if (sweepsAnchor >= 0) { | ||
| failureAnchor = sweepsAnchor | ||
| } else { | ||
| def hipNoDeviceAnchor = logText.lastIndexOf('hipErrorNoDevice') | ||
| if (hipNoDeviceAnchor >= 0) { | ||
| failureAnchor = hipNoDeviceAnchor | ||
| } else { | ||
| def testsFailedAnchor = logText.lastIndexOf('Failed Tests (') | ||
| if (testsFailedAnchor >= 0) { | ||
| failureAnchor = testsFailedAnchor | ||
| } else { | ||
| def migraphxAnchor = logText.lastIndexOf('Configuring incomplete, errors occurred!') | ||
| if (migraphxAnchor >= 0) { | ||
| failureAnchor = migraphxAnchor | ||
| } else { | ||
| def agentFlappingAnchor = logText.lastIndexOf('seems to be removed or offline') | ||
| if (agentFlappingAnchor >= 0) { | ||
| failureAnchor = agentFlappingAnchor | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| def searchStart = (failureAnchor >= 0) ? Math.max(0, failureAnchor - 8000) : 0 | ||
| def searchEnd = (failureAnchor >= 0) ? Math.min(logText.length(), failureAnchor + 500) : logText.length() | ||
| def contextWindow = (failureAnchor >= 0) ? logText.substring(searchStart, searchEnd) : logText | ||
| def logBeforeAnchor = (failureAnchor > 0) ? logText.substring(0, failureAnchor) : '' | ||
|
|
||
| // CODEPATH: prefer "Failed in branch Matrix - CODEPATH = 'X'" near the failure; else any CODEPATH in context window; else global. | ||
| def branchMatch = contextWindow =~ /Failed in branch Matrix - CODEPATH = ['"](\w+)['"]/ | ||
| if (branchMatch.find()) { | ||
| codepath = branchMatch.group(1) | ||
| } else { | ||
| def cpMatch = contextWindow =~ /CODEPATH\s*=\s*['"]?(\w+)['"]?|Running\s+(\w+)\s+on\s+\S+/ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Both the } else {
def cpMatchCtx = contextWindow =~ /CODEPATH\s*=\s*['"']?(\w+)['"']?|Running\s+(\w+)\s+on\s+\S+/
if (cpMatchCtx.find()) codepath = cpMatchCtx[0][1] ?: cpMatchCtx[0][2] ?: ''
}
if (!codepath) {
def cpMatchFull = logText =~ /CODEPATH\s*=\s*['"']?(\w+)['"']?|Running\s+(\w+)\s+on\s+\S+/
if (cpMatchFull.find()) codepath = cpMatchFull[0][1] ?: cpMatchFull[0][2] ?: ''
} |
||
| if (cpMatch.find()) codepath = cpMatch[0][1] ?: cpMatch[0][2] ?: '' | ||
| } | ||
| if (!codepath) { | ||
| def cpMatch = logText =~ /CODEPATH\s*=\s*['"]?(\w+)['"]?|Running\s+(\w+)\s+on\s+\S+/ | ||
| if (cpMatch.find()) codepath = cpMatch[0][1] ?: cpMatch[0][2] ?: '' | ||
| } | ||
|
|
||
| // Extract stage (last occurrence in log by position = likely failed stage) | ||
| // Stage: last stage name that appears *before* the failure anchor (so we report the stage that was running when it failed). | ||
| def stageNames = ['SCM Checkout', 'Build and Test', 'Parameter sweeps', 'Tune MLIR kernels', 'Tune rocMLIR', 'Code coverage', 'Archive performance DB', 'MIGraphX', 'Build and Verify MIGraphX with MLIR'] | ||
| def stageSearchText = (logBeforeAnchor.length() > 0) ? logBeforeAnchor : logText | ||
| def stageIdx = -1 | ||
| for (def name in stageNames) { | ||
| def idx = logText.lastIndexOf(name) | ||
| def idx = stageSearchText.lastIndexOf(name) | ||
| if (idx >= 0 && idx > stageIdx) { stage = name; stageIdx = idx } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor inconsistency:
`searchStart`, `searchEnd`, and `contextWindow` all gate on `failureAnchor >= 0`, but `logBeforeAnchor` uses `failureAnchor > 0`. When `failureAnchor == 0` both branches evaluate to `''` (because `logText.substring(0, 0)` is empty), so the result is identical in practice — but the inconsistency can cause confusion. Suggest changing to `>= 0` to match the pattern used on the three lines above: