Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 32 additions & 16 deletions .github/actions/get-job-id/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,28 +30,44 @@ runs:
# The script will first try to match by (name, runner_name), then fall back to name-only.
run: |
set -euo pipefail

job_url="https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/runs/${RUN_ID}/jobs?per_page=100"
json=$(curl -sSL \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $GITHUB_TOKEN" \
"$job_url")
max_retries=6
job_id=""
json=""

# Prefer matching both job name and the current runner name to disambiguate matrix jobs
job_id=$(jq -r --arg name "$JOB_NAME" --arg runner "$RUNNER_NAME" '
(.jobs // [])
| map(select(.name == $name and (.runner_name // "") == $runner))
| (.[0].id // empty)
' <<< "$json" )
for ((attempt = 1; attempt <= max_retries; attempt++)); do
json=$(curl -sSL \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $GITHUB_TOKEN" \
"$job_url")

# Fallback: match by name only
if [ -z "${job_id:-}" ]; then
job_id=$(jq -r --arg name "$JOB_NAME" '
(.jobs // []) | map(select(.name == $name)) | (.[0].id // empty)
# Prefer matching both job name and the current runner name to disambiguate matrix jobs
job_id=$(jq -r --arg name "$JOB_NAME" --arg runner "$RUNNER_NAME" '
(.jobs // [])
| map(select(.name == $name and (.runner_name // "") == $runner))
| (.[0].id // empty)
' <<< "$json" )
fi

# Fallback: match by name only
if [ -z "${job_id:-}" ]; then
job_id=$(jq -r --arg name "$JOB_NAME" '
(.jobs // []) | map(select(.name == $name)) | (.[0].id // empty)
' <<< "$json" )
fi

if [ -n "${job_id:-}" ] && [ "$job_id" != "null" ]; then
break
fi

if [ "$attempt" -lt "$max_retries" ]; then
echo "::notice::Job ID for '$JOB_NAME' not visible yet (attempt $attempt/$max_retries); retrying in 5s"
sleep 5
fi
done
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Claude suggested doing retries here and I thought that was a good idea; wouldn't want to fail the run because of this being flaky.


if [ -z "${job_id:-}" ] || [ "$job_id" = "null" ]; then
echo "::error::Failed to resolve job ID for name '$JOB_NAME' on runner '$RUNNER_NAME' in run '$RUN_ID'" >&2
echo "::error::Failed to resolve job ID for name '$JOB_NAME' on runner '$RUNNER_NAME' in run '$RUN_ID' after retries" >&2
exit 1
fi
echo "job_id=$job_id" >> "$GITHUB_OUTPUT"
42 changes: 18 additions & 24 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -328,15 +328,13 @@ jobs:
[ "$(find .testoutput -maxdepth 1 -name 'junit.*.xml' | wc -l)" -lt "$MAX_TEST_ATTEMPTS" ] &&
CRASH_REPORT_NAME="$GITHUB_JOB" make report-test-crash

- name: Generate test summary
uses: mikepenz/action-junit-report@v6
- name: Write test summary
if: ${{ !cancelled() }}
with:
report_paths: ./.testoutput/junit.*.xml
detailed_summary: true
check_annotations: false
annotate_only: true
skip_annotations: true
run: |
summary="$(make -s print-test-summary)"
if [ -n "$summary" ]; then
printf '%s\n' "$summary" > "$GITHUB_STEP_SUMMARY"
fi

- name: Upload code coverage to Codecov
uses: codecov/codecov-action@v5
Expand Down Expand Up @@ -428,15 +426,13 @@ jobs:
[ "$(find .testoutput -maxdepth 1 -name 'junit.*.xml' | wc -l)" -lt "$MAX_TEST_ATTEMPTS" ] &&
CRASH_REPORT_NAME="$GITHUB_JOB" make report-test-crash

- name: Generate test summary
uses: mikepenz/action-junit-report@v6
- name: Write test summary
if: ${{ !cancelled() }}
with:
report_paths: ./.testoutput/junit.*.xml
detailed_summary: true
check_annotations: false
annotate_only: true
skip_annotations: true
run: |
summary="$(make -s print-test-summary)"
if [ -n "$summary" ]; then
printf '%s\n' "$summary" > "$GITHUB_STEP_SUMMARY"
fi

- name: Upload code coverage to Codecov
uses: codecov/codecov-action@v5
Expand Down Expand Up @@ -567,15 +563,13 @@ jobs:
[ "$(find .testoutput -maxdepth 1 -name 'junit.*.xml' | wc -l)" -lt "$MAX_TEST_ATTEMPTS" ] &&
CRASH_REPORT_NAME="$GITHUB_JOB" make report-test-crash

- name: Generate test summary
uses: mikepenz/action-junit-report@v6
- name: Write test summary
if: ${{ !cancelled() }}
with:
report_paths: ./.testoutput/junit.*.xml
detailed_summary: true
check_annotations: false
annotate_only: true
skip_annotations: true
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mikepenz/action-junit-report doesn't support showing failure details.

run: |
summary="$(make -s print-test-summary)"
if [ -n "$summary" ]; then
printf '%s\n' "$summary" > "$GITHUB_STEP_SUMMARY"
fi

- name: Upload code coverage to Codecov
uses: codecov/codecov-action@v5
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,10 @@ report-test-crash: $(TEST_OUTPUT_ROOT)
--junitfile=$(TEST_OUTPUT_ROOT)/junit.crash.xml \
--crashreportname=$(CRASH_REPORT_NAME)

# print-test-summary runs the test-runner tool's print-summary command over the
# JUnit reports matching $(TEST_OUTPUT_ROOT)/junit.*.xml. The recipe is silenced
# with @ so that only the tool's own output reaches stdout.
print-test-summary: $(TEST_OUTPUT_ROOT)
@go run ./cmd/tools/test-runner print-summary \
--junit-glob=$(TEST_OUTPUT_ROOT)/junit.*.xml

##### Schema #####
install-schema-cass-es: temporal-cassandra-tool install-schema-es
@printf $(COLOR) "Install Cassandra schema..."
Expand Down
107 changes: 95 additions & 12 deletions tools/testrunner/junit.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,24 @@ import (
"github.com/jstemmer/go-junit-report/v2/junit"
)

// alertsSuiteName is the JUnit suite name used for structural alerts (data
// races, panics, fatal errors).
const alertsSuiteName = "ALERTS"

const junitAlertDetailsMaxBytes = 64 * 1024

type failureType string

const (
// failureTypeFailed marks a failed assertion.
failureTypeFailed failureType = "Failed"
failureTypeTimeout failureType = "TIMEOUT"
failureTypeCrash failureType = "CRASH"
failureTypeDataRace failureType = "DATA RACE"
failureTypePanic failureType = "PANIC"
failureTypeFatal failureType = "FATAL"
)

type junitReport struct {
junit.Testsuites
path string
Expand All @@ -33,12 +51,16 @@ func (j *junitReport) read() error {
return nil
}

func generateStatic(names []string, suffix string, message string) *junitReport {
// generateReport builds a JUnit report for failures that the runner
// derives itself, such as timeouts and crashes. Failure.Type stores the
// canonical failure type (for example TIMEOUT or CRASH), and Failure.Data is
// intentionally left empty.
func generateReport(names []string, suffix string, kind failureType) *junitReport {
var testcases []junit.Testcase
for _, name := range names {
testcases = append(testcases, junit.Testcase{
Name: fmt.Sprintf("%s (%s)", name, suffix),
Failure: &junit.Result{Message: message},
Failure: generateFailure(kind, ""),
})
}
return &junitReport{
Expand All @@ -53,6 +75,14 @@ func generateStatic(names []string, suffix string, message string) *junitReport
}
}

// generateFailure constructs a JUnit failure result for the given kind. The
// kind's string form is stored in both Message and Type, and data is stored
// unchanged in Data.
func generateFailure(kind failureType, data string) *junit.Result {
	label := string(kind)
	result := new(junit.Result)
	result.Message = label
	result.Type = label
	result.Data = data
	return result
}

func (j *junitReport) write() error {
f, err := os.Create(j.path)
if err != nil {
Expand All @@ -72,30 +102,37 @@ func (j *junitReport) write() error {
// appendAlertsSuite adds a synthetic JUnit suite summarizing high-priority alerts
// (data races, panics, fatals) so that CI surfaces them prominently.
func (j *junitReport) appendAlertsSuite(alerts []alert) {
// Deduplicate by kind+details to avoid noisy repeats across retries.
// Deduplicate by type+details to avoid noisy repeats across retries.
alerts = dedupeAlerts(alerts)
if len(alerts) == 0 {
return
}

// Convert alerts to JUnit test cases.
var cases []junit.Testcase
for _, a := range alerts {
name := fmt.Sprintf("%s: %s", a.Kind, a.Summary)
name := fmt.Sprintf("%s: %s", a.Type, a.Summary)
if p := primaryTestName(a.Tests); p != "" {
name = fmt.Sprintf("%s — in %s", name, p)
}
// Include only test names for context, not the full log details to avoid XML malformation
var details string
var sb strings.Builder
if a.Details != "" {
sb.WriteString(truncateAlertDetails(sanitizeXML(a.Details)))
sb.WriteByte('\n')
}
if len(a.Tests) > 0 {
details = fmt.Sprintf("Detected in tests:\n\t%s", strings.Join(a.Tests, "\n\t"))
fmt.Fprintf(&sb, "Detected in tests:\n\t%s", strings.Join(a.Tests, "\n\t"))
}
r := &junit.Result{Message: string(a.Kind), Data: details}
f := generateFailure(a.Type, strings.TrimRight(sb.String(), "\n"))
cases = append(cases, junit.Testcase{
Name: name,
Failure: r,
Failure: f,
})
}

// Append the alerts suite to the report.
suite := junit.Testsuite{
Name: "ALERTS",
Name: alertsSuiteName,
Failures: len(cases),
Tests: len(cases),
Testcases: cases,
Expand All @@ -105,13 +142,41 @@ func (j *junitReport) appendAlertsSuite(alerts []alert) {
j.Tests += suite.Tests
}

// sanitizeXML returns s with every rune that XML 1.0 forbids stripped out.
// Go's xml.Encoder takes care of escaping <, >, & and friends, but it does not
// drop control characters; apart from \t, \n and \r those are illegal in XML
// and make parsers reject the whole document.
func sanitizeXML(s string) string {
	// allowed reports whether r may appear in the sanitized output.
	allowed := func(r rune) bool {
		if r == '\t' || r == '\n' || r == '\r' {
			// The only three control characters XML 1.0 permits.
			return true
		}
		// Reject the remaining ASCII control characters (below space, 0x20)
		// and the reserved Unicode noncharacters U+FFFE and U+FFFF.
		return r >= 0x20 && r != 0xFFFE && r != 0xFFFF
	}
	return strings.Map(func(r rune) rune {
		if allowed(r) {
			return r
		}
		return -1 // A negative result tells strings.Map to drop the rune.
	}, s)
}

// truncateAlertDetails keeps alert payloads from bloating the JUnit artifact.
//
// Strings of at most junitAlertDetailsMaxBytes bytes are returned unchanged.
// Longer strings are cut and suffixed with a truncation marker so that the
// result never exceeds junitAlertDetailsMaxBytes bytes. The cut point is moved
// back to the start of a UTF-8 sequence so that a multi-byte character is
// never split in half, which would otherwise leave a broken rune right before
// the marker.
func truncateAlertDetails(s string) string {
	if len(s) <= junitAlertDetailsMaxBytes {
		return s
	}
	const marker = "\n... (truncated) ...\n"
	cut := junitAlertDetailsMaxBytes - len(marker)
	// UTF-8 continuation bytes look like 10xxxxxx. If s[cut] is one, cutting
	// here would split a rune, so step back to its leading byte. A rune has at
	// most three continuation bytes; the bound keeps malformed input from
	// dragging the cut arbitrarily far back.
	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return s[:cut] + marker
}

// dedupeAlerts removes duplicate alerts (e.g., repeated across retries) based
// on kind and details while preserving the first-seen order.
// on type and details while preserving the first-seen order.
func dedupeAlerts(alerts []alert) []alert {
seen := make(map[string]struct{}, len(alerts))
var out []alert
for _, a := range alerts {
key := string(a.Kind) + "\n" + a.Details
key := string(a.Type) + "\n" + a.Details
if _, ok := seen[key]; ok {
continue
}
Expand Down Expand Up @@ -224,6 +289,24 @@ func mergeReports(reports []*junitReport) (*junitReport, error) {
// Discard test case parents since they provide no value.
continue
}

// Parse failure details from Failure.Data, if present.
if testCase.Failure != nil && testCase.Failure.Data != "" {
if details := parseFailureDetails(testCase.Failure.Data); details != noFailureDetails {
testCase.Failure.Data = details
}
}

// Failure.Type carries the canonical kind in merged JUnit.
if testCase.Failure != nil {
if suite.Name == alertsSuiteName {
if testCase.Failure.Type == "" {
testCase.Failure.Type = testCase.Failure.Message
}
} else {
testCase.Failure.Type = string(failureTypeFailed)
}
}
testCase.Name += suffix
newSuite.Testcases = append(newSuite.Testcases, testCase)
}
Expand Down
Loading
Loading