diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java index b418c70538ca..2bd2bd83584e 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java @@ -58,6 +58,7 @@ public class QOutProcessor { public static final String MASK_PATTERN = "#### A masked pattern was here ####"; public static final String PARTIAL_MASK_PATTERN = "#### A PARTIAL masked pattern was here ####"; + public static final String MASKED_VERTEX_KILLED_PATTERN = "[Masked Vertex killed due to OTHER_VERTEX_FAILURE]"; private static final PatternReplacementPair MASK_STATS = new PatternReplacementPair( Pattern.compile(" Num rows: [1-9][0-9]* Data size: [1-9][0-9]*"), " Num rows: ###Masked### Data size: ###Masked###"); @@ -197,6 +198,7 @@ public void maskPatterns(String fname) throws Exception { out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8")); boolean lastWasMasked = false; + boolean lastWasVertexKilled = false; while (null != (line = in.readLine())) { LineProcessingResult result = processLine(line); @@ -209,10 +211,22 @@ public void maskPatterns(String fname) throws Exception { lastWasMasked = true; result.partialMaskWasMatched = false; } + lastWasVertexKilled = false; + } else if (result.line.equals(MASKED_VERTEX_KILLED_PATTERN)) { + // Deduplicate consecutive standalone vertex-killed lines — the number of sibling + // vertices still alive when the kill propagates is non-deterministic. 
+ if (!lastWasVertexKilled) { + out.write(result.line); + out.write("\n"); + lastWasVertexKilled = true; + } + lastWasMasked = false; + result.partialMaskWasMatched = false; } else { out.write(result.line); out.write("\n"); lastWasMasked = false; + lastWasVertexKilled = false; result.partialMaskWasMatched = false; } } @@ -350,7 +364,16 @@ private final static class PatternReplacementPair { // We do not want the test to fail because of this. ppm.add(new PatternReplacementPair( Pattern.compile("Vertex killed, vertexName=(.*?),.*\\[\\1\\] killed\\/failed due to:OTHER_VERTEX_FAILURE\\]"), - "[Masked Vertex killed due to OTHER_VERTEX_FAILURE]")); + MASKED_VERTEX_KILLED_PATTERN)); + + // Collapse multiple consecutive embedded [Masked Vertex killed] tokens on the same line + // (the long FAILED: summary line repeats one token per killed vertex). + ppm.add(new PatternReplacementPair(Pattern.compile("(\\Q" + MASKED_VERTEX_KILLED_PATTERN + "\\E){2,}"), + MASKED_VERTEX_KILLED_PATTERN)); + + // The number of vertices killed when a DAG fails is a scheduling race condition — + // depends on how many sibling vertices are still running at the moment the kill propagates. 
+ ppm.add(new PatternReplacementPair(Pattern.compile("killedVertices:[0-9]+"), "killedVertices:#Masked#")); partialPlanMask = ppm.toArray(new PatternReplacementPair[ppm.size()]); } diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java index 447102f91c49..e8edcfd1d094 100644 --- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java +++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java @@ -1020,7 +1020,19 @@ public QTestProcessExecResult checkCliDriverResults() throws Exception { qTestResultProcessor.overwriteResults(f.getPath(), outFileName); return QTestProcessExecResult.createWithoutOutput(0); } else { - return qTestResultProcessor.executeDiffCommand(f.getPath(), outFileName, false); + // Apply the same masking pipeline to a temporary copy of the reference file so that + // non-deterministic values are normalized on both sides. + // This preserves backward compatibility with existing .q.out files that were written + // before the masking rules were introduced. 
+ File maskedRef = new File(outFileName + ".masked_ref"); + try { + FileUtils.copyFile(new File(outFileName), maskedRef); + qOutProcessor.maskPatterns(maskedRef.getPath()); + return qTestResultProcessor.executeDiffCommand(f.getPath(), maskedRef.getPath(), false); + } finally { + maskedRef.delete(); + new File(maskedRef.getPath() + ".orig").delete(); + } } } diff --git a/itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQOutProcessor.java b/itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQOutProcessor.java index 17fde37f36a2..aa6a8d35e26f 100644 --- a/itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQOutProcessor.java +++ b/itests/util/src/test/java/org/apache/hadoop/hive/ql/TestQOutProcessor.java @@ -17,10 +17,19 @@ */ package org.apache.hadoop.hive.ql; +import java.io.File; +import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.util.Arrays; +import java.util.List; + import org.apache.hadoop.hive.ql.QTestMiniClusters.FsType; import org.apache.hadoop.hive.ql.qoption.QTestReplaceHandler; import org.junit.Assert; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.TemporaryFolder; /** * This class contains unit tests for QTestUtil @@ -28,6 +37,138 @@ public class TestQOutProcessor { QOutProcessor qOutProcessor = new QOutProcessor(FsType.LOCAL, new QTestReplaceHandler()); + @Rule + public TemporaryFolder tmpFolder = new TemporaryFolder(); + + /** + * A raw vertex-killed log line must be replaced with MASKED_VERTEX_KILLED_PATTERN. 
+   */
+  @Test
+  public void testVertexKilledLineIsReplaced() {
+    String raw = "Vertex killed, vertexName=Map 2, "
+        + "diagnostics=[Task failed, taskAttemptId=attempt_1 "
+        + "[Map 2] killed/failed due to:OTHER_VERTEX_FAILURE]";
+    Assert.assertEquals(QOutProcessor.MASKED_VERTEX_KILLED_PATTERN, processLine(raw));
+  }
+
+  /**
+   * A line containing multiple embedded MASKED_VERTEX_KILLED_PATTERN tokens
+   * (produced after the first regex pass) must be collapsed to a single token.
+   */
+  @Test
+  public void testMultipleEmbeddedVertexKilledTokensCollapsedOnSameLine() {
+    String twoTokens = QOutProcessor.MASKED_VERTEX_KILLED_PATTERN
+        + QOutProcessor.MASKED_VERTEX_KILLED_PATTERN;
+    Assert.assertEquals(QOutProcessor.MASKED_VERTEX_KILLED_PATTERN, processLine(twoTokens));
+
+    String threeTokens = QOutProcessor.MASKED_VERTEX_KILLED_PATTERN
+        + QOutProcessor.MASKED_VERTEX_KILLED_PATTERN
+        + QOutProcessor.MASKED_VERTEX_KILLED_PATTERN;
+    Assert.assertEquals(QOutProcessor.MASKED_VERTEX_KILLED_PATTERN, processLine(threeTokens));
+  }
+
+  /**
+   * A single MASKED_VERTEX_KILLED_PATTERN token must be left unchanged by processLine.
+   */
+  @Test
+  public void testSingleEmbeddedVertexKilledTokenUnchanged() {
+    Assert.assertEquals(
+        QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
+        processLine(QOutProcessor.MASKED_VERTEX_KILLED_PATTERN));
+  }
+
+  /**
+   * The count following {@code killedVertices:} must be masked regardless of its numeric value.
+   */
+  @Test
+  public void testKilledVerticesCountIsMasked() {
+    Assert.assertEquals("killedVertices:#Masked#", processLine("killedVertices:3"));
+    Assert.assertEquals("killedVertices:#Masked#", processLine("killedVertices:0"));
+    Assert.assertEquals("killedVertices:#Masked#", processLine("killedVertices:100"));
+  }
+
+  /**
+   * killedVertices masking should work when embedded in a longer line (e.g. FAILED: summary).
+   */
+  @Test
+  public void testKilledVerticesCountIsMaskedInLongerLine() {
+    String input = "FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.tez."
+        + "TezTask. killedVertices:2 Vertex re-run not supported in current execution mode.";
+    String output = processLine(input);
+    Assert.assertTrue("killedVertices:#Masked# must appear in output",
+        output.contains("killedVertices:#Masked#"));
+    Assert.assertFalse("raw killedVertices:2 must not appear in output",
+        output.contains("killedVertices:2"));
+  }
+
+  /**
+   * Multiple consecutive standalone MASKED_VERTEX_KILLED_PATTERN lines must be
+   * collapsed to a single line by maskPatterns().
+   */
+  @Test
+  public void testConsecutiveVertexKilledLinesDeduplicatedInFile() throws Exception {
+    File f = tmpFile(
+        "line before",
+        QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
+        QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
+        QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
+        "line after");
+
+    qOutProcessor.maskPatterns(f.getAbsolutePath());
+
+    List<String> lines = readLines(f);
+    Assert.assertEquals(
+        Arrays.asList("line before", QOutProcessor.MASKED_VERTEX_KILLED_PATTERN, "line after"),
+        lines);
+  }
+
+  /**
+   * Two separate (non-consecutive) vertex-killed blocks must each produce one line.
+   */
+  @Test
+  public void testNonConsecutiveVertexKilledLinesKeptSeparately() throws Exception {
+    File f = tmpFile(
+        QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
+        QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
+        "some other line",
+        QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
+        QOutProcessor.MASKED_VERTEX_KILLED_PATTERN);
+
+    qOutProcessor.maskPatterns(f.getAbsolutePath());
+
+    List<String> lines = readLines(f);
+    Assert.assertEquals(
+        Arrays.asList(
+            QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
+            "some other line",
+            QOutProcessor.MASKED_VERTEX_KILLED_PATTERN),
+        lines);
+  }
+
+  /**
+   * Vertex-killed deduplication must reset when a normal (masked) line interrupts
+   * the run of vertex-killed lines.
+   */
+  @Test
+  public void testVertexKilledRunResetByMaskedLine() throws Exception {
+    // The "Deleted /tmp/something" line starts with "Deleted" → gets replaced by MASK_PATTERN
+    File f = tmpFile(
+        QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
+        "Deleted /tmp/something",          // will be masked → MASK_PATTERN
+        QOutProcessor.MASKED_VERTEX_KILLED_PATTERN);
+
+    qOutProcessor.maskPatterns(f.getAbsolutePath());
+
+    List<String> lines = readLines(f);
+    // MASK_PATTERN lines fold duplicates; but here there is only one occurrence
+    Assert.assertEquals(
+        Arrays.asList(
+            QOutProcessor.MASKED_VERTEX_KILLED_PATTERN,
+            QOutProcessor.MASK_PATTERN,
+            QOutProcessor.MASKED_VERTEX_KILLED_PATTERN),
+        lines);
+  }
+
   @Test
   public void testSelectiveHdfsPatternMaskOnlyHdfsPath() {
     Assert.assertEquals("nothing to be masked", processLine("nothing to be masked"));
@@ -77,4 +218,29 @@ public void testSelectiveHdfsPatternMaskOnlyHdfsPath() {
   private String processLine(String line) {
     return qOutProcessor.processLine(line).get();
   }
+
+  /**
+   * Creates a new file in the temporary folder and writes each given line to it as UTF-8.
+   */
+  private File tmpFile(String... lines) throws Exception {
+    File f = tmpFolder.newFile();
+    try (PrintWriter pw = new PrintWriter(f, "UTF-8")) {
+      for (String l : lines) {
+        pw.println(l);
+      }
+    }
+    return f;
+  }
+
+  /**
+   * Reads all lines of the file as UTF-8 and drops any trailing empty lines.
+   */
+  private List<String> readLines(File f) throws Exception {
+    List<String> all = Files.readAllLines(f.toPath(), StandardCharsets.UTF_8);
+    while (!all.isEmpty() && all.get(all.size() - 1).isEmpty()) {
+      all.remove(all.size() - 1);
+    }
+    return all;
+  }
+
 }
\ No newline at end of file