Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public class QOutProcessor {

/** Placeholder text that stands in for masked output. */
public static final String MASK_PATTERN = "#### A masked pattern was here ####";
/**
 * Placeholder text for partially masked output.
 * NOTE(review): the code that emits this marker is outside the visible hunk - confirm exact usage.
 */
public static final String PARTIAL_MASK_PATTERN = "#### A PARTIAL masked pattern was here ####";
/**
 * Replacement text for "Vertex killed ... OTHER_VERTEX_FAILURE" diagnostics. The same constant is
 * compared against whole lines in maskPatterns() to drop consecutive duplicates.
 */
public static final String MASKED_VERTEX_KILLED_PATTERN = "[Masked Vertex killed due to OTHER_VERTEX_FAILURE]";
// Pairs a regex for " Num rows: <n> Data size: <n>" with a replacement that masks both numbers.
// Both numbers must start with a non-zero digit ([1-9]), so zero values are not matched.
private static final PatternReplacementPair MASK_STATS = new PatternReplacementPair(
Pattern.compile(" Num rows: [1-9][0-9]* Data size: [1-9][0-9]*"),
" Num rows: ###Masked### Data size: ###Masked###");
Expand Down Expand Up @@ -197,6 +198,7 @@ public void maskPatterns(String fname) throws Exception {
out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"));

boolean lastWasMasked = false;
boolean lastWasVertexKilled = false;

while (null != (line = in.readLine())) {
LineProcessingResult result = processLine(line);
Expand All @@ -209,10 +211,22 @@ public void maskPatterns(String fname) throws Exception {
lastWasMasked = true;
result.partialMaskWasMatched = false;
}
lastWasVertexKilled = false;
} else if (result.line.equals(MASKED_VERTEX_KILLED_PATTERN)) {
// Deduplicate consecutive standalone vertex-killed lines — the number of sibling
// vertices still alive when the kill propagates is non-deterministic.
if (!lastWasVertexKilled) {
out.write(result.line);
out.write("\n");
lastWasVertexKilled = true;
}
lastWasMasked = false;
result.partialMaskWasMatched = false;
} else {
out.write(result.line);
out.write("\n");
lastWasMasked = false;
lastWasVertexKilled = false;
result.partialMaskWasMatched = false;
}
}
Expand Down Expand Up @@ -350,7 +364,16 @@ private final static class PatternReplacementPair {
// We do not want the test to fail because of this.
ppm.add(new PatternReplacementPair(
Pattern.compile("Vertex killed, vertexName=(.*?),.*\\[\\1\\] killed\\/failed due to:OTHER_VERTEX_FAILURE\\]"),
"[Masked Vertex killed due to OTHER_VERTEX_FAILURE]"));
MASKED_VERTEX_KILLED_PATTERN));

// Collapse multiple consecutive embedded [Masked Vertex killed] tokens on the same line
// (the long FAILED: summary line repeats one token per killed vertex).
ppm.add(new PatternReplacementPair(Pattern.compile("(\\Q" + MASKED_VERTEX_KILLED_PATTERN + "\\E){2,}"),
MASKED_VERTEX_KILLED_PATTERN));

// The number of vertices killed when a DAG fails is a scheduling race condition —
// depends on how many sibling vertices are still running at the moment the kill propagates.
ppm.add(new PatternReplacementPair(Pattern.compile("killedVertices:[0-9]+"), "killedVertices:#Masked#"));

partialPlanMask = ppm.toArray(new PatternReplacementPair[ppm.size()]);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,19 @@ public QTestProcessExecResult checkCliDriverResults() throws Exception {
qTestResultProcessor.overwriteResults(f.getPath(), outFileName);
return QTestProcessExecResult.createWithoutOutput(0);
} else {
return qTestResultProcessor.executeDiffCommand(f.getPath(), outFileName, false);
// Apply the same masking pipeline to a temporary copy of the reference file so that
// non-deterministic values are normalized on both sides.
// This preserves backward compatibility with existing .q.out files that were written
// before the masking rules were introduced.
File maskedRef = new File(outFileName + ".masked_ref");
try {
FileUtils.copyFile(new File(outFileName), maskedRef);
qOutProcessor.maskPatterns(maskedRef.getPath());
return qTestResultProcessor.executeDiffCommand(f.getPath(), maskedRef.getPath(), false);
} finally {
maskedRef.delete();
new File(maskedRef.getPath() + ".orig").delete();
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,158 @@
*/
package org.apache.hadoop.hive.ql;

import java.io.File;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.QTestMiniClusters.FsType;
import org.apache.hadoop.hive.ql.qoption.QTestReplaceHandler;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

/**
* This class contains unit tests for QOutProcessor
*/
public class TestQOutProcessor {
QOutProcessor qOutProcessor = new QOutProcessor(FsType.LOCAL, new QTestReplaceHandler());

@Rule
public TemporaryFolder tmpFolder = new TemporaryFolder();

/**
 * Feeds one raw "Vertex killed" diagnostic line through the processor and expects the
 * whole line to come back as {@link QOutProcessor#MASKED_VERTEX_KILLED_PATTERN}.
 */
@Test
public void testVertexKilledLineIsReplaced() {
  // The vertex name appears twice ("vertexName=Map 2" and "[Map 2]") in this sample line.
  final String vertexKilledLogLine = "Vertex killed, vertexName=Map 2, "
      + "diagnostics=[Task failed, taskAttemptId=attempt_1 "
      + "[Map 2] killed/failed due to:OTHER_VERTEX_FAILURE]";

  final String masked = processLine(vertexKilledLogLine);

  Assert.assertEquals(QOutProcessor.MASKED_VERTEX_KILLED_PATTERN, masked);
}

/**
 * A line made of two, and then three, back-to-back
 * {@link QOutProcessor#MASKED_VERTEX_KILLED_PATTERN} tokens must be reduced to one token.
 */
@Test
public void testMultipleEmbeddedVertexKilledTokensCollapsedOnSameLine() {
  final String token = QOutProcessor.MASKED_VERTEX_KILLED_PATTERN;

  // Start from one token and append another on each pass: the first pass checks
  // two tokens, the second pass checks three.
  final StringBuilder repeated = new StringBuilder(token);
  for (int copies = 2; copies <= 3; copies++) {
    repeated.append(token);
    Assert.assertEquals(token, processLine(repeated.toString()));
  }
}

/**
 * processLine must return a lone {@link QOutProcessor#MASKED_VERTEX_KILLED_PATTERN}
 * token exactly as it was given.
 */
@Test
public void testSingleEmbeddedVertexKilledTokenUnchanged() {
  final String loneToken = QOutProcessor.MASKED_VERTEX_KILLED_PATTERN;
  final String processed = processLine(loneToken);
  Assert.assertEquals(loneToken, processed);
}

/**
 * {@code killedVertices:N} must come back as {@code killedVertices:#Masked#} for
 * single-digit, zero and multi-digit counts alike.
 */
@Test
public void testKilledVerticesCountIsMasked() {
  final String[] samples = {"killedVertices:3", "killedVertices:0", "killedVertices:100"};
  for (int i = 0; i < samples.length; i++) {
    Assert.assertEquals("killedVertices:#Masked#", processLine(samples[i]));
  }
}

/**
 * The killedVertices count must also be masked when it sits in the middle of a longer
 * line, here a FAILED: summary.
 */
@Test
public void testKilledVerticesCountIsMaskedInLongerLine() {
  final String summaryLine =
      "FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.tez."
          + "TezTask. killedVertices:2 Vertex re-run not supported in current execution mode.";

  final String processed = processLine(summaryLine);
  final boolean maskedTokenPresent = processed.contains("killedVertices:#Masked#");
  final boolean rawCountPresent = processed.contains("killedVertices:2");

  Assert.assertTrue("killedVertices:#Masked# must appear in output", maskedTokenPresent);
  Assert.assertFalse("raw killedVertices:2 must not appear in output", rawCountPresent);
}

/**
 * maskPatterns() must shrink a run of three standalone
 * {@link QOutProcessor#MASKED_VERTEX_KILLED_PATTERN} lines to one line and leave the
 * surrounding lines in place.
 */
@Test
public void testConsecutiveVertexKilledLinesDeduplicatedInFile() throws Exception {
  final String killed = QOutProcessor.MASKED_VERTEX_KILLED_PATTERN;
  final File scratch = tmpFile("line before", killed, killed, killed, "line after");

  qOutProcessor.maskPatterns(scratch.getAbsolutePath());

  final List<String> expected = Arrays.asList("line before", killed, "line after");
  final List<String> actual = readLines(scratch);
  Assert.assertEquals(expected, actual);
}

/**
 * Two runs of vertex-killed lines separated by an ordinary line must each survive as
 * exactly one line.
 */
@Test
public void testNonConsecutiveVertexKilledLinesKeptSeparately() throws Exception {
  final String killed = QOutProcessor.MASKED_VERTEX_KILLED_PATTERN;
  final String separator = "some other line";
  final File scratch = tmpFile(killed, killed, separator, killed, killed);

  qOutProcessor.maskPatterns(scratch.getAbsolutePath());

  // One line per run, with the separating line still between them.
  final List<String> expected = Arrays.asList(killed, separator, killed);
  final List<String> actual = readLines(scratch);
  Assert.assertEquals(expected, actual);
}

/**
 * A line that ends up as MASK_PATTERN between two vertex-killed lines must break the
 * run, so both vertex-killed lines are kept.
 */
@Test
public void testVertexKilledRunResetByMaskedLine() throws Exception {
  final String killed = QOutProcessor.MASKED_VERTEX_KILLED_PATTERN;
  // The assertion below relies on the processor rewriting this line to MASK_PATTERN.
  final String lineToBeMasked = "Deleted /tmp/something";
  final File scratch = tmpFile(killed, lineToBeMasked, killed);

  qOutProcessor.maskPatterns(scratch.getAbsolutePath());

  // Only a single masked line is present, so nothing is folded away here.
  final List<String> expected = Arrays.asList(killed, QOutProcessor.MASK_PATTERN, killed);
  final List<String> actual = readLines(scratch);
  Assert.assertEquals(expected, actual);
}

@Test
public void testSelectiveHdfsPatternMaskOnlyHdfsPath() {
Assert.assertEquals("nothing to be masked", processLine("nothing to be masked"));
Expand Down Expand Up @@ -77,4 +218,23 @@ public void testSelectiveHdfsPatternMaskOnlyHdfsPath() {
/**
 * Test helper: passes one line to {@code qOutProcessor.processLine} and returns the
 * string obtained from {@code get()} on its result.
 *
 * @param line the input line to process
 * @return the processed line text
 */
private String processLine(String line) {
return qOutProcessor.processLine(line).get();
}

/**
 * Creates a new file in the test's temporary folder and writes each given string to it
 * as its own line, encoded as UTF-8.
 *
 * @param lines the lines to write, in order
 * @return the file that was written
 */
private File tmpFile(String... lines) throws Exception {
  final File target = tmpFolder.newFile();
  try (PrintWriter writer = new PrintWriter(target, "UTF-8")) {
    for (int i = 0; i < lines.length; i++) {
      writer.println(lines[i]);
    }
  }
  return target;
}

/**
 * Reads the file as UTF-8 lines and drops any empty lines at the very end.
 * Empty lines elsewhere in the file are kept.
 *
 * @param f the file to read
 * @return the file's lines without trailing empty ones
 */
private List<String> readLines(File f) throws Exception {
  final List<String> content = Files.readAllLines(f.toPath(), StandardCharsets.UTF_8);
  // Walk backwards from the last line, removing entries until a non-empty one is found.
  for (int last = content.size() - 1; last >= 0 && content.get(last).isEmpty(); last--) {
    content.remove(last);
  }
  return content;
}

}
Loading