|
18 | 18 | */ |
19 | 19 | package org.apache.parquet; |
20 | 20 |
|
| 21 | +import static org.junit.Assert.assertEquals; |
21 | 22 | import static org.junit.Assert.assertFalse; |
22 | 23 | import static org.junit.Assert.assertTrue; |
23 | 24 |
|
24 | 25 | import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; |
| 26 | +import org.junit.Before; |
25 | 27 | import org.junit.Test; |
26 | 28 |
|
27 | 29 | public class CorruptStatisticsTest { |
28 | 30 |
|
| 31 | + @Before |
| 32 | + public void setUp() { |
| 33 | + CorruptStatistics.clearCache(); |
| 34 | + } |
| 35 | + |
29 | 36 | @Test |
30 | 37 | public void testOnlyAppliesToBinary() { |
31 | 38 | assertTrue(CorruptStatistics.shouldIgnoreStatistics( |
@@ -124,4 +131,49 @@ public void testDistributionCorruptStatistics() { |
124 | 131 | assertTrue(CorruptStatistics.shouldIgnoreStatistics( |
125 | 132 | "parquet-mr version 1.7.0 (build abcd)", PrimitiveTypeName.BINARY)); |
126 | 133 | } |
| 134 | + |
| 135 | + @Test |
| 136 | + public void testCachingBehavior() { |
| 137 | + assertEquals(0, CorruptStatistics.cacheSize()); |
| 138 | + |
| 139 | + // Call many times with the same createdBy — cache should store exactly one entry |
| 140 | + String createdBy = "parquet-mr version 1.6.0 (build cache-test)"; |
| 141 | + for (int i = 0; i < 100; i++) { |
| 142 | + CorruptStatistics.shouldIgnoreStatistics(createdBy, PrimitiveTypeName.BINARY); |
| 143 | + } |
| 144 | + assertEquals(1, CorruptStatistics.cacheSize()); |
| 145 | + |
| 146 | + // A different createdBy should add a second entry |
| 147 | + CorruptStatistics.shouldIgnoreStatistics("parquet-mr version 1.9.0 (build abcd)", PrimitiveTypeName.BINARY); |
| 148 | + assertEquals(2, CorruptStatistics.cacheSize()); |
| 149 | + } |
| 150 | + |
| 151 | + @Test |
| 152 | + public void testCorrectnessWhenCacheIsFull() { |
| 153 | + CorruptStatistics.clearCache(); |
| 154 | + |
| 155 | + // Fill cache to capacity with 64 distinct corrupt-version strings |
| 156 | + for (int i = 0; i < 64; i++) { |
| 157 | + assertTrue( |
| 158 | + "Corrupt version should be ignored", |
| 159 | + CorruptStatistics.shouldIgnoreStatistics( |
| 160 | + "parquet-mr version 1.6." + i + " (build x)", PrimitiveTypeName.BINARY)); |
| 161 | + } |
| 162 | + assertEquals(64, CorruptStatistics.cacheSize()); |
| 163 | + |
| 164 | + // 65th distinct string bypasses cache -- must still return correct result |
| 165 | + assertTrue( |
| 166 | + "Cache-bypass path must still return correct result for corrupt version", |
| 167 | + CorruptStatistics.shouldIgnoreStatistics( |
| 168 | + "parquet-mr version 1.6.99 (build bypass)", PrimitiveTypeName.BINARY)); |
| 169 | + |
| 170 | + // Non-corrupt version also returns correct result when cache is full |
| 171 | + assertFalse( |
| 172 | + "Non-corrupt version must return false even when cache is full", |
| 173 | + CorruptStatistics.shouldIgnoreStatistics( |
| 174 | + "parquet-mr version 1.10.0 (build bypass2)", PrimitiveTypeName.BINARY)); |
| 175 | + |
| 176 | + // Cache did not grow beyond cap |
| 177 | + assertEquals(64, CorruptStatistics.cacheSize()); |
| 178 | + } |
127 | 179 | } |
0 commit comments