apache · bharos · Apr 3, 2026 · Apr 3, 2026
diff --git a/api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java b/api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java
@@ -22,13 +22,15 @@
 
 import java.nio.ByteBuffer;
 import java.util.Collection;
+import java.util.Comparator;
 import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
 import org.apache.iceberg.ContentFile;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.Schema;
 import org.apache.iceberg.expressions.ExpressionVisitors.BoundExpressionVisitor;
+import org.apache.iceberg.types.Comparators;
 import org.apache.iceberg.types.Conversions;
 import org.apache.iceberg.types.Types.StructType;
 import org.apache.iceberg.util.NaNUtil;
@@ -467,8 +469,36 @@ public <T> Boolean startsWith(BoundReference<T> ref, Literal<T> lit) {
 
     @Override
     public <T> Boolean notStartsWith(BoundReference<T> ref, Literal<T> lit) {
-      // TODO: Handle cases that definitely cannot match, such as notStartsWith("x") when the bounds
-      // are ["a", "b"].
+      int fieldId = ref.fieldId();
+      if (isNestedColumn(fieldId)) {
+        return ROWS_MIGHT_NOT_MATCH;
+      }
+
+      if (containsNullsOnly(fieldId)) {
+        return ROWS_MUST_MATCH;
+      }
+
+      String prefix = (String) lit.value();
+      Comparator<CharSequence> order = Comparators.charSequences();
+
+      if (lowerBounds != null && lowerBounds.containsKey(fieldId)) {
+        CharSequence min = Conversions.fromByteBuffer(ref.type(), lowerBounds.get(fieldId));
+        // values >= min; if min's head sorts after the prefix, no value can start with it
+        CharSequence minHead = min.subSequence(0, Math.min(prefix.length(), min.length()));
+        if (order.compare(minHead, prefix) > 0) {
+          return ROWS_MUST_MATCH;
+        }
+      }
+
+      if (upperBounds != null && upperBounds.containsKey(fieldId)) {
+        CharSequence max = Conversions.fromByteBuffer(ref.type(), upperBounds.get(fieldId));
+        // values <= max; if max's head sorts before the prefix, no value can start with it
+        CharSequence maxHead = max.subSequence(0, Math.min(prefix.length(), max.length()));
+        if (order.compare(maxHead, prefix) < 0) {
+          return ROWS_MUST_MATCH;
+        }
+      }
+
       return ROWS_MIGHT_NOT_MATCH;
     }
 

diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java b/api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java
@@ -32,6 +32,7 @@
 import static org.apache.iceberg.expressions.Expressions.notIn;
 import static org.apache.iceberg.expressions.Expressions.notNaN;
 import static org.apache.iceberg.expressions.Expressions.notNull;
+import static org.apache.iceberg.expressions.Expressions.notStartsWith;
 import static org.apache.iceberg.expressions.Expressions.or;
 import static org.apache.iceberg.types.Conversions.toByteBuffer;
 import static org.apache.iceberg.types.Types.NestedField.optional;
@@ -172,6 +173,40 @@ public class TestStrictMetricsEvaluator {
           // upper bounds
           ImmutableMap.of(5, toByteBuffer(StringType.get(), "bbb")));
 
+  // String fixture for column 3 ("required"): bounds ["abc", "abd"], 50 values, no null counts
+  private static final DataFile STRING_FILE =
+      new TestDataFile(
+          "string_file.avro",
+          Row.of(),
+          50,
+          // value counts keyed by column id, including nulls
+          ImmutableMap.of(3, 50L),
+          // null value counts (empty: none recorded for any column)
+          ImmutableMap.of(),
+          // nan value counts (not provided)
+          null,
+          // lower bounds keyed by column id
+          ImmutableMap.of(3, toByteBuffer(StringType.get(), "abc")),
+          // upper bounds keyed by column id
+          ImmutableMap.of(3, toByteBuffer(StringType.get(), "abd")));
+
+  // Wider-range fixture for column 3 ("required"): bounds ["aa", "dC"], 50 values, no null counts
+  private static final DataFile STRING_FILE_2 =
+      new TestDataFile(
+          "string_file_2.avro",
+          Row.of(),
+          50,
+          // value counts keyed by column id, including nulls
+          ImmutableMap.of(3, 50L),
+          // null value counts (empty: none recorded for any column)
+          ImmutableMap.of(),
+          // nan value counts (not provided)
+          null,
+          // lower bounds keyed by column id
+          ImmutableMap.of(3, toByteBuffer(StringType.get(), "aa")),
+          // upper bounds keyed by column id
+          ImmutableMap.of(3, toByteBuffer(StringType.get(), "dC")));
+
   @Test
   public void testAllNulls() {
     boolean shouldRead = new StrictMetricsEvaluator(SCHEMA, notNull("all_nulls")).eval(FILE);
@@ -684,4 +719,84 @@ SCHEMA, lessThanOrEqual("struct.nested_col_with_stats", INT_MAX_VALUE))
         new StrictMetricsEvaluator(SCHEMA, notNull("struct.nested_col_with_stats")).eval(FILE);
     assertThat(shouldRead).as("notNull nested column should not match").isFalse();
   }
+
+  @Test
+  public void testNotStartsWithAllNulls() {
+    boolean shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("all_nulls", "a")).eval(FILE);
+    assertThat(shouldRead).as("Should match: all null values satisfy notStartsWith").isTrue();
+  }
+
+  @Test
+  public void testNotStartsWithBoundsAbovePrefix() {
+    boolean shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("required", "aaa")).eval(STRING_FILE);
+    assertThat(shouldRead).as("Should match: all values are above the prefix range").isTrue();
+  }
+
+  @Test
+  public void testNotStartsWithBoundsBelowPrefix() {
+    boolean shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("required", "zzz")).eval(STRING_FILE);
+    assertThat(shouldRead).as("Should match: all values are below the prefix range").isTrue();
+  }
+
+  @Test
+  public void testNotStartsWithBoundsOverlapPrefix() {
+    boolean shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("required", "ab")).eval(STRING_FILE);
+    assertThat(shouldRead).as("Should not match: bounds overlap the prefix range").isFalse();
+
+    shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("required", "abc")).eval(STRING_FILE);
+    assertThat(shouldRead).as("Should not match: lower bound starts with the prefix").isFalse();
+  }
+
+  @Test
+  public void testNotStartsWithWiderRange() {
+    boolean shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("required", "e")).eval(STRING_FILE_2);
+    assertThat(shouldRead).as("Should match: all values are below the prefix").isTrue();
+
+    shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("required", "a")).eval(STRING_FILE_2);
+    assertThat(shouldRead).as("Should not match: lower bound starts with the prefix").isFalse();
+
+    shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("required", "c")).eval(STRING_FILE_2);
+    assertThat(shouldRead).as("Should not match: prefix is within the bounds range").isFalse();
+  }
+
+  @Test
+  public void testNotStartsWithNoStats() {
+    boolean shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("required", "a")).eval(FILE);
+    assertThat(shouldRead).as("Should not match: no bounds available for column").isFalse();
+  }
+
+  @Test
+  public void testNotStartsWithSomeNullsBoundsOutsidePrefix() {
+    boolean shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("some_nulls", "zzz")).eval(FILE_2);
+    assertThat(shouldRead).as("Should match: all values are below the prefix").isTrue();
+
+    shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("some_nulls", "aaa")).eval(FILE_2);
+    assertThat(shouldRead).as("Should match: all values are above the prefix").isTrue();
+  }
+
+  @Test
+  public void testNotStartsWithPrefixLongerThanBounds() {
+    boolean shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("required", "aaaaaaa")).eval(STRING_FILE);
+    assertThat(shouldRead).as("Should match: all values are above the long prefix").isTrue();
+
+    shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("required", "zzzzzzz")).eval(STRING_FILE);
+    assertThat(shouldRead).as("Should match: all values are below the long prefix").isTrue();
+
+    shouldRead =
+        new StrictMetricsEvaluator(SCHEMA, notStartsWith("required", "abcdef")).eval(STRING_FILE);
+    assertThat(shouldRead).as("Should not match: prefix overlaps with bound range").isFalse();
+  }
 }