From ada735e4f3929ddbaaf57ef3117490a45ca7f599 Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Wed, 15 Apr 2026 01:04:33 +0200
Subject: [PATCH 1/3] A more compact charindex

---
 .../java/org/unicode/text/tools/Indexer.java  | 153 ++++++++++++------
 .../org/unicode/text/tools/charindex.js       | 104 +++++++-----
 2 files changed, 167 insertions(+), 90 deletions(-)
diff --git a/unicodetools/src/main/java/org/unicode/text/tools/Indexer.java b/unicodetools/src/main/java/org/unicode/text/tools/Indexer.java
index eddfa21e6..26a7cfcbb 100644
--- a/unicodetools/src/main/java/org/unicode/text/tools/Indexer.java
+++ b/unicodetools/src/main/java/org/unicode/text/tools/Indexer.java
@@ -9,12 +9,14 @@
 import com.ibm.icu.text.Transliterator;
 import com.ibm.icu.text.UnicodeSet;
 import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Base64;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -26,6 +28,7 @@
 import java.util.TreeMap;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
+import java.util.zip.DeflaterOutputStream;
 import org.unicode.props.IndexUnicodeProperties;
 import org.unicode.props.UcdProperty;
 import org.unicode.props.UcdPropertyValues;
@@ -38,6 +41,8 @@
 
 public class Indexer {
 
+    private static final char RECORD_SEPARATOR = 0x001E;
+
     private static Transliterator toHTML;
     private static String htmlRulesControls;
 
@@ -113,8 +118,8 @@ public class Indexer {
     }
 
     private static class IndexEntry {
-        IndexEntry(String snippet, UnicodeProperty property) {
-            this.snippet = snippet;
+        IndexEntry(int snippetIndex, UnicodeProperty property) {
+            this.snippetIndex = snippetIndex;
             this.property = property;
             characters = new UnicodeSet();
         }
@@ -127,12 +132,12 @@ List<IndexSubEntry> subEntries() {
                         /* showName= */ property != NAME,
                         characters);
             } catch (Exception e) {
-                System.err.println("In entry for " + property.getName() + ": " + snippet);
+                System.err.println("In entry for " + property.getName() + ": " + snippetIndex);
                 throw e;
             }
         }
 
-        String snippet;
+        int snippetIndex;
         UnicodeProperty property;
         UnicodeSet characters;
         Map<String, UnicodeSet> relatedCharacters = new TreeMap<>();
@@ -258,11 +263,14 @@ public int compare(UnicodeProperty left, UnicodeProperty right) {
                 return left.getName().compareTo(right.getName());
             }
         }
-        // Property to property value to index entry.
-        Map<UnicodeProperty, Map<String, IndexEntry>> indexEntries =
+        final StringBuilder allTheStrings = new StringBuilder();
+        final HashMap<String, Integer> stringIndices = new HashMap<>();
+        // Property to snippet based on property value (as an index in allTheStrings) to index
+        // entry.
+        Map<UnicodeProperty, Map<Integer, IndexEntry>> indexEntries =
                 new TreeMap<>(new PropertyComparator());
-        // Lemma to snippet to position of the word in the snippet.
-        Map<String, Map<String, Integer>> wordIndex = new TreeMap<>();
+        // Lemma to snippet (as an index in allTheStrings) to position of the word in the snippet.
+        Map<String, Map<Integer, Integer>> wordIndex = new TreeMap<>();
         final var properties =
                 List.of(
                         BLOCK,
@@ -292,10 +300,14 @@ public int compare(UnicodeProperty left, UnicodeProperty right) {
                     } else if (prop == NAME) {
                         snippet = snippet.replace(Utility.hex(cp), "#");
                     }
-                    // Copy the snippet to a final variable for use in the λ.
-                    final String indexSnippet = snippet;
+                    final int snippetIndex =
+                            stringIndices.getOrDefault(snippet, allTheStrings.length());
+                    if (snippetIndex == allTheStrings.length()) {
+                        allTheStrings.append(snippet).append(RECORD_SEPARATOR);
+                        stringIndices.put(snippet, snippetIndex);
+                    }
                     propertyIndex
-                            .computeIfAbsent(snippet, k -> new IndexEntry(indexSnippet, prop))
+                            .computeIfAbsent(snippetIndex, k -> new IndexEntry(k, prop))
                             .characters
                             .add(cp);
                     // Override word breaking of ' and - in appropriate contexts so that
@@ -313,11 +325,11 @@ public int compare(UnicodeProperty left, UnicodeProperty right) {
                             String lemma = lemmatize(word);
                             wordIndex
                                     .computeIfAbsent(fold(word), k -> new TreeMap<>())
-                                    .putIfAbsent(snippet, start);
+                                    .putIfAbsent(snippetIndex, start);
                             if (!lemma.equals(fold(word))) {
                                 wordIndex
                                         .computeIfAbsent(lemma, k -> new TreeMap<>())
-                                        .putIfAbsent(snippet, start);
+                                        .putIfAbsent(snippetIndex, start);
                             }
                         }
                     }
@@ -327,18 +339,22 @@ public int compare(UnicodeProperty left, UnicodeProperty right) {
                 System.out.println("Indexed plane " + cp / 0x10000);
             }
         }
+        final int bettyIndex = allTheStrings.length();
+        allTheStrings.append("Betty").append(RECORD_SEPARATOR);
+        final int theIndex = allTheStrings.length();
+        allTheStrings.append("the").append(RECORD_SEPARATOR);
         indexEntries
                 .get(BLOCK)
-                .computeIfAbsent("Betty", k -> new IndexEntry(k, BLOCK))
+                .computeIfAbsent(bettyIndex, k -> new IndexEntry(k, BLOCK))
                 .characters
                 .add(BOOP);
         indexEntries
                 .get(BLOCK)
-                .computeIfAbsent("the", k -> new IndexEntry(k, BLOCK))
+                .computeIfAbsent(theIndex, k -> new IndexEntry(k, BLOCK))
                 .characters
                 .add(DOOD);
-        wordIndex.computeIfAbsent("betty", k -> new TreeMap<>()).putIfAbsent("Betty", 0);
-        wordIndex.computeIfAbsent("the", k -> new TreeMap<>()).putIfAbsent("the", 0);
+        wordIndex.computeIfAbsent("betty", k -> new TreeMap<>()).putIfAbsent(bettyIndex, 0);
+        wordIndex.computeIfAbsent("the", k -> new TreeMap<>()).putIfAbsent(theIndex, 0);
 
         System.out.println("Radicals…");
         final var radicalSets = getRadicalSets();
@@ -379,7 +395,11 @@ public int compare(UnicodeProperty left, UnicodeProperty right) {
                 }
                 css.close();
             } else if (htmlLine.contains("JS HERE")) {
-                file.println("let wordIndex = new Map([");
+                // No pretty-printing in the loops that print these two maps; each space or newline
+                // here enlarges charindex.html by hundreds of kilobytes.  These are not suitable
+                // for human consumption anyway, since anything readable is turned into indices in
+                // allTheStrings.
+                file.print("let wordIndex = new Map([");
                 System.out.println("wordIndex...");
                 {
                     int i = 0;
@@ -387,53 +407,88 @@ public int compare(UnicodeProperty left, UnicodeProperty right) {
                         if (++i % 1000 == 0) {
                             System.out.println(i + "/" + wordIndex.size() + "...");
                         }
-                        file.println(
-                                "    ['"
+                        file.print(
+                                "['"
                                         + wordAndSnippets.getKey().replace("'", "\\'")
-                                        + "', new Map([");
-                        for (var snippetAndPosition : wordAndSnippets.getValue().entrySet()) {
-                            file.println(
-                                    "      ['"
-                                            + snippetAndPosition.getKey().replace("'", "\\'")
-                                            + "', "
-                                            + snippetAndPosition.getValue()
-                                            + "],");
-                        }
-                        file.println("])],");
+                                        + "',new Map([");
+                        // Stream and collect for the innermost map to avoid trailing commas, for
+                        // size.
+                        file.print(
+                                wordAndSnippets.getValue().entrySet().stream()
+                                        .map(
+                                                snippetAndPosition ->
+                                                        "["
+                                                                + snippetAndPosition.getKey()
+                                                                + ","
+                                                                + snippetAndPosition.getValue()
+                                                                + "]")
+                                        .collect(Collectors.joining(",")));
+                        file.print("])],");
                     }
                 }
                 file.println("]);");
                 System.out.println("indexEntries...");
-                file.println("let indexEntries = new Map([");
+                file.print("let indexEntries = new Map([");
                 for (var property : properties) {
                     System.out.println(property.getName() + "...");
                     final var propertyIndex = indexEntries.get(property);
-                    file.println("  ['" + property.getName() + "', new Map([");
+                    file.print("['" + property.getName() + "',new Map([");
                     int i = 0;
                     for (var indexEntry : propertyIndex.values()) {
                         if (++i % 1000 == 0) {
                             System.out.println(i + "/" + propertyIndex.size() + "...");
                         }
-                        file.println("    ['" + indexEntry.snippet.replace("'", "\\'") + "', {");
-                        file.println(
-                                "       html: \""
-                                        + indexEntry.toHTML().replace("\"", "\\\"")
-                                        + "\",");
-                        file.println("       characters: [");
-                        for (var range : indexEntry.coveredCharacters().ranges()) {
-                            file.println(
-                                    "         [0x"
-                                            + Utility.hex(range.codepoint)
-                                            + ", 0x"
-                                            + Utility.hex(range.codepointEnd)
-                                            + "],");
-                        }
-                        file.println("      ],");
-                        file.println("    }],");
+                        final int htmlIndex = allTheStrings.length();
+                        allTheStrings.append(indexEntry.toHTML()).append(RECORD_SEPARATOR);
+                        file.print("[" + indexEntry.snippetIndex + ",{");
+                        file.print("html:" + htmlIndex + ",");
+                        file.print("characters:[");
+                        // Stream and collect for the innermost array to avoid trailing commas, for
+                        // size.
+                        file.print(
+                                indexEntry
+                                        .coveredCharacters()
+                                        .rangeStream()
+                                        .map(
+                                                range ->
+                                                        // Code points in decimal without
+                                                        // zero-padding for size.
+                                                        "["
+                                                                + range.codepoint
+                                                                + ","
+                                                                + range.codepointEnd
+                                                                + "]")
+                                        .collect(Collectors.joining(",")));
+                        file.print("]}],");
                     }
-                    file.println("  ])],");
+                    file.print("])],");
                 }
                 file.println("]);");
+                file.println("let bettyIndex = " + bettyIndex + ";");
+                file.println("let theIndex = " + theIndex + ";");
+                final var compressed = new ByteArrayOutputStream();
+                final var compressor = new DeflaterOutputStream(compressed);
+                final var uncompressed = allTheStrings.toString().getBytes("UTF-8");
+                compressor.write(uncompressed);
+                compressor.close();
+                final var compressedBytes = compressed.toByteArray();
+                System.out.println(
+                        "Strings compressed from "
+                                + (uncompressed.length >> 20)
+                                + " MiB to "
+                                + (compressedBytes.length >> 10)
+                                + " kiB ("
+                                + 100 * compressedBytes.length / uncompressed.length
+                                + "%)");
+                System.out.println(
+                        "Compressed payload is "
+                                + compressedBytes.length
+                                + " bytes, first byte is "
+                                + Byte.toUnsignedInt(compressedBytes[0]));
+                file.println(
+                        "let allTheStringsCompressed = '"
+                                + Base64.getEncoder().encodeToString(compressedBytes)
+                                + "'");
                 final var js =
                         new BufferedReader(new FileReader(new File(resources + "charindex.js")));
                 for (String jsLine = js.readLine(); jsLine != null; jsLine = js.readLine()) {
diff --git a/unicodetools/src/main/resources/org/unicode/text/tools/charindex.js b/unicodetools/src/main/resources/org/unicode/text/tools/charindex.js
index 4adbcdf44..eca405322 100644
--- a/unicodetools/src/main/resources/org/unicode/text/tools/charindex.js
+++ b/unicodetools/src/main/resources/org/unicode/text/tools/charindex.js
@@ -1,15 +1,28 @@
-// Lemma to snippet to position of the word in the snippet.
-/**@type {Map<string, Map<String, number>>}*/
+// Lemma to snippet (compressed) to position of the word in the snippet.
+/**@type {Map<string, Map<number, number>>}*/
 let wordIndex/*= GENERATED LINE*/;
-// Property name to snippet to index entry.
-/**@type {Map<string, Map<string, {html: string, characters: [number, number][]}>>}*/
+// Property name to snippet (compressed) to index entry; the html is compressed.
+/**@type {Map<string, Map<number, {html: number, characters: [number, number][]}>>}*/
 let indexEntries/*= GENERATED LINE*/;
+/**@type {number}*/
+let bettyIndex/*= GENERATED LINE*/;
+/**@type {number}*/
+let theIndex/*= GENERATED LINE*/;
+/**@type {string}*/
+let allTheStringsCompressed/*= GENERATED LINE*/;
+let decompressor = new DecompressionStream("deflate");
+/**@type {string}*/
+var allTheStrings;
+new Response(
+  new Blob([Uint8Array.fromBase64(allTheStringsCompressed)])
+      .stream().pipeThrough(decompressor))
+    .text().then(s => allTheStrings = s);
 
-/**@type {Map<number, string>}*/
+/**@type {Map<number, number>}*/
 let characterNames = new Map();
-/**@type {Map<[number, number], {property: string, snippet: string}>}*/
+/**@type {Map<[number, number], {property: string, snippetIndex: number}>}*/
 let radicalStrokeRanges = new Map();
-/**@type {Map<[number, number], string>}*/
+/**@type {Map<[number, number], number>}*/
 let characterNameRanges = new Map();
 
 let maxResults = 100;
@@ -18,9 +31,9 @@ for (let [property, propertyIndex] of indexEntries) {
   if (!property.endsWith("RSUnicode") && property !== "kSEAL_Rad") {
     continue;
   }
-  for (let [snippet, entry] of propertyIndex) {
+  for (let [snippetIndex, entry] of propertyIndex) {
     for (let range of entry.characters) {
-      radicalStrokeRanges.set(range, {property, snippet});
+      radicalStrokeRanges.set(range, {property, snippetIndex});
     }
   }
 }
@@ -40,6 +53,12 @@ for (let [name, entry] of indexEntries.get("Name_Alias")) {
   }
 }
 
+function getString(/**@type {number}*/ start) {
+  // Each record in allTheStrings runs from its offset to the next U+001E terminator.
+  const terminator = allTheStrings.indexOf("\x1E", start);
+  return allTheStrings.substring(start, terminator);
+}
+
 function updateQuery(event) {
   if(event.key === 'Enter') {
     let newURL = window.location.protocol + "//" + window.location.host + window.location.pathname
@@ -76,47 +95,49 @@ function search(/**@type {string}*/ query) {
   var covered = [];
   /**@type {string[]}*/
   var result = [];
-  /**@type {Set<string>}*/
-  var resultSnippets = new Set(wordIndex.get(foldedQuery[0])?.keys() ?? []);
+  /**@type {Set<number>}*/
+  var resultSnippetIndices = new Set(wordIndex.get(foldedQuery[0])?.keys() ?? []);
   let firstLemmata = [foldedQuery[0]];
-  if (resultSnippets.size === 0 && foldedQuery.length == 1) {
+  if (resultSnippetIndices.size === 0 && foldedQuery.length == 1) {
     let prefix = fold(queryWords.at(-1));
-    for (let [completion, leaves] of wordIndex) {
+    for (let [completion, snippets] of wordIndex) {
       if (completion.startsWith(prefix)) {
         firstLemmata.push(completion);
-        resultSnippets = resultSnippets.union(leaves);
+        resultSnippetIndices = resultSnippetIndices.union(snippets);
       }
     }
   }
   for (var i = 1; i < foldedQuery.length; ++i) {
     var rhs = new Set(wordIndex.get(foldedQuery[i])?.keys() ?? []);
-    let intersection = resultSnippets.intersection(rhs);
+    let intersection = resultSnippetIndices.intersection(rhs);
     if (intersection.size === 0 && i == foldedQuery.length - 1) {
       let prefix = fold(queryWords.at(-1));
-      for (let [completion, leaves] of wordIndex) {
+      for (let [completion, snippets] of wordIndex) {
         if (completion.startsWith(prefix)) {
-          rhs = rhs.union(leaves);
+          rhs = rhs.union(snippets);
         }
       }
-      resultSnippets = resultSnippets.intersection(rhs);
+      resultSnippetIndices = resultSnippetIndices.intersection(rhs);
     } else {
-      resultSnippets = intersection;
+      resultSnippetIndices = intersection;
     }
   }
   let pivots = firstLemmata.map(l => wordIndex.get(l)).filter(x => !!x);
-  let getPivot = (/**@type {string}*/s) => pivots.map(p => p.get(s)).filter(x => x !== undefined)[0];
+  let getPivot = (/**@type {number}*/s) => pivots.map(p => p.get(s)).filter(x => x !== undefined)[0];
   let collator = new Intl.Collator("en");
-  resultSnippets = Array.from(resultSnippets).sort(
+  let sortKeys = new Map(Array.from(resultSnippetIndices).map(
+    i => {
+      let snippet = getString(i);
+      return [i, snippet.substring(getPivot(i)) + ' \uFFFE ' +
+                     snippet.substring(0, getPivot(i))];
+    }));
+  let sortedSnippetIndices = Array.from(resultSnippetIndices).sort(
     (left, right) => collator.compare(
-      left.substring(getPivot(left)) +
-                      ' \uFFFE ' +
-                      left.substring(0, getPivot(left)),
-      right.substring(getPivot(right)) +
-                      ' \uFFFE ' +
-                      right.substring(0, getPivot(right))));
+      sortKeys.get(left),
+      sortKeys.get(right)));
   for (let propertyIndex of indexEntries.values()) {
-    for (let snippet of resultSnippets) {
-      let entry = propertyIndex.get(snippet);
+    for (let snippetIndex of sortedSnippetIndices) {
+      let entry = propertyIndex.get(snippetIndex);
       if (!entry) {
         continue;
       }
@@ -126,9 +147,10 @@ function search(/**@type {string}*/ query) {
       }
       rangeCount += entrySet.length;
       covered = covered.concat(entrySet);
-      let pivot = getPivot(snippet);
+      let pivot = getPivot(snippetIndex);
+      let snippet = getString(snippetIndex);
       let tail = snippet.substring(pivot);
-      result.push(entry.html.replace(
+      result.push(getString(entry.html).replace(
         "[RESULT TEXT]",
         "<span class=tail" +
         (snippet.includes(",") ? " style=width:100%" : "") + ">" +
@@ -156,17 +178,17 @@ function search(/**@type {string}*/ query) {
       var name = characterNames.get(cp);
       var rs = null;
       if (!name) {
-        for (let [[first, last], {property, snippet}] of radicalStrokeRanges) {
+        for (let [[first, last], {property, snippetIndex}] of radicalStrokeRanges) {
           if (first <= cp && cp <= last) {
-            rs = {property, snippet};
+            rs = {property, snippetIndex};
             break;
           }
         }
         if (rs) {
-          rangeCount += indexEntries.get(rs.property).get(rs.snippet).characters.length;
+          rangeCount += indexEntries.get(rs.property).get(rs.snippetIndex).characters.length;
           result.push(
-            indexEntries.get(rs.property).get(rs.snippet).html.replace(
-            "[RESULT TEXT]", toHTML(rs.snippet)));
+            getString(indexEntries.get(rs.property).get(rs.snippetIndex).html).replace(
+            "[RESULT TEXT]", toHTML(getString(rs.snippetIndex))));
         } else {
           for (let [[first, last], n] of characterNameRanges) {
             if (first <= cp && cp <= last) {
@@ -179,20 +201,20 @@ function search(/**@type {string}*/ query) {
       if (name) {
         rangeCount += 1;
         result.push(
-          (indexEntries.get("Name").get(name) ??
-          indexEntries.get("Name_Alias").get(name)).html.replace(
-          "[RESULT TEXT]", toHTML(name)));
+          // Pick the Name entry, else the Name_Alias one, and only then decode its html index.
+          getString((indexEntries.get("Name").get(name) ?? indexEntries.get("Name_Alias").get(name)).html)
+            .replace("[RESULT TEXT]", toHTML(getString(name))));
       }
     }
     if (/^boop$/i.test(query)) {
         rangeCount += 1;
       result.push(
-        indexEntries.get("Block").get("Betty").html.replace(
+        getString(indexEntries.get("Block").get(bettyIndex).html).replace(
         "[RESULT TEXT]", toHTML("Betty")));
     } else if (/^dood$/i.test(query)) {
         rangeCount += 1;
         result.push(
-          indexEntries.get("Block").get("the").html.replace(
+          getString(indexEntries.get("Block").get(theIndex).html).replace(
           "[RESULT TEXT]", toHTML("the")));
     }
   }

From 78f09073f1c32e4acad048fe4c50e49b3444d7ee Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Wed, 15 Apr 2026 03:39:48 +0200
Subject: [PATCH 2/3] Save another 433 kiB

---
 .../java/org/unicode/text/tools/Indexer.java  |  6 ++++--
 .../org/unicode/text/tools/charindex.js       | 21 +++++++++++--------
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/unicodetools/src/main/java/org/unicode/text/tools/Indexer.java b/unicodetools/src/main/java/org/unicode/text/tools/Indexer.java
index 26a7cfcbb..81b7f745f 100644
--- a/unicodetools/src/main/java/org/unicode/text/tools/Indexer.java
+++ b/unicodetools/src/main/java/org/unicode/text/tools/Indexer.java
@@ -455,8 +455,10 @@ public int compare(UnicodeProperty left, UnicodeProperty right) {
                                                         // zero-padding for size.
                                                         "["
                                                                 + range.codepoint
-                                                                + ","
-                                                                + range.codepointEnd
+                                                                + (range.codepointEnd
+                                                                                != range.codepoint
+                                                                        ? "," + range.codepointEnd
+                                                                        : "")
                                                                 + "]")
                                         .collect(Collectors.joining(",")));
                         file.print("]}],");
diff --git a/unicodetools/src/main/resources/org/unicode/text/tools/charindex.js b/unicodetools/src/main/resources/org/unicode/text/tools/charindex.js
index eca405322..3171fa324 100644
--- a/unicodetools/src/main/resources/org/unicode/text/tools/charindex.js
+++ b/unicodetools/src/main/resources/org/unicode/text/tools/charindex.js
@@ -2,7 +2,7 @@
 /**@type {Map<string, Map<number, number>>}*/
 let wordIndex/*= GENERATED LINE*/;
 // Property name to snippet (compressed) to index entry; the html is compressed.
-/**@type {Map<string, Map<number, {html: number, characters: [number, number][]}>>}*/
+/**@type {Map<string, Map<number, {html: number, characters: ([number]|[number,number])[]}>>}*/
 let indexEntries/*= GENERATED LINE*/;
 /**@type {number}*/
 let bettyIndex/*= GENERATED LINE*/;
@@ -39,7 +39,7 @@ for (let [property, propertyIndex] of indexEntries) {
 }
 
 for (let [name, entry] of indexEntries.get("Name")) {
-  if (entry.characters[0][0] == entry.characters[0][1]) {
+  if (entry.characters[0][0] == entry.characters[0].at(-1)) {
     characterNames.set(entry.characters[0][0], name);
   } else {
     for (let range of entry.characters) {
@@ -227,7 +227,8 @@ function toHTML(/**@type {string}*/ plain) {
               .replaceAll(">", "&gt;")
 }
 
-function superset(/**@type {[number, number][]}*/left, /**@type {[number, number][]}*/right) {
+function superset(/**@type {([number, number]|[number])[]}*/left,
+                  /**@type {([number, number]|[number])[]}*/right) {
   var remaining = right.slice();
   for (containingRange of left) {
     remaining = remaining.flatMap(r => rangeMinus(r, containingRange));
@@ -238,7 +239,8 @@ function superset(/**@type {[number, number][]}*/left, /**@type {[number, number
   return true;
 }
 
-function rangeMinus(/**@type {[number, number]}*/left, /**@type {[number, number]}*/right) {
+function rangeMinus(/**@type {[number, number]|[number]}*/left,
+                    /**@type {[number, number]|[number]}*/right) {
   let intersection = rangeIntersection(left, right);
   if (intersection === left || intersection === right) {
     return [];
@@ -250,16 +252,17 @@ function rangeMinus(/**@type {[number, number]}*/left, /**@type {[number, number
     if (left[0] < intersection[0]) {
       result.push([left[0], intersection[0] - 1]);
     }
-    if (left[1] > intersection[1]) {
-      result.push([intersection[1] + 1, left[1] - 1]);
+    if (left.at(-1) > intersection.at(-1)) {
+      result.push([intersection.at(-1) + 1, left.at(-1) - 1]);
     }
     return result;
   }
 }
 
-function rangeIntersection(/**@type {[number, number]}*/left, /**@type {[number, number]}*/right) {
-  let [leftStart, leftEnd] = left;
-  let [rightStart, rightEnd] = right;
+function rangeIntersection(/**@type {[number, number]|[number]}*/left,
+                           /**@type {[number, number]|[number]}*/right) {
+  let [leftStart, leftEnd] = [left[0], left.at(-1)];
+  let [rightStart, rightEnd] = [right[0], right.at(-1)];
   if (leftEnd < rightStart || rightEnd < leftStart) {
     return null;
   } else {

From 07ad58533aba5f07b31db8ff25830ab949b494cb Mon Sep 17 00:00:00 2001
From: Robin Leroy <egg.robin.leroy@gmail.com>
Date: Mon, 20 Apr 2026 17:54:09 +0200
Subject: [PATCH 3/3] helper class

---
 .../java/org/unicode/text/tools/Indexer.java  | 40 ++++++++++++-------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/unicodetools/src/main/java/org/unicode/text/tools/Indexer.java b/unicodetools/src/main/java/org/unicode/text/tools/Indexer.java
index 81b7f745f..712334b57 100644
--- a/unicodetools/src/main/java/org/unicode/text/tools/Indexer.java
+++ b/unicodetools/src/main/java/org/unicode/text/tools/Indexer.java
@@ -87,6 +87,27 @@ public class Indexer {
 
     private static int maxRSEntryCharacters = 0;
 
+    /** Interns strings in one buffer, each terminated by RECORD_SEPARATOR. */
+    private static class StringIndexer {
+        private final StringBuilder buffer = new StringBuilder();
+        private final HashMap<String, Integer> offsets = new HashMap<>();
+
+        /** Returns the offset of {@code s} in the buffer, appending it first if new. */
+        public int getStringIndex(String s) {
+            final int next = buffer.length();
+            final Integer known = offsets.putIfAbsent(s, next);
+            if (known == null) {
+                buffer.append(s).append(RECORD_SEPARATOR);
+            }
+            return known == null ? next : known;
+        }
+
+        @Override
+        public String toString() {
+            return buffer.toString();
+        }
+    }
+
     static {
         String baseRules =
                 "'<' > '&lt;' ;"
@@ -263,8 +284,7 @@ public int compare(UnicodeProperty left, UnicodeProperty right) {
                 return left.getName().compareTo(right.getName());
             }
         }
-        final StringBuilder allTheStrings = new StringBuilder();
-        final HashMap<String, Integer> stringIndices = new HashMap<>();
+        final var allTheStrings = new StringIndexer();
         // Property to snippet based on property value (as an index in allTheStrings) to index
         // entry.
         Map<UnicodeProperty, Map<Integer, IndexEntry>> indexEntries =
@@ -300,12 +320,7 @@ public int compare(UnicodeProperty left, UnicodeProperty right) {
                     } else if (prop == NAME) {
                         snippet = snippet.replace(Utility.hex(cp), "#");
                     }
-                    final int snippetIndex =
-                            stringIndices.getOrDefault(snippet, allTheStrings.length());
-                    if (snippetIndex == allTheStrings.length()) {
-                        allTheStrings.append(snippet).append(RECORD_SEPARATOR);
-                        stringIndices.put(snippet, snippetIndex);
-                    }
+                    final int snippetIndex = allTheStrings.getStringIndex(snippet);
                     propertyIndex
                             .computeIfAbsent(snippetIndex, k -> new IndexEntry(k, prop))
                             .characters
@@ -339,10 +354,8 @@ public int compare(UnicodeProperty left, UnicodeProperty right) {
                 System.out.println("Indexed plane " + cp / 0x10000);
             }
         }
-        final int bettyIndex = allTheStrings.length();
-        allTheStrings.append("Betty").append(RECORD_SEPARATOR);
-        final int theIndex = allTheStrings.length();
-        allTheStrings.append("the").append(RECORD_SEPARATOR);
+        final int bettyIndex = allTheStrings.getStringIndex("Betty");
+        final int theIndex = allTheStrings.getStringIndex("the");
         indexEntries
                 .get(BLOCK)
                 .computeIfAbsent(bettyIndex, k -> new IndexEntry(k, BLOCK))
@@ -438,8 +451,7 @@ public int compare(UnicodeProperty left, UnicodeProperty right) {
                         if (++i % 1000 == 0) {
                             System.out.println(i + "/" + propertyIndex.size() + "...");
                         }
-                        final int htmlIndex = allTheStrings.length();
-                        allTheStrings.append(indexEntry.toHTML()).append(RECORD_SEPARATOR);
+                        final int htmlIndex = allTheStrings.getStringIndex(indexEntry.toHTML());
                         file.print("[" + indexEntry.snippetIndex + ",{");
                         file.print("html:" + htmlIndex + ",");
                         file.print("characters:[");