diff --git a/gigamap/lucene/src/test/java/org/eclipse/store/gigamap/lucene/LuceneLifecycleTest.java b/gigamap/lucene/src/test/java/org/eclipse/store/gigamap/lucene/LuceneLifecycleTest.java
new file mode 100644
index 00000000..e3a84cde
--- /dev/null
+++ b/gigamap/lucene/src/test/java/org/eclipse/store/gigamap/lucene/LuceneLifecycleTest.java
@@ -0,0 +1,263 @@
+package org.eclipse.store.gigamap.lucene;
+
+/*-
+ * #%L
+ * EclipseStore GigaMap Lucene
+ * %%
+ * Copyright (C) 2023 - 2026 MicroStream Software
+ * %%
+ * This program and the accompanying materials are made
+ * available under the terms of the Eclipse Public License 2.0
+ * which is available at https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ * #L%
+ */
+
+import org.apache.lucene.document.Document;
+import org.eclipse.store.gigamap.types.GigaMap;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Tests the lifecycle of a {@link LuceneIndex} when used through {@link GigaMap}:
+ * close/reopen semantics, update propagation, manual commit, and concurrent access.
+ */
+public class LuceneLifecycleTest
+{
+    // ── shared entity ─────────────────────────────────────────────────────────
+
+    /** Minimal entity with two text fields that the tests index and search. */
+    private static class Article
+    {
+        final String title;
+        final String content;
+
+        Article(final String title, final String content)
+        {
+            this.title = title;
+            this.content = content;
+        }
+    }
+
+    /** Maps an {@link Article} onto the Lucene text fields "title" and "content". */
+    private static class ArticlePopulator extends DocumentPopulator<Article>
+    {
+        @Override
+        public void populate(final Document document, final Article entity)
+        {
+            document.add(createTextField("title", entity.title));
+            document.add(createTextField("content", entity.content));
+        }
+    }
+
+    /** Creates a context built from {@code DirectoryCreator.ByteBuffers()} and an {@link ArticlePopulator}. */
+    private static LuceneContext<Article> standardContext()
+    {
+        return LuceneContext.New(DirectoryCreator.ByteBuffers(), new ArticlePopulator());
+    }
+
+    /** Context that overrides {@code autoCommit()} to return {@code false}. */
+    private static class ManualCommitContext extends LuceneContext.Default<Article>
+    {
+        ManualCommitContext()
+        {
+            super(DirectoryCreator.ByteBuffers(), AnalyzerCreator.Standard(), new ArticlePopulator());
+        }
+
+        @Override
+        public boolean autoCommit()
+        {
+            return false;
+        }
+    }
+
+
+    // ── close / reopen ────────────────────────────────────────────────────────
+
+    /** Verifies that a query issued after {@code close()} still finds the previously added entity. */
+    @Test
+    void closeAndReopenGraphDirectoryRetainsData()
+    {
+        // null directoryCreator → GraphDirectory stores index data in fileEntries inside GigaMap.
+        // After close(), the fileEntries map is still in memory; lazyInit re-uses it.
+        final LuceneContext<Article> ctx = LuceneContext.New(new ArticlePopulator());
+
+        final GigaMap<Article> map = GigaMap.New();
+        final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(ctx));
+
+        map.add(new Article("eclipse", "persistent"));
+        assertEquals(1, idx.query("title:eclipse").size());
+
+        idx.close();
+
+        assertEquals(1, idx.query("title:eclipse").size(),
+            "GraphDirectory index data must survive close/reopen without EmbeddedStorage");
+
+        idx.close();
+    }
+
+
+    // ── update propagation ────────────────────────────────────────────────────
+
+    /** Verifies that {@code map.set(id, ...)} replaces the entity's document in the index. */
+    @Test
+    void setReplacesDocumentInIndex()
+    {
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(standardContext())))
+        {
+            final long id = map.add(new Article("original title", "content"));
+
+            assertEquals(1, idx.query("title:original").size());
+            assertEquals(0, idx.query("title:replaced").size());
+
+            map.set(id, new Article("replaced title", "content"));
+
+            assertEquals(0, idx.query("title:original").size(),
+                "Old document must be removed from Lucene index after set()");
+            assertEquals(1, idx.query("title:replaced").size(),
+                "New document must be visible in Lucene index after set()");
+        }
+    }
+
+
+    // ── manual commit ─────────────────────────────────────────────────────────
+
+    /** Verifies that data is queryable before and after an explicit {@code commit()} with auto-commit off. */
+    @Test
+    void manualCommitWorksCorrectly()
+    {
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(
+            LuceneIndex.Category(new ManualCommitContext())))
+        {
+            map.add(new Article("eclipse", "manual commit test"));
+
+            // NRT reader sees uncommitted changes immediately
+            assertEquals(1, idx.query("title:eclipse").size(),
+                "NRT reader must see changes before explicit commit");
+
+            // Explicit commit must work without errors and keep data visible
+            assertDoesNotThrow(idx::commit);
+            assertEquals(1, idx.query("title:eclipse").size(),
+                "Data must still be visible after explicit commit");
+        }
+    }
+
+
+    // ── large corpus ──────────────────────────────────────────────────────────
+
+    /** Indexes 1000 entities, half titled "even" and half "odd", and checks both result counts. */
+    @Test
+    void largeCorpus1000Docs()
+    {
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(standardContext())))
+        {
+            for(int i = 0; i < 1000; i++)
+            {
+                final String category = i % 2 == 0 ? "even" : "odd";
+                map.add(new Article(category, "doc " + i));
+            }
+
+            final List<Article> evens = idx.query("title:even", 1000);
+            final List<Article> odds = idx.query("title:odd", 1000);
+
+            assertEquals(500, evens.size(), "Expected 500 even-titled docs");
+            assertEquals(500, odds.size(), "Expected 500 odd-titled docs");
+        }
+    }
+
+
+    // ── concurrent add + search ───────────────────────────────────────────────
+
+    /**
+     * Runs writer and reader threads against the same map and index, then checks that no
+     * thread failed and that every added entity is findable.
+     */
+    @Test
+    void concurrentAddAndSearch() throws InterruptedException
+    {
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(standardContext())))
+        {
+            final int WRITE_THREADS = 4;
+            final int ADDS_PER_THREAD = 50;
+            final int READ_THREADS = 2;
+            final int READS_PER_READER = 20;
+
+            final CountDownLatch writeLatch = new CountDownLatch(WRITE_THREADS);
+            final CountDownLatch readLatch = new CountDownLatch(READ_THREADS);
+            final AtomicInteger errors = new AtomicInteger();
+            // Keeps the first worker failure so the assertion below can report its cause.
+            final AtomicReference<Throwable> firstFailure = new AtomicReference<>();
+
+            for(int t = 0; t < WRITE_THREADS; t++)
+            {
+                final int threadId = t;
+                new Thread(() ->
+                {
+                    try
+                    {
+                        for(int i = 0; i < ADDS_PER_THREAD; i++)
+                        {
+                            map.add(new Article("concurrent", "thread " + threadId + " item " + i));
+                        }
+                    }
+                    catch(final Throwable e)
+                    {
+                        // Throwable, not Exception: an Error in a worker must fail the test as well.
+                        firstFailure.compareAndSet(null, e);
+                        errors.incrementAndGet();
+                    }
+                    finally
+                    {
+                        writeLatch.countDown();
+                    }
+                }).start();
+            }
+
+            for(int r = 0; r < READ_THREADS; r++)
+            {
+                new Thread(() ->
+                {
+                    try
+                    {
+                        for(int i = 0; i < READS_PER_READER; i++)
+                        {
+                            idx.query("title:concurrent");
+                        }
+                    }
+                    catch(final Throwable e)
+                    {
+                        firstFailure.compareAndSet(null, e);
+                        errors.incrementAndGet();
+                    }
+                    finally
+                    {
+                        readLatch.countDown();
+                    }
+                }).start();
+            }
+
+            assertTrue(writeLatch.await(30, TimeUnit.SECONDS), "Writers did not finish in time");
+            assertTrue(readLatch.await(30, TimeUnit.SECONDS), "Readers did not finish in time");
+
+            assertEquals(0, errors.get(),
+                () -> "No concurrent errors expected; first failure: " + firstFailure.get());
+            assertEquals(WRITE_THREADS * ADDS_PER_THREAD, (int) map.size());
+
+            final List<Article> finalHits = idx.query("title:concurrent", WRITE_THREADS * ADDS_PER_THREAD);
+            assertEquals(WRITE_THREADS * ADDS_PER_THREAD, finalHits.size(),
+                "All added entities must be findable after concurrent writes complete");
+        }
+    }
+}
diff --git a/gigamap/lucene/src/test/java/org/eclipse/store/gigamap/lucene/LuceneQueryTypesTest.java b/gigamap/lucene/src/test/java/org/eclipse/store/gigamap/lucene/LuceneQueryTypesTest.java
new file mode 100644
index 00000000..9ab1ea51
--- /dev/null
+++ b/gigamap/lucene/src/test/java/org/eclipse/store/gigamap/lucene/LuceneQueryTypesTest.java
@@ -0,0 +1,330 @@
+package org.eclipse.store.gigamap.lucene;
+
+/*-
+ * #%L
+ * EclipseStore GigaMap Lucene
+ * %%
+ * Copyright (C) 2023 - 2026 MicroStream Software
+ * %%
+ * This program and the accompanying materials are made
+ * available under the terms of the Eclipse Public License 2.0
+ * which is available at https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ * #L%
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.ByteBuffersDirectory;
+import org.apache.lucene.store.Directory;
+import org.eclipse.store.gigamap.types.GigaMap;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Tests that various Lucene query types work correctly through the GigaMap-Lucene integration.
+ * These tests exercise the integration layer, not Lucene internals.
+ */
+public class LuceneQueryTypesTest
+{
+    // ── shared entity ─────────────────────────────────────────────────────────
+
+    /** Minimal entity with two text fields that the tests index and search. */
+    private static class Article
+    {
+        final String title;
+        final String content;
+
+        Article(final String title, final String content)
+        {
+            this.title = title;
+            this.content = content;
+        }
+    }
+
+    /** Maps an {@link Article} onto the Lucene text fields "title" and "content". */
+    private static class ArticlePopulator extends DocumentPopulator<Article>
+    {
+        @Override
+        public void populate(final Document document, final Article entity)
+        {
+            document.add(createTextField("title", entity.title));
+            document.add(createTextField("content", entity.content));
+        }
+    }
+
+    /** Creates a context built from {@code DirectoryCreator.ByteBuffers()} and an {@link ArticlePopulator}. */
+    private static LuceneContext<Article> standardContext()
+    {
+        return LuceneContext.New(DirectoryCreator.ByteBuffers(), new ArticlePopulator());
+    }
+
+
+    // ── wildcard ──────────────────────────────────────────────────────────────
+
+    /** Verifies that the trailing-wildcard query {@code title:ecl*} matches only the "eclipse store" article. */
+    @Test
+    void wildcardSuffixQuery()
+    {
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(standardContext())))
+        {
+            map.add(new Article("eclipse store", "x"));
+            map.add(new Article("java runtime", "x"));
+
+            final List<Article> hits = idx.query("title:ecl*");
+
+            assertEquals(1, hits.size());
+            assertEquals("eclipse store", hits.get(0).title);
+        }
+    }
+
+    /** Verifies that the leading-wildcard query {@code title:*lipse} is accepted and matches "eclipse". */
+    @Test
+    void wildcardLeadingQuery()
+    {
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(standardContext())))
+        {
+            map.add(new Article("eclipse", "x"));
+            map.add(new Article("java", "x"));
+
+            // Leading wildcards are enabled via setAllowLeadingWildcard(true) in LuceneIndex
+            final List<Article> hits = idx.query("title:*lipse");
+
+            assertEquals(1, hits.size());
+            assertEquals("eclipse", hits.get(0).title);
+        }
+    }
+
+
+    // ── phrase ────────────────────────────────────────────────────────────────
+
+    /** Verifies that a quoted phrase matches only the document containing the words adjacently. */
+    @Test
+    void phraseQuery()
+    {
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(standardContext())))
+        {
+            map.add(new Article("T1", "eclipse store gigamap"));    // adjacent — matches
+            map.add(new Article("T2", "eclipse foundation store")); // non-adjacent — no match
+            map.add(new Article("T3", "java runtime"));             // unrelated
+
+            final List<Article> hits = idx.query("content:\"eclipse store\"");
+
+            assertEquals(1, hits.size());
+            assertEquals("T1", hits.get(0).title);
+        }
+    }
+
+
+    // ── boolean ───────────────────────────────────────────────────────────────
+
+    /** Verifies that two SHOULD clauses return the entities matching either term. */
+    @Test
+    void booleanOrQuery()
+    {
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(standardContext())))
+        {
+            map.add(new Article("alpha", "x"));
+            map.add(new Article("beta", "x"));
+            map.add(new Article("gamma", "x"));
+
+            final Query orQuery = new BooleanQuery.Builder()
+                .add(new TermQuery(new Term("title", "alpha")), BooleanClause.Occur.SHOULD)
+                .add(new TermQuery(new Term("title", "beta")), BooleanClause.Occur.SHOULD)
+                .build();
+
+            final List<Article> hits = idx.query(orQuery);
+            assertEquals(2, hits.size());
+        }
+    }
+
+    /** Verifies that a MUST_NOT clause excludes an entity that also matches the MUST clause. */
+    @Test
+    void booleanMustNotQuery()
+    {
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(standardContext())))
+        {
+            map.add(new Article("eclipse java", "x"));  // has both "eclipse" and "java"
+            map.add(new Article("eclipse store", "x")); // has "eclipse", NOT "java"
+
+            final Query query = new BooleanQuery.Builder()
+                .add(new TermQuery(new Term("title", "eclipse")), BooleanClause.Occur.MUST)
+                .add(new TermQuery(new Term("title", "java")), BooleanClause.Occur.MUST_NOT)
+                .build();
+
+            final List<Article> hits = idx.query(query);
+            assertEquals(1, hits.size());
+            assertEquals("eclipse store", hits.get(0).title);
+        }
+    }
+
+
+    // ── fuzzy ─────────────────────────────────────────────────────────────────
+
+    /** Verifies that a fuzzy query with edit distance 1 finds the correctly spelled term. */
+    @Test
+    void fuzzyQuery()
+    {
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(standardContext())))
+        {
+            map.add(new Article("eclipse", "x"));
+            map.add(new Article("java", "x"));
+
+            // "ecllipse" has Levenshtein distance 1 from "eclipse" (one extra 'l')
+            final List<Article> hits = idx.query("title:ecllipse~1");
+
+            assertEquals(1, hits.size());
+            assertEquals("eclipse", hits.get(0).title);
+        }
+    }
+
+
+    // ── score ordering ────────────────────────────────────────────────────────
+
+    /** Verifies that search results come back in score order, higher term frequency first. */
+    @Test
+    void scoreOrderingByTermFrequency()
+    {
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(standardContext())))
+        {
+            final long idHigh = map.add(new Article("eclipse eclipse eclipse", "x"));
+            final long idLow = map.add(new Article("eclipse", "x"));
+
+            final LuceneSearchResult<Article> result = idx.search("title:eclipse", 10);
+            final var entries = result.toList();
+
+            assertEquals(2, entries.size());
+            assertTrue(
+                entries.get(0).score() >= entries.get(1).score(),
+                "Entry with higher term frequency must score at least as high"
+            );
+            assertEquals(idHigh, entries.get(0).entityId(),
+                "Higher-frequency entry must appear first in score order");
+            assertEquals(idLow, entries.get(1).entityId(),
+                "Lower-frequency entry must appear second in score order");
+        }
+    }
+
+
+    // ── custom AnalyzerCreator ────────────────────────────────────────────────
+
+    /** Verifies that a stop word configured through a custom {@code AnalyzerCreator} is not searchable while other terms are. */
+    @Test
+    void customAnalyzerCreatorAffectsTokenization()
+    {
+        // StandardAnalyzer with "eclipse" as a custom stop word.
+        // The term "eclipse" is stripped at both index and query time.
+        final class CustomStopWordCreator extends AnalyzerCreator
+        {
+            @Override
+            public Analyzer createAnalyzer()
+            {
+                return new StandardAnalyzer(new CharArraySet(List.of("eclipse"), false));
+            }
+        }
+
+        final LuceneContext<Article> ctx = LuceneContext.New(
+            DirectoryCreator.ByteBuffers(),
+            new CustomStopWordCreator(),
+            new ArticlePopulator()
+        );
+
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(ctx)))
+        {
+            map.add(new Article("eclipse store", "x"));
+
+            assertTrue(idx.query("title:eclipse").isEmpty(),
+                "Custom stop word 'eclipse' must not be indexed");
+            assertEquals(1, idx.query("title:store").size(),
+                "Non-stop-word term 'store' must still be indexed");
+        }
+    }
+
+
+    // ── custom DirectoryCreator ───────────────────────────────────────────────
+
+    /** Verifies that a custom {@code DirectoryCreator} has been called by the time an entity was added. */
+    @Test
+    void customDirectoryCreatorIsInvoked()
+    {
+        final boolean[] invoked = {false};
+
+        final DirectoryCreator trackingCreator = new DirectoryCreator()
+        {
+            @Override
+            public Directory createDirectory()
+            {
+                invoked[0] = true;
+                return new ByteBuffersDirectory();
+            }
+        };
+
+        final LuceneContext<Article> ctx = LuceneContext.New(trackingCreator, new ArticlePopulator());
+
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(ctx)))
+        {
+            map.add(new Article("test", "custom directory"));
+            assertTrue(invoked[0], "Custom DirectoryCreator must be invoked on first index access");
+        }
+    }
+
+
+    // ── numeric range ─────────────────────────────────────────────────────────
+
+    /** Verifies that an {@code IntPoint} range query over the "year" field returns the entities inside the range. */
+    @Test
+    void numericRangeQuery()
+    {
+        // content field is (ab)used to carry the year as a string for test simplicity
+        final class YearPopulator extends DocumentPopulator<Article>
+        {
+            @Override
+            public void populate(final Document document, final Article entity)
+            {
+                document.add(createTextField("title", entity.title));
+                document.add(createIntField("year", Integer.parseInt(entity.content)));
+            }
+        }
+
+        final LuceneContext<Article> ctx = LuceneContext.New(
+            DirectoryCreator.ByteBuffers(),
+            new YearPopulator()
+        );
+
+        final GigaMap<Article> map = GigaMap.New();
+        try(final LuceneIndex<Article> idx = map.index().register(LuceneIndex.Category(ctx)))
+        {
+            map.add(new Article("alpha", "2020"));
+            map.add(new Article("beta", "2021"));
+            map.add(new Article("gamma", "2022"));
+            map.add(new Article("delta", "2023"));
+
+            // IntPoint range bounds are inclusive, so 2021 and 2022 both match.
+            final Query rangeQuery = IntPoint.newRangeQuery("year", 2021, 2022);
+            final List<Article> hits = idx.query(rangeQuery);
+
+            assertEquals(2, hits.size());
+        }
+    }
+}