From a37ad7a285c424be99fdab2199dff747f23d77f4 Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Wed, 18 Oct 2023 16:28:21 +0200 Subject: [PATCH 01/10] ExternalizableBean: drop redundant 'class' from log --- dump/src/util/dump/ExternalizableBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dump/src/util/dump/ExternalizableBean.java b/dump/src/util/dump/ExternalizableBean.java index 536646a..129b8e2 100644 --- a/dump/src/util/dump/ExternalizableBean.java +++ b/dump/src/util/dump/ExternalizableBean.java @@ -792,7 +792,7 @@ default void readExternal( ObjectInput in ) throws IOException, ClassNotFoundExc if ( f != null ) { f.set(this, o); } - // throw new IllegalArgumentException("The field type " + fieldTypes[i] + " in class " + getClass() + // throw new IllegalArgumentException("The field type " + fieldTypes[i] + " in " + getClass() // + " is unsupported by util.dump.ExternalizableBean."); } } @@ -802,7 +802,7 @@ default void readExternal( ObjectInput in ) throws IOException, ClassNotFoundExc throw e; } catch ( Throwable e ) { - throw new RuntimeException("Failed to read externalized instance. Maybe the field order was changed? class " + getClass(), e); + throw new RuntimeException("Failed to read externalized instance. Maybe the field order was changed? " + getClass(), e); } } From e24fa7562a3ee1d044eb20e2b34724fb32b03a29 Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Fri, 13 Oct 2023 14:38:30 +0200 Subject: [PATCH 02/10] UniqueIndex: extract interface --- dump/src/util/dump/UniqueConstraint.java | 19 +++++++++++++++++++ dump/src/util/dump/UniqueIndex.java | 13 +++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 dump/src/util/dump/UniqueConstraint.java diff --git a/dump/src/util/dump/UniqueConstraint.java b/dump/src/util/dump/UniqueConstraint.java new file mode 100644 index 0000000..e21b6b5 --- /dev/null +++ b/dump/src/util/dump/UniqueConstraint.java @@ -0,0 +1,19 @@ +package util.dump; + +public interface UniqueConstraint { + + boolean contains( int key ); + boolean contains( long key ); + boolean contains( Object key ); + + long[] getAllLongKeys(); + + Object getKey( E o ); + + int getNumKeys(); + + E lookup( int key ); + E lookup( long key ); + E lookup( Object key ); + +} diff --git a/dump/src/util/dump/UniqueIndex.java b/dump/src/util/dump/UniqueIndex.java index e706f03..6ba376a 100644 --- a/dump/src/util/dump/UniqueIndex.java +++ b/dump/src/util/dump/UniqueIndex.java @@ -27,7 +27,7 @@ import util.dump.stream.SingleTypeObjectInputStream; -public class UniqueIndex extends DumpIndex { +public class UniqueIndex extends DumpIndex implements UniqueConstraint { protected TObjectLongHashMap _lookupObject; protected TLongLongHashMap _lookupLong; @@ -156,6 +156,7 @@ public int[] getAllIntKeys() { return _lookupInt.keys(); } + @Override public long[] getAllLongKeys() { return _lookupLong.keys(); } @@ -178,6 +179,7 @@ public TLongList getAllPositions() { return pos; } + @Override public Object getKey( E o ) { if ( _fieldIsInt ) { return getIntKey(o); @@ -202,6 +204,7 @@ public int getNumKeys() { throw new IllegalStateException("weird, all lookup maps are null"); } + @Override public E lookup( int key ) { synchronized ( _dump ) { if ( !_fieldIsInt ) { @@ -210,12 +213,13 @@ public E lookup( int key ) { } long pos = getPosition(key); if ( pos < 0 ) { - return (E)null; + return null; } return _dump.get(pos); } } + @Override public E lookup( long key ) { synchronized ( _dump ) { if ( !_fieldIsLong ) { @@ -224,12 +228,13 @@ public E lookup( long key ) { } long pos = getPosition(key); if ( pos < 0 ) { - return (E)null; + return null; } return _dump.get(pos); } } + @Override public E lookup( Object key ) { synchronized ( _dump ) { if ( (_fieldIsLong || _fieldIsLongObject) && key instanceof Long ) { @@ -244,7 +249,7 @@ public E lookup( Object key ) { } long pos = getPosition(key); if ( pos < 0 ) { - return (E)null; + return null; } return _dump.get(pos); } From bc899d31839f074490e144e450b72c55108ac090 Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Mon, 16 Oct 2023 16:38:36 +0200 Subject: [PATCH 03/10] DuplicateKeyException: move from UniqueIndex to UniqueConstraint --- dump/src/util/dump/Dump.java | 2 +- dump/src/util/dump/GroupedIndex.java | 2 +- dump/src/util/dump/UniqueConstraint.java | 11 +++++++++++ dump/src/util/dump/UniqueIndex.java | 12 ------------ 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/dump/src/util/dump/Dump.java b/dump/src/util/dump/Dump.java index 9228f87..8d0a998 100644 --- a/dump/src/util/dump/Dump.java +++ b/dump/src/util/dump/Dump.java @@ -49,7 +49,7 @@ import gnu.trove.list.array.TByteArrayList; import gnu.trove.set.hash.TLongHashSet; import util.dump.ExternalizableBean.externalizationVersion; -import util.dump.UniqueIndex.DuplicateKeyException; +import util.dump.UniqueConstraint.DuplicateKeyException; import util.dump.cache.SoftLRUCache; import util.dump.io.IOUtils; import util.dump.sort.InfiniteSorter; diff --git a/dump/src/util/dump/GroupedIndex.java b/dump/src/util/dump/GroupedIndex.java index 9931471..1502b7b 100644 --- a/dump/src/util/dump/GroupedIndex.java +++ b/dump/src/util/dump/GroupedIndex.java @@ -3,7 +3,7 @@ import java.util.Iterator; import java.util.NoSuchElementException; -import util.dump.UniqueIndex.DuplicateKeyException; +import util.dump.UniqueConstraint.DuplicateKeyException; import util.dump.reflection.FieldAccessor; diff --git a/dump/src/util/dump/UniqueConstraint.java b/dump/src/util/dump/UniqueConstraint.java index e21b6b5..6f24be4 100644 --- a/dump/src/util/dump/UniqueConstraint.java +++ b/dump/src/util/dump/UniqueConstraint.java @@ -16,4 +16,15 @@ public interface UniqueConstraint { E lookup( long key ); E lookup( Object key ); + /** + * This Exception is thrown, when trying to add a non-unique index-value to a dump. + */ + class DuplicateKeyException extends RuntimeException { + + private static final long serialVersionUID = -7959993269514169802L; + + public DuplicateKeyException( String message ) { + super(message); + } + } } diff --git a/dump/src/util/dump/UniqueIndex.java b/dump/src/util/dump/UniqueIndex.java index 6ba376a..7b8edca 100644 --- a/dump/src/util/dump/UniqueIndex.java +++ b/dump/src/util/dump/UniqueIndex.java @@ -685,16 +685,4 @@ private void addToIgnoredPositions( long pos ) { } } - /** - * This Exception is thrown, when trying to add a non-unique index-value to a dump. - */ - public static class DuplicateKeyException extends RuntimeException { - - private static final long serialVersionUID = -7959993269514169802L; - - public DuplicateKeyException( String message ) { - super(message); - } - } - } From 6dcba9feabfbf77daf4e56644a036259d720aba1 Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Mon, 16 Oct 2023 12:26:05 +0200 Subject: [PATCH 04/10] MmapLongIdIndex: add simple-as-stupid array-based off-heap index for long (and int) keys --- dump/src/util/dump/MmapLongIdIndex.java | 1035 ++++++++++++++++++ dump/test/util/dump/MmapLongIdIndexTest.java | 568 ++++++++++ pom.xml | 8 +- 3 files changed, 1610 insertions(+), 1 deletion(-) create mode 100644 dump/src/util/dump/MmapLongIdIndex.java create mode 100644 dump/test/util/dump/MmapLongIdIndexTest.java diff --git a/dump/src/util/dump/MmapLongIdIndex.java b/dump/src/util/dump/MmapLongIdIndex.java new file mode 100644 index 0000000..592d925 --- /dev/null +++ b/dump/src/util/dump/MmapLongIdIndex.java @@ -0,0 +1,1035 @@ +package util.dump; + +import static java.nio.ByteOrder.BIG_ENDIAN; +import static java.nio.channels.FileChannel.MapMode.READ_ONLY; +import static java.nio.channels.FileChannel.MapMode.READ_WRITE; +import static java.nio.file.StandardOpenOption.CREATE_NEW; +import static java.nio.file.StandardOpenOption.SPARSE; +import static java.nio.file.StandardOpenOption.WRITE; +import static jdk.incubator.foreign.MemoryLayout.PathElement.groupElement; +import static jdk.incubator.foreign.MemoryLayout.PathElement.sequenceElement; +import static jdk.incubator.foreign.MemoryLayout.sequenceLayout; +import static jdk.incubator.foreign.MemoryLayout.structLayout; +import static jdk.incubator.foreign.MemoryLayouts.JAVA_LONG; + +import java.io.File; +import java.io.IOException; +import java.lang.invoke.VarHandle; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.EnumSet; +import java.util.Map; +import java.util.function.ToLongFunction; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import gnu.trove.list.TLongList; +import gnu.trove.list.array.TLongArrayList; +import jdk.incubator.foreign.GroupLayout; +import jdk.incubator.foreign.MemoryLayout; +import jdk.incubator.foreign.MemoryLayout.PathElement; +import jdk.incubator.foreign.MemorySegment; +import jdk.incubator.foreign.ResourceScope; +import util.dump.reflection.FieldAccessor; +import util.dump.reflection.FieldFieldAccessor; +import util.dump.reflection.Reflection; + + +/** + * Special-purpose fixed-size off-heap id-to-pos lookup (not only) for sharded dumps. + *

+ * Employs a simple array-like approach, based on the assumption that IDs either start at 1 and keep incrementing, or will be constrained to a closed range + * defined a priori. In the former case, the backing file will keep growing, while in the latter, the bounds are set at initialization. + *

+ * Requires compiler and runtime parameter --add-modules=jdk.incubator.foreign in order to work. + */ +public abstract class MmapLongIdIndex extends DumpIndex implements UniqueConstraint { + + private static final Logger _log = LoggerFactory.getLogger(MmapLongIdIndex.class); + + private static final VarHandle LONG_ARRAY_ACCESS = sequenceLayout(JAVA_LONG).varHandle(long.class, sequenceElement()); + + public static MmapLongIdIndex forClosedRange( Dump dump, String fieldName, long minKey, long maxKey ) throws NoSuchFieldException { + return new ClosedRangeMmapLongIdIndex<>(dump, fieldName, minKey, maxKey); + } + + public static MmapLongIdIndex forClosedRange( Dump dump, FieldAccessor fieldAccessor, long minKey, long maxKey ) { + return new ClosedRangeMmapLongIdIndex<>(dump, fieldAccessor, minKey, maxKey); + } + + public static MmapLongIdIndex forOpenRange( Dump dump, String fieldName, long minKey ) throws NoSuchFieldException { + return new OpenRangeMmapLongIdIndex<>(dump, fieldName, minKey); + } + + public static MmapLongIdIndex forOpenRange( Dump dump, FieldAccessor fieldAccessor, long minKey ) { + return new OpenRangeMmapLongIdIndex<>(dump, fieldAccessor, minKey); + } + + private static boolean isPowerOfTwo( long n ) { + return n > 0L && (n & n - 1L) == 0L; + } + + private static long longArrayGet( MemorySegment array, long index ) { + return (long)LONG_ARRAY_ACCESS.get(array, index); + } + + private static long longArrayGetVolatile( MemorySegment array, long index ) { + return (long)LONG_ARRAY_ACCESS.getVolatile(array, index); + } + + private static void longArraySet( MemorySegment array, long index, long pos ) { + LONG_ARRAY_ACCESS.set(array, index, pos); + } + + private static void longArraySetVolatile( MemorySegment array, long index, long pos ) { + LONG_ARRAY_ACCESS.setVolatile(array, index, pos); + } + + protected final ToLongFunction _getKey; + + protected final Path _lookupPath; + + protected final long _minKey; + protected final long _maxKey; + + private final HeaderCorrections _headerCorrections = new HeaderCorrections(); + + protected FileLayout _fileLayout; + private Header _header; + + private MemorySegment _tableSegment; + private volatile long _tableCapacity; + + private MmapLongIdIndex( Dump dump, String fieldName, long minKey, long maxKey ) throws NoSuchFieldException { + this(dump, new FieldFieldAccessor(Reflection.getField(dump._beanClass, fieldName)), minKey, maxKey); + } + + private MmapLongIdIndex( Dump dump, FieldAccessor fieldAccessor, long minKey, long maxKey ) { + super(dump, fieldAccessor, new File(dump.getDumpFile().getParentFile(), dump.getDumpFile().getName() + "." + fieldAccessor.getName() + ".mmap.lookup")); + + _lookupPath = Paths.get(getLookupFile().getPath()); + + _minKey = minKey; + _maxKey = maxKey; + + if ( _fieldIsLong ) { + if ( _fieldIsLongObject ) { + _getKey = o -> { + try { + return (Long)_fieldAccessor.get(o); + } + catch ( Exception e ) { + throw new RuntimeException(e); + } + }; + } else { + _getKey = o -> { + try { + return _fieldAccessor.getLong(o); + } + catch ( Exception e ) { + throw new RuntimeException(e); + } + }; + } + } else if ( _fieldIsInt ) { + if ( _fieldIsIntObject ) { + _getKey = o -> { + try { + return (Integer)_fieldAccessor.get(o); + } + catch ( Exception e ) { + throw new RuntimeException(e); + } + }; + } else { + _getKey = o -> { + try { + return _fieldAccessor.getInt(o); + } + catch ( Exception e ) { + throw new RuntimeException(e); + } + }; + } + } else { + throw new IllegalStateException("only long (and int) fields implemented"); + } + } + + @Override + public void close() throws IOException { + _log.info("{} closing...", _lookupPath.getFileName()); + flushTable(); + closeHeader(); + + super.close(); + _log.info("{} closed.", _lookupPath.getFileName()); + } + + @Override + public boolean contains( int key ) { + return contains((long)key); + } + + @Override + public boolean contains( long key ) { + long index = indexFor(key); + long pos = getPosAt(index); + + if ( pos < 0 ) { + return false; + } + + synchronized ( _dump ) { + return !_dump._deletedPositions.contains(pos); + } + } + + @Override + public boolean contains( Object key ) { + if ( key instanceof Long l ) { + return contains(l.longValue()); + } + if ( key instanceof Integer i ) { + return contains(i.intValue()); + } + + throw new IllegalArgumentException("only long (and int) fields implemented"); + } + + @Override + public void flush() throws IOException { + // We don't have to do anything to push data out-of-process, it's already in the page cache. Hammering the file to disk all the time gets us nowhere. + } + + @Override + public long[] getAllLongKeys() { + TLongList keys = new TLongArrayList(getNumKeys(), -1); + MemorySegment table = _tableSegment.asReadOnly(); + + for ( long index = 0, n = capacity(table); index < n; ++index ) { + // not used during live operation, hence concurrency is not an issue + if ( getPosAt(table, index) >= 0 ) { + keys.add(keyOffsetRevert(index)); + } + } + + return keys.toArray(); + } + + @Override + public TLongList getAllPositions() { + throw new UnsupportedOperationException(); + } + + @Override + public Object getKey( E o ) { + return keyFor(o); + } + + @Override + public int getNumKeys() { + return (int)_header.getNumKeys(); + } + + @Override + public E lookup( int key ) { + return lookup((long)key); + } + + @Override + public E lookup( long key ) { + long index = indexFor(key); + long pos = getPosAt(index); + + if ( pos < 0 ) { + return null; + } + + synchronized ( _dump ) { + return !_dump._deletedPositions.contains(pos) ? _dump.get(pos) : null; + } + } + + @Override + public E lookup( Object key ) { + if ( key instanceof Long l ) { + return lookup(l.longValue()); + } + if ( key instanceof Integer i ) { + return lookup(i.longValue()); + } + + throw new IllegalArgumentException("only long (and int) fields implemented"); + } + + protected long capacity( MemorySegment segment ) { + return segment.byteSize() / Long.BYTES; + } + + @Override + protected boolean checkMeta() { + return super.checkMeta() && checkHeader(); + } + + protected boolean checkNumKeys( Header header ) { + if ( header.getNumKeys() <= 0 ) { + return false; + } + return header.getNumKeys() <= header.getTableBytes() / Long.BYTES; + } + + @Override + protected String getIndexType() { + return MmapLongIdIndex.class.getSimpleName(); + } + + protected void growTableSegment( long minTableSize ) { + long tableOffset = _header.getTableOffset(); + long tableSize = _header.getTableBytes(); + + long currentFileSize = tableOffset + tableSize; + long minFileSize = tableOffset + minTableSize; + + if ( minFileSize <= currentFileSize ) { + throw new IllegalArgumentException("no need to grow table if min size <= current size"); + } + + try { + long alignedFileSize = minFileSize + (-minFileSize & (_fileLayout.blockSize() - 1)); + + _header.setTableBytes(alignedFileSize - tableOffset); + mapTableSegment(); + + _log.info("{} grew from {} to {} bytes", _lookupPath.getFileName(), currentFileSize, alignedFileSize); + } + catch ( IOException e ) { + throw new RuntimeException(e); + } + } + + @Override + protected void initLookupMap() { + // does nothing + } + + @Override + protected void initLookupOutputStream() { + try { + if ( Files.exists(_lookupPath) ) { + openExisting(); + } else { + createNew(); + } + } + catch ( IOException e ) { + throw new RuntimeException(e); + } + } + + protected abstract long initialTableSize(); + + @Override + protected void load() { + // does nothing, we only need to open the mmapped file + } + + protected abstract void setPosAt( long index, long pos ); + + protected final void setPosAtVolatile( long index, long pos ) { + longArraySetVolatile(_tableSegment, index, posOffsetApply(pos)); + } + + protected long tableCapacity() { + return _tableCapacity; + } + + @Override + void add( E elem, long pos ) { + if ( add0(elem, pos) ) { + _header.incrementNumKeys(1); + } + } + + @Override + void delete( E elem, long pos ) { + if ( delete0(elem) ) { + _header.incrementNumKeys(-1); + } + } + + @Override + boolean isUpdatable( E oldItem, E newItem ) { + return true; + } + + @Override + void update( long pos, E oldElem, E newElem ) { + if ( keyFor(oldElem) == keyFor(newElem) ) { + return; // pos and key are identical => no change + } + + boolean deleted = delete0(oldElem); + boolean added = add0(newElem, pos); + + if ( added == deleted ) { + return; // key count unchanged + } + + if ( added ) { + _header.incrementNumKeys(1); + } else { // deleted + _header.incrementNumKeys(-1); + } + } + + private boolean add0( E elem, long pos ) { + long index = indexFor(elem); + boolean unique = getPosAt(index) < 0; + if ( !unique ) { + throw new DuplicateKeyException("Dump already contains an instance with the key " + keyFor(elem)); + } + + setPosAt(index, pos); + return true; + } + + private void applyHeaderCorrections() { + if ( _headerCorrections.numKeys != null ) { + _header.setNumKeys(_headerCorrections.numKeys); + } + } + + private boolean checkConsistency( Header header, MemorySegment tableSegment ) { + boolean consistent = true; + long start = System.nanoTime(); + + long dumpSize = _dump._outputStream._n; + long numKeys = 0; + for ( long i = 0, n = capacity(tableSegment); i < n; ++i ) { + // not used during live operation, hence concurrency is not an issue + long pos = getPosAt(tableSegment, i); + if ( pos >= 0 ) { + if ( _dump._deletedPositions.contains(pos) ) { + _log.warn("{} Found deleted position! Will rebuild index.", _lookupPath.getFileName()); + consistent = false; // TODO: probably just an incomplete write during deletion, add offending positions to header corrections for later removal + } + if ( pos >= dumpSize ) { + _log.warn("{} Found position {} beyond the end of the dump file with size {}! Will rebuild index.", _lookupPath.getFileName(), pos, dumpSize); + consistent = false; + } + + ++numKeys; + } + } + + if ( numKeys != header.getNumKeys() ) { + _log.info("{} numKeys differ between actual table ({}) and caching header ({}), fixing.", _lookupPath.getFileName(), numKeys, header.getNumKeys()); + _headerCorrections.numKeys = numKeys; + } + + Duration duration = Duration.ofNanos(System.nanoTime() - start); + if ( consistent ) { + _log.info("{} was checked and found to be {} in {}", _lookupPath.getFileName(), "consistent", duration); + } else { + _log.warn("{} was checked and found to be {} in {}", _lookupPath.getFileName(), "inconsistent", duration); + } + return consistent; + } + + private boolean checkHeader() { + if ( !Files.exists(_lookupPath) ) { + return false; + } + + try (ResourceScope autoClosedScope = ResourceScope.newConfinedScope()) { + long fileSize = Files.size(_lookupPath); + if ( fileSize < LeadIn.byteSize() ) { + _log.warn("{} file is too small to contain even the header. Will rebuild index.", _lookupPath.getFileName()); + return false; + } + + LeadIn leadIn = new LeadIn(MemorySegment.mapFile(_lookupPath, 0, LeadIn.byteSize(), READ_ONLY, autoClosedScope)); + + if ( leadIn.getFileMagic() != LeadIn.FILE_MAGIC ) { + _log.warn("{} has wrong file magic. Will rebuild index.", _lookupPath.getFileName()); + return false; + } + + boolean checkRequired = false; + + // version or size unset, unknown, or mismatching + if ( !Header.isReadable(leadIn.getLayoutVersion()) ) { + _log.warn("{} has unknown header version {}. Will rebuild index.", _lookupPath.getFileName(), leadIn.getLayoutVersion()); + return false; + } + + // pin everything to current one-and-only version + if ( leadIn.getLayoutVersion() != Header.getCurrentLayout().layoutVersion() ) { + _log.warn("{} has mismatching header version. Will rebuild index.", _lookupPath.getFileName()); + return false; + } + + FileLayout fileLayout = Header.layoutByVersion(leadIn.getLayoutVersion()); + + if ( leadIn.getHeaderBytes() < fileLayout.headerBytes() ) { + _log.warn("{} has mismatching header size information. Will rebuild index.", _lookupPath.getFileName()); + return false; + } + + Header header = new Header(fileLayout, MemorySegment.mapFile(_lookupPath, 0, leadIn.getHeaderBytes(), READ_ONLY, autoClosedScope)); + + if ( header.getTableOffset() <= 0 || !isPowerOfTwo(header.getTableOffset()) ) { + _log.warn("{} has inconsistent alignment information. Will rebuild index.", _lookupPath.getFileName()); + return false; + } + // file size mismatch + if ( header.getTableOffset() + header.getTableBytes() != fileSize ) { + _log.warn("{} has mismatching file size information. Will rebuild index.", _lookupPath.getFileName()); + return false; + } + + // configuration changes + if ( header.getMinKey() != _minKey || header.getMaxKey() != _maxKey ) { + _log.warn("{} has mismatching key bounds. Will rebuild index.", _lookupPath.getFileName()); + return false; + } + + // not closed properly, or inconsistency between dump file and header state + if ( header.getOpenedTimestamp() >= header.getClosedTimestamp() || header.getNumKeys() == 0 && _dump.getDumpSize() > 0 ) { + _log.info("{} was not closed properly, checking consistency...", _lookupPath.getFileName()); + checkRequired = true; + } + + // plausibility checks + if ( !checkNumKeys(header) ) { + _log.info("{} has stored implausible numKeys, checking consistency...", _lookupPath.getFileName()); + checkRequired = true; + } + + if ( checkRequired ) { + MemorySegment tableSegment = MemorySegment.mapFile(_lookupPath, header.getTableOffset(), header.getTableBytes(), READ_ONLY, autoClosedScope); + if ( !checkConsistency(header, tableSegment) ) { + return false; + } + } + + _fileLayout = fileLayout; // store this, so we won't have to restart at the lead-in + + return true; + } + catch ( Exception e ) { + throw new RuntimeException(e); + } + + } + + private void closeHeader() { + if ( _header != null ) { + _header.setClosedTimestamp(System.currentTimeMillis()); + _header.flush(); + } + } + + private void createNew() throws IOException { + Files.newByteChannel(_lookupPath, EnumSet.of(CREATE_NEW, SPARSE, WRITE)).close(); // just create sparse file; MemorySegment API takes a Path as of java 17 + _fileLayout = Header.getCurrentLayout(); + + initHeader(mapHeaderSegment()); + + mapTableSegment(); + } + + private boolean delete0( E elem ) { + long index = indexFor(elem); + boolean deleted = getPosAt(index) >= 0; + setPosAt(index, -1); + return deleted; + } + + private void flushTable() { + if ( _tableSegment != null ) { + _tableSegment.force(); + } + } + + private long getPosAt( long index ) { + if ( index < 0 ) { + throw new IndexOutOfBoundsException("index out of bounds"); + } + + if ( index < tableCapacity() ) { + return getPosAtVolatile(_tableSegment, index); + } else { + return -1; + } + } + + private long getPosAt( MemorySegment tableSegment, long index ) { + return posOffsetRevert(longArrayGet(tableSegment, index)); + } + + private long getPosAtVolatile( MemorySegment tableSegment, long index ) { + return posOffsetRevert(longArrayGetVolatile(tableSegment, index)); + } + + private long indexFor( long key ) { + if ( _minKey <= key && key <= _maxKey ) { + return keyOffsetApply(key); + } + + throw new IndexOutOfBoundsException("key out of bounds"); + } + + private long indexFor( E elem ) { + long key = keyFor(elem); + return indexFor(key); + } + + private void initHeader( MemorySegment headerSegment ) { + _header = new Header(_fileLayout, headerSegment); + + _header.setFileMagic(LeadIn.FILE_MAGIC); + _header.setLayoutVersion(_fileLayout.layoutVersion()); + _header.setHeaderBytes(_fileLayout.headerBytes()); + + _header.setTableOffset(_fileLayout.tableOffset()); + + _header.setMinKey(_minKey); + _header.setMaxKey(_maxKey); + + _header.setOpenedTimestamp(System.currentTimeMillis()); + _header.setClosedTimestamp(0L); + + _header.setTableBytes(initialTableSize()); + + _header.setNumKeys(0); + } + + private long keyFor( E elem ) { + return _getKey.applyAsLong(elem); + } + + /** + * Key offset shifts the start of the array closer to the start of the actually used key range. With sharded dumps in particular, the exact key range is + * known a priori and can be neatly catered for. + * + * @param realKey the actual key identifying the instance in the dump + * @return the index into the array where the corresponding dump position is to be stored + */ + private long keyOffsetApply( long realKey ) { + return realKey - _minKey; // => array index + } + + /** + * @param arrayIndex the index into the array where the corresponding dump position is stored + * @return the actual key identifying the instance in the dump + * @see #keyOffsetApply(long) + */ + private long keyOffsetRevert( long arrayIndex ) { + return arrayIndex + _minKey; // real key + } + + private MemorySegment mapHeaderSegment() throws IOException { + return MemorySegment.mapFile(_lookupPath, 0, _fileLayout.headerBytes(), READ_WRITE, ResourceScope.newImplicitScope()); + } + + private void mapTableSegment() throws IOException { + _tableSegment = MemorySegment.mapFile(_lookupPath, _header.getTableOffset(), _header.getTableBytes(), READ_WRITE, ResourceScope.newImplicitScope()); + _tableCapacity = capacity(_tableSegment); + } + + private void openExisting() throws IOException { + openHeader(mapHeaderSegment()); + + mapTableSegment(); + } + + private void openHeader( MemorySegment headerSegment ) { + _header = new Header(_fileLayout, headerSegment); + _header.setOpenedTimestamp(System.currentTimeMillis()); + applyHeaderCorrections(); + } + + /** + * Pos offset masks the fact that valid dump positions start at 0, which would collide with "empty value". Pre-filling with -1 is detrimental to + * performance, causing needless write load on initialization / segment growth, and conflicting with the idea of sparse files. + * + * @param realDumpPosition the actual position in the dump + * @return the position info to be stored in the index + */ + private long posOffsetApply( long realDumpPosition ) { + return realDumpPosition + 1; // => stored position info + } + + /** + * @param storedPositioninfo the position info that is stored in the index + * @return the actual position in the dump + * @see #posOffsetApply(long) + */ + private long posOffsetRevert( long storedPositioninfo ) { + return storedPositioninfo - 1; // => real dump position + } + + public interface Arch { + + long cacheLineBytes(); + + long pageSizeBytes(); + + final class AmdZen implements Arch { + + public static final Arch INSTANCE = new AmdZen(); + + @Override + public long cacheLineBytes() { + return 64; + } + + @Override + public long pageSizeBytes() { + return 4096; + } + } + } + + + protected record FileLayout(long layoutVersion, Arch arch, long tableOffsetInPages, long blockSizeInPages, GroupLayout headerLayout) { + + public FileLayout sanityCheck() { + if ( tableOffset() < headerBytes() ) { + throw new IllegalStateException("table overlaps header"); + } + if ( blockSize() < tableOffset() ) { + throw new IllegalStateException("table offset well exceeds first block"); + } + return this; + } + + long blockSize() { + return blockSizeInPages * arch.pageSizeBytes(); + } + + long headerBytes() { + return headerLayout.byteSize(); + } + + long tableOffset() { + return tableOffsetInPages * arch.pageSizeBytes(); + } + } + + + protected static final class Header { + + private static final Map FILE_LAYOUT_BY_VERSION; + + private static final FileLayout FILE_LAYOUT_V1 = new FileLayout(1, Arch.AmdZen.INSTANCE, 1, 1024, structLayout( // + + // these are mostly constant, might change when file layout version is updated / migrated + + LeadIn.LAYOUT, // + + JAVA_LONG.withName("tableOffset"), // + + JAVA_LONG.withName("minKey"), // + JAVA_LONG.withName("maxKey"), // + + MemoryLayout.paddingLayout(Arch.AmdZen.INSTANCE.cacheLineBytes()), // keep things cache-line-aligned + + // these change during open/close + + JAVA_LONG.withName("openedTimestamp"), // + JAVA_LONG.withName("closedTimestamp"), // + + MemoryLayout.paddingLayout(Arch.AmdZen.INSTANCE.cacheLineBytes()), // keep things cache-line-aligned + + // changes whenever file needs to grow + JAVA_LONG.withName("tableBytes"), // + + MemoryLayout.paddingLayout(Arch.AmdZen.INSTANCE.cacheLineBytes()), // keep things cache-line-aligned + + // changes whenever keys are added/removed + JAVA_LONG.withName("numKeys"), // + + MemoryLayout.paddingLayout(Arch.AmdZen.INSTANCE.cacheLineBytes()) // keep things cache-line-aligned + ).withBitAlignment(8 * Arch.AmdZen.INSTANCE.cacheLineBytes()).withName("headerLayoutV1")) // + .sanityCheck(); + + static { + FILE_LAYOUT_BY_VERSION = Map.of(1L, FILE_LAYOUT_V1); + } + + public static FileLayout getCurrentLayout() { + return Header.layoutByVersion(1L); + } + + public static boolean isReadable( long version ) { + return FILE_LAYOUT_BY_VERSION.containsKey(version); + + } + + public static FileLayout layoutByVersion( long version ) { + return FILE_LAYOUT_BY_VERSION.get(version); + } + + private final MemorySegment _memorySegment; + + private final VarHandle _fileMagic; + private final VarHandle _layoutVersion; + private final VarHandle _headerBytes; + + private final VarHandle _tableOffset; + + private final VarHandle _minKey; + private final VarHandle _maxKey; + + private final VarHandle _openedTimestamp; + private final VarHandle _closedTimestamp; + + private final VarHandle _tableBytes; + + private final VarHandle _numKeys; + + public Header( FileLayout fileLayout, MemorySegment memorySegment ) { + + GroupLayout layout = fileLayout.headerLayout(); + + _memorySegment = memorySegment; + + PathElement leadIn = groupElement("leadIn"); + _fileMagic = layout.varHandle(long.class, leadIn, groupElement("fileMagic")); + _layoutVersion = layout.varHandle(long.class, leadIn, groupElement("layoutVersion")); + _headerBytes = layout.varHandle(long.class, leadIn, groupElement("headerBytes")); + + _tableOffset = layout.varHandle(long.class, groupElement("tableOffset")); + + _minKey = layout.varHandle(long.class, groupElement("minKey")); + _maxKey = layout.varHandle(long.class, groupElement("maxKey")); + + _openedTimestamp = layout.varHandle(long.class, groupElement("openedTimestamp")); + _closedTimestamp = layout.varHandle(long.class, groupElement("closedTimestamp")); + + _tableBytes = layout.varHandle(long.class, groupElement("tableBytes")); + + _numKeys = layout.varHandle(long.class, groupElement("numKeys")); + } + + public void flush() { + _memorySegment.force(); + } + + public long getClosedTimestamp() { + return getVolatile(_closedTimestamp); + } + + public long getMaxKey() { + return getVolatile(_maxKey); + } + + public long getMinKey() { + return getVolatile(_minKey); + } + + public long getNumKeys() { + return getVolatile(_numKeys); + } + + public long getOpenedTimestamp() { + return getVolatile(_openedTimestamp); + } + + public long getTableBytes() { + return getVolatile(_tableBytes); + } + + public long getTableOffset() { + return getVolatile(_tableOffset); + } + + public void incrementNumKeys( long difference ) { + _numKeys.getAndAdd(_memorySegment, difference); + } + + public void setClosedTimestamp( long closedTimestamp ) { + setVolatile(_closedTimestamp, closedTimestamp); + } + + public void setFileMagic( long fileMagic ) { + setVolatile(_fileMagic, fileMagic); + } + + public void setHeaderBytes( long headerBytes ) { + setVolatile(_headerBytes, headerBytes); + } + + public void setLayoutVersion( long layoutVersion ) { + setVolatile(_layoutVersion, layoutVersion); + } + + public void setMaxKey( long maxKey ) { + setVolatile(_maxKey, maxKey); + } + + public void setMinKey( long minKey ) { + setVolatile(_minKey, minKey); + } + + public void setNumKeys( long numKeys ) { + setVolatile(_numKeys, numKeys); + } + + public void setOpenedTimestamp( long openedTimestamp ) { + setVolatile(_openedTimestamp, openedTimestamp); + } + + public void setTableBytes( long tableBytes ) { + setVolatile(_tableBytes, tableBytes); + } + + public void setTableOffset( long tableOffset ) { + setVolatile(_tableOffset, tableOffset); + } + + private long getVolatile( VarHandle varHandle ) { + return (long)varHandle.getVolatile(_memorySegment); + } + + private void setVolatile( VarHandle varHandle, long value ) { + varHandle.setVolatile(_memorySegment, value); + } + } + + + static final class ClosedRangeMmapLongIdIndex extends MmapLongIdIndex { + + private static long maxNumKeys( long minKey, long maxKey ) { + return maxKey - minKey + 1; + } + + private ClosedRangeMmapLongIdIndex( Dump dump, String fieldName, long minKey, long maxKey ) throws NoSuchFieldException { + super(dump, fieldName, minKey, maxKey); + init(); + } + + private ClosedRangeMmapLongIdIndex( Dump dump, FieldAccessor fieldAccessor, long minKey, long maxKey ) { + super(dump, fieldAccessor, minKey, maxKey); + init(); + } + + @Override + protected long initialTableSize() { + return maxNumKeys(_minKey, _maxKey) * Long.BYTES; + } + + @Override + protected void setPosAt( long index, long pos ) { + if ( index < 0 || index >= tableCapacity() ) { + throw new IndexOutOfBoundsException("index out of bounds"); + } + + setPosAtVolatile(index, pos); + } + } + + + static final class OpenRangeMmapLongIdIndex extends MmapLongIdIndex { + + private static long deriveMaxKeyFrom( long minKey ) { + // this basically rotates the zero-index of the array, just like simple index addition/subtraction does during offset compensation + return Long.MAX_VALUE + minKey; + } + + private final Object _growLock = new Object(); + + private OpenRangeMmapLongIdIndex( Dump dump, String fieldName, long minKey ) throws NoSuchFieldException { + super(dump, fieldName, minKey, deriveMaxKeyFrom(minKey)); + init(); + } + + private OpenRangeMmapLongIdIndex( Dump dump, FieldAccessor fieldAccessor, long minKey ) { + super(dump, fieldAccessor, minKey, deriveMaxKeyFrom(minKey)); + init(); + } + + protected long initialTableSize() { + return _fileLayout.blockSize() - _fileLayout.tableOffset(); // align everything to block boundaries + } + + @Override + protected void setPosAt( long index, long pos ) { + if ( index < 0 ) { + throw new IndexOutOfBoundsException("index out of bounds"); + } + + ensureTableContains(index); + setPosAtVolatile(index, pos); + } + + private void ensureTableContains( long index ) { + if ( index >= tableCapacity() ) { + synchronized ( _growLock ) { + if ( index >= tableCapacity() ) { + // fence post problem: we index the start, but segments need to cover everything to the end; hence +1 + growTableSegment((index + 1) * Long.BYTES); + } + } + } + } + } + + + private static final class HeaderCorrections { + + Long numKeys; + + } + + + /** + * Helper for reading in the bare minimum from existing files. + */ + private static final class LeadIn { + + public static final long FILE_MAGIC = 0x6861696C756C6672L; + + private static final GroupLayout LAYOUT = structLayout( // + + JAVA_LONG.withName("fileMagic").withOrder(BIG_ENDIAN), // + JAVA_LONG.withName("layoutVersion"), // + JAVA_LONG.withName("headerBytes") // + + ).withBitAlignment(8 * Arch.AmdZen.INSTANCE.cacheLineBytes()).withName("leadIn"); + + private static final VarHandle _fileMagic = LAYOUT.varHandle(long.class, groupElement("fileMagic")); + private static final VarHandle _layoutVersion = LAYOUT.varHandle(long.class, groupElement("layoutVersion")); + private static final VarHandle _headerBytes = LAYOUT.varHandle(long.class, groupElement("headerBytes")); + + public static long byteSize() { + return LAYOUT.byteSize(); // minimum read length: header version and bytes + } + + private final MemorySegment _memorySegment; + + public LeadIn( MemorySegment memorySegment ) { + _memorySegment = memorySegment; + } + + public long getFileMagic() { + return (Long)_fileMagic.get(_memorySegment); + } + + public long getHeaderBytes() { + return (Long)_headerBytes.get(_memorySegment); + } + + public long getLayoutVersion() { + return (Long)_layoutVersion.get(_memorySegment); + } + + } +} diff --git a/dump/test/util/dump/MmapLongIdIndexTest.java b/dump/test/util/dump/MmapLongIdIndexTest.java new file mode 100644 index 0000000..d6e1f03 --- /dev/null +++ b/dump/test/util/dump/MmapLongIdIndexTest.java @@ -0,0 +1,568 @@ +package util.dump; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.io.File; +import java.io.FileFilter; +import java.io.IOException; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Random; + +import org.assertj.core.util.Arrays; +import org.junit.After; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +import gnu.trove.set.TIntSet; +import gnu.trove.set.hash.TIntHashSet; +import junit.framework.Assert; +import util.dump.reflection.FieldFieldAccessor; +import util.dump.reflection.Reflection; + + +@RunWith(Parameterized.class) +public class MmapLongIdIndexTest { + + private static final String DUMP_FILENAME = "DumpTest.dmp"; + private static final int READ_NUMBER = 1000; + private static final int BEAN_SIZE = 10; + private static File _tmpdir; + + @Parameters + public static Collection getDumpSizesToTestFor() { + List parameters = new ArrayList<>(); + parameters.add(Arrays.array(10)); + parameters.add(Arrays.array(1000)); + parameters.add(Arrays.array(100000)); + return parameters; + } + + @BeforeClass + public static void setUpTmpdir() throws IOException { + _tmpdir = new File("target", "tmp"); + _tmpdir.mkdirs(); + if ( !_tmpdir.isDirectory() ) { + throw new IOException("unable to create temporary directory: " + _tmpdir.getAbsolutePath()); + } + System.setProperty("java.io.tmpdir", _tmpdir.getAbsolutePath()); + } + + private Random _random; + + private final int _dumpSize; + private final long _negativeOffset; + + public MmapLongIdIndexTest( Integer dumpSize ) { + _dumpSize = dumpSize; + _negativeOffset = -_dumpSize; // need to cater to generated negative indexed + } + + @Before + @After + public void deleteOldTestDumps() { + File[] dumpFile = _tmpdir.listFiles(new FileFilter() { + + @Override + public boolean accept( File f ) { + return f.getName().startsWith("DumpTest."); + } + }); + for ( File df : dumpFile ) { + if ( !df.delete() ) { + System.out.println("Failed to delete old dump file " + df); + } + } + } + + @Before + public void initRandom() { + long seed = System.currentTimeMillis(); + _random = new Random(seed); + System.out.println("Seed used for this DumpTest run: " + seed); + } + + @After + public void printMemory() { + System.gc(); + long mem = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); + System.out.println(mem / (1024 * 1024) + " MB used after test run"); + } + + @Test + public void testGetNumKeys() throws Exception { + File dumpFile = new File(_tmpdir, DUMP_FILENAME); + Dump dump = new Dump<>(Bean.class, dumpFile); + try { + int numBeansToAddForTest = 500; + for ( int i = 0; i < numBeansToAddForTest; i++ ) { + dump.add(new Bean(i, null)); + } + + // reopen dump + dump.close(); + dump = new Dump<>(Bean.class, dumpFile); + + DumpIndex intIndex = MmapLongIdIndex.forOpenRange(dump, "_idInt", _negativeOffset); + DumpIndex longIndex = MmapLongIdIndex.forOpenRange(dump, "_idLong", _negativeOffset); + + assertThat(longIndex.getNumKeys()).isEqualTo(numBeansToAddForTest); + assertThat(intIndex.getNumKeys()).isEqualTo(numBeansToAddForTest); + + int deleted = 0; + for ( Bean bean : dump ) { + if ( bean._idInt % 2 == 0 ) { + dump.deleteLast(); + deleted++; + } + } + + assertThat(longIndex.getNumKeys()).isEqualTo(numBeansToAddForTest - deleted); + assertThat(intIndex.getNumKeys()).isEqualTo(numBeansToAddForTest - deleted); + } + finally { + dump.close(); + } + } + + @Test + public void testIntKeyIndex() throws Exception { + testIndex("_idInt", new TestConfiguration() { + + @Override + public Object createKey( int id ) { + return id; + } + }); + } + + @Test + public void testLongKeyIndex() throws Exception { + testIndex("_idLong", new TestConfiguration() { + + @Override + public Object createKey( int id ) { + return (long)id; + } + }); + } + + @Test + public void testLongObjectKeyIndex() throws Exception { + testIndex("_idLongObject", new TestConfiguration() { + + @Override + public Object createKey( int id ) { + return (long)id; + } + }); + } + + @Test + public void testRecreateIndex() throws NoSuchFieldException, IOException { + File dumpFile = new File(_tmpdir, DUMP_FILENAME); + + Dump dump = new Dump<>(Bean.class, dumpFile); + try { + Field field = Reflection.getField(Bean.class, "_idInt"); + UniqueConstraint index = MmapLongIdIndex.forOpenRange(dump, new FieldFieldAccessor(field), _negativeOffset); + + validateNumKeys(dump, index); + + fillDump(dump); + + validateNumKeys(dump, index); + + dump.close(); + + System.out.println("Closing and re-opening dump, deleting index"); + Assert.assertTrue("Failed to delete index", + new File(_tmpdir, DUMP_FILENAME + "._idInt.mmap.lookup").delete() && !new File(_tmpdir, DUMP_FILENAME + "._idInt.mmap.lookup").exists()); + + dump = new Dump<>(Bean.class, dumpFile); + index = MmapLongIdIndex.forOpenRange(dump, new FieldFieldAccessor(field), _negativeOffset); + + long t = System.currentTimeMillis(); + for ( int j = 0; j < READ_NUMBER; j++ ) { + int i = _random.nextInt(_dumpSize); + Bean bean = index.lookup(i); + Assert.assertNotNull("no Bean for index " + i, bean); + Assert.assertEquals(i, bean._idInt); + Assert.assertTrue(bean._data.startsWith(i + "-")); + } + System.out.println("Read " + READ_NUMBER + " instances from dump. Needed " + (System.currentTimeMillis() - t) / (float)READ_NUMBER + " ms/instance."); + + Bean nonExistingBean = index.lookup(_dumpSize + 1); + Assert.assertNull(nonExistingBean); + } + finally { + dump.close(); + } + } + + protected void testIndex( String fieldName, TestConfiguration config ) throws Exception { + + testLateOpenIndex(fieldName, config); + + File dumpFile = new File(_tmpdir, DUMP_FILENAME); + + deleteOldTestDumps(); + + /* create dump and index */ + Dump dump = new Dump<>(Bean.class, dumpFile); + try { + Field field = Reflection.getField(Bean.class, fieldName); + UniqueConstraint index = MmapLongIdIndex.forOpenRange(dump, new FieldFieldAccessor(field), _negativeOffset); + + fillDump(dump); + + validateNumKeys(dump, index); + + testLookup(config, field, index); + + dump.close(); + + System.out.println("Closing and re-opening dump"); + + dump = new Dump<>(Bean.class, dumpFile); + index = MmapLongIdIndex.forOpenRange(dump, new FieldFieldAccessor(field), _negativeOffset); + + validateNumKeys(dump, index); + + testLookup(config, field, index); + + /* test lookup of non-existing key */ + Object k = config.createKey(_dumpSize + 1); + Bean nonExistingBean = index.lookup(k); + Assert.assertNull(nonExistingBean); + + /* iterate dump and delete half of it */ + long t = System.currentTimeMillis(); + int id = 0; + int deletions = 0; + for ( Bean bean : dump ) { + Assert.assertEquals(config.createKey(id), field.get(bean)); + Assert.assertTrue("unexpected bean data", bean._data.startsWith("" + id)); + if ( id % 2 == 0 ) { + Bean deleted = dump.deleteLast(); + Assert.assertEquals("deleted bean != iterated bean", deleted, bean); + deletions++; + } + id++; + } + System.out.println("Iterated the whole dump. Deleted " + deletions + " items. Needed " + (System.currentTimeMillis() - t) + " ms."); + + /* lookup and assert deletions */ + t = System.currentTimeMillis(); + for ( int j = 0; j < READ_NUMBER; j++ ) { + id = _random.nextInt(_dumpSize); + k = config.createKey(id); + Bean bean = index.lookup(k); + if ( id % 2 == 0 ) { + Assert.assertNull("deleted Bean with index " + k + " is still accessable", bean); + } else { + Assert.assertNotNull("no Bean for index " + k, bean); + Assert.assertEquals(k, field.get(bean)); + Assert.assertTrue(bean._data.startsWith(id + "-")); + } + } + System.out.println("Read " + READ_NUMBER + " instances from dump. Needed " + (System.currentTimeMillis() - t) / (float)READ_NUMBER + " ms/instance."); + + /* iterate dump and update beans */ + t = System.currentTimeMillis(); + id = 1; + int updates = 0; + for ( Bean bean : dump ) { + Assert.assertEquals(config.createKey(id), field.get(bean)); + Assert.assertTrue("unexpected bean data", bean._data.startsWith("" + id)); + if ( id % 3 == 0 ) { + /* update without changing externalization size of bean */ + long oldDumpSize = dump._outputStream._n; + Bean updatedBean = new Bean(-bean._idInt, bean._data); + Bean oldVersion = dump.updateLast(updatedBean); + Assert.assertEquals("old bean != iterated bean", oldVersion, bean); + Assert.assertEquals("dump has grown, even though the update was overwrite compatible", oldDumpSize, dump._outputStream._n); + updates++; + } else { + /* update and change externalization size of bean */ + Bean updatedBean = new Bean(bean._idInt, bean._data.replaceFirst("-", "++")); + Bean oldVersion = dump.updateLast(updatedBean); + Assert.assertEquals("old bean != iterated bean", oldVersion, bean); + updates++; + } + id += 2; + } + for ( Bean bean : dump ) { + id = bean._idInt; + if ( id > 0 && id % 7 == 0 ) { + /* update and change externalization size of bean */ + Bean updatedBean = new Bean(-bean._idInt, bean._data.replaceFirst("-", "++")); + Bean oldVersion = dump.updateLast(updatedBean); + Assert.assertEquals("old bean != iterated bean", oldVersion, bean); + updates++; + } + } + System.out.println("Iterated the whole dump. Updated " + updates + " items. Needed " + (System.currentTimeMillis() - t) + " ms."); + + validateNumKeys(dump, index); + + testLookupAfterUpdates(config, field, index); + + dump.close(); + + System.out.println("Closing and re-opening dump"); + + dump = new Dump<>(Bean.class, dumpFile); + index = MmapLongIdIndex.forOpenRange(dump, new FieldFieldAccessor(field), _negativeOffset); + + validateNumKeys(dump, index); + + testLookupAfterUpdates(config, field, index); + + dump.close(); + + /* delete index meta file to invalidate the index */ + File[] metaFiles = _tmpdir.listFiles(new FileFilter() { + + @Override + public boolean accept( File f ) { + return f.getName().startsWith("DumpTest.") && f.getName().endsWith("meta"); + } + }); + for ( File df : metaFiles ) { + Assert.assertTrue("Failed to delete meta file " + df, df.delete()); + } + /* re-open, enforcing the index to be re-created */ + dump = new Dump<>(Bean.class, dumpFile); + index = MmapLongIdIndex.forOpenRange(dump, new FieldFieldAccessor(field), _negativeOffset); + + validateNumKeys(dump, index); + + /* after having re-created the index, repeat last test */ + testLookupAfterUpdates(config, field, index); + + } + finally { + dump.close(); + } + } + + private void fillDump( Dump dump ) throws IOException { + StringBuilder sb = new StringBuilder("-"); + for ( int i = 0; i < BEAN_SIZE - 15; i++ ) { // 15 is an estimation for the size of the Bean instance without this padding + sb.append('0'); + } + + /* add some elements */ + long t = System.currentTimeMillis(); + for ( int i = 0; i < _dumpSize; i++ ) { + dump.add(new Bean(i, i + sb.toString())); + } + System.out.println("Written " + _dumpSize + " instances to dump. Needed " + (System.currentTimeMillis() - t) / (float)_dumpSize + " ms/instance."); + } + + private void testLateOpenIndex( String fieldName, TestConfiguration config ) throws Exception { + File dumpFile = new File(_tmpdir, DUMP_FILENAME); + + /* create dump and index */ + Dump dump = new Dump<>(Bean.class, dumpFile); + try { + Field field = Reflection.getField(Bean.class, fieldName); + + fillDump(dump); + UniqueConstraint index = MmapLongIdIndex.forOpenRange(dump, new FieldFieldAccessor(field), _negativeOffset); + + testLookup(config, field, index); + } + finally { + dump.close(); + } + } + + private void testLookup( TestConfiguration config, Field field, UniqueConstraint index ) throws IllegalAccessException { + long t; + t = System.currentTimeMillis(); + for ( int j = 0; j < READ_NUMBER; j++ ) { + int id = _random.nextInt(_dumpSize); + Object k = config.createKey(id); + Bean bean = index.lookup(k); + Assert.assertNotNull("no Bean for index " + k, bean); + Assert.assertEquals(k, field.get(bean)); + Assert.assertTrue(bean._data.startsWith(id + "-")); + } + System.out.println("Read " + READ_NUMBER + " instances from dump. Needed " + (System.currentTimeMillis() - t) / (float)READ_NUMBER + " ms/instance."); + } + + private void testLookupAfterUpdates( TestConfiguration config, Field field, UniqueConstraint index ) throws IllegalAccessException { + long t; + Object k; + int id; + t = System.currentTimeMillis(); + for ( int j = 0; j < READ_NUMBER; j++ ) { + id = _random.nextInt(_dumpSize); + if ( id % 3 == 0 || id % 7 == 0 ) { + id = -id; + } + k = config.createKey(id); + Bean bean = index.lookup(k); + if ( id % 2 == 0 ) { + Assert.assertNull("deleted Bean with index " + k + " is still accessible", bean); + } else if ( id % 7 == 0 && id % 3 != 0 ) { + Assert.assertNotNull("no Bean for index " + k, bean); + Assert.assertEquals(config.createKey(id), field.get(bean)); + Assert.assertTrue("bean data wrong: id=" + id + ", data=" + bean._data, bean._data.startsWith(-id + "++")); + } else if ( Math.abs(id) % 3 == 0 ) { + Assert.assertNotNull("no Bean for index " + k, bean); + Assert.assertEquals(config.createKey(id), field.get(bean)); + Assert.assertTrue("bean data wrong: id=" + id + ", data=" + bean._data, bean._data.startsWith((-id) + "-")); + } else { + Assert.assertNotNull("no Bean for index " + k, bean); + Assert.assertEquals(k, field.get(bean)); + Assert.assertTrue(bean._data.startsWith(id + "++")); + } + } + System.out.println("Read " + READ_NUMBER + " instances from dump. Needed " + (System.currentTimeMillis() - t) / (float)READ_NUMBER + " ms/instance."); + } + + private void validateNumKeys( Dump dump, UniqueConstraint index ) { + // count keys + TIntSet keys = new TIntHashSet(); + for ( Bean bean : dump ) { + keys.add(bean._idInt); + } + + int numKeys = index.getNumKeys(); + assertThat(numKeys).isEqualTo(keys.size()); + } + + public static class Bean implements ExternalizableBean { + + @externalize(1) + long _idLong; + @externalize(2) + int _idInt; + @externalize(3) + String _idString; + @externalize(4) + Long _idLongObject; + @externalize(5) + ExternalizableId _idExternalizable; + @externalize(10) + String _data; + + public Bean() { + // for Externalization + } + + public Bean( int id, String data ) { + _idLong = id; + _idInt = id; + _idString = (id < 0 ? "" : "+") + id; + _idLongObject = (long)id; + _idExternalizable = new ExternalizableId(id); + _data = data; + } + + @Override + public boolean equals( Object obj ) { + if ( this == obj ) { + return true; + } + if ( obj == null ) { + return false; + } + if ( getClass() != obj.getClass() ) { + return false; + } + Bean other = (Bean)obj; + if ( _data == null ) { + if ( other._data != null ) { + return false; + } + } else if ( !_data.equals(other._data) ) { + return false; + } + if ( _idExternalizable == null ) { + if ( other._idExternalizable != null ) { + return false; + } + } else if ( !_idExternalizable.equals(other._idExternalizable) ) { + return false; + } + if ( _idInt != other._idInt ) { + return false; + } + if ( _idLong != other._idLong ) { + return false; + } + if ( _idLongObject == null ) { + if ( other._idLongObject != null ) { + return false; + } + } else if ( !_idLongObject.equals(other._idLongObject) ) { + return false; + } + if ( _idString == null ) { + return other._idString == null; + } else { + return _idString.equals(other._idString); + } + } + + @Override + public int hashCode() { + throw new UnsupportedOperationException("hashCode() not needed."); + } + } + + + public static class ExternalizableId implements ExternalizableBean { + + @externalize(1) + long _id; + + public ExternalizableId() { + // for Externalization + } + + public ExternalizableId( long id ) { + _id = id; + } + + @Override + public boolean equals( Object obj ) { + if ( this == obj ) { + return true; + } + if ( obj == null ) { + return false; + } + if ( getClass() != obj.getClass() ) { + return false; + } + ExternalizableId other = (ExternalizableId)obj; + return _id == other._id; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + (int)(_id ^ (_id >>> 32)); + return result; + } + } + + + protected static abstract class TestConfiguration { + + public abstract Object createKey( int id ); + } + +} diff --git a/pom.xml b/pom.xml index b1dcd71..64244a7 100644 --- a/pom.xml +++ b/pom.xml @@ -27,6 +27,9 @@ UTF-8 17 17 + + --add-modules=jdk.incubator.foreign + @@ -45,7 +48,10 @@ maven-surefire-plugin 3.3.1 - --add-opens java.base/java.util=ALL-UNNAMED + + --add-opens java.base/java.util=ALL-UNNAMED + --add-modules=jdk.incubator.foreign + From a1ac81bd62b45e928691475784f8b81bf6feb557 Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Tue, 17 Oct 2023 14:06:58 +0200 Subject: [PATCH 05/10] MmapLongIdIndex: improve consistency check, add paranoia mode --- dump/src/util/dump/MmapLongIdIndex.java | 278 ++++++++++++++++++++---- 1 file changed, 238 insertions(+), 40 deletions(-) diff --git a/dump/src/util/dump/MmapLongIdIndex.java b/dump/src/util/dump/MmapLongIdIndex.java index 592d925..5a26c08 100644 --- a/dump/src/util/dump/MmapLongIdIndex.java +++ b/dump/src/util/dump/MmapLongIdIndex.java @@ -28,11 +28,14 @@ import gnu.trove.list.TLongList; import gnu.trove.list.array.TLongArrayList; +import gnu.trove.map.TLongLongMap; +import gnu.trove.map.hash.TLongLongHashMap; import jdk.incubator.foreign.GroupLayout; import jdk.incubator.foreign.MemoryLayout; import jdk.incubator.foreign.MemoryLayout.PathElement; import jdk.incubator.foreign.MemorySegment; import jdk.incubator.foreign.ResourceScope; +import util.dump.cache.LRUCache; import util.dump.reflection.FieldAccessor; import util.dump.reflection.FieldFieldAccessor; import util.dump.reflection.Reflection; @@ -50,6 +53,8 @@ public abstract class MmapLongIdIndex extends DumpIndex implements UniqueC private static final Logger _log = LoggerFactory.getLogger(MmapLongIdIndex.class); + private static final boolean PARANOIA_MODE = true; + private static final VarHandle LONG_ARRAY_ACCESS = sequenceLayout(JAVA_LONG).varHandle(long.class, sequenceElement()); public static MmapLongIdIndex forClosedRange( Dump dump, String fieldName, long minKey, long maxKey ) throws NoSuchFieldException { @@ -95,7 +100,7 @@ private static void longArraySetVolatile( MemorySegment array, long index, long protected final long _minKey; protected final long _maxKey; - private final HeaderCorrections _headerCorrections = new HeaderCorrections(); + private final IndexCorrections _indexCorrections = new IndexCorrections(); protected FileLayout _fileLayout; private Header _header; @@ -277,9 +282,13 @@ protected boolean checkMeta() { } protected boolean checkNumKeys( Header header ) { - if ( header.getNumKeys() <= 0 ) { + if ( header.getNumKeys() < 0 ) { + return false; + } + if ( header.getNumKeys() == 0 && _dump.getDumpSize() > 0 ) { return false; } + return header.getNumKeys() <= header.getTableBytes() / Long.BYTES; } @@ -399,46 +408,28 @@ private boolean add0( E elem, long pos ) { } private void applyHeaderCorrections() { - if ( _headerCorrections.numKeys != null ) { - _header.setNumKeys(_headerCorrections.numKeys); + if ( _indexCorrections.numKeys != null ) { + _log.info("{} fixing header...", _lookupPath.getFileName()); + _header.setNumKeys(_indexCorrections.numKeys); } } - private boolean checkConsistency( Header header, MemorySegment tableSegment ) { - boolean consistent = true; - long start = System.nanoTime(); + private void applyTableCorrections() { + if ( _indexCorrections.rawContentCorrections != null ) { + _log.info("{} fixing table...", _lookupPath.getFileName()); + _indexCorrections.rawContentCorrections.forEachEntry(( index, posInfo ) -> { + longArraySet(_tableSegment, index, posInfo); + return true; + }); - long dumpSize = _dump._outputStream._n; - long numKeys = 0; - for ( long i = 0, n = capacity(tableSegment); i < n; ++i ) { - // not used during live operation, hence concurrency is not an issue - long pos = getPosAt(tableSegment, i); - if ( pos >= 0 ) { - if ( _dump._deletedPositions.contains(pos) ) { - _log.warn("{} Found deleted position! Will rebuild index.", _lookupPath.getFileName()); - consistent = false; // TODO: probably just an incomplete write during deletion, add offending positions to header corrections for later removal - } - if ( pos >= dumpSize ) { - _log.warn("{} Found position {} beyond the end of the dump file with size {}! Will rebuild index.", _lookupPath.getFileName(), pos, dumpSize); - consistent = false; - } - - ++numKeys; + if ( _header.getNumKeys() != getAllLongKeys().length ) { + throw new IllegalStateException(_lookupPath.getFileName() + " inconsistencies post-fixup, wtf"); } } + } - if ( numKeys != header.getNumKeys() ) { - _log.info("{} numKeys differ between actual table ({}) and caching header ({}), fixing.", _lookupPath.getFileName(), numKeys, header.getNumKeys()); - _headerCorrections.numKeys = numKeys; - } - - Duration duration = Duration.ofNanos(System.nanoTime() - start); - if ( consistent ) { - _log.info("{} was checked and found to be {} in {}", _lookupPath.getFileName(), "consistent", duration); - } else { - _log.warn("{} was checked and found to be {} in {}", _lookupPath.getFileName(), "inconsistent", duration); - } - return consistent; + private boolean checkConsistency( Header header, MemorySegment tableSegment ) throws IOException { + return new ConsistencyCheck(header, tableSegment).perform(); } private boolean checkHeader() { @@ -500,7 +491,7 @@ private boolean checkHeader() { } // not closed properly, or inconsistency between dump file and header state - if ( header.getOpenedTimestamp() >= header.getClosedTimestamp() || header.getNumKeys() == 0 && _dump.getDumpSize() > 0 ) { + if ( header.getOpenedTimestamp() >= header.getClosedTimestamp() ) { _log.info("{} was not closed properly, checking consistency...", _lookupPath.getFileName()); checkRequired = true; } @@ -511,6 +502,11 @@ private boolean checkHeader() { checkRequired = true; } + if ( PARANOIA_MODE && !checkRequired ) { + _log.info("{} hardcoded paranoia mode enabled, checking consistency...", _lookupPath.getFileName()); + checkRequired = true; + } + if ( checkRequired ) { MemorySegment tableSegment = MemorySegment.mapFile(_lookupPath, header.getTableOffset(), header.getTableBytes(), READ_ONLY, autoClosedScope); if ( !checkConsistency(header, tableSegment) ) { @@ -522,8 +518,8 @@ private boolean checkHeader() { return true; } - catch ( Exception e ) { - throw new RuntimeException(e); + catch ( Exception argh ) { + throw new RuntimeException(argh); } } @@ -647,11 +643,14 @@ private void openExisting() throws IOException { openHeader(mapHeaderSegment()); mapTableSegment(); + + applyTableCorrections(); } private void openHeader( MemorySegment headerSegment ) { _header = new Header(_fileLayout, headerSegment); _header.setOpenedTimestamp(System.currentTimeMillis()); + applyHeaderCorrections(); } @@ -941,7 +940,8 @@ static final class OpenRangeMmapLongIdIndex extends MmapLongIdIndex { private static long deriveMaxKeyFrom( long minKey ) { // this basically rotates the zero-index of the array, just like simple index addition/subtraction does during offset compensation - return Long.MAX_VALUE + minKey; + // return Long.MAX_VALUE + minKey; // this totally screws up the < comparison + return minKey < 0 ? Long.MAX_VALUE + minKey : Long.MAX_VALUE; } private final Object _growLock = new Object(); @@ -983,10 +983,18 @@ private void ensureTableContains( long index ) { } - private static final class HeaderCorrections { + private static final class IndexCorrections { Long numKeys; + TLongLongMap rawContentCorrections; // no key or pos offsets here, just write values to array indexes verbatim + + TLongLongMap rawContentCorrections() { + if ( rawContentCorrections == null ) { + rawContentCorrections = new TLongLongHashMap(); + } + return rawContentCorrections; + } } @@ -1032,4 +1040,194 @@ public long getLayoutVersion() { } } + + + private final class ConsistencyCheck { + + private final Header _header; + private final MemorySegment _tableSegment; + private final long _segmentCapacity; + + private boolean _consistent = true; + + private long _numKeysInIndex; + + public ConsistencyCheck( Header header, MemorySegment tableSegment ) { + _header = header; + _tableSegment = tableSegment; + _segmentCapacity = capacity(_tableSegment); + } + + public boolean perform() throws IOException { + long start = System.nanoTime(); + + preloadSegment(); + checkNumKeys(); + + if ( PARANOIA_MODE ) { + Map originalCache = _dump._cache; + if ( originalCache != null ) { // otherwise things are uninitialized and lead to NPEs + _dump.setCache(new LRUCache<>((int)_numKeysInIndex * 2, 0.5f)); // temporarily increase cache + } + try (ResourceScope scope = ResourceScope.newConfinedScope()) { + MemorySegment dumpSegment = preloadDump(_dump, scope); + + checkDumpElements(); + checkIndexElements(); + + _log.info("{} releasing preload mapping of size {}", _lookupPath.getFileName(), dumpSegment.byteSize()); + } + finally { + _dump.setCache(originalCache); // restore original configuration + } + } + + Duration duration = Duration.ofNanos(System.nanoTime() - start); + if ( _consistent ) { + _log.info("{} was checked and found to be {} in {}", _lookupPath.getFileName(), "consistent", duration); + } else { + _log.warn("{} was checked and found to be {} in {}", _lookupPath.getFileName(), "inconsistent", duration); + } + + return _consistent; + } + + private void checkDumpElements() { + long start = System.nanoTime(); + long numKeysInDump = 0; + + long maxKey = _header.getMaxKey(); + DumpIterator iterator = _dump.iterator(); + while ( iterator.hasNext() ) { + ++numKeysInDump; + + E element = iterator.next(); + + long elementKey = keyFor(element); + long elementIndex = keyOffsetApply(elementKey); // intentionally not bounds-checked + + // not used during live operation, hence concurrency is not an issue + long posInIndex = getPosAt(_tableSegment, elementIndex); + long posInDump = iterator.getPosition(); + + if ( elementKey > maxKey ) { + _consistent = false; + _log.warn("{} Dump contains element with key {}, but upper bound for key is {}! Will rebuild index.", _lookupPath.getFileName(), elementKey, + maxKey); + } else if ( elementIndex >= _segmentCapacity ) { + _log.info( + "{} Dump contains element with key {}, but segment capacity is {}. Has probably been added to dump already, but not yet to index; fixing.", + _lookupPath.getFileName(), elementKey, _segmentCapacity); + _indexCorrections.rawContentCorrections().put(keyOffsetApply(elementKey), posOffsetApply(posInDump)); + } + + if ( posInIndex != posInDump ) { + _consistent = false; + _log.warn("{} Index claims element with key {} to be at position {}, but dump insists on {}! Will rebuild index.", _lookupPath.getFileName(), + elementKey, posInIndex, posInDump); + } + } + if ( _numKeysInIndex != numKeysInDump ) { + _log.warn("{} numKeys differ between index ({}) and dump ({}), fixing.", _lookupPath.getFileName(), _numKeysInIndex, numKeysInDump); + _indexCorrections.numKeys = numKeysInDump; + } + + Duration duration = Duration.ofNanos(System.nanoTime() - start); + _log.info("{} was checked against dump iteration in {}", _lookupPath.getFileName(), duration); + } + + private void checkIndexElements() { + long start = System.nanoTime(); + for ( long arrayIndex = 0; arrayIndex < _segmentCapacity; ++arrayIndex ) { + // not used during live operation, hence concurrency is not an issue + long position = getPosAt(_tableSegment, arrayIndex); + if ( position >= 0 ) { + try { + E element = _dump.get(position); + + if ( element == null ) { + _consistent = false; + _log.error("{} This is weird! Found position {} not to be deleted, but dump still returns null! Will rebuild index.", + _lookupPath.getFileName(), position); + } else { + long arrayKey = keyOffsetRevert(arrayIndex); + + long elementKey = keyFor(element); + long elementIndex = keyOffsetApply(elementKey); // intentionally not bounds-checked + + if ( elementIndex != arrayIndex ) { + _consistent = false; + _log.warn( + "{} Found position {} for key {} at index {}, but corresponding element from dump with key {} belongs at index {}! Will rebuild index.", + _lookupPath.getFileName(), position, arrayKey, arrayIndex, elementKey, elementIndex); + } + } + } + catch ( Exception argh ) { + _consistent = false; + _log.warn("{} Caught exception trying to get element at pos {} from dump! Will rebuild index.", _lookupPath.getFileName(), position, argh); + } + } + } + + Duration duration = Duration.ofNanos(System.nanoTime() - start); + _log.info("{} was checked against dump lookups in {}", _lookupPath.getFileName(), duration); + } + + private void checkNumKeys() { + long start = System.nanoTime(); + long numKeysInIndex = 0; + + long dumpSize = _dump.getDumpSize(); + for ( long arrayIndex = 0; arrayIndex < _segmentCapacity; ++arrayIndex ) { + long position = getPosAt(_tableSegment, arrayIndex); + if ( position >= 0 ) { + ++numKeysInIndex; + + if ( position >= dumpSize ) { + _consistent = false; + _log.warn("{} Found position {} beyond the end of the dump file with size {}! Will rebuild index.", _lookupPath.getFileName(), position, + dumpSize); + } + + if ( _dump._deletedPositions.contains(position) ) { + _log.warn("{} Found deleted position {} at index {}. Has probably been deleted from dump already, but not yet from index; fixing.", + _lookupPath.getFileName(), position, arrayIndex); + _indexCorrections.rawContentCorrections().put(arrayIndex, 0); + } + } + } + if ( _header.getNumKeys() != numKeysInIndex ) { + _log.info("{} numKeys differ between caching header ({}) and bare count in actual table ({}), fixing.", _lookupPath.getFileName(), + _header.getNumKeys(), numKeysInIndex); + _indexCorrections.numKeys = numKeysInIndex; + } + + _numKeysInIndex = numKeysInIndex; + Duration duration = Duration.ofNanos(System.nanoTime() - start); + _log.info("{} had its contents checked in {}", _lookupPath.getFileName(), duration); + } + + private MemorySegment preloadDump( Dump dump, ResourceScope scope ) throws IOException { + long start = System.nanoTime(); + + Path dumpPath = Paths.get(dump._dumpFile.getPath()); + long dumpSize = dump.getDumpSize(); + MemorySegment dumpSegment = MemorySegment.mapFile(dumpPath, 0, dumpSize, READ_ONLY, scope); + dumpSegment.load(); // force-fetch into memory + + Duration duration = Duration.ofNanos(System.nanoTime() - start); + _log.info("{} was mapped and preloaded in {}", dumpPath.getFileName(), duration); + return dumpSegment; + } + + private void preloadSegment() { + long start = System.nanoTime(); + + _tableSegment.load(); // force-fetch into memory + + Duration duration = Duration.ofNanos(System.nanoTime() - start); + _log.info("{} was preloaded in {}", _lookupPath.getFileName(), duration); + } + } } From 3c3ee8670126a756a8939d2dfa27b3f158c8a08f Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Mon, 16 Oct 2023 12:28:31 +0200 Subject: [PATCH 06/10] UniqueIndexTest, MmapLongIdIndexTest: take effective constant out of the loop --- dump/test/util/dump/MmapLongIdIndexTest.java | 3 ++- dump/test/util/dump/UniqueIndexTest.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/dump/test/util/dump/MmapLongIdIndexTest.java b/dump/test/util/dump/MmapLongIdIndexTest.java index d6e1f03..b2efb3a 100644 --- a/dump/test/util/dump/MmapLongIdIndexTest.java +++ b/dump/test/util/dump/MmapLongIdIndexTest.java @@ -358,11 +358,12 @@ private void fillDump( Dump dump ) throws IOException { for ( int i = 0; i < BEAN_SIZE - 15; i++ ) { // 15 is an estimation for the size of the Bean instance without this padding sb.append('0'); } + String padding = sb.toString(); /* add some elements */ long t = System.currentTimeMillis(); for ( int i = 0; i < _dumpSize; i++ ) { - dump.add(new Bean(i, i + sb.toString())); + dump.add(new Bean(i, i + padding)); } System.out.println("Written " + _dumpSize + " instances to dump. Needed " + (System.currentTimeMillis() - t) / (float)_dumpSize + " ms/instance."); } diff --git a/dump/test/util/dump/UniqueIndexTest.java b/dump/test/util/dump/UniqueIndexTest.java index 4746e85..8e4103c 100644 --- a/dump/test/util/dump/UniqueIndexTest.java +++ b/dump/test/util/dump/UniqueIndexTest.java @@ -382,11 +382,12 @@ private void fillDump( Dump dump ) throws IOException { for ( int i = 0; i < BEAN_SIZE - 15; i++ ) { // 15 is an estimation for the size of the Bean instance without this padding sb.append('0'); } + String padding = sb.toString(); /* add some elements */ long t = System.currentTimeMillis(); for ( int i = 0; i < _dumpSize; i++ ) { - dump.add(new Bean(i, i + sb.toString())); + dump.add(new Bean(i, i + padding)); } System.out.println("Written " + _dumpSize + " instances to dump. Needed " + (System.currentTimeMillis() - t) / (float)_dumpSize + " ms/instance."); } From ee20827f7ff564ed92d98f2b2c080900a3ab7b74 Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Fri, 12 Jan 2024 17:36:47 +0100 Subject: [PATCH 07/10] remove redundant and insanely expensive paranoia integrity check --- dump/src/util/dump/MmapLongIdIndex.java | 55 ++----------------------- 1 file changed, 4 insertions(+), 51 deletions(-) diff --git a/dump/src/util/dump/MmapLongIdIndex.java b/dump/src/util/dump/MmapLongIdIndex.java index 5a26c08..826ed87 100644 --- a/dump/src/util/dump/MmapLongIdIndex.java +++ b/dump/src/util/dump/MmapLongIdIndex.java @@ -35,7 +35,6 @@ import jdk.incubator.foreign.MemoryLayout.PathElement; import jdk.incubator.foreign.MemorySegment; import jdk.incubator.foreign.ResourceScope; -import util.dump.cache.LRUCache; import util.dump.reflection.FieldAccessor; import util.dump.reflection.FieldFieldAccessor; import util.dump.reflection.Reflection; @@ -1062,24 +1061,16 @@ public boolean perform() throws IOException { long start = System.nanoTime(); preloadSegment(); - checkNumKeys(); + checkIndexContents(); if ( PARANOIA_MODE ) { - Map originalCache = _dump._cache; - if ( originalCache != null ) { // otherwise things are uninitialized and lead to NPEs - _dump.setCache(new LRUCache<>((int)_numKeysInIndex * 2, 0.5f)); // temporarily increase cache - } try (ResourceScope scope = ResourceScope.newConfinedScope()) { MemorySegment dumpSegment = preloadDump(_dump, scope); - checkDumpElements(); - checkIndexElements(); + checkDumpContents(); _log.info("{} releasing preload mapping of size {}", _lookupPath.getFileName(), dumpSegment.byteSize()); } - finally { - _dump.setCache(originalCache); // restore original configuration - } } Duration duration = Duration.ofNanos(System.nanoTime() - start); @@ -1092,7 +1083,7 @@ public boolean perform() throws IOException { return _consistent; } - private void checkDumpElements() { + private void checkDumpContents() { long start = System.nanoTime(); long numKeysInDump = 0; @@ -1136,45 +1127,7 @@ private void checkDumpElements() { _log.info("{} was checked against dump iteration in {}", _lookupPath.getFileName(), duration); } - private void checkIndexElements() { - long start = System.nanoTime(); - for ( long arrayIndex = 0; arrayIndex < _segmentCapacity; ++arrayIndex ) { - // not used during live operation, hence concurrency is not an issue - long position = getPosAt(_tableSegment, arrayIndex); - if ( position >= 0 ) { - try { - E element = _dump.get(position); - - if ( element == null ) { - _consistent = false; - _log.error("{} This is weird! Found position {} not to be deleted, but dump still returns null! Will rebuild index.", - _lookupPath.getFileName(), position); - } else { - long arrayKey = keyOffsetRevert(arrayIndex); - - long elementKey = keyFor(element); - long elementIndex = keyOffsetApply(elementKey); // intentionally not bounds-checked - - if ( elementIndex != arrayIndex ) { - _consistent = false; - _log.warn( - "{} Found position {} for key {} at index {}, but corresponding element from dump with key {} belongs at index {}! Will rebuild index.", - _lookupPath.getFileName(), position, arrayKey, arrayIndex, elementKey, elementIndex); - } - } - } - catch ( Exception argh ) { - _consistent = false; - _log.warn("{} Caught exception trying to get element at pos {} from dump! Will rebuild index.", _lookupPath.getFileName(), position, argh); - } - } - } - - Duration duration = Duration.ofNanos(System.nanoTime() - start); - _log.info("{} was checked against dump lookups in {}", _lookupPath.getFileName(), duration); - } - - private void checkNumKeys() { + private void checkIndexContents() { long start = System.nanoTime(); long numKeysInIndex = 0; From 534e8d7dc65eefef6f33bc6d1163dadc9607eb98 Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Tue, 21 Oct 2025 17:01:02 +0200 Subject: [PATCH 08/10] TECH: port Project Panama from j17 incubator API to j21 preview --- dump/src/util/dump/MmapLongIdIndex.java | 296 +++++++++++++----------- pom.xml | 8 +- 2 files changed, 163 insertions(+), 141 deletions(-) diff --git a/dump/src/util/dump/MmapLongIdIndex.java b/dump/src/util/dump/MmapLongIdIndex.java index 826ed87..f10b936 100644 --- a/dump/src/util/dump/MmapLongIdIndex.java +++ b/dump/src/util/dump/MmapLongIdIndex.java @@ -1,20 +1,25 @@ package util.dump; -import static java.nio.ByteOrder.BIG_ENDIAN; +import static java.lang.foreign.MemoryLayout.PathElement.groupElement; +import static java.lang.foreign.MemoryLayout.PathElement.sequenceElement; +import static java.lang.foreign.MemoryLayout.sequenceLayout; +import static java.lang.foreign.MemoryLayout.structLayout; +import static java.lang.foreign.ValueLayout.JAVA_LONG; import static java.nio.channels.FileChannel.MapMode.READ_ONLY; import static java.nio.channels.FileChannel.MapMode.READ_WRITE; import static java.nio.file.StandardOpenOption.CREATE_NEW; +import static java.nio.file.StandardOpenOption.READ; import static java.nio.file.StandardOpenOption.SPARSE; import static java.nio.file.StandardOpenOption.WRITE; -import static jdk.incubator.foreign.MemoryLayout.PathElement.groupElement; -import static jdk.incubator.foreign.MemoryLayout.PathElement.sequenceElement; -import static jdk.incubator.foreign.MemoryLayout.sequenceLayout; -import static jdk.incubator.foreign.MemoryLayout.structLayout; -import static jdk.incubator.foreign.MemoryLayouts.JAVA_LONG; import java.io.File; import java.io.IOException; +import java.lang.foreign.Arena; +import java.lang.foreign.GroupLayout; +import java.lang.foreign.MemoryLayout; +import java.lang.foreign.MemorySegment; import java.lang.invoke.VarHandle; +import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -30,11 +35,6 @@ import gnu.trove.list.array.TLongArrayList; import gnu.trove.map.TLongLongMap; import gnu.trove.map.hash.TLongLongHashMap; -import jdk.incubator.foreign.GroupLayout; -import jdk.incubator.foreign.MemoryLayout; -import jdk.incubator.foreign.MemoryLayout.PathElement; -import jdk.incubator.foreign.MemorySegment; -import jdk.incubator.foreign.ResourceScope; import util.dump.reflection.FieldAccessor; import util.dump.reflection.FieldFieldAccessor; import util.dump.reflection.Reflection; @@ -48,14 +48,13 @@ *

* Requires compiler and runtime parameter --add-modules=jdk.incubator.foreign in order to work. */ +@SuppressWarnings({ "preview", "Since15" }) public abstract class MmapLongIdIndex extends DumpIndex implements UniqueConstraint { private static final Logger _log = LoggerFactory.getLogger(MmapLongIdIndex.class); private static final boolean PARANOIA_MODE = true; - private static final VarHandle LONG_ARRAY_ACCESS = sequenceLayout(JAVA_LONG).varHandle(long.class, sequenceElement()); - public static MmapLongIdIndex forClosedRange( Dump dump, String fieldName, long minKey, long maxKey ) throws NoSuchFieldException { return new ClosedRangeMmapLongIdIndex<>(dump, fieldName, minKey, maxKey); } @@ -76,22 +75,6 @@ private static boolean isPowerOfTwo( long n ) { return n > 0L && (n & n - 1L) == 0L; } - private static long longArrayGet( MemorySegment array, long index ) { - return (long)LONG_ARRAY_ACCESS.get(array, index); - } - - private static long longArrayGetVolatile( MemorySegment array, long index ) { - return (long)LONG_ARRAY_ACCESS.getVolatile(array, index); - } - - private static void longArraySet( MemorySegment array, long index, long pos ) { - LONG_ARRAY_ACCESS.set(array, index, pos); - } - - private static void longArraySetVolatile( MemorySegment array, long index, long pos ) { - LONG_ARRAY_ACCESS.setVolatile(array, index, pos); - } - protected final ToLongFunction _getKey; protected final Path _lookupPath; @@ -101,11 +84,13 @@ private static void longArraySetVolatile( MemorySegment array, long index, long private final IndexCorrections _indexCorrections = new IndexCorrections(); - protected FileLayout _fileLayout; - private Header _header; + protected FileLayout _fileLayout; + private FileChannel _readWriteFileChannel; + private Header _header; private MemorySegment _tableSegment; - private volatile long _tableCapacity; + private long _tableCapacity; + private volatile VarHandle _longArrayAccess; private MmapLongIdIndex( Dump dump, String fieldName, long minKey, long maxKey ) throws NoSuchFieldException { this(dump, new FieldFieldAccessor(Reflection.getField(dump._beanClass, fieldName)), minKey, maxKey); @@ -170,6 +155,10 @@ public void close() throws IOException { flushTable(); closeHeader(); + if ( _readWriteFileChannel != null ) { + _readWriteFileChannel.close(); + } + super.close(); _log.info("{} closed.", _lookupPath.getFileName()); } @@ -272,7 +261,7 @@ public E lookup( Object key ) { } protected long capacity( MemorySegment segment ) { - return segment.byteSize() / Long.BYTES; + return segment.byteSize() / JAVA_LONG.byteSize(); } @Override @@ -436,86 +425,88 @@ private boolean checkHeader() { return false; } - try (ResourceScope autoClosedScope = ResourceScope.newConfinedScope()) { + try (Arena arena = Arena.ofConfined()) { long fileSize = Files.size(_lookupPath); if ( fileSize < LeadIn.byteSize() ) { _log.warn("{} file is too small to contain even the header. Will rebuild index.", _lookupPath.getFileName()); return false; } - LeadIn leadIn = new LeadIn(MemorySegment.mapFile(_lookupPath, 0, LeadIn.byteSize(), READ_ONLY, autoClosedScope)); + try (FileChannel readonlyFileChannel = FileChannel.open(_lookupPath, READ)) { + LeadIn leadIn = new LeadIn(readonlyFileChannel.map(READ_ONLY, 0, LeadIn.byteSize(), arena)); - if ( leadIn.getFileMagic() != LeadIn.FILE_MAGIC ) { - _log.warn("{} has wrong file magic. Will rebuild index.", _lookupPath.getFileName()); - return false; - } + if ( leadIn.getFileMagic() != LeadIn.FILE_MAGIC ) { + _log.warn("{} has wrong file magic. Will rebuild index.", _lookupPath.getFileName()); + return false; + } - boolean checkRequired = false; + boolean checkRequired = false; - // version or size unset, unknown, or mismatching - if ( !Header.isReadable(leadIn.getLayoutVersion()) ) { - _log.warn("{} has unknown header version {}. Will rebuild index.", _lookupPath.getFileName(), leadIn.getLayoutVersion()); - return false; - } + // version or size unset, unknown, or mismatching + if ( !Header.isReadable(leadIn.getLayoutVersion()) ) { + _log.warn("{} has unknown header version {}. Will rebuild index.", _lookupPath.getFileName(), leadIn.getLayoutVersion()); + return false; + } - // pin everything to current one-and-only version - if ( leadIn.getLayoutVersion() != Header.getCurrentLayout().layoutVersion() ) { - _log.warn("{} has mismatching header version. Will rebuild index.", _lookupPath.getFileName()); - return false; - } + // pin everything to current one-and-only version + if ( leadIn.getLayoutVersion() != Header.getCurrentLayout().layoutVersion() ) { + _log.warn("{} has mismatching header version. Will rebuild index.", _lookupPath.getFileName()); + return false; + } - FileLayout fileLayout = Header.layoutByVersion(leadIn.getLayoutVersion()); + FileLayout fileLayout = Header.layoutByVersion(leadIn.getLayoutVersion()); - if ( leadIn.getHeaderBytes() < fileLayout.headerBytes() ) { - _log.warn("{} has mismatching header size information. Will rebuild index.", _lookupPath.getFileName()); - return false; - } + if ( leadIn.getHeaderBytes() < fileLayout.headerBytes() ) { + _log.warn("{} has mismatching header size information. Will rebuild index.", _lookupPath.getFileName()); + return false; + } - Header header = new Header(fileLayout, MemorySegment.mapFile(_lookupPath, 0, leadIn.getHeaderBytes(), READ_ONLY, autoClosedScope)); + Header header = new Header(fileLayout, readonlyFileChannel.map(READ_ONLY, 0, leadIn.getHeaderBytes(), arena)); - if ( header.getTableOffset() <= 0 || !isPowerOfTwo(header.getTableOffset()) ) { - _log.warn("{} has inconsistent alignment information. Will rebuild index.", _lookupPath.getFileName()); - return false; - } - // file size mismatch - if ( header.getTableOffset() + header.getTableBytes() != fileSize ) { - _log.warn("{} has mismatching file size information. Will rebuild index.", _lookupPath.getFileName()); - return false; - } + if ( header.getTableOffset() <= 0 || !isPowerOfTwo(header.getTableOffset()) ) { + _log.warn("{} has inconsistent alignment information. Will rebuild index.", _lookupPath.getFileName()); + return false; + } + // file size mismatch + if ( header.getTableOffset() + header.getTableBytes() != fileSize ) { + _log.warn("{} has mismatching file size information. Will rebuild index.", _lookupPath.getFileName()); + return false; + } - // configuration changes - if ( header.getMinKey() != _minKey || header.getMaxKey() != _maxKey ) { - _log.warn("{} has mismatching key bounds. Will rebuild index.", _lookupPath.getFileName()); - return false; - } + // configuration changes + if ( header.getMinKey() != _minKey || header.getMaxKey() != _maxKey ) { + _log.warn("{} has mismatching key bounds. Will rebuild index.", _lookupPath.getFileName()); + return false; + } - // not closed properly, or inconsistency between dump file and header state - if ( header.getOpenedTimestamp() >= header.getClosedTimestamp() ) { - _log.info("{} was not closed properly, checking consistency...", _lookupPath.getFileName()); - checkRequired = true; - } + // not closed properly, or inconsistency between dump file and header state + if ( header.getOpenedTimestamp() >= header.getClosedTimestamp() ) { + _log.info("{} was not closed properly, checking consistency...", _lookupPath.getFileName()); + checkRequired = true; + } - // plausibility checks - if ( !checkNumKeys(header) ) { - _log.info("{} has stored implausible numKeys, checking consistency...", _lookupPath.getFileName()); - checkRequired = true; - } + // plausibility checks + if ( !checkNumKeys(header) ) { + _log.info("{} has stored implausible numKeys, checking consistency...", _lookupPath.getFileName()); + checkRequired = true; + } - if ( PARANOIA_MODE && !checkRequired ) { - _log.info("{} hardcoded paranoia mode enabled, checking consistency...", _lookupPath.getFileName()); - checkRequired = true; - } + if ( PARANOIA_MODE && !checkRequired ) { + _log.info("{} hardcoded paranoia mode enabled, checking consistency...", _lookupPath.getFileName()); + checkRequired = true; + } - if ( checkRequired ) { - MemorySegment tableSegment = MemorySegment.mapFile(_lookupPath, header.getTableOffset(), header.getTableBytes(), READ_ONLY, autoClosedScope); - if ( !checkConsistency(header, tableSegment) ) { - return false; + if ( checkRequired ) { + MemorySegment tableSegment = readonlyFileChannel.map(READ_ONLY, header.getTableOffset(), header.getTableBytes(), arena); + if ( !checkConsistency(header, tableSegment) ) { + return false; + } } - } - _fileLayout = fileLayout; // store this, so we won't have to restart at the lead-in + _fileLayout = fileLayout; // store this, so we won't have to restart at the lead-in - return true; + return true; + } } catch ( Exception argh ) { throw new RuntimeException(argh); @@ -629,13 +620,31 @@ private long keyOffsetRevert( long arrayIndex ) { return arrayIndex + _minKey; // real key } + private long longArrayGet( MemorySegment array, long index ) { + return (long)_longArrayAccess.get(array, index); + } + + private long longArrayGetVolatile( MemorySegment array, long index ) { + return (long)_longArrayAccess.getVolatile(array, index); + } + + private void longArraySet( MemorySegment array, long index, long pos ) { + _longArrayAccess.set(array, index, pos); + } + + private void longArraySetVolatile( MemorySegment array, long index, long pos ) { + _longArrayAccess.setVolatile(array, index, pos); + } + private MemorySegment mapHeaderSegment() throws IOException { - return MemorySegment.mapFile(_lookupPath, 0, _fileLayout.headerBytes(), READ_WRITE, ResourceScope.newImplicitScope()); + _readWriteFileChannel = FileChannel.open(_lookupPath, READ, WRITE); + return _readWriteFileChannel.map(READ_WRITE, 0, _fileLayout.headerBytes(), Arena.ofAuto()); } private void mapTableSegment() throws IOException { - _tableSegment = MemorySegment.mapFile(_lookupPath, _header.getTableOffset(), _header.getTableBytes(), READ_WRITE, ResourceScope.newImplicitScope()); + _tableSegment = _readWriteFileChannel.map(READ_WRITE, _header.getTableOffset(), _header.getTableBytes(), Arena.ofAuto()); _tableCapacity = capacity(_tableSegment); + _longArrayAccess = sequenceLayout(_tableCapacity, JAVA_LONG).varHandle(sequenceElement()); } private void openExisting() throws IOException { @@ -734,9 +743,6 @@ protected static final class Header { JAVA_LONG.withName("tableOffset"), // - JAVA_LONG.withName("minKey"), // - JAVA_LONG.withName("maxKey"), // - MemoryLayout.paddingLayout(Arch.AmdZen.INSTANCE.cacheLineBytes()), // keep things cache-line-aligned // these change during open/close @@ -754,8 +760,11 @@ protected static final class Header { // changes whenever keys are added/removed JAVA_LONG.withName("numKeys"), // + JAVA_LONG.withName("minKey"), // + JAVA_LONG.withName("maxKey"), // + MemoryLayout.paddingLayout(Arch.AmdZen.INSTANCE.cacheLineBytes()) // keep things cache-line-aligned - ).withBitAlignment(8 * Arch.AmdZen.INSTANCE.cacheLineBytes()).withName("headerLayoutV1")) // + ).withByteAlignment(Arch.AmdZen.INSTANCE.cacheLineBytes()).withName("headerLayoutV1")) // .sanityCheck(); static { @@ -777,9 +786,7 @@ public static FileLayout layoutByVersion( long version ) { private final MemorySegment _memorySegment; - private final VarHandle _fileMagic; - private final VarHandle _layoutVersion; - private final VarHandle _headerBytes; + private final LeadIn _leadIn; private final VarHandle _tableOffset; @@ -799,22 +806,19 @@ public Header( FileLayout fileLayout, MemorySegment memorySegment ) { _memorySegment = memorySegment; - PathElement leadIn = groupElement("leadIn"); - _fileMagic = layout.varHandle(long.class, leadIn, groupElement("fileMagic")); - _layoutVersion = layout.varHandle(long.class, leadIn, groupElement("layoutVersion")); - _headerBytes = layout.varHandle(long.class, leadIn, groupElement("headerBytes")); + _leadIn = new LeadIn(memorySegment); - _tableOffset = layout.varHandle(long.class, groupElement("tableOffset")); + _tableOffset = layout.varHandle(groupElement("tableOffset")); - _minKey = layout.varHandle(long.class, groupElement("minKey")); - _maxKey = layout.varHandle(long.class, groupElement("maxKey")); + _minKey = layout.varHandle(groupElement("minKey")); + _maxKey = layout.varHandle(groupElement("maxKey")); - _openedTimestamp = layout.varHandle(long.class, groupElement("openedTimestamp")); - _closedTimestamp = layout.varHandle(long.class, groupElement("closedTimestamp")); + _openedTimestamp = layout.varHandle(groupElement("openedTimestamp")); + _closedTimestamp = layout.varHandle(groupElement("closedTimestamp")); - _tableBytes = layout.varHandle(long.class, groupElement("tableBytes")); + _tableBytes = layout.varHandle(groupElement("tableBytes")); - _numKeys = layout.varHandle(long.class, groupElement("numKeys")); + _numKeys = layout.varHandle(groupElement("numKeys")); } public void flush() { @@ -858,15 +862,15 @@ public void setClosedTimestamp( long closedTimestamp ) { } public void setFileMagic( long fileMagic ) { - setVolatile(_fileMagic, fileMagic); + _leadIn.setFileMagic(fileMagic); } public void setHeaderBytes( long headerBytes ) { - setVolatile(_headerBytes, headerBytes); + _leadIn.setHeaderBytes(headerBytes); } public void setLayoutVersion( long layoutVersion ) { - setVolatile(_layoutVersion, layoutVersion); + _leadIn.setLayoutVersion(layoutVersion); } public void setMaxKey( long maxKey ) { @@ -1000,44 +1004,57 @@ TLongLongMap rawContentCorrections() { /** * Helper for reading in the bare minimum from existing files. */ - private static final class LeadIn { + private record LeadIn(MemorySegment _memorySegment) { public static final long FILE_MAGIC = 0x6861696C756C6672L; private static final GroupLayout LAYOUT = structLayout( // - JAVA_LONG.withName("fileMagic").withOrder(BIG_ENDIAN), // + JAVA_LONG.withName("fileMagic"), // JAVA_LONG.withName("layoutVersion"), // JAVA_LONG.withName("headerBytes") // - ).withBitAlignment(8 * Arch.AmdZen.INSTANCE.cacheLineBytes()).withName("leadIn"); + ).withByteAlignment(Arch.AmdZen.INSTANCE.cacheLineBytes()).withName("leadIn"); - private static final VarHandle _fileMagic = LAYOUT.varHandle(long.class, groupElement("fileMagic")); - private static final VarHandle _layoutVersion = LAYOUT.varHandle(long.class, groupElement("layoutVersion")); - private static final VarHandle _headerBytes = LAYOUT.varHandle(long.class, groupElement("headerBytes")); + private static final VarHandle _fileMagic = LAYOUT.varHandle(groupElement("fileMagic")); + private static final VarHandle _layoutVersion = LAYOUT.varHandle(groupElement("layoutVersion")); + private static final VarHandle _headerBytes = LAYOUT.varHandle(groupElement("headerBytes")); public static long byteSize() { return LAYOUT.byteSize(); // minimum read length: header version and bytes } - private final MemorySegment _memorySegment; - - public LeadIn( MemorySegment memorySegment ) { - _memorySegment = memorySegment; - } - public long getFileMagic() { - return (Long)_fileMagic.get(_memorySegment); + return get(_fileMagic); } public long getHeaderBytes() { - return (Long)_headerBytes.get(_memorySegment); + return get(_headerBytes); } public long getLayoutVersion() { - return (Long)_layoutVersion.get(_memorySegment); + return get(_layoutVersion); + } + + public void setFileMagic( long fileMagic ) { + setVolatile(_fileMagic, fileMagic); } + public void setHeaderBytes( long headerBytes ) { + setVolatile(_headerBytes, headerBytes); + } + + public void setLayoutVersion( long layoutVersion ) { + setVolatile(_layoutVersion, layoutVersion); + } + + private long get( VarHandle varHandle ) { + return (long)varHandle.get(_memorySegment); + } + + private void setVolatile( VarHandle varHandle, long value ) { + varHandle.setVolatile(_memorySegment, value); + } } @@ -1055,6 +1072,8 @@ public ConsistencyCheck( Header header, MemorySegment tableSegment ) { _header = header; _tableSegment = tableSegment; _segmentCapacity = capacity(_tableSegment); + + _longArrayAccess = sequenceLayout(_segmentCapacity, JAVA_LONG).varHandle(sequenceElement()); } public boolean perform() throws IOException { @@ -1064,8 +1083,8 @@ public boolean perform() throws IOException { checkIndexContents(); if ( PARANOIA_MODE ) { - try (ResourceScope scope = ResourceScope.newConfinedScope()) { - MemorySegment dumpSegment = preloadDump(_dump, scope); + try (Arena arena = Arena.ofConfined()) { + MemorySegment dumpSegment = preloadDump(_dump, arena); checkDumpContents(); @@ -1161,17 +1180,20 @@ private void checkIndexContents() { _log.info("{} had its contents checked in {}", _lookupPath.getFileName(), duration); } - private MemorySegment preloadDump( Dump dump, ResourceScope scope ) throws IOException { + private MemorySegment preloadDump( Dump dump, Arena arena ) throws IOException { long start = System.nanoTime(); Path dumpPath = Paths.get(dump._dumpFile.getPath()); long dumpSize = dump.getDumpSize(); - MemorySegment dumpSegment = MemorySegment.mapFile(dumpPath, 0, dumpSize, READ_ONLY, scope); - dumpSegment.load(); // force-fetch into memory - Duration duration = Duration.ofNanos(System.nanoTime() - start); - _log.info("{} was mapped and preloaded in {}", dumpPath.getFileName(), duration); - return dumpSegment; + try (FileChannel readOnlyFileChannel = FileChannel.open(dumpPath, READ)) { + MemorySegment dumpSegment = readOnlyFileChannel.map(READ_ONLY, 0, dumpSize, arena); + dumpSegment.load(); // force-fetch into memory + + Duration duration = Duration.ofNanos(System.nanoTime() - start); + _log.info("{} was mapped and preloaded in {}", dumpPath.getFileName(), duration); + return dumpSegment; + } } private void preloadSegment() { diff --git a/pom.xml b/pom.xml index 64244a7..416de2d 100644 --- a/pom.xml +++ b/pom.xml @@ -25,10 +25,10 @@ maven-compiler-plugin UTF-8 - 17 - 17 + 21 + 21 - --add-modules=jdk.incubator.foreign + --enable-preview @@ -50,7 +50,7 @@ --add-opens java.base/java.util=ALL-UNNAMED - --add-modules=jdk.incubator.foreign + --enable-preview From cac43d397960c902c7181483dd35d3889a1df440 Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Wed, 22 Oct 2025 13:36:01 +0200 Subject: [PATCH 09/10] TECH: port Project Panama from j21 preview API to j22 stable The v22 implementation has a bug though, so the minimum working runtime version is 23, actually. See https://bugs.openjdk.org/browse/JDK-8331734 --- dump/src/util/dump/MmapLongIdIndex.java | 19 +++++++++---------- pom.xml | 8 ++------ 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/dump/src/util/dump/MmapLongIdIndex.java b/dump/src/util/dump/MmapLongIdIndex.java index f10b936..4a0cb09 100644 --- a/dump/src/util/dump/MmapLongIdIndex.java +++ b/dump/src/util/dump/MmapLongIdIndex.java @@ -48,7 +48,6 @@ *

* Requires compiler and runtime parameter --add-modules=jdk.incubator.foreign in order to work. */ -@SuppressWarnings({ "preview", "Since15" }) public abstract class MmapLongIdIndex extends DumpIndex implements UniqueConstraint { private static final Logger _log = LoggerFactory.getLogger(MmapLongIdIndex.class); @@ -621,19 +620,19 @@ private long keyOffsetRevert( long arrayIndex ) { } private long longArrayGet( MemorySegment array, long index ) { - return (long)_longArrayAccess.get(array, index); + return (long)_longArrayAccess.get(array, 0L, index); } private long longArrayGetVolatile( MemorySegment array, long index ) { - return (long)_longArrayAccess.getVolatile(array, index); + return (long)_longArrayAccess.getVolatile(array, 0L, index); } private void longArraySet( MemorySegment array, long index, long pos ) { - _longArrayAccess.set(array, index, pos); + _longArrayAccess.set(array, 0L, index, pos); } private void longArraySetVolatile( MemorySegment array, long index, long pos ) { - _longArrayAccess.setVolatile(array, index, pos); + _longArrayAccess.setVolatile(array, 0L, index, pos); } private MemorySegment mapHeaderSegment() throws IOException { @@ -854,7 +853,7 @@ public long getTableOffset() { } public void incrementNumKeys( long difference ) { - _numKeys.getAndAdd(_memorySegment, difference); + _numKeys.getAndAdd(_memorySegment, 0L, difference); } public void setClosedTimestamp( long closedTimestamp ) { @@ -898,11 +897,11 @@ public void setTableOffset( long tableOffset ) { } private long getVolatile( VarHandle varHandle ) { - return (long)varHandle.getVolatile(_memorySegment); + return (long)varHandle.getVolatile(_memorySegment, 0L); } private void setVolatile( VarHandle varHandle, long value ) { - varHandle.setVolatile(_memorySegment, value); + varHandle.setVolatile(_memorySegment, 0L, value); } } @@ -1049,11 +1048,11 @@ public void setLayoutVersion( long layoutVersion ) { } private long get( VarHandle varHandle ) { - return (long)varHandle.get(_memorySegment); + return (long)varHandle.get(_memorySegment, 0L); } private void setVolatile( VarHandle varHandle, long value ) { - varHandle.setVolatile(_memorySegment, value); + varHandle.setVolatile(_memorySegment, 0L, value); } } diff --git a/pom.xml b/pom.xml index 416de2d..f7401a1 100644 --- a/pom.xml +++ b/pom.xml @@ -25,11 +25,8 @@ maven-compiler-plugin UTF-8 - 21 - 21 - - --enable-preview - + 22 + 22 @@ -50,7 +47,6 @@ --add-opens java.base/java.util=ALL-UNNAMED - --enable-preview From 8c8d077b2b9cb2971b6a45eb14cbe60128030514 Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Wed, 22 Oct 2025 14:19:56 +0200 Subject: [PATCH 10/10] test closed range index as well --- dump/test/util/dump/MmapLongIdIndexTest.java | 147 +++++++++++++------ 1 file changed, 99 insertions(+), 48 deletions(-) diff --git a/dump/test/util/dump/MmapLongIdIndexTest.java b/dump/test/util/dump/MmapLongIdIndexTest.java index b2efb3a..53fff83 100644 --- a/dump/test/util/dump/MmapLongIdIndexTest.java +++ b/dump/test/util/dump/MmapLongIdIndexTest.java @@ -10,6 +10,7 @@ import java.util.Collection; import java.util.List; import java.util.Random; +import java.util.function.BiFunction; import org.assertj.core.util.Arrays; import org.junit.After; @@ -59,6 +60,11 @@ public static void setUpTmpdir() throws IOException { private final int _dumpSize; private final long _negativeOffset; + private final BiFunction, Field, MmapLongIdIndex> _createOpenRangeIndex = // + ( dump, field ) -> MmapLongIdIndex.forOpenRange(dump, new FieldFieldAccessor(field), minId()); + private final BiFunction, Field, MmapLongIdIndex> _createClosedRangeIndex = // + ( dump, field ) -> MmapLongIdIndex.forClosedRange(dump, new FieldFieldAccessor(field), minId(), maxId()); + public MmapLongIdIndexTest( Integer dumpSize ) { _dumpSize = dumpSize; _negativeOffset = -_dumpSize; // need to cater to generated negative indexed @@ -131,84 +137,80 @@ public void testGetNumKeys() throws Exception { } } + @Test - public void testIntKeyIndex() throws Exception { + public void testIntKey_OpenRangeIndex() throws Exception { + testIntKeyIndex(_createOpenRangeIndex); + } + + @Test + public void testIntKey_ClosedRangeIndex() throws Exception { + testIntKeyIndex(_createClosedRangeIndex); + } + + private void testIntKeyIndex(BiFunction, Field, MmapLongIdIndex> createIndex) throws Exception { testIndex("_idInt", new TestConfiguration() { @Override public Object createKey( int id ) { return id; } - }); + }, createIndex); + } + + @Test + public void testLongKey_OpenRangeIndex() throws Exception { + testLongKeyIndex(_createOpenRangeIndex); } @Test - public void testLongKeyIndex() throws Exception { + public void testLongKey_ClosedRangeIndex() throws Exception { + testLongKeyIndex(_createClosedRangeIndex); + } + + private void testLongKeyIndex(BiFunction, Field, MmapLongIdIndex> createIndex) throws Exception { testIndex("_idLong", new TestConfiguration() { @Override public Object createKey( int id ) { return (long)id; } - }); + }, createIndex); + } + + @Test + public void testLongObjectKey_OpenRangeIndex() throws Exception { + testLongObjectKeyIndex(_createOpenRangeIndex); } @Test - public void testLongObjectKeyIndex() throws Exception { + public void testLongObjectKey_ClosedRangeIndex() throws Exception { + testLongObjectKeyIndex(_createClosedRangeIndex); + } + + private void testLongObjectKeyIndex(BiFunction, Field, MmapLongIdIndex> createIndex) throws Exception { testIndex("_idLongObject", new TestConfiguration() { @Override public Object createKey( int id ) { return (long)id; } - }); + }, createIndex); } @Test - public void testRecreateIndex() throws NoSuchFieldException, IOException { - File dumpFile = new File(_tmpdir, DUMP_FILENAME); - - Dump dump = new Dump<>(Bean.class, dumpFile); - try { - Field field = Reflection.getField(Bean.class, "_idInt"); - UniqueConstraint index = MmapLongIdIndex.forOpenRange(dump, new FieldFieldAccessor(field), _negativeOffset); - - validateNumKeys(dump, index); - - fillDump(dump); - - validateNumKeys(dump, index); - - dump.close(); - - System.out.println("Closing and re-opening dump, deleting index"); - Assert.assertTrue("Failed to delete index", - new File(_tmpdir, DUMP_FILENAME + "._idInt.mmap.lookup").delete() && !new File(_tmpdir, DUMP_FILENAME + "._idInt.mmap.lookup").exists()); - - dump = new Dump<>(Bean.class, dumpFile); - index = MmapLongIdIndex.forOpenRange(dump, new FieldFieldAccessor(field), _negativeOffset); - - long t = System.currentTimeMillis(); - for ( int j = 0; j < READ_NUMBER; j++ ) { - int i = _random.nextInt(_dumpSize); - Bean bean = index.lookup(i); - Assert.assertNotNull("no Bean for index " + i, bean); - Assert.assertEquals(i, bean._idInt); - Assert.assertTrue(bean._data.startsWith(i + "-")); - } - System.out.println("Read " + READ_NUMBER + " instances from dump. Needed " + (System.currentTimeMillis() - t) / (float)READ_NUMBER + " ms/instance."); + public void testRecreateIndex_ClosedRange() throws IOException, NoSuchFieldException { + testRecreateIndex(_createClosedRangeIndex); + } - Bean nonExistingBean = index.lookup(_dumpSize + 1); - Assert.assertNull(nonExistingBean); - } - finally { - dump.close(); - } + @Test + public void testRecreateIndex_OpenRange() throws IOException, NoSuchFieldException { + testRecreateIndex(_createOpenRangeIndex); } - protected void testIndex( String fieldName, TestConfiguration config ) throws Exception { + protected void testIndex( String fieldName, TestConfiguration config, BiFunction, Field, MmapLongIdIndex> createIndex) throws Exception { - testLateOpenIndex(fieldName, config); + testLateOpenIndex(fieldName, config, createIndex); File dumpFile = new File(_tmpdir, DUMP_FILENAME); @@ -368,7 +370,15 @@ private void fillDump( Dump dump ) throws IOException { System.out.println("Written " + _dumpSize + " instances to dump. Needed " + (System.currentTimeMillis() - t) / (float)_dumpSize + " ms/instance."); } - private void testLateOpenIndex( String fieldName, TestConfiguration config ) throws Exception { + private long maxId() { + return _dumpSize; + } + + private long minId() { + return _negativeOffset; + } + + private void testLateOpenIndex( String fieldName, TestConfiguration config, BiFunction, Field, MmapLongIdIndex> createIndex ) throws Exception { File dumpFile = new File(_tmpdir, DUMP_FILENAME); /* create dump and index */ @@ -377,7 +387,7 @@ private void testLateOpenIndex( String fieldName, TestConfiguration config ) thr Field field = Reflection.getField(Bean.class, fieldName); fillDump(dump); - UniqueConstraint index = MmapLongIdIndex.forOpenRange(dump, new FieldFieldAccessor(field), _negativeOffset); + UniqueConstraint index = createIndex.apply(dump, field); testLookup(config, field, index); } @@ -431,6 +441,47 @@ private void testLookupAfterUpdates( TestConfiguration config, Field field, Uniq System.out.println("Read " + READ_NUMBER + " instances from dump. Needed " + (System.currentTimeMillis() - t) / (float)READ_NUMBER + " ms/instance."); } + private void testRecreateIndex( BiFunction, Field, MmapLongIdIndex> createIndex ) throws NoSuchFieldException, IOException { + File dumpFile = new File(_tmpdir, DUMP_FILENAME); + + Dump dump = new Dump<>(Bean.class, dumpFile); + try { + Field field = Reflection.getField(Bean.class, "_idInt"); + UniqueConstraint index = createIndex.apply(dump, field); + + validateNumKeys(dump, index); + + fillDump(dump); + + validateNumKeys(dump, index); + + dump.close(); + + System.out.println("Closing and re-opening dump, deleting index"); + Assert.assertTrue("Failed to delete index", + new File(_tmpdir, DUMP_FILENAME + "._idInt.mmap.lookup").delete() && !new File(_tmpdir, DUMP_FILENAME + "._idInt.mmap.lookup").exists()); + + dump = new Dump<>(Bean.class, dumpFile); + index = MmapLongIdIndex.forOpenRange(dump, new FieldFieldAccessor(field), _negativeOffset); + + long t = System.currentTimeMillis(); + for ( int j = 0; j < READ_NUMBER; j++ ) { + int i = _random.nextInt(_dumpSize); + Bean bean = index.lookup(i); + Assert.assertNotNull("no Bean for index " + i, bean); + Assert.assertEquals(i, bean._idInt); + Assert.assertTrue(bean._data.startsWith(i + "-")); + } + System.out.println("Read " + READ_NUMBER + " instances from dump. Needed " + (System.currentTimeMillis() - t) / (float)READ_NUMBER + " ms/instance."); + + Bean nonExistingBean = index.lookup(_dumpSize + 1); + Assert.assertNull(nonExistingBean); + } + finally { + dump.close(); + } + } + private void validateNumKeys( Dump dump, UniqueConstraint index ) { // count keys TIntSet keys = new TIntHashSet();