From 8b10a34c3bee72abb89054d22a4d8d4bf1ecb388 Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Mon, 7 Apr 2025 14:08:28 +0200 Subject: [PATCH 1/4] ERR-11776: DumpIndex: pull implementations of #getNumKeys() and #getAllPositions() under dump lock --- dump/src/util/dump/GroupIndex.java | 44 +-- dump/src/util/dump/InfiniteGroupIndex.java | 349 +++++++++++---------- dump/src/util/dump/UniqueIndex.java | 36 ++- 3 files changed, 225 insertions(+), 204 deletions(-) diff --git a/dump/src/util/dump/GroupIndex.java b/dump/src/util/dump/GroupIndex.java index 00b54fd..8cb1922 100644 --- a/dump/src/util/dump/GroupIndex.java +++ b/dump/src/util/dump/GroupIndex.java @@ -174,31 +174,39 @@ public boolean contains( Object key ) { @Override public TLongList getAllPositions() { - TLongList pos = new TLongArrayList(100000); - Collection c = _fieldIsInt ? _lookupInt.valueCollection() : (_fieldIsLong ? _lookupLong.valueCollection() : _lookupObject.values()); - for ( Positions p : c ) { - ensureSorting(p); - for ( int i = 0, length = p.size(); i < length; i++ ) { - long pp = p.get(i); - if ( !_dump._deletedPositions.contains(pp) ) { - pos.add(pp); + synchronized ( _dump ) { + Collection c = _fieldIsInt ? _lookupInt.valueCollection() : (_fieldIsLong ? _lookupLong.valueCollection() : _lookupObject.values()); + int numPos = 0; + for ( Positions positions : c ) { + numPos += positions.size(); + } + TLongList pos = new TLongArrayList(numPos); + for ( Positions p : c ) { + ensureSorting(p); + for ( int i = 0, length = p.size(); i < length; i++ ) { + long pp = p.get(i); + if ( !_dump._deletedPositions.contains(pp) ) { + pos.add(pp); + } } } + pos.sort(); + return pos; } - pos.sort(); - return pos; } @Override public int getNumKeys() { - if ( _lookupObject != null ) { - return _lookupObject.size(); - } - if ( _lookupLong != null ) { - return _lookupLong.size(); - } - if ( _lookupInt != null ) { - return _lookupInt.size(); + synchronized ( _dump ) { + if ( _lookupObject != null ) { + return _lookupObject.size(); + } + if ( _lookupLong != null ) { + return _lookupLong.size(); + } + if ( _lookupInt != null ) { + return _lookupInt.size(); + } } throw new IllegalStateException("weird, all lookup maps are null"); } diff --git a/dump/src/util/dump/InfiniteGroupIndex.java b/dump/src/util/dump/InfiniteGroupIndex.java index b27412e..7081808 100644 --- a/dump/src/util/dump/InfiniteGroupIndex.java +++ b/dump/src/util/dump/InfiniteGroupIndex.java @@ -176,132 +176,135 @@ public boolean contains( Object key ) { @Override public TLongList getAllPositions() { - TLongList pos = new TLongArrayList((int)(_lookupFileLength / (8 + (_fieldIsLong ? 8 : 4)))); - - if ( _fieldIsInt || _fieldIsLong ) { - DataInputStream in = null; - try { - in = new DataInputStream(new BufferedInputStream(new FileInputStream(getLookupFile()))); - while ( true ) { - if ( _fieldIsLong ) { - in.readLong(); - } else { - in.readInt(); - } - long p = in.readLong(); - if ( !_dump._deletedPositions.contains(p) ) { - pos.add(p); + synchronized ( _dump ) { + TLongList pos = new TLongArrayList((int)(_lookupFileLength / (8 + (_fieldIsLong ? 8 : 4)))); + + if ( _fieldIsInt || _fieldIsLong ) { + DataInputStream in = null; + try { + in = new DataInputStream(new BufferedInputStream(new FileInputStream(getLookupFile()))); + while ( true ) { + if ( _fieldIsLong ) { + in.readLong(); + } else { + in.readInt(); + } + long p = in.readLong(); + if ( !_dump._deletedPositions.contains(p) ) { + pos.add(p); + } } } - } - catch ( EOFException e ) { - // ignore, since all is good - } - catch ( Exception e ) { - throw new RuntimeException("Failed to read dump positions", e); - } - finally { - if ( in != null ) { - try { - in.close(); - } - catch ( IOException e ) { - LOG.error("Failed to close inputstream."); + catch ( EOFException e ) { + // ignore, since all is good + } + catch ( Exception e ) { + throw new RuntimeException("Failed to read dump positions", e); + } + finally { + if ( in != null ) { + try { + in.close(); + } + catch ( IOException e ) { + LOG.error("Failed to close inputstream."); + } } } - } - } else if ( _fieldIsExternalizable ) { - for ( ExternalizableKeyPosition kp : _externalizableKeyDump ) { - if ( !_dump._deletedPositions.contains(kp._pos) ) { - pos.add(kp._pos); + } else if ( _fieldIsExternalizable ) { + for ( ExternalizableKeyPosition kp : _externalizableKeyDump ) { + if ( !_dump._deletedPositions.contains(kp._pos) ) { + pos.add(kp._pos); + } } - } - } else if ( _fieldIsString ) { - for ( StringKeyPosition kp : _stringKeyDump ) { - if ( !_dump._deletedPositions.contains(kp._pos) ) { - pos.add(kp._pos); + } else if ( _fieldIsString ) { + for ( StringKeyPosition kp : _stringKeyDump ) { + if ( !_dump._deletedPositions.contains(kp._pos) ) { + pos.add(kp._pos); + } } } + pos.addAll(_overflowIndex.getAllPositions()); + // TODO sort since we added the positions from groupIndex? + return pos; } - pos.addAll(_overflowIndex.getAllPositions()); - // TODO sort since we added the positions from groupIndex? - return pos; } @Override public int getNumKeys() { + synchronized ( _dump ) { + boolean first = true; + int numKeys = 0; + if ( _fieldIsInt ) { + int before = 0; + for ( IntKeyPosition keyPos : _intKeyDump ) { + if ( _dump._deletedPositions.contains(keyPos._pos) ) { + continue; + } - boolean first = true; - int numKeys = 0; - if ( _fieldIsInt ) { - int before = 0; - for ( IntKeyPosition keyPos : _intKeyDump ) { - if ( _dump._deletedPositions.contains(keyPos._pos) ) { - continue; + int key = keyPos._key; + if ( first || key != before ) { + first = false; + if ( !_overflowIndex.contains(key) ) { + numKeys++; + } + } + before = key; } + } else if ( _fieldIsLong ) { + long before = 0; + for ( LongKeyPosition keyPos : _longKeyDump ) { + if ( _dump._deletedPositions.contains(keyPos._pos) ) { + continue; + } - int key = keyPos._key; - if ( first || key != before ) { - first = false; - if ( !_overflowIndex.contains(key) ) { - numKeys++; + long key = keyPos._key; + if ( first || key != before ) { + first = false; + if ( !_overflowIndex.contains(key) ) { + numKeys++; + } } + before = key; } - before = key; - } - } else if ( _fieldIsLong ) { - long before = 0; - for ( LongKeyPosition keyPos : _longKeyDump ) { - if ( _dump._deletedPositions.contains(keyPos._pos) ) { - continue; + } else { + if ( !_fieldIsExternalizable && !_fieldIsString ) { + throw new IllegalStateException("must not happen"); } - long key = keyPos._key; - if ( first || key != before ) { - first = false; - if ( !_overflowIndex.contains(key) ) { - numKeys++; - } - } - before = key; - } - } else { - if ( !_fieldIsExternalizable && !_fieldIsString ) { - throw new IllegalStateException("must not happen"); - } + int before = 0; + Set set = new HashSet<>(); + for ( IntKeyPosition keyPos : _intKeyDump ) { - int before = 0; - Set set = new HashSet<>(); - for ( IntKeyPosition keyPos : _intKeyDump ) { + int hashCode = keyPos._key; + if ( first || hashCode != before ) { + numKeys += countDistinctObjects(set); + first = false; + } - int hashCode = keyPos._key; - if ( first || hashCode != before ) { - numKeys += countDistinctObjects(set); - first = false; + Object objectKey = null; + long objectPos = -1; + if ( _fieldIsString ) { + StringKeyPosition object = _stringKeyDump.get(keyPos._pos); + objectPos = object._pos; + objectKey = object._key; + } else { + ExternalizableKeyPosition object = _externalizableKeyDump.get(keyPos._pos); + objectPos = object._pos; + objectKey = _externalizableKeyDump.get(keyPos._pos)._key; + } + if ( _dump._deletedPositions.contains(objectPos) ) { + continue; + } + set.add(objectKey); + before = hashCode; } - Object objectKey = null; - long objectPos = -1; - if ( _fieldIsString ) { - StringKeyPosition object = _stringKeyDump.get(keyPos._pos); - objectPos = object._pos; - objectKey = object._key; - } else { - ExternalizableKeyPosition object = _externalizableKeyDump.get(keyPos._pos); - objectPos = object._pos; - objectKey = _externalizableKeyDump.get(keyPos._pos)._key; - } - if ( _dump._deletedPositions.contains(objectPos) ) { - continue; - } - set.add(objectKey); - before = hashCode; + numKeys += countDistinctObjects(set); } - numKeys += countDistinctObjects(set); + return numKeys + _overflowIndex.getNumKeys(); } - - return numKeys + _overflowIndex.getNumKeys(); } @Override @@ -381,69 +384,73 @@ protected String getIndexType() { } protected long[] getPositions( int key ) { - if ( !(_fieldIsInt || _fieldIsExternalizable || _fieldIsString) ) { - throw new IllegalArgumentException( - "The type of the used key class of this index is " + _fieldAccessor.getType() + ". Please use the appropriate lookup(.) method."); - } + synchronized ( _dump ) { + if ( !(_fieldIsInt || _fieldIsExternalizable || _fieldIsString) ) { + throw new IllegalArgumentException( + "The type of the used key class of this index is " + _fieldAccessor.getType() + ". Please use the appropriate lookup(.) method."); + } - long[] cachedPositions = getPositionsFromCache(key); - if ( cachedPositions != null ) { - return cachedPositions; - } + long[] cachedPositions = getPositionsFromCache(key); + if ( cachedPositions != null ) { + return cachedPositions; + } - TLongList pos = new TLongArrayList(_overflowIndex.getPositions(key)); + TLongList pos = new TLongArrayList(_overflowIndex.getPositions(key)); - int keyLength = 4 + 8; // in bytes + int keyLength = 4 + 8; // in bytes - long firstIndex = findIntKey(key, keyLength); - if ( firstIndex >= 0 ) { - for ( long p = firstIndex * keyLength; p < _lookupFileLength; p += keyLength ) { - IntKeyPosition ip = _intKeyDump.get(p); - if ( ip._key != key ) { - break; - } - if ( ip._pos >= 0 && !_dump._deletedPositions.contains(ip._pos) ) { - pos.add(ip._pos); + long firstIndex = findIntKey(key, keyLength); + if ( firstIndex >= 0 ) { + for ( long p = firstIndex * keyLength; p < _lookupFileLength; p += keyLength ) { + IntKeyPosition ip = _intKeyDump.get(p); + if ( ip._key != key ) { + break; + } + if ( ip._pos >= 0 && !_dump._deletedPositions.contains(ip._pos) ) { + pos.add(ip._pos); + } } } - } - long[] positions = pos.toArray(); - putPositionsIntoCache(key, positions); - return positions; + long[] positions = pos.toArray(); + putPositionsIntoCache(key, positions); + return positions; + } } protected long[] getPositions( long key ) { - if ( !_fieldIsLong ) { - throw new IllegalArgumentException( - "The type of the used key class of this index is " + _fieldAccessor.getType() + ". Please use the appropriate lookup(.) method."); - } + synchronized ( _dump ) { + if ( !_fieldIsLong ) { + throw new IllegalArgumentException( + "The type of the used key class of this index is " + _fieldAccessor.getType() + ". Please use the appropriate lookup(.) method."); + } - long[] cachedPositions = getPositionsFromCache(key); - if ( cachedPositions != null ) { - return cachedPositions; - } + long[] cachedPositions = getPositionsFromCache(key); + if ( cachedPositions != null ) { + return cachedPositions; + } - TLongList pos = new TLongArrayList(_overflowIndex.getPositions(key)); + TLongList pos = new TLongArrayList(_overflowIndex.getPositions(key)); - int keyLength = 8 + 8; // in bytes + int keyLength = 8 + 8; // in bytes - long firstIndex = findLongKey(key, keyLength); - if ( firstIndex >= 0 ) { - for ( long p = firstIndex * keyLength; p < _lookupFileLength; p += keyLength ) { - LongKeyPosition ip = _longKeyDump.get(p); - if ( ip._key != key ) { - break; - } - if ( ip._pos >= 0 && !_dump._deletedPositions.contains(ip._pos) ) { - pos.add(ip._pos); + long firstIndex = findLongKey(key, keyLength); + if ( firstIndex >= 0 ) { + for ( long p = firstIndex * keyLength; p < _lookupFileLength; p += keyLength ) { + LongKeyPosition ip = _longKeyDump.get(p); + if ( ip._key != key ) { + break; + } + if ( ip._pos >= 0 && !_dump._deletedPositions.contains(ip._pos) ) { + pos.add(ip._pos); + } } } - } - long[] positions = pos.toArray(); - putPositionsIntoCache(key, positions); - return positions; + long[] positions = pos.toArray(); + putPositionsIntoCache(key, positions); + return positions; + } } protected long[] getPositions( Object key ) { @@ -462,35 +469,37 @@ protected long[] getPositions( Object key ) { + ". You tried to using the index with a key of type " + key.getClass() + "."); } - long[] cachedPositions = getPositionsFromCache(key); - if ( cachedPositions != null ) { - return cachedPositions; - } + synchronized ( _dump ) { + long[] cachedPositions = getPositionsFromCache(key); + if ( cachedPositions != null ) { + return cachedPositions; + } - TLongList keyPositions = getObjectKeyPositions(key); + TLongList keyPositions = getObjectKeyPositions(key); - TLongList positions = new TLongArrayList(_overflowIndex.getPositions(key)); - for ( TLongIterator iterator = keyPositions.iterator(); iterator.hasNext(); ) { - long pos = iterator.next(); - if ( _fieldIsExternalizable ) { - ExternalizableKeyPosition keyPosition = _externalizableKeyDump.get(pos); - long kp = keyPosition._pos; - if ( kp >= 0 && keyPosition._key.equals(key) && !_dump._deletedPositions.contains(kp) ) { - positions.add(kp); - } - } else if ( _fieldIsString ) { - StringKeyPosition keyPosition = _stringKeyDump.get(pos); - long kp = keyPosition._pos; - if ( kp >= 0 && keyPosition._key.equals(key) && !_dump._deletedPositions.contains(kp) ) { - positions.add(kp); + TLongList positions = new TLongArrayList(_overflowIndex.getPositions(key)); + for ( TLongIterator iterator = keyPositions.iterator(); iterator.hasNext(); ) { + long pos = iterator.next(); + if ( _fieldIsExternalizable ) { + ExternalizableKeyPosition keyPosition = _externalizableKeyDump.get(pos); + long kp = keyPosition._pos; + if ( kp >= 0 && keyPosition._key.equals(key) && !_dump._deletedPositions.contains(kp) ) { + positions.add(kp); + } + } else if ( _fieldIsString ) { + StringKeyPosition keyPosition = _stringKeyDump.get(pos); + long kp = keyPosition._pos; + if ( kp >= 0 && keyPosition._key.equals(key) && !_dump._deletedPositions.contains(kp) ) { + positions.add(kp); + } } } - } - positions.sort(); - long[] posArray = positions.toArray(); - putPositionsIntoCache(key, posArray); - return posArray; + positions.sort(); + long[] posArray = positions.toArray(); + putPositionsIntoCache(key, posArray); + return posArray; + } } protected long[] getPositionsFromCache( Object key ) { diff --git a/dump/src/util/dump/UniqueIndex.java b/dump/src/util/dump/UniqueIndex.java index e706f03..e220392 100644 --- a/dump/src/util/dump/UniqueIndex.java +++ b/dump/src/util/dump/UniqueIndex.java @@ -166,16 +166,18 @@ public Object[] getAllObjectKeys() { @Override public TLongList getAllPositions() { - TLongList pos = new TLongArrayList(100000, 10000); - TLongCollection c = _fieldIsInt ? _lookupInt.valueCollection() : (_fieldIsLong ? _lookupLong.valueCollection() : _lookupObject.valueCollection()); - for ( TLongIterator iterator = c.iterator(); iterator.hasNext(); ) { - long p = iterator.next(); - if ( !_dump._deletedPositions.contains(p) ) { - pos.add(p); + synchronized ( _dump ) { + TLongCollection c = _fieldIsInt ? _lookupInt.valueCollection() : (_fieldIsLong ? _lookupLong.valueCollection() : _lookupObject.valueCollection()); + TLongList pos = new TLongArrayList(c.size(), 10000); + for ( TLongIterator iterator = c.iterator(); iterator.hasNext(); ) { + long p = iterator.next(); + if ( !_dump._deletedPositions.contains(p) ) { + pos.add(p); + } } + pos.sort(); + return pos; } - pos.sort(); - return pos; } public Object getKey( E o ) { @@ -190,14 +192,16 @@ public Object getKey( E o ) { @Override public int getNumKeys() { - if ( _lookupObject != null ) { - return _lookupObject.size(); - } - if ( _lookupLong != null ) { - return _lookupLong.size(); - } - if ( _lookupInt != null ) { - return _lookupInt.size(); + synchronized ( _dump ) { + if ( _lookupObject != null ) { + return _lookupObject.size(); + } + if ( _lookupLong != null ) { + return _lookupLong.size(); + } + if ( _lookupInt != null ) { + return _lookupInt.size(); + } } throw new IllegalStateException("weird, all lookup maps are null"); } From 471fd87c71eaa4b2ff0d52a04184d25153df614f Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Mon, 7 Apr 2025 14:54:46 +0200 Subject: [PATCH 2/4] ERR-11776: UniqueIndex: pull #getAll[Type]Keys() under dump lock --- dump/src/util/dump/UniqueIndex.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/dump/src/util/dump/UniqueIndex.java b/dump/src/util/dump/UniqueIndex.java index e220392..c290ba8 100644 --- a/dump/src/util/dump/UniqueIndex.java +++ b/dump/src/util/dump/UniqueIndex.java @@ -153,15 +153,21 @@ public void deleteFromDump( Collection elements ) { } public int[] getAllIntKeys() { - return _lookupInt.keys(); + synchronized ( _dump ) { + return _lookupInt.keys(); + } } public long[] getAllLongKeys() { - return _lookupLong.keys(); + synchronized ( _dump ) { + return _lookupLong.keys(); + } } public Object[] getAllObjectKeys() { - return _lookupObject.keys(); + synchronized ( _dump ) { + return _lookupObject.keys(); + } } @Override From d69efd84cd88922f273d72076715884e697ca2dc Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Mon, 7 Apr 2025 14:23:44 +0200 Subject: [PATCH 3/4] TECH: UniqueIndex: fix long-standing bug extracting positions assign proper no_entry_value instead of arbitrary 10000 --- dump/src/util/dump/UniqueIndex.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dump/src/util/dump/UniqueIndex.java b/dump/src/util/dump/UniqueIndex.java index c290ba8..79ee8b5 100644 --- a/dump/src/util/dump/UniqueIndex.java +++ b/dump/src/util/dump/UniqueIndex.java @@ -174,7 +174,7 @@ public Object[] getAllObjectKeys() { public TLongList getAllPositions() { synchronized ( _dump ) { TLongCollection c = _fieldIsInt ? _lookupInt.valueCollection() : (_fieldIsLong ? _lookupLong.valueCollection() : _lookupObject.valueCollection()); - TLongList pos = new TLongArrayList(c.size(), 10000); + TLongList pos = new TLongArrayList(c.size(), -1L); for ( TLongIterator iterator = c.iterator(); iterator.hasNext(); ) { long p = iterator.next(); if ( !_dump._deletedPositions.contains(p) ) { From a1b7cbb380aaaa32bddc248b804206890b74f674 Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Mon, 7 Apr 2025 14:31:24 +0200 Subject: [PATCH 4/4] TECH: GroupIndex, InfiniteGroupIndex: fix long-standing bug extracting positions assign proper no_entry_value instead of default 0L --- dump/src/util/dump/GroupIndex.java | 2 +- dump/src/util/dump/InfiniteGroupIndex.java | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/dump/src/util/dump/GroupIndex.java b/dump/src/util/dump/GroupIndex.java index 8cb1922..1fd00c1 100644 --- a/dump/src/util/dump/GroupIndex.java +++ b/dump/src/util/dump/GroupIndex.java @@ -180,7 +180,7 @@ public TLongList getAllPositions() { for ( Positions positions : c ) { numPos += positions.size(); } - TLongList pos = new TLongArrayList(numPos); + TLongList pos = new TLongArrayList(numPos, -1L); for ( Positions p : c ) { ensureSorting(p); for ( int i = 0, length = p.size(); i < length; i++ ) { diff --git a/dump/src/util/dump/InfiniteGroupIndex.java b/dump/src/util/dump/InfiniteGroupIndex.java index 7081808..837bfce 100644 --- a/dump/src/util/dump/InfiniteGroupIndex.java +++ b/dump/src/util/dump/InfiniteGroupIndex.java @@ -21,6 +21,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static gnu.trove.impl.Constants.DEFAULT_CAPACITY; import gnu.trove.iterator.TLongIterator; import gnu.trove.list.TLongList; import gnu.trove.list.array.TLongArrayList; @@ -177,7 +178,7 @@ public boolean contains( Object key ) { @Override public TLongList getAllPositions() { synchronized ( _dump ) { - TLongList pos = new TLongArrayList((int)(_lookupFileLength / (8 + (_fieldIsLong ? 8 : 4)))); + TLongList pos = new TLongArrayList((int)(_lookupFileLength / (8 + (_fieldIsLong ? 8 : 4))), -1L); if ( _fieldIsInt || _fieldIsLong ) { DataInputStream in = null; @@ -337,7 +338,7 @@ public Iterable lookup( Object key ) { */ public Iterable rangeLookup( long lowerKey, long upperKey ) { synchronized ( _dump ) { - TLongList pos = new TLongArrayList(); + TLongList pos = new TLongArrayList(DEFAULT_CAPACITY, -1L); _overflowIndex._lookupLong.forEachEntry(( key, positions ) -> { if ( key >= lowerKey && key < upperKey ) pos.addAll(positions); @@ -395,7 +396,8 @@ protected long[] getPositions( int key ) { return cachedPositions; } - TLongList pos = new TLongArrayList(_overflowIndex.getPositions(key)); + TLongList pos = new TLongArrayList(DEFAULT_CAPACITY, -1L); + pos.add(_overflowIndex.getPositions(key)); int keyLength = 4 + 8; // in bytes @@ -430,7 +432,8 @@ protected long[] getPositions( long key ) { return cachedPositions; } - TLongList pos = new TLongArrayList(_overflowIndex.getPositions(key)); + TLongList pos = new TLongArrayList(DEFAULT_CAPACITY, -1L); + pos.add(_overflowIndex.getPositions(key)); int keyLength = 8 + 8; // in bytes @@ -477,7 +480,8 @@ protected long[] getPositions( Object key ) { TLongList keyPositions = getObjectKeyPositions(key); - TLongList positions = new TLongArrayList(_overflowIndex.getPositions(key)); + TLongList positions = new TLongArrayList(DEFAULT_CAPACITY, -1L); + positions.add(_overflowIndex.getPositions(key)); for ( TLongIterator iterator = keyPositions.iterator(); iterator.hasNext(); ) { long pos = iterator.next(); if ( _fieldIsExternalizable ) { @@ -916,7 +920,7 @@ private long findLongKey( long key, int keyLength ) { } private TLongList getObjectKeyPositions( Object key ) { - TLongList keyPositions = new TLongArrayList(); + TLongList keyPositions = new TLongArrayList(DEFAULT_CAPACITY, -1L); int keyLength = 4 + 8; // in bytes int keyHashCode = key.hashCode();