-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSegmentedStringPool.cs
More file actions
293 lines (261 loc) · 11.7 KB
/
SegmentedStringPool.cs
File metadata and controls
293 lines (261 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
namespace LookBusy;
using System;
using System.Runtime.CompilerServices;
internal static class SegmentedConstants
{
public const uint HighBit = 0x80000000u; // bit 31 of Generation: set=freed, clear=live
public const uint GenerationMask = 0x7FFFFFFFu; // lower 31 bits: monotonically-increasing reuse counter
public const uint NoFreeSlot = 0xFFFFFFFFu; // sentinel stored in freeHead when the slot free chain is empty
public const int PtrAlignment = 8; // Marshal.AllocHGlobal guarantees ≥8-byte alignment, so low 3 bits of every pointer are always zero
public const int ArenaAllocationAlignment = PtrAlignment; // arena blocks are normalized to the unmanaged pointer alignment
public const long TierTagMask = 1L; // bit 0 of a tagged pointer: 0=slab tier, 1=arena tier
public const long PtrMask = ~7L; // clears the 3 tag bits from a tagged pointer to recover the raw address
public const int TierSlab = 0; // bit 0 value: allocation lives in a slab cell
public const int TierArena = 1; // bit 0 value: allocation lives in an arena block
public const int SlabSizeClassCount = 5; // five classes: 8/16/32/64/128 chars; doubling progression caps internal waste at <50% per live string
public const int MinArenaBlockBytes = 16; // a free block must hold a SegmentedFreeBlockHeader (16 bytes) in its own payload
public const int ArenaBinCount = 16; // bins 0..15 cover Log2(size)-4, so bin 0 = 16 B, bin 15 ≥ 256 KB
public const int DefaultSlotCapacity = 64;
public const int DefaultSlabCellsPerSlab = 256;
public const int DefaultArenaSegmentBytes = 1 << 20; // 1 MB: amortises Marshal.AllocHGlobal overhead over many large strings
public const int DefaultSmallStringThresholdChars = 128; // strings ≤ threshold go to slab tier; above go to arena tier
public const int MaxSlabSizeClassChars = 128; // largest slab size class; threshold above this has no valid class
public const int MaxArenaAllocationChars = int.MaxValue / sizeof(char); // largest char count whose UTF-16 byte size fits in a signed int
}
internal static class IntPtrExtensions
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool ContainsPointer(this IntPtr buffer, long capacity, IntPtr ptr)
{
var raw = ptr.ToInt64();
var start = buffer.ToInt64();
return raw >= start && raw < start + capacity;
}
}
public sealed record SegmentedStringPoolOptions(
int InitialSlotCapacity = SegmentedConstants.DefaultSlotCapacity,
int SlabCellsPerSlab = SegmentedConstants.DefaultSlabCellsPerSlab,
int ArenaSegmentBytes = SegmentedConstants.DefaultArenaSegmentBytes,
int SmallStringThresholdChars = SegmentedConstants.DefaultSmallStringThresholdChars
);
/// <summary>
/// A GC-pressure-free string pool backed by unmanaged memory. Strings shorter than
/// <see cref="SegmentedStringPoolOptions.SmallStringThresholdChars"/> are stored in the slab tier
/// (fixed-size cells, bitmap-tracked, O(1) alloc/free); longer strings go to the arena tier
/// (bump + segregated free lists). Callers hold <see cref="PooledStringRef"/> handles — 16-byte
/// value types that never pin or move any managed object.
/// </summary>
/// <remarks>
/// Not thread-safe. All callers sharing a pool instance must provide external synchronisation.
/// </remarks>
public sealed class SegmentedStringPool : IDisposable
{
private readonly SegmentedSlotTable slots;
private readonly SegmentedSlabTier slabTier;
private readonly SegmentedArenaTier arenaTier;
private readonly int smallThreshold;
// volatile: FreeSlot reads this on the user thread while the GC finalizer thread may call ~SegmentedStringPool.
private volatile bool disposed;
public SegmentedStringPool() : this(new()) { }
public SegmentedStringPool(SegmentedStringPoolOptions options)
{
ArgumentNullException.ThrowIfNull(options);
ArgumentOutOfRangeException.ThrowIfGreaterThan(
options.SmallStringThresholdChars,
SegmentedConstants.MaxSlabSizeClassChars,
nameof(options));
slots = new(options.InitialSlotCapacity);
slabTier = new(options.SlabCellsPerSlab);
arenaTier = new(options.ArenaSegmentBytes);
smallThreshold = options.SmallStringThresholdChars;
}
public int ActiveAllocations => slots.ActiveCount;
public long GetTotalBytesUnmanaged() => slabTier.GetUnmanagedBytes() + arenaTier.GetUnmanagedBytes();
/// <summary>
/// Lower-bound estimate of managed heap bytes owned by this pool. Includes the slot-entry array
/// and slab bitmap arrays; excludes GC object headers and framework list overhead.
/// </summary>
public long GetTotalBytesManaged() =>
slots.Capacity * (long)Unsafe.SizeOf<SegmentedSlotEntry>() + slabTier.GetManagedBitmapBytes();
public int SlabCount => slabTier.SlabCount;
public int SegmentCount => arenaTier.SegmentCount;
internal bool IsDisposed => disposed;
/// <summary>
/// Copies <paramref name="value"/> into unmanaged memory and returns a <see cref="PooledStringRef"/> handle.
/// Empty spans return <see cref="PooledStringRef.Empty"/> without touching either tier.
/// </summary>
public PooledStringRef Allocate(ReadOnlySpan<char> value)
{
ObjectDisposedException.ThrowIf(disposed, typeof(SegmentedStringPool));
if (value.IsEmpty) {
return PooledStringRef.Empty;
}
var length = value.Length;
var ptr = AllocateUnmanaged(length, out var tier, out var owner, out var allocatedBytes);
var byteCount = GetArenaByteCount(length);
unsafe {
fixed (char* src = value) {
Buffer.MemoryCopy(src, (void*)ptr, byteCount, byteCount);
}
}
// bit 0 of the raw pointer is guaranteed zero by 8-byte alignment, so OR-ing the tier tag is safe
var taggedPtr = new IntPtr((ptr.ToInt64() & SegmentedConstants.PtrMask) | (uint)tier);
var (slotIndex, gen) = slots.Allocate(taggedPtr, length, owner, allocatedBytes);
return new(this, slotIndex, gen);
}
/// <summary>
/// Resolves a handle to the raw character span in unmanaged memory. The tier tag stored in bit 0 of the
/// slot's pointer is masked off before building the span — reading never needs to know which tier owns the data.
/// </summary>
internal ReadOnlySpan<char> ReadSlot(uint slotIndex, uint generation)
{
ObjectDisposedException.ThrowIf(disposed, typeof(SegmentedStringPool));
if (!slots.TryReadSlot(slotIndex, generation, out var entry)) {
throw new InvalidOperationException("PooledStringRef is stale or freed");
}
var raw = new IntPtr(entry.Ptr.ToInt64() & SegmentedConstants.PtrMask);
unsafe {
return new((void*)raw, entry.LengthChars);
}
}
/// <summary>Returns the character count for a live handle without constructing a span.</summary>
internal int GetLength(uint slotIndex, uint generation)
{
ObjectDisposedException.ThrowIf(disposed, typeof(SegmentedStringPool));
return !slots.TryReadSlot(slotIndex, generation, out var entry)
? throw new InvalidOperationException("PooledStringRef is stale or freed")
: entry.LengthChars;
}
/// <summary>
/// Decodes the tier tag from the slot's pointer and routes the deallocation to the correct tier,
/// then frees the slot entry. Silently no-ops if the pool is already disposed or the handle is stale.
/// </summary>
internal void FreeSlot(uint slotIndex, uint generation)
{
if (disposed) { return; } // silent: guards against PooledStringRef.Dispose() racing pool.Dispose()
if (!slots.TryReadSlot(slotIndex, generation, out var entry)) {
return;
}
var raw = new IntPtr(entry.Ptr.ToInt64() & SegmentedConstants.PtrMask);
var tier = (int)(entry.Ptr.ToInt64() & SegmentedConstants.TierTagMask);
// Free the slot in a finally block so its index is never leaked even if the tier
// throws (which indicates internal corruption — the pool is unusable anyway, but
// a leaked slot index would compound the damage by preventing GC of the owner ref).
try {
if (tier == SegmentedConstants.TierSlab) {
slabTier.Free(raw, (SegmentedSlab)entry.Owner!);
} else {
SegmentedArenaTier.Free(raw, entry.AllocatedBytes, (SegmentedArenaSegment)entry.Owner!);
}
}
finally {
_ = slots.Free(slotIndex, generation);
}
}
/// <summary>
/// Invalidates all live handles and reclaims all string storage without freeing any unmanaged memory.
/// Slabs and segments are reset and reused for subsequent allocations — use this to efficiently
/// process a new batch of strings without the cost of re-allocating buffers from the OS.
/// </summary>
public void Clear()
{
ObjectDisposedException.ThrowIf(disposed, typeof(SegmentedStringPool));
slots.ClearAllSlots();
slabTier.ResetAll();
arenaTier.ResetAll();
}
/// <summary>
/// Pre-allocates unmanaged slab capacity for at least <paramref name="chars"/> small-string characters,
/// distributing slabs proportionally across all size classes.
/// </summary>
public void ReserveSmall(int chars)
{
ObjectDisposedException.ThrowIf(disposed, typeof(SegmentedStringPool));
ArgumentOutOfRangeException.ThrowIfNegative(chars);
if (chars == 0) {
return;
}
slabTier.Reserve(chars);
}
/// <summary>
/// Pre-allocates unmanaged arena capacity for at least <paramref name="chars"/> large-string characters.
/// </summary>
public void ReserveLarge(int chars)
{
ObjectDisposedException.ThrowIf(disposed, typeof(SegmentedStringPool));
ArgumentOutOfRangeException.ThrowIfNegative(chars);
if (chars == 0) {
return;
}
arenaTier.Reserve(GetArenaByteCount(chars));
}
/// <summary>
/// Pre-allocates unmanaged capacity for at least <paramref name="chars"/> characters, split evenly between tiers.
/// For workload-aware pre-warming prefer <see cref="ReserveSmall"/> and <see cref="ReserveLarge"/> directly.
/// </summary>
public void Reserve(int chars)
{
ObjectDisposedException.ThrowIf(disposed, typeof(SegmentedStringPool));
ArgumentOutOfRangeException.ThrowIfNegative(chars);
if (chars == 0) {
return;
}
ReserveSmall(chars / 2);
ReserveLarge(chars - chars / 2);
}
public void Dispose()
{
Dispose(true);
GC.SuppressFinalize(this);
}
private void Dispose(bool disposing)
{
if (disposed) {
return;
}
disposed = true;
if (disposing) {
slots.ClearAllSlots();
// Tier classes must not own unmanaged memory directly — each SegmentedSlab /
// SegmentedArenaSegment is independently finalizable. If that invariant ever changes,
// the Dispose(false) path below must call tier Dispose() as well. (D-3)
slabTier.Dispose();
arenaTier.Dispose();
}
}
/// <summary>
/// Routes an allocation to the slab tier (≤ threshold) or arena tier (> threshold)
/// and returns the raw unmanaged pointer together with the tier tag to be embedded in the slot.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private IntPtr AllocateUnmanaged(int charCount, out int tier, out object owner, out int allocatedBytes)
{
if (charCount <= smallThreshold) {
tier = SegmentedConstants.TierSlab;
var ptr = slabTier.Allocate(charCount, out var slab);
owner = slab;
allocatedBytes = 0; // slab free uses cell-index arithmetic; byte count is not needed
return ptr;
}
tier = SegmentedConstants.TierArena;
var byteCount = GetArenaByteCount(charCount);
var arenaPtr = arenaTier.Allocate(byteCount, out var segment, out allocatedBytes);
owner = segment;
return arenaPtr;
}
internal static int GetArenaByteCount(int charCount)
{
ArgumentOutOfRangeException.ThrowIfNegative(charCount);
ArgumentOutOfRangeException.ThrowIfGreaterThan(charCount, SegmentedConstants.MaxArenaAllocationChars);
return checked(charCount * sizeof(char));
}
~SegmentedStringPool()
{
// D-4: surface disposal leaks in debug builds. Debug.WriteLine is safe from a finalizer;
// Debug.Assert/Fail would abort the process in non-interactive environments.
if (!disposed) {
System.Diagnostics.Debug.WriteLine("SegmentedStringPool was not disposed before finalization — unmanaged memory will be reclaimed by GC but later than necessary.");
}
Dispose(false);
}
}