-
Notifications
You must be signed in to change notification settings - Fork 33
Allow optionally passed string allocator and expand benchmarks #264
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
6eb1ba3
912eab9
521c907
f91c397
92adc2c
7109e4b
eaf4ecd
decb60c
25ed1f4
fb6835f
ad33f8c
1046f57
2afe0f8
960a04e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| using System; | ||
|
|
||
| namespace MaxMind.Db; | ||
|
|
||
| /// <summary> | ||
| /// Delegate methods for allocations | ||
| /// </summary> | ||
| public static class AllocatorDelegates | ||
|
jashook marked this conversation as resolved.
|
||
| { | ||
| /// <summary> | ||
| /// Allocate a string based on a sequence of bytes | ||
| /// </summary> | ||
| /// <param name="bytes"></param> | ||
| /// <returns></returns> | ||
| public delegate string GetString(ReadOnlyMemory<byte> bytes); | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -6,6 +6,7 @@ | |||
| #endif | ||||
| using System.Collections; | ||||
| using System.Collections.Generic; | ||||
| using System.Diagnostics; | ||||
| using System.Numerics; | ||||
|
|
||||
| #endregion | ||||
|
|
@@ -47,21 +48,24 @@ | |||
|
|
||||
| private readonly DictionaryActivatorCreator _dictionaryActivatorCreator; | ||||
| private readonly ListActivatorCreator _listActivatorCreator; | ||||
| private readonly SNECache? _cache; | ||||
|
|
||||
| /// <summary> | ||||
| /// Initializes a new instance of the <see cref="Decoder" /> class. | ||||
| /// </summary> | ||||
| /// <param name="database">The database.</param> | ||||
| /// <param name="pointerBase">The base address in the stream.</param> | ||||
| /// <param name="followPointers">Whether to follow pointers. For testing.</param> | ||||
| internal Decoder(MemoryMapBuffer database, long pointerBase, bool followPointers = true) | ||||
| /// <param name="defaultCacheSize">Whether to use a cache or not.</param> | ||||
| internal Decoder(Buffer database, long pointerBase, bool followPointers = true, int? defaultCacheSize = null) | ||||
|
Check failure on line 60 in MaxMind.Db/Decoder.cs
|
||||
| { | ||||
| _pointerBase = pointerBase; | ||||
| _database = database; | ||||
| _followPointers = followPointers; | ||||
| _listActivatorCreator = new ListActivatorCreator(); | ||||
| _dictionaryActivatorCreator = new DictionaryActivatorCreator(); | ||||
| _typeActivatorCreator = new TypeActivatorCreator(); | ||||
| _cache = defaultCacheSize is null ? null : new (defaultCacheSize.Value); | ||||
| } | ||||
|
|
||||
| /// <summary> | ||||
|
|
@@ -81,10 +85,19 @@ | |||
| return decoded; | ||||
| } | ||||
|
|
||||
| /// <summary> | ||||
| /// Get cache size | ||||
| /// </summary> | ||||
| /// <returns></returns> | ||||
| internal int CacheSize() | ||||
| { | ||||
| return _cache is null ? 0 : _cache.Size(); | ||||
| } | ||||
|
|
||||
| private object Decode(Type expectedType, long offset, out long outOffset, InjectableValues? injectables = null, Network? network = null) | ||||
| { | ||||
| var type = CtrlData(offset, out var size, out offset); | ||||
| return DecodeByType(expectedType, type, offset, size, out outOffset, injectables, network); | ||||
| return DecodeByTypeFromCacheOrCreate(expectedType, type, offset, size, out outOffset, injectables, network); | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should also have a test looking up different IPs that resolve to the same offset where we use |
||||
| } | ||||
|
|
||||
| private ObjectType CtrlData(long offset, out int size, out long outOffset) | ||||
|
|
@@ -145,6 +158,38 @@ | |||
| /// <param name="network"></param> | ||||
| /// <returns></returns> | ||||
| /// <exception cref="Exception">Unable to handle type!</exception> | ||||
| private object DecodeByTypeFromCacheOrCreate( | ||||
| Type expectedType, | ||||
| ObjectType type, | ||||
| long offset, | ||||
| int size, | ||||
| out long outOffset, | ||||
| InjectableValues? injectables, | ||||
| Network? network | ||||
| ) | ||||
| { | ||||
| ValueTuple<object, long> returnValue = DecodeFromCacheOrCreate(offset, size, expectedType, type, injectables, network, static (Buffer database, long offset, int size, Type type, ObjectType objectType, Decoder decoder, InjectableValues? injectableValues, Network? network) => | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if it would be simpler if we only cached by data section offset / pointer. You can see we actually have caching by that already for when iterating the tree in MaxMind-DB-Reader-dotnet/MaxMind.Db/Reader.cs Line 297 in 31eb3d2
We cache by that in It may make sense to extend that caching too so we don't have two kinds of caches. I didn't realize we had that cache when you first made your PR. |
||||
| { | ||||
| long returnOffset = 0; | ||||
| return (decoder.DecodeByType(type, objectType, offset, size, out returnOffset, injectableValues, network), returnOffset); | ||||
| }); | ||||
|
|
||||
| outOffset = returnValue.Item2; | ||||
| return returnValue.Item1; | ||||
| } | ||||
|
|
||||
| /// <summary> | ||||
| /// Decodes the type of the by. | ||||
| /// </summary> | ||||
| /// <param name="expectedType"></param> | ||||
| /// <param name="type">The type.</param> | ||||
| /// <param name="offset">The offset.</param> | ||||
| /// <param name="size">The size.</param> | ||||
| /// <param name="outOffset">The out offset</param> | ||||
| /// <param name="injectables"></param> | ||||
| /// <param name="network"></param> | ||||
| /// <returns></returns> | ||||
| /// <exception cref="Exception">Unable to handle type!</exception> | ||||
| private object DecodeByType( | ||||
| Type expectedType, | ||||
| ObjectType type, | ||||
|
|
@@ -282,14 +327,20 @@ | |||
| { | ||||
| ReflectionUtil.CheckType(expectedType, typeof(string)); | ||||
|
|
||||
| return _database.ReadString(offset, size); | ||||
| return (string)DecodeFromCacheOrCreate(offset, size, expectedType, static (Buffer database, long offset, int size) => | ||||
| { | ||||
| return (database.ReadString(offset, size), offset + size); | ||||
| }).Item1; | ||||
| } | ||||
|
|
||||
| private byte[] DecodeBytes(Type expectedType, long offset, int size) | ||||
| { | ||||
| ReflectionUtil.CheckType(expectedType, typeof(byte[])); | ||||
|
|
||||
| return _database.Read(offset, size); | ||||
| return (byte[])DecodeFromCacheOrCreate(offset, size, expectedType, static (Buffer database, long offset, int size) => | ||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These lower level DecodeFromCacheOrCreate calls seem to cause us to bump size a second time. Once here, and once in the call to DecodeByType. |
||||
| { | ||||
| return (database.Read(offset, size), offset + size); | ||||
| }).Item1; | ||||
| } | ||||
|
|
||||
| /// <summary> | ||||
|
|
@@ -662,5 +713,60 @@ | |||
|
|
||||
| return _database.ReadVarInt(offset, size); | ||||
| } | ||||
|
|
||||
| private (object, long) DecodeFromCacheOrCreate(long offset, int size, Type type, Func<Buffer, long, int, (object, long)> factory) | ||||
|
Check failure on line 717 in MaxMind.Db/Decoder.cs
|
||||
| { | ||||
| if (_cache is not null) | ||||
| { | ||||
| ValueTuple<object, long> item; | ||||
| bool found = _cache.TryGet(offset, size, type, out item); | ||||
| if (found) | ||||
| { | ||||
| // Not null if found. | ||||
| Debug.Assert(item.Item1 is not null); | ||||
| return item; | ||||
| } | ||||
| else | ||||
| { | ||||
| item = factory(_database, offset, size); | ||||
| bool added = _cache.TryAdd(offset, size, type, item); | ||||
| } | ||||
|
|
||||
| return item; | ||||
| } | ||||
|
|
||||
| return factory(_database, offset, size); | ||||
| } | ||||
|
|
||||
| private (object, long) DecodeFromCacheOrCreate( | ||||
| long offset, | ||||
| int size, | ||||
| Type type, | ||||
| ObjectType objectType, | ||||
| InjectableValues? injectableValues, | ||||
| Network? network, | ||||
| Func<Buffer, long, int, Type, ObjectType, Decoder, InjectableValues?, Network?, (object, long)> factory) | ||||
|
Check failure on line 748 in MaxMind.Db/Decoder.cs
|
||||
| { | ||||
| if (_cache is not null) | ||||
| { | ||||
| ValueTuple<object, long> item; | ||||
| bool found = _cache.TryGet(offset, size, type, out item); | ||||
| if (found) | ||||
| { | ||||
| // Not null if found. | ||||
| Debug.Assert(item.Item1 is not null); | ||||
| return item; | ||||
| } | ||||
| else | ||||
| { | ||||
| item = factory(_database, offset, size, type, objectType, this, injectableValues, network); | ||||
| bool added = _cache.TryAdd(offset, size, type, item); | ||||
| } | ||||
|
|
||||
| return item; | ||||
| } | ||||
|
|
||||
| return factory(_database, offset, size, type, objectType, this, injectableValues, network); | ||||
| } | ||||
| } | ||||
| } | ||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -50,4 +50,7 @@ | |
| </None> | ||
| </ItemGroup> | ||
|
|
||
| <ItemGroup> | ||
| <PackageReference Include="System.Memory" Version="4.5.5" /> | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need this? |
||
| </ItemGroup> | ||
| </Project> | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -110,7 +110,8 @@ | |
| /// Initializes a new instance of the <see cref="Reader" /> class. | ||
| /// </summary> | ||
| /// <param name="file">The file.</param> | ||
| public Reader(string file) : this(file, FileAccessMode.MemoryMapped) | ||
| public Reader(string file) | ||
| : this(file, FileAccessMode.MemoryMapped) | ||
| { | ||
| } | ||
|
|
||
|
|
@@ -119,7 +120,19 @@ | |
| /// </summary> | ||
| /// <param name="file">The MaxMind DB file.</param> | ||
| /// <param name="mode">The mode by which to access the DB file.</param> | ||
| public Reader(string file, FileAccessMode mode) : this(BufferForMode(file, mode), file) | ||
| public Reader(string file, FileAccessMode mode) | ||
| : this(BufferForMode(file, mode), file) | ||
| { | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Initializes a new instance of the <see cref="Reader" /> class. | ||
| /// </summary> | ||
| /// <param name="file">The MaxMind DB file.</param> | ||
| /// <param name="mode">The mode by which to access the DB file.</param> | ||
| /// <param name="cacheSize">Cache size for optional internal cache</param> | ||
| public Reader(string file, FileAccessMode mode, int cacheSize) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems like an improvement in terms of not breaking the API. However I wonder if we would want to provide some way to swap out the cache implementation. I'm not sure and not sure what it would best look like if so. |
||
| : this(BufferForMode(file, mode), file, cacheSize) | ||
| { | ||
| } | ||
|
|
||
|
|
@@ -136,7 +149,7 @@ | |
| { | ||
| } | ||
|
|
||
| private Reader(MemoryMapBuffer buffer, string? file) | ||
| private Reader(Buffer buffer, string? file, int? cacheSize = null) | ||
|
Check failure on line 152 in MaxMind.Db/Reader.cs
|
||
| { | ||
| _fileName = file; | ||
| _database = buffer; | ||
|
|
@@ -148,7 +161,7 @@ | |
| _nodeByteSize = Metadata.NodeByteSize; | ||
| _nodeCount = Metadata.NodeCount; | ||
| _recordSize = Metadata.RecordSize; | ||
| Decoder = new Decoder(_database, Metadata.SearchTreeSize + DataSectionSeparatorSize); | ||
| Decoder = new Decoder(_database, Metadata.SearchTreeSize + DataSectionSeparatorSize, defaultCacheSize: cacheSize); | ||
|
|
||
| if (_dbIPVersion == 6) | ||
| { | ||
|
|
@@ -227,6 +240,15 @@ | |
| _disposed = true; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Cache size | ||
| /// </summary> | ||
| /// <returns></returns> | ||
| public int CacheSize() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we could leave this out unless it's necessary, to reduce adding to the public API for now. |
||
| { | ||
| return Decoder.CacheSize(); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Finds the data related to the specified address. | ||
| /// </summary> | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It may make sense to have a separate PR for improvements to the benchmark tool, if they are unrelated to the PR. I realize in a prior version the changes were more relevant as you were exercising new parameters to the reader.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#279
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There will still need to a be change to exercise the new parameters, but it will make the pr easier to review. Thank you