diff --git a/.codacy.yaml b/.codacy.yaml new file mode 100644 index 000000000..0cea0cde8 --- /dev/null +++ b/.codacy.yaml @@ -0,0 +1,3 @@ +--- +exclude_paths: + - 'docs/case-studies/**' diff --git a/.markdownlint.json b/.markdownlint.json new file mode 100644 index 000000000..54a82abbb --- /dev/null +++ b/.markdownlint.json @@ -0,0 +1,5 @@ +{ + "default": true, + "MD013": false, + "MD043": false +} diff --git a/csharp/Platform.Data.Doublets.Tests/UnitedRangedMemoryLinksTests.cs b/csharp/Platform.Data.Doublets.Tests/UnitedRangedMemoryLinksTests.cs new file mode 100644 index 000000000..f10d5e183 --- /dev/null +++ b/csharp/Platform.Data.Doublets.Tests/UnitedRangedMemoryLinksTests.cs @@ -0,0 +1,410 @@ +using System; +using System.Collections.Generic; +using Xunit; +using Platform.Memory; +using Platform.Data.Doublets.Memory.United.Generic; +using Platform.Data.Doublets.Memory.UnitedRanged; +using Platform.Data.Doublets.Memory.UnitedRanged.Generic; + +namespace Platform.Data.Doublets.Tests +{ + public static class UnitedRangedMemoryLinksTests + { + // ----------------------------------------------------------------- + // R1, R2 — drop-in substitution + // ----------------------------------------------------------------- + + [Fact] + public static void BasicMemoryOperations_Substitution() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + var link = links.Create(); + Assert.Equal(1UL, link); + links.Delete(link); + Assert.Equal(0UL, links.Count()); + } + + [Fact] + public static void CreateAndDelete_ManyLinks_BehavesLikeBase() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + links.Create(); + var b = links.Create(); + links.Create(); + Assert.Equal(3UL, links.Count()); + 
links.Delete(b); + Assert.Equal(2UL, links.Count()); + // Recreating should reuse the freed mid-range slot 'b'. + var d = links.Create(); + Assert.Equal(b, d); + } + + // ----------------------------------------------------------------- + // R3 — multi-cell allocation API + // ----------------------------------------------------------------- + + [Fact] + public static void AllocateRange_ReturnsContiguousBlock() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + var start = links.AllocateRange(5UL); + Assert.True(start > 0UL); + // The block must reserve exactly the cells [start .. start + 5): + // on a fresh store the next allocation has to begin right after it. + var next = links.AllocateRange(1UL); + Assert.Equal(start + 5UL, next); + links.DeallocateRange(next, 1UL); + links.DeallocateRange(start, 5UL); + } + + [Fact] + public static void AllocateRange_FasterThanIndividualCreates() + { + // R3: allocating a range of N cells must extend the high-water mark exactly once, + // whereas N individual Create calls extend it N times. + const int N = 1024; + + using var memBulk = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var linksBulk = new UnitedRangedMemoryLinks(memBulk, UnitedMemoryLinks.DefaultLinksSizeStep); + var startBulk = linksBulk.AllocateRange((ulong)N); + Assert.Equal(1UL, startBulk); + + using var memOne = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var linksOne = new UnitedRangedMemoryLinks(memOne, UnitedMemoryLinks.DefaultLinksSizeStep); + for (var i = 0; i < N; i++) + { + linksOne.Create(); + } + // Both arrived at the same logical state. 
+ Assert.Equal((ulong)N, linksOne.Count()); + } + + [Fact] + public static void AllocateRange_PrefersExistingFreeRange() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + // Build a layout: [A1..A4] [hole 5..7] [tail 8..10] + var a = links.AllocateRange(4UL); // 1..4 + var hole = links.AllocateRange(3UL); // 5..7 + var tail = links.AllocateRange(3UL); // 8..10 + // Free the middle range -> becomes a multi-cell free range. + links.DeallocateRange(hole, 3UL); + // Allocate again with the same length: best-fit should give back 'hole'. + var reused = links.AllocateRange(3UL); + Assert.Equal(hole, reused); + // Cleanup. + links.DeallocateRange(reused, 3UL); + links.DeallocateRange(tail, 3UL); + links.DeallocateRange(a, 4UL); + } + + [Fact] + public static void AllocateRange_OneCellRemainderFeedsSingleCellFreeList() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + var range = links.AllocateRange(4UL); // 1..4 + var tail = links.AllocateRange(2UL); // 5..6, keeps the free range away from tail trimming. 
+ + links.DeallocateRange(range, 4UL); + var reused = links.AllocateRange(3UL); + var singleCell = links.Create(); + + Assert.Equal(range, reused); + Assert.Equal(range + 3UL, singleCell); + + links.Delete(singleCell); + links.DeallocateRange(reused, 3UL); + links.DeallocateRange(tail, 2UL); + } + + // ----------------------------------------------------------------- + // R7, R8 — coalescing and no-fragmentation + // ----------------------------------------------------------------- + + [Fact] + public static void DeallocateRange_CoalescesNeighbours() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + // Allocate three adjacent ranges, then surround a deallocation with two more. + var a = links.AllocateRange(3UL); // 1..3 + var b = links.AllocateRange(3UL); // 4..6 + var c = links.AllocateRange(3UL); // 7..9 + var tail = links.AllocateRange(2UL); // 10..11 (prevents tail-trim from eating everything) + // Free middle, then left, then right. + links.DeallocateRange(b, 3UL); + links.DeallocateRange(a, 3UL); + links.DeallocateRange(c, 3UL); + // The three ranges must have coalesced into a single 9-cell free range starting at 1. + // Allocating exactly 9 cells should reuse that range head. + var reused = links.AllocateRange(9UL); + Assert.Equal(1UL, reused); + // Cleanup. + links.DeallocateRange(reused, 9UL); + links.DeallocateRange(tail, 2UL); + } + + [Fact] + public static void DeallocateRange_TrimsTail() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + var a = links.AllocateRange(3UL); // 1..3 + var b = links.AllocateRange(5UL); // 4..8 + // Freeing the tail range must shrink AllocatedLinks back to 3. 
+ links.DeallocateRange(b, 5UL); + // Now a new 5-cell allocation must start at 4 (not 9). + var c = links.AllocateRange(5UL); + Assert.Equal(4UL, c); + links.DeallocateRange(c, 5UL); + links.DeallocateRange(a, 3UL); + // After all is freed, allocating again must start at 1. + var d = links.AllocateRange(2UL); + Assert.Equal(1UL, d); + links.DeallocateRange(d, 2UL); + } + + // ----------------------------------------------------------------- + // R5, R6, R9 — raw link sequences + // ----------------------------------------------------------------- + + [Fact] + public static void RawLinkSequence_Roundtrip_SingleCell() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + // Single cell carries 6 ulongs of payload = 48 bytes. + var payload = new byte[48]; + for (var i = 0; i < payload.Length; i++) + { + payload[i] = (byte)(i + 1); + } + var sequence = links.AllocateRawLinkSequence(payload.Length); + links.WriteRawLinkSequence(sequence, payload); + Assert.True(links.IsRawLinkSequence(sequence)); + Assert.Equal(48L, links.GetRawLinkSequenceLengthInBytes(sequence)); + var read = new byte[payload.Length]; + links.ReadRawLinkSequence(sequence, read); + Assert.Equal(payload, read); + links.DeallocateRawLinkSequence(sequence); + Assert.False(links.IsRawLinkSequence(sequence)); + } + + [Fact] + public static void RawLinkSequence_Roundtrip_MultiCell() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + // 7 cells worth of payload: header carries 6 words, then 6 continuation cells carry 8 words each = 6+48 = 54 words = 432 bytes. 
+ var payload = new byte[432]; + for (var i = 0; i < payload.Length; i++) + { + payload[i] = (byte)((i * 7 + 3) & 0xFF); + } + var sequence = links.AllocateRawLinkSequence(payload.Length); + links.WriteRawLinkSequence(sequence, payload); + Assert.True(links.IsRawLinkSequence(sequence)); + Assert.Equal((long)payload.Length, links.GetRawLinkSequenceLengthInBytes(sequence)); + var read = new byte[payload.Length]; + links.ReadRawLinkSequence(sequence, read); + Assert.Equal(payload, read); + links.DeallocateRawLinkSequence(sequence); + } + + [Fact] + public static void RawLinkSequence_ZeroLength_RoundtripAndUsesOneCell() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + + var sequence = links.AllocateRawLinkSequence(0); + + Assert.True(links.IsRawLinkSequence(sequence)); + Assert.Equal(0L, links.GetRawLinkSequenceLengthInBytes(sequence)); + links.ReadRawLinkSequence(sequence, Array.Empty()); + links.DeallocateRawLinkSequence(sequence); + var reused = links.AllocateRange(1UL); + Assert.Equal(sequence, reused); + links.DeallocateRange(reused, 1UL); + } + + [Fact] + public static void RawLinkSequence_LengthMustBeWordAligned() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + + Assert.Throws(() => links.AllocateRawLinkSequence(1)); + + var sequence = links.AllocateRawLinkSequence(8); + Assert.Throws(() => links.WriteRawLinkSequence(sequence, new byte[1])); + links.DeallocateRawLinkSequence(sequence); + } + + [Fact] + public static void RawLinkSequence_AppearsInEachByDefault() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + var a = 
links.Create(); + var sequence = links.AllocateRawLinkSequence(48); + var b = links.Create(); + Assert.True(links.IsRawLinkSequence(sequence)); + var seen = new List(); + links.Each(link => + { + seen.Add(links.GetIndex(link)); + return links.Constants.Continue; + }); + Assert.Contains(sequence, seen); + Assert.Contains(a, seen); + Assert.Contains(b, seen); + Assert.Equal(3, seen.Count); + Assert.Equal(3UL, links.Count()); + Assert.Equal(1UL, links.Count(new[] { sequence })); + Assert.Equal(3UL, links.Count(new Link(links.Constants.Any, links.Constants.Any, links.Constants.Any))); + // Cleanup. + links.DeallocateRawLinkSequence(sequence); + links.Delete(a); + links.Delete(b); + } + + [Fact] + public static void RawLinkSequence_CanBeExcludedFromEachByConfiguration() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep, includeRawLinkSequences: false); + var a = links.Create(); + var sequence = links.AllocateRawLinkSequence(48); + var b = links.Create(); + var seen = new List(); + + links.Each(link => + { + seen.Add(links.GetIndex(link)); + return links.Constants.Continue; + }); + + Assert.DoesNotContain(sequence, seen); + Assert.Contains(a, seen); + Assert.Contains(b, seen); + Assert.Equal(2, seen.Count); + Assert.Equal(2UL, links.Count()); + Assert.Equal(0UL, links.Count(new[] { sequence })); + + links.DeallocateRawLinkSequence(sequence); + links.Delete(a); + links.Delete(b); + } + + [Fact] + public static void RawLinkSequence_CanBeReturnedByEachRestriction() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + var constants = (UnitedRangedLinksConstants)links.Constants; + var sequence = links.AllocateRawLinkSequence(48); + IList? 
found = null; + + links.Each(new Link(links.Constants.Any, constants.RawLinkSequenceMarker, links.Constants.Any), link => + { + found = link; + return links.Constants.Break; + }); + + Assert.NotNull(found); + Assert.True(links.IsRawLinkSequence(found)); + Assert.Equal(sequence, links.GetIndex(found)); + Assert.Equal(constants.RawLinkSequenceMarker, links.GetSource(found)); + Assert.Equal(48UL, links.GetTarget(found)); + Assert.Equal(1UL, links.Count(new Link(links.Constants.Any, constants.RawLinkSequenceMarker, links.Constants.Any))); + Assert.Equal(1UL, links.Count(new[] { links.Constants.Any, constants.RawLinkSequenceMarker })); + + links.DeallocateRawLinkSequence(sequence); + } + + [Fact] + public static void Delete_DeallocatesRawLinkSequenceThroughUniversalInterface() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + var sequence = links.AllocateRawLinkSequence(432); + + links.Delete(sequence); + var reused = links.AllocateRange(7UL); + + Assert.Equal(sequence, reused); + links.DeallocateRange(reused, 7UL); + } + + [Fact] + public static void Each_SkipsFreeRangesAndIncludesConfiguredRawLinkSequences() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + var a = links.Create(); + var range = links.AllocateRange(4UL); + var sequence = links.AllocateRawLinkSequence(48); + var b = links.Create(); + // The mid-allocated range must not be visible to Each — register it as a free range. 
+ links.DeallocateRange(range, 4UL); + var ids = new List(); + links.Each(link => + { + ids.Add(links.GetIndex(link)); + return links.Constants.Continue; + }); + Assert.Equal(new[] { a, sequence, b }, ids); + Assert.Equal(3UL, links.Count()); + Assert.Equal(0UL, links.Count(new[] { range })); + // Cleanup. + links.DeallocateRawLinkSequence(sequence); + links.Delete(a); + links.Delete(b); + } + + // ----------------------------------------------------------------- + // R8 — no-fragmentation chaos test + // ----------------------------------------------------------------- + + [Fact] + public static void NoFragmentation_ChaosTest() + { + using var memory = new HeapResizableDirectMemory(UnitedMemoryLinks.DefaultLinksSizeStep); + using var links = new UnitedRangedMemoryLinks(memory, UnitedMemoryLinks.DefaultLinksSizeStep); + var rng = new System.Random(42); + var outstanding = new List<(ulong start, ulong length)>(); + for (var iter = 0; iter < 500; iter++) + { + if (outstanding.Count > 0 && rng.Next(2) == 0) + { + var idx = rng.Next(outstanding.Count); + var (s, l) = outstanding[idx]; + outstanding.RemoveAt(idx); + links.DeallocateRange(s, l); + } + else + { + var length = (ulong)rng.Next(1, 8); + var s = links.AllocateRange(length); + outstanding.Add((s, length)); + } + } + // Free remaining outstanding allocations. + foreach (var (s, l) in outstanding) + { + links.DeallocateRange(s, l); + } + // After everything is freed, a fresh allocation must start at 1 + // (the tail-trim + coalescing guarantee the high-water mark resets). 
+ var probe = links.AllocateRange(1UL); + Assert.Equal(1UL, probe); + links.DeallocateRange(probe, 1UL); + } + } +} diff --git a/csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/RangedFreeListMethods.cs b/csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/RangedFreeListMethods.cs new file mode 100644 index 000000000..42828ff53 --- /dev/null +++ b/csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/RangedFreeListMethods.cs @@ -0,0 +1,353 @@ +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using Platform.Data.Doublets.Memory.United; +using static System.Runtime.CompilerServices.Unsafe; + +#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member + +namespace Platform.Data.Doublets.Memory.UnitedRanged.Generic +{ + /// + /// + /// Address-sorted doubly-linked list of free ranges of length ≥ 2 cells. + /// The list head is stored in + /// (re-purposed via ). + /// + /// + /// Each free range is described by its first cell: + /// + /// + /// Source = FreeRangeMarker + /// Target = length of the range in cells (≥ 2) + /// LeftAsSource = previous free range's start address (0 if none) + /// RightAsSource = next free range's start address (0 if none) + /// + /// + /// All other cells of the range are zeroed so they look like uninitialised cells. 
+ /// + /// + public unsafe class RangedFreeListMethods where TLinkAddress : IUnsignedNumber, IComparisonOperators + { + private readonly byte* _links; + private readonly byte* _header; + private readonly TLinkAddress _freeRangeMarker; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public RangedFreeListMethods(byte* links, byte* header, TLinkAddress freeRangeMarker) + { + _links = links; + _header = header; + _freeRangeMarker = freeRangeMarker; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private ref LinksHeader GetHeaderReference() => ref AsRef>(_header); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private ref RawLink GetLinkReference(TLinkAddress address) => ref AsRef>(_links + (RawLink.SizeInBytes * long.CreateTruncating(address))); + + /// + /// Returns true if the cell at is the head of a + /// multi-cell free range. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool IsFreeRangeHead(TLinkAddress address) + { + if (address == default) + { + return false; + } + ref var cell = ref GetLinkReference(address); + return cell.Source == _freeRangeMarker && cell.Target != default; + } + + /// + /// Returns the length (in cells) of the free range whose head is at + /// . + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public TLinkAddress GetLength(TLinkAddress address) + { + ref var cell = ref GetLinkReference(address); + return cell.Target; + } + + /// + /// Head of the address-sorted free-range list. + /// + public TLinkAddress Head + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => GetHeaderReference().Reserved8; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private set => GetHeaderReference().Reserved8 = value; + } + + /// + /// Best-fit search of the free-range list. Returns the smallest range that + /// can accommodate cells, or default if none. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public TLinkAddress FindBestFit(TLinkAddress length) + { + var current = Head; + var best = default(TLinkAddress); + var bestLength = default(TLinkAddress); + while (current != default) + { + ref var cell = ref GetLinkReference(current); + var currentLength = cell.Target; + if (currentLength >= length) + { + if (best == default || currentLength < bestLength) + { + best = current; + bestLength = currentLength; + if (currentLength == length) + { + break; + } + } + } + current = cell.RightAsSource; + } + return best; + } + + /// + /// Inserts a free range covering cells [start .. start + length) into the + /// address-sorted list and coalesces it with neighbours. The cells inside the + /// range may contain arbitrary data — Insert zeroes them before linking. + /// Returns the (possibly coalesced) start address of the inserted range. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public TLinkAddress Insert(TLinkAddress start, TLinkAddress length) + { + ClearRange(start, length); + + // Locate the predecessor (largest address < start) and successor. + var predecessor = default(TLinkAddress); + var successor = Head; + while (successor != default && successor < start) + { + predecessor = successor; + successor = GetLinkReference(successor).RightAsSource; + } + + // Try to coalesce with predecessor. + if (predecessor != default) + { + ref var predCell = ref GetLinkReference(predecessor); + if (predecessor + predCell.Target == start) + { + predCell.Target = predCell.Target + length; + // The old `start` cell is already cleared by ClearRange — no header to wipe. + start = predecessor; + length = predCell.Target; + // Now also try to coalesce with successor. + if (successor != default && start + length == successor) + { + ref var succCell = ref GetLinkReference(successor); + predCell.Target = predCell.Target + succCell.Target; + // Unlink successor. 
+ predCell.RightAsSource = succCell.RightAsSource; + if (succCell.RightAsSource != default) + { + GetLinkReference(succCell.RightAsSource).LeftAsSource = predecessor; + } + ClearCell(successor); + } + return start; + } + } + + // Try to coalesce with successor only. + if (successor != default && start + length == successor) + { + ref var succCell = ref GetLinkReference(successor); + var newLength = length + succCell.Target; + var nextOfSuccessor = succCell.RightAsSource; + ClearCell(successor); + + ref var newHead = ref GetLinkReference(start); + newHead.Source = _freeRangeMarker; + newHead.Target = newLength; + newHead.LeftAsSource = predecessor; + newHead.RightAsSource = nextOfSuccessor; + if (predecessor != default) + { + GetLinkReference(predecessor).RightAsSource = start; + } + else + { + Head = start; + } + if (nextOfSuccessor != default) + { + GetLinkReference(nextOfSuccessor).LeftAsSource = start; + } + return start; + } + + // No coalescing — just link in. + ref var head = ref GetLinkReference(start); + head.Source = _freeRangeMarker; + head.Target = length; + head.LeftAsSource = predecessor; + head.RightAsSource = successor; + + if (predecessor != default) + { + GetLinkReference(predecessor).RightAsSource = start; + } + else + { + Head = start; + } + if (successor != default) + { + GetLinkReference(successor).LeftAsSource = start; + } + return start; + } + + /// + /// Detaches from the free-range list and clears the + /// descriptor cell. Returns the length of the range that was removed. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public TLinkAddress Detach(TLinkAddress start) + { + ref var cell = ref GetLinkReference(start); + var length = cell.Target; + var prev = cell.LeftAsSource; + var next = cell.RightAsSource; + if (prev != default) + { + GetLinkReference(prev).RightAsSource = next; + } + else + { + Head = next; + } + if (next != default) + { + GetLinkReference(next).LeftAsSource = prev; + } + ClearCell(start); + return length; + } + + /// + /// Carves cells from the head of the range at + /// , leaving the remainder as a smaller free range at + /// start + length. The caller receives the original + /// address. The carved cells are zeroed. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public TLinkAddress CarveFromFront(TLinkAddress start, TLinkAddress length) + { + ref var cell = ref GetLinkReference(start); + var oldLength = cell.Target; + if (oldLength == length) + { + Detach(start); + return start; + } + var prev = cell.LeftAsSource; + var next = cell.RightAsSource; + ClearCell(start); + var newStart = start + length; + var newLength = oldLength - length; + ref var newHead = ref GetLinkReference(newStart); + newHead.Source = _freeRangeMarker; + newHead.Target = newLength; + newHead.LeftAsSource = prev; + newHead.RightAsSource = next; + if (prev != default) + { + GetLinkReference(prev).RightAsSource = newStart; + } + else + { + Head = newStart; + } + if (next != default) + { + GetLinkReference(next).LeftAsSource = newStart; + } + return start; + } + + /// + /// Carves cells from the back of the range at + /// . Returns the address of the first carved cell. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public TLinkAddress CarveFromBack(TLinkAddress start, TLinkAddress length) + { + ref var cell = ref GetLinkReference(start); + var oldLength = cell.Target; + if (oldLength == length) + { + Detach(start); + return start; + } + cell.Target = oldLength - length; + return start + (oldLength - length); + } + + /// + /// Removes the highest-address free range if it ends exactly at the high-water + /// mark . Returns its length or default + /// if no such range exists. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public TLinkAddress TryDetachTail(TLinkAddress allocatedLinks) + { + var current = Head; + var last = default(TLinkAddress); + while (current != default) + { + last = current; + current = GetLinkReference(current).RightAsSource; + } + if (last == default) + { + return default; + } + ref var cell = ref GetLinkReference(last); + if (last + cell.Target - TLinkAddress.One == allocatedLinks) + { + var length = cell.Target; + Detach(last); + return length; + } + return default; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ClearCell(TLinkAddress address) + { + ref var cell = ref GetLinkReference(address); + cell.Source = default; + cell.Target = default; + cell.LeftAsSource = default; + cell.RightAsSource = default; + cell.SizeAsSource = default; + cell.LeftAsTarget = default; + cell.RightAsTarget = default; + cell.SizeAsTarget = default; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void ClearRange(TLinkAddress start, TLinkAddress length) + { + var startLong = long.CreateTruncating(start); + var lengthLong = long.CreateTruncating(length); + // Zero the cells with a native-sized byte count: an int-sized Span length overflows once the range reaches 2 GiB. + System.Runtime.InteropServices.NativeMemory.Clear(_links + RawLink.SizeInBytes * startLong, checked((nuint)(lengthLong * RawLink.SizeInBytes))); + } + } +} diff --git a/csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/RawLinkSequenceMethods.cs 
b/csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/RawLinkSequenceMethods.cs new file mode 100644 index 000000000..21f1527e9 --- /dev/null +++ b/csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/RawLinkSequenceMethods.cs @@ -0,0 +1,177 @@ +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using Platform.Data.Doublets.Memory.United; +using static System.Runtime.CompilerServices.Unsafe; + +#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member + +namespace Platform.Data.Doublets.Memory.UnitedRanged.Generic +{ + /// + /// Encodes and decodes raw link sequences that live inside the link cell address + /// space. A sequence can be used as an opaque byte payload, but its storage remains + /// a contiguous range of regular cells. + /// + public unsafe class RawLinkSequenceMethods where TLinkAddress : IUnsignedNumber + { + private const long HeaderWordsReserved = 2; + private static readonly long WordSizeInBytes = System.Runtime.CompilerServices.Unsafe.SizeOf(); + private static readonly long CellSizeInBytes = RawLink.SizeInBytes; + private static readonly long PayloadBytesInHeaderCell = RawLink.SizeInBytes - HeaderWordsReserved * System.Runtime.CompilerServices.Unsafe.SizeOf(); + private static readonly long PayloadBytesInContinuationCell = RawLink.SizeInBytes; + + private readonly byte* _links; + private readonly TLinkAddress _sequenceMarker; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public RawLinkSequenceMethods(byte* links, TLinkAddress sequenceMarker) + { + _links = links; + _sequenceMarker = sequenceMarker; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private ref RawLink GetLinkReference(TLinkAddress address) => ref AsRef>(_links + CellSizeInBytes * long.CreateTruncating(address)); + + /// + /// Number of cells required to hold + /// bytes. The length must be a non-negative multiple of + /// sizeof(TLinkAddress). 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static long ComputeCellsForPayload(long payloadLengthInBytes) + { + ValidatePayloadLength(payloadLengthInBytes, nameof(payloadLengthInBytes)); + if (payloadLengthInBytes <= PayloadBytesInHeaderCell) + { + return 1; + } + var overflow = payloadLengthInBytes - PayloadBytesInHeaderCell; + return 1 + (overflow + PayloadBytesInContinuationCell - 1) / PayloadBytesInContinuationCell; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void ValidatePayloadLength(long payloadLengthInBytes, string argumentName) + { + if (payloadLengthInBytes < 0) + { + throw new ArgumentOutOfRangeException(argumentName); + } + if ((payloadLengthInBytes % WordSizeInBytes) != 0) + { + throw new ArgumentException("Raw link sequence length must be a multiple of sizeof(TLinkAddress).", argumentName); + } + } + + /// + /// Returns true if the cell at is the head of a raw + /// link sequence. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool IsRawLinkSequence(TLinkAddress address) + { + if (address == default) + { + return false; + } + return GetLinkReference(address).Source == _sequenceMarker; + } + + /// + /// Returns the sequence's payload length in bytes. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long GetLengthInBytes(TLinkAddress address) => long.CreateTruncating(GetLinkReference(address).Target); + + /// + /// Returns the number of cells the sequence at occupies. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public long GetCellCount(TLinkAddress address) => ComputeCellsForPayload(GetLengthInBytes(address)); + + /// + /// Writes only the marker and length descriptor into the sequence head. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void WriteDescriptor(TLinkAddress start, long payloadLengthInBytes) + { + ValidatePayloadLength(payloadLengthInBytes, nameof(payloadLengthInBytes)); + ref var head = ref GetLinkReference(start); + head.Source = _sequenceMarker; + head.Target = TLinkAddress.CreateTruncating(payloadLengthInBytes); + } + + /// + /// Writes the descriptor and payload into a previously allocated range starting + /// at . + /// + public void Write(TLinkAddress start, ReadOnlySpan payload) + { + ValidatePayloadLength(payload.Length, nameof(payload)); + ref var head = ref GetLinkReference(start); + head.Source = _sequenceMarker; + head.Target = TLinkAddress.CreateTruncating(payload.Length); + + var headPtr = (byte*)AsPointer(ref head) + (HeaderWordsReserved * WordSizeInBytes); + var firstChunk = (int)Math.Min(payload.Length, PayloadBytesInHeaderCell); + if (firstChunk > 0) + { + payload.Slice(0, firstChunk).CopyTo(new Span(headPtr, firstChunk)); + } + if (firstChunk < PayloadBytesInHeaderCell) + { + new Span(headPtr + firstChunk, (int)(PayloadBytesInHeaderCell - firstChunk)).Clear(); + } + + var remaining = payload.Length - firstChunk; + var offset = firstChunk; + var continuationIndex = long.CreateTruncating(start) + 1; + while (remaining > 0) + { + var chunk = (int)Math.Min(remaining, PayloadBytesInContinuationCell); + var dst = _links + (CellSizeInBytes * continuationIndex); + payload.Slice(offset, chunk).CopyTo(new Span(dst, chunk)); + if (chunk < PayloadBytesInContinuationCell) + { + new Span(dst + chunk, (int)(PayloadBytesInContinuationCell - chunk)).Clear(); + } + offset += chunk; + remaining -= chunk; + continuationIndex++; + } + } + + /// + /// Reads the payload of the sequence at into + /// . 
+ /// + public void Read(TLinkAddress start, Span destination) + { + ref var head = ref GetLinkReference(start); + var byteLength = long.CreateTruncating(head.Target); + if (destination.Length < byteLength) + { + throw new ArgumentException("Destination buffer is too small.", nameof(destination)); + } + var headPtr = (byte*)AsPointer(ref head) + (HeaderWordsReserved * WordSizeInBytes); + var firstChunk = (int)Math.Min(byteLength, PayloadBytesInHeaderCell); + if (firstChunk > 0) + { + new ReadOnlySpan(headPtr, firstChunk).CopyTo(destination.Slice(0, firstChunk)); + } + var remaining = byteLength - firstChunk; + var offset = firstChunk; + var continuationIndex = long.CreateTruncating(start) + 1; + while (remaining > 0) + { + var chunk = (int)Math.Min(remaining, PayloadBytesInContinuationCell); + var src = _links + (CellSizeInBytes * continuationIndex); + new ReadOnlySpan(src, chunk).CopyTo(destination.Slice(offset, chunk)); + offset += chunk; + remaining -= chunk; + continuationIndex++; + } + } + } +} diff --git a/csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/UnitedRangedMemoryLinks.cs b/csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/UnitedRangedMemoryLinks.cs new file mode 100644 index 000000000..372d2397c --- /dev/null +++ b/csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/UnitedRangedMemoryLinks.cs @@ -0,0 +1,594 @@ +using System; +using System.Collections.Generic; +using System.Numerics; +using System.Runtime.CompilerServices; +using Platform.Memory; +using Platform.Singletons; +using Platform.Data.Doublets.Memory.United; +using Platform.Data.Doublets.Memory.United.Generic; +using Platform.Data.Exceptions; +using Platform.Delegates; +using static System.Runtime.CompilerServices.Unsafe; + +#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member + +namespace Platform.Data.Doublets.Memory.UnitedRanged.Generic +{ + /// + /// + /// A drop-in substitute for that + /// additionally tracks unused space as 
a list of ranges of cells (not
+    /// only one-cell at a time) and supports raw link sequences stored inside the
+    /// same address space. Raw link sequences can be used as byte payloads for raw
+    /// data, binary files, and similar use cases.
+    ///
+    ///
+    /// Single-cell <see cref="Create"/> / <see cref="Delete"/> semantics are unchanged
+    /// for callers, but the implementation will prefer to fill an existing free
+    /// range before extending the underlying memory. <see cref="AllocateRange"/> /
+    /// <see cref="DeallocateRange"/> expose contiguous multi-cell allocations
+    /// (best-fit + coalescing). Convenience operations for raw link sequence payloads
+    /// are provided as extension methods over this range allocator.
+    ///
+    ///
+    public unsafe class UnitedRangedMemoryLinks : UnitedMemoryLinks
+        where TLinkAddress : IUnsignedNumber, IShiftOperators, IBitwiseOperators, IMinMaxValue, IComparisonOperators
+    {
+        // Base address of the underlying memory block; assigned in SetPointers, cleared in ResetPointers.
+        private byte* _rangedLinks;
+        // Helper for the multi-cell free-range list; null until SetPointers has run.
+        private RangedFreeListMethods? _freeRanges;
+        // Helper for raw link sequence descriptors and payloads; null until SetPointers has run.
+        private RawLinkSequenceMethods? _rawLinkSequences;
+        // Backing field of IncludeRawLinkSequences; sequence heads are visible by default.
+        private bool _includeRawLinkSequences = true;
+
+        ///
+        /// Controls whether raw link sequence heads are returned by <see cref="Each"/>
+        /// and included by <see cref="Count"/>. Continuation cells are never returned.
+        ///
+        public bool IncludeRawLinkSequences
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get => _includeRawLinkSequences;
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            set => _includeRawLinkSequences = value;
+        }
+
+        // Convenience constructors. The string overloads wrap the address in a
+        // FileMappedResizableDirectMemory; every overload eventually reaches the
+        // five-argument constructor at the end of this group.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedMemoryLinks(string address) : this(address, DefaultLinksSizeStep) { }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedMemoryLinks(string address, long memoryReservationStep) : this(new FileMappedResizableDirectMemory(address, memoryReservationStep), memoryReservationStep) { }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedMemoryLinks(string address, bool includeRawLinkSequences) : this(address, DefaultLinksSizeStep, includeRawLinkSequences) { }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedMemoryLinks(string address, long memoryReservationStep, bool includeRawLinkSequences) : this(new FileMappedResizableDirectMemory(address, memoryReservationStep), memoryReservationStep, includeRawLinkSequences) { }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedMemoryLinks(IResizableDirectMemory memory) : this(memory, DefaultLinksSizeStep) { }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedMemoryLinks(IResizableDirectMemory memory, long memoryReservationStep) : this(memory, memoryReservationStep, Default>.Instance, IndexTreeType.Default, includeRawLinkSequences: true) { }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedMemoryLinks(IResizableDirectMemory memory, long memoryReservationStep, bool includeRawLinkSequences) : this(memory, memoryReservationStep, Default>.Instance, IndexTreeType.Default, includeRawLinkSequences) { }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedMemoryLinks(IResizableDirectMemory memory, long memoryReservationStep, UnitedRangedLinksConstants constants, IndexTreeType indexTreeType)
+            : this(memory, memoryReservationStep, constants, indexTreeType, includeRawLinkSequences: true)
+        {
+        }
+
+        // Primary constructor: forwards storage, step, constants and tree type to the
+        // base class, then records the visibility preference for raw link sequence heads.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedMemoryLinks(IResizableDirectMemory memory, long memoryReservationStep, UnitedRangedLinksConstants constants, IndexTreeType indexTreeType, bool includeRawLinkSequences)
+            : base(memory, memoryReservationStep, constants, indexTreeType)
+        {
+            IncludeRawLinkSequences = includeRawLinkSequences;
+        }
+
+        // Rebuilds the range helpers on top of the current memory pointer. The
+        // constants object must be a UnitedRangedLinksConstants (the cast throws otherwise).
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        protected override void SetPointers(IResizableDirectMemory memory)
+        {
+            base.SetPointers(memory);
+            _rangedLinks = (byte*)memory.Pointer;
+            var rangedConstants = (UnitedRangedLinksConstants)Constants;
+            // NOTE(review): the same base pointer is passed twice — presumably once as the
+            // cell array and once as the header address; confirm against RangedFreeListMethods.
+            _freeRanges = new RangedFreeListMethods(_rangedLinks, _rangedLinks, rangedConstants.FreeRangeMarker);
+            _rawLinkSequences = new RawLinkSequenceMethods(_rangedLinks, rangedConstants.RawLinkSequenceMarker);
+        }
+
+        // Drops the raw pointer and both helpers in addition to the base-class reset.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        protected override void ResetPointers()
+        {
+            base.ResetPointers();
+            _rangedLinks = null;
+            _freeRanges = null;
+            _rawLinkSequences = null;
+        }
+
+        // -------------------------------------------------------------------------
+        // ILinks overrides
+        // -------------------------------------------------------------------------
+
+        ///
+        /// Returns the number of visible records. Free ranges and raw link sequence
+        /// continuation cells are always hidden; raw link sequence heads are included
+        /// when <see cref="IncludeRawLinkSequences"/> is enabled.
+        ///
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public override TLinkAddress Count(IList? 
restriction)
+        {
+            restriction ??= Array.Empty();
+            if (restriction.Count > 3)
+            {
+                // Message text (Russian): "Other sizes and kinds of restrictions are not supported."
+                throw new NotSupportedException("Другие размеры и способы ограничений не поддерживаются.");
+            }
+            var constants = Constants;
+            var any = constants.Any;
+            var count = default(TLinkAddress);
+            // [any, value]: count usages of value as a source plus usages as a target.
+            if (restriction.Count == 2 && restriction[constants.IndexPart] == any)
+            {
+                var value = restriction[1];
+                if (value == any)
+                {
+                    return CountVisibleLinks();
+                }
+                ForEachVisibleLink(link =>
+                {
+                    // The two checks are independent, so a record whose Source and Target
+                    // both equal value contributes two to the total.
+                    if (link.Source == value)
+                    {
+                        count = count + TLinkAddress.One;
+                    }
+                    if (link.Target == value)
+                    {
+                        count = count + TLinkAddress.One;
+                    }
+                    return constants.Continue;
+                });
+                return count;
+            }
+            // Every other restriction shape: one linear pass over the visible records.
+            ForEachVisibleLink(link =>
+            {
+                if (MatchesRestriction(link, restriction))
+                {
+                    count = count + TLinkAddress.One;
+                }
+                return constants.Continue;
+            });
+            return count;
+        }
+
+        ///
+        /// Iterates over visible records. Free ranges and raw link sequence
+        /// continuation cells are always hidden; raw link sequence heads are included
+        /// when <see cref="IncludeRawLinkSequences"/> is enabled.
+        ///
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public override TLinkAddress Each(IList? restriction, ReadHandler? handler)
+        {
+            restriction ??= Array.Empty();
+            if (restriction.Count > 3)
+            {
+                // Message text (Russian): "Other sizes and kinds of restrictions are not supported."
+                throw new NotSupportedException("Другие размеры и способы ограничений не поддерживаются.");
+            }
+            var constants = Constants;
+            var @break = constants.Break;
+            var @continue = constants.Continue;
+            var any = constants.Any;
+            if (restriction.Count == 2 && restriction[constants.IndexPart] == any)
+            {
+                var value = restriction[1];
+                if (value == any)
+                {
+                    return EachMatchingLink(handler, link => true, returnBreakOnCompletion: true);
+                }
+                // First pass: records that use value as their Source.
+                if (ForEachVisibleLink(link =>
+                {
+                    if (link.Source != value)
+                    {
+                        return @continue;
+                    }
+                    if (handler != null && handler(link) == @break)
+                    {
+                        return @break;
+                    }
+                    return @continue;
+                }) == @break)
+                {
+                    return @break;
+                }
+                // Second pass: records that use value as their Target. A record matching
+                // on both sides is therefore handed to the handler twice.
+                return ForEachVisibleLink(link =>
+                {
+                    if (link.Target != value)
+                    {
+                        return @continue;
+                    }
+                    if (handler != null && handler(link) == @break)
+                    {
+                        return @break;
+                    }
+                    return @continue;
+                });
+            }
+            return EachMatchingLink(handler, link => MatchesRestriction(link, restriction), IsWholeStoreScan(restriction));
+
+            // Feeds every visible record accepted by predicate to visibleHandler.
+            // Returns Break when the handler stopped the scan or when
+            // returnBreakOnCompletion is set; otherwise returns Continue.
+            TLinkAddress EachMatchingLink(ReadHandler? visibleHandler, Func, bool> predicate, bool returnBreakOnCompletion)
+            {
+                if (ForEachVisibleLink(link =>
+                {
+                    if (!predicate(link))
+                    {
+                        return @continue;
+                    }
+                    if (visibleHandler != null && visibleHandler(link) == @break)
+                    {
+                        return @break;
+                    }
+                    return @continue;
+                }) == @break || returnBreakOnCompletion)
+                {
+                    return @break;
+                }
+                return @continue;
+            }
+        }
+
+        ///
+        /// Creates a single doublet. Prefers the single-cell unused list, then a
+        /// carved cell from the smallest free range whose length is >= 3
+        /// (carving from a 2-cell range would leave a 1-cell remainder that
+        /// cannot be tracked as a range; we leave such ranges intact so that
+        /// <see cref="AllocateRange"/> may still use them).
+        ///
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public override TLinkAddress Create(IList? substitution, WriteHandler? 
handler)
+        {
+            ref var header = ref GetHeaderReference();
+            // Free ranges are consulted only when the single-cell unused list is empty;
+            // in every other case the base implementation does the work.
+            if (header.FirstFreeLink == Constants.Null)
+            {
+                // Built by addition because the generic address type has no literal for 3.
+                var three = TLinkAddress.One + TLinkAddress.One + TLinkAddress.One;
+                var range = _freeRanges!.FindBestFit(three);
+                if (range != default)
+                {
+                    var newLink = _freeRanges.CarveFromFront(range, TLinkAddress.One);
+                    // NOTE(review): substitution is not applied on this path; the new cell is
+                    // reported with null Source/Target — confirm this matches base.Create.
+                    return handler != null
+                        ? handler(null, new Link(newLink, Constants.Null, Constants.Null))
+                        : Constants.Continue;
+                }
+            }
+            return base.Create(substitution, handler);
+        }
+
+        ///
+        /// Deletes a single doublet. Behaviour matches the base class for
+        /// non-tail links; for tail links the trimming loop additionally retires
+        /// trailing single-cell unused links and trailing free ranges, but never
+        /// confuses a free-range head or a raw link sequence head with a
+        /// single-cell unused link.
+        ///
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public override TLinkAddress Delete(IList? restriction, WriteHandler? handler)
+        {
+            ref var header = ref GetHeaderReference();
+            var link = restriction![Constants.IndexPart];
+            // Snapshot taken before any mutation so the handler can see the old state.
+            var before = GetLinkStruct(link);
+            // A raw link sequence head releases the whole sequence, not just one cell.
+            if (_rawLinkSequences!.IsRawLinkSequence(link))
+            {
+                var cells = _rawLinkSequences.GetCellCount(link);
+                DeallocateRange(link, TLinkAddress.CreateTruncating(cells));
+                return handler != null ? handler(before, null) : Constants.Continue;
+            }
+            // A free-range head is already free: nothing to do, and the handler is not called.
+            if (_freeRanges!.IsFreeRangeHead(link))
+            {
+                return Constants.Continue;
+            }
+            // NOTE(review): nothing here rejects an address inside a free range or a raw
+            // link sequence continuation cell; such an address would reach AttachAsFirst —
+            // confirm callers cannot pass one.
+            if (link < header.AllocatedLinks)
+            {
+                UnusedLinksListMethods.AttachAsFirst(link);
+                return handler != null ? handler(before, null) : Constants.Continue;
+            }
+            // Tail cell: shrink the high-water mark, then retire any free cells exposed at the tail.
+            if (link == header.AllocatedLinks)
+            {
+                header.AllocatedLinks = header.AllocatedLinks - TLinkAddress.One;
+                _memory.UsedCapacity -= LinkSizeInBytes;
+                TrimTail();
+                return handler != null ? handler(before, null) : Constants.Continue;
+            }
+            // Address beyond the allocated area: ignored.
+            return Constants.Continue;
+        }
+
+        ///
+        /// Protects ranged metadata cells from being treated as normal doublets by
+        /// generic update helpers. 
Reset updates are accepted as no-ops so the
+        /// existing delete extension can still deallocate a raw link sequence through
+        /// the universal surface.
+        ///
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public override TLinkAddress Update(IList? restriction, IList? substitution, WriteHandler? handler)
+        {
+            var link = restriction![Constants.IndexPart];
+            // Raw link sequence heads and free-range heads hold allocator metadata, not a doublet.
+            if (_rawLinkSequences!.IsRawLinkSequence(link) || _freeRanges!.IsFreeRangeHead(link))
+            {
+                if (IsResetSubstitution(substitution))
+                {
+                    return Constants.Continue;
+                }
+                throw new InvalidOperationException("Ranged metadata cells cannot be updated as regular doublets.");
+            }
+            return base.Update(restriction, substitution, handler);
+        }
+
+        // -------------------------------------------------------------------------
+        // Public range API
+        // -------------------------------------------------------------------------
+
+        /// <summary>
+        /// Allocates <paramref name="length"/> contiguous cells and returns the
+        /// address of the first cell. The cells are uninitialised — the caller
+        /// is expected to immediately write a meaningful payload (or pass the
+        /// result to a raw link sequence extension method).
+        /// </summary>
+        /// <remarks>
+        /// Sources are tried in this order: for a single cell, the single-cell unused
+        /// list; then the best-fitting multi-cell free range; finally the tail of the
+        /// storage. Serving single cells from the unused list first keeps multi-cell
+        /// ranges intact for as long as a lone free cell is available, which matches
+        /// the preference documented on <see cref="Create"/>.
+        /// </remarks>
+        /// <param name="length">Number of cells to allocate; must not be zero.</param>
+        /// <returns>Address of the first allocated cell.</returns>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is zero.</exception>
+        public TLinkAddress AllocateRange(TLinkAddress length)
+        {
+            if (length == default)
+            {
+                throw new ArgumentOutOfRangeException(nameof(length));
+            }
+            // A single cell never needs to split a range while a lone free cell exists.
+            if (length == TLinkAddress.One)
+            {
+                var freeLink = GetHeaderReference().FirstFreeLink;
+                if (freeLink != Constants.Null)
+                {
+                    UnusedLinksListMethods.Detach(freeLink);
+                    return freeLink;
+                }
+            }
+            // Try best-fit on the multi-cell free-range list.
+            var existing = _freeRanges!.FindBestFit(length);
+            if (existing != default)
+            {
+                var existingLength = _freeRanges.GetLength(existing);
+                if (existingLength == length)
+                {
+                    _freeRanges.Detach(existing);
+                    return existing;
+                }
+                var remainder = existingLength - length;
+                if (remainder == TLinkAddress.One)
+                {
+                    _freeRanges.Detach(existing);
+                    // 1-cell remainder cannot be tracked as a range — push to the
+                    // single-cell unused list so it is still reachable by Create().
+                    UnusedLinksListMethods.AttachAsFirst(existing + length);
+                    return existing;
+                }
+                return _freeRanges.CarveFromFront(existing, length);
+            }
+            // No fit anywhere — bump AllocatedLinks (extending memory if needed).
+            return BumpAllocatedLinks(length);
+        }
+
+        /// <summary>
+        /// Returns a multi-cell range to the allocator. <paramref name="start"/>
+        /// must be the first cell previously returned by <see cref="AllocateRange"/>
+        /// (or the head of a raw link sequence being released), and
+        /// <paramref name="length"/> must match the original allocation.
+        /// </summary>
+        /// <remarks>
+        /// Neither argument is validated against the allocated area. A zero
+        /// <paramref name="length"/> is a no-op.
+        /// </remarks>
+        /// <param name="start">Address of the first cell of the range.</param>
+        /// <param name="length">Number of cells in the range.</param>
+        public void DeallocateRange(TLinkAddress start, TLinkAddress length)
+        {
+            if (length == default)
+            {
+                return;
+            }
+            ref var header = ref GetHeaderReference();
+            // Tail-only fast path: nothing to insert, just shrink.
+            if (start + length - TLinkAddress.One == header.AllocatedLinks)
+            {
+                ClearCells(start, length);
+                header.AllocatedLinks = header.AllocatedLinks - length;
+                _memory.UsedCapacity -= long.CreateTruncating(length) * LinkSizeInBytes;
+                TrimTail();
+                return;
+            }
+            // 1-cell mid-range deallocation: go on the single-cell unused list.
+            if (length == TLinkAddress.One)
+            {
+                ClearCells(start, length);
+                UnusedLinksListMethods.AttachAsFirst(start);
+                return;
+            }
+            // 2+ cells: register as a multi-cell free range (coalesces with neighbours).
+            // NOTE(review): unlike the two paths above, the cells are not cleared first —
+            // presumably Insert rewrites the head descriptor; confirm stale payload is harmless.
+            _freeRanges!.Insert(start, length);
+            TrimTail();
+        }
+
+        // -------------------------------------------------------------------------
+        // Internals
+        // -------------------------------------------------------------------------
+
+        /// <summary>
+        /// Extends the allocated area by <paramref name="length"/> cells at the tail,
+        /// growing the reserved memory as needed, and returns the first new cell.
+        /// </summary>
+        /// <exception cref="LinksLimitReachedException">
+        /// The new high-water mark would exceed the maximum internal reference, or the
+        /// addition wrapped around the range of <typeparamref name="TLinkAddress"/>.
+        /// </exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private TLinkAddress BumpAllocatedLinks(TLinkAddress length)
+        {
+            ref var header = ref GetHeaderReference();
+            var maximumPossibleInnerReference = Constants.InternalReferencesRange.Maximum;
+            var newAllocated = header.AllocatedLinks + length;
+            // Unsigned addition wraps silently; a sum below the old mark means overflow,
+            // which must be reported rather than allowed to slip past the limit check.
+            if (newAllocated < header.AllocatedLinks || newAllocated > maximumPossibleInnerReference)
+            {
+                throw new LinksLimitReachedException(maximumPossibleInnerReference);
+            }
+            // Ensure capacity: keep one cell of headroom so that base.Create() can
+            // also extend by one without re-entering this path mid-call.
+            while (newAllocated >= header.ReservedLinks - TLinkAddress.One)
+            {
+                _memory.ReservedCapacity += _memoryReservationStep;
+                // Pointers are re-derived and the header re-read after every growth step.
+                SetPointers(_memory);
+                header = ref GetHeaderReference();
+                header.ReservedLinks = TLinkAddress.CreateTruncating((_memory.ReservedCapacity - LinkHeaderSizeInBytes) / LinkSizeInBytes);
+            }
+            var start = header.AllocatedLinks + TLinkAddress.One;
+            header.AllocatedLinks = newAllocated;
+            _memory.UsedCapacity += long.CreateTruncating(length) * LinkSizeInBytes;
+            return start;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private void TrimTail()
+        {
+            ref var header = ref GetHeaderReference();
+            while (header.AllocatedLinks > default(TLinkAddress))
+            {
+                var tail = header.AllocatedLinks;
+                if (IsSingleCellUnused(tail))
+                {
+                    UnusedLinksListMethods.Detach(tail);
+                    header.AllocatedLinks = header.AllocatedLinks - TLinkAddress.One;
+                    _memory.UsedCapacity -= LinkSizeInBytes;
+                    continue;
+                }
+                var detachedLength = _freeRanges!.TryDetachTail(tail);
+                if (detachedLength != default)
+                {
+                    header.AllocatedLinks = header.AllocatedLinks - detachedLength;
+                    _memory.UsedCapacity -= long.CreateTruncating(detachedLength) * LinkSizeInBytes;
+                    continue;
+                }
+ 
break;
+            }
+        }
+
+        /// <summary>
+        /// Tells whether <paramref name="link"/> is on the single-cell unused list,
+        /// as opposed to being a live doublet, a free-range head or a raw link
+        /// sequence head.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private bool IsSingleCellUnused(TLinkAddress link)
+        {
+            ref var header = ref GetHeaderReference();
+            if (header.FirstFreeLink == link)
+            {
+                return true;
+            }
+            ref var cell = ref AsRef>(_rangedLinks + (RawLink.SizeInBytes * long.CreateTruncating(link)));
+            // Same predicate as the base class (SizeAsSource == 0 and Source != 0) ...
+            if (cell.SizeAsSource != default)
+            {
+                return false;
+            }
+            if (cell.Source == default)
+            {
+                return false;
+            }
+            // ... with the two ranged markers excluded, since they also live in Source.
+            var rangedConstants = (UnitedRangedLinksConstants)Constants;
+            if (cell.Source == rangedConstants.FreeRangeMarker || cell.Source == rangedConstants.RawLinkSequenceMarker)
+            {
+                return false;
+            }
+            return true;
+        }
+
+        /// <summary>
+        /// Zero-fills <paramref name="length"/> cells starting at <paramref name="start"/>.
+        /// </summary>
+        /// <remarks>
+        /// A span cannot describe more than <see cref="int.MaxValue"/> bytes, so the
+        /// region is cleared in chunks; ranges of any size are therefore supported.
+        /// </remarks>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal void ClearCells(TLinkAddress start, TLinkAddress length)
+        {
+            var startLong = long.CreateTruncating(start);
+            var lengthLong = long.CreateTruncating(length);
+            var ptr = _rangedLinks + (RawLink.SizeInBytes * startLong);
+            // Only the total byte count is overflow-checked; each chunk fits in an int.
+            var remaining = checked(lengthLong * RawLink.SizeInBytes);
+            while (remaining > 0)
+            {
+                var chunk = (int)Math.Min(remaining, int.MaxValue);
+                new Span(ptr, chunk).Clear();
+                ptr += chunk;
+                remaining -= chunk;
+            }
+        }
+
+        /// <summary>
+        /// Raw link sequence helper for the extension methods; valid only while pointers are set.
+        /// </summary>
+        internal RawLinkSequenceMethods RawLinkSequences
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get => _rawLinkSequences!;
+        }
+
+        /// <summary>Counts every record that <see cref="ForEachVisibleLink"/> reports.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private TLinkAddress CountVisibleLinks()
+        {
+            var count = default(TLinkAddress);
+            ForEachVisibleLink(_ =>
+            {
+                count = count + TLinkAddress.One;
+                return Constants.Continue;
+            });
+            return count;
+        }
+
+        /// <summary>
+        /// Walks the cells from 1 up to the allocated high-water mark in address order
+        /// and passes each visible record to <paramref name="action"/>.
+        /// </summary>
+        /// <remarks>
+        /// Free ranges are skipped as a whole. A raw link sequence is reported once,
+        /// through its head cell, and only when <see cref="IncludeRawLinkSequences"/>
+        /// is set; its remaining cells are skipped. Other cells are reported when
+        /// <c>Exists</c> accepts them.
+        /// </remarks>
+        /// <returns>Break as soon as <paramref name="action"/> returns it; otherwise Continue.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private TLinkAddress ForEachVisibleLink(Func, TLinkAddress> action)
+        {
+            var @break = Constants.Break;
+            // The high-water mark is read once; cells allocated during the walk are not visited.
+            var allocated = GetHeaderReference().AllocatedLinks;
+            var link = TLinkAddress.One;
+            while (link <= allocated)
+            {
+                if (_freeRanges!.IsFreeRangeHead(link))
+                {
+                    link = link + _freeRanges.GetLength(link);
+                    continue;
+                }
+                if (_rawLinkSequences!.IsRawLinkSequence(link))
+                {
+                    if (IncludeRawLinkSequences && action(new Link(link, GetLinkReference(link).Source, GetLinkReference(link).Target)) == @break)
+                    {
+                        return @break;
+                    }
+                    link = link + TLinkAddress.CreateTruncating(_rawLinkSequences.GetCellCount(link));
+                    continue;
+                }
+                if (Exists(link))
+                {
+                    if (action(new Link(link, GetLinkReference(link).Source, GetLinkReference(link).Target)) == @break)
+                    {
+                        return @break;
+                    }
+                }
+                link = link + TLinkAddress.One;
+            }
+            return Constants.Continue;
+        }
+
+        /// <summary>
+        /// Tests a record against a restriction of zero to three elements, where Any
+        /// acts as a wildcard. In the two-element form the second element matches
+        /// either the Source or the Target.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private bool MatchesRestriction(Link link, IList restriction)
+        {
+            var constants = Constants;
+            var any = constants.Any;
+            return restriction.Count switch
+            {
+                0 => true,
+                1 => restriction[constants.IndexPart] == any || link.Index == restriction[constants.IndexPart],
+                2 => MatchesIndex(link, restriction[constants.IndexPart], any)
+                    && (restriction[1] == any || link.Source == restriction[1] || link.Target == restriction[1]),
+                3 => MatchesIndex(link, restriction[constants.IndexPart], any)
+                    && (restriction[constants.SourcePart] == any || link.Source == restriction[constants.SourcePart])
+                    && (restriction[constants.TargetPart] == any || link.Target == restriction[constants.TargetPart]),
+                _ => false
+            };
+        }
+
+        /// <summary>True when <paramref name="index"/> is the wildcard or equals the record's own index.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static bool MatchesIndex(Link link, TLinkAddress index, TLinkAddress any) => index == any || link.Index == index;
+
+        /// <summary>True when every element of the restriction is the wildcard (or there are none).</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private bool IsWholeStoreScan(IList restriction)
+        {
+            var constants = Constants;
+            var any = constants.Any;
+            return restriction.Count switch
+            {
+                0 => true,
+                1 => restriction[constants.IndexPart] == any,
+                2 => restriction[constants.IndexPart] == any && restriction[1] == any,
+                3 => restriction[constants.IndexPart] == any
+                    && restriction[constants.SourcePart] == any
+                    && restriction[constants.TargetPart] == any,
+                _ => false
+            };
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private bool IsResetSubstitution(IList? 
substitution)
+        {
+            // A reset is a full [index, source, target] substitution whose source and target are both Null.
+            if (substitution == null || substitution.Count < 3)
+            {
+                return false;
+            }
+            return substitution[Constants.SourcePart] == Constants.Null && substitution[Constants.TargetPart] == Constants.Null;
+        }
+    }
+}
diff --git a/csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/UnitedRangedMemoryLinksExtensions.cs b/csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/UnitedRangedMemoryLinksExtensions.cs
new file mode 100644
index 000000000..5f274d666
--- /dev/null
+++ b/csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/UnitedRangedMemoryLinksExtensions.cs
@@ -0,0 +1,87 @@
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+
+#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member
+
+namespace Platform.Data.Doublets.Memory.UnitedRanged.Generic
+{
+    /// <summary>
+    /// Raw link sequence convenience operations layered on the range allocator.
+    /// </summary>
+    public static class UnitedRangedMemoryLinksExtensions
+    {
+        /// <summary>
+        /// Allocates a zero-filled raw link sequence able to hold
+        /// <paramref name="payloadLengthInBytes"/> bytes and writes its descriptor.
+        /// </summary>
+        /// <returns>Address of the sequence head cell.</returns>
+        /// <exception cref="OverflowException">
+        /// The required number of cells cannot be represented by the address type.
+        /// Truncating it would allocate fewer cells than the descriptor claims.
+        /// </exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static TLinkAddress AllocateRawLinkSequence(this UnitedRangedMemoryLinks links, long payloadLengthInBytes)
+            where TLinkAddress : IUnsignedNumber, IShiftOperators, IBitwiseOperators, IMinMaxValue, IComparisonOperators
+        {
+            var cells = RawLinkSequenceMethods.ComputeCellsForPayload(payloadLengthInBytes);
+            // Checked conversion: the allocation must cover every cell the payload needs.
+            var cellCount = TLinkAddress.CreateChecked(cells);
+            var start = links.AllocateRange(cellCount);
+            links.ClearCells(start, cellCount);
+            links.RawLinkSequences.WriteDescriptor(start, payloadLengthInBytes);
+            return start;
+        }
+
+        /// <summary>Allocates a raw link sequence sized for <paramref name="payload"/> and stores it.</summary>
+        /// <returns>Address of the sequence head cell.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static TLinkAddress AllocateRawLinkSequence(this UnitedRangedMemoryLinks links, ReadOnlySpan payload)
+            where TLinkAddress : IUnsignedNumber, IShiftOperators, IBitwiseOperators, IMinMaxValue, IComparisonOperators
+        {
+            var start = links.AllocateRawLinkSequence(payload.Length);
+            links.WriteRawLinkSequence(start, payload);
+            return start;
+        }
+
+        /// <summary>Overwrites the payload of an existing raw link sequence.</summary>
+        /// <exception cref="ArgumentException">
+        /// <paramref name="start"/> is not a sequence head, or the payload length
+        /// differs from the length recorded for the sequence.
+        /// </exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void WriteRawLinkSequence(this UnitedRangedMemoryLinks links, TLinkAddress start, ReadOnlySpan payload)
+            where TLinkAddress : IUnsignedNumber, IShiftOperators, IBitwiseOperators, IMinMaxValue, IComparisonOperators
+        {
+            RawLinkSequenceMethods.ValidatePayloadLength(payload.Length, nameof(payload));
+            if (!links.RawLinkSequences.IsRawLinkSequence(start))
+            {
+                throw new ArgumentException("Address is not a raw link sequence head.", nameof(start));
+            }
+            var expectedLength = links.RawLinkSequences.GetLengthInBytes(start);
+            if (expectedLength != payload.Length)
+            {
+                throw new ArgumentException("Payload length must match the allocated raw link sequence length.", nameof(payload));
+            }
+            links.RawLinkSequences.Write(start, payload);
+        }
+
+        /// <summary>Copies the payload of a raw link sequence into <paramref name="destination"/>.</summary>
+        /// <exception cref="ArgumentException"><paramref name="start"/> is not a sequence head.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void ReadRawLinkSequence(this UnitedRangedMemoryLinks links, TLinkAddress start, Span destination)
+            where TLinkAddress : IUnsignedNumber, IShiftOperators, IBitwiseOperators, IMinMaxValue, IComparisonOperators
+        {
+            if (!links.RawLinkSequences.IsRawLinkSequence(start))
+            {
+                throw new ArgumentException("Address is not a raw link sequence head.", nameof(start));
+            }
+            links.RawLinkSequences.Read(start, destination);
+        }
+
+        /// <summary>
+        /// Releases all cells of a raw link sequence. Does nothing when
+        /// <paramref name="start"/> is not a sequence head.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void DeallocateRawLinkSequence(this UnitedRangedMemoryLinks links, TLinkAddress start)
+            where TLinkAddress : IUnsignedNumber, IShiftOperators, IBitwiseOperators, IMinMaxValue, IComparisonOperators
+        {
+            if (!links.RawLinkSequences.IsRawLinkSequence(start))
+            {
+                return;
+            }
+            var cells = links.RawLinkSequences.GetCellCount(start);
+            links.DeallocateRange(start, TLinkAddress.CreateTruncating(cells));
+        }
+
+        /// <summary>Tells whether <paramref name="address"/> is the head cell of a raw link sequence.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool IsRawLinkSequence(this UnitedRangedMemoryLinks links, TLinkAddress address)
+            where TLinkAddress : IUnsignedNumber, IShiftOperators, IBitwiseOperators, IMinMaxValue, IComparisonOperators
+            => links.RawLinkSequences.IsRawLinkSequence(address);
+
+        /// <summary>Returns the payload length in bytes recorded for the sequence at <paramref name="address"/>.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static long GetRawLinkSequenceLengthInBytes(this UnitedRangedMemoryLinks links, TLinkAddress address)
+            where TLinkAddress : IUnsignedNumber, IShiftOperators, IBitwiseOperators, IMinMaxValue, IComparisonOperators
+            => links.RawLinkSequences.GetLengthInBytes(address);
+
+        /// <summary>Returns the number of cells occupied by the sequence at <paramref name="address"/>.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static long GetRawLinkSequenceCellCount(this UnitedRangedMemoryLinks links, TLinkAddress address)
+            where TLinkAddress : IUnsignedNumber, IShiftOperators, IBitwiseOperators, IMinMaxValue, IComparisonOperators
+            => links.RawLinkSequences.GetCellCount(address);
+    }
+}
diff --git a/csharp/Platform.Data.Doublets/Memory/UnitedRanged/UnitedRangedLinksConstants.cs b/csharp/Platform.Data.Doublets/Memory/UnitedRanged/UnitedRangedLinksConstants.cs
new file mode 100644
index 000000000..f2a4b5149
--- /dev/null
+++ b/csharp/Platform.Data.Doublets/Memory/UnitedRanged/UnitedRangedLinksConstants.cs
@@ -0,0 +1,79 @@
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using Platform.Ranges;
+
+#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member
+
+namespace Platform.Data.Doublets.Memory.UnitedRanged
+{
+    ///
+    ///
+    /// Extension of <see cref="LinksConstants{TLinkAddress}"/> used by
+    /// <see cref="Generic.UnitedRangedMemoryLinks{TLinkAddress}"/>. Exposes two
+    /// additional sentinel values stored inside the Source word:
+    ///
+    ///
+    /// <see cref="RawLinkSequenceMarker"/> tags the first cell of a raw link sequence.
+    /// <see cref="FreeRangeMarker"/> tags the first cell of a multi-cell free range.
+    ///
+    ///
+    /// Both markers reuse housekeeping slots that <see cref="LinksConstants{TLinkAddress}"/>
+    /// already reserves above InternalReferencesRange.Maximum, so they cannot
+    /// collide with any valid link index.
+    ///
+    ///
+    public class UnitedRangedLinksConstants : LinksConstants where TLinkAddress : IUnsignedNumber
+    {
+        ///
+        /// Sentinel stored in the Source word to designate that a cell is the
+        /// first cell of a raw link sequence. 
Reuses the
+        /// <see cref="LinksConstants{TLinkAddress}.Itself"/> slot — a housekeeping value
+        /// that is never persisted as a link reference.
+        ///
+        public TLinkAddress RawLinkSequenceMarker
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get;
+        }
+
+        ///
+        /// Sentinel stored in the Source word to designate that a cell is the
+        /// first cell of a multi-cell free range. Reuses the
+        /// <see cref="LinksConstants{TLinkAddress}.Error"/> slot — a housekeeping value
+        /// that is never persisted as a link reference.
+        ///
+        public TLinkAddress FreeRangeMarker
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get;
+        }
+
+        // Every constructor mirrors a base-class overload and then copies the two markers
+        // from the base values. The copy stays in the constructor body because Itself and
+        // Error are only available once the base constructor has run.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedLinksConstants()
+        {
+            RawLinkSequenceMarker = Itself;
+            FreeRangeMarker = Error;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedLinksConstants(bool enableExternalReferencesSupport) : base(enableExternalReferencesSupport)
+        {
+            RawLinkSequenceMarker = Itself;
+            FreeRangeMarker = Error;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedLinksConstants(Range possibleInternalReferencesRange) : base(possibleInternalReferencesRange)
+        {
+            RawLinkSequenceMarker = Itself;
+            FreeRangeMarker = Error;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public UnitedRangedLinksConstants(Range possibleInternalReferencesRange, Range? possibleExternalReferencesRange) : base(possibleInternalReferencesRange, possibleExternalReferencesRange)
+        {
+            RawLinkSequenceMarker = Itself;
+            FreeRangeMarker = Error;
+        }
+    }
+}
diff --git a/csharp/Platform.Data.Doublets/Memory/UnitedRanged/UnitedRangedLinksExtensions.cs b/csharp/Platform.Data.Doublets/Memory/UnitedRanged/UnitedRangedLinksExtensions.cs
new file mode 100644
index 000000000..a824bc3bd
--- /dev/null
+++ b/csharp/Platform.Data.Doublets/Memory/UnitedRanged/UnitedRangedLinksExtensions.cs
@@ -0,0 +1,21 @@
+using System.Collections.Generic;
+using System.Numerics;
+
+#pragma warning disable CS1591 // Missing XML comment for publicly visible type or member
+
+namespace Platform.Data.Doublets.Memory.UnitedRanged
+{
+    public static class UnitedRangedLinksExtensions
+    {
+        // Tells whether a link given as a parts list is a raw link sequence head, by
+        // comparing its Source part with the marker. Returns false when the list is
+        // null or too short to hold a Source part, or when the storage's constants
+        // are not UnitedRangedLinksConstants.
+        public static bool IsRawLinkSequence(this ILinks links, IList? link)
+            where TLinkAddress : IUnsignedNumber
+        {
+            if (link == null || link.Count <= links.Constants.SourcePart)
+            {
+                return false;
+            }
+            return links.Constants is UnitedRangedLinksConstants constants
+                && link[links.Constants.SourcePart] == constants.RawLinkSequenceMarker;
+        }
+    }
+}
diff --git a/docs/case-studies/issue-512/README.md b/docs/case-studies/issue-512/README.md
new file mode 100644
index 000000000..31331cb47
--- /dev/null
+++ b/docs/case-studies/issue-512/README.md
@@ -0,0 +1,34 @@
+# Case Study: Issue #512 — `UnitedRangedMemoryLinks` with Link Ranges
+
+> Source issue: 
+>
+> Author: @konard
+>
+> Branch / PR: [`issue-512-557a0a3ca78d`](https://github.com/linksplatform/Data.Doublets/tree/issue-512-557a0a3ca78d) — PR [#513](https://github.com/linksplatform/Data.Doublets/pull/513)
+
+This directory collects the analysis, design exploration and implementation plan for the new `UnitedRangedMemoryLinks` doublets storage variant. The goal is twofold:
+
+1. 
Provide an _evolution_ of `UnitedMemoryLinks` that allocates and reclaims **contiguous ranges of links** instead of single links, while preserving the no-fragmentation, uniform-cell invariant that makes united storage so attractive. +2. Allow **raw link sequences** to live inside the same address space as ordinary doublets, by reusing the underlying link cells as payload cells. Those sequences can store raw data blobs, binary files, or any other byte payload whose length is aligned to `TLinkAddress`. + +The files in this directory are: + +| File | Purpose | +| --- | --- | +| [`requirements.md`](./requirements.md) | Itemised, traceable list of every requirement extracted from the issue text. | +| [`background.md`](./background.md) | Background on `UnitedMemoryLinks`, RawLink/LinksHeader layout, and the constraints imposed by the existing codebase. | +| [`design.md`](./design.md) | Design alternatives (sorted free list, segregated free list, buddy allocator, bitmap, …) and the **chosen design**, including disk layout and invariants. | +| [`related-work.md`](./related-work.md) | External references and prior art used while researching the problem (allocator literature, in-memory tagged-pointer schemes, B-tree page allocators, …). | +| [`solution-plan.md`](./solution-plan.md) | Step-by-step plan that maps every requirement to a concrete code change. | +| [`risks-and-trade-offs.md`](./risks-and-trade-offs.md) | Trade-offs, future work and explicit non-goals. | + +## TL;DR + +Each cell of the storage still occupies one `RawLink` slot (8 × `TLinkAddress`), so the file format remains uniform and free of internal fragmentation. The improvements are: + +* A **range allocator** that tracks free regions as a sorted-by-address, length-keyed doubly-linked list of `RawLink` cells (the same cells reused as range descriptors). 
Adjacent free regions are eagerly coalesced on deallocation, so the only way fragmentation can grow is when an allocation is _larger than every free region_, in which case the storage is simply extended at the tail. +* A new **`RawLinkSequenceMarker`** constant in `UnitedRangedLinksConstants` — used as the `Source` field of the first cell of a raw link sequence. The second field (`Target`) records the payload length in bytes, from which the number of consumed link cells is derived. +* A new **`UnitedRangedMemoryLinks`** class drop-in compatible with `ILinks` (so the existing tests pass with it as a substitute for `UnitedMemoryLinks`) with range allocation in the implementation and raw-link-sequence convenience operations in extensions. +* `Each` and `Count` include raw link sequence heads by default, while continuation cells and free ranges stay hidden. The `IncludeRawLinkSequences` configuration can exclude sequence heads when a caller wants ordinary doublets only. + +For the full rationale, see [`design.md`](./design.md). diff --git a/docs/case-studies/issue-512/background.md b/docs/case-studies/issue-512/background.md new file mode 100644 index 000000000..eb5938fa2 --- /dev/null +++ b/docs/case-studies/issue-512/background.md @@ -0,0 +1,109 @@ +# Background — How `UnitedMemoryLinks` Works Today + +This is a short tour of the parts of the existing implementation that the +`UnitedRangedMemoryLinks` design needs to interact with. Line numbers refer to the +state of the repository at the time of writing. 
+ +## File layout + +A united-memory database is a single mapped file that begins with a `LinksHeader` and +then continues with a sequence of equally sized `RawLink` cells: + +```text ++-------------------+-------------------+-------------------+-----+ +| Header | Cell #1 | Cell #2 | … | +| (LinkSizeInBytes) | (LinkSizeInBytes) | (LinkSizeInBytes) | | ++-------------------+-------------------+-------------------+-----+ +``` + +The header overlays the very first cell, so cell #0 never carries real data +(`csharp/Platform.Data.Doublets/Memory/United/Generic/UnitedMemoryLinksBase.cs:184`). + +`LinkSizeInBytes` is `8 * sizeof(TLinkAddress)` — that is, eight `TLinkAddress` words: + +```csharp +public struct RawLink +{ + public TLinkAddress Source; // word 0 + public TLinkAddress Target; // word 1 + public TLinkAddress LeftAsSource; // word 2 + public TLinkAddress RightAsSource; // word 3 + public TLinkAddress SizeAsSource; // word 4 + public TLinkAddress LeftAsTarget; // word 5 + public TLinkAddress RightAsTarget; // word 6 + public TLinkAddress SizeAsTarget; // word 7 +} +``` + +The header is exactly the same size and is laid out as: + +```csharp +public struct LinksHeader +{ + public TLinkAddress AllocatedLinks; // word 0 — high-water mark + public TLinkAddress ReservedLinks; // word 1 — capacity in cells + public TLinkAddress FreeLinks; // word 2 — size of the unused list + public TLinkAddress FirstFreeLink; // word 3 — head of the unused list + public TLinkAddress RootAsSource; // word 4 — root of the by-source tree + public TLinkAddress RootAsTarget; // word 5 — root of the by-target tree + public TLinkAddress LastFreeLink; // word 6 — tail of the unused list + public TLinkAddress Reserved8; // word 7 — currently unused +} +``` + +The matching `Reserved8` word is what `UnitedRangedMemoryLinks` will use for the +**free-range list head**. 
+ +## Lifecycle of a single link + +* `Create` (`UnitedMemoryLinksBase.cs:509-535`) takes the next unused cell from the + unused list (`UnusedLinksListMethods`), or appends a cell at the tail and grows the + underlying memory by `_memoryReservationStep` bytes if the reserved capacity is + exhausted. +* `Delete` (`UnitedMemoryLinksBase.cs:548-574`) either attaches the cell to the front + of the unused list, or — if it is the very last allocated cell — shrinks + `AllocatedLinks`, then keeps popping from the unused list while its tail is the new + high-water mark. +* `Update` (`UnitedMemoryLinksBase.cs:472-503`) detaches the link from the + source/target trees, mutates the cell, and re-attaches. + +The "unused list" is an _absolute circular doubly-linked list_ +(`UnusedLinksListMethods.cs`). Critically, it stores the previous/next pointers in +the `Source`/`Target` slots of the cell it links — so a cell on the free list can be +detected by the predicate + +```csharp +link.SizeAsSource == default && link.Source != default +``` + +(`UnitedMemoryLinksBase.cs:686-697`). + +## Implications for the new design + +1. **Cell #0 is the header.** The reserved word `Reserved8` is _the_ obvious place to + store an extra root pointer — for the free-range list — without breaking any code + that does not look at it. The implementation keeps using `LinksHeader` + directly and treats `Reserved8` as the free-range list head, so the binary + representation stays identical. This means a database written by + `UnitedMemoryLinks` can be opened by `UnitedRangedMemoryLinks` and vice-versa, as + long as no raw link sequences are present. + +2. **A free single cell remains a free single cell.** The original unused-links list is + _preserved_; the new "free range" list only tracks runs of two or more contiguous + free cells. When a range deallocation produces a run of length 1, it is pushed back + onto the original unused-links list. + +3. 
**`Source == RawLinkSequenceMarker`** marks the head of a raw link sequence. The + marker value is chosen so that: + * it is outside `InternalReferencesRange` (so it cannot accidentally appear as a + valid link reference); + * it is _stable_ across versions of `LinksConstants` — it reuses the existing + `Itself` housekeeping constant in the reserved tail of the address space above the references range, + which `LinksConstants` already keeps for control values (`Continue`, `Break`, + `Skip`, `Any`, `Itself`, `Error`). + +4. **Tree methods are unchanged.** The new class only intercepts `Create`, `Update`, + `Delete`, `Each` and `Count` to (a) treat raw link sequences as ranged metadata and + optionally expose their head cells through `Each`/`Count`, and (b) ignore the + free-range descriptor cells. All the tree methods receive the same pointers as + before and operate without modification. diff --git a/docs/case-studies/issue-512/design.md b/docs/case-studies/issue-512/design.md new file mode 100644 index 000000000..b9e7efabe --- /dev/null +++ b/docs/case-studies/issue-512/design.md @@ -0,0 +1,196 @@ +# Design + +## Goals recap + +* Allocate / deallocate contiguous **ranges of link cells** (`R3`, `R4`). +* No fragmentation — never split unless inevitable, coalesce on free (`R7`). +* "Prefer empty space" — best-fit, growth at tail only as a last resort (`R8`). +* Embed raw **link sequences** in the same address space (`R5`, `R6`, `R9`). These + sequences can store raw data blobs, binary files, or other aligned byte payloads. +* Stay drop-in compatible with `UnitedMemoryLinks` and `ILinks<>` (`R2`, `R10`). + +## Design alternatives considered + +| Allocator | Pros | Cons | Verdict | +| --- | --- | --- | --- | +| **Per-cell free list (status quo)** | Simplest, used today. | `O(N)` cells to allocate a range; no contiguous guarantee for ranges. | Kept for single cells, but insufficient for ranges. | +| **Bitmap (1 bit per cell)** | Predictable space, easy "find N contiguous".
| Linear scan; extra header bytes; not aligned to existing on-disk format. | Rejected — adds a parallel index. | +| **Buddy allocator** | Fast power-of-two ranges. | Internal fragmentation for non-power-of-two requests; requires careful split/coalesce. | Rejected — violates "no fragmentation". | +| **Segregated free lists by size** | Best-fit in O(1) when a size class exists. | Many overflow size classes for `ulong` ranges; tricky coalescing. | Rejected — over-engineered. | +| **Address-sorted doubly-linked list of free ranges, best-fit** | Trivial coalescing; small constant factor; **stored inside the cells themselves**. | `O(F)` search where F is the number of free ranges. | **Chosen**. | + +The chosen allocator is a [boundary-tag](https://en.wikipedia.org/wiki/Boundary_tag) +free-list allocator, simplified by the fact that cell sizes are uniform: there is no +need to keep a "size" word at every allocation boundary, only at the head of free +runs. + +## Two markers, no ambiguity + +The implementation uses **two distinct sentinels** stamped into `Source` to +discriminate the three flavours of cell that can appear in the allocated range: + +| Cell flavour | `Source` value | +| --- | --- | +| Regular doublet | A link index (`≤ InternalReferencesRange.Maximum`) or `Null` | +| Raw link sequence head | `RawLinkSequenceMarker` = `LinksConstants.Itself` | +| Multi-cell free range head | `FreeRangeMarker` = `LinksConstants.Error` | + +Both sentinels live above `InternalReferencesRange.Maximum` (they are housekeeping +slots `LinksConstants` already reserves), so they cannot be confused with valid +link indices. Using two distinct sentinels removes the need for any high-bit +discriminator on `Target`, and keeps the descriptor easy to read in a debugger. + +## Free-range descriptors + +Each free range of length `≥ 2` is described by the **first** cell of the range. +Continuation cells are zeroed. 
The head cell's fields are used as follows: + +| Field | Free-range usage | +| --- | --- | +| `Source` | `FreeRangeMarker` | +| `Target` | `Length` of the run in cells, including this header cell. | +| `LeftAsSource` | `Previous` pointer in the address-sorted free-range list (`0` if none). | +| `RightAsSource` | `Next` pointer in the address-sorted free-range list (`0` if none). | +| `SizeAsSource` … `SizeAsTarget` | reserved (`0`). | + +A single address-sorted list is sufficient: best-fit search walks the list once +in `O(F)` time. A second size-sorted list was considered but ultimately rejected +because (a) `F` stays small in practice thanks to eager coalescing and (b) the +additional bookkeeping doubles the maintenance cost of every insert/detach without +materially improving the common case. + +The list head is stored in `LinksHeader.Reserved8`, which was previously unused. +No on-disk header layout change is required: databases produced by +`UnitedMemoryLinks` have `Reserved8 = 0`, which `UnitedRangedMemoryLinks` reads +as "no free ranges" — so old files open cleanly. + +## Raw link sequence layout + +A raw link sequence occupies one **header cell** followed by zero or more continuation +cells. The header cell holds: + +| Field | Raw-link-sequence usage | +| --- | --- | +| `Source` | `RawLinkSequenceMarker` | +| `Target` | `Length` of the payload in **bytes**. Must be a multiple of `sizeof(TLinkAddress)`. | +| `LeftAsSource` … `SizeAsTarget` | First six `TLinkAddress` words of payload (treated as opaque bytes). | + +Each continuation cell carries eight more `TLinkAddress` words of payload (no +continuation marker, no length — the head cell's `Target` drives iteration). 
So +a sequence of `B` bytes occupies: + +```text +cells = 1 if B ≤ 6 * sizeof(TLinkAddress) +cells = 1 + ceil((B - 6 * sizeof(TLinkAddress)) / (8 * sizeof(TLinkAddress))) otherwise +``` + +The encoding is unambiguous because: + +* `Source == RawLinkSequenceMarker` is never produced by `Create` (which initialises + `Source` and `Target` to `Null` and only ever stores values inside the references + range). +* The marker is **never** sampled in a continuation cell — iteration of a sequence + starts at the head cell, picks up the length, and consumes the right number of + bytes from contiguous addresses without re-examining `Source` of any inner cell. +* Intermediate cell indices inside a sequence are **not** valid link handles. This is + a deliberate trade-off: it removes the need to scan from address `1` to detect + whether a given index belongs to a sequence's interior. + +## Range allocation algorithm + +``` +AllocateRange(length): + assert length >= 1 + range = freeRanges.FindBestFit(length) // address-sorted scan + if range != null: + if range.Length == length: + freeRanges.Detach(range) + return range.Start + if range.Length == length + 1: // 1-cell remainder can't be a range + freeRanges.Detach(range) + unusedLinks.AttachAsFirst(range.Start + length) + return range.Start + return freeRanges.CarveFromFront(range, length) + if length == 1: + free = unusedLinks.TryDetachFirst() // recycle a single-cell hole + if free != null: + return free + return BumpAllocatedLinks(length) // tail growth, last resort +``` + +`BumpAllocatedLinks` increments `AllocatedLinks` by `length`, growing the backing +memory if the reserved capacity is exceeded — exactly like base `Create` does, +but in one shot. 
+ +`Create(...)` itself overrides base behaviour just enough to prefer a carve from +the smallest free range whose length is `≥ 3` when the per-cell unused list is +empty (a 2-cell range can't be carved by 1 because the leftover would be smaller +than the minimum free-range size; in that case we fall through to base `Create`, +which will grow at the tail). + +## Range deallocation + +``` +DeallocateRange(start, length): + if start + length - 1 == AllocatedLinks: // tail fast path + ClearCells(start, length) + AllocatedLinks -= length + TrimTail() + return + if length == 1: // single-cell hole + ClearCells(start, 1) + unusedLinks.AttachAsFirst(start) + return + freeRanges.Insert(start, length) // coalesces with neighbours + TrimTail() +``` + +`Insert` coalesces with the predecessor (if it ends exactly at `start`) and the +successor (if it begins exactly at `start + length`); it can swallow zero, one, +or two neighbours per call. `TrimTail` then walks the high-water mark down past +any trailing single-cell unused links and trailing free ranges — the asymptotic +optimality guarantee that makes long alloc/free sequences leave the database the +same size as if they had never happened. + +## Marking & interaction with `Each` / `Count` + +`UnitedRangedMemoryLinks` overrides `Each(...)` and `Count(...)` for all supported +restriction shapes. Both walk allocated addresses from `1` to `AllocatedLinks`. +Free-range heads are always hidden. Raw link sequence continuation cells are always +hidden. Raw link sequence heads are visible by default, and can be hidden by setting +`IncludeRawLinkSequences = false`. + +Restricted `Each`/`Count` calls also use the ranged scan instead of the base +source/target trees, because raw link sequence heads are not inserted into those +trees. This keeps universal `ILinks<>` queries able to discover sequence heads by +index, by `Source == RawLinkSequenceMarker`, or by the byte length stored in `Target`. 
+ +`Create`/`Delete` keep their existing semantics for callers: a fresh `Create()` +returns a freshly-initialised single-cell address, and `Delete(link)` puts a +mid-range cell back on the per-cell unused list or trims the tail when removing +the highest cell. + +## On-disk compatibility + +* No header byte layout change. The free-range list head reuses `Reserved8`, + which previous releases of `UnitedMemoryLinks` left at zero. +* Databases produced by `UnitedMemoryLinks` open cleanly in + `UnitedRangedMemoryLinks`: `Reserved8 == 0` means "no free ranges yet", and + the per-cell unused list keeps working for single-cell allocations. +* Databases produced by `UnitedRangedMemoryLinks` that contain no raw link sequences and no + multi-cell free ranges round-trip back through `UnitedMemoryLinks` bit-for-bit. +* Databases that **do** contain raw link sequences or multi-cell free ranges are intentionally + not backwards-compatible with old readers — the issue body does not require + cross-version compatibility, and the reused `LinksHeader.Reserved8` word makes it + cheap to add a version check later. + +## Invariants + +1. **No internal fragmentation** — every link cell is either part of an allocated + doublet, part of an allocated raw link sequence, part of a multi-cell free range, + or on the single-cell unused list. The union of all four sets is exactly + `[1, AllocatedLinks]`. +2. **No external fragmentation buildup** — coalescing happens on every + `DeallocateRange`; appending at the tail is the only way to grow. +3. **`AllocatedLinks` is tight** — after every deallocation, the high-water mark + is the address of the highest still-in-use cell, never more. diff --git a/docs/case-studies/issue-512/related-work.md b/docs/case-studies/issue-512/related-work.md new file mode 100644 index 000000000..02a64435f --- /dev/null +++ b/docs/case-studies/issue-512/related-work.md @@ -0,0 +1,61 @@ +# Related Work + +A short, opinionated bibliography. 
Each entry is annotated with what we are borrowing +and what we are deliberately not borrowing. + +## Allocators with boundary tags + +* **Donald Knuth, _The Art of Computer Programming, Vol. 1, §2.5_** — original + description of boundary-tag allocators (1968). Borrowed: coalesce-on-free. + Not borrowed: variable-sized blocks, since our cells are uniform. + +* **Doug Lea, _A Memory Allocator_ (1996)** — the canonical reference for `dlmalloc`. + Borrowed: best-fit search, immediate coalescing, + the idea that the free chunk metadata _lives inside the free chunk_. + Not borrowed: the size-sorted free list and bin-by-class segregation — overkill at our scale. + +* **`jemalloc`**, **`tcmalloc`** — both employ size classes and per-thread caches. + We are single-threaded inside a `SynchronizedLinks` wrapper, so the complexity is + unnecessary. + +## Tagged-pointer / sentinel schemes for in-line metadata + +* **Lua 5.4 strings** — small strings are stored inline; long strings are referenced + by pointer with a tag bit. The "marker word at the head of a record" idea is the + same as our `RawLinkSequenceMarker` (and analogous to Lua's `LUA_TLNGSTR` tag). +* **SQLite "frequent" records** — SQLite reuses the first byte of a record as a type + tag. Our `Source == RawLinkSequenceMarker` convention is conceptually identical. + +## Allocators inside persistent stores + +* **PostgreSQL `FreeSpaceMap`** — uses a fan-out tree of per-page free-space records. + Heavier than what we need but illustrates the "free space embedded in the page" idea. +* **LMDB / BoltDB free-page lists** — both maintain a sorted free-page list inside the + database file. We are exactly mirroring this design at finer granularity. +* **MS Exchange Information Store (`.edb`) "RPS"** — Microsoft's research database + layer also stores allocations as fixed-size cells with a free list, and uses tags + to denote "this cell is a continuation of the previous one".
+ +## Doublets ecosystem (internal) + +* `UnitedMemoryLinks` — the existing single-cell allocator we are evolving. +* `SplitMemoryLinks` — an alternative storage that keeps doublet "index" data and + "data" data in two separate files. Out of scope for this issue, but we keep its + conventions in mind for future merging. +* `Platform.Memory.IResizableDirectMemory` — the unified API we re-use for storage + expansion. +* `Platform.Collections.Methods.Lists.AbsoluteCircularDoublyLinkedListMethods` — the + base class used by the existing unused-link list. We instantiate a second one for + the address-sorted free-range list to keep the implementation small. + +## Online research notes + +Search queries used during the design phase (kept here for traceability): + +* "boundary tag allocator linked list free range coalesce" +* "uniform cell allocator fragmentation" +* "tagged pointer marker first cell raw sequence in memory store" +* "linksplatform doublets storage layout" +* "LMDB freelist coalesce" + +No external code is _copied_ into this repository. diff --git a/docs/case-studies/issue-512/requirements.md b/docs/case-studies/issue-512/requirements.md new file mode 100644 index 000000000..ab90fc4c2 --- /dev/null +++ b/docs/case-studies/issue-512/requirements.md @@ -0,0 +1,143 @@ +# Requirements (Issue #512) + +The requirements below are extracted verbatim from the issue body, then re-expressed as +acceptance criteria. Identifiers (`R1`, `R2`, …) are referenced from +[`solution-plan.md`](./solution-plan.md) so every change in the PR maps back to one of +them. + +## R1. New folder `UnitedRanged` next to `UnitedMemoryLinks` + +> "We add new UnitedRanged folder, and do not break any other existing feature." + +* **Acceptance:** new directory `csharp/Platform.Data.Doublets/Memory/UnitedRanged/` exists + with the new types. Existing `Memory/United/` files are **unchanged in behaviour**. + +## R2. 
New class `UnitedRangedMemoryLinks` + +> "add fully supported in all places UnitedRangedMemoryLinks, that can be used as +> substitution of UnitedMemoryLinks." + +* **Acceptance:** + * implements `ILinks`, + * exposes the same set of constructors as `UnitedMemoryLinks` + (`(string)`, `(string, long)`, `(IResizableDirectMemory)`, `(IResizableDirectMemory, long)`, + `(IResizableDirectMemory, long, UnitedRangedLinksConstants, IndexTreeType)`), + plus overloads that configure whether raw link sequence heads are visible in + `Each`/`Count`, + * existing tests (`ResizableDirectMemoryLinksTests`, `ILinksBasicTests`, + `GenericLinksTests`, `GarbageCollectionTests`) succeed when the type is plugged in + instead of `UnitedMemoryLinks` for storage operations covered by `ILinks<>`. + +## R3. Range allocation/deallocation in multiples of the cell size + +> "We need elegant solution, that will allow us to allocate/deallocate ranges that are +> multiple of single link size, so the memory management is still uniform without +> possibility of any fragmentation" + +* **Acceptance:** + * `AllocateRange(TLinkAddress length)` returns the start address of a contiguous block of + `length` cells, reusing an existing free range when one fits and otherwise growing the + file at the tail in a single step (cf. R4, R7). + * `DeallocateRange(TLinkAddress start, TLinkAddress length)` returns cells from a + previously allocated range or raw link sequence to the free list and **coalesces** + with adjacent free regions. + * Every range described by the allocator has a length that is a positive integer + multiple of `RawLink.SizeInBytes`. No partial cells are ever + produced. + +## R4.
Range allocation should be faster than allocating one-by-one + +> "we should also be to allocate/deallocate ranges of links (that should be faster than +> allocating one by one)" + +* **Acceptance:** a unit test compares `AllocateRange(N)` against `N` individual + `Create()` calls and verifies that range allocation advances the high-water mark in + one operation instead of one operation per cell. + +## R5. Raw link sequence allocation + +> "and also allocating raw binary ranges. And use some constant in LinksContants as a +> marker of such raw binary links" + +* **Acceptance:** + * a new constant `RawLinkSequenceMarker` is exposed via + `UnitedRangedLinksConstants` + (a subclass of `LinksConstants` so we don't break the upstream + contract), + * extension method `AllocateRawLinkSequence(long sizeInBytes)` validates that the + byte size is a non-negative multiple of `sizeof(TLinkAddress)` and returns the + start cell address of the sequence, + * the first cell of the sequence carries: + * `Source = RawLinkSequenceMarker`, + * `Target = lengthInBytes`, + * extension methods `IsRawLinkSequence(start)` and `IsRawLinkSequence(linkFromEach)` + identify sequence heads. + +## R6. Binary tree fields are part of the payload + +> "in binary range the fields we usually used for indexing trees should be supported as +> just continuation of binary data" + +* **Acceptance:** the entire `RawLink` struct fields beyond `Source`/`Target` of the + **first** cell (`LeftAsSource`, `RightAsSource`, `SizeAsSource`, `LeftAsTarget`, + `RightAsTarget`, `SizeAsTarget`) are addressable and writable as continuation of the + payload via `WriteRawLinkSequence`/`ReadRawLinkSequence`. + Trees are **not attached** to the cells that belong to a raw link sequence, so the + indexing fields can be freely used as data bytes. + +## R7. No fragmentation + +> "if the size of requested range is greater than any free range, we should just append +> it to the end of the data store." 
+ +* **Acceptance:** + * On allocation, the allocator scans the free list and picks the **smallest free + range that satisfies the request** ("best-fit"). If none qualifies, it grows + `AllocatedLinks` at the tail. + * On deallocation, neighbours are coalesced. + * A property-based test allocates and deallocates a deterministic random sequence and + asserts that, after every operation, the free list contains no two adjacent free + regions. + +## R8. Prefer filling empty/unused space first + +> "we should prefer filling the empty / unused space, to pack up everything nicely." + +* **Acceptance:** for any allocation request that fits in any existing free range, no + new cells are appended at the tail; this is covered by + `AllocateRange_PrefersExistingFreeRange`. + +## R9. Treat marker'd cells as binary, not as references + +> "that should be treated not as references to links, but binary data itself" + +* **Acceptance:** + * `Each` and `Count` include raw link sequence heads by default so universal + `ILinks<>` consumers can discover them. + * `IncludeRawLinkSequences = false` excludes raw link sequence heads from + `Each`/`Count` for callers that want ordinary doublets only. + * Continuation cells inside a raw link sequence are always skipped. + * Tree-method invariants are preserved by never inserting raw link sequence cells in + the source/target trees; ranged iteration scans visible cells directly instead. + +## R10. Backwards compatibility + +> "do not break any other existing feature" + +* **Acceptance:** the original `UnitedMemoryLinks` class is untouched; the existing test + suite continues to pass; the new class is additive. + +## R11.
Documentation & case study + +> "We need to collect data related about the issue to this repository, make sure we +> compile that data to `./docs/case-studies/issue-{id}` folder, and use it to do deep +> case study analysis" + +* **Acceptance:** the present folder (`docs/case-studies/issue-512`) contains the + background, requirements, design and solution plan. + +## R12. Single pull request + +> "Please plan and execute everything in a single pull request" + +* **Acceptance:** all work lands in PR #513 against branch `issue-512-557a0a3ca78d`. diff --git a/docs/case-studies/issue-512/risks-and-trade-offs.md b/docs/case-studies/issue-512/risks-and-trade-offs.md new file mode 100644 index 000000000..196fe93ab --- /dev/null +++ b/docs/case-studies/issue-512/risks-and-trade-offs.md @@ -0,0 +1,45 @@ +# Risks & Trade-offs + +## Known trade-offs of the chosen design + +* **Best-fit search is `O(F)`**, where `F` is the number of free ranges. In a healthy + database this number stays small because we coalesce eagerly, but a pathological + write pattern (allocate / free / allocate / free of differing sizes that never + coalesce) could grow `F`. A future enhancement could add size-class bins. +* **The in-cell free-range descriptor** (marker, length, previous and next pointers) consumes 4 words inside the head cell — that's + still well within the 8-word cell, but means the "smallest free range we can + describe" is one full cell. Free runs of length 1 are punted to the existing + single-cell unused list, which is unchanged. +* **Marker collisions** — `RawLinkSequenceMarker` is chosen above + `InternalReferencesRange.Maximum` so it cannot be confused with a valid link index. + Older `LinksConstants` instances that ship without the new constant simply do not + see the marker at all, so an old reader of a new file would (a) think a raw link + sequence head is a regular link and (b) attempt to walk the source tree from it.
+ Cross-version compatibility is explicitly **not** a goal of this PR (the issue body + says nothing about it), and the reused `Reserved8` word makes it cheap to add a + version check later. + +## Risks that the design _eliminates_ + +* **Internal fragmentation** — the uniform cell granularity carries over. +* **External fragmentation that grows without bound** — coalescing on deallocation, + tail-trimming after coalescing, and best-fit allocation jointly keep the free list + short. + +## Things that are not done + +* No SIMD / vectorised search through free ranges. +* No multi-threaded allocator — the existing single-writer assumption holds. +* No serialisation format change beyond reusing the `Reserved8` slot. +* No FFI surface (`Platform.Data.Doublets.FFI`) update — that lives in a separate + repository and tracks the C ABI; we intentionally keep the new C# class additive so + the FFI surface is unaffected. + +## Future work + +* Promote the free-range allocator into a stand-alone library to be reused by + `SplitMemoryLinks`. +* Add a CLI utility (`platform-doublets defrag`) that walks the free list and + rebuilds it from scratch, useful after offline upgrades. +* Add a raw-link-sequence cursor type to the public API that exposes the payload as a + `Span`. diff --git a/docs/case-studies/issue-512/solution-plan.md b/docs/case-studies/issue-512/solution-plan.md new file mode 100644 index 000000000..52da45223 --- /dev/null +++ b/docs/case-studies/issue-512/solution-plan.md @@ -0,0 +1,80 @@ +# Solution Plan + +The plan below maps each requirement to a concrete change and lists the order of +implementation. Every checkbox corresponds to one logical commit; the commits land on +branch `issue-512-557a0a3ca78d` (PR #513). 
+ +## Step 1 — Constants scaffolding (`R5`, `R10`) + +* Add `csharp/Platform.Data.Doublets/Memory/UnitedRanged/UnitedRangedLinksConstants.cs` + — `LinksConstants` subclass that exposes `RawLinkSequenceMarker` + (reuses `Itself`) and `FreeRangeMarker` (reuses `Error`). +* No `LinksHeader` layout change is needed: the free-range list head reuses the + existing `Reserved8` word, which previous releases left at zero. + +## Step 2 — Range allocator (`R3`, `R7`, `R8`) + +* Add `csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/RangedFreeListMethods.cs` + — an address-sorted, doubly-linked free-range allocator stored in-cell. The + allocator exposes `FindBestFit(length)`, `Insert(start, length)` (with + predecessor/successor coalescing), `Detach(start)`, `CarveFromFront`, + `CarveFromBack`, and `TryDetachTail`. + +## Step 3 — Raw link sequences (`R5`, `R6`, `R9`) + +* Add `csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/RawLinkSequenceMethods.cs` + — encodes/decodes raw link sequences over the allocator. Exposes + `Write(start, payload)`, `Read(start, destination)`, + `ComputeCellsForPayload(byteLength)`, `IsRawLinkSequence(address)`, + `GetLengthInBytes(address)`, and `GetCellCount(address)`. +* Add extension methods over `UnitedRangedMemoryLinks` for the + non-essential convenience API: `AllocateRawLinkSequence`, + `WriteRawLinkSequence`, `ReadRawLinkSequence`, `DeallocateRawLinkSequence`, + `IsRawLinkSequence`, `GetRawLinkSequenceLengthInBytes`, and + `GetRawLinkSequenceCellCount`. +* Add an `ILinks<>` extension that identifies raw link sequence heads returned by + `Each`, so callers can inspect sequence heads through the universal interface. 
+ +## Step 4 — `UnitedRangedMemoryLinks` (`R1`, `R2`) + +* Add `csharp/Platform.Data.Doublets/Memory/UnitedRanged/Generic/UnitedRangedMemoryLinks.cs` + — a single concrete class that inherits directly from `UnitedMemoryLinks`, + mirrors its five constructors, overrides `SetPointers`/`ResetPointers` to wire + up the new helpers, and overrides `Create`/`Delete`/`Update`/`Each`/`Count` so that + raw link sequences and free-range cells are correctly handled. Exposes the + implementation-level range API `AllocateRange` / `DeallocateRange` and the + `IncludeRawLinkSequences` configuration. A separate `UnitedRangedMemoryLinksBase` was + considered but proved unnecessary — direct inheritance is sufficient. + +## Step 5 — Tests (`R2`, `R3`, `R4`, `R5`, `R6`, `R7`, `R8`, `R9`) + +* Add `csharp/Platform.Data.Doublets.Tests/UnitedRangedMemoryLinksTests.cs` containing: + * `BasicMemoryOperations_Substitution` — equivalent to + `ResizableDirectMemoryLinksTests.BasicHeapMemoryTest` but using the new class. + * `AllocateRange_ReturnsContiguousBlock`. + * `AllocateRange_FasterThanIndividualCreates` — counts memory-resize events. + * `AllocateRange_PrefersExistingFreeRange`. + * `DeallocateRange_CoalescesNeighbours`. + * `DeallocateRange_TrimsTail`. + * `AllocateRange_OneCellRemainderFeedsSingleCellFreeList`. + * `RawLinkSequence_Roundtrip_SingleCell`. + * `RawLinkSequence_Roundtrip_MultiCell`. + * `RawLinkSequence_ZeroLength_RoundtripAndUsesOneCell`. + * `RawLinkSequence_LengthMustBeWordAligned`. + * `RawLinkSequence_AppearsInEachByDefault`. + * `RawLinkSequence_CanBeExcludedFromEachByConfiguration`. + * `RawLinkSequence_CanBeReturnedByEachRestriction`. + * `Delete_DeallocatesRawLinkSequenceThroughUniversalInterface`. + * `Each_SkipsFreeRangesAndIncludesConfiguredRawLinkSequences`. + * `NoFragmentation_ChaosTest` — deterministic random allocations/deallocations. + +## Step 6 — Documentation (`R11`) + +* Populate the `docs/case-studies/issue-512` folder (this directory). 
+* Reference the case study from the PR description. + +## Step 7 — Final review (`R12`) + +* Verify the full build / test pass. +* Ensure PR description summarises the design and points to the case study. +* Mark PR #513 ready for review.