author     Araq <rumpf_a@web.de>    2017-12-07 10:54:46 +0100
committer  Araq <rumpf_a@web.de>    2017-12-07 10:54:46 +0100
commit     ede38a70fc4c7bb403555fb5b95fb50d609e73dc (patch)
tree       2720a111a2251172e194da805ecc82c86e81d3bb /lib/system/alloc.nim
parent     9820c2c4561ee56f30c1578672dd1247be25cb11 (diff)
download   Nim-ede38a70fc4c7bb403555fb5b95fb50d609e73dc.tar.gz
make allocator use the TLSF algorithm; work in progress
Diffstat (limited to 'lib/system/alloc.nim')
-rw-r--r--  lib/system/alloc.nim  157
1 file changed, 127 insertions, 30 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index 19d27e7d2..d587b1698 100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -8,8 +8,6 @@
 #
 # Low level allocator for Nim. Has been designed to support the GC.
-# TODO:
-# - make searching for block O(1)
 
 {.push profiler:off.}
 
 include osalloc
@@ -19,14 +17,16 @@ template track(op, address, size) =
     memTrackerOp(op, address, size)
 
 # We manage *chunks* of memory. Each chunk is a multiple of the page size.
-# Each chunk starts at an address that is divisible by the page size. Chunks
-# that are bigger than ``ChunkOsReturn`` are returned back to the operating
-# system immediately.
+# Each chunk starts at an address that is divisible by the page size.
 
 const
-  ChunkOsReturn = 256 * PageSize # 1 MB
-  InitialMemoryRequest = ChunkOsReturn div 2 # < ChunkOsReturn!
+  InitialMemoryRequest = 128 * PageSize # 0.5 MB
   SmallChunkSize = PageSize
+  MaxFli = 30
+  MaxLog2Sli = 5
+  MaxSli = 1 shl MaxLog2Sli
+  FliOffset = 6
+  RealFli = MaxFli - FliOffset
 
 type
   PTrunk = ptr Trunk
@@ -99,10 +99,12 @@ type
   MemRegion = object
     minLargeObj, maxLargeObj: int
     freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk]
+    flBitmap: uint32
+    slBitmap: array[RealFli, uint32]
+    matrix: array[RealFli, array[MaxSli, PBigChunk]]
     llmem: PLLChunk
     currMem, maxMem, freeMem: int # memory sizes (allocated from OS)
     lastSize: int # needed for the case that OS gives us pages linearly
-    freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access
     chunkStarts: IntSet
     root, deleted, last, freeAvlNodes: PAvlNode
     locked, blockChunkSizeIncrease: bool # if locked, we cannot free pages.
@@ -110,7 +112,104 @@ type
     bottomData: AvlNode
     heapLinks: HeapLinks
 
-{.deprecated: [TMemRegion: MemRegion].}
+const
+  fsLookupTable: array[byte, int8] = [
+    -1'i8, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7
+  ]
+
+proc msbit(x: uint32): int {.inline.} =
+  let a = if x <= 0xff_ff:
+            (if x <= 0xff: 0 else: 8)
+          else:
+            (if x <= 0xff_ff_ff: 16 else: 24)
+  result = int(fsLookupTable[byte(x shr a)]) + a
+
+proc lsbit(x: uint32): int {.inline.} =
+  msbit(x and ((not x) + 1))
+
+proc setBit(nr: int; dest: var uint32) {.inline.} =
+  dest = dest or (1u32 shl (nr and 0x1f))
+
+proc clearBit(nr: int; dest: var uint32) {.inline.} =
+  dest = dest and not (1u32 shl (nr and 0x1f))
+
+proc mappingSearch(r, fl, sl: var int) {.inline.} =
+  let t = (1 shl (msbit(uint32 r) - MaxLog2Sli)) - 1
+  r = r + t
+  fl = msbit(uint32 r)
+  sl = (r shr (fl - MaxLog2Sli)) - MaxSli
+  dec fl, FliOffset
+  r = r and not t
+
+# See http://www.gii.upv.es/tlsf/files/papers/tlsf_desc.pdf for details of
+# this algorithm.
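# ---- editor's aside, not part of the patch -----------------------------
# A runnable sketch of the size-class mapping performed by mappingSearch,
# with the table-based msbit replaced by a naive loop. The constants mirror
# this commit (MaxLog2Sli = 5, FliOffset = 6); msbitSketch and
# mappingSearchSketch are hypothetical names used only for illustration.

proc msbitSketch(x: uint32): int =
  # index of the most significant set bit; x must be > 0
  var v = x
  while v > 1u32:
    v = v shr 1
    inc result

proc mappingSearchSketch(r: var int): tuple[fl, sl: int] =
  const
    MaxLog2Sli = 5
    MaxSli = 1 shl MaxLog2Sli
    FliOffset = 6
  let t = (1 shl (msbitSketch(uint32(r)) - MaxLog2Sli)) - 1
  r = r + t                     # round the request up to its size class ...
  result.fl = msbitSketch(uint32(r))
  result.sl = (r shr (result.fl - MaxLog2Sli)) - MaxSli
  dec result.fl, FliOffset
  r = r and not t               # ... so any block filed under (fl, sl) fits

when isMainModule:
  var size = 3 * 4096           # a three-page request; PageSize = 4096 assumed
  let (fl, sl) = mappingSearchSketch(size)
  echo "rounded: ", size, " fl: ", fl, " sl: ", sl  # rounded: 12288 fl: 7 sl: 16
# ---- end aside ----------------------------------------------------------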
+
+proc mappingInsert(r: int): tuple[fl, sl: int] {.inline.} =
+  result.fl = msbit(uint32 r)
+  result.sl = (r shr (result.fl - MaxLog2Sli)) - MaxSli
+  dec result.fl, FliOffset
+
+template mat(): untyped = a.matrix[fl][sl]
+
+proc findSuitableBlock(a: MemRegion; fl, sl: var int): PBigChunk {.inline.} =
+  let tmp = a.slBitmap[fl] and (not 0u32 shl sl)
+  result = nil
+  if tmp != 0:
+    sl = lsbit(tmp)
+    result = mat()
+  else:
+    fl = lsbit(a.flBitmap and (not 0u32 shl (fl + 1)))
+    if fl > 0:
+      sl = lsbit(a.slBitmap[fl])
+      result = mat()
+
+template clearBits(sl, fl) =
+  clearBit(sl, a.slBitmap[fl])
+  if a.slBitmap[fl] == 0u32:
+    # do not forget to cascade:
+    clearBit(fl, a.flBitmap)
+
+proc removeChunkFromMatrix(a: var MemRegion; b: PBigChunk) =
+  let (fl, sl) = mappingInsert(b.size)
+  if b.next != nil: b.next.prev = b.prev
+  if b.prev != nil: b.prev.next = b.next
+  if mat() == b:
+    mat() = b.next
+    if mat() == nil:
+      clearBits(sl, fl)
+  b.prev = nil
+  b.next = nil
+
+proc removeChunkFromMatrix2(a: var MemRegion; b: PBigChunk; fl, sl: int) =
+  mat() = b.next
+  if mat() != nil:
+    mat().prev = nil
+  else:
+    clearBits(sl, fl)
+  b.prev = nil
+  b.next = nil
+
+proc addChunkToMatrix(a: var MemRegion; b: PBigChunk) =
+  let (fl, sl) = mappingInsert(b.size)
+  b.prev = nil
+  b.next = mat()
+  if mat() != nil:
+    mat().prev = b
+  mat() = b
+  setBit(sl, a.slBitmap[fl])
+  setBit(fl, a.flBitmap)
 
 {.push stack_trace: off.}
 proc initAllocator() = discard "nothing to do anymore"
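# ---- editor's aside, not part of the patch -----------------------------
# A toy model of the two-level bitmap lookup in findSuitableBlock, with the
# per-class chunk lists reduced to bare bitmaps. RealFli = 24 follows from
# MaxFli - FliOffset above; lsbitSketch, markFree and search are hypothetical
# names for illustration only.

const RealFli = 24

var
  flBitmap: uint32
  slBitmap: array[RealFli, uint32]

proc lsbitSketch(x: uint32): int =
  # index of the lowest set bit; x must not be 0
  var v = x
  while (v and 1u32) == 0u32:
    v = v shr 1
    inc result

proc markFree(fl, sl: int) =
  # what addChunkToMatrix does to the bitmaps via setBit
  slBitmap[fl] = slBitmap[fl] or (1u32 shl sl)
  flBitmap = flBitmap or (1u32 shl fl)

proc search(fl, sl: int): tuple[fl, sl: int] =
  # first try the requested second-level list or any larger one in the class
  let tmp = slBitmap[fl] and (not 0u32 shl sl)
  if tmp != 0:
    return (fl, lsbitSketch(tmp))
  # otherwise cascade to the next non-empty first-level class; the real code
  # additionally handles the "nothing free at all" case via its fl > 0 check
  let fl2 = lsbitSketch(flBitmap and (not 0u32 shl (fl + 1)))
  result = (fl2, lsbitSketch(slBitmap[fl2]))

markFree(7, 16)                 # pretend a chunk was filed under (7, 16)
echo search(5, 3)               # -> (fl: 7, sl: 16), found by cascading up
# ---- end aside ----------------------------------------------------------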
@@ -419,7 +518,7 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
     if isAccessible(a, ri) and chunkUnused(ri):
       sysAssert(not isSmallChunk(ri), "freeBigChunk 3")
       if not isSmallChunk(ri):
-        listRemove(a.freeChunksList, cast[PBigChunk](ri))
+        removeChunkFromMatrix(a, cast[PBigChunk](ri))
         inc(c.size, ri.size)
         excl(a.chunkStarts, pageIndex(ri))
   when coalescLeft:
@@ -430,49 +529,42 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
     if isAccessible(a, le) and chunkUnused(le):
       sysAssert(not isSmallChunk(le), "freeBigChunk 5")
       if not isSmallChunk(le):
-        listRemove(a.freeChunksList, cast[PBigChunk](le))
+        removeChunkFromMatrix(a, cast[PBigChunk](le))
         inc(le.size, c.size)
         excl(a.chunkStarts, pageIndex(c))
         c = cast[PBigChunk](le)
   incl(a, a.chunkStarts, pageIndex(c))
   updatePrevSize(a, c, c.size)
-  listAdd(a.freeChunksList, c)
+  addChunkToMatrix(a, c)
   # set 'used' to false:
   c.prevSize = c.prevSize and not 1
 
 proc splitChunk(a: var MemRegion, c: PBigChunk, size: int) =
   var rest = cast[PBigChunk](cast[ByteAddress](c) +% size)
-  sysAssert(rest notin a.freeChunksList, "splitChunk")
   rest.size = c.size - size
   track("rest.origSize", addr rest.origSize, sizeof(int))
+  # XXX check if these two nil assignments are dead code given
+  # addChunkToMatrix's implementation:
   rest.next = nil
  rest.prev = nil
-  # size and not used
+  # size and not used:
   rest.prevSize = size
   sysAssert((size and 1) == 0, "splitChunk 2")
   updatePrevSize(a, c, rest.size)
   c.size = size
   incl(a, a.chunkStarts, pageIndex(rest))
-  listAdd(a.freeChunksList, rest)
+  addChunkToMatrix(a, rest)
 
 proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
   # use first fit for now:
   sysAssert((size and PageMask) == 0, "getBigChunk 1")
   sysAssert(size > 0, "getBigChunk 2")
-  result = a.freeChunksList
-  block search:
-    while result != nil:
-      sysAssert chunkUnused(result), "getBigChunk 3"
-      if result.size == size:
-        listRemove(a.freeChunksList, result)
-        break search
-      elif result.size > size:
-        listRemove(a.freeChunksList, result)
-        splitChunk(a, result, size)
-        break search
-      result = result.next
-      sysAssert result != a.freeChunksList, "getBigChunk 4"
+  var size = size # roundup(size, PageSize)
+  var fl, sl: int
+  mappingSearch(size, fl, sl)
+  result = findSuitableBlock(a, fl, sl)
+  if result == nil:
     if size < InitialMemoryRequest:
       result = requestOsChunks(a, InitialMemoryRequest)
       splitChunk(a, result, size)
@@ -481,7 +573,10 @@ proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
       # if we over allocated split the chunk:
      if result.size > size:
         splitChunk(a, result, size)
-
+  else:
+    removeChunkFromMatrix2(a, result, fl, sl)
+    if result.size >= size + PageSize:
+      splitChunk(a, result, size)
   # set 'used' to true:
   result.prevSize = 1
   track("setUsedToFalse", addr result.origSize, sizeof(int))
@@ -577,9 +672,11 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
     var c = getBigChunk(a, size)
     sysAssert c.prev == nil, "rawAlloc 10"
     sysAssert c.next == nil, "rawAlloc 11"
-    sysAssert c.size == size, "rawAlloc 12"
     result = addr(c.data)
     sysAssert((cast[ByteAddress](result) and (MemAlign-1)) == 0, "rawAlloc 13")
+    #if (cast[ByteAddress](result) and PageMask) != 0:
+    #  cprintf("Address is %p for size %ld\n", result, c.size)
+    sysAssert((cast[ByteAddress](result) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
     if a.root == nil: a.root = getBottom(a)
     add(a, a.root, cast[ByteAddress](result), cast[ByteAddress](result)+%size)
     sysAssert(isAccessible(a, result), "rawAlloc 14")
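Editor's note: dropping ``sysAssert c.size == size`` from rawAlloc is a direct
consequence of the switch to TLSF's good-fit search: mappingSearch rounds the
request up to a size class and findSuitableBlock may return a block from a
larger class, so getBigChunk now splits only when the surplus is at least one
page. A minimal sketch of that split decision, assuming PageSize = 4096
(splitDecision is a hypothetical helper, not part of the patch):

const PageSize = 4096

proc splitDecision(requested, found: int): string =
  # mirrors the new branch in getBigChunk: split only if the leftover can
  # form a free chunk of at least one page
  if found >= requested + PageSize:
    "split off a " & $(found - requested) & "-byte rest chunk"
  else:
    "hand out the whole " & $found & "-byte chunk unsplit"

echo splitDecision(3 * PageSize, 8 * PageSize)  # split off a 20480-byte rest chunk
echo splitDecision(3 * PageSize, 3 * PageSize)  # hand out the whole 12288-byte chunk unsplit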