author     Daniil Yarancev <21169548+Yardanico@users.noreply.github.com>  2018-01-07 21:02:00 +0300
committer  GitHub <noreply@github.com>  2018-01-07 21:02:00 +0300
commit     fb44c522e6173528efa8035ecc459c84887d0167 (patch)
tree       a2f5e98606be265981a5f72748896967033e23d7 /lib/system/alloc.nim
parent     ccf99fa5ce4fe992fb80dc89271faa51456c3fa5 (diff)
parent     e23ea64c41e101d4e1d933f0b015f51cc6c2f7de (diff)
download   Nim-fb44c522e6173528efa8035ecc459c84887d0167.tar.gz
Merge pull request #1 from nim-lang/devel
upstream
Diffstat (limited to 'lib/system/alloc.nim')
-rw-r--r--  lib/system/alloc.nim  186
1 file changed, 146 insertions, 40 deletions
diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index 78db96e77..e274e8e0c 100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -8,8 +8,6 @@
 #
 
 # Low level allocator for Nim. Has been designed to support the GC.
-# TODO:
-# - make searching for block O(1)
 {.push profiler:off.}
 
 include osalloc
@@ -19,14 +17,17 @@ template track(op, address, size) =
     memTrackerOp(op, address, size)
 
 # We manage *chunks* of memory. Each chunk is a multiple of the page size.
-# Each chunk starts at an address that is divisible by the page size. Chunks
-# that are bigger than ``ChunkOsReturn`` are returned back to the operating
-# system immediately.
+# Each chunk starts at an address that is divisible by the page size.
 
 const
-  ChunkOsReturn = 256 * PageSize # 1 MB
-  InitialMemoryRequest = ChunkOsReturn div 2 # < ChunkOsReturn!
+  InitialMemoryRequest = 128 * PageSize # 0.5 MB
   SmallChunkSize = PageSize
+  MaxFli = 30
+  MaxLog2Sli = 5 # 32, this cannot be increased without changing 'uint32'
+                 # everywhere!
+  MaxSli = 1 shl MaxLog2Sli
+  FliOffset = 6
+  RealFli = MaxFli - FliOffset
 
 type
   PTrunk = ptr Trunk
@@ -99,10 +100,12 @@ type
   MemRegion = object
     minLargeObj, maxLargeObj: int
     freeSmallChunks: array[0..SmallChunkSize div MemAlign-1, PSmallChunk]
+    flBitmap: uint32
+    slBitmap: array[RealFli, uint32]
+    matrix: array[RealFli, array[MaxSli, PBigChunk]]
     llmem: PLLChunk
     currMem, maxMem, freeMem: int # memory sizes (allocated from OS)
     lastSize: int # needed for the case that OS gives us pages linearly
-    freeChunksList: PBigChunk # XXX make this a datastructure with O(1) access
     chunkStarts: IntSet
     root, deleted, last, freeAvlNodes: PAvlNode
     locked, blockChunkSizeIncrease: bool # if locked, we cannot free pages.
@@ -110,7 +113,109 @@ type
     bottomData: AvlNode
     heapLinks: HeapLinks
 
-{.deprecated: [TMemRegion: MemRegion].}
+const
+  fsLookupTable: array[byte, int8] = [
+    -1'i8, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7
+  ]
+
+proc msbit(x: uint32): int {.inline.} =
+  let a = if x <= 0xff_ff:
+            (if x <= 0xff: 0 else: 8)
+          else:
+            (if x <= 0xff_ff_ff: 16 else: 24)
+  result = int(fsLookupTable[byte(x shr a)]) + a
+
+proc lsbit(x: uint32): int {.inline.} =
+  msbit(x and ((not x) + 1))
+
+proc setBit(nr: int; dest: var uint32) {.inline.} =
+  dest = dest or (1u32 shl (nr and 0x1f))
+
+proc clearBit(nr: int; dest: var uint32) {.inline.} =
+  dest = dest and not (1u32 shl (nr and 0x1f))
+
+proc mappingSearch(r, fl, sl: var int) {.inline.} =
+  #let t = (1 shl (msbit(uint32 r) - MaxLog2Sli)) - 1
+  # This diverges from the standard TLSF algorithm because we need to ensure
+  # PageSize alignment:
+  let t = roundup((1 shl (msbit(uint32 r) - MaxLog2Sli)), PageSize) - 1
+  r = r + t
+  fl = msbit(uint32 r)
+  sl = (r shr (fl - MaxLog2Sli)) - MaxSli
+  dec fl, FliOffset
+  r = r and not t
+  sysAssert((r and PageMask) == 0, "mappingSearch: still not aligned")
+
+# See http://www.gii.upv.es/tlsf/files/papers/tlsf_desc.pdf for details of
+# this algorithm.
+
+proc mappingInsert(r: int): tuple[fl, sl: int] {.inline.} =
+  sysAssert((r and PageMask) == 0, "mappingInsert: still not aligned")
+  result.fl = msbit(uint32 r)
+  result.sl = (r shr (result.fl - MaxLog2Sli)) - MaxSli
+  dec result.fl, FliOffset
+
+template mat(): untyped = a.matrix[fl][sl]
+
+proc findSuitableBlock(a: MemRegion; fl, sl: var int): PBigChunk {.inline.} =
+  let tmp = a.slBitmap[fl] and (not 0u32 shl sl)
+  result = nil
+  if tmp != 0:
+    sl = lsbit(tmp)
+    result = mat()
+  else:
+    fl = lsbit(a.flBitmap and (not 0u32 shl (fl + 1)))
+    if fl > 0:
+      sl = lsbit(a.slBitmap[fl])
+      result = mat()
+
+template clearBits(sl, fl) =
+  clearBit(sl, a.slBitmap[fl])
+  if a.slBitmap[fl] == 0u32:
+    # do not forget to cascade:
+    clearBit(fl, a.flBitmap)
+
+proc removeChunkFromMatrix(a: var MemRegion; b: PBigChunk) =
+  let (fl, sl) = mappingInsert(b.size)
+  if b.next != nil: b.next.prev = b.prev
+  if b.prev != nil: b.prev.next = b.next
+  if mat() == b:
+    mat() = b.next
+    if mat() == nil:
+      clearBits(sl, fl)
+  b.prev = nil
+  b.next = nil
+
+proc removeChunkFromMatrix2(a: var MemRegion; b: PBigChunk; fl, sl: int) =
+  mat() = b.next
+  if mat() != nil:
+    mat().prev = nil
+  else:
+    clearBits(sl, fl)
+  b.prev = nil
+  b.next = nil
+
+proc addChunkToMatrix(a: var MemRegion; b: PBigChunk) =
+  let (fl, sl) = mappingInsert(b.size)
+  b.prev = nil
+  b.next = mat()
+  if mat() != nil:
+    mat().prev = b
+  mat() = b
+  setBit(sl, a.slBitmap[fl])
+  setBit(fl, a.flBitmap)
 
 {.push stack_trace: off.}
 proc initAllocator() = discard "nothing to do anymore"
@@ -203,6 +308,7 @@ proc llDeallocAll(a: var MemRegion) =
     var next = it.next
     osDeallocPages(it, PageSize)
     it = next
+  a.llmem = nil
 
 proc intSetGet(t: IntSet, key: int): PTrunk =
   var it = t.data[key and high(t.data)]
@@ -301,13 +407,14 @@ proc pageAddr(p: pointer): PChunk {.inline.} =
   result = cast[PChunk](cast[ByteAddress](p) and not PageMask)
   #sysAssert(Contains(allocator.chunkStarts, pageIndex(result)))
 
-proc writeFreeList(a: MemRegion) =
-  var it = a.freeChunksList
-  c_fprintf(stdout, "freeChunksList: %p\n", it)
-  while it != nil:
-    c_fprintf(stdout, "it: %p, next: %p, prev: %p, size: %ld\n",
-              it, it.next, it.prev, it.size)
-    it = it.next
+when false:
+  proc writeFreeList(a: MemRegion) =
+    var it = a.freeChunksList
+    c_fprintf(stdout, "freeChunksList: %p\n", it)
+    while it != nil:
+      c_fprintf(stdout, "it: %p, next: %p, prev: %p, size: %ld\n",
+                it, it.next, it.prev, it.size)
+      it = it.next
 
 const
   nimMaxHeap {.intdefine.} = 0
@@ -368,6 +475,7 @@ proc requestOsChunks(a: var MemRegion, size: int): PBigChunk =
     result.prevSize = 0 or (result.prevSize and 1) # unknown
     # but do not overwrite 'used' field
   a.lastSize = size # for next request
+  sysAssert((cast[int](result) and PageMask) == 0, "requestOschunks: unaligned chunk")
 
 proc isAccessible(a: MemRegion, p: pointer): bool {.inline.} =
   result = contains(a.chunkStarts, pageIndex(p))
@@ -418,7 +526,7 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
     if isAccessible(a, ri) and chunkUnused(ri):
       sysAssert(not isSmallChunk(ri), "freeBigChunk 3")
       if not isSmallChunk(ri):
-        listRemove(a.freeChunksList, cast[PBigChunk](ri))
+        removeChunkFromMatrix(a, cast[PBigChunk](ri))
         inc(c.size, ri.size)
         excl(a.chunkStarts, pageIndex(ri))
   when coalescLeft:
@@ -429,49 +537,44 @@ proc freeBigChunk(a: var MemRegion, c: PBigChunk) =
       if isAccessible(a, le) and chunkUnused(le):
         sysAssert(not isSmallChunk(le), "freeBigChunk 5")
         if not isSmallChunk(le):
-          listRemove(a.freeChunksList, cast[PBigChunk](le))
+          removeChunkFromMatrix(a, cast[PBigChunk](le))
           inc(le.size, c.size)
           excl(a.chunkStarts, pageIndex(c))
           c = cast[PBigChunk](le)
 
   incl(a, a.chunkStarts, pageIndex(c))
   updatePrevSize(a, c, c.size)
-  listAdd(a.freeChunksList, c)
+  addChunkToMatrix(a, c)
   # set 'used' to false:
   c.prevSize = c.prevSize and not 1
 
 proc splitChunk(a: var MemRegion, c: PBigChunk, size: int) =
   var rest = cast[PBigChunk](cast[ByteAddress](c) +% size)
-  sysAssert(rest notin a.freeChunksList, "splitChunk")
   rest.size = c.size - size
   track("rest.origSize", addr rest.origSize, sizeof(int))
+  # XXX check if these two nil assignments are dead code given
+  # addChunkToMatrix's implementation:
  rest.next = nil
   rest.prev = nil
-  # size and not used
+  # size and not used:
   rest.prevSize = size
   sysAssert((size and 1) == 0, "splitChunk 2")
+  sysAssert((size and PageMask) == 0,
+    "splitChunk: size is not a multiple of the PageSize")
   updatePrevSize(a, c, rest.size)
   c.size = size
   incl(a, a.chunkStarts, pageIndex(rest))
-  listAdd(a.freeChunksList, rest)
+  addChunkToMatrix(a, rest)
 
 proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
   # use first fit for now:
-  sysAssert((size and PageMask) == 0, "getBigChunk 1")
   sysAssert(size > 0, "getBigChunk 2")
-  result = a.freeChunksList
-  block search:
-    while result != nil:
-      sysAssert chunkUnused(result), "getBigChunk 3"
-      if result.size == size:
-        listRemove(a.freeChunksList, result)
-        break search
-      elif result.size > size:
-        listRemove(a.freeChunksList, result)
-        splitChunk(a, result, size)
-        break search
-      result = result.next
-    sysAssert result != a.freeChunksList, "getBigChunk 4"
+  var size = size # roundup(size, PageSize)
+  var fl, sl: int
+  mappingSearch(size, fl, sl)
+  sysAssert((size and PageMask) == 0, "getBigChunk: unaligned chunk")
+  result = findSuitableBlock(a, fl, sl)
+  if result == nil:
     if size < InitialMemoryRequest:
       result = requestOsChunks(a, InitialMemoryRequest)
       splitChunk(a, result, size)
@@ -480,7 +583,10 @@ proc getBigChunk(a: var MemRegion, size: int): PBigChunk =
       # if we over allocated split the chunk:
       if result.size > size:
         splitChunk(a, result, size)
-
+  else:
+    removeChunkFromMatrix2(a, result, fl, sl)
+    if result.size >= size + PageSize:
+      splitChunk(a, result, size)
   # set 'used' to to true:
   result.prevSize = 1
   track("setUsedToFalse", addr result.origSize, sizeof(int))
@@ -571,14 +677,14 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
                size == 0, "rawAlloc 21")
     sysAssert(allocInv(a), "rawAlloc: end small size")
   else:
-    size = roundup(requestedSize+bigChunkOverhead(), PageSize)
+    size = requestedSize + bigChunkOverhead() #  roundup(requestedSize+bigChunkOverhead(), PageSize)
     # allocate a large block
     var c = getBigChunk(a, size)
     sysAssert c.prev == nil, "rawAlloc 10"
     sysAssert c.next == nil, "rawAlloc 11"
-    sysAssert c.size == size, "rawAlloc 12"
     result = addr(c.data)
-    sysAssert((cast[ByteAddress](result) and (MemAlign-1)) == 0, "rawAlloc 13")
+    sysAssert((cast[ByteAddress](c) and (MemAlign-1)) == 0, "rawAlloc 13")
+    sysAssert((cast[ByteAddress](c) and PageMask) == 0, "rawAlloc: Not aligned on a page boundary")
     if a.root == nil: a.root = getBottom(a)
     add(a, a.root, cast[ByteAddress](result), cast[ByteAddress](result)+%size)
   sysAssert(isAccessible(a, result), "rawAlloc 14")
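
Notes on the TLSF scheme introduced by this patch.

The patch keeps free big chunks in a TLSF-style two-level matrix indexed by a first-level index (fl, roughly the power-of-two size class) and a second-level index (sl, one of MaxSli linear subdivisions of that class). The following standalone Nim sketch is not part of the patch: it reproduces mappingInsert's arithmetic with the patch's constants so you can see which matrix cell a given chunk size lands in. PageSize = 4096 and the loop-based msbit (the patch uses the fsLookupTable instead) are assumptions made only for the sketch.

# sketch_mapping.nim -- illustrative only
const
  PageSize = 4096            # assumption; the real value comes from osalloc
  MaxLog2Sli = 5             # same constants as in the patch
  MaxSli = 1 shl MaxLog2Sli
  FliOffset = 6

proc msbit(x: uint32): int =
  ## index of the most significant set bit (0-based); a plain loop instead
  ## of the patch's fsLookupTable
  var x = x
  result = -1
  while x != 0u32:
    x = x shr 1
    inc result

proc mappingInsert(r: int): tuple[fl, sl: int] =
  ## fl picks the power-of-two size class, sl one of MaxSli linear
  ## subdivisions inside it -- the indices used for a.matrix[fl][sl]
  result.fl = msbit(uint32 r)
  result.sl = (r shr (result.fl - MaxLog2Sli)) - MaxSli
  dec result.fl, FliOffset

when isMainModule:
  for size in [PageSize, 2*PageSize, 3*PageSize, 16*PageSize, 256*PageSize]:
    let (fl, sl) = mappingInsert(size)
    echo size, " bytes -> matrix[", fl, "][", sl, "]"

Running the sketch shows, for example, that a one-page chunk and a two-page chunk land in different first-level bins, while 3*PageSize shares the first-level bin with 2*PageSize but gets a different second-level slot.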
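mappingSearch deliberately diverges from textbook TLSF rounding: it rounds the request up by a granule that is itself a multiple of PageSize, so the size later handed to splitChunk stays page aligned (the patch adds a sysAssert for exactly that). A small sketch of the difference follows; PageSize = 4096, the loop-based msbit and the local roundup are stand-ins assumed for the sketch, not the allocator's own helpers.

# sketch_rounding.nim -- illustrative only
const
  PageSize = 4096   # assumption for the sketch
  MaxLog2Sli = 5

proc msbit(x: int): int =
  var x = x
  result = -1
  while x != 0:
    x = x shr 1
    inc result

proc roundup(x, v: int): int =
  ## round x up to the next multiple of v (v must be a power of two)
  (x + v - 1) and not (v - 1)

proc textbookRound(r: int): int =
  ## classic TLSF: round up to the next second-level boundary
  let t = (1 shl (msbit(r) - MaxLog2Sli)) - 1
  (r + t) and not t

proc pageAlignedRound(r: int): int =
  ## the patch's variant: make the rounding granule a PageSize multiple first
  let t = roundup(1 shl (msbit(r) - MaxLog2Sli), PageSize) - 1
  (r + t) and not t

when isMainModule:
  let r = 10040   # say, a ~10 KB request plus the big-chunk header
  echo textbookRound(r)     # 10240: not a multiple of PageSize
  echo pageAlignedRound(r)  # 12288: stays page aligned, as splitChunk asserts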
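findSuitableBlock answers "which bin at least this large is non-empty?" in O(1) by consulting flBitmap and slBitmap instead of walking a free list, which is what allows the patch to delete the old "make searching for block O(1)" TODO. Below is a self-contained sketch of that cascade; the names FreeBins, markNonEmpty and findBinAtLeast are invented for illustration and are not the patch's API, and lsbit is again a plain loop rather than the lookup-table version.

# sketch_bitmaps.nim -- illustrative only
const
  RealFli = 24

type FreeBins = object
  flBitmap: uint32                    # one bit per first-level bin
  slBitmap: array[RealFli, uint32]    # one bit per second-level bin

proc lsbit(x: uint32): int =
  ## index of the least significant set bit; -1 if x == 0
  if x == 0u32: return -1
  var x = x
  result = 0
  while (x and 1u32) == 0u32:
    x = x shr 1
    inc result

proc markNonEmpty(b: var FreeBins; fl, sl: int) =
  ## what addChunkToMatrix does with setBit: flag bin [fl][sl] as non-empty
  b.slBitmap[fl] = b.slBitmap[fl] or (1u32 shl sl)
  b.flBitmap = b.flBitmap or (1u32 shl fl)

proc findBinAtLeast(b: FreeBins; fl, sl: int): tuple[fl, sl: int] =
  ## same search order as the patch's findSuitableBlock: try the requested
  ## first-level bin, otherwise cascade to any higher first-level bin
  let tmp = b.slBitmap[fl] and (not 0u32 shl sl)
  if tmp != 0u32:
    return (fl, lsbit(tmp))
  let higher = lsbit(b.flBitmap and (not 0u32 shl (fl + 1)))
  if higher > 0:
    return (higher, lsbit(b.slBitmap[higher]))
  return (-1, -1)   # nothing big enough is free

when isMainModule:
  var bins: FreeBins
  bins.markNonEmpty(6, 3)          # pretend a free chunk sits in matrix[6][3]
  bins.markNonEmpty(9, 0)          # and another one in matrix[9][0]
  echo bins.findBinAtLeast(6, 0)   # (fl: 6, sl: 3)
  echo bins.findBinAtLeast(6, 4)   # (fl: 9, sl: 0) -- cascades to a bigger bin
  echo bins.findBinAtLeast(10, 0)  # (fl: -1, sl: -1) -- no fit, ask the OS

The second query is the interesting case: the requested second-level slot and everything above it in bin 6 is empty, so the search falls back to the first-level bitmap and picks the smallest non-empty larger class, exactly the situation where getBigChunk then calls removeChunkFromMatrix2 and splits off the surplus with splitChunk.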