import hashes, math type TSlotEnum = enum seEmpty, seFilled, seDeleted TKeyValuePair[A, B] = tuple[slot: TSlotEnum, key: A, val: B] TKeyValuePairSeq[A, B] = seq[TKeyValuePair[A, B]] TOrderedKeyValuePair[A, B] = tuple[ slot: TSlotEnum, next: int, key: A, val: B] TOrderedKeyValuePairSeq[A, B] = seq[TOrderedKeyValuePair[A, B]] TOrderedTable*[A, B] = object ## table that remembers insertion order data: TOrderedKeyValuePairSeq[A, B] counter, first, last: int const growthFactor = 2 proc mustRehash(length, counter: int): bool {.inline.} = assert(length > counter) result = (length * 2 < counter * 3) or (length - counter < 4) proc nextTry(h, maxHash: Hash): Hash {.inline.} = result = ((5 * h) + 1) and maxHash template rawGetImpl() {.dirty.} = var h: Hash = hash(key) and high(t.data) # start with real hash value while t.data[h].slot != seEmpty: if t.data[h].key == key and t.data[h].slot == seFilled: return h h = nextTry(h, high(t.data)) result = -1 template rawInsertImpl() {.dirty.} = var h: Hash = hash(key) and high(data) while data[h].slot == seFilled: h = nextTry(h, high(data)) data[h].key = key data[h].val = val data[h].slot = seFilled template addImpl() {.dirty.} = if mustRehash(len(t.data), t.counter): enlarge(t) rawInsert(t, t.data, key, val) inc(t.counter) template putImpl() {.dirty.} = var index = rawGet(t, key) if index >= 0: t.data[index].val = val else: addImpl() proc len*[A, B](t: TOrderedTable[A, B]): int {.inline.} = ## returns the number of keys in `t`. result = t.counter template forAllOrderedPairs(yieldStmt: untyped) {.dirty.} = var h = t.first while h >= 0: var nxt = t.data[h].next if t.data[h].slot == seFilled: yieldStmt h = nxt iterator pairs*[A, B](t: TOrderedTable[A, B]): tuple[key: A, val: B] = ## iterates over any (key, value) pair in the table `t` in insertion ## order. forAllOrderedPairs: yield (t.data[h].key, t.data[h].val) iterator mpairs*[A, B](t: var TOrderedTable[A, B]): tuple[key: A, val: var B] = ## iterates over any (key, value) pair in the table `t` in insertion ## order. The values can be modified. forAllOrderedPairs: yield (t.data[h].key, t.data[h].val) iterator keys*[A, B](t: TOrderedTable[A, B]): A = ## iterates over any key in the table `t` in insertion order. forAllOrderedPairs: yield t.data[h].key iterator values*[A, B](t: TOrderedTable[A, B]): B = ## iterates over any value in the table `t` in insertion order. forAllOrderedPairs: yield t.data[h].val iterator mvalues*[A, B](t: var TOrderedTable[A, B]): var B = ## iterates over any value in the table `t` in insertion order. The values ## can be modified. forAllOrderedPairs: yield t.data[h].val proc rawGet[A, B](t: TOrderedTable[A, B], key: A): int = rawGetImpl() proc `[]`*[A, B](t: TOrderedTable[A, B], key: A): B = ## retrieves the value at ``t[key]``. If `key` is not in `t`, ## default empty value for the type `B` is returned ## and no exception is raised. One can check with ``hasKey`` whether the key ## exists. var index = rawGet(t, key) if index >= 0: result = t.data[index].val proc mget*[A, B](t: var TOrderedTable[A, B], key: A): var B = ## retrieves the value at ``t[key]``. The value can be modified. ## If `key` is not in `t`, the ``EInvalidKey`` exception is raised. var index = rawGet(t, key) if index >= 0: result = t.data[index].val else: raise newException(KeyError, "key not found: " & $key) proc hasKey*[A, B](t: TOrderedTable[A, B], key: A): bool = ## returns true iff `key` is in the table `t`. result = rawGet(t, key) >= 0 proc rawInsert[A, B](t: var TOrderedTable[A, B], data: var TOrderedKeyValuePairSeq[A, B], key: A, val: B) = rawInsertImpl() data[h].next = -1 if t.first < 0: t.first = h if t.last >= 0: data[t.last].next = h t.last = h proc enlarge[A, B](t: var TOrderedTable[A, B]) = var n: TOrderedKeyValuePairSeq[A, B] newSeq(n, len(t.data) * growthFactor) var h = t.first t.first = -1 t.last = -1 while h >= 0: var nxt = t.data[h].next if t.data[h].slot == seFilled: rawInsert(t, n, t.data[h].key, t.data[h].val) h = nxt swap(t.data, n) proc `[]=`*[A, B](t: var TOrderedTable[A, B], key: A, val: B) = ## puts a (key, value)-pair into `t`. putImpl() proc add*[A, B](t: var TOrderedTable[A, B], key: A, val: B) = ## puts a new (key, value)-pair into `t` even if ``t[key]`` already exists. addImpl() proc initOrderedTable*[A, B](initialSize=64): TOrderedTable[A, B] = ## creates a new ordered hash table that is empty. `initialSize` needs to be ## a power of two. assert isPowerOfTwo(initialSize) result.counter = 0 result.first = -1 result.last = -1 newSeq(result.data, initialSize) proc toOrderedTable*[A, B](pairs: openarray[tuple[key: A, val: B]]): TOrderedTable[A, B] = ## creates a new ordered hash table that contains the given `pairs`. result = initOrderedTable[A, B](nextPowerOfTwo(pairs.len+10)) for key, val in items(pairs): result[key] = val proc sort*[A, B](t: var TOrderedTable[A,B], cmp: proc (x, y: tuple[key: A, val: B]): int {.closure.}) = ## sorts the ordered table so that the entry with the highest counter comes ## first. This is destructive (with the advantage of being efficient)! ## You must not modify `t` afterwards! ## You can use the iterators `pairs`, `keys`, and `values` to iterate over ## `t` in the sorted order. # we use shellsort here; fast enough and simple var h = 1 while true: h = 3 * h + 1 if h >= high(t.data): break while true: h = h div 3 for i in countup(h, high(t.data)): var j = i #echo(t.data.len, " ", j, " - ", h) #echo(repr(t.data[j-h])) proc rawCmp(x, y: TOrderedKeyValuePair[A, B]): int = if x.slot in {seEmpty, seDeleted} and y.slot in {seEmpty, seDeleted}: return 0 elif x.slot in {seEmpty, seDeleted}: return -1 elif y.slot in {seEmpty, seDeleted}: return 1 else: let item1 = (x.key, x.val) let item2 = (y.key, y.val) return cmp(item1, item2) while rawCmp(t.data[j-h], t.data[j]) <= 0: swap(t.data[j], t.data[j-h]) j = j-h if j < h: break if h == 1: break