summary refs log tree commit diff stats
path: root/compiler/ic/bitabs.nim
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/ic/bitabs.nim')
-rw-r--r--compiler/ic/bitabs.nim178
1 files changed, 178 insertions, 0 deletions
diff --git a/compiler/ic/bitabs.nim b/compiler/ic/bitabs.nim
new file mode 100644
index 000000000..0c9994c83
--- /dev/null
+++ b/compiler/ic/bitabs.nim
@@ -0,0 +1,178 @@
+## A BiTable is a table that can be seen as an optimized pair
+## of `(Table[LitId, Val], Table[Val, LitId])`.
+
+import std/hashes
+import rodfiles
+
+when defined(nimPreviewSlimSystem):
+  import std/assertions
+
+type
+  LitId* = distinct uint32
+
+  BiTable*[T] = object
+    vals: seq[T] # indexed by LitId
+    keys: seq[LitId]  # indexed by hash(val)
+
+proc initBiTable*[T](): BiTable[T] = BiTable[T](vals: @[], keys: @[])
+
+proc nextTry(h, maxHash: Hash): Hash {.inline.} =
+  result = (h + 1) and maxHash
+
+template maxHash(t): untyped = high(t.keys)
+template isFilled(x: LitId): bool = x.uint32 > 0'u32
+
+proc `$`*(x: LitId): string {.borrow.}
+proc `<`*(x, y: LitId): bool {.borrow.}
+proc `<=`*(x, y: LitId): bool {.borrow.}
+proc `==`*(x, y: LitId): bool {.borrow.}
+proc hash*(x: LitId): Hash {.borrow.}
+
+
+proc len*[T](t: BiTable[T]): int = t.vals.len
+
+proc mustRehash(length, counter: int): bool {.inline.} =
+  assert(length > counter)
+  result = (length * 2 < counter * 3) or (length - counter < 4)
+
+const
+  idStart = 1
+
+template idToIdx(x: LitId): int = x.int - idStart
+
+proc hasLitId*[T](t: BiTable[T]; x: LitId): bool =
+  let idx = idToIdx(x)
+  result = idx >= 0 and idx < t.vals.len
+
+proc enlarge[T](t: var BiTable[T]) =
+  var n: seq[LitId]
+  newSeq(n, len(t.keys) * 2)
+  swap(t.keys, n)
+  for i in 0..high(n):
+    let eh = n[i]
+    if isFilled(eh):
+      var j = hash(t.vals[idToIdx eh]) and maxHash(t)
+      while isFilled(t.keys[j]):
+        j = nextTry(j, maxHash(t))
+      t.keys[j] = move n[i]
+
+proc getKeyId*[T](t: BiTable[T]; v: T): LitId =
+  let origH = hash(v)
+  var h = origH and maxHash(t)
+  if t.keys.len != 0:
+    while true:
+      let litId = t.keys[h]
+      if not isFilled(litId): break
+      if t.vals[idToIdx t.keys[h]] == v: return litId
+      h = nextTry(h, maxHash(t))
+  return LitId(0)
+
+proc getOrIncl*[T](t: var BiTable[T]; v: T): LitId =
+  let origH = hash(v)
+  var h = origH and maxHash(t)
+  if t.keys.len != 0:
+    while true:
+      let litId = t.keys[h]
+      if not isFilled(litId): break
+      if t.vals[idToIdx t.keys[h]] == v: return litId
+      h = nextTry(h, maxHash(t))
+    # not found, we need to insert it:
+    if mustRehash(t.keys.len, t.vals.len):
+      enlarge(t)
+      # recompute where to insert:
+      h = origH and maxHash(t)
+      while true:
+        let litId = t.keys[h]
+        if not isFilled(litId): break
+        h = nextTry(h, maxHash(t))
+  else:
+    setLen(t.keys, 16)
+    h = origH and maxHash(t)
+
+  result = LitId(t.vals.len + idStart)
+  t.keys[h] = result
+  t.vals.add v
+
+
+proc `[]`*[T](t: var BiTable[T]; litId: LitId): var T {.inline.} =
+  let idx = idToIdx litId
+  assert idx < t.vals.len
+  result = t.vals[idx]
+
+proc `[]`*[T](t: BiTable[T]; litId: LitId): lent T {.inline.} =
+  let idx = idToIdx litId
+  assert idx < t.vals.len
+  result = t.vals[idx]
+
+proc hash*[T](t: BiTable[T]): Hash =
+  ## as the keys are hashes of the values, we simply use them instead
+  var h: Hash = 0
+  for i, n in pairs t.keys:
+    h = h !& hash((i, n))
+  result = !$h
+
+proc store*[T](f: var RodFile; t: BiTable[T]) =
+  storeSeq(f, t.vals)
+  storeSeq(f, t.keys)
+
+proc load*[T](f: var RodFile; t: var BiTable[T]) =
+  loadSeq(f, t.vals)
+  loadSeq(f, t.keys)
+
+proc sizeOnDisc*(t: BiTable[string]): int =
+  result = 4
+  for x in t.vals:
+    result += x.len + 4
+  result += t.keys.len * sizeof(LitId)
+
+when isMainModule:
+
+  var t: BiTable[string]
+
+  echo getOrIncl(t, "hello")
+
+  echo getOrIncl(t, "hello")
+  echo getOrIncl(t, "hello3")
+  echo getOrIncl(t, "hello4")
+  echo getOrIncl(t, "helloasfasdfdsa")
+  echo getOrIncl(t, "hello")
+  echo getKeyId(t, "hello")
+  echo getKeyId(t, "none")
+
+  for i in 0 ..< 100_000:
+    discard t.getOrIncl($i & "___" & $i)
+
+  for i in 0 ..< 100_000:
+    assert t.getOrIncl($i & "___" & $i).idToIdx == i + 4
+  echo "begin"
+  echo t.vals.len
+
+  echo t.vals[0]
+  echo t.vals[1004]
+
+  echo "middle"
+
+  var tf: BiTable[float]
+
+  discard tf.getOrIncl(0.4)
+  discard tf.getOrIncl(16.4)
+  discard tf.getOrIncl(32.4)
+  echo getKeyId(tf, 32.4)
+
+  var f2 = open("testblah.bin", fmWrite)
+  echo store(f2, tf)
+  f2.close
+
+  var f1 = open("testblah.bin", fmRead)
+
+  var t2: BiTable[float]
+
+  echo f1.load(t2)
+  echo t2.vals.len
+
+  echo getKeyId(t2, 32.4)
+
+  echo "end"
+
+
+  f1.close