diff options
Diffstat (limited to 'tests/misc/tsimplesort.nim')
-rw-r--r-- | tests/misc/tsimplesort.nim | 313 |
1 files changed, 313 insertions, 0 deletions
diff --git a/tests/misc/tsimplesort.nim b/tests/misc/tsimplesort.nim new file mode 100644 index 000000000..0167ca78a --- /dev/null +++ b/tests/misc/tsimplesort.nim @@ -0,0 +1,313 @@ +discard """ + output: '''true''' +""" + +import hashes, math + + +when defined(shallowADT): + {.pragma: myShallow, shallow.} +else: + {.pragma: myShallow.} + +type + TSlotEnum = enum seEmpty, seFilled, seDeleted + TKeyValuePair[A, B] = tuple[slot: TSlotEnum, key: A, val: B] + TKeyValuePairSeq[A, B] = seq[TKeyValuePair[A, B]] + TTable* {.final, myShallow.}[A, B] = object + data: TKeyValuePairSeq[A, B] + counter: int + +proc len*[A, B](t: TTable[A, B]): int = + ## returns the number of keys in `t`. + result = t.counter + +iterator pairs*[A, B](t: TTable[A, B]): tuple[key: A, val: B] = + ## iterates over any (key, value) pair in the table `t`. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield (t.data[h].key, t.data[h].val) + +iterator keys*[A, B](t: TTable[A, B]): A = + ## iterates over any key in the table `t`. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield t.data[h].key + +iterator values*[A, B](t: TTable[A, B]): B = + ## iterates over any value in the table `t`. + for h in 0..high(t.data): + if t.data[h].slot == seFilled: yield t.data[h].val + +const + growthFactor = 2 + +proc mustRehash(length, counter: int): bool {.inline.} = + assert(length > counter) + result = (length * 2 < counter * 3) or (length - counter < 4) + +proc nextTry(h, maxHash: THash): THash {.inline.} = + result = ((5 * h) + 1) and maxHash + +template rawGetImpl() = + var h: THash = hash(key) and high(t.data) # start with real hash value + while t.data[h].slot != seEmpty: + if t.data[h].key == key and t.data[h].slot == seFilled: + return h + h = nextTry(h, high(t.data)) + result = -1 + +template rawInsertImpl() = + var h: THash = hash(key) and high(data) + while data[h].slot == seFilled: + h = nextTry(h, high(data)) + data[h].key = key + data[h].val = val + data[h].slot = seFilled + +proc RawGet[A, B](t: TTable[A, B], key: A): int = + rawGetImpl() + +proc `[]`*[A, B](t: TTable[A, B], key: A): B = + ## retrieves the value at ``t[key]``. If `key` is not in `t`, + ## default empty value for the type `B` is returned + ## and no exception is raised. One can check with ``hasKey`` whether the key + ## exists. + var index = RawGet(t, key) + if index >= 0: result = t.data[index].val + +proc hasKey*[A, B](t: TTable[A, B], key: A): bool = + ## returns true iff `key` is in the table `t`. + result = rawGet(t, key) >= 0 + +proc RawInsert[A, B](t: var TTable[A, B], data: var TKeyValuePairSeq[A, B], + key: A, val: B) = + rawInsertImpl() + +proc Enlarge[A, B](t: var TTable[A, B]) = + var n: TKeyValuePairSeq[A, B] + newSeq(n, len(t.data) * growthFactor) + for i in countup(0, high(t.data)): + if t.data[i].slot == seFilled: RawInsert(t, n, t.data[i].key, t.data[i].val) + swap(t.data, n) + +template PutImpl() = + var index = RawGet(t, key) + if index >= 0: + t.data[index].val = val + else: + if mustRehash(len(t.data), t.counter): Enlarge(t) + RawInsert(t, t.data, key, val) + inc(t.counter) + +proc `[]=`*[A, B](t: var TTable[A, B], key: A, val: B) = + ## puts a (key, value)-pair into `t`. + putImpl() + +proc del*[A, B](t: var TTable[A, B], key: A) = + ## deletes `key` from hash table `t`. + var index = RawGet(t, key) + if index >= 0: + t.data[index].slot = seDeleted + dec(t.counter) + +proc initTable*[A, B](initialSize=64): TTable[A, B] = + ## creates a new hash table that is empty. `initialSize` needs to be + ## a power of two. + assert isPowerOfTwo(initialSize) + result.counter = 0 + newSeq(result.data, initialSize) + +proc toTable*[A, B](pairs: openarray[tuple[key: A, + val: B]]): TTable[A, B] = + ## creates a new hash table that contains the given `pairs`. + result = initTable[A, B](nextPowerOfTwo(pairs.len+10)) + for key, val in items(pairs): result[key] = val + +template dollarImpl(): stmt = + if t.len == 0: + result = "{:}" + else: + result = "{" + for key, val in pairs(t): + if result.len > 1: result.add(", ") + result.add($key) + result.add(": ") + result.add($val) + result.add("}") + +proc `$`*[A, B](t: TTable[A, B]): string = + ## The `$` operator for hash tables. + dollarImpl() + +# ------------------------------ count tables ------------------------------- + +type + TCountTable* {.final, myShallow.}[ + A] = object ## table that counts the number of each key + data: seq[tuple[key: A, val: int]] + counter: int + +proc len*[A](t: TCountTable[A]): int = + ## returns the number of keys in `t`. + result = t.counter + +iterator pairs*[A](t: TCountTable[A]): tuple[key: A, val: int] = + ## iterates over any (key, value) pair in the table `t`. + for h in 0..high(t.data): + if t.data[h].val != 0: yield (t.data[h].key, t.data[h].val) + +iterator keys*[A](t: TCountTable[A]): A = + ## iterates over any key in the table `t`. + for h in 0..high(t.data): + if t.data[h].val != 0: yield t.data[h].key + +iterator values*[A](t: TCountTable[A]): int = + ## iterates over any value in the table `t`. + for h in 0..high(t.data): + if t.data[h].val != 0: yield t.data[h].val + +proc RawGet[A](t: TCountTable[A], key: A): int = + var h: THash = hash(key) and high(t.data) # start with real hash value + while t.data[h].val != 0: + if t.data[h].key == key: return h + h = nextTry(h, high(t.data)) + result = -1 + +proc `[]`*[A](t: TCountTable[A], key: A): int = + ## retrieves the value at ``t[key]``. If `key` is not in `t`, + ## 0 is returned. One can check with ``hasKey`` whether the key + ## exists. + var index = RawGet(t, key) + if index >= 0: result = t.data[index].val + +proc hasKey*[A](t: TCountTable[A], key: A): bool = + ## returns true iff `key` is in the table `t`. + result = rawGet(t, key) >= 0 + +proc RawInsert[A](t: TCountTable[A], data: var seq[tuple[key: A, val: int]], + key: A, val: int) = + var h: THash = hash(key) and high(data) + while data[h].val != 0: h = nextTry(h, high(data)) + data[h].key = key + data[h].val = val + +proc Enlarge[A](t: var TCountTable[A]) = + var n: seq[tuple[key: A, val: int]] + newSeq(n, len(t.data) * growthFactor) + for i in countup(0, high(t.data)): + if t.data[i].val != 0: RawInsert(t, n, t.data[i].key, t.data[i].val) + swap(t.data, n) + +proc `[]=`*[A](t: var TCountTable[A], key: A, val: int) = + ## puts a (key, value)-pair into `t`. `val` has to be positive. + assert val > 0 + PutImpl() + +proc initCountTable*[A](initialSize=64): TCountTable[A] = + ## creates a new count table that is empty. `initialSize` needs to be + ## a power of two. + assert isPowerOfTwo(initialSize) + result.counter = 0 + newSeq(result.data, initialSize) + +proc toCountTable*[A](keys: openArray[A]): TCountTable[A] = + ## creates a new count table with every key in `keys` having a count of 1. + result = initCountTable[A](nextPowerOfTwo(keys.len+10)) + for key in items(keys): result[key] = 1 + +proc `$`*[A](t: TCountTable[A]): string = + ## The `$` operator for count tables. + dollarImpl() + +proc inc*[A](t: var TCountTable[A], key: A, val = 1) = + ## increments `t[key]` by `val`. + var index = RawGet(t, key) + if index >= 0: + inc(t.data[index].val, val) + else: + if mustRehash(len(t.data), t.counter): Enlarge(t) + RawInsert(t, t.data, key, val) + inc(t.counter) + +proc Smallest*[A](t: TCountTable[A]): tuple[key: A, val: int] = + ## returns the largest (key,val)-pair. Efficiency: O(n) + assert t.len > 0 + var minIdx = 0 + for h in 1..high(t.data): + if t.data[h].val > 0 and t.data[minIdx].val > t.data[h].val: minIdx = h + result.key = t.data[minIdx].key + result.val = t.data[minIdx].val + +proc Largest*[A](t: TCountTable[A]): tuple[key: A, val: int] = + ## returns the (key,val)-pair with the largest `val`. Efficiency: O(n) + assert t.len > 0 + var maxIdx = 0 + for h in 1..high(t.data): + if t.data[maxIdx].val < t.data[h].val: maxIdx = h + result.key = t.data[maxIdx].key + result.val = t.data[maxIdx].val + +proc sort*[A](t: var TCountTable[A]) = + ## sorts the count table so that the entry with the highest counter comes + ## first. This is destructive! You must not modify `t` afterwards! + ## You can use the iterators `pairs`, `keys`, and `values` to iterate over + ## `t` in the sorted order. + + # we use shellsort here; fast enough and simple + var h = 1 + while true: + h = 3 * h + 1 + if h >= high(t.data): break + while true: + h = h div 3 + for i in countup(h, high(t.data)): + var j = i + while t.data[j-h].val <= t.data[j].val: + var xyz = t.data[j] + t.data[j] = t.data[j-h] + t.data[j-h] = xyz + j = j-h + if j < h: break + if h == 1: break + + +const + data = { + "34": 123456, "12": 789, + "90": 343, "0": 34404, + "1": 344004, "2": 344774, + "3": 342244, "4": 3412344, + "5": 341232144, "6": 34214544, + "7": 3434544, "8": 344544, + "9": 34435644, "---00": 346677844, + "10": 34484, "11": 34474, "19": 34464, + "20": 34454, "30": 34141244, "40": 344114, + "50": 344490, "60": 344491, "70": 344492, + "80": 344497} + +proc countTableTest1 = + var s = initTable[string, int](64) + for key, val in items(data): s[key] = val + var w: tuple[key: string, val: int] #type(otherCountTable.data[0]) + + var t = initCountTable[string]() + for k, v in items(data): t.inc(k) + for k in t.keys: assert t[k] == 1 + t.inc("90", 3) + t.inc("12", 2) + t.inc("34", 1) + assert t.largest()[0] == "90" + t.sort() + + var i = 0 + for k, v in t.pairs: + case i + of 0: assert k == "90" and v == 4 + of 1: assert k == "12" and v == 3 + of 2: assert k == "34" and v == 2 + else: break + inc i + +countTableTest1() +echo true + + |