# # # Nim's Runtime Library # (c) Copyright 2012 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## The ``strtabs`` module implements an efficient hash table that is a mapping ## from strings to strings. Supports a case-sensitive, case-insensitive and ## style-insensitive mode. runnableExamples: var t = newStringTable() t["name"] = "John" t["city"] = "Monaco" doAssert t.len == 2 doAssert t.hasKey "name" doAssert "name" in t ## String tables can be created from a table constructor: runnableExamples: var t = {"name": "John", "city": "Monaco"}.newStringTable ## When using the style insensitive mode (``modeStyleInsensitive``), ## all letters are compared case insensitively within the ASCII range ## and underscores are ignored. runnableExamples: var x = newStringTable(modeStyleInsensitive) x["first_name"] = "John" x["LastName"] = "Doe" doAssert x["firstName"] == "John" doAssert x["last_name"] == "Doe" ## An efficient string substitution operator ## `% <#%25,string,StringTableRef,set[FormatFlag]>`_ for the string table ## is also provided. runnableExamples: var t = {"name": "John", "city": "Monaco"}.newStringTable doAssert "${name} lives in ${city}" % t == "John lives in Monaco" ## **See also:** ## * `tables module `_ for general hash tables ## * `sharedtables module`_ for shared hash table support ## * `strutils module`_ for common string functions ## * `json module`_ for table-like structure which allows ## heterogeneous members import hashes, strutils when defined(js): {.pragma: rtlFunc.} else: {.pragma: rtlFunc, rtl.} import os include "system/inclrtl" type StringTableMode* = enum ## Describes the tables operation mode. modeCaseSensitive, ## the table is case sensitive modeCaseInsensitive, ## the table is case insensitive modeStyleInsensitive ## the table is style insensitive KeyValuePair = tuple[key, val: string, hasValue: bool] KeyValuePairSeq = seq[KeyValuePair] StringTableObj* = object of RootObj counter: int data: KeyValuePairSeq mode: StringTableMode StringTableRef* = ref StringTableObj FormatFlag* = enum ## Flags for the `%` operator. useEnvironment, ## Use environment variable if the ``$key`` ## is not found in the table. ## Does nothing when using `js` target. useEmpty, ## Use the empty string as a default, thus it ## won't throw an exception if ``$key`` is not ## in the table. useKey ## Do not replace ``$key`` if it is not found ## in the table (or in the environment). const growthFactor = 2 startSize = 64 iterator pairs*(t: StringTableRef): tuple[key, value: string] = ## Iterates over every `(key, value)` pair in the table `t`. for h in 0..high(t.data): if t.data[h].hasValue: yield (t.data[h].key, t.data[h].val) iterator keys*(t: StringTableRef): string = ## Iterates over every key in the table `t`. for h in 0..high(t.data): if t.data[h].hasValue: yield t.data[h].key iterator values*(t: StringTableRef): string = ## Iterates over every value in the table `t`. for h in 0..high(t.data): if t.data[h].hasValue: yield t.data[h].val proc myhash(t: StringTableRef, key: string): Hash = case t.mode of modeCaseSensitive: result = hashes.hash(key) of modeCaseInsensitive: result = hashes.hashIgnoreCase(key) of modeStyleInsensitive: result = hashes.hashIgnoreStyle(key) proc myCmp(t: StringTableRef, a, b: string): bool = case t.mode of modeCaseSensitive: result = cmp(a, b) == 0 of modeCaseInsensitive: result = cmpIgnoreCase(a, b) == 0 of modeStyleInsensitive: result = cmpIgnoreStyle(a, b) == 0 proc mustRehash(length, counter: int): bool = assert(length > counter) result = (length * 2 < counter * 3) or (length - counter < 4) proc nextTry(h, maxHash: Hash): Hash {.inline.} = result = (h + 1) and maxHash proc rawGet(t: StringTableRef, key: string): int = var h: Hash = myhash(t, key) and high(t.data) # start with real hash value while t.data[h].hasValue: if myCmp(t, t.data[h].key, key): return h h = nextTry(h, high(t.data)) result = - 1 template get(t: StringTableRef, key: string) = var index = rawGet(t, key) if index >= 0: result = t.data[index].val else: when compiles($key): raise newException(KeyError, "key not found: " & $key) else: raise newException(KeyError, "key not found") proc len*(t: StringTableRef): int {.rtlFunc, extern: "nst$1".} = ## Returns the number of keys in `t`. result = t.counter proc `[]`*(t: StringTableRef, key: string): var string {. rtlFunc, extern: "nstTake".} = ## Retrieves the location at ``t[key]``. ## ## If `key` is not in `t`, the ``KeyError`` exception is raised. ## One can check with `hasKey proc <#hasKey,StringTableRef,string>`_ ## whether the key exists. ## ## See also: ## * `getOrDefault proc <#getOrDefault,StringTableRef,string,string>`_ ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new ## (key, value) pair in the table ## * `hasKey proc <#hasKey,StringTableRef,string>`_ for checking if a key ## is in the table runnableExamples: var t = {"name": "John", "city": "Monaco"}.newStringTable doAssert t["name"] == "John" doAssertRaises(KeyError): echo t["occupation"] get(t, key) proc getOrDefault*(t: StringTableRef; key: string, default: string = ""): string = ## Retrieves the location at ``t[key]``. ## ## If `key` is not in `t`, the default value is returned (if not specified, ## it is an empty string (`""`)). ## ## See also: ## * `[] proc <#[],StringTableRef,string>`_ for retrieving a value of a key ## * `hasKey proc <#hasKey,StringTableRef,string>`_ for checking if a key ## is in the table ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new ## (key, value) pair in the table runnableExamples: var t = {"name": "John", "city": "Monaco"}.newStringTable doAssert t.getOrDefault("name") == "John" doAssert t.getOrDefault("occupation") == "" doAssert t.getOrDefault("occupation", "teacher") == "teacher" doAssert t.getOrDefault("name", "Paul") == "John" var index = rawGet(t, key) if index >= 0: result = t.data[index].val else: result = default proc hasKey*(t: StringTableRef, key: string): bool {.rtlFunc, extern: "nst$1".} = ## Returns true if `key` is in the table `t`. ## ## See also: ## * `getOrDefault proc <#getOrDefault,StringTableRef,string,string>`_ ## * `contains proc <#contains,StringTableRef,string>`_ runnableExamples: var t = {"name": "John", "city": "Monaco"}.newStringTable doAssert t.hasKey("name") doAssert not t.hasKey("occupation") result = rawGet(t, key) >= 0 proc contains*(t: StringTableRef, key: string): bool = ## Alias of `hasKey proc <#hasKey,StringTableRef,string>`_ for use with ## the `in` operator. runnableExamples: var t = {"name": "John", "city": "Monaco"}.newStringTable doAssert "name" in t doAssert "occupation" notin t return hasKey(t, key) proc rawInsert(t: StringTableRef, data: var KeyValuePairSeq, key, val: string) = var h: Hash = myhash(t, key) and high(data) while data[h].hasValue: h = nextTry(h, high(data)) data[h].key = key data[h].val = val data[h].hasValue = true proc enlarge(t: StringTableRef) = var n: KeyValuePairSeq newSeq(n, len(t.data) * growthFactor) for i in countup(0, high(t.data)): if t.data[i].hasValue: rawInsert(t, n, t.data[i].key, t.data[i].val) swap(t.data, n) proc `[]=`*(t: StringTableRef, key, val: string) {. rtlFunc, extern: "nstPut", noSideEffect.} = ## Inserts a `(key, value)` pair into `t`. ## ## See also: ## * `[] proc <#[],StringTableRef,string>`_ for retrieving a value of a key ## * `del proc <#del,StringTableRef,string>`_ for removing a key from the table runnableExamples: var t = {"name": "John", "city": "Monaco"}.newStringTable t["occupation"] = "teacher" doAssert t.hasKey("occupation") var index = rawGet(t, key) if index >= 0: t.data[index].val = val else: if mustRehash(len(t.data), t.counter): enlarge(t) rawInsert(t, t.data, key, val) inc(t.counter) proc newStringTable*(mode: StringTableMode): StringTableRef {. rtlFunc, extern: "nst$1".} = ## Creates a new empty string table. ## ## See also: ## * `newStringTable(keyValuePairs) proc ## <#newStringTable,varargs[tuple[string,string]],StringTableMode>`_ new(result) result.mode = mode result.counter = 0 newSeq(result.data, startSize) proc newStringTable*(keyValuePairs: varargs[string], mode: StringTableMode): StringTableRef {. rtlFunc, extern: "nst$1WithPairs".} = ## Creates a new string table with given `key, value` string pairs. ## ## `StringTableMode` must be specified. runnableExamples: var mytab = newStringTable("key1", "val1", "key2", "val2", modeCaseInsensitive) result = newStringTable(mode) var i = 0 while i < high(keyValuePairs): result[keyValuePairs[i]] = keyValuePairs[i + 1] inc(i, 2) proc newStringTable*(keyValuePairs: varargs[tuple[key, val: string]], mode: StringTableMode = modeCaseSensitive): StringTableRef {. rtlFunc, extern: "nst$1WithTableConstr".} = ## Creates a new string table with given `(key, value)` tuple pairs. ## ## The default mode is case sensitive. runnableExamples: var mytab1 = newStringTable({"key1": "val1", "key2": "val2"}, modeCaseInsensitive) mytab2 = newStringTable([("key3", "val3"), ("key4", "val4")]) result = newStringTable(mode) for key, val in items(keyValuePairs): result[key] = val proc raiseFormatException(s: string) = var e: ref ValueError new(e) e.msg = "format string: key not found: " & s raise e proc getValue(t: StringTableRef, flags: set[FormatFlag], key: string): string = if hasKey(t, key): return t.getOrDefault(key) # hm difficult: assume safety in taint mode here. XXX This is dangerous! when defined(js): result = "" else: if useEnvironment in flags: result = os.getEnv(key).string else: result = "" if result.len == 0: if useKey in flags: result = '$' & key elif useEmpty notin flags: raiseFormatException(key) proc clear*(s: StringTableRef, mode: StringTableMode) {. rtlFunc, extern: "nst$1".} = ## Resets a string table to be empty again. ## ## See also: ## * `del proc <#del,StringTableRef,string>`_ for removing a key from the table runnableExamples: var t = {"name": "John", "city": "Monaco"}.newStringTable clear(t, modeCaseSensitive) doAssert len(t) == 0 doAssert "name" notin t doAssert "city" notin t s.mode = mode s.counter = 0 s.data.setLen(startSize) for i in 0..`_ for reseting a ## table to be empty ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new ## (key, value) pair in the table runnableExamples: var t = {"name": "John", "city": "Monaco"}.newStringTable t.del("name") doAssert len(t) == 1 doAssert "name" notin t doAssert "city" in t # Impl adapted from `tableimpl.delImplIdx` var i = rawGet(t, key) let msk = high(t.data) if i >= 0: dec(t.counter) block outer: while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 var j = i # The correctness of this depends on (h+1) in nextTry, var r = j # though may be adaptable to other simple sequences. t.data[i].hasValue = false # mark current EMPTY t.data[i].key = "" t.data[i].val = "" while true: i = (i + 1) and msk # increment mod table size if not t.data[i].hasValue: # end of collision cluster; So all done break outer r = t.myhash(t.data[i].key) and msk # "home" location of key@i if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): break when defined(js): t.data[j] = t.data[i] else: shallowCopy(t.data[j], t.data[i]) # data[j] will be marked EMPTY next loop proc `$`*(t: StringTableRef): string {.rtlFunc, extern: "nstDollar".} = ## The `$` operator for string tables. Used internally when calling ## `echo` on a table. if t.len == 0: result = "{:}" else: result = "{" for key, val in pairs(t): if result.len > 1: result.add(", ") result.add(key) result.add(": ") result.add(val) result.add("}") proc `%`*(f: string, t: StringTableRef, flags: set[FormatFlag] = {}): string {. rtlFunc, extern: "nstFormat".} = ## The `%` operator for string tables. runnableExamples: var t = {"name": "John", "city": "Monaco"}.newStringTable doAssert "${name} lives in ${city}" % t == "John lives in Monaco" const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'} result = "" var i = 0 while i < len(f): if f[i] == '$': case f[i+1] of '$': add(result, '$') inc(i, 2) of '{': var j = i + 1 while j < f.len and f[j] != '}': inc(j) add(result, getValue(t, flags, substr(f, i+2, j-1))) i = j + 1 of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '_': var j = i + 1 while j < f.len and f[j] in PatternChars: inc(j) add(result, getValue(t, flags, substr(f, i+1, j-1))) i = j else: add(result, f[i]) inc(i) else: add(result, f[i]) inc(i) when isMainModule: var x = {"k": "v", "11": "22", "565": "67"}.newStringTable assert x["k"] == "v" assert x["11"] == "22" assert x["565"] == "67" x["11"] = "23" assert x["11"] == "23" x.clear(modeCaseInsensitive) x["11"] = "22" assert x["11"] == "22"