diff options
Diffstat (limited to 'lib/pure/strtabs.nim')
-rw-r--r--[-rwxr-xr-x] | lib/pure/strtabs.nim | 453 |
1 files changed, 318 insertions, 135 deletions
diff --git a/lib/pure/strtabs.nim b/lib/pure/strtabs.nim index 77b463fc0..4b07aca5a 100755..100644 --- a/lib/pure/strtabs.nim +++ b/lib/pure/strtabs.nim @@ -1,6 +1,6 @@ # # -# Nimrod's Runtime Library +# Nim's Runtime Library # (c) Copyright 2012 Andreas Rumpf # # See the file "copying.txt", included in this @@ -9,73 +9,118 @@ ## The ``strtabs`` module implements an efficient hash table that is a mapping ## from strings to strings. Supports a case-sensitive, case-insensitive and -## style-insensitive mode. An efficient string substitution operator ``%`` -## for the string table is also provided. +## style-insensitive mode. + +runnableExamples: + var t = newStringTable() + t["name"] = "John" + t["city"] = "Monaco" + doAssert t.len == 2 + doAssert t.hasKey "name" + doAssert "name" in t + +## String tables can be created from a table constructor: +runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + +## When using the style insensitive mode (``modeStyleInsensitive``), +## all letters are compared case insensitively within the ASCII range +## and underscores are ignored. +runnableExamples: + var x = newStringTable(modeStyleInsensitive) + x["first_name"] = "John" + x["LastName"] = "Doe" + + doAssert x["firstName"] == "John" + doAssert x["last_name"] == "Doe" + +## An efficient string substitution operator +## `% <#%25,string,StringTableRef,set[FormatFlag]>`_ for the string table +## is also provided. +runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + doAssert "${name} lives in ${city}" % t == "John lives in Monaco" + +## **See also:** +## * `tables module <tables.html>`_ for general hash tables +## * `sharedtables module<sharedtables.html>`_ for shared hash table support +## * `strutils module<strutils.html>`_ for common string functions +## * `json module<json.html>`_ for table-like structure which allows +## heterogeneous members + +import std/private/since import - os, hashes, strutils + std/[hashes, strutils] + +when defined(nimPreviewSlimSystem): + import std/assertions + + +when defined(js) or defined(nimscript) or defined(Standalone): + {.pragma: rtlFunc.} +else: + {.pragma: rtlFunc, rtl.} + import std/envvars include "system/inclrtl" type - TStringTableMode* = enum ## describes the tables operation mode - modeCaseSensitive, ## the table is case sensitive - modeCaseInsensitive, ## the table is case insensitive - modeStyleInsensitive ## the table is style insensitive - TKeyValuePair = tuple[key, val: string] - TKeyValuePairSeq = seq[TKeyValuePair] - TStringTable* = object of TObject + StringTableMode* = enum ## Describes the tables operation mode. + modeCaseSensitive, ## the table is case sensitive + modeCaseInsensitive, ## the table is case insensitive + modeStyleInsensitive ## the table is style insensitive + KeyValuePair = tuple[key, val: string, hasValue: bool] + KeyValuePairSeq = seq[KeyValuePair] + StringTableObj* = object of RootObj counter: int - data: TKeyValuePairSeq - mode: TStringTableMode + data: KeyValuePairSeq + mode: StringTableMode - PStringTable* = ref TStringTable ## use this type to declare string tables + StringTableRef* = ref StringTableObj -proc len*(t: PStringTable): int {.rtl, extern: "nst$1".} = - ## returns the number of keys in `t`. - result = t.counter + FormatFlag* = enum ## Flags for the `%` operator. + useEnvironment, ## Use environment variable if the ``$key`` + ## is not found in the table. + ## Does nothing when using `js` target. + useEmpty, ## Use the empty string as a default, thus it + ## won't throw an exception if ``$key`` is not + ## in the table. + useKey ## Do not replace ``$key`` if it is not found + ## in the table (or in the environment). -iterator pairs*(t: PStringTable): tuple[key, value: string] = - ## iterates over every (key, value) pair in the table `t`. +const + growthFactor = 2 + startSize = 64 + +proc mode*(t: StringTableRef): StringTableMode {.inline.} = t.mode + +iterator pairs*(t: StringTableRef): tuple[key, value: string] = + ## Iterates over every `(key, value)` pair in the table `t`. for h in 0..high(t.data): - if not isNil(t.data[h].key): + if t.data[h].hasValue: yield (t.data[h].key, t.data[h].val) -iterator keys*(t: PStringTable): string = - ## iterates over every key in the table `t`. +iterator keys*(t: StringTableRef): string = + ## Iterates over every key in the table `t`. for h in 0..high(t.data): - if not isNil(t.data[h].key): + if t.data[h].hasValue: yield t.data[h].key -iterator values*(t: PStringTable): string = - ## iterates over every value in the table `t`. +iterator values*(t: StringTableRef): string = + ## Iterates over every value in the table `t`. for h in 0..high(t.data): - if not isNil(t.data[h].key): + if t.data[h].hasValue: yield t.data[h].val -type - TFormatFlag* = enum ## flags for the `%` operator - useEnvironment, ## use environment variable if the ``$key`` - ## is not found in the table - useEmpty, ## use the empty string as a default, thus it - ## won't throw an exception if ``$key`` is not - ## in the table - useKey ## do not replace ``$key`` if it is not found - ## in the table (or in the environment) - -# implementation - -const - growthFactor = 2 - startSize = 64 -proc myhash(t: PStringTable, key: string): THash = +proc myhash(t: StringTableRef, key: string): Hash = case t.mode of modeCaseSensitive: result = hashes.hash(key) of modeCaseInsensitive: result = hashes.hashIgnoreCase(key) of modeStyleInsensitive: result = hashes.hashIgnoreStyle(key) -proc myCmp(t: PStringTable, a, b: string): bool = +proc myCmp(t: StringTableRef, a, b: string): bool = case t.mode of modeCaseSensitive: result = cmp(a, b) == 0 of modeCaseInsensitive: result = cmpIgnoreCase(a, b) == 0 @@ -85,110 +130,270 @@ proc mustRehash(length, counter: int): bool = assert(length > counter) result = (length * 2 < counter * 3) or (length - counter < 4) -proc nextTry(h, maxHash: THash): THash {.inline.} = - result = ((5 * h) + 1) and maxHash +proc nextTry(h, maxHash: Hash): Hash {.inline.} = + result = (h + 1) and maxHash -proc RawGet(t: PStringTable, key: string): int = - var h: THash = myhash(t, key) and high(t.data) # start with real hash value - while not isNil(t.data[h].key): - if mycmp(t, t.data[h].key, key): +proc rawGet(t: StringTableRef, key: string): int = + var h: Hash = myhash(t, key) and high(t.data) # start with real hash value + while t.data[h].hasValue: + if myCmp(t, t.data[h].key, key): return h h = nextTry(h, high(t.data)) result = - 1 -proc `[]`*(t: PStringTable, key: string): string {.rtl, extern: "nstGet".} = - ## retrieves the value at ``t[key]``. If `key` is not in `t`, "" is returned - ## and no exception is raised. One can check with ``hasKey`` whether the key - ## exists. - var index = RawGet(t, key) +template get(t: StringTableRef, key: string) = + var index = rawGet(t, key) if index >= 0: result = t.data[index].val - else: result = "" + else: + raise newException(KeyError, "key not found: " & key) -proc mget*(t: PStringTable, key: string): var string {. - rtl, extern: "nstTake".} = - ## retrieves the location at ``t[key]``. If `key` is not in `t`, the - ## ``EInvalidKey`` exception is raised. - var index = RawGet(t, key) - if index >= 0: result = t.data[index].val - else: raise newException(EInvalidKey, "key does not exist: " & key) -proc hasKey*(t: PStringTable, key: string): bool {.rtl, extern: "nst$1".} = - ## returns true iff `key` is in the table `t`. +proc len*(t: StringTableRef): int {.rtlFunc, extern: "nst$1".} = + ## Returns the number of keys in `t`. + result = t.counter + +proc `[]`*(t: StringTableRef, key: string): var string {. + rtlFunc, extern: "nstTake".} = + ## Retrieves the location at ``t[key]``. + ## + ## If `key` is not in `t`, the ``KeyError`` exception is raised. + ## One can check with `hasKey proc <#hasKey,StringTableRef,string>`_ + ## whether the key exists. + ## + ## See also: + ## * `getOrDefault proc <#getOrDefault,StringTableRef,string,string>`_ + ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new + ## (key, value) pair in the table + ## * `hasKey proc <#hasKey,StringTableRef,string>`_ for checking if a key + ## is in the table + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + doAssert t["name"] == "John" + doAssertRaises(KeyError): + echo t["occupation"] + get(t, key) + +proc getOrDefault*(t: StringTableRef; key: string, + default: string = ""): string = + ## Retrieves the location at ``t[key]``. + ## + ## If `key` is not in `t`, the default value is returned (if not specified, + ## it is an empty string (`""`)). + ## + ## See also: + ## * `[] proc <#[],StringTableRef,string>`_ for retrieving a value of a key + ## * `hasKey proc <#hasKey,StringTableRef,string>`_ for checking if a key + ## is in the table + ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new + ## (key, value) pair in the table + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + doAssert t.getOrDefault("name") == "John" + doAssert t.getOrDefault("occupation") == "" + doAssert t.getOrDefault("occupation", "teacher") == "teacher" + doAssert t.getOrDefault("name", "Paul") == "John" + + var index = rawGet(t, key) + if index >= 0: result = t.data[index].val + else: result = default + +proc hasKey*(t: StringTableRef, key: string): bool {.rtlFunc, + extern: "nst$1".} = + ## Returns true if `key` is in the table `t`. + ## + ## See also: + ## * `getOrDefault proc <#getOrDefault,StringTableRef,string,string>`_ + ## * `contains proc <#contains,StringTableRef,string>`_ + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + doAssert t.hasKey("name") + doAssert not t.hasKey("occupation") result = rawGet(t, key) >= 0 -proc RawInsert(t: PStringTable, data: var TKeyValuePairSeq, key, val: string) = - var h: THash = myhash(t, key) and high(data) - while not isNil(data[h].key): +proc contains*(t: StringTableRef, key: string): bool = + ## Alias of `hasKey proc <#hasKey,StringTableRef,string>`_ for use with + ## the `in` operator. + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + doAssert "name" in t + doAssert "occupation" notin t + return hasKey(t, key) + +proc rawInsert(t: StringTableRef, data: var KeyValuePairSeq, key, val: string) = + var h: Hash = myhash(t, key) and high(data) + while data[h].hasValue: h = nextTry(h, high(data)) data[h].key = key data[h].val = val + data[h].hasValue = true -proc Enlarge(t: PStringTable) = - var n: TKeyValuePairSeq +proc enlarge(t: StringTableRef) = + var n: KeyValuePairSeq newSeq(n, len(t.data) * growthFactor) for i in countup(0, high(t.data)): - if not isNil(t.data[i].key): RawInsert(t, n, t.data[i].key, t.data[i].val) + if t.data[i].hasValue: rawInsert(t, n, move t.data[i].key, move t.data[i].val) swap(t.data, n) -proc `[]=`*(t: PStringTable, key, val: string) {.rtl, extern: "nstPut".} = - ## puts a (key, value)-pair into `t`. - var index = RawGet(t, key) +proc `[]=`*(t: StringTableRef, key, val: string) {. + rtlFunc, extern: "nstPut".} = + ## Inserts a `(key, value)` pair into `t`. + ## + ## See also: + ## * `[] proc <#[],StringTableRef,string>`_ for retrieving a value of a key + ## * `del proc <#del,StringTableRef,string>`_ for removing a key from the table + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + t["occupation"] = "teacher" + doAssert t.hasKey("occupation") + + var index = rawGet(t, key) if index >= 0: t.data[index].val = val else: - if mustRehash(len(t.data), t.counter): Enlarge(t) - RawInsert(t, t.data, key, val) + if mustRehash(len(t.data), t.counter): enlarge(t) + rawInsert(t, t.data, key, val) inc(t.counter) -proc RaiseFormatException(s: string) = - var e: ref EInvalidValue - new(e) - e.msg = "format string: key not found: " & s - raise e - -proc getValue(t: PStringTable, flags: set[TFormatFlag], key: string): string = - if hasKey(t, key): return t[key] - # hm difficult: assume safety in taint mode here. XXX This is dangerous! - if useEnvironment in flags: result = os.getEnv(key).string - else: result = "" - if result.len == 0: - if useKey in flags: result = '$' & key - elif not (useEmpty in flags): raiseFormatException(key) - -proc newStringTable*(mode: TStringTableMode): PStringTable {. - rtl, extern: "nst$1".} = - ## creates a new string table that is empty. - new(result) - result.mode = mode - result.counter = 0 - newSeq(result.data, startSize) +proc newStringTable*(mode: StringTableMode): owned(StringTableRef) {. + rtlFunc, extern: "nst$1", noSideEffect.} = + ## Creates a new empty string table. + ## + ## See also: + ## * `newStringTable(keyValuePairs) proc + ## <#newStringTable,varargs[tuple[string,string]],StringTableMode>`_ + result = StringTableRef(mode: mode, counter: 0, data: newSeq[KeyValuePair](startSize)) proc newStringTable*(keyValuePairs: varargs[string], - mode: TStringTableMode): PStringTable {. - rtl, extern: "nst$1WithPairs".} = - ## creates a new string table with given key value pairs. - ## Example:: - ## var mytab = newStringTable("key1", "val1", "key2", "val2", - ## modeCaseInsensitive) + mode: StringTableMode): owned(StringTableRef) {. + rtlFunc, extern: "nst$1WithPairs", noSideEffect.} = + ## Creates a new string table with given `key, value` string pairs. + ## + ## `StringTableMode` must be specified. + runnableExamples: + var mytab = newStringTable("key1", "val1", "key2", "val2", + modeCaseInsensitive) + result = newStringTable(mode) var i = 0 while i < high(keyValuePairs): - result[keyValuePairs[i]] = keyValuePairs[i + 1] + {.noSideEffect.}: + result[keyValuePairs[i]] = keyValuePairs[i + 1] inc(i, 2) proc newStringTable*(keyValuePairs: varargs[tuple[key, val: string]], - mode: TStringTableMode = modeCaseSensitive): PStringTable {. - rtl, extern: "nst$1WithTableConstr".} = - ## creates a new string table with given key value pairs. - ## Example:: - ## var mytab = newStringTable({"key1": "val1", "key2": "val2"}, - ## modeCaseInsensitive) + mode: StringTableMode = modeCaseSensitive): owned(StringTableRef) {. + rtlFunc, extern: "nst$1WithTableConstr", noSideEffect.} = + ## Creates a new string table with given `(key, value)` tuple pairs. + ## + ## The default mode is case sensitive. + runnableExamples: + var + mytab1 = newStringTable({"key1": "val1", "key2": "val2"}, modeCaseInsensitive) + mytab2 = newStringTable([("key3", "val3"), ("key4", "val4")]) + result = newStringTable(mode) - for key, val in items(keyvaluePairs): result[key] = val + for key, val in items(keyValuePairs): + {.noSideEffect.}: + result[key] = val + +proc raiseFormatException(s: string) = + raise newException(ValueError, "format string: key not found: " & s) -proc `%`*(f: string, t: PStringTable, flags: set[TFormatFlag] = {}): string {. - rtl, extern: "nstFormat".} = +proc getValue(t: StringTableRef, flags: set[FormatFlag], key: string): string = + if hasKey(t, key): return t.getOrDefault(key) + when defined(js) or defined(nimscript) or defined(Standalone): + result = "" + else: + if useEnvironment in flags: result = getEnv(key) + else: result = "" + if result.len == 0: + if useKey in flags: result = '$' & key + elif useEmpty notin flags: raiseFormatException(key) + +proc clear*(s: StringTableRef, mode: StringTableMode) {. + rtlFunc, extern: "nst$1".} = + ## Resets a string table to be empty again, perhaps altering the mode. + ## + ## See also: + ## * `del proc <#del,StringTableRef,string>`_ for removing a key from the table + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + clear(t, modeCaseSensitive) + doAssert len(t) == 0 + doAssert "name" notin t + doAssert "city" notin t + s.mode = mode + s.counter = 0 + s.data.setLen(startSize) + for i in 0..<s.data.len: + s.data[i].hasValue = false + +proc clear*(s: StringTableRef) {.since: (1, 1).} = + ## Resets a string table to be empty again without changing the mode. + s.clear(s.mode) + +proc del*(t: StringTableRef, key: string) = + ## Removes `key` from `t`. + ## + ## See also: + ## * `clear proc <#clear,StringTableRef,StringTableMode>`_ for resetting a + ## table to be empty + ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new + ## (key, value) pair in the table + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + t.del("name") + doAssert len(t) == 1 + doAssert "name" notin t + doAssert "city" in t + + # Impl adapted from `tableimpl.delImplIdx` + var i = rawGet(t, key) + let msk = high(t.data) + if i >= 0: + dec(t.counter) + block outer: + while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1 + var j = i # The correctness of this depends on (h+1) in nextTry, + var r = j # though may be adaptable to other simple sequences. + t.data[i].hasValue = false # mark current EMPTY + t.data[i].key = "" + t.data[i].val = "" + while true: + i = (i + 1) and msk # increment mod table size + if not t.data[i].hasValue: # end of collision cluster; So all done + break outer + r = t.myhash(t.data[i].key) and msk # "home" location of key@i + if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)): + break + when defined(js): + t.data[j] = t.data[i] + elif defined(gcDestructors): + t.data[j] = move t.data[i] + else: + shallowCopy(t.data[j], t.data[i]) # data[j] will be marked EMPTY next loop + +proc `$`*(t: StringTableRef): string {.rtlFunc, extern: "nstDollar".} = + ## The `$` operator for string tables. Used internally when calling + ## `echo` on a table. + if t.len == 0: + result = "{:}" + else: + result = "{" + for key, val in pairs(t): + if result.len > 1: result.add(", ") + result.add(key) + result.add(": ") + result.add(val) + result.add("}") + +proc `%`*(f: string, t: StringTableRef, flags: set[FormatFlag] = {}): string {. + rtlFunc, extern: "nstFormat".} = ## The `%` operator for string tables. + runnableExamples: + var t = {"name": "John", "city": "Monaco"}.newStringTable + doAssert "${name} lives in ${city}" % t == "John lives in Monaco" + const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'} result = "" @@ -215,25 +420,3 @@ proc `%`*(f: string, t: PStringTable, flags: set[TFormatFlag] = {}): string {. else: add(result, f[i]) inc(i) - -proc `$`*(t: PStringTable): string {.rtl, extern: "nstDollar".} = - ## The `$` operator for string tables. - if t.len == 0: - result = "{:}" - else: - result = "{" - for key, val in pairs(t): - if result.len > 1: result.add(", ") - result.add(key) - result.add(": ") - result.add(val) - result.add("}") - -when isMainModule: - var x = {"k": "v", "11": "22", "565": "67"}.newStringTable - assert x["k"] == "v" - assert x["11"] == "22" - assert x["565"] == "67" - x.mget("11") = "23" - assert x["11"] == "23" - |