summary refs log blame commit diff stats
path: root/lib/pure/strtabs.nim
blob: 26811cc656a27d9e603967e6e26b1e3d4da4ff1f (plain) (tree)
1
2
3
4
5
6
7
8
9

 
                                  
                                         




                                                   

                                                                              
                          
 
                 







                                                         


                                                           
                                                                    
                                                                     
                               
                 






                                              













                                                                            
                        
 
      
                  
 
                                       
                     
     

                          

                        
 
    



                                                                 
                                                        

                                     
                

                         
 

                                      








                                                                   



                  
 
 
                                                               
                                                              
                           
                          

                                          
                                           
                                              
                           
                          

                         
                                             
                                                
                           
                          

                         
 
                                                   



                                                               
 
                                                   



                                                             

                                             


                                                               
                                                 
                              
 
                                                  
                                                                            
                           
                                    



                                
                                              
                            
                                           




                                                            
 



                                                                
                                                         
                                         
















                                                                           

             

                                                  

















                                                                             

                                           
                        
 

                                                             








                                                                        

                              
                                                      





                                                                         

                       
                                                                                
                                             
                         


                              
                         
 
                                 
                        

                                       
                                                                                  

                 











                                                                                 
                            
                
                           
       

                                                     
                  
 
                                                                     











                                                                        
                                                                     














                                                                     

                                                                        










                                                                                    
                                      
                                                                      
 
                                                                               
                                               
                                                                          
                                         



                                                              
                     
                                          
                                                        
 

                                                        
                                                                        








                                                                                 


                          
                          
                              
 



                                                                       



                                           
                                                                            


                                                                          

                                                             



                           
 





                                            



                                                                        


                          

                                                                            
                       
                                                                        



                                                                                 

                                    


                                                                                    
                                                                       

                                                                     



                  
                             





                                         


































                                                                               
                  
                                                            


                         
                
                        
 


                              
#
#
#            Nim's Runtime Library
#        (c) Copyright 2012 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## The ``strtabs`` module implements an efficient hash table that is a mapping
## from strings to strings. Supports a case-sensitive, case-insensitive and
## style-insensitive mode.

runnableExamples:
  var t = newStringTable()
  t["name"] = "John"
  t["city"] = "Monaco"
  doAssert t.len == 2
  doAssert t.hasKey "name"
  doAssert "name" in t

## String tables can be created from a table constructor:
runnableExamples:
  var t = {"name": "John", "city": "Monaco"}.newStringTable

## When using the style insensitive mode (``modeStyleInsensitive``),
## all letters are compared case insensitively within the ASCII range
## and underscores are ignored.
runnableExamples:
  var x = newStringTable(modeStyleInsensitive)
  x["first_name"] = "John"
  x["LastName"] = "Doe"

  doAssert x["firstName"] == "John"
  doAssert x["last_name"] == "Doe"

## An efficient string substitution operator
## `% <#%25,string,StringTableRef,set[FormatFlag]>`_ for the string table
## is also provided.
runnableExamples:
  var t = {"name": "John", "city": "Monaco"}.newStringTable
  doAssert "${name} lives in ${city}" % t == "John lives in Monaco"

## **See also:**
## * `tables module <tables.html>`_ for general hash tables
## * `sharedtables module<sharedtables.html>`_ for shared hash table support
## * `strutils module<strutils.html>`_ for common string functions
## * `json module<json.html>`_ for table-like structure which allows
##   heterogeneous members

import std/private/since

import
  hashes, strutils

when defined(js) or defined(nimscript):
  {.pragma: rtlFunc.}
else:
  {.pragma: rtlFunc, rtl.}
  import os

include "system/inclrtl"

type
  StringTableMode* = enum ## Describes the tables operation mode.
    modeCaseSensitive,    ## the table is case sensitive
    modeCaseInsensitive,  ## the table is case insensitive
    modeStyleInsensitive  ## the table is style insensitive
  KeyValuePair = tuple[key, val: string, hasValue: bool]
  KeyValuePairSeq = seq[KeyValuePair]
  StringTableObj* = object of RootObj
    counter: int
    data: KeyValuePairSeq
    mode: StringTableMode

  StringTableRef* = ref StringTableObj

  FormatFlag* = enum ## Flags for the `%` operator.
    useEnvironment,  ## Use environment variable if the ``$key``
                     ## is not found in the table.
                     ## Does nothing when using `js` target.
    useEmpty,        ## Use the empty string as a default, thus it
                     ## won't throw an exception if ``$key`` is not
                     ## in the table.
    useKey           ## Do not replace ``$key`` if it is not found
                     ## in the table (or in the environment).

const
  growthFactor = 2
  startSize = 64


iterator pairs*(t: StringTableRef): tuple[key, value: string] =
  ## Iterates over every `(key, value)` pair in the table `t`.
  for h in 0..high(t.data):
    if t.data[h].hasValue:
      yield (t.data[h].key, t.data[h].val)

iterator keys*(t: StringTableRef): string =
  ## Iterates over every key in the table `t`.
  for h in 0..high(t.data):
    if t.data[h].hasValue:
      yield t.data[h].key

iterator values*(t: StringTableRef): string =
  ## Iterates over every value in the table `t`.
  for h in 0..high(t.data):
    if t.data[h].hasValue:
      yield t.data[h].val


proc myhash(t: StringTableRef, key: string): Hash =
  case t.mode
  of modeCaseSensitive: result = hashes.hash(key)
  of modeCaseInsensitive: result = hashes.hashIgnoreCase(key)
  of modeStyleInsensitive: result = hashes.hashIgnoreStyle(key)

proc myCmp(t: StringTableRef, a, b: string): bool =
  case t.mode
  of modeCaseSensitive: result = cmp(a, b) == 0
  of modeCaseInsensitive: result = cmpIgnoreCase(a, b) == 0
  of modeStyleInsensitive: result = cmpIgnoreStyle(a, b) == 0

proc mustRehash(length, counter: int): bool =
  assert(length > counter)
  result = (length * 2 < counter * 3) or (length - counter < 4)

proc nextTry(h, maxHash: Hash): Hash {.inline.} =
  result = (h + 1) and maxHash

proc rawGet(t: StringTableRef, key: string): int =
  var h: Hash = myhash(t, key) and high(t.data) # start with real hash value
  while t.data[h].hasValue:
    if myCmp(t, t.data[h].key, key):
      return h
    h = nextTry(h, high(t.data))
  result = - 1

template get(t: StringTableRef, key: string) =
  var index = rawGet(t, key)
  if index >= 0: result = t.data[index].val
  else:
    when compiles($key):
      raise newException(KeyError, "key not found: " & $key)
    else:
      raise newException(KeyError, "key not found")

proc len*(t: StringTableRef): int {.rtlFunc, extern: "nst$1".} =
  ## Returns the number of keys in `t`.
  result = t.counter

proc `[]`*(t: StringTableRef, key: string): var string {.
           rtlFunc, extern: "nstTake".} =
  ## Retrieves the location at ``t[key]``.
  ##
  ## If `key` is not in `t`, the ``KeyError`` exception is raised.
  ## One can check with `hasKey proc <#hasKey,StringTableRef,string>`_
  ## whether the key exists.
  ##
  ## See also:
  ## * `getOrDefault proc <#getOrDefault,StringTableRef,string,string>`_
  ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new
  ##   (key, value) pair in the table
  ## * `hasKey proc <#hasKey,StringTableRef,string>`_ for checking if a key
  ##   is in the table
  runnableExamples:
    var t = {"name": "John", "city": "Monaco"}.newStringTable
    doAssert t["name"] == "John"
    doAssertRaises(KeyError):
      echo t["occupation"]
  get(t, key)

proc getOrDefault*(t: StringTableRef; key: string,
    default: string = ""): string =
  ## Retrieves the location at ``t[key]``.
  ##
  ## If `key` is not in `t`, the default value is returned (if not specified,
  ## it is an empty string (`""`)).
  ##
  ## See also:
  ## * `[] proc <#[],StringTableRef,string>`_ for retrieving a value of a key
  ## * `hasKey proc <#hasKey,StringTableRef,string>`_ for checking if a key
  ##   is in the table
  ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new
  ##   (key, value) pair in the table
  runnableExamples:
    var t = {"name": "John", "city": "Monaco"}.newStringTable
    doAssert t.getOrDefault("name") == "John"
    doAssert t.getOrDefault("occupation") == ""
    doAssert t.getOrDefault("occupation", "teacher") == "teacher"
    doAssert t.getOrDefault("name", "Paul") == "John"

  var index = rawGet(t, key)
  if index >= 0: result = t.data[index].val
  else: result = default

proc hasKey*(t: StringTableRef, key: string): bool {.rtlFunc,
    extern: "nst$1".} =
  ## Returns true if `key` is in the table `t`.
  ##
  ## See also:
  ## * `getOrDefault proc <#getOrDefault,StringTableRef,string,string>`_
  ## * `contains proc <#contains,StringTableRef,string>`_
  runnableExamples:
    var t = {"name": "John", "city": "Monaco"}.newStringTable
    doAssert t.hasKey("name")
    doAssert not t.hasKey("occupation")
  result = rawGet(t, key) >= 0

proc contains*(t: StringTableRef, key: string): bool =
  ## Alias of `hasKey proc <#hasKey,StringTableRef,string>`_ for use with
  ## the `in` operator.
  runnableExamples:
    var t = {"name": "John", "city": "Monaco"}.newStringTable
    doAssert "name" in t
    doAssert "occupation" notin t
  return hasKey(t, key)

proc rawInsert(t: StringTableRef, data: var KeyValuePairSeq, key, val: string) =
  var h: Hash = myhash(t, key) and high(data)
  while data[h].hasValue:
    h = nextTry(h, high(data))
  data[h].key = key
  data[h].val = val
  data[h].hasValue = true

proc enlarge(t: StringTableRef) =
  var n: KeyValuePairSeq
  newSeq(n, len(t.data) * growthFactor)
  for i in countup(0, high(t.data)):
    if t.data[i].hasValue: rawInsert(t, n, move t.data[i].key, move t.data[i].val)
  swap(t.data, n)

proc `[]=`*(t: StringTableRef, key, val: string) {.
  rtlFunc, extern: "nstPut", noSideEffect.} =
  ## Inserts a `(key, value)` pair into `t`.
  ##
  ## See also:
  ## * `[] proc <#[],StringTableRef,string>`_ for retrieving a value of a key
  ## * `del proc <#del,StringTableRef,string>`_ for removing a key from the table
  runnableExamples:
    var t = {"name": "John", "city": "Monaco"}.newStringTable
    t["occupation"] = "teacher"
    doAssert t.hasKey("occupation")

  var index = rawGet(t, key)
  if index >= 0:
    t.data[index].val = val
  else:
    if mustRehash(len(t.data), t.counter): enlarge(t)
    rawInsert(t, t.data, key, val)
    inc(t.counter)

proc newStringTable*(mode: StringTableMode): owned(StringTableRef) {.
  rtlFunc, extern: "nst$1".} =
  ## Creates a new empty string table.
  ##
  ## See also:
  ## * `newStringTable(keyValuePairs) proc
  ##   <#newStringTable,varargs[tuple[string,string]],StringTableMode>`_
  new(result)
  result.mode = mode
  result.counter = 0
  newSeq(result.data, startSize)

proc newStringTable*(keyValuePairs: varargs[string],
                     mode: StringTableMode): owned(StringTableRef) {.
  rtlFunc, extern: "nst$1WithPairs".} =
  ## Creates a new string table with given `key, value` string pairs.
  ##
  ## `StringTableMode` must be specified.
  runnableExamples:
    var mytab = newStringTable("key1", "val1", "key2", "val2",
                               modeCaseInsensitive)

  result = newStringTable(mode)
  var i = 0
  while i < high(keyValuePairs):
    result[keyValuePairs[i]] = keyValuePairs[i + 1]
    inc(i, 2)

proc newStringTable*(keyValuePairs: varargs[tuple[key, val: string]],
    mode: StringTableMode = modeCaseSensitive): owned(StringTableRef) {.
    rtlFunc, extern: "nst$1WithTableConstr".} =
  ## Creates a new string table with given `(key, value)` tuple pairs.
  ##
  ## The default mode is case sensitive.
  runnableExamples:
    var
      mytab1 = newStringTable({"key1": "val1", "key2": "val2"}, modeCaseInsensitive)
      mytab2 = newStringTable([("key3", "val3"), ("key4", "val4")])

  result = newStringTable(mode)
  for key, val in items(keyValuePairs): result[key] = val

proc raiseFormatException(s: string) =
  raise newException(ValueError, "format string: key not found: " & s)

proc getValue(t: StringTableRef, flags: set[FormatFlag], key: string): string =
  if hasKey(t, key): return t.getOrDefault(key)
  # hm difficult: assume safety in taint mode here. XXX This is dangerous!
  when defined(js) or defined(nimscript):
    result = ""
  else:
    if useEnvironment in flags: result = os.getEnv(key).string
    else: result = ""
  if result.len == 0:
    if useKey in flags: result = '$' & key
    elif useEmpty notin flags: raiseFormatException(key)

proc clear*(s: StringTableRef, mode: StringTableMode) {.
  rtlFunc, extern: "nst$1".} =
  ## Resets a string table to be empty again, perhaps altering the mode.
  ##
  ## See also:
  ## * `del proc <#del,StringTableRef,string>`_ for removing a key from the table
  runnableExamples:
    var t = {"name": "John", "city": "Monaco"}.newStringTable
    clear(t, modeCaseSensitive)
    doAssert len(t) == 0
    doAssert "name" notin t
    doAssert "city" notin t
  s.mode = mode
  s.counter = 0
  s.data.setLen(startSize)
  for i in 0..<s.data.len:
    s.data[i].hasValue = false

proc clear*(s: StringTableRef) {.since: (1, 1).} =
  ## Resets a string table to be empty again without changing the mode.
  s.clear(s.mode)

proc del*(t: StringTableRef, key: string) =
  ## Removes `key` from `t`.
  ##
  ## See also:
  ## * `clear proc <#clear,StringTableRef,StringTableMode>`_ for resetting a
  ##   table to be empty
  ## * `[]= proc <#[]=,StringTableRef,string,string>`_ for inserting a new
  ##   (key, value) pair in the table
  runnableExamples:
    var t = {"name": "John", "city": "Monaco"}.newStringTable
    t.del("name")
    doAssert len(t) == 1
    doAssert "name" notin t
    doAssert "city" in t

  # Impl adapted from `tableimpl.delImplIdx`
  var i = rawGet(t, key)
  let msk = high(t.data)
  if i >= 0:
    dec(t.counter)
    block outer:
      while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
        var j = i # The correctness of this depends on (h+1) in nextTry,
        var r = j # though may be adaptable to other simple sequences.
        t.data[i].hasValue = false # mark current EMPTY
        t.data[i].key = ""
        t.data[i].val = ""
        while true:
          i = (i + 1) and msk # increment mod table size
          if not t.data[i].hasValue: # end of collision cluster; So all done
            break outer
          r = t.myhash(t.data[i].key) and msk # "home" location of key@i
          if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
            break
        when defined(js):
          t.data[j] = t.data[i]
        elif defined(gcDestructors):
          t.data[j] = move t.data[i]
        else:
          shallowCopy(t.data[j], t.data[i]) # data[j] will be marked EMPTY next loop

proc `$`*(t: StringTableRef): string {.rtlFunc, extern: "nstDollar".} =
  ## The `$` operator for string tables. Used internally when calling
  ## `echo` on a table.
  if t.len == 0:
    result = "{:}"
  else:
    result = "{"
    for key, val in pairs(t):
      if result.len > 1: result.add(", ")
      result.add(key)
      result.add(": ")
      result.add(val)
    result.add("}")

proc `%`*(f: string, t: StringTableRef, flags: set[FormatFlag] = {}): string {.
  rtlFunc, extern: "nstFormat".} =
  ## The `%` operator for string tables.
  runnableExamples:
    var t = {"name": "John", "city": "Monaco"}.newStringTable
    doAssert "${name} lives in ${city}" % t == "John lives in Monaco"

  const
    PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'}
  result = ""
  var i = 0
  while i < len(f):
    if f[i] == '$':
      case f[i+1]
      of '$':
        add(result, '$')
        inc(i, 2)
      of '{':
        var j = i + 1
        while j < f.len and f[j] != '}': inc(j)
        add(result, getValue(t, flags, substr(f, i+2, j-1)))
        i = j + 1
      of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '_':
        var j = i + 1
        while j < f.len and f[j] in PatternChars: inc(j)
        add(result, getValue(t, flags, substr(f, i+1, j-1)))
        i = j
      else:
        add(result, f[i])
        inc(i)
    else:
      add(result, f[i])
      inc(i)


when isMainModule:
  var x = {"k": "v", "11": "22", "565": "67"}.newStringTable
  assert x["k"] == "v"
  assert x["11"] == "22"
  assert x["565"] == "67"
  x["11"] = "23"
  assert x["11"] == "23"

  x.clear(modeCaseInsensitive)
  x["11"] = "22"
  assert x["11"] == "22"