summary refs log tree commit diff stats
path: root/lib/pure/ropes.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/ropes.nim')
-rwxr-xr-xlib/pure/ropes.nim375
1 files changed, 375 insertions, 0 deletions
diff --git a/lib/pure/ropes.nim b/lib/pure/ropes.nim
new file mode 100755
index 000000000..6655a9fda
--- /dev/null
+++ b/lib/pure/ropes.nim
@@ -0,0 +1,375 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2009 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module contains support for a `rope`:idx: data type.
+## Ropes can represent very long strings efficiently; especially concatenation
+## is done in O(1) instead of O(n). They are essentially concatenation
+## trees that are only flattened when converting to a native Nimrod
+## string. The empty string is represented by ``nil``. Ropes are immutable and
+## subtrees can be shared without copying.
+## Leaves can be cached for better memory efficiency at the cost of a bit of
+## runtime efficiency.
+
+{.deadCodeElim: on.}
+
+{.push debugger:off .} # the user does not want to trace a part
+                       # of the standard library!
+
+# copied from excpt.nim, because I don't want to make this template public
+template newException(exceptn, message: expr): expr =
+  block: # open a new scope
+    var
+      e: ref exceptn
+    new(e)
+    e.msg = message
+    e
+
+const
+  countCacheMisses = false
+
+var
+  cacheEnabled = false
+
+type 
+  PRope* = ref TRope ## empty rope is represented by nil
+  TRope {.acyclic, final, pure.} = object
+    left, right: PRope
+    length: int
+    data: string # != nil if a leaf
+
+proc isConc(r: PRope): bool {.inline.} = return isNil(r.data)
+
+# Note that the left and right pointers are not needed for leafs.
+# Leaves have relatively high memory overhead (~30 bytes on a 32
+# bit machine) and we produce many of them. This is why we cache and
+# share leafs accross different rope trees.
+# To cache them they are inserted in another tree, a splay tree for best
+# performance. But for the caching tree we use the leaf's left and right
+# pointers.
+
+proc len*(a: PRope): int =
+  ## the rope's length
+  if a == nil: result = 0
+  else: result = a.length
+  
+proc newRope(): PRope = new(result)
+proc newRope(data: string): PRope = 
+  new(result)
+  result.length = len(data)
+  result.data = data
+
+var 
+  cache: PRope                # the root of the cache tree
+  N: PRope                    # dummy rope needed for splay algorithm
+
+when countCacheMisses:
+  var misses, hits: int
+  
+proc splay(s: string, tree: PRope, cmpres: var int): PRope = 
+  var c: int
+  var t = tree
+  N.left = nil
+  N.right = nil               # reset to nil
+  var le = N
+  var r = N
+  while true: 
+    c = cmp(s, t.data)
+    if c < 0: 
+      if (t.left != nil) and (s < t.left.data): 
+        var y = t.left
+        t.left = y.right
+        y.right = t
+        t = y
+      if t.left == nil: break 
+      r.left = t
+      r = t
+      t = t.left
+    elif c > 0: 
+      if (t.right != nil) and (s > t.right.data): 
+        var y = t.right
+        t.right = y.left
+        y.left = t
+        t = y
+      if t.right == nil: break 
+      le.right = t
+      le = t
+      t = t.right
+    else: 
+      break 
+  cmpres = c
+  le.right = t.left
+  r.left = t.right
+  t.left = N.right
+  t.right = N.left
+  result = t
+
+proc insertInCache(s: string, tree: PRope): PRope = 
+  var t = tree
+  if t == nil: 
+    result = newRope(s)
+    when countCacheMisses: inc(misses)
+    return 
+  var cmp: int
+  t = splay(s, t, cmp)
+  if cmp == 0: 
+    # We get here if it's already in the Tree
+    # Don't add it again
+    result = t
+    when countCacheMisses: inc(hits)
+  else: 
+    when countCacheMisses: inc(misses)
+    result = newRope(s)
+    if cmp < 0: 
+      result.left = t.left
+      result.right = t
+      t.left = nil
+    else: 
+      # i > t.item:
+      result.right = t.right
+      result.left = t
+      t.right = nil
+
+proc rope*(s: string): PRope =
+  ## Converts a string to a rope. 
+  if s.len == 0: 
+    result = nil
+  elif cacheEnabled: 
+    result = insertInCache(s, cache)
+    cache = result
+  else: 
+    result = newRope(s)
+  
+proc rope*(i: BiggestInt): PRope = 
+  ## Converts an int to a rope. 
+  result = rope($i)
+
+proc rope*(f: BiggestFloat): PRope =
+  ## Converts a float to a rope. 
+  result = rope($f)
+
+proc disableCache*() =
+  ## the cache is discarded and disabled. The GC will reuse its used memory.
+  cache = nil
+  cacheEnabled = false
+  
+proc enableCache*() =
+  ## Enables the caching of leaves. This reduces the memory footprint at
+  ## the cost of runtime efficiency.
+  cacheEnabled = true
+
+proc `&`*(a, b: PRope): PRope =
+  ## the concatenation operator for ropes.
+  if a == nil: 
+    result = b
+  elif b == nil: 
+    result = a
+  else:
+    result = newRope()
+    result.length = a.length + b.length
+    when false:
+      # XXX rebalancing would be nice, but is too expensive.
+      result.left = a.left
+      var x = newRope()
+      x.left = a.right
+      x.right = b
+      result.right = x
+    else:
+      result.left = a
+      result.right = b
+  
+proc `&`*(a: PRope, b: string): PRope = 
+  ## the concatenation operator for ropes.
+  result = a & rope(b)
+  
+proc `&`*(a: string, b: PRope): PRope = 
+  ## the concatenation operator for ropes.
+  result = rope(a) & b
+  
+proc `&`*(a: openarray[PRope]): PRope = 
+  ## the concatenation operator for an openarray of ropes.
+  for i in countup(0, high(a)): result = result & a[i]
+
+proc add*(a: var PRope, b: PRope) =
+  ## adds `b` to the rope `a`.
+  a = a & b
+
+proc add*(a: var PRope, b: string) =
+  ## adds `b` to the rope `a`.
+  a = a & b
+  
+proc `[]`*(r: PRope, i: int): char =
+  ## returns the character at position `i` in the rope `r`. This is quite
+  ## expensive! Worst-case: O(n). If ``i >= r.len``, ``\0`` is returned.
+  var x = r
+  var j = i
+  if x == nil: return
+  while true:
+    if not isConc(x):
+      if x.data.len <% j: return x.data[j]
+      return '\0'
+    else:
+      if x.left.len >% j:
+        x = x.left
+      else:
+        x = x.right
+        dec(j, x.len)
+
+iterator leaves*(r: PRope): string =
+  ## iterates over any leaf string in the rope `r`.
+  if r != nil:
+    var stack = @[r]
+    while stack.len > 0: 
+      var it = stack.pop
+      while isConc(it):
+        stack.add(it.right)
+        it = it.left
+        assert(it != nil)
+      assert(it.data != nil)
+      yield it.data
+  
+iterator items*(r: PRope): char =
+  ## iterates over any character in the rope `r`.
+  for s in leaves(r):
+    for c in items(s): yield c
+
+proc write*(f: TFile, r: PRope) =
+  ## writes a rope to a file.
+  for s in leaves(r): write(f, s)
+
+proc `$`*(r: PRope): string = 
+  ## converts a rope back to a string.
+  result = newString(r.len)
+  setLen(result, 0)
+  for s in leaves(r): add(result, s)
+
+when false:
+  # Format string caching seems reasonable: All leaves can be shared and format
+  # string parsing has to be done only once. A compiled format string is stored
+  # as a rope. A negative length is used for the index into the args array.
+  proc compiledArg(idx: int): PRope =
+    new(result)
+    result.length = -idx
+  
+  proc compileFrmt(frmt: string): PRope = 
+    var i = 0
+    var length = len(frmt)
+    result = nil
+    var num = 0
+    while i < length: 
+      if frmt[i] == '$': 
+        inc(i)
+        case frmt[i]
+        of '$': 
+          add(result, "$")
+          inc(i)
+        of '#': 
+          inc(i)
+          add(result, compiledArg(num+1))
+          inc(num)
+        of '0'..'9': 
+          var j = 0
+          while true: 
+            j = j * 10 + ord(frmt[i]) - ord('0')
+            inc(i)
+            if frmt[i] notin {'0'..'9'}: break 
+          num = j
+          add(s, compiledArg(j))
+        of '{':
+          inc(i)
+          var j = 0
+          while frmt[i] in {'0'..'9'}:
+            j = j * 10 + ord(frmt[i]) - ord('0')
+            inc(i)
+          if frmt[i] == '}': inc(i)
+          else: raise newException(EInvalidValue, "invalid format string")
+          num = j
+          add(s, compiledArg(j))
+        else: raise newException(EInvalidValue, "invalid format string")
+      var start = i
+      while i < length: 
+        if frmt[i] != '$': inc(i)
+        else: break 
+      if i - 1 >= start: 
+        add(result, copy(frmt, start, i-1))
+  
+proc `%`*(frmt: string, args: openarray[PRope]): PRope =
+  ## `%` substitution operator for ropes. Does not support the ``$identifier``
+  ## nor ``${identifier}`` notations.
+  var i = 0
+  var length = len(frmt)
+  result = nil
+  var num = 0
+  while i < length: 
+    if frmt[i] == '$': 
+      inc(i)
+      case frmt[i]
+      of '$': 
+        add(result, "$")
+        inc(i)
+      of '#': 
+        inc(i)
+        add(result, args[num])
+        inc(num)
+      of '0'..'9': 
+        var j = 0
+        while true: 
+          j = j * 10 + ord(frmt[i]) - ord('0')
+          inc(i)
+          if frmt[i] notin {'0'..'9'}: break 
+        num = j
+        add(result, args[j-1])
+      of '{':
+        inc(i)
+        var j = 0
+        while frmt[i] in {'0'..'9'}:
+          j = j * 10 + ord(frmt[i]) - ord('0')
+          inc(i)
+        if frmt[i] == '}': inc(i)
+        else: raise newException(EInvalidValue, "invalid format string")
+        num = j
+        add(result, args[j-1])
+      else: raise newException(EInvalidValue, "invalid format string")
+    var start = i
+    while i < length: 
+      if frmt[i] != '$': inc(i)
+      else: break 
+    if i - 1 >= start: 
+      add(result, copy(frmt, start, i - 1))
+
+proc addf*(c: var PRope, frmt: string, args: openarray[PRope]) =
+  ## shortcut for ``add(c, frmt % args)``.
+  add(c, frmt % args)
+
+proc equalsFile*(r: PRope, f: TFile): bool =
+  ## returns true if the contents of the file `f` equal `r`.
+  var bufSize = 1024 # reasonable start value
+  var buf = alloc(BufSize)
+  for s in leaves(r):
+    if s.len > bufSize:
+      bufSize = max(bufSize * 2, s.len)
+      buf = realloc(buf, bufSize)
+    var readBytes = readBuffer(f, buf, s.len)
+    result = readBytes == s.len and equalMem(buf, cstring(s), s.len)
+    if not result: break
+  if result:
+    result = readBuffer(f, buf, 1) == 0 # really at the end of file?
+  dealloc(buf)
+
+proc equalsFile*(r: PRope, f: string): bool =
+  ## returns true if the contents of the file `f` equal `r`. If `f` does not
+  ## exist, false is returned.
+  var bin: TFile
+  result = open(bin, f)
+  if result:
+    result = equalsFile(r, bin)
+    close(bin)
+
+new(N) # init dummy node for splay algorithm
+
+{.pop.}
\ No newline at end of file