summary refs log tree commit diff stats
path: root/lib/pure/ropes.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/ropes.nim')
-rw-r--r--lib/pure/ropes.nim400
1 files changed, 400 insertions, 0 deletions
diff --git a/lib/pure/ropes.nim b/lib/pure/ropes.nim
new file mode 100644
index 000000000..8750aca87
--- /dev/null
+++ b/lib/pure/ropes.nim
@@ -0,0 +1,400 @@
+#
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2010 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module contains support for a `rope`:idx: data type.
+## Ropes can represent very long strings efficiently; in particular, concatenation
+## is done in O(1) instead of O(n). They are essentially concatenation
+## trees that are only flattened when converting to a native Nim
+## string. The empty string is represented by `nil`. Ropes are immutable and
+## subtrees can be shared without copying.
+## Leaves can be cached for better memory efficiency at the cost of
+## runtime efficiency.
+
+include system/inclrtl
+import std/streams
+
+when defined(nimPreviewSlimSystem):
+  import std/[syncio, formatfloat, assertions]
+
+{.push debugger: off.} # the user does not want to trace a part
+                       # of the standard library!
+
+const
+  countCacheMisses = false
+
+var
+  cacheEnabled = false
+
+type
+  Rope* {.acyclic.} = ref object
+    ## A rope data type. The empty rope is represented by `nil`.
+    left, right: Rope
+    length: int
+    data: string # not empty if a leaf
+
+# Note that the left and right pointers are not needed for leafs.
+# Leaves have relatively high memory overhead (~30 bytes on a 32
+# bit machine) and we produce many of them. This is why we cache and
+# share leafs across different rope trees.
+# To cache them they are inserted in another tree, a splay tree for best
+# performance. But for the caching tree we use the leaf's left and right
+# pointers.
+
+proc len*(a: Rope): int {.rtl, extern: "nro$1".} =
+  ## The rope's length.
+  if a == nil: 0 else: a.length
+
+proc newRope(): Rope = new(result)
+proc newRope(data: string): Rope =
+  new(result)
+  result.length = len(data)
+  result.data = data
+
+var
+  cache {.threadvar.}: Rope # the root of the cache tree
+  N {.threadvar.}: Rope     # dummy rope needed for splay algorithm
+
+when countCacheMisses:
+  var misses, hits: int
+
+proc splay(s: string, tree: Rope, cmpres: var int): Rope =
+  var c: int
+  var t = tree
+  N.left = nil
+  N.right = nil # reset to nil
+  var le = N
+  var r = N
+  while true:
+    c = cmp(s, t.data)
+    if c < 0:
+      if (t.left != nil) and (s < t.left.data):
+        var y = t.left
+        t.left = y.right
+        y.right = t
+        t = y
+      if t.left == nil: break
+      r.left = t
+      r = t
+      t = t.left
+    elif c > 0:
+      if (t.right != nil) and (s > t.right.data):
+        var y = t.right
+        t.right = y.left
+        y.left = t
+        t = y
+      if t.right == nil: break
+      le.right = t
+      le = t
+      t = t.right
+    else:
+      break
+  cmpres = c
+  le.right = t.left
+  r.left = t.right
+  t.left = N.right
+  t.right = N.left
+  result = t
+
+proc insertInCache(s: string, tree: Rope): Rope =
+  var t = tree
+  if t == nil:
+    result = newRope(s)
+    when countCacheMisses: inc(misses)
+    return
+  var cmp: int
+  t = splay(s, t, cmp)
+  if cmp == 0:
+    # We get here if it's already in the Tree
+    # Don't add it again
+    result = t
+    when countCacheMisses: inc(hits)
+  else:
+    when countCacheMisses: inc(misses)
+    result = newRope(s)
+    if cmp < 0:
+      result.left = t.left
+      result.right = t
+      t.left = nil
+    else:
+      # i > t.item:
+      result.right = t.right
+      result.left = t
+      t.right = nil
+
+proc rope*(s: string = ""): Rope {.rtl, extern: "nro$1Str".} =
+  ## Converts a string to a rope.
+  runnableExamples:
+    let r = rope("I'm a rope")
+    doAssert $r == "I'm a rope"
+
+  if s.len == 0:
+    result = nil
+  else:
+    when nimvm:
+      # No caching in VM context
+      result = newRope(s)
+    else:
+      if cacheEnabled:
+        result = insertInCache(s, cache)
+        cache = result
+      else:
+        result = newRope(s)
+
+proc rope*(i: BiggestInt): Rope {.rtl, extern: "nro$1BiggestInt".} =
+  ## Converts an int to a rope.
+  runnableExamples:
+    let r = rope(429)
+    doAssert $r == "429"
+
+  result = rope($i)
+
+proc rope*(f: BiggestFloat): Rope {.rtl, extern: "nro$1BiggestFloat".} =
+  ## Converts a float to a rope.
+  runnableExamples:
+    let r = rope(4.29)
+    doAssert $r == "4.29"
+
+  result = rope($f)
+
+proc enableCache*() {.rtl, extern: "nro$1".} =
+  ## Enables the caching of leaves. This reduces the memory footprint at
+  ## the cost of runtime efficiency.
+  cacheEnabled = true
+
+proc disableCache*() {.rtl, extern: "nro$1".} =
+  ## The cache is discarded and disabled. The GC will reuse its used memory.
+  cache = nil
+  cacheEnabled = false
+
+proc `&`*(a, b: Rope): Rope {.rtl, extern: "nroConcRopeRope".} =
+  ## The concatenation operator for ropes.
+  runnableExamples:
+    let r = rope("Hello, ") & rope("Nim!")
+    doAssert $r == "Hello, Nim!"
+
+  if a == nil:
+    result = b
+  elif b == nil:
+    result = a
+  else:
+    result = newRope()
+    result.length = a.length + b.length
+    result.left = a
+    result.right = b
+
+proc `&`*(a: Rope, b: string): Rope {.rtl, extern: "nroConcRopeStr".} =
+  ## The concatenation operator for ropes.
+  runnableExamples:
+    let r = rope("Hello, ") & "Nim!"
+    doAssert $r == "Hello, Nim!"
+
+  result = a & rope(b)
+
+proc `&`*(a: string, b: Rope): Rope {.rtl, extern: "nroConcStrRope".} =
+  ## The concatenation operator for ropes.
+  runnableExamples:
+    let r = "Hello, " & rope("Nim!")
+    doAssert $r == "Hello, Nim!"
+
+  result = rope(a) & b
+
+proc `&`*(a: openArray[Rope]): Rope {.rtl, extern: "nroConcOpenArray".} =
+  ## The concatenation operator for an `openArray` of ropes.
+  runnableExamples:
+    let r = &[rope("Hello, "), rope("Nim"), rope("!")]
+    doAssert $r == "Hello, Nim!"
+
+  for item in a: result = result & item
+
+proc add*(a: var Rope, b: Rope) {.rtl, extern: "nro$1Rope".} =
+  ## Adds `b` to the rope `a`.
+  runnableExamples:
+    var r = rope("Hello, ")
+    r.add(rope("Nim!"))
+    doAssert $r == "Hello, Nim!"
+
+  a = a & b
+
+proc add*(a: var Rope, b: string) {.rtl, extern: "nro$1Str".} =
+  ## Adds `b` to the rope `a`.
+  runnableExamples:
+    var r = rope("Hello, ")
+    r.add("Nim!")
+    doAssert $r == "Hello, Nim!"
+
+  a = a & b
+
+proc `[]`*(r: Rope, i: int): char {.rtl, extern: "nroCharAt".} =
+  ## Returns the character at position `i` in the rope `r`. This is quite
+  ## expensive! Worst-case: O(n). If `i >= r.len or i < 0`, `\0` is returned.
+  runnableExamples:
+    let r = rope("Hello, Nim!")
+
+    doAssert r[0] == 'H'
+    doAssert r[7] == 'N'
+    doAssert r[22] == '\0'
+
+  var x = r
+  var j = i
+  if x == nil or i < 0 or i >= r.len: return
+  while true:
+    if x != nil and x.data.len > 0:
+      # leaf
+      return x.data[j]
+    else:
+      if x.left.length > j:
+        x = x.left
+      else:
+        dec(j, x.left.length)
+        x = x.right
+
+iterator leaves*(r: Rope): string =
+  ## Iterates over any leaf string in the rope `r`.
+  runnableExamples:
+    let r = rope("Hello") & rope(", Nim!")
+    let s = ["Hello", ", Nim!"]
+    var index = 0
+    for leave in r.leaves:
+      doAssert leave == s[index]
+      inc(index)
+
+  if r != nil:
+    var stack = @[r]
+    while stack.len > 0:
+      var it = stack.pop
+      while it.left != nil:
+        assert(it.right != nil)
+        stack.add(it.right)
+        it = it.left
+        assert(it != nil)
+      yield it.data
+
+iterator items*(r: Rope): char =
+  ## Iterates over any character in the rope `r`.
+  for s in leaves(r):
+    for c in items(s): yield c
+
+proc write*(f: File, r: Rope) {.rtl, extern: "nro$1".} =
+  ## Writes a rope to a file.
+  for s in leaves(r): write(f, s)
+
+proc write*(s: Stream, r: Rope) {.rtl, extern: "nroWriteStream".} =
+  ## Writes a rope to a stream.
+  for rs in leaves(r): write(s, rs)
+
+proc `$`*(r: Rope): string {.rtl, extern: "nroToString".} =
+  ## Converts a rope back to a string.
+  result = newStringOfCap(r.len)
+  for s in leaves(r): add(result, s)
+
+proc `%`*(frmt: string, args: openArray[Rope]): Rope {.rtl, extern: "nroFormat".} =
+  ## `%` substitution operator for ropes. Does not support the `$identifier`
+  ## nor `${identifier}` notations.
+  runnableExamples:
+    let r1 = "$1 $2 $3" % [rope("Nim"), rope("is"), rope("a great language")]
+    doAssert $r1 == "Nim is a great language"
+
+    let r2 = "$# $# $#" % [rope("Nim"), rope("is"), rope("a great language")]
+    doAssert $r2 == "Nim is a great language"
+
+    let r3 = "${1} ${2} ${3}" % [rope("Nim"), rope("is"), rope("a great language")]
+    doAssert $r3 == "Nim is a great language"
+
+  var i = 0
+  var length = len(frmt)
+  result = nil
+  var num = 0
+  while i < length:
+    if frmt[i] == '$':
+      inc(i)
+      case frmt[i]
+      of '$':
+        add(result, "$")
+        inc(i)
+      of '#':
+        inc(i)
+        add(result, args[num])
+        inc(num)
+      of '0'..'9':
+        var j = 0
+        while true:
+          j = j * 10 + ord(frmt[i]) - ord('0')
+          inc(i)
+          if i >= frmt.len or frmt[i] notin {'0'..'9'}: break
+        add(result, args[j-1])
+      of '{':
+        inc(i)
+        var j = 0
+        while frmt[i] in {'0'..'9'}:
+          j = j * 10 + ord(frmt[i]) - ord('0')
+          inc(i)
+        if frmt[i] == '}': inc(i)
+        else: raise newException(ValueError, "invalid format string")
+
+        add(result, args[j-1])
+      else: raise newException(ValueError, "invalid format string")
+    var start = i
+    while i < length:
+      if frmt[i] != '$': inc(i)
+      else: break
+    if i - 1 >= start:
+      add(result, substr(frmt, start, i - 1))
+
+proc addf*(c: var Rope, frmt: string, args: openArray[Rope]) {.rtl, extern: "nro$1".} =
+  ## Shortcut for `add(c, frmt % args)`.
+  runnableExamples:
+    var r = rope("Dash: ")
+    r.addf "$1 $2 $3", [rope("Nim"), rope("is"), rope("a great language")]
+    doAssert $r == "Dash: Nim is a great language"
+
+  add(c, frmt % args)
+
+when not defined(js) and not defined(nimscript):
+  const
+    bufSize = 1024 # 1 KB is reasonable
+
+  proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} =
+    ## Returns true if the contents of the file `f` equal `r`.
+    var
+      buf: array[bufSize, char]
+      bpos = buf.len
+      blen = buf.len
+
+    for s in leaves(r):
+      var spos = 0
+      let slen = s.len
+      while spos < slen:
+        if bpos == blen:
+          # Read more data
+          bpos = 0
+          blen = readBuffer(f, addr(buf[0]), buf.len)
+          if blen == 0: # no more data in file
+            return false
+        let n = min(blen - bpos, slen - spos)
+        # TODO: There's gotta be a better way of comparing here...
+        if not equalMem(addr(buf[bpos]),
+                        cast[pointer](cast[int](cstring(s)) + spos), n):
+          return false
+        spos += n
+        bpos += n
+
+    result = readBuffer(f, addr(buf[0]), 1) == 0 # check that we've read all
+
+  proc equalsFile*(r: Rope, filename: string): bool {.rtl, extern: "nro$1Str".} =
+    ## Returns true if the contents of the file `f` equal `r`. If `f` does not
+    ## exist, false is returned.
+    var f: File
+    result = open(f, filename)
+    if result:
+      result = equalsFile(r, f)
+      close(f)
+
+new(N) # init dummy node for splay algorithm
+
+{.pop.}