summary refs log tree commit diff stats
path: root/lib/pure/ropes.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/ropes.nim')
-rw-r--r--[-rwxr-xr-x]lib/pure/ropes.nim436
1 files changed, 235 insertions, 201 deletions
diff --git a/lib/pure/ropes.nim b/lib/pure/ropes.nim
index 4a6c3f530..8750aca87 100755..100644
--- a/lib/pure/ropes.nim
+++ b/lib/pure/ropes.nim
@@ -1,6 +1,6 @@
 #
 #
-#            Nimrod's Runtime Library
+#            Nim's Runtime Library
 #        (c) Copyright 2010 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
@@ -8,19 +8,21 @@
 #
 
 ## This module contains support for a `rope`:idx: data type.
-## Ropes can represent very long strings efficiently; especially concatenation
+## Ropes can represent very long strings efficiently; in particular, concatenation
 ## is done in O(1) instead of O(n). They are essentially concatenation
-## trees that are only flattened when converting to a native Nimrod
-## string. The empty string is represented by ``nil``. Ropes are immutable and
+## trees that are only flattened when converting to a native Nim
+## string. The empty string is represented by `nil`. Ropes are immutable and
 ## subtrees can be shared without copying.
 ## Leaves can be cached for better memory efficiency at the cost of
 ## runtime efficiency.
 
-include "system/inclrtl"
+include system/inclrtl
+import std/streams
 
-{.deadCodeElim: on.}
+when defined(nimPreviewSlimSystem):
+  import std/[syncio, formatfloat, assertions]
 
-{.push debugger:off .} # the user does not want to trace a part
+{.push debugger: off.} # the user does not want to trace a part
                        # of the standard library!
 
 const
@@ -29,72 +31,69 @@ const
 var
   cacheEnabled = false
 
-type 
-  PRope* = ref TRope ## empty rope is represented by nil
-  TRope {.acyclic, final, pure.} = object
-    left, right: PRope
+type
+  Rope* {.acyclic.} = ref object
+    ## A rope data type. The empty rope is represented by `nil`.
+    left, right: Rope
     length: int
-    data: string # != nil if a leaf
-
-proc isConc(r: PRope): bool {.inline.} = return isNil(r.data)
+    data: string # not empty if a leaf
 
 # Note that the left and right pointers are not needed for leafs.
 # Leaves have relatively high memory overhead (~30 bytes on a 32
 # bit machine) and we produce many of them. This is why we cache and
-# share leafs accross different rope trees.
+# share leafs across different rope trees.
 # To cache them they are inserted in another tree, a splay tree for best
 # performance. But for the caching tree we use the leaf's left and right
 # pointers.
 
-proc len*(a: PRope): int {.rtl, extern: "nro$1".} =
-  ## the rope's length
-  if a == nil: result = 0
-  else: result = a.length
-  
-proc newRope(): PRope = new(result)
-proc newRope(data: string): PRope = 
+proc len*(a: Rope): int {.rtl, extern: "nro$1".} =
+  ## The rope's length.
+  if a == nil: 0 else: a.length
+
+proc newRope(): Rope = new(result)
+proc newRope(data: string): Rope =
   new(result)
   result.length = len(data)
   result.data = data
 
-var 
-  cache: PRope                # the root of the cache tree
-  N: PRope                    # dummy rope needed for splay algorithm
+var
+  cache {.threadvar.}: Rope # the root of the cache tree
+  N {.threadvar.}: Rope     # dummy rope needed for splay algorithm
 
 when countCacheMisses:
   var misses, hits: int
-  
-proc splay(s: string, tree: PRope, cmpres: var int): PRope = 
+
+proc splay(s: string, tree: Rope, cmpres: var int): Rope =
   var c: int
   var t = tree
   N.left = nil
-  N.right = nil               # reset to nil
+  N.right = nil # reset to nil
   var le = N
   var r = N
-  while true: 
+  while true:
     c = cmp(s, t.data)
-    if c < 0: 
-      if (t.left != nil) and (s < t.left.data): 
+    if c < 0:
+      if (t.left != nil) and (s < t.left.data):
         var y = t.left
         t.left = y.right
         y.right = t
         t = y
-      if t.left == nil: break 
+      if t.left == nil: break
       r.left = t
       r = t
       t = t.left
-    elif c > 0: 
-      if (t.right != nil) and (s > t.right.data): 
+    elif c > 0:
+      if (t.right != nil) and (s > t.right.data):
         var y = t.right
         t.right = y.left
         y.left = t
         t = y
-      if t.right == nil: break 
+      if t.right == nil: break
       le.right = t
       le = t
       t = t.right
-    else: 
-      break 
+    else:
+      break
   cmpres = c
   le.right = t.left
   r.left = t.right
@@ -102,218 +101,232 @@ proc splay(s: string, tree: PRope, cmpres: var int): PRope =
   t.right = N.left
   result = t
 
-proc insertInCache(s: string, tree: PRope): PRope = 
+proc insertInCache(s: string, tree: Rope): Rope =
   var t = tree
-  if t == nil: 
+  if t == nil:
     result = newRope(s)
     when countCacheMisses: inc(misses)
-    return 
+    return
   var cmp: int
   t = splay(s, t, cmp)
-  if cmp == 0: 
+  if cmp == 0:
     # We get here if it's already in the Tree
     # Don't add it again
     result = t
     when countCacheMisses: inc(hits)
-  else: 
+  else:
     when countCacheMisses: inc(misses)
     result = newRope(s)
-    if cmp < 0: 
+    if cmp < 0:
       result.left = t.left
       result.right = t
       t.left = nil
-    else: 
+    else:
       # i > t.item:
       result.right = t.right
       result.left = t
       t.right = nil
 
-proc rope*(s: string): PRope {.rtl, extern: "nro$1Str".} =
-  ## Converts a string to a rope. 
-  if s.len == 0: 
+proc rope*(s: string = ""): Rope {.rtl, extern: "nro$1Str".} =
+  ## Converts a string to a rope.
+  runnableExamples:
+    let r = rope("I'm a rope")
+    doAssert $r == "I'm a rope"
+
+  if s.len == 0:
     result = nil
-  elif cacheEnabled: 
-    result = insertInCache(s, cache)
-    cache = result
-  else: 
-    result = newRope(s)
-  
-proc rope*(i: BiggestInt): PRope {.rtl, extern: "nro$1BiggestInt".} = 
-  ## Converts an int to a rope. 
+  else:
+    when nimvm:
+      # No caching in VM context
+      result = newRope(s)
+    else:
+      if cacheEnabled:
+        result = insertInCache(s, cache)
+        cache = result
+      else:
+        result = newRope(s)
+
+proc rope*(i: BiggestInt): Rope {.rtl, extern: "nro$1BiggestInt".} =
+  ## Converts an int to a rope.
+  runnableExamples:
+    let r = rope(429)
+    doAssert $r == "429"
+
   result = rope($i)
 
-proc rope*(f: BiggestFloat): PRope {.rtl, extern: "nro$1BiggestFloat".} =
-  ## Converts a float to a rope. 
+proc rope*(f: BiggestFloat): Rope {.rtl, extern: "nro$1BiggestFloat".} =
+  ## Converts a float to a rope.
+  runnableExamples:
+    let r = rope(4.29)
+    doAssert $r == "4.29"
+
   result = rope($f)
-  
+
 proc enableCache*() {.rtl, extern: "nro$1".} =
   ## Enables the caching of leaves. This reduces the memory footprint at
   ## the cost of runtime efficiency.
   cacheEnabled = true
 
 proc disableCache*() {.rtl, extern: "nro$1".} =
-  ## the cache is discarded and disabled. The GC will reuse its used memory.
+  ## The cache is discarded and disabled. The GC will reuse its used memory.
   cache = nil
   cacheEnabled = false
 
-proc `&`*(a, b: PRope): PRope {.rtl, extern: "nroConcRopeRope".} =
-  ## the concatenation operator for ropes.
-  if a == nil: 
+proc `&`*(a, b: Rope): Rope {.rtl, extern: "nroConcRopeRope".} =
+  ## The concatenation operator for ropes.
+  runnableExamples:
+    let r = rope("Hello, ") & rope("Nim!")
+    doAssert $r == "Hello, Nim!"
+
+  if a == nil:
     result = b
-  elif b == nil: 
+  elif b == nil:
     result = a
   else:
     result = newRope()
     result.length = a.length + b.length
-    when false:
-      # XXX rebalancing would be nice, but is too expensive.
-      result.left = a.left
-      var x = newRope()
-      x.left = a.right
-      x.right = b
-      result.right = x
-    else:
-      result.left = a
-      result.right = b
-  
-proc `&`*(a: PRope, b: string): PRope {.rtl, extern: "nroConcRopeStr".} = 
-  ## the concatenation operator for ropes.
+    result.left = a
+    result.right = b
+
+proc `&`*(a: Rope, b: string): Rope {.rtl, extern: "nroConcRopeStr".} =
+  ## The concatenation operator for ropes.
+  runnableExamples:
+    let r = rope("Hello, ") & "Nim!"
+    doAssert $r == "Hello, Nim!"
+
   result = a & rope(b)
-  
-proc `&`*(a: string, b: PRope): PRope {.rtl, extern: "nroConcStrRope".} = 
-  ## the concatenation operator for ropes.
+
+proc `&`*(a: string, b: Rope): Rope {.rtl, extern: "nroConcStrRope".} =
+  ## The concatenation operator for ropes.
+  runnableExamples:
+    let r = "Hello, " & rope("Nim!")
+    doAssert $r == "Hello, Nim!"
+
   result = rope(a) & b
-  
-proc `&`*(a: openarray[PRope]): PRope {.rtl, extern: "nroConcOpenArray".} = 
-  ## the concatenation operator for an openarray of ropes.
-  for i in countup(0, high(a)): result = result & a[i]
 
-proc add*(a: var PRope, b: PRope) {.rtl, extern: "nro$1Rope".} =
-  ## adds `b` to the rope `a`.
+proc `&`*(a: openArray[Rope]): Rope {.rtl, extern: "nroConcOpenArray".} =
+  ## The concatenation operator for an `openArray` of ropes.
+  runnableExamples:
+    let r = &[rope("Hello, "), rope("Nim"), rope("!")]
+    doAssert $r == "Hello, Nim!"
+
+  for item in a: result = result & item
+
+proc add*(a: var Rope, b: Rope) {.rtl, extern: "nro$1Rope".} =
+  ## Adds `b` to the rope `a`.
+  runnableExamples:
+    var r = rope("Hello, ")
+    r.add(rope("Nim!"))
+    doAssert $r == "Hello, Nim!"
+
   a = a & b
 
-proc add*(a: var PRope, b: string) {.rtl, extern: "nro$1Str".} =
-  ## adds `b` to the rope `a`.
+proc add*(a: var Rope, b: string) {.rtl, extern: "nro$1Str".} =
+  ## Adds `b` to the rope `a`.
+  runnableExamples:
+    var r = rope("Hello, ")
+    r.add("Nim!")
+    doAssert $r == "Hello, Nim!"
+
   a = a & b
-  
-proc `[]`*(r: PRope, i: int): char {.rtl, extern: "nroCharAt".} =
-  ## returns the character at position `i` in the rope `r`. This is quite
-  ## expensive! Worst-case: O(n). If ``i >= r.len``, ``\0`` is returned.
+
+proc `[]`*(r: Rope, i: int): char {.rtl, extern: "nroCharAt".} =
+  ## Returns the character at position `i` in the rope `r`. This is quite
+  ## expensive! Worst-case: O(n). If `i >= r.len or i < 0`, `\0` is returned.
+  runnableExamples:
+    let r = rope("Hello, Nim!")
+
+    doAssert r[0] == 'H'
+    doAssert r[7] == 'N'
+    doAssert r[22] == '\0'
+
   var x = r
   var j = i
-  if x == nil: return
+  if x == nil or i < 0 or i >= r.len: return
   while true:
-    if not isConc(x):
-      if x.data.len <% j: return x.data[j]
-      return '\0'
+    if x != nil and x.data.len > 0:
+      # leaf
+      return x.data[j]
     else:
-      if x.left.len >% j:
+      if x.left.length > j:
         x = x.left
       else:
+        dec(j, x.left.length)
         x = x.right
-        dec(j, x.len)
 
-iterator leaves*(r: PRope): string =
-  ## iterates over any leaf string in the rope `r`.
+iterator leaves*(r: Rope): string =
+  ## Iterates over any leaf string in the rope `r`.
+  runnableExamples:
+    let r = rope("Hello") & rope(", Nim!")
+    let s = ["Hello", ", Nim!"]
+    var index = 0
+    for leave in r.leaves:
+      doAssert leave == s[index]
+      inc(index)
+
   if r != nil:
     var stack = @[r]
-    while stack.len > 0: 
+    while stack.len > 0:
       var it = stack.pop
-      while isConc(it):
+      while it.left != nil:
+        assert(it.right != nil)
         stack.add(it.right)
         it = it.left
         assert(it != nil)
-      assert(it.data != nil)
       yield it.data
-  
-iterator items*(r: PRope): char =
-  ## iterates over any character in the rope `r`.
+
+iterator items*(r: Rope): char =
+  ## Iterates over any character in the rope `r`.
   for s in leaves(r):
     for c in items(s): yield c
 
-proc write*(f: TFile, r: PRope) {.rtl, extern: "nro$1".} =
-  ## writes a rope to a file.
+proc write*(f: File, r: Rope) {.rtl, extern: "nro$1".} =
+  ## Writes a rope to a file.
   for s in leaves(r): write(f, s)
 
-proc `$`*(r: PRope): string  {.rtl, extern: "nroToString".}= 
-  ## converts a rope back to a string.
-  result = newString(r.len)
-  setLen(result, 0)
+proc write*(s: Stream, r: Rope) {.rtl, extern: "nroWriteStream".} =
+  ## Writes a rope to a stream.
+  for rs in leaves(r): write(s, rs)
+
+proc `$`*(r: Rope): string {.rtl, extern: "nroToString".} =
+  ## Converts a rope back to a string.
+  result = newStringOfCap(r.len)
   for s in leaves(r): add(result, s)
 
-when false:
-  # Format string caching seems reasonable: All leaves can be shared and format
-  # string parsing has to be done only once. A compiled format string is stored
-  # as a rope. A negative length is used for the index into the args array.
-  proc compiledArg(idx: int): PRope =
-    new(result)
-    result.length = -idx
-  
-  proc compileFrmt(frmt: string): PRope = 
-    var i = 0
-    var length = len(frmt)
-    result = nil
-    var num = 0
-    while i < length: 
-      if frmt[i] == '$': 
-        inc(i)
-        case frmt[i]
-        of '$': 
-          add(result, "$")
-          inc(i)
-        of '#': 
-          inc(i)
-          add(result, compiledArg(num+1))
-          inc(num)
-        of '0'..'9': 
-          var j = 0
-          while true: 
-            j = j * 10 + ord(frmt[i]) - ord('0')
-            inc(i)
-            if frmt[i] notin {'0'..'9'}: break 
-          add(s, compiledArg(j))
-        of '{':
-          inc(i)
-          var j = 0
-          while frmt[i] in {'0'..'9'}:
-            j = j * 10 + ord(frmt[i]) - ord('0')
-            inc(i)
-          if frmt[i] == '}': inc(i)
-          else: raise newException(EInvalidValue, "invalid format string")
-          add(s, compiledArg(j))
-        else: raise newException(EInvalidValue, "invalid format string")
-      var start = i
-      while i < length: 
-        if frmt[i] != '$': inc(i)
-        else: break 
-      if i - 1 >= start: 
-        add(result, substr(frmt, start, i-1))
-  
-proc `%`*(frmt: string, args: openarray[PRope]): PRope {. 
-  rtl, extern: "nroFormat".} =
-  ## `%` substitution operator for ropes. Does not support the ``$identifier``
-  ## nor ``${identifier}`` notations.
+proc `%`*(frmt: string, args: openArray[Rope]): Rope {.rtl, extern: "nroFormat".} =
+  ## `%` substitution operator for ropes. Does not support the `$identifier`
+  ## nor `${identifier}` notations.
+  runnableExamples:
+    let r1 = "$1 $2 $3" % [rope("Nim"), rope("is"), rope("a great language")]
+    doAssert $r1 == "Nim is a great language"
+
+    let r2 = "$# $# $#" % [rope("Nim"), rope("is"), rope("a great language")]
+    doAssert $r2 == "Nim is a great language"
+
+    let r3 = "${1} ${2} ${3}" % [rope("Nim"), rope("is"), rope("a great language")]
+    doAssert $r3 == "Nim is a great language"
+
   var i = 0
   var length = len(frmt)
   result = nil
   var num = 0
-  while i < length: 
-    if frmt[i] == '$': 
+  while i < length:
+    if frmt[i] == '$':
       inc(i)
       case frmt[i]
-      of '$': 
+      of '$':
         add(result, "$")
         inc(i)
-      of '#': 
+      of '#':
         inc(i)
         add(result, args[num])
         inc(num)
-      of '0'..'9': 
+      of '0'..'9':
         var j = 0
-        while true: 
+        while true:
           j = j * 10 + ord(frmt[i]) - ord('0')
           inc(i)
-          if frmt[i] notin {'0'..'9'}: break 
+          if i >= frmt.len or frmt[i] notin {'0'..'9'}: break
         add(result, args[j-1])
       of '{':
         inc(i)
@@ -322,44 +335,65 @@ proc `%`*(frmt: string, args: openarray[PRope]): PRope {.
           j = j * 10 + ord(frmt[i]) - ord('0')
           inc(i)
         if frmt[i] == '}': inc(i)
-        else: raise newException(EInvalidValue, "invalid format string")
+        else: raise newException(ValueError, "invalid format string")
+
         add(result, args[j-1])
-      else: raise newException(EInvalidValue, "invalid format string")
+      else: raise newException(ValueError, "invalid format string")
     var start = i
-    while i < length: 
+    while i < length:
       if frmt[i] != '$': inc(i)
-      else: break 
-    if i - 1 >= start: 
+      else: break
+    if i - 1 >= start:
       add(result, substr(frmt, start, i - 1))
 
-proc addf*(c: var PRope, frmt: string, args: openarray[PRope]) {.
-  rtl, extern: "nro$1".} =
-  ## shortcut for ``add(c, frmt % args)``.
+proc addf*(c: var Rope, frmt: string, args: openArray[Rope]) {.rtl, extern: "nro$1".} =
+  ## Shortcut for `add(c, frmt % args)`.
+  runnableExamples:
+    var r = rope("Dash: ")
+    r.addf "$1 $2 $3", [rope("Nim"), rope("is"), rope("a great language")]
+    doAssert $r == "Dash: Nim is a great language"
+
   add(c, frmt % args)
 
-proc equalsFile*(r: PRope, f: TFile): bool {.rtl, extern: "nro$1File".} =
-  ## returns true if the contents of the file `f` equal `r`.
-  var bufSize = 1024 # reasonable start value
-  var buf = alloc(BufSize)
-  for s in leaves(r):
-    if s.len > bufSize:
-      bufSize = max(bufSize * 2, s.len)
-      buf = realloc(buf, bufSize)
-    var readBytes = readBuffer(f, buf, s.len)
-    result = readBytes == s.len and equalMem(buf, cstring(s), s.len)
-    if not result: break
-  if result:
-    result = readBuffer(f, buf, 1) == 0 # really at the end of file?
-  dealloc(buf)
-
-proc equalsFile*(r: PRope, f: string): bool {.rtl, extern: "nro$1Str".} =
-  ## returns true if the contents of the file `f` equal `r`. If `f` does not
-  ## exist, false is returned.
-  var bin: TFile
-  result = open(bin, f)
-  if result:
-    result = equalsFile(r, bin)
-    close(bin)
+when not defined(js) and not defined(nimscript):
+  const
+    bufSize = 1024 # 1 KB is reasonable
+
+  proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} =
+    ## Returns true if the contents of the file `f` equal `r`.
+    var
+      buf: array[bufSize, char]
+      bpos = buf.len
+      blen = buf.len
+
+    for s in leaves(r):
+      var spos = 0
+      let slen = s.len
+      while spos < slen:
+        if bpos == blen:
+          # Read more data
+          bpos = 0
+          blen = readBuffer(f, addr(buf[0]), buf.len)
+          if blen == 0: # no more data in file
+            return false
+        let n = min(blen - bpos, slen - spos)
+        # TODO: There's gotta be a better way of comparing here...
+        if not equalMem(addr(buf[bpos]),
+                        cast[pointer](cast[int](cstring(s)) + spos), n):
+          return false
+        spos += n
+        bpos += n
+
+    result = readBuffer(f, addr(buf[0]), 1) == 0 # check that we've read all
+
+  proc equalsFile*(r: Rope, filename: string): bool {.rtl, extern: "nro$1Str".} =
+    ## Returns true if the contents of the file `f` equal `r`. If `f` does not
+    ## exist, false is returned.
+    var f: File
+    result = open(f, filename)
+    if result:
+      result = equalsFile(r, f)
+      close(f)
 
 new(N) # init dummy node for splay algorithm