summary refs log tree commit diff stats
path: root/rod/ropes.nim
diff options
context:
space:
mode:
authorAndreas Rumpf <andreas@andreas-desktop>2009-12-07 01:23:19 +0100
committerAndreas Rumpf <andreas@andreas-desktop>2009-12-07 01:23:19 +0100
commite254741541b0389dfb0b675116c76a6a144b90b7 (patch)
treec6769c04b3bdc6a77bcc5075b0df011252e3702b /rod/ropes.nim
parent90119066adf6a9a2e8d779d4955637c6dd99aba3 (diff)
downloadNim-e254741541b0389dfb0b675116c76a6a144b90b7.tar.gz
version 0.8.5: added Nimrod version of the compiler
Diffstat (limited to 'rod/ropes.nim')
-rwxr-xr-xrod/ropes.nim497
1 files changed, 497 insertions, 0 deletions
diff --git a/rod/ropes.nim b/rod/ropes.nim
new file mode 100755
index 000000000..f9b1841ee
--- /dev/null
+++ b/rod/ropes.nim
@@ -0,0 +1,497 @@
+#
+#
+#           The Nimrod Compiler
+#        (c) Copyright 2009 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+# Ropes for the C code generator
+#
+#  Ropes are a data structure that represents a very long string
+#  efficiently; especially concatenation is done in O(1) instead of O(N).
+#  Ropes make use a lazy evaluation: They are essentially concatenation
+#  trees that are only flattened when converting to a native Nimrod
+#  string or when written to disk. The empty string is represented by a
+#  nil pointer.
+#  A little picture makes everything clear:
+#
+#  "this string" & " is internally " & "represented as"
+#
+#             con  -- inner nodes do not contain raw data
+#            /   \
+#           /     \
+#          /       \
+#        con       "represented as"
+#       /   \
+#      /     \
+#     /       \
+#    /         \
+#   /           \
+#"this string"  " is internally "
+#
+#  Note that this is the same as:
+#  "this string" & (" is internally " & "represented as")
+#
+#             con
+#            /   \
+#           /     \
+#          /       \
+# "this string"    con
+#                 /   \
+#                /     \
+#               /       \
+#              /         \
+#             /           \
+#" is internally "        "represented as"
+#
+#  The 'con' operator is associative! This does not matter however for
+#  the algorithms we use for ropes.
+#
+#  Note that the left and right pointers are not needed for leafs.
+#  Leafs have relatively high memory overhead (~30 bytes on a 32
+#  bit machines) and we produce many of them. This is why we cache and
+#  share leafs accross different rope trees.
+#  To cache them they are inserted in another tree, a splay tree for best
+#  performance. But for the caching tree we use the leafs' left and right
+#  pointers.
+#
+
+import 
+  msgs, strutils, platform, nhashes, crc
+
+const 
+  CacheLeafs* = true
+  countCacheMisses* = False   # see what our little optimization gives
+
+type 
+  TFormatStr* = string # later we may change it to CString for better
+                       # performance of the code generator (assignments copy the format strings
+                       # though it is not necessary)
+  PRope* = ref TRope
+  TRope*{.acyclic.} = object of TObject # the empty rope is represented by nil to safe space
+    left*, right*: PRope
+    length*: int
+    data*: string             # != nil if a leaf
+  
+  TRopeSeq* = seq[PRope]
+
+proc con*(a, b: PRope): PRope
+proc con*(a: PRope, b: string): PRope
+proc con*(a: string, b: PRope): PRope
+proc con*(a: openarray[PRope]): PRope
+proc app*(a: var PRope, b: PRope)
+proc app*(a: var PRope, b: string)
+proc prepend*(a: var PRope, b: PRope)
+proc toRope*(s: string): PRope
+proc toRopeF*(r: BiggestFloat): PRope
+proc toRope*(i: BiggestInt): PRope
+proc ropeLen*(a: PRope): int
+proc WriteRope*(head: PRope, filename: string)
+proc writeRopeIfNotEqual*(r: PRope, filename: string): bool
+proc ropeToStr*(p: PRope): string
+proc ropef*(frmt: TFormatStr, args: openarray[PRope]): PRope
+proc appf*(c: var PRope, frmt: TFormatStr, args: openarray[PRope])
+proc getCacheStats*(): string
+proc RopeEqualsFile*(r: PRope, f: string): bool
+  # returns true if the rope r is the same as the contents of file f
+proc RopeInvariant*(r: PRope): bool
+  # exported for debugging
+# implementation
+
+proc ropeLen(a: PRope): int = 
+  if a == nil: result = 0
+  else: result = a.length
+  
+proc newRope(data: string = nil): PRope = 
+  new(result)
+  if data != nil: 
+    result.length = len(data)
+    result.data = data
+
+var 
+  cache: PRope                # the root of the cache tree
+  misses, hits: int
+  N: PRope                    # dummy rope needed for splay algorithm
+
+proc getCacheStats(): string = 
+  if hits + misses != 0: 
+    result = "Misses: " & $(misses) & " total: " & $(hits + misses) & " quot: " &
+        $(toFloat(misses) / toFloat(hits + misses))
+  else: 
+    result = ""
+  
+proc splay(s: string, tree: PRope, cmpres: var int): PRope = 
+  var 
+    le, r, y, t: PRope
+    c: int
+  t = tree
+  N.left = nil
+  N.right = nil               # reset to nil
+  le = N
+  r = N
+  while true: 
+    c = cmp(s, t.data)
+    if c < 0: 
+      if (t.left != nil) and (s < t.left.data): 
+        y = t.left
+        t.left = y.right
+        y.right = t
+        t = y
+      if t.left == nil: break 
+      r.left = t
+      r = t
+      t = t.left
+    elif c > 0: 
+      if (t.right != nil) and (s > t.right.data): 
+        y = t.right
+        t.right = y.left
+        y.left = t
+        t = y
+      if t.right == nil: break 
+      le.right = t
+      le = t
+      t = t.right
+    else: 
+      break 
+  cmpres = c
+  le.right = t.left
+  r.left = t.right
+  t.left = N.right
+  t.right = N.left
+  result = t
+
+proc insertInCache(s: string, tree: PRope): PRope = 
+  # Insert i into the tree t, unless it's already there.
+  # Return a pointer to the resulting tree.
+  var 
+    t: PRope
+    cmp: int
+  t = tree
+  if t == nil: 
+    result = newRope(s)
+    if countCacheMisses: inc(misses)
+    return 
+  t = splay(s, t, cmp)
+  if cmp == 0: 
+    # We get here if it's already in the Tree
+    # Don't add it again
+    result = t
+    if countCacheMisses: inc(hits)
+  else: 
+    if countCacheMisses: inc(misses)
+    result = newRope(s)
+    if cmp < 0: 
+      result.left = t.left
+      result.right = t
+      t.left = nil
+    else: 
+      # i > t.item:
+      result.right = t.right
+      result.left = t
+      t.right = nil
+
+proc RopeInvariant(r: PRope): bool = 
+  if r == nil: 
+    result = true
+  else: 
+    result = true #
+                  #    if r.data <> snil then
+                  #      result := true
+                  #    else begin
+                  #      result := (r.left <> nil) and (r.right <> nil);
+                  #      if result then result := ropeInvariant(r.left);
+                  #      if result then result := ropeInvariant(r.right);
+                  #    end 
+  
+proc toRope(s: string): PRope = 
+  if s == "": 
+    result = nil
+  elif cacheLeafs: 
+    result = insertInCache(s, cache)
+    cache = result
+  else: 
+    result = newRope(s)
+  assert(RopeInvariant(result))
+
+proc RopeSeqInsert(rs: var TRopeSeq, r: PRope, at: Natural) = 
+  var length: int
+  length = len(rs)
+  if at > length: 
+    setlen(rs, at + 1)
+  else: 
+    setlen(rs, length + 1)    # move old rope elements:
+  for i in countdown(length, at + 1): 
+    rs[i] = rs[i - 1]         # this is correct, I used pen and paper to validate it
+  rs[at] = r
+
+proc con(a, b: PRope): PRope = 
+  assert(RopeInvariant(a))
+  assert(RopeInvariant(b))
+  if a == nil: 
+    result = b
+  elif b == nil: 
+    result = a
+  else: 
+    result = newRope()
+    result.length = a.length + b.length
+    result.left = a
+    result.right = b
+  assert(RopeInvariant(result))
+
+proc con(a: PRope, b: string): PRope = 
+  var r: PRope
+  assert(RopeInvariant(a))
+  if b == "": 
+    result = a
+  else: 
+    r = toRope(b)
+    if a == nil: 
+      result = r
+    else: 
+      result = newRope()
+      result.length = a.length + r.length
+      result.left = a
+      result.right = r
+  assert(RopeInvariant(result))
+
+proc con(a: string, b: PRope): PRope = 
+  var r: PRope
+  assert(RopeInvariant(b))
+  if a == "": 
+    result = b
+  else: 
+    r = toRope(a)
+    if b == nil: 
+      result = r
+    else: 
+      result = newRope()
+      result.length = b.length + r.length
+      result.left = r
+      result.right = b
+  assert(RopeInvariant(result))
+
+proc con(a: openarray[PRope]): PRope = 
+  result = nil
+  for i in countup(0, high(a)): result = con(result, a[i])
+  assert(RopeInvariant(result))
+
+proc toRope(i: BiggestInt): PRope = 
+  result = toRope($(i))
+
+proc toRopeF(r: BiggestFloat): PRope = 
+  result = toRope($(r))
+
+proc app(a: var PRope, b: PRope) = 
+  a = con(a, b)
+  assert(RopeInvariant(a))
+
+proc app(a: var PRope, b: string) = 
+  a = con(a, b)
+  assert(RopeInvariant(a))
+
+proc prepend(a: var PRope, b: PRope) = 
+  a = con(b, a)
+  assert(RopeInvariant(a))
+
+proc InitStack(stack: var TRopeSeq) = 
+  stack = @ []
+
+proc push(stack: var TRopeSeq, r: PRope) = 
+  var length: int
+  length = len(stack)
+  setlen(stack, length + 1)
+  stack[length] = r
+
+proc pop(stack: var TRopeSeq): PRope = 
+  var length: int
+  length = len(stack)
+  result = stack[length - 1]
+  setlen(stack, length - 1)
+
+proc WriteRopeRec(f: var tfile, c: PRope) = 
+  assert(RopeInvariant(c))
+  if c == nil: return 
+  if (c.data != nil): 
+    write(f, c.data)
+  else: 
+    writeRopeRec(f, c.left)
+    writeRopeRec(f, c.right)
+
+proc newWriteRopeRec(f: var tfile, c: PRope) = 
+  var 
+    stack: TRopeSeq
+    it: PRope
+  assert(RopeInvariant(c))
+  initStack(stack)
+  push(stack, c)
+  while len(stack) > 0: 
+    it = pop(stack)
+    while it.data == nil: 
+      push(stack, it.right)
+      it = it.left
+      assert(it != nil)
+    assert(it.data != nil)
+    write(f, it.data)
+
+proc WriteRope(head: PRope, filename: string) = 
+  var f: tfile                # we use a textfile for automatic buffer handling
+  if open(f, filename, fmWrite): 
+    if head != nil: newWriteRopeRec(f, head)
+    close(f)
+  else: 
+    rawMessage(errCannotOpenFile, filename)
+  
+proc recRopeToStr(result: var string, resultLen: var int, p: PRope) = 
+  if p == nil: 
+    return                    # do not add to result
+  if (p.data == nil): 
+    recRopeToStr(result, resultLen, p.left)
+    recRopeToStr(result, resultLen, p.right)
+  else: 
+    CopyMem(addr(result[resultLen + 0]), addr(p.data[0]), p.length)
+    Inc(resultLen, p.length)
+    assert(resultLen <= len(result))
+
+proc newRecRopeToStr(result: var string, resultLen: var int, r: PRope) = 
+  var 
+    stack: TRopeSeq
+    it: PRope
+  initStack(stack)
+  push(stack, r)
+  while len(stack) > 0: 
+    it = pop(stack)
+    while it.data == nil: 
+      push(stack, it.right)
+      it = it.left
+    assert(it.data != nil)
+    CopyMem(addr(result[resultLen + 0]), addr(it.data[0]), it.length)
+    Inc(resultLen, it.length)
+    assert(resultLen <= len(result))
+
+proc ropeToStr(p: PRope): string = 
+  var resultLen: int
+  assert(RopeInvariant(p))
+  if p == nil: 
+    result = ""
+  else: 
+    result = newString(p.length)
+    resultLen = 0
+    newRecRopeToStr(result, resultLen, p)
+
+proc ropef(frmt: TFormatStr, args: openarray[PRope]): PRope = 
+  var i, j, length, start, num: int
+  i = 0
+  length = len(frmt)
+  result = nil
+  num = 0
+  while i <= length + 0 - 1: 
+    if frmt[i] == '$': 
+      inc(i)                  # skip '$'
+      case frmt[i]
+      of '$': 
+        app(result, "$")
+        inc(i)
+      of '#': 
+        inc(i)
+        app(result, args[num])
+        inc(num)
+      of '0'..'9': 
+        j = 0
+        while true: 
+          j = (j * 10) + Ord(frmt[i]) - ord('0')
+          inc(i)
+          if (i > length + 0 - 1) or not (frmt[i] in {'0'..'9'}): break 
+        num = j
+        if j > high(args) + 1: 
+          internalError("ropes: invalid format string $" & $(j))
+        app(result, args[j - 1])
+      of 'N', 'n': 
+        app(result, tnl)
+        inc(i)
+      else: InternalError("ropes: invalid format string $" & frmt[i])
+    start = i
+    while (i <= length + 0 - 1): 
+      if (frmt[i] != '$'): inc(i)
+      else: break 
+    if i - 1 >= start: 
+      app(result, copy(frmt, start, i - 1))
+  assert(RopeInvariant(result))
+
+proc appf(c: var PRope, frmt: TFormatStr, args: openarray[PRope]) = 
+  app(c, ropef(frmt, args))
+
+const 
+  bufSize = 1024              # 1 KB is reasonable
+
+proc auxRopeEqualsFile(r: PRope, bin: var tfile, buf: Pointer): bool = 
+  var readBytes: int
+  if (r.data != nil): 
+    if r.length > bufSize: 
+      internalError("ropes: token too long")
+    readBytes = readBuffer(bin, buf, r.length)
+    result = (readBytes == r.length) and
+        equalMem(buf, addr(r.data[0]), r.length) # BUGFIX
+  else: 
+    result = auxRopeEqualsFile(r.left, bin, buf)
+    if result: result = auxRopeEqualsFile(r.right, bin, buf)
+  
+proc RopeEqualsFile(r: PRope, f: string): bool = 
+  var 
+    bin: tfile
+    buf: Pointer
+  result = open(bin, f)
+  if not result: 
+    return                    # not equal if file does not exist
+  buf = alloc(BufSize)
+  result = auxRopeEqualsFile(r, bin, buf)
+  if result: 
+    result = readBuffer(bin, buf, bufSize) == 0 # really at the end of file?
+  dealloc(buf)
+  close(bin)
+
+proc crcFromRopeAux(r: PRope, startVal: TCrc32): TCrc32 = 
+  if r.data != nil: 
+    result = startVal
+    for i in countup(0, len(r.data) + 0 - 1): 
+      result = updateCrc32(r.data[i], result)
+  else: 
+    result = crcFromRopeAux(r.left, startVal)
+    result = crcFromRopeAux(r.right, result)
+
+proc newCrcFromRopeAux(r: PRope, startVal: TCrc32): TCrc32 = 
+  var 
+    stack: TRopeSeq
+    it: PRope
+    L, i: int
+  initStack(stack)
+  push(stack, r)
+  result = startVal
+  while len(stack) > 0: 
+    it = pop(stack)
+    while it.data == nil: 
+      push(stack, it.right)
+      it = it.left
+    assert(it.data != nil)
+    i = 0
+    L = len(it.data) + 0
+    while i < L: 
+      result = updateCrc32(it.data[i], result)
+      inc(i)
+
+proc crcFromRope(r: PRope): TCrc32 = 
+  result = newCrcFromRopeAux(r, initCrc32)
+
+proc writeRopeIfNotEqual(r: PRope, filename: string): bool = 
+  # returns true if overwritten
+  var c: TCrc32
+  c = crcFromFile(filename)
+  if c != crcFromRope(r): 
+    writeRope(r, filename)
+    result = true
+  else: 
+    result = false
+  
+new(N)
+# init dummy node for splay algorithm
\ No newline at end of file