summary refs log blame commit diff stats
path: root/compiler/ropes.nim
blob: 9b8a8466c5ca6ebdeda2a152e72b330c30582ca7 (plain) (tree)


















































                                                                         

                                                                   
                                                                      
                                             
                                                                         
                                                                          



            
                                       






                                                                      

                                                                        

                                                    

                                                                        













                                                




































                                                                                 

              

                                            

            



                                                
                      








                                                  
                       


















                                                        
              



                                    
              










































                                                                         
                      




                                                       
                                                                            

            





                                                                      
        



























                                                                         



                                       
 

                                                                 

                                       
                                                          
 
                                                       
                                                            


                                                    
 
                                      
                  
                        
                       
                          
                          







                                                                               
                                      


                                           

                                                              

                        
              
             
                         










                                        
                 











                                                                        
                 
                             


                                 
                                             








                                                                       


                                            
                                                  






                                                            
                


                                                                
                          








                                                                            
                                          





                                                             
                            

                        
                       
                          
                          

                          

                        
















                                                             
                                            
#
#
#           The Nimrod Compiler
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

# Ropes for the C code generator
#
#  Ropes are a data structure that represents a very long string
#  efficiently; especially concatenation is done in O(1) instead of O(N).
#  Ropes make use a lazy evaluation: They are essentially concatenation
#  trees that are only flattened when converting to a native Nimrod
#  string or when written to disk. The empty string is represented by a
#  nil pointer.
#  A little picture makes everything clear:
#
#  "this string" & " is internally " & "represented as"
#
#             con  -- inner nodes do not contain raw data
#            /   \
#           /     \
#          /       \
#        con       "represented as"
#       /   \
#      /     \
#     /       \
#    /         \
#   /           \
#"this string"  " is internally "
#
#  Note that this is the same as:
#  "this string" & (" is internally " & "represented as")
#
#             con
#            /   \
#           /     \
#          /       \
# "this string"    con
#                 /   \
#                /     \
#               /       \
#              /         \
#             /           \
#" is internally "        "represented as"
#
#  The 'con' operator is associative! This does not matter however for
#  the algorithms we use for ropes.
#
#  Note that the left and right pointers are not needed for leaves.
#  Leaves have relatively high memory overhead (~30 bytes on a 32
#  bit machines) and we produce many of them. This is why we cache and
#  share leaves accross different rope trees.
#  To cache them they are inserted in another tree, a splay tree for best
#  performance. But for the caching tree we use the leaves' left and right
#  pointers.
#

import 
  msgs, strutils, platform, hashes, crc

const 
  CacheLeafs* = true
  countCacheMisses* = False   # see what our little optimization gives

type 
  TFormatStr* = string # later we may change it to CString for better
                       # performance of the code generator (assignments 
                       # copy the format strings
                       # though it is not necessary)
  PRope* = ref TRope
  TRope*{.acyclic.} = object of TObject # the empty rope is represented 
                                        # by nil to safe space
    left*, right*: PRope
    length*: int
    data*: string             # != nil if a leaf
  
  TRopeSeq* = seq[PRope]

proc con*(a, b: PRope): PRope
proc con*(a: PRope, b: string): PRope
proc con*(a: string, b: PRope): PRope
proc con*(a: openarray[PRope]): PRope
proc app*(a: var PRope, b: PRope)
proc app*(a: var PRope, b: string)
proc prepend*(a: var PRope, b: PRope)
proc toRope*(s: string): PRope
proc toRope*(i: BiggestInt): PRope
proc ropeLen*(a: PRope): int
proc WriteRope*(head: PRope, filename: string)
proc writeRopeIfNotEqual*(r: PRope, filename: string): bool
proc ropeToStr*(p: PRope): string
proc ropef*(frmt: TFormatStr, args: openarray[PRope]): PRope
proc appf*(c: var PRope, frmt: TFormatStr, args: openarray[PRope])
proc getCacheStats*(): string
proc RopeEqualsFile*(r: PRope, f: string): bool
  # returns true if the rope r is the same as the contents of file f
proc RopeInvariant*(r: PRope): bool
  # exported for debugging
# implementation

proc ropeLen(a: PRope): int = 
  if a == nil: result = 0
  else: result = a.length
  
proc newRope(data: string = nil): PRope = 
  new(result)
  if data != nil: 
    result.length = len(data)
    result.data = data

var 
  cache: PRope                # the root of the cache tree
  misses, hits: int
  N: PRope                    # dummy rope needed for splay algorithm

proc getCacheStats(): string = 
  if hits + misses != 0: 
    result = "Misses: " & $(misses) & " total: " & $(hits + misses) & " quot: " &
        $(toFloat(misses) / toFloat(hits + misses))
  else: 
    result = ""
  
proc splay(s: string, tree: PRope, cmpres: var int): PRope = 
  var c: int
  var t = tree
  N.left = nil
  N.right = nil               # reset to nil
  var le = N
  var r = N
  while true: 
    c = cmp(s, t.data)
    if c < 0: 
      if (t.left != nil) and (s < t.left.data): 
        var y = t.left
        t.left = y.right
        y.right = t
        t = y
      if t.left == nil: break 
      r.left = t
      r = t
      t = t.left
    elif c > 0: 
      if (t.right != nil) and (s > t.right.data): 
        var y = t.right
        t.right = y.left
        y.left = t
        t = y
      if t.right == nil: break 
      le.right = t
      le = t
      t = t.right
    else: 
      break 
  cmpres = c
  le.right = t.left
  r.left = t.right
  t.left = N.right
  t.right = N.left
  result = t

proc insertInCache(s: string, tree: PRope): PRope = 
  # Insert i into the tree t, unless it's already there.
  # Return a pointer to the resulting tree.
  var t = tree
  if t == nil: 
    result = newRope(s)
    if countCacheMisses: inc(misses)
    return 
  var cmp: int
  t = splay(s, t, cmp)
  if cmp == 0: 
    # We get here if it's already in the Tree
    # Don't add it again
    result = t
    if countCacheMisses: inc(hits)
  else: 
    if countCacheMisses: inc(misses)
    result = newRope(s)
    if cmp < 0: 
      result.left = t.left
      result.right = t
      t.left = nil
    else: 
      # i > t.item:
      result.right = t.right
      result.left = t
      t.right = nil

proc RopeInvariant(r: PRope): bool = 
  if r == nil: 
    result = true
  else: 
    result = true #
                  #    if r.data <> snil then
                  #      result := true
                  #    else begin
                  #      result := (r.left <> nil) and (r.right <> nil);
                  #      if result then result := ropeInvariant(r.left);
                  #      if result then result := ropeInvariant(r.right);
                  #    end 
  
proc toRope(s: string): PRope = 
  if s == "": 
    result = nil
  elif cacheLeafs: 
    result = insertInCache(s, cache)
    cache = result
  else: 
    result = newRope(s)
  assert(RopeInvariant(result))

proc RopeSeqInsert(rs: var TRopeSeq, r: PRope, at: Natural) = 
  var length = len(rs)
  if at > length: 
    setlen(rs, at + 1)
  else: 
    setlen(rs, length + 1)    # move old rope elements:
  for i in countdown(length, at + 1): 
    rs[i] = rs[i - 1] # this is correct, I used pen and paper to validate it
  rs[at] = r

proc recRopeToStr(result: var string, resultLen: var int, p: PRope) = 
  if p == nil: 
    return                    # do not add to result
  if (p.data == nil): 
    recRopeToStr(result, resultLen, p.left)
    recRopeToStr(result, resultLen, p.right)
  else: 
    CopyMem(addr(result[resultLen + 0]), addr(p.data[0]), p.length)
    Inc(resultLen, p.length)
    assert(resultLen <= len(result))

proc newRecRopeToStr(result: var string, resultLen: var int, r: PRope) = 
  var stack = @[r]
  while len(stack) > 0: 
    var it = pop(stack)
    while it.data == nil: 
      add(stack, it.right)
      it = it.left
    assert(it.data != nil)
    CopyMem(addr(result[resultLen]), addr(it.data[0]), it.length)
    Inc(resultLen, it.length)
    assert(resultLen <= len(result))

proc ropeToStr(p: PRope): string = 
  if p == nil: 
    result = ""
  else: 
    result = newString(p.length)
    var resultLen = 0
    newRecRopeToStr(result, resultLen, p)

proc con(a, b: PRope): PRope = 
  if a == nil: result = b
  elif b == nil: result = a
  else:
    result = newRope()
    result.length = a.length + b.length
    result.left = a
    result.right = b

proc con(a: PRope, b: string): PRope = result = con(a, toRope(b))
proc con(a: string, b: PRope): PRope = result = con(toRope(a), b)

proc con(a: openarray[PRope]): PRope = 
  for i in countup(0, high(a)): result = con(result, a[i])

proc toRope(i: BiggestInt): PRope = result = toRope($i)
#proc toRopeF*(r: BiggestFloat): PRope = result = toRope($r)
proc app(a: var PRope, b: PRope) = a = con(a, b)
proc app(a: var PRope, b: string) = a = con(a, b)
proc prepend(a: var PRope, b: PRope) = a = con(b, a)

proc writeRope*(f: TFile, c: PRope) = 
  var stack = @[c]
  while len(stack) > 0: 
    var it = pop(stack)
    while it.data == nil: 
      add(stack, it.right)
      it = it.left
      assert(it != nil)
    assert(it.data != nil)
    write(f, it.data)

proc WriteRope(head: PRope, filename: string) = 
  var f: tfile                # we use a textfile for automatic buffer handling
  if open(f, filename, fmWrite): 
    if head != nil: WriteRope(f, head)
    close(f)
  else: 
    rawMessage(errCannotOpenFile, filename)

proc ropef(frmt: TFormatStr, args: openarray[PRope]): PRope = 
  var i = 0
  var length = len(frmt)
  result = nil
  var num = 0
  while i <= length - 1: 
    if frmt[i] == '$': 
      inc(i)                  # skip '$'
      case frmt[i]
      of '$': 
        app(result, "$")
        inc(i)
      of '#': 
        inc(i)
        app(result, args[num])
        inc(num)
      of '0'..'9': 
        var j = 0
        while true: 
          j = (j * 10) + Ord(frmt[i]) - ord('0')
          inc(i)
          if (i > length + 0 - 1) or not (frmt[i] in {'0'..'9'}): break 
        num = j
        if j > high(args) + 1: 
          internalError("ropes: invalid format string $" & $(j))
        app(result, args[j - 1])
      of 'N', 'n': 
        app(result, tnl)
        inc(i)
      else: InternalError("ropes: invalid format string $" & frmt[i])
    var start = i
    while (i <= length - 1): 
      if (frmt[i] != '$'): inc(i)
      else: break 
    if i - 1 >= start: 
      app(result, substr(frmt, start, i - 1))
  assert(RopeInvariant(result))

proc appf(c: var PRope, frmt: TFormatStr, args: openarray[PRope]) = 
  app(c, ropef(frmt, args))

const 
  bufSize = 1024              # 1 KB is reasonable

proc auxRopeEqualsFile(r: PRope, bin: var tfile, buf: Pointer): bool = 
  if (r.data != nil): 
    if r.length > bufSize: 
      internalError("ropes: token too long")
    var readBytes = readBuffer(bin, buf, r.length)
    result = (readBytes == r.length) and
        equalMem(buf, addr(r.data[0]), r.length) # BUGFIX
  else: 
    result = auxRopeEqualsFile(r.left, bin, buf)
    if result: result = auxRopeEqualsFile(r.right, bin, buf)
  
proc RopeEqualsFile(r: PRope, f: string): bool = 
  var bin: tfile
  result = open(bin, f)
  if not result: 
    return                    # not equal if file does not exist
  var buf = alloc(BufSize)
  result = auxRopeEqualsFile(r, bin, buf)
  if result: 
    result = readBuffer(bin, buf, bufSize) == 0 # really at the end of file?
  dealloc(buf)
  close(bin)

proc crcFromRopeAux(r: PRope, startVal: TCrc32): TCrc32 = 
  if r.data != nil: 
    result = startVal
    for i in countup(0, len(r.data) - 1): 
      result = updateCrc32(r.data[i], result)
  else: 
    result = crcFromRopeAux(r.left, startVal)
    result = crcFromRopeAux(r.right, result)

proc newCrcFromRopeAux(r: PRope, startVal: TCrc32): TCrc32 = 
  var stack: TRopeSeq = @[r]
  result = startVal
  while len(stack) > 0: 
    var it = pop(stack)
    while it.data == nil: 
      add(stack, it.right)
      it = it.left
    assert(it.data != nil)
    var i = 0
    var L = len(it.data)
    while i < L: 
      result = updateCrc32(it.data[i], result)
      inc(i)

proc crcFromRope(r: PRope): TCrc32 = 
  result = newCrcFromRopeAux(r, initCrc32)

proc writeRopeIfNotEqual(r: PRope, filename: string): bool = 
  # returns true if overwritten
  var c: TCrc32
  c = crcFromFile(filename)
  if c != crcFromRope(r): 
    writeRope(r, filename)
    result = true
  else: 
    result = false
  
new(N) # init dummy node for splay algorithm