diff options
author | Andreas Rumpf <andreas@andreas-desktop> | 2009-12-07 01:23:19 +0100 |
---|---|---|
committer | Andreas Rumpf <andreas@andreas-desktop> | 2009-12-07 01:23:19 +0100 |
commit | e254741541b0389dfb0b675116c76a6a144b90b7 (patch) | |
tree | c6769c04b3bdc6a77bcc5075b0df011252e3702b /rod/ropes.nim | |
parent | 90119066adf6a9a2e8d779d4955637c6dd99aba3 (diff) | |
download | Nim-e254741541b0389dfb0b675116c76a6a144b90b7.tar.gz |
version 0.8.5: added Nimrod version of the compiler
Diffstat (limited to 'rod/ropes.nim')
-rwxr-xr-x | rod/ropes.nim | 497 |
1 files changed, 497 insertions, 0 deletions
diff --git a/rod/ropes.nim b/rod/ropes.nim new file mode 100755 index 000000000..f9b1841ee --- /dev/null +++ b/rod/ropes.nim @@ -0,0 +1,497 @@ +# +# +# The Nimrod Compiler +# (c) Copyright 2009 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +# Ropes for the C code generator +# +# Ropes are a data structure that represents a very long string +# efficiently; especially concatenation is done in O(1) instead of O(N). +# Ropes make use a lazy evaluation: They are essentially concatenation +# trees that are only flattened when converting to a native Nimrod +# string or when written to disk. The empty string is represented by a +# nil pointer. +# A little picture makes everything clear: +# +# "this string" & " is internally " & "represented as" +# +# con -- inner nodes do not contain raw data +# / \ +# / \ +# / \ +# con "represented as" +# / \ +# / \ +# / \ +# / \ +# / \ +#"this string" " is internally " +# +# Note that this is the same as: +# "this string" & (" is internally " & "represented as") +# +# con +# / \ +# / \ +# / \ +# "this string" con +# / \ +# / \ +# / \ +# / \ +# / \ +#" is internally " "represented as" +# +# The 'con' operator is associative! This does not matter however for +# the algorithms we use for ropes. +# +# Note that the left and right pointers are not needed for leafs. +# Leafs have relatively high memory overhead (~30 bytes on a 32 +# bit machines) and we produce many of them. This is why we cache and +# share leafs accross different rope trees. +# To cache them they are inserted in another tree, a splay tree for best +# performance. But for the caching tree we use the leafs' left and right +# pointers. +# + +import + msgs, strutils, platform, nhashes, crc + +const + CacheLeafs* = true + countCacheMisses* = False # see what our little optimization gives + +type + TFormatStr* = string # later we may change it to CString for better + # performance of the code generator (assignments copy the format strings + # though it is not necessary) + PRope* = ref TRope + TRope*{.acyclic.} = object of TObject # the empty rope is represented by nil to safe space + left*, right*: PRope + length*: int + data*: string # != nil if a leaf + + TRopeSeq* = seq[PRope] + +proc con*(a, b: PRope): PRope +proc con*(a: PRope, b: string): PRope +proc con*(a: string, b: PRope): PRope +proc con*(a: openarray[PRope]): PRope +proc app*(a: var PRope, b: PRope) +proc app*(a: var PRope, b: string) +proc prepend*(a: var PRope, b: PRope) +proc toRope*(s: string): PRope +proc toRopeF*(r: BiggestFloat): PRope +proc toRope*(i: BiggestInt): PRope +proc ropeLen*(a: PRope): int +proc WriteRope*(head: PRope, filename: string) +proc writeRopeIfNotEqual*(r: PRope, filename: string): bool +proc ropeToStr*(p: PRope): string +proc ropef*(frmt: TFormatStr, args: openarray[PRope]): PRope +proc appf*(c: var PRope, frmt: TFormatStr, args: openarray[PRope]) +proc getCacheStats*(): string +proc RopeEqualsFile*(r: PRope, f: string): bool + # returns true if the rope r is the same as the contents of file f +proc RopeInvariant*(r: PRope): bool + # exported for debugging +# implementation + +proc ropeLen(a: PRope): int = + if a == nil: result = 0 + else: result = a.length + +proc newRope(data: string = nil): PRope = + new(result) + if data != nil: + result.length = len(data) + result.data = data + +var + cache: PRope # the root of the cache tree + misses, hits: int + N: PRope # dummy rope needed for splay algorithm + +proc getCacheStats(): string = + if hits + misses != 0: + result = "Misses: " & $(misses) & " total: " & $(hits + misses) & " quot: " & + $(toFloat(misses) / toFloat(hits + misses)) + else: + result = "" + +proc splay(s: string, tree: PRope, cmpres: var int): PRope = + var + le, r, y, t: PRope + c: int + t = tree + N.left = nil + N.right = nil # reset to nil + le = N + r = N + while true: + c = cmp(s, t.data) + if c < 0: + if (t.left != nil) and (s < t.left.data): + y = t.left + t.left = y.right + y.right = t + t = y + if t.left == nil: break + r.left = t + r = t + t = t.left + elif c > 0: + if (t.right != nil) and (s > t.right.data): + y = t.right + t.right = y.left + y.left = t + t = y + if t.right == nil: break + le.right = t + le = t + t = t.right + else: + break + cmpres = c + le.right = t.left + r.left = t.right + t.left = N.right + t.right = N.left + result = t + +proc insertInCache(s: string, tree: PRope): PRope = + # Insert i into the tree t, unless it's already there. + # Return a pointer to the resulting tree. + var + t: PRope + cmp: int + t = tree + if t == nil: + result = newRope(s) + if countCacheMisses: inc(misses) + return + t = splay(s, t, cmp) + if cmp == 0: + # We get here if it's already in the Tree + # Don't add it again + result = t + if countCacheMisses: inc(hits) + else: + if countCacheMisses: inc(misses) + result = newRope(s) + if cmp < 0: + result.left = t.left + result.right = t + t.left = nil + else: + # i > t.item: + result.right = t.right + result.left = t + t.right = nil + +proc RopeInvariant(r: PRope): bool = + if r == nil: + result = true + else: + result = true # + # if r.data <> snil then + # result := true + # else begin + # result := (r.left <> nil) and (r.right <> nil); + # if result then result := ropeInvariant(r.left); + # if result then result := ropeInvariant(r.right); + # end + +proc toRope(s: string): PRope = + if s == "": + result = nil + elif cacheLeafs: + result = insertInCache(s, cache) + cache = result + else: + result = newRope(s) + assert(RopeInvariant(result)) + +proc RopeSeqInsert(rs: var TRopeSeq, r: PRope, at: Natural) = + var length: int + length = len(rs) + if at > length: + setlen(rs, at + 1) + else: + setlen(rs, length + 1) # move old rope elements: + for i in countdown(length, at + 1): + rs[i] = rs[i - 1] # this is correct, I used pen and paper to validate it + rs[at] = r + +proc con(a, b: PRope): PRope = + assert(RopeInvariant(a)) + assert(RopeInvariant(b)) + if a == nil: + result = b + elif b == nil: + result = a + else: + result = newRope() + result.length = a.length + b.length + result.left = a + result.right = b + assert(RopeInvariant(result)) + +proc con(a: PRope, b: string): PRope = + var r: PRope + assert(RopeInvariant(a)) + if b == "": + result = a + else: + r = toRope(b) + if a == nil: + result = r + else: + result = newRope() + result.length = a.length + r.length + result.left = a + result.right = r + assert(RopeInvariant(result)) + +proc con(a: string, b: PRope): PRope = + var r: PRope + assert(RopeInvariant(b)) + if a == "": + result = b + else: + r = toRope(a) + if b == nil: + result = r + else: + result = newRope() + result.length = b.length + r.length + result.left = r + result.right = b + assert(RopeInvariant(result)) + +proc con(a: openarray[PRope]): PRope = + result = nil + for i in countup(0, high(a)): result = con(result, a[i]) + assert(RopeInvariant(result)) + +proc toRope(i: BiggestInt): PRope = + result = toRope($(i)) + +proc toRopeF(r: BiggestFloat): PRope = + result = toRope($(r)) + +proc app(a: var PRope, b: PRope) = + a = con(a, b) + assert(RopeInvariant(a)) + +proc app(a: var PRope, b: string) = + a = con(a, b) + assert(RopeInvariant(a)) + +proc prepend(a: var PRope, b: PRope) = + a = con(b, a) + assert(RopeInvariant(a)) + +proc InitStack(stack: var TRopeSeq) = + stack = @ [] + +proc push(stack: var TRopeSeq, r: PRope) = + var length: int + length = len(stack) + setlen(stack, length + 1) + stack[length] = r + +proc pop(stack: var TRopeSeq): PRope = + var length: int + length = len(stack) + result = stack[length - 1] + setlen(stack, length - 1) + +proc WriteRopeRec(f: var tfile, c: PRope) = + assert(RopeInvariant(c)) + if c == nil: return + if (c.data != nil): + write(f, c.data) + else: + writeRopeRec(f, c.left) + writeRopeRec(f, c.right) + +proc newWriteRopeRec(f: var tfile, c: PRope) = + var + stack: TRopeSeq + it: PRope + assert(RopeInvariant(c)) + initStack(stack) + push(stack, c) + while len(stack) > 0: + it = pop(stack) + while it.data == nil: + push(stack, it.right) + it = it.left + assert(it != nil) + assert(it.data != nil) + write(f, it.data) + +proc WriteRope(head: PRope, filename: string) = + var f: tfile # we use a textfile for automatic buffer handling + if open(f, filename, fmWrite): + if head != nil: newWriteRopeRec(f, head) + close(f) + else: + rawMessage(errCannotOpenFile, filename) + +proc recRopeToStr(result: var string, resultLen: var int, p: PRope) = + if p == nil: + return # do not add to result + if (p.data == nil): + recRopeToStr(result, resultLen, p.left) + recRopeToStr(result, resultLen, p.right) + else: + CopyMem(addr(result[resultLen + 0]), addr(p.data[0]), p.length) + Inc(resultLen, p.length) + assert(resultLen <= len(result)) + +proc newRecRopeToStr(result: var string, resultLen: var int, r: PRope) = + var + stack: TRopeSeq + it: PRope + initStack(stack) + push(stack, r) + while len(stack) > 0: + it = pop(stack) + while it.data == nil: + push(stack, it.right) + it = it.left + assert(it.data != nil) + CopyMem(addr(result[resultLen + 0]), addr(it.data[0]), it.length) + Inc(resultLen, it.length) + assert(resultLen <= len(result)) + +proc ropeToStr(p: PRope): string = + var resultLen: int + assert(RopeInvariant(p)) + if p == nil: + result = "" + else: + result = newString(p.length) + resultLen = 0 + newRecRopeToStr(result, resultLen, p) + +proc ropef(frmt: TFormatStr, args: openarray[PRope]): PRope = + var i, j, length, start, num: int + i = 0 + length = len(frmt) + result = nil + num = 0 + while i <= length + 0 - 1: + if frmt[i] == '$': + inc(i) # skip '$' + case frmt[i] + of '$': + app(result, "$") + inc(i) + of '#': + inc(i) + app(result, args[num]) + inc(num) + of '0'..'9': + j = 0 + while true: + j = (j * 10) + Ord(frmt[i]) - ord('0') + inc(i) + if (i > length + 0 - 1) or not (frmt[i] in {'0'..'9'}): break + num = j + if j > high(args) + 1: + internalError("ropes: invalid format string $" & $(j)) + app(result, args[j - 1]) + of 'N', 'n': + app(result, tnl) + inc(i) + else: InternalError("ropes: invalid format string $" & frmt[i]) + start = i + while (i <= length + 0 - 1): + if (frmt[i] != '$'): inc(i) + else: break + if i - 1 >= start: + app(result, copy(frmt, start, i - 1)) + assert(RopeInvariant(result)) + +proc appf(c: var PRope, frmt: TFormatStr, args: openarray[PRope]) = + app(c, ropef(frmt, args)) + +const + bufSize = 1024 # 1 KB is reasonable + +proc auxRopeEqualsFile(r: PRope, bin: var tfile, buf: Pointer): bool = + var readBytes: int + if (r.data != nil): + if r.length > bufSize: + internalError("ropes: token too long") + readBytes = readBuffer(bin, buf, r.length) + result = (readBytes == r.length) and + equalMem(buf, addr(r.data[0]), r.length) # BUGFIX + else: + result = auxRopeEqualsFile(r.left, bin, buf) + if result: result = auxRopeEqualsFile(r.right, bin, buf) + +proc RopeEqualsFile(r: PRope, f: string): bool = + var + bin: tfile + buf: Pointer + result = open(bin, f) + if not result: + return # not equal if file does not exist + buf = alloc(BufSize) + result = auxRopeEqualsFile(r, bin, buf) + if result: + result = readBuffer(bin, buf, bufSize) == 0 # really at the end of file? + dealloc(buf) + close(bin) + +proc crcFromRopeAux(r: PRope, startVal: TCrc32): TCrc32 = + if r.data != nil: + result = startVal + for i in countup(0, len(r.data) + 0 - 1): + result = updateCrc32(r.data[i], result) + else: + result = crcFromRopeAux(r.left, startVal) + result = crcFromRopeAux(r.right, result) + +proc newCrcFromRopeAux(r: PRope, startVal: TCrc32): TCrc32 = + var + stack: TRopeSeq + it: PRope + L, i: int + initStack(stack) + push(stack, r) + result = startVal + while len(stack) > 0: + it = pop(stack) + while it.data == nil: + push(stack, it.right) + it = it.left + assert(it.data != nil) + i = 0 + L = len(it.data) + 0 + while i < L: + result = updateCrc32(it.data[i], result) + inc(i) + +proc crcFromRope(r: PRope): TCrc32 = + result = newCrcFromRopeAux(r, initCrc32) + +proc writeRopeIfNotEqual(r: PRope, filename: string): bool = + # returns true if overwritten + var c: TCrc32 + c = crcFromFile(filename) + if c != crcFromRope(r): + writeRope(r, filename) + result = true + else: + result = false + +new(N) +# init dummy node for splay algorithm \ No newline at end of file |