# # # The Nim Compiler # (c) Copyright 2012 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # # Ropes for the C code generator # # Ropes are a data structure that represents a very long string # efficiently; especially concatenation is done in O(1) instead of O(N). # Ropes make use a lazy evaluation: They are essentially concatenation # trees that are only flattened when converting to a native Nim # string or when written to disk. The empty string is represented by a # nil pointer. # A little picture makes everything clear: # # "this string" & " is internally " & "represented as" # # con -- inner nodes do not contain raw data # / \ # / \ # / \ # con "represented as" # / \ # / \ # / \ # / \ # / \ #"this string" " is internally " # # Note that this is the same as: # "this string" & (" is internally " & "represented as") # # con # / \ # / \ # / \ # "this string" con # / \ # / \ # / \ # / \ # / \ #" is internally " "represented as" # # The 'con' operator is associative! This does not matter however for # the algorithms we use for ropes. # # Note that the left and right pointers are not needed for leaves. # Leaves have relatively high memory overhead (~30 bytes on a 32 # bit machines) and we produce many of them. This is why we cache and # share leaves across different rope trees. # To cache them they are inserted in a `cache` array. import platform, hashes type FormatStr* = string # later we may change it to CString for better # performance of the code generator (assignments # copy the format strings # though it is not necessary) Rope* = ref RopeObj RopeObj*{.acyclic.} = object of RootObj # the empty rope is represented # by nil to safe space left*, right*: Rope length*: int data*: string # != nil if a leaf RopeSeq* = seq[Rope] RopesError* = enum rCannotOpenFile rInvalidFormatStr # implementation var errorHandler*: proc(err: RopesError, msg: string, useWarning = false) # avoid dependency on msgs.nim proc len*(a: Rope): int = ## the rope's length if a == nil: result = 0 else: result = a.length proc newRope(data: string = nil): Rope = new(result) if data != nil: result.length = len(data) result.data = data proc newMutableRope*(capacity = 30): Rope = ## creates a new rope that supports direct modifications of the rope's ## 'data' and 'length' fields. new(result) result.data = newStringOfCap(capacity) proc freezeMutableRope*(r: Rope) {.inline.} = r.length = r.data.len var cache: array[0..2048*2 - 1, Rope] proc resetRopeCache* = for i in low(cache)..high(cache): cache[i] = nil proc ropeInvariant(r: Rope): bool = if r == nil: result = true else: result = true # # if r.data <> snil then # result := true # else begin # result := (r.left <> nil) and (r.right <> nil); # if result then result := ropeInvariant(r.left); # if result then result := ropeInvariant(r.right); # end var gCacheTries* = 0 var gCacheMisses* = 0 var gCacheIntTries* = 0 proc insertInCache(s: string): Rope = inc gCacheTries var h = hash(s) and high(cache) result = cache[h] if isNil(result) or result.data != s: inc gCacheMisses result = newRope(s) cache[h] = result proc rope*(s: string): Rope = ## Converts a string to a rope. if s.len == 0: result = nil else: result = insertInCache(s) assert(ropeInvariant(result)) proc rope*(i: BiggestInt): Rope = ## Converts an int to a rope. inc gCacheIntTries result = rope($i) proc rope*(f: BiggestFloat): Rope = ## Converts a float to a rope. result = rope($f) proc `&`*(a, b: Rope): Rope = if a == nil: result = b elif b == nil: result = a else: result = newRope() result.length = a.length + b.length result.left = a result.right = b proc `&`*(a: Rope, b: string): Rope = ## the concatenation operator for ropes. result = a & rope(b) proc `&`*(a: string, b: Rope): Rope = ## the concatenation operator for ropes. result = rope(a) & b proc `&`*(a: openArray[Rope]): Rope = ## the concatenation operator for an openarray of ropes. for i in countup(0, high(a)): result = result & a[i] proc add*(a: var Rope, b: Rope) = ## adds `b` to the rope `a`. a = a & b proc add*(a: var Rope, b: string) = ## adds `b` to the rope `a`. a = a & b iterator leaves*(r: Rope): string = ## iterates over any leaf string in the rope `r`. if r != nil: var stack = @[r] while stack.len > 0: var it = stack.pop while isNil(it.data): stack.add(it.right) it = it.left assert(it != nil) assert(it.data != nil) yield it.data iterator items*(r: Rope): char = ## iterates over any character in the rope `r`. for s in leaves(r): for c in items(s): yield c proc writeRope*(f: File, r: Rope) = ## writes a rope to a file. for s in leaves(r): write(f, s) proc writeRope*(head: Rope, filename: string, useWarning = false) = var f: File if open(f, filename, fmWrite): if head != nil: writeRope(f, head) close(f) else: errorHandler(rCannotOpenFile, filename, useWarning) proc `$`*(r: Rope): string = ## converts a rope back to a string. result = newString(r.len) setLen(result, 0) for s in leaves(r): add(result, s) proc ropeConcat*(a: varargs[Rope]): Rope = # not overloaded version of concat to speed-up `rfmt` a little bit for i in countup(0, high(a)): result = result & a[i] proc prepend*(a: var Rope, b: Rope) = a = b & a proc prepend*(a: var Rope, b: string) = a = b & a var rnl* = tnl.newRope softRnl* = tnl.newRope noRnl* = "".newRope proc `%`*(frmt: FormatStr, args: openArray[Rope]): Rope = var i = 0 var length = len(frmt) result = nil var num = 0 while i < length: if frmt[i] == '$': inc(i) # skip '$' case frmt[i] of '$': add(result, "$") inc(i) of '#': inc(i) add(result, args[num]) inc(num) of '0'..'9': var j = 0 while true: j = j * 10 + ord(frmt[i]) - ord('0') inc(i) if frmt[i] notin {'0'..'9'}: break num = j if j > high(args) + 1: errorHandler(rInvalidFormatStr, $(j)) else: add(result, args[j-1]) of '{': inc(i) var j = 0 while frmt[i] in {'0'..'9'}: j = j * 10 + ord(frmt[i]) - ord('0') inc(i) num = j if frmt[i] == '}': inc(i) else: errorHandler(rInvalidFormatStr, $(frmt[i])) if j > high(args) + 1: errorHandler(rInvalidFormatStr, $(j)) else: add(result, args[j-1]) of 'n': add(result, softRnl) inc(i) of 'N': add(result, rnl) inc(i) else: errorHandler(rInvalidFormatStr, $(frmt[i])) var start = i while i < length: if frmt[i] != '$': inc(i) else: break if i - 1 >= start: add(result, substr(frmt, start, i - 1)) assert(ropeInvariant(result)) proc addf*(c: var Rope, frmt: FormatStr, args: openArray[Rope]) = ## shortcut for ``add(c, frmt % args)``. add(c, frmt % args) when true: template `~`*(r: string): Rope = r % [] else: {.push stack_trace: off, line_trace: off.} proc `~`*(r: static[string]): Rope = # this is the new optimized "to rope" operator # the mnemonic is that `~` looks a bit like a rope :) var r {.global.} = r % [] return r {.pop.} const bufSize = 1024 # 1 KB is reasonable proc equalsFile*(r: Rope, f: File): bool = ## returns true if the contents of the file `f` equal `r`. var buf: array[bufSize, char] bpos = buf.len blen = buf.len btotal = 0 rtotal = 0 for s in leaves(r): var spos = 0 let slen = s.len rtotal += slen while spos < slen: if bpos == blen: # Read more data bpos = 0 blen = readBuffer(f, addr(buf[0]), buf.len) btotal += blen if blen == 0: # no more data in file result = false return let n = min(blen - bpos, slen - spos) # TODO There's gotta be a better way of comparing here... if not equalMem(addr(buf[bpos]), cast[pointer](cast[int](cstring(s))+spos), n): result = false return spos += n bpos += n result = readBuffer(f, addr(buf[0]), 1) == 0 and btotal == rtotal # check that we've read all proc equalsFile*(r: Rope, filename: string): bool = ## returns true if the contents of the file `f` equal `r`. If `f` does not ## exist, false is returned. var f: File result = open(f, filename) if result: result = equalsFile(r, f) close(f) proc writeRopeIfNotEqual*(r: Rope, filename: string): bool = # returns true if overwritten if not equalsFile(r, filename): writeRope(r, filename) result = true else: result = false