diff options
Diffstat (limited to 'compiler/ropes.nim')
-rw-r--r-- | compiler/ropes.nim | 298 |
1 files changed, 50 insertions, 248 deletions
diff --git a/compiler/ropes.nim b/compiler/ropes.nim index 358ce8a53..e0d5aa0d3 100644 --- a/compiler/ropes.nim +++ b/compiler/ropes.nim @@ -7,256 +7,78 @@ # distribution, for details about the copyright. # -# Ropes for the C code generator -# -# Ropes are a data structure that represents a very long string -# efficiently; especially concatenation is done in O(1) instead of O(N). -# Ropes make use a lazy evaluation: They are essentially concatenation -# trees that are only flattened when converting to a native Nim -# string or when written to disk. The empty string is represented by a -# nil pointer. -# A little picture makes everything clear: -# -# "this string" & " is internally " & "represented as" -# -# con -- inner nodes do not contain raw data -# / \ -# / \ -# / \ -# con "represented as" -# / \ -# / \ -# / \ -# / \ -# / \ -#"this string" " is internally " -# -# Note that this is the same as: -# "this string" & (" is internally " & "represented as") -# -# con -# / \ -# / \ -# / \ -# "this string" con -# / \ -# / \ -# / \ -# / \ -# / \ -#" is internally " "represented as" -# -# The 'con' operator is associative! This does not matter however for -# the algorithms we use for ropes. -# -# Note that the left and right pointers are not needed for leaves. -# Leaves have relatively high memory overhead (~30 bytes on a 32 -# bit machines) and we produce many of them. This is why we cache and -# share leaves across different rope trees. -# To cache them they are inserted in a `cache` array. +# Ropes for the C code generator. Ropes are mapped to `string` directly nowadays. + +from pathutils import AbsoluteFile -import - platform, hashes +when defined(nimPreviewSlimSystem): + import std/[assertions, syncio, formatfloat] type FormatStr* = string # later we may change it to CString for better # performance of the code generator (assignments # copy the format strings # though it is not necessary) - Rope* = ref RopeObj - RopeObj*{.acyclic.} = object of RootObj # the empty rope is represented - # by nil to safe space - left*, right*: Rope - length*: int - data*: string # != nil if a leaf - - RopeSeq* = seq[Rope] - - RopesError* = enum - rCannotOpenFile - rInvalidFormatStr - -# implementation + Rope* = string -var errorHandler*: proc(err: RopesError, msg: string, useWarning = false) - # avoid dependency on msgs.nim +proc newRopeAppender*(cap = 80): string {.inline.} = + result = newStringOfCap(cap) -proc len*(a: Rope): int = - ## the rope's length - if a == nil: result = 0 - else: result = a.length +proc freeze*(r: Rope) {.inline.} = discard -proc newRope(data: string = nil): Rope = - new(result) - if data != nil: - result.length = len(data) - result.data = data +proc resetRopeCache* = discard -proc newMutableRope*(capacity = 30): Rope = - ## creates a new rope that supports direct modifications of the rope's - ## 'data' and 'length' fields. - new(result) - result.data = newStringOfCap(capacity) - -proc freezeMutableRope*(r: Rope) {.inline.} = - r.length = r.data.len - -var - cache: array[0..2048*2 - 1, Rope] - -proc resetRopeCache* = - for i in low(cache)..high(cache): - cache[i] = nil - -proc ropeInvariant(r: Rope): bool = - if r == nil: - result = true - else: - result = true # - # if r.data <> snil then - # result := true - # else begin - # result := (r.left <> nil) and (r.right <> nil); - # if result then result := ropeInvariant(r.left); - # if result then result := ropeInvariant(r.right); - # end - -var gCacheTries* = 0 -var gCacheMisses* = 0 -var gCacheIntTries* = 0 - -proc insertInCache(s: string): Rope = - inc gCacheTries - var h = hash(s) and high(cache) - result = cache[h] - if isNil(result) or result.data != s: - inc gCacheMisses - result = newRope(s) - cache[h] = result - -proc rope*(s: string): Rope = - ## Converts a string to a rope. - if s.len == 0: - result = nil - else: - result = insertInCache(s) - assert(ropeInvariant(result)) +template rope*(s: string): string = s proc rope*(i: BiggestInt): Rope = ## Converts an int to a rope. - inc gCacheIntTries result = rope($i) proc rope*(f: BiggestFloat): Rope = ## Converts a float to a rope. result = rope($f) -proc `&`*(a, b: Rope): Rope = - if a == nil: - result = b - elif b == nil: - result = a - else: - result = newRope() - result.length = a.length + b.length - result.left = a - result.right = b - -proc `&`*(a: Rope, b: string): Rope = - ## the concatenation operator for ropes. - result = a & rope(b) - -proc `&`*(a: string, b: Rope): Rope = - ## the concatenation operator for ropes. - result = rope(a) & b - -proc `&`*(a: openArray[Rope]): Rope = - ## the concatenation operator for an openarray of ropes. - for i in countup(0, high(a)): result = result & a[i] - -proc add*(a: var Rope, b: Rope) = - ## adds `b` to the rope `a`. - a = a & b - -proc add*(a: var Rope, b: string) = - ## adds `b` to the rope `a`. - a = a & b - -iterator leaves*(r: Rope): string = - ## iterates over any leaf string in the rope `r`. - if r != nil: - var stack = @[r] - while stack.len > 0: - var it = stack.pop - while isNil(it.data): - stack.add(it.right) - it = it.left - assert(it != nil) - assert(it.data != nil) - yield it.data - -iterator items*(r: Rope): char = - ## iterates over any character in the rope `r`. - for s in leaves(r): - for c in items(s): yield c - proc writeRope*(f: File, r: Rope) = ## writes a rope to a file. - for s in leaves(r): write(f, s) + write(f, r) -proc writeRope*(head: Rope, filename: string, useWarning = false) = - var f: File - if open(f, filename, fmWrite): - if head != nil: writeRope(f, head) +proc writeRope*(head: Rope, filename: AbsoluteFile): bool = + var f: File = default(File) + if open(f, filename.string, fmWrite): + writeRope(f, head) close(f) + result = true else: - errorHandler(rCannotOpenFile, filename, useWarning) - -proc `$`*(r: Rope): string = - ## converts a rope back to a string. - result = newString(r.len) - setLen(result, 0) - for s in leaves(r): add(result, s) - -proc ropeConcat*(a: varargs[Rope]): Rope = - # not overloaded version of concat to speed-up `rfmt` a little bit - for i in countup(0, high(a)): result = result & a[i] + result = false -proc prepend*(a: var Rope, b: Rope) = a = b & a proc prepend*(a: var Rope, b: string) = a = b & a -var - rnl* = tnl.newRope - softRnl* = tnl.newRope - noRnl* = "".newRope - -proc `%`*(frmt: FormatStr, args: openArray[Rope]): Rope = +proc runtimeFormat*(frmt: FormatStr, args: openArray[Rope]): Rope = var i = 0 - var length = len(frmt) - result = nil + result = newRopeAppender() var num = 0 - while i < length: + while i < frmt.len: if frmt[i] == '$': inc(i) # skip '$' case frmt[i] of '$': - add(result, "$") + result.add("$") inc(i) of '#': inc(i) - add(result, args[num]) + result.add(args[num]) inc(num) of '0'..'9': var j = 0 while true: j = j * 10 + ord(frmt[i]) - ord('0') inc(i) - if frmt[i] notin {'0'..'9'}: break + if i >= frmt.len or frmt[i] notin {'0'..'9'}: break num = j if j > high(args) + 1: - errorHandler(rInvalidFormatStr, $(j)) + raiseAssert "invalid format string: " & frmt else: - add(result, args[j-1]) + result.add(args[j-1]) of '{': inc(i) var j = 0 @@ -265,60 +87,48 @@ proc `%`*(frmt: FormatStr, args: openArray[Rope]): Rope = inc(i) num = j if frmt[i] == '}': inc(i) - else: errorHandler(rInvalidFormatStr, $(frmt[i])) + else: + raiseAssert "invalid format string: " & frmt if j > high(args) + 1: - errorHandler(rInvalidFormatStr, $(j)) + raiseAssert "invalid format string: " & frmt else: - add(result, args[j-1]) + result.add(args[j-1]) of 'n': - add(result, softRnl) + result.add("\n") inc(i) of 'N': - add(result, rnl) + result.add("\n") inc(i) else: - errorHandler(rInvalidFormatStr, $(frmt[i])) - var start = i - while i < length: - if frmt[i] != '$': inc(i) - else: break - if i - 1 >= start: - add(result, substr(frmt, start, i - 1)) - assert(ropeInvariant(result)) + raiseAssert "invalid format string: " & frmt + else: + result.add(frmt[i]) + inc(i) -proc addf*(c: var Rope, frmt: FormatStr, args: openArray[Rope]) = - ## shortcut for ``add(c, frmt % args)``. - add(c, frmt % args) +proc `%`*(frmt: static[FormatStr], args: openArray[Rope]): Rope = + runtimeFormat(frmt, args) -when true: - template `~`*(r: string): Rope = r % [] -else: - {.push stack_trace: off, line_trace: off.} - proc `~`*(r: static[string]): Rope = - # this is the new optimized "to rope" operator - # the mnemonic is that `~` looks a bit like a rope :) - var r {.global.} = r % [] - return r - {.pop.} +template addf*(c: var Rope, frmt: FormatStr, args: openArray[Rope]) = + ## shortcut for ``add(c, frmt % args)``. + c.add(frmt % args) const bufSize = 1024 # 1 KB is reasonable -proc equalsFile*(r: Rope, f: File): bool = +proc equalsFile*(s: Rope, f: File): bool = ## returns true if the contents of the file `f` equal `r`. var - buf: array[bufSize, char] + buf: array[bufSize, char] = default(array[bufSize, char]) bpos = buf.len blen = buf.len btotal = 0 rtotal = 0 - for s in leaves(r): + when true: var spos = 0 - let slen = s.len - rtotal += slen - while spos < slen: + rtotal += s.len + while spos < s.len: if bpos == blen: # Read more data bpos = 0 @@ -327,7 +137,7 @@ proc equalsFile*(r: Rope, f: File): bool = if blen == 0: # no more data in file result = false return - let n = min(blen - bpos, slen - spos) + let n = min(blen - bpos, s.len - spos) # TODO There's gotta be a better way of comparing here... if not equalMem(addr(buf[bpos]), cast[pointer](cast[int](cstring(s))+spos), n): result = false @@ -338,19 +148,11 @@ proc equalsFile*(r: Rope, f: File): bool = result = readBuffer(f, addr(buf[0]), 1) == 0 and btotal == rtotal # check that we've read all -proc equalsFile*(r: Rope, filename: string): bool = +proc equalsFile*(r: Rope, filename: AbsoluteFile): bool = ## returns true if the contents of the file `f` equal `r`. If `f` does not ## exist, false is returned. - var f: File - result = open(f, filename) + var f: File = default(File) + result = open(f, filename.string) if result: result = equalsFile(r, f) close(f) - -proc writeRopeIfNotEqual*(r: Rope, filename: string): bool = - # returns true if overwritten - if not equalsFile(r, filename): - writeRope(r, filename) - result = true - else: - result = false |