diff options
Diffstat (limited to 'lib/pure/ropes.nim')
-rw-r--r--[-rwxr-xr-x] | lib/pure/ropes.nim | 436 |
1 files changed, 235 insertions, 201 deletions
diff --git a/lib/pure/ropes.nim b/lib/pure/ropes.nim index 4a6c3f530..8750aca87 100755..100644 --- a/lib/pure/ropes.nim +++ b/lib/pure/ropes.nim @@ -1,6 +1,6 @@ # # -# Nimrod's Runtime Library +# Nim's Runtime Library # (c) Copyright 2010 Andreas Rumpf # # See the file "copying.txt", included in this @@ -8,19 +8,21 @@ # ## This module contains support for a `rope`:idx: data type. -## Ropes can represent very long strings efficiently; especially concatenation +## Ropes can represent very long strings efficiently; in particular, concatenation ## is done in O(1) instead of O(n). They are essentially concatenation -## trees that are only flattened when converting to a native Nimrod -## string. The empty string is represented by ``nil``. Ropes are immutable and +## trees that are only flattened when converting to a native Nim +## string. The empty string is represented by `nil`. Ropes are immutable and ## subtrees can be shared without copying. ## Leaves can be cached for better memory efficiency at the cost of ## runtime efficiency. -include "system/inclrtl" +include system/inclrtl +import std/streams -{.deadCodeElim: on.} +when defined(nimPreviewSlimSystem): + import std/[syncio, formatfloat, assertions] -{.push debugger:off .} # the user does not want to trace a part +{.push debugger: off.} # the user does not want to trace a part # of the standard library! const @@ -29,72 +31,69 @@ const var cacheEnabled = false -type - PRope* = ref TRope ## empty rope is represented by nil - TRope {.acyclic, final, pure.} = object - left, right: PRope +type + Rope* {.acyclic.} = ref object + ## A rope data type. The empty rope is represented by `nil`. + left, right: Rope length: int - data: string # != nil if a leaf - -proc isConc(r: PRope): bool {.inline.} = return isNil(r.data) + data: string # not empty if a leaf # Note that the left and right pointers are not needed for leafs. # Leaves have relatively high memory overhead (~30 bytes on a 32 # bit machine) and we produce many of them. This is why we cache and -# share leafs accross different rope trees. +# share leafs across different rope trees. # To cache them they are inserted in another tree, a splay tree for best # performance. But for the caching tree we use the leaf's left and right # pointers. -proc len*(a: PRope): int {.rtl, extern: "nro$1".} = - ## the rope's length - if a == nil: result = 0 - else: result = a.length - -proc newRope(): PRope = new(result) -proc newRope(data: string): PRope = +proc len*(a: Rope): int {.rtl, extern: "nro$1".} = + ## The rope's length. + if a == nil: 0 else: a.length + +proc newRope(): Rope = new(result) +proc newRope(data: string): Rope = new(result) result.length = len(data) result.data = data -var - cache: PRope # the root of the cache tree - N: PRope # dummy rope needed for splay algorithm +var + cache {.threadvar.}: Rope # the root of the cache tree + N {.threadvar.}: Rope # dummy rope needed for splay algorithm when countCacheMisses: var misses, hits: int - -proc splay(s: string, tree: PRope, cmpres: var int): PRope = + +proc splay(s: string, tree: Rope, cmpres: var int): Rope = var c: int var t = tree N.left = nil - N.right = nil # reset to nil + N.right = nil # reset to nil var le = N var r = N - while true: + while true: c = cmp(s, t.data) - if c < 0: - if (t.left != nil) and (s < t.left.data): + if c < 0: + if (t.left != nil) and (s < t.left.data): var y = t.left t.left = y.right y.right = t t = y - if t.left == nil: break + if t.left == nil: break r.left = t r = t t = t.left - elif c > 0: - if (t.right != nil) and (s > t.right.data): + elif c > 0: + if (t.right != nil) and (s > t.right.data): var y = t.right t.right = y.left y.left = t t = y - if t.right == nil: break + if t.right == nil: break le.right = t le = t t = t.right - else: - break + else: + break cmpres = c le.right = t.left r.left = t.right @@ -102,218 +101,232 @@ proc splay(s: string, tree: PRope, cmpres: var int): PRope = t.right = N.left result = t -proc insertInCache(s: string, tree: PRope): PRope = +proc insertInCache(s: string, tree: Rope): Rope = var t = tree - if t == nil: + if t == nil: result = newRope(s) when countCacheMisses: inc(misses) - return + return var cmp: int t = splay(s, t, cmp) - if cmp == 0: + if cmp == 0: # We get here if it's already in the Tree # Don't add it again result = t when countCacheMisses: inc(hits) - else: + else: when countCacheMisses: inc(misses) result = newRope(s) - if cmp < 0: + if cmp < 0: result.left = t.left result.right = t t.left = nil - else: + else: # i > t.item: result.right = t.right result.left = t t.right = nil -proc rope*(s: string): PRope {.rtl, extern: "nro$1Str".} = - ## Converts a string to a rope. - if s.len == 0: +proc rope*(s: string = ""): Rope {.rtl, extern: "nro$1Str".} = + ## Converts a string to a rope. + runnableExamples: + let r = rope("I'm a rope") + doAssert $r == "I'm a rope" + + if s.len == 0: result = nil - elif cacheEnabled: - result = insertInCache(s, cache) - cache = result - else: - result = newRope(s) - -proc rope*(i: BiggestInt): PRope {.rtl, extern: "nro$1BiggestInt".} = - ## Converts an int to a rope. + else: + when nimvm: + # No caching in VM context + result = newRope(s) + else: + if cacheEnabled: + result = insertInCache(s, cache) + cache = result + else: + result = newRope(s) + +proc rope*(i: BiggestInt): Rope {.rtl, extern: "nro$1BiggestInt".} = + ## Converts an int to a rope. + runnableExamples: + let r = rope(429) + doAssert $r == "429" + result = rope($i) -proc rope*(f: BiggestFloat): PRope {.rtl, extern: "nro$1BiggestFloat".} = - ## Converts a float to a rope. +proc rope*(f: BiggestFloat): Rope {.rtl, extern: "nro$1BiggestFloat".} = + ## Converts a float to a rope. + runnableExamples: + let r = rope(4.29) + doAssert $r == "4.29" + result = rope($f) - + proc enableCache*() {.rtl, extern: "nro$1".} = ## Enables the caching of leaves. This reduces the memory footprint at ## the cost of runtime efficiency. cacheEnabled = true proc disableCache*() {.rtl, extern: "nro$1".} = - ## the cache is discarded and disabled. The GC will reuse its used memory. + ## The cache is discarded and disabled. The GC will reuse its used memory. cache = nil cacheEnabled = false -proc `&`*(a, b: PRope): PRope {.rtl, extern: "nroConcRopeRope".} = - ## the concatenation operator for ropes. - if a == nil: +proc `&`*(a, b: Rope): Rope {.rtl, extern: "nroConcRopeRope".} = + ## The concatenation operator for ropes. + runnableExamples: + let r = rope("Hello, ") & rope("Nim!") + doAssert $r == "Hello, Nim!" + + if a == nil: result = b - elif b == nil: + elif b == nil: result = a else: result = newRope() result.length = a.length + b.length - when false: - # XXX rebalancing would be nice, but is too expensive. - result.left = a.left - var x = newRope() - x.left = a.right - x.right = b - result.right = x - else: - result.left = a - result.right = b - -proc `&`*(a: PRope, b: string): PRope {.rtl, extern: "nroConcRopeStr".} = - ## the concatenation operator for ropes. + result.left = a + result.right = b + +proc `&`*(a: Rope, b: string): Rope {.rtl, extern: "nroConcRopeStr".} = + ## The concatenation operator for ropes. + runnableExamples: + let r = rope("Hello, ") & "Nim!" + doAssert $r == "Hello, Nim!" + result = a & rope(b) - -proc `&`*(a: string, b: PRope): PRope {.rtl, extern: "nroConcStrRope".} = - ## the concatenation operator for ropes. + +proc `&`*(a: string, b: Rope): Rope {.rtl, extern: "nroConcStrRope".} = + ## The concatenation operator for ropes. + runnableExamples: + let r = "Hello, " & rope("Nim!") + doAssert $r == "Hello, Nim!" + result = rope(a) & b - -proc `&`*(a: openarray[PRope]): PRope {.rtl, extern: "nroConcOpenArray".} = - ## the concatenation operator for an openarray of ropes. - for i in countup(0, high(a)): result = result & a[i] -proc add*(a: var PRope, b: PRope) {.rtl, extern: "nro$1Rope".} = - ## adds `b` to the rope `a`. +proc `&`*(a: openArray[Rope]): Rope {.rtl, extern: "nroConcOpenArray".} = + ## The concatenation operator for an `openArray` of ropes. + runnableExamples: + let r = &[rope("Hello, "), rope("Nim"), rope("!")] + doAssert $r == "Hello, Nim!" + + for item in a: result = result & item + +proc add*(a: var Rope, b: Rope) {.rtl, extern: "nro$1Rope".} = + ## Adds `b` to the rope `a`. + runnableExamples: + var r = rope("Hello, ") + r.add(rope("Nim!")) + doAssert $r == "Hello, Nim!" + a = a & b -proc add*(a: var PRope, b: string) {.rtl, extern: "nro$1Str".} = - ## adds `b` to the rope `a`. +proc add*(a: var Rope, b: string) {.rtl, extern: "nro$1Str".} = + ## Adds `b` to the rope `a`. + runnableExamples: + var r = rope("Hello, ") + r.add("Nim!") + doAssert $r == "Hello, Nim!" + a = a & b - -proc `[]`*(r: PRope, i: int): char {.rtl, extern: "nroCharAt".} = - ## returns the character at position `i` in the rope `r`. This is quite - ## expensive! Worst-case: O(n). If ``i >= r.len``, ``\0`` is returned. + +proc `[]`*(r: Rope, i: int): char {.rtl, extern: "nroCharAt".} = + ## Returns the character at position `i` in the rope `r`. This is quite + ## expensive! Worst-case: O(n). If `i >= r.len or i < 0`, `\0` is returned. + runnableExamples: + let r = rope("Hello, Nim!") + + doAssert r[0] == 'H' + doAssert r[7] == 'N' + doAssert r[22] == '\0' + var x = r var j = i - if x == nil: return + if x == nil or i < 0 or i >= r.len: return while true: - if not isConc(x): - if x.data.len <% j: return x.data[j] - return '\0' + if x != nil and x.data.len > 0: + # leaf + return x.data[j] else: - if x.left.len >% j: + if x.left.length > j: x = x.left else: + dec(j, x.left.length) x = x.right - dec(j, x.len) -iterator leaves*(r: PRope): string = - ## iterates over any leaf string in the rope `r`. +iterator leaves*(r: Rope): string = + ## Iterates over any leaf string in the rope `r`. + runnableExamples: + let r = rope("Hello") & rope(", Nim!") + let s = ["Hello", ", Nim!"] + var index = 0 + for leave in r.leaves: + doAssert leave == s[index] + inc(index) + if r != nil: var stack = @[r] - while stack.len > 0: + while stack.len > 0: var it = stack.pop - while isConc(it): + while it.left != nil: + assert(it.right != nil) stack.add(it.right) it = it.left assert(it != nil) - assert(it.data != nil) yield it.data - -iterator items*(r: PRope): char = - ## iterates over any character in the rope `r`. + +iterator items*(r: Rope): char = + ## Iterates over any character in the rope `r`. for s in leaves(r): for c in items(s): yield c -proc write*(f: TFile, r: PRope) {.rtl, extern: "nro$1".} = - ## writes a rope to a file. +proc write*(f: File, r: Rope) {.rtl, extern: "nro$1".} = + ## Writes a rope to a file. for s in leaves(r): write(f, s) -proc `$`*(r: PRope): string {.rtl, extern: "nroToString".}= - ## converts a rope back to a string. - result = newString(r.len) - setLen(result, 0) +proc write*(s: Stream, r: Rope) {.rtl, extern: "nroWriteStream".} = + ## Writes a rope to a stream. + for rs in leaves(r): write(s, rs) + +proc `$`*(r: Rope): string {.rtl, extern: "nroToString".} = + ## Converts a rope back to a string. + result = newStringOfCap(r.len) for s in leaves(r): add(result, s) -when false: - # Format string caching seems reasonable: All leaves can be shared and format - # string parsing has to be done only once. A compiled format string is stored - # as a rope. A negative length is used for the index into the args array. - proc compiledArg(idx: int): PRope = - new(result) - result.length = -idx - - proc compileFrmt(frmt: string): PRope = - var i = 0 - var length = len(frmt) - result = nil - var num = 0 - while i < length: - if frmt[i] == '$': - inc(i) - case frmt[i] - of '$': - add(result, "$") - inc(i) - of '#': - inc(i) - add(result, compiledArg(num+1)) - inc(num) - of '0'..'9': - var j = 0 - while true: - j = j * 10 + ord(frmt[i]) - ord('0') - inc(i) - if frmt[i] notin {'0'..'9'}: break - add(s, compiledArg(j)) - of '{': - inc(i) - var j = 0 - while frmt[i] in {'0'..'9'}: - j = j * 10 + ord(frmt[i]) - ord('0') - inc(i) - if frmt[i] == '}': inc(i) - else: raise newException(EInvalidValue, "invalid format string") - add(s, compiledArg(j)) - else: raise newException(EInvalidValue, "invalid format string") - var start = i - while i < length: - if frmt[i] != '$': inc(i) - else: break - if i - 1 >= start: - add(result, substr(frmt, start, i-1)) - -proc `%`*(frmt: string, args: openarray[PRope]): PRope {. - rtl, extern: "nroFormat".} = - ## `%` substitution operator for ropes. Does not support the ``$identifier`` - ## nor ``${identifier}`` notations. +proc `%`*(frmt: string, args: openArray[Rope]): Rope {.rtl, extern: "nroFormat".} = + ## `%` substitution operator for ropes. Does not support the `$identifier` + ## nor `${identifier}` notations. + runnableExamples: + let r1 = "$1 $2 $3" % [rope("Nim"), rope("is"), rope("a great language")] + doAssert $r1 == "Nim is a great language" + + let r2 = "$# $# $#" % [rope("Nim"), rope("is"), rope("a great language")] + doAssert $r2 == "Nim is a great language" + + let r3 = "${1} ${2} ${3}" % [rope("Nim"), rope("is"), rope("a great language")] + doAssert $r3 == "Nim is a great language" + var i = 0 var length = len(frmt) result = nil var num = 0 - while i < length: - if frmt[i] == '$': + while i < length: + if frmt[i] == '$': inc(i) case frmt[i] - of '$': + of '$': add(result, "$") inc(i) - of '#': + of '#': inc(i) add(result, args[num]) inc(num) - of '0'..'9': + of '0'..'9': var j = 0 - while true: + while true: j = j * 10 + ord(frmt[i]) - ord('0') inc(i) - if frmt[i] notin {'0'..'9'}: break + if i >= frmt.len or frmt[i] notin {'0'..'9'}: break add(result, args[j-1]) of '{': inc(i) @@ -322,44 +335,65 @@ proc `%`*(frmt: string, args: openarray[PRope]): PRope {. j = j * 10 + ord(frmt[i]) - ord('0') inc(i) if frmt[i] == '}': inc(i) - else: raise newException(EInvalidValue, "invalid format string") + else: raise newException(ValueError, "invalid format string") + add(result, args[j-1]) - else: raise newException(EInvalidValue, "invalid format string") + else: raise newException(ValueError, "invalid format string") var start = i - while i < length: + while i < length: if frmt[i] != '$': inc(i) - else: break - if i - 1 >= start: + else: break + if i - 1 >= start: add(result, substr(frmt, start, i - 1)) -proc addf*(c: var PRope, frmt: string, args: openarray[PRope]) {. - rtl, extern: "nro$1".} = - ## shortcut for ``add(c, frmt % args)``. +proc addf*(c: var Rope, frmt: string, args: openArray[Rope]) {.rtl, extern: "nro$1".} = + ## Shortcut for `add(c, frmt % args)`. + runnableExamples: + var r = rope("Dash: ") + r.addf "$1 $2 $3", [rope("Nim"), rope("is"), rope("a great language")] + doAssert $r == "Dash: Nim is a great language" + add(c, frmt % args) -proc equalsFile*(r: PRope, f: TFile): bool {.rtl, extern: "nro$1File".} = - ## returns true if the contents of the file `f` equal `r`. - var bufSize = 1024 # reasonable start value - var buf = alloc(BufSize) - for s in leaves(r): - if s.len > bufSize: - bufSize = max(bufSize * 2, s.len) - buf = realloc(buf, bufSize) - var readBytes = readBuffer(f, buf, s.len) - result = readBytes == s.len and equalMem(buf, cstring(s), s.len) - if not result: break - if result: - result = readBuffer(f, buf, 1) == 0 # really at the end of file? - dealloc(buf) - -proc equalsFile*(r: PRope, f: string): bool {.rtl, extern: "nro$1Str".} = - ## returns true if the contents of the file `f` equal `r`. If `f` does not - ## exist, false is returned. - var bin: TFile - result = open(bin, f) - if result: - result = equalsFile(r, bin) - close(bin) +when not defined(js) and not defined(nimscript): + const + bufSize = 1024 # 1 KB is reasonable + + proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} = + ## Returns true if the contents of the file `f` equal `r`. + var + buf: array[bufSize, char] + bpos = buf.len + blen = buf.len + + for s in leaves(r): + var spos = 0 + let slen = s.len + while spos < slen: + if bpos == blen: + # Read more data + bpos = 0 + blen = readBuffer(f, addr(buf[0]), buf.len) + if blen == 0: # no more data in file + return false + let n = min(blen - bpos, slen - spos) + # TODO: There's gotta be a better way of comparing here... + if not equalMem(addr(buf[bpos]), + cast[pointer](cast[int](cstring(s)) + spos), n): + return false + spos += n + bpos += n + + result = readBuffer(f, addr(buf[0]), 1) == 0 # check that we've read all + + proc equalsFile*(r: Rope, filename: string): bool {.rtl, extern: "nro$1Str".} = + ## Returns true if the contents of the file `f` equal `r`. If `f` does not + ## exist, false is returned. + var f: File + result = open(f, filename) + if result: + result = equalsFile(r, f) + close(f) new(N) # init dummy node for splay algorithm |