diff options
-rw-r--r-- | lib/pure/memfiles.nim | 42 |
1 files changed, 36 insertions, 6 deletions
diff --git a/lib/pure/memfiles.nim b/lib/pure/memfiles.nim index 5113197f2..51f3662de 100644 --- a/lib/pure/memfiles.nim +++ b/lib/pure/memfiles.nim @@ -11,6 +11,9 @@ ## ## This module provides support for `memory mapped files`:idx: ## (Posix's `mmap`:idx:) on the different operating systems. +## +## It also provides some fast iterators over lines in text files +## delimited in Unix or Windows styles (or similarly delimited records). when defined(windows): import winlean @@ -249,7 +252,23 @@ type MemSlice* = object data*: pointer size*: int +proc c_memcpy(a, b: pointer, n: int) {.importc: "memcpy", header: "<string.h>".} + +proc toString*(ms: MemSlice): string {.inline.} = + ## Return a Nim string built from a MemSlice. + var buf = newString(ms.size) + c_memcpy(addr(buf[0]), ms.data, ms.size) + buf[ms.size] = '\0' + result = buf + iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} = + ## Iterates over [optional eat]delim-delimited slices in a MemFile. + ## Default delimiting is [\r]\l which parse Unix or Windows text file lines. + ## Pass eat='\0' to be strictly delim-delimited. + ## This zero copy, memchr-limited method is probably the fastest way to + ## iterate through lines in a file, however the returned (data,size) objects + ## are NOT Nim strings or even terminated C strings. So, be careful how data + ## is accessed (e.g., use C mem* functions, not str* functions). proc c_memchr(cstr: pointer, c: char, n: csize): pointer {. importc: "memchr", header: "<string.h>" .} proc `-!`(p, q: pointer): int {.inline.} = return cast[int](p) -% cast[int](q) @@ -270,9 +289,20 @@ iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} = ms.data = cast[pointer](cast[int](ending) +% 1) # skip delim remaining = mfile.size - (ms.data -! mfile.mem) -proc toString*(ms: MemSlice): string {.inline.} = - proc toNimStr(str: cstring, len: int): string {. importc: "toNimStr" .} - result = toNimStr(cast[cstring](ms.data), ms.size) - -iterator lines*(mfile: MemFile): string {.inline.} = - for ms in memSlices(mfile): yield toString(ms) +iterator lines*(mfile: MemFile, buf: var TaintedString, delim='\l', eat='\r'): TaintedString {.inline.} = + ## Replace contents of passed buffer with each new line, like readLine(File). + ## Default delimiting is [\r]\l which parse Unix or Windows text file lines. + ## Pass eat='\0' to be strictly delim-delimited. + for ms in memSlices(mfile, delim, eat): + buf.setLen(ms.size) + c_memcpy(addr(buf[0]), ms.data, ms.size) + buf[ms.size] = '\0' + yield buf + +iterator lines*(mfile: MemFile, delim='\l', eat='\r'): TaintedString {.inline.} = + ## Return each line in a file as a Nim string, like lines(File). + ## Default delimiting is [\r]\l which parse Unix or Windows text file lines. + ## Pass eat='\0' to be strictly delim-delimited. + var buf = TaintedString(newStringOfCap(80)) + for line in lines(mfile, buf, delim, eat): + yield buf |