summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Charles Blake <cblake@csail.mit.edu> 2015-08-02 12:03:52 -0400
committer: Charles Blake <cblake@csail.mit.edu> 2015-08-02 12:03:52 -0400
commit: 2bcd4e9a38e7d6d532fde43f9ee777a4363d383f (patch)
tree: 044e75059a5b62b1f9fbf7769d128b3547b30847
parent: a9d09f28685aad5b4630bb3efd1276633419638b (diff)
download: Nim-2bcd4e9a38e7d6d532fde43f9ee777a4363d383f.tar.gz
Add a top-of-module comment. Re-order definitions to
type, then converter, then the 3 iterators from lowest- to highest-level
(also fastest to slowest), including a new intermediate iterator
lines(MemFile, buffer) that is more like readLine(File), in case
that helps anyone port code.
Add doc comments.
Also have toString just use newString+c_memcpy instead of the
currently fragile toNimStr, which Araq wants a separate PR for.
-rw-r--r-- lib/pure/memfiles.nim 42
1 files changed, 36 insertions, 6 deletions
diff --git a/lib/pure/memfiles.nim b/lib/pure/memfiles.nim
index 5113197f2..51f3662de 100644
--- a/lib/pure/memfiles.nim
+++ b/lib/pure/memfiles.nim
@@ -11,6 +11,9 @@
 ##
 ## This module provides support for `memory mapped files`:idx:
 ## (Posix's `mmap`:idx:) on the different operating systems.
+##
+## It also provides some fast iterators over lines in text files
+## delimited in Unix or Windows styles (or similarly delimited records).
 
 when defined(windows):
   import winlean
@@ -249,7 +252,23 @@ type MemSlice* = object
   data*: pointer
   size*: int
 
+proc c_memcpy(a, b: pointer, n: int) {.importc: "memcpy", header: "<string.h>".} # C memcpy: copy n bytes from b to a
+
+proc toString*(ms: MemSlice): string {.inline.} =
+  ## Return a Nim string holding a copy of the ms.size bytes at ms.data.
+  ## An empty slice yields "" without indexing the string or calling memcpy.
+  result = newString(ms.size)   # length is ms.size; runtime keeps the terminator
+  if ms.size > 0:               # result[0] is out of range for an empty string
+    c_memcpy(addr(result[0]), ms.data, ms.size)
+
 iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} =
+  ## Iterates over [optional eat]delim-delimited slices in a MemFile.
+  ## Default delimiting is [\r]\l, which parses Unix or Windows text file lines.
+  ## Pass eat='\0' to be strictly delim-delimited.
+  ## This zero copy, memchr-limited method is probably the fastest way to
+  ## iterate through lines in a file, however the returned (data,size) objects
+  ## are NOT Nim strings or even terminated C strings.  So, be careful how data
+  ## is accessed (e.g., use C mem* functions, not str* functions).
   proc c_memchr(cstr: pointer, c: char, n: csize): pointer {.
        importc: "memchr", header: "<string.h>" .}
   proc `-!`(p, q: pointer): int {.inline.} = return cast[int](p) -% cast[int](q)
@@ -270,9 +289,20 @@ iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} =
     ms.data = cast[pointer](cast[int](ending) +% 1)     # skip delim
     remaining = mfile.size - (ms.data -! mfile.mem)
 
-proc toString*(ms: MemSlice): string {.inline.} =
-  proc toNimStr(str: cstring, len: int): string {. importc: "toNimStr" .}
-  result = toNimStr(cast[cstring](ms.data), ms.size)
-
-iterator lines*(mfile: MemFile): string {.inline.} =
-  for ms in memSlices(mfile): yield toString(ms)
+iterator lines*(mfile: MemFile, buf: var TaintedString, delim='\l', eat='\r'): TaintedString {.inline.} =
+  ## Replace contents of passed buffer with each new line, like readLine(File).
+  ## Default delimiting is [\r]\l, which parses Unix or Windows text file lines.
+  ## Pass eat='\0' to be strictly delim-delimited.
+  for ms in memSlices(mfile, delim, eat):
+    buf.setLen(ms.size)  # setLen keeps the terminator; no manual '\0' write needed
+    if ms.size > 0:      # empty line: nothing to copy, and buf[0] is out of range
+      c_memcpy(addr(buf[0]), ms.data, ms.size)
+    yield buf
+
+iterator lines*(mfile: MemFile, delim='\l', eat='\r'): TaintedString {.inline.} =
+  ## Yield each line of the mapped file as a Nim string, like lines(File).
+  ## By default a line ends at [\r]\l, covering Unix and Windows text files.
+  ## Pass eat='\0' to split strictly on delim.
+  var lineBuf = TaintedString(newStringOfCap(80))
+  for current in lines(mfile, lineBuf, delim, eat):
+    yield lineBuf