summary refs log tree commit diff stats
path: root/lib/pure/memfiles.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/memfiles.nim')
-rw-r--r--lib/pure/memfiles.nim41
1 files changed, 27 insertions, 14 deletions
diff --git a/lib/pure/memfiles.nim b/lib/pure/memfiles.nim
index f78cd8766..a506d2c24 100644
--- a/lib/pure/memfiles.nim
+++ b/lib/pure/memfiles.nim
@@ -12,8 +12,8 @@
 ## This module provides support for `memory mapped files`:idx:
 ## (Posix's `mmap`:idx:) on the different operating systems.
 ##
-## It also provides some fast iterators over lines in text files
-## delimited in Unix or Windows styles (or similarly delimited records).
+## It also provides some fast iterators over lines in text files (or
+## other "line-like", variable length, delimited records).
 
 when defined(windows):
   import winlean
@@ -262,18 +262,30 @@ proc `$`*(ms: MemSlice): string {.inline.} =
   result = buf
 
 iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} =
-  ## Iterates over [optional eat]delim-delimited slices in a MemFile.
-  ## Default delimiting is [\\r]\\l which parses Unix/Windows text file lines.
-  ## Pass eat='\\0' to be strictly delim-delimited.
-  ## This zero copy, memchr-limited method is probably the fastest way to
-  ## iterate through lines in a file.  The returned (data,size) objects are
-  ## NOT Nim strings or even terminated C strings.  So, be careful how data
-  ## is accessed (e.g., think C mem* functions, not str* functions).  Example:
+  ## Iterates over [optional `eat`] `delim`-delimited slices in MemFile `mfile`.
+  ##
+  ## Default parameters parse lines ending in either Unix(\\l) or Windows(\\r\\l)
+  ## style on a line-by-line basis.  I.e., not every line needs the same ending.
+  ## Unlike readLine(File) & lines(File), archaic MacOS9 \\r-delimited lines
+  ## are not supported as a third option for each line.  Such archaic MacOS9
+  ## files can be handled by passing delim='\\r', eat='\\0', though.
+  ##
+  ## Non-default delimiters can be passed to allow iteration over other sorts
+  ## of "line-like" variable length records.  Pass eat='\\0' to be strictly
+  ## `delim`-delimited. (Eating an optional prefix equal to '\\0' is not
+  ## supported.)
+  ##
+  ## This zero copy, memchr-limited interface is probably the fastest way to
+  ## iterate over line-like records in a file.  However, returned (data,size)
+  ## objects are not Nim strings, bounds checked Nim arrays, or even terminated
+  ## C strings.  So, care is required to access the data (e.g., think C mem*
+  ## functions, not str* functions).  Example:
   ##
   ## .. code-block:: nim
   ##   var count = 0
   ##   for slice in memSlices(memfiles.open("foo")):
-  ##     inc(count)
+  ##     if slice.size > 0 and cast[cstring](slice.data)[0] != '#':
+  ##       inc(count)
   ##   echo count
 
   proc c_memchr(cstr: pointer, c: char, n: csize): pointer {.
@@ -298,13 +310,14 @@ iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} =
 
 iterator lines*(mfile: MemFile, buf: var TaintedString, delim='\l', eat='\r'): TaintedString {.inline.} =
   ## Replace contents of passed buffer with each new line, like readLine(File).
-  ## Default delimiting is [\\r]\\l which parses Unix/Windows text file lines.
-  ## Pass eat='\\0' to be strictly delim-delimited.  Example:
+  ## `delim`, `eat`, and delimiting logic are exactly as for memSlices, but Nim
+  ## strings are returned.  Example:
   ##
   ## .. code-block:: nim
   ##   var buffer: TaintedString = ""
   ##   for line in lines(memfiles.open("foo"), buffer):
   ##     echo line
+
   for ms in memSlices(mfile, delim, eat):
     buf.setLen(ms.size)
     c_memcpy(addr(buf[0]), ms.data, ms.size)
@@ -313,8 +326,8 @@ iterator lines*(mfile: MemFile, buf: var TaintedString, delim='\l', eat='\r'): T
 
 iterator lines*(mfile: MemFile, delim='\l', eat='\r'): TaintedString {.inline.} =
   ## Return each line in a file as a Nim string, like lines(File).
-  ## Default delimiting is [\\r]\\l which parses Unix/Windows text file lines.
-  ## Pass eat='\0' to be strictly delim-delimited.  Example:
+  ## `delim`, `eat`, and delimiting logic are exactly as for memSlices, but Nim
+  ## strings are returned.  Example:
   ##
   ## .. code-block:: nim
   ##   for line in lines(memfiles.open("foo")):