diff options
-rw-r--r-- | lib/pure/memfiles.nim | 41 |
1 files changed, 27 insertions, 14 deletions
diff --git a/lib/pure/memfiles.nim b/lib/pure/memfiles.nim index f78cd8766..a506d2c24 100644 --- a/lib/pure/memfiles.nim +++ b/lib/pure/memfiles.nim @@ -12,8 +12,8 @@ ## This module provides support for `memory mapped files`:idx: ## (Posix's `mmap`:idx:) on the different operating systems. ## -## It also provides some fast iterators over lines in text files -## delimited in Unix or Windows styles (or similarly delimited records). +## It also provides some fast iterators over lines in text files (or +## other "line-like", variable length, delimited records). when defined(windows): import winlean @@ -262,18 +262,30 @@ proc `$`*(ms: MemSlice): string {.inline.} = result = buf iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} = - ## Iterates over [optional eat]delim-delimited slices in a MemFile. - ## Default delimiting is [\\r]\\l which parses Unix/Windows text file lines. - ## Pass eat='\\0' to be strictly delim-delimited. - ## This zero copy, memchr-limited method is probably the fastest way to - ## iterate through lines in a file. The returned (data,size) objects are - ## NOT Nim strings or even terminated C strings. So, be careful how data - ## is accessed (e.g., think C mem* functions, not str* functions). Example: + ## Iterates over [optional `eat`] `delim`-delimited slices in MemFile `mfile`. + ## + ## Default parameters parse lines ending in either Unix(\\l) or Windows(\\r\\l) + ## style on on a line-by-line basis. I.e., not every line needs the same ending. + ## Unlike readLine(File) & lines(File), archaic MacOS9 \\r-delimited lines + ## are not supported as a third option for each line. Such archaic MacOS9 + ## files can be handled by passing delim='\\r', eat='\\0', though. + ## + ## Non-default delimiters can be passed to allow iteration over other sorts + ## of "line-like" variable length records. Pass eat='\\0' to be strictly + ## `delim`-delimited. (Eating an optional prefix equal to '\\0' is not + ## supported.) + ## + ## This zero copy, memchr-limited interface is probably the fastest way to + ## iterate over line-like records in a file. However, returned (data,size) + ## objects are not Nim strings, bounds checked Nim arrays, or even terminated + ## C strings. So, care is required to access the data (e.g., think C mem* + ## functions, not str* functions). Example: ## ## .. code-block:: nim ## var count = 0 ## for slice in memSlices(memfiles.open("foo")): - ## inc(count) + ## if slice.size > 0 and cast[cstring](slice.data)[0] != '#': + ## inc(count) ## echo count proc c_memchr(cstr: pointer, c: char, n: csize): pointer {. @@ -298,13 +310,14 @@ iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} = iterator lines*(mfile: MemFile, buf: var TaintedString, delim='\l', eat='\r'): TaintedString {.inline.} = ## Replace contents of passed buffer with each new line, like readLine(File). - ## Default delimiting is [\\r]\\l which parses Unix/Windows text file lines. - ## Pass eat='\\0' to be strictly delim-delimited. Example: + ## `delim`, `eat`, and delimiting logic is exactly as for memSlices, but Nim + ## strings are returned. Example: ## ## .. code-block:: nim ## var buffer: TaintedString = "" ## for line in lines(memfiles.open("foo"), buffer): ## echo line + for ms in memSlices(mfile, delim, eat): buf.setLen(ms.size) c_memcpy(addr(buf[0]), ms.data, ms.size) @@ -313,8 +326,8 @@ iterator lines*(mfile: MemFile, buf: var TaintedString, delim='\l', eat='\r'): T iterator lines*(mfile: MemFile, delim='\l', eat='\r'): TaintedString {.inline.} = ## Return each line in a file as a Nim string, like lines(File). - ## Default delimiting is [\\r]\\l which parses Unix/Windows text file lines. - ## Pass eat='\0' to be strictly delim-delimited. Example: + ## `delim`, `eat`, and delimiting logic is exactly as for memSlices, but Nim + ## strings are returned. Example: ## ## .. code-block:: nim ## for line in lines(memfiles.open("foo")): |