diff options
Diffstat (limited to 'lib/pure/memfiles.nim')
-rw-r--r-- | lib/pure/memfiles.nim | 119 |
1 files changed, 110 insertions, 9 deletions
diff --git a/lib/pure/memfiles.nim b/lib/pure/memfiles.nim index d49dfae9f..b9c574944 100644 --- a/lib/pure/memfiles.nim +++ b/lib/pure/memfiles.nim @@ -11,6 +11,9 @@ ## ## This module provides support for `memory mapped files`:idx: ## (Posix's `mmap`:idx:) on the different operating systems. +## +## It also provides some fast iterators over lines in text files (or +## other "line-like", variable length, delimited records). when defined(windows): import winlean @@ -29,8 +32,9 @@ type size*: int ## size of the memory mapped file when defined(windows): - fHandle: int - mapHandle: int + fHandle: Handle + mapHandle: Handle + wasOpened: bool ## only close if wasOpened else: handle: cint @@ -112,7 +116,8 @@ proc open*(filename: string, mode: FileMode = fmRead, template callCreateFile(winApiProc, filename: expr): expr = winApiProc( filename, - if readonly: GENERIC_READ else: GENERIC_ALL, + # GENERIC_ALL != (GENERIC_READ or GENERIC_WRITE) + if readonly: GENERIC_READ else: GENERIC_READ or GENERIC_WRITE, FILE_SHARE_READ, nil, if newFileSize != -1: CREATE_ALWAYS else: OPEN_EXISTING, @@ -128,7 +133,7 @@ proc open*(filename: string, mode: FileMode = fmRead, fail(osLastError(), "error opening file") if newFileSize != -1: - var + var sizeHigh = int32(newFileSize shr 32) sizeLow = int32(newFileSize and 0xffffffff) @@ -169,12 +174,14 @@ proc open*(filename: string, mode: FileMode = fmRead, if mappedSize != -1: result.size = min(fileSize, mappedSize).int else: result.size = fileSize.int + result.wasOpened = true + else: template fail(errCode: OSErrorCode, msg: expr) = rollback() if result.handle != 0: discard close(result.handle) raiseOSError(errCode) - + var flags = if readonly: O_RDONLY else: O_RDWR if newFileSize != -1: @@ -196,7 +203,7 @@ proc open*(filename: string, mode: FileMode = fmRead, if mappedSize != -1: result.size = mappedSize else: - var stat: TStat + var stat: Stat if fstat(result.handle, stat) != -1: # XXX: Hmm, this could be unsafe # Why is mmap taking int anyway? 
@@ -218,12 +225,12 @@ proc open*(filename: string, mode: FileMode = fmRead, proc close*(f: var MemFile) = ## closes the memory mapped file `f`. All changes are written back to the ## file system, if `f` was opened with write access. - + var error = false var lastErr: OSErrorCode when defined(windows): - if f.fHandle != INVALID_HANDLE_VALUE: + if f.fHandle != INVALID_HANDLE_VALUE and f.wasOpened: error = unmapViewOfFile(f.mem) == 0 lastErr = osLastError() error = (closeHandle(f.mapHandle) == 0) or error @@ -240,8 +247,102 @@ proc close*(f: var MemFile) = when defined(windows): f.fHandle = 0 f.mapHandle = 0 + f.wasOpened = false else: f.handle = 0 - + if error: raiseOSError(lastErr) +type MemSlice* = object ## represent slice of a MemFile for iteration over delimited lines/records + data*: pointer + size*: int + +proc c_memcpy(a, b: pointer, n: int) {.importc: "memcpy", header: "<string.h>".} + +proc `$`*(ms: MemSlice): string {.inline.} = + ## Return a Nim string built from a MemSlice. + var buf = newString(ms.size) + c_memcpy(addr(buf[0]), ms.data, ms.size) + buf[ms.size] = '\0' + result = buf + +iterator memSlices*(mfile: MemFile, delim='\l', eat='\r'): MemSlice {.inline.} = + ## Iterates over [optional `eat`] `delim`-delimited slices in MemFile `mfile`. + ## + ## Default parameters parse lines ending in either Unix(\\l) or Windows(\\r\\l) + ## style on a line-by-line basis. I.e., not every line needs the same ending. + ## Unlike readLine(File) & lines(File), archaic MacOS9 \\r-delimited lines + ## are not supported as a third option for each line. Such archaic MacOS9 + ## files can be handled by passing delim='\\r', eat='\\0', though. + ## + ## Delimiters are not part of the returned slice. A final, unterminated line + ## or record is returned just like any other. + ## + ## Non-default delimiters can be passed to allow iteration over other sorts + ## of "line-like" variable length records. Pass eat='\\0' to be strictly + ## `delim`-delimited. 
(Eating an optional prefix equal to '\\0' is not + ## supported.) + ## + ## This zero copy, memchr-limited interface is probably the fastest way to + ## iterate over line-like records in a file. However, returned (data,size) + ## objects are not Nim strings, bounds checked Nim arrays, or even terminated + ## C strings. So, care is required to access the data (e.g., think C mem* + ## functions, not str* functions). Example: + ## + ## .. code-block:: nim + ## var count = 0 + ## for slice in memSlices(memfiles.open("foo")): + ## if slice.size > 0 and cast[cstring](slice.data)[0] != '#': + ## inc(count) + ## echo count + + proc c_memchr(cstr: pointer, c: char, n: csize): pointer {. + importc: "memchr", header: "<string.h>" .} + proc `-!`(p, q: pointer): int {.inline.} = return cast[int](p) -% cast[int](q) + var ms: MemSlice + var ending: pointer + ms.data = mfile.mem + var remaining = mfile.size + while remaining > 0: + ending = c_memchr(ms.data, delim, remaining) + if ending == nil: # unterminated final slice + ms.size = remaining # Weird case..check eat? + yield ms + break + ms.size = ending -! ms.data # delim is NOT included + if eat != '\0' and ms.size > 0 and cast[cstring](ms.data)[ms.size - 1] == eat: + dec(ms.size) # trim pre-delim char + yield ms + ms.data = cast[pointer](cast[int](ending) +% 1) # skip delim + remaining = mfile.size - (ms.data -! mfile.mem) + +iterator lines*(mfile: MemFile, buf: var TaintedString, delim='\l', eat='\r'): TaintedString {.inline.} = + ## Replace contents of passed buffer with each new line, like + ## `readLine(File) <system.html#readLine,File,TaintedString>`_. + ## `delim`, `eat`, and delimiting logic is exactly as for + ## `memSlices <#memSlices>`_, but Nim strings are returned. Example: + ## + ## .. 
code-block:: nim + ## var buffer: TaintedString = "" + ## for line in lines(memfiles.open("foo"), buffer): + ## echo line + + for ms in memSlices(mfile, delim, eat): + buf.setLen(ms.size) + c_memcpy(addr(buf[0]), ms.data, ms.size) + buf[ms.size] = '\0' + yield buf + +iterator lines*(mfile: MemFile, delim='\l', eat='\r'): TaintedString {.inline.} = + ## Return each line in a file as a Nim string, like + ## `lines(File) <system.html#lines.i,File>`_. + ## `delim`, `eat`, and delimiting logic is exactly as for + ## `memSlices <#memSlices>`_, but Nim strings are returned. Example: + ## + ## .. code-block:: nim + ## for line in lines(memfiles.open("foo")): + ## echo line + + var buf = TaintedString(newStringOfCap(80)) + for line in lines(mfile, buf, delim, eat): + yield buf |