diff options
Diffstat (limited to 'lib/pure/parsecsv.nim')
-rw-r--r-- | lib/pure/parsecsv.nim | 205 |
1 files changed, 90 insertions, 115 deletions
diff --git a/lib/pure/parsecsv.nim b/lib/pure/parsecsv.nim index 44703e3f8..c7bf0c9c1 100644 --- a/lib/pure/parsecsv.nim +++ b/lib/pure/parsecsv.nim @@ -13,10 +13,10 @@ ## Basic usage ## =========== ## -## .. code-block:: nim -## import parsecsv -## from os import paramStr -## from streams import newFileStream +## ```nim +## import std/parsecsv +## from std/os import paramStr +## from std/streams import newFileStream ## ## var s = newFileStream(paramStr(1), fmRead) ## if s == nil: @@ -29,12 +29,13 @@ ## for val in items(x.row): ## echo "##", val, "##" ## close(x) +## ``` ## ## For CSV files with a header row, the header can be read and then used as a ## reference for item access with `rowEntry <#rowEntry,CsvParser,string>`_: ## -## .. code-block:: nim -## import parsecsv +## ```nim +## import std/parsecsv ## ## # Prepare a file ## let content = """One,Two,Three,Four @@ -52,6 +53,7 @@ ## for col in items(p.headers): ## echo "##", col, ":", p.rowEntry(col), "##" ## p.close() +## ``` ## ## See also ## ======== @@ -65,8 +67,10 @@ ## * `parsesql module <parsesql.html>`_ for a SQL parser ## * `other parsers <lib.html#pure-libraries-parsers>`_ for other parsers -import - lexbase, streams +import std/[lexbase, streams] + +when defined(nimPreviewSlimSystem): + import std/syncio type CsvRow* = seq[string] ## A row in a CSV file. @@ -97,10 +101,10 @@ proc raiseEInvalidCsv(filename: string, line, col: int, e.msg = filename & "(" & $line & ", " & $col & ") Error: " & msg raise e -proc error(my: CsvParser, pos: int, msg: string) = - raiseEInvalidCsv(my.filename, my.lineNumber, getColNumber(my, pos), msg) +proc error(self: CsvParser, pos: int, msg: string) = + raiseEInvalidCsv(self.filename, self.lineNumber, getColNumber(self, pos), msg) -proc open*(my: var CsvParser, input: Stream, filename: string, +proc open*(self: var CsvParser, input: Stream, filename: string, separator = ',', quote = '"', escape = '\0', skipInitialSpace = false) = ## Initializes the parser with an input stream. `Filename` is only used @@ -108,10 +112,10 @@ proc open*(my: var CsvParser, input: Stream, filename: string, ## the diverse optional parameters: ## - `separator`: character used to separate fields ## - `quote`: Used to quote fields containing special characters like - ## `separator`, `quote` or new-line characters. '\0' disables the parsing + ## `separator`, `quote` or new-line characters. '\\0' disables the parsing ## of quotes. ## - `escape`: removes any special meaning from the following character; - ## '\0' disables escaping; if escaping is disabled and `quote` is not '\0', + ## '\\0' disables escaping; if escaping is disabled and `quote` is not '\\0', ## two `quote` characters are parsed one literal `quote` character. ## - `skipInitialSpace`: If true, whitespace immediately following the ## `separator` is ignored. @@ -120,29 +124,27 @@ proc open*(my: var CsvParser, input: Stream, filename: string, ## * `open proc <#open,CsvParser,string,char,char,char>`_ which creates the ## file stream for you runnableExamples: - import streams + import std/streams var strm = newStringStream("One,Two,Three\n1,2,3\n10,20,30") var parser: CsvParser parser.open(strm, "tmp.csv") parser.close() strm.close() - lexbase.open(my, input) - my.filename = filename - my.sep = separator - my.quote = quote - my.esc = escape - my.skipWhite = skipInitialSpace - my.row = @[] - my.currRow = 0 + lexbase.open(self, input) + self.filename = filename + self.sep = separator + self.quote = quote + self.esc = escape + self.skipWhite = skipInitialSpace -proc open*(my: var CsvParser, filename: string, +proc open*(self: var CsvParser, filename: string, separator = ',', quote = '"', escape = '\0', skipInitialSpace = false) = ## Similar to the `other open proc<#open,CsvParser,Stream,string,char,char,char>`_, ## but creates the file stream for you. runnableExamples: - from os import removeFile + from std/os import removeFile writeFile("tmp.csv", "One,Two,Three\n1,2,3\n10,20,300") var parser: CsvParser parser.open("tmp.csv") @@ -150,60 +152,60 @@ proc open*(my: var CsvParser, filename: string, removeFile("tmp.csv") var s = newFileStream(filename, fmRead) - if s == nil: my.error(0, "cannot open: " & filename) - open(my, s, filename, separator, + if s == nil: self.error(0, "cannot open: " & filename) + open(self, s, filename, separator, quote, escape, skipInitialSpace) -proc parseField(my: var CsvParser, a: var string) = - var pos = my.bufpos - if my.skipWhite: - while my.buf[pos] in {' ', '\t'}: inc(pos) +proc parseField(self: var CsvParser, a: var string) = + var pos = self.bufpos + if self.skipWhite: + while self.buf[pos] in {' ', '\t'}: inc(pos) setLen(a, 0) # reuse memory - if my.buf[pos] == my.quote and my.quote != '\0': + if self.buf[pos] == self.quote and self.quote != '\0': inc(pos) while true: - let c = my.buf[pos] + let c = self.buf[pos] if c == '\0': - my.bufpos = pos # can continue after exception? - error(my, pos, my.quote & " expected") + self.bufpos = pos # can continue after exception? + error(self, pos, self.quote & " expected") break - elif c == my.quote: - if my.esc == '\0' and my.buf[pos+1] == my.quote: - add(a, my.quote) + elif c == self.quote: + if self.esc == '\0' and self.buf[pos + 1] == self.quote: + add(a, self.quote) inc(pos, 2) else: inc(pos) break - elif c == my.esc: - add(a, my.buf[pos+1]) + elif c == self.esc: + add(a, self.buf[pos + 1]) inc(pos, 2) else: case c of '\c': - pos = handleCR(my, pos) + pos = handleCR(self, pos) add(a, "\n") of '\l': - pos = handleLF(my, pos) + pos = handleLF(self, pos) add(a, "\n") else: add(a, c) inc(pos) else: while true: - let c = my.buf[pos] - if c == my.sep: break + let c = self.buf[pos] + if c == self.sep: break if c in {'\c', '\l', '\0'}: break add(a, c) inc(pos) - my.bufpos = pos + self.bufpos = pos -proc processedRows*(my: var CsvParser): int = +proc processedRows*(self: var CsvParser): int {.inline.} = ## Returns number of the processed rows. ## ## But even if `readRow <#readRow,CsvParser,int>`_ arrived at EOF then ## processed rows counter is incremented. runnableExamples: - import streams + import std/streams var strm = newStringStream("One,Two,Three\n1,2,3") var parser: CsvParser @@ -220,16 +222,16 @@ proc processedRows*(my: var CsvParser): int = parser.close() strm.close() - return my.currRow + self.currRow -proc readRow*(my: var CsvParser, columns = 0): bool = +proc readRow*(self: var CsvParser, columns = 0): bool = ## Reads the next row; if `columns` > 0, it expects the row to have ## exactly this many columns. Returns false if the end of the file ## has been encountered else true. ## ## Blank lines are skipped. runnableExamples: - import streams + import std/streams var strm = newStringStream("One,Two,Three\n1,2,3\n\n10,20,30") var parser: CsvParser parser.open(strm, "tmp.csv") @@ -251,52 +253,52 @@ proc readRow*(my: var CsvParser, columns = 0): bool = strm.close() var col = 0 # current column - let oldpos = my.bufpos + let oldpos = self.bufpos # skip initial empty lines #8365 while true: - case my.buf[my.bufpos] - of '\c': my.bufpos = handleCR(my, my.bufpos) - of '\l': my.bufpos = handleLF(my, my.bufpos) + case self.buf[self.bufpos] + of '\c': self.bufpos = handleCR(self, self.bufpos) + of '\l': self.bufpos = handleLF(self, self.bufpos) else: break - while my.buf[my.bufpos] != '\0': - let oldlen = my.row.len - if oldlen < col+1: - setLen(my.row, col+1) - my.row[col] = "" - parseField(my, my.row[col]) + while self.buf[self.bufpos] != '\0': + let oldlen = self.row.len + if oldlen < col + 1: + setLen(self.row, col + 1) + self.row[col] = "" + parseField(self, self.row[col]) inc(col) - if my.buf[my.bufpos] == my.sep: - inc(my.bufpos) + if self.buf[self.bufpos] == self.sep: + inc(self.bufpos) else: - case my.buf[my.bufpos] + case self.buf[self.bufpos] of '\c', '\l': # skip empty lines: while true: - case my.buf[my.bufpos] - of '\c': my.bufpos = handleCR(my, my.bufpos) - of '\l': my.bufpos = handleLF(my, my.bufpos) + case self.buf[self.bufpos] + of '\c': self.bufpos = handleCR(self, self.bufpos) + of '\l': self.bufpos = handleLF(self, self.bufpos) else: break of '\0': discard - else: error(my, my.bufpos, my.sep & " expected") + else: error(self, self.bufpos, self.sep & " expected") break - setLen(my.row, col) + setLen(self.row, col) result = col > 0 if result and col != columns and columns > 0: - error(my, oldpos+1, $columns & " columns expected, but found " & + error(self, oldpos + 1, $columns & " columns expected, but found " & $col & " columns") - inc(my.currRow) + inc(self.currRow) -proc close*(my: var CsvParser) {.inline.} = - ## Closes the parser `my` and its associated input stream. - lexbase.close(my) +proc close*(self: var CsvParser) {.inline.} = + ## Closes the parser `self` and its associated input stream. + lexbase.close(self) -proc readHeaderRow*(my: var CsvParser) = +proc readHeaderRow*(self: var CsvParser) = ## Reads the first row and creates a look-up table for column numbers ## See also: ## * `rowEntry proc <#rowEntry,CsvParser,string>`_ runnableExamples: - import streams + import std/streams var strm = newStringStream("One,Two,Three\n1,2,3") var parser: CsvParser @@ -313,36 +315,41 @@ proc readHeaderRow*(my: var CsvParser) = parser.close() strm.close() - let present = my.readRow() + let present = self.readRow() if present: - my.headers = my.row + self.headers = self.row -proc rowEntry*(my: var CsvParser, entry: string): var string = +proc rowEntry*(self: var CsvParser, entry: string): var string = ## Accesses a specified `entry` from the current row. ## ## Assumes that `readHeaderRow <#readHeaderRow,CsvParser>`_ has already been ## called. + ## + ## If specified `entry` does not exist, raises KeyError. runnableExamples: - import streams + import std/streams var strm = newStringStream("One,Two,Three\n1,2,3\n\n10,20,30") var parser: CsvParser parser.open(strm, "tmp.csv") - ## Need calling `readHeaderRow`. + ## Requires calling `readHeaderRow`. parser.readHeaderRow() doAssert parser.readRow() doAssert parser.rowEntry("One") == "1" doAssert parser.rowEntry("Two") == "2" doAssert parser.rowEntry("Three") == "3" - ## `parser.rowEntry("NotExistEntry")` causes SIGSEGV fault. + doAssertRaises(KeyError): + discard parser.rowEntry("NonexistentEntry") parser.close() strm.close() - let index = my.headers.find(entry) + let index = self.headers.find(entry) if index >= 0: - result = my.row[index] + result = self.row[index] + else: + raise newException(KeyError, "Entry `" & entry & "` doesn't exist") when not defined(testing) and isMainModule: - import os + import std/os var s = newFileStream(paramStr(1), fmRead) if s == nil: quit("cannot open the file" & paramStr(1)) var x: CsvParser @@ -352,35 +359,3 @@ when not defined(testing) and isMainModule: for val in items(x.row): echo "##", val, "##" close(x) - -when isMainModule: - import os - import strutils - block: # Tests for reading the header row - let content = "\nOne,Two,Three,Four\n1,2,3,4\n10,20,30,40,\n100,200,300,400\n" - writeFile("temp.csv", content) - - var p: CsvParser - p.open("temp.csv") - p.readHeaderRow() - while p.readRow(): - let zeros = repeat('0', p.currRow-2) - doAssert p.rowEntry("One") == "1" & zeros - doAssert p.rowEntry("Two") == "2" & zeros - doAssert p.rowEntry("Three") == "3" & zeros - doAssert p.rowEntry("Four") == "4" & zeros - p.close() - - when not defined(testing): - var parser: CsvParser - parser.open("temp.csv") - parser.readHeaderRow() - while parser.readRow(): - echo "new row: " - for col in items(parser.headers): - echo "##", col, ":", parser.rowEntry(col), "##" - parser.close() - removeFile("temp.csv") - - # Tidy up - removeFile("temp.csv") |