diff options
Diffstat (limited to 'lib/pure/parsecsv.nim')
-rw-r--r-- | lib/pure/parsecsv.nim | 170 |
1 files changed, 147 insertions, 23 deletions
diff --git a/lib/pure/parsecsv.nim b/lib/pure/parsecsv.nim index 796114d37..e0c4f38a4 100644 --- a/lib/pure/parsecsv.nim +++ b/lib/pure/parsecsv.nim @@ -10,13 +10,18 @@ ## This module implements a simple high performance `CSV`:idx: ## (`comma separated value`:idx:) parser. ## -## Example: How to use the parser -## ============================== +## Basic usage +## =========== ## ## .. code-block:: nim -## import os, parsecsv, streams +## import parsecsv +## from os import paramStr +## from streams import newFileStream +## ## var s = newFileStream(paramStr(1), fmRead) -## if s == nil: quit("cannot open the file" & paramStr(1)) +## if s == nil: +## quit("cannot open the file" & paramStr(1)) +## ## var x: CsvParser ## open(x, s, paramStr(1)) ## while readRow(x): @@ -26,11 +31,11 @@ ## close(x) ## ## For CSV files with a header row, the header can be read and then used as a -## reference for item access with `rowEntry <#rowEntry.CsvParser.string>`_: +## reference for item access with `rowEntry <#rowEntry,CsvParser,string>`_: ## ## .. code-block:: nim ## import parsecsv -## import os +## ## # Prepare a file ## let content = """One,Two,Three,Four ## 1,2,3,4 @@ -47,24 +52,40 @@ ## for col in items(p.headers): ## echo "##", col, ":", p.rowEntry(col), "##" ## p.close() +## +## See also +## ======== +## +## * `streams module <streams.html>`_ for using +## `open proc <#open,CsvParser,Stream,string,Char,Char,Char>`_ +## and other stream processing (like `close proc <streams.html#close,Stream>`_) +## * `parseopt module <parseopt.html>`_ for a command line parser +## * `parsecfg module <parsecfg.html>`_ for a configuration file parser +## * `parsexml module <parsexml.html>`_ for a XML / HTML parser +## * `parsesql module <parsesql.html>`_ for a SQL parser +## * `other parsers <lib.html#pure-libraries-parsers>`_ for other parsers import lexbase, streams type - CsvRow* = seq[string] ## a row in a CSV file - CsvParser* = object of BaseLexer ## the parser object. 
- row*: CsvRow ## the current row + CsvRow* = seq[string] ## A row in a CSV file. + CsvParser* = object of BaseLexer ## The parser object. + ## + ## It consists of two public fields: + ## * `row` is the current row + ## * `headers` are the columns that are defined in the csv file + ## (read using `readHeaderRow <#readHeaderRow,CsvParser>`_). + ## Used with `rowEntry <#rowEntry,CsvParser,string>`_. + row*: CsvRow filename: string sep, quote, esc: char skipWhite: bool currRow: int - headers*: seq[string] ## The columns that are defined in the csv file - ## (read using `readHeaderRow <#readHeaderRow.CsvParser>`_). - ## Used with `rowEntry <#rowEntry.CsvParser.string>`_). + headers*: seq[string] - CsvError* = object of IOError ## exception that is raised if - ## a parsing error occurs + CsvError* = object of IOError ## An exception that is raised if + ## a parsing error occurs. proc raiseEInvalidCsv(filename: string, line, col: int, msg: string) {.noreturn.} = @@ -82,7 +103,7 @@ proc error(my: CsvParser, pos: int, msg: string) = proc open*(my: var CsvParser, input: Stream, filename: string, separator = ',', quote = '"', escape = '\0', skipInitialSpace = false) = - ## initializes the parser with an input stream. `Filename` is only used + ## Initializes the parser with an input stream. `Filename` is only used ## for nice error messages. The parser's behaviour can be controlled by ## the diverse optional parameters: ## - `separator`: character used to separate fields @@ -94,6 +115,18 @@ proc open*(my: var CsvParser, input: Stream, filename: string, ## two `quote` characters are parsed as one literal `quote` character. ## - `skipInitialSpace`: If true, whitespace immediately following the ## `separator` is ignored. 
+ ## + ## See also: + ## * `open proc <#open,CsvParser,string,Char,Char,Char>`_ which creates the + ## file stream for you + runnableExamples: + import streams + var strm = newStringStream("One,Two,Three\n1,2,3\n10,20,30") + var parser: CsvParser + parser.open(strm, "tmp.csv") + parser.close() + strm.close() + lexbase.open(my, input) my.filename = filename my.sep = separator @@ -106,7 +139,16 @@ proc open*(my: var CsvParser, input: Stream, filename: string, proc open*(my: var CsvParser, filename: string, separator = ',', quote = '"', escape = '\0', skipInitialSpace = false) = - ## same as the other `open` but creates the file stream for you. + ## Similar to the `other open proc<#open,CsvParser,Stream,string,Char,Char,Char>`_, + ## but creates the file stream for you. + runnableExamples: + from os import removeFile + writeFile("tmp.csv", "One,Two,Three\n1,2,3\n10,20,300") + var parser: CsvParser + parser.open("tmp.csv") + parser.close() + removeFile("tmp.csv") + var s = newFileStream(filename, fmRead) if s == nil: my.error(0, "cannot open: " & filename) open(my, s, filename, separator, @@ -159,17 +201,66 @@ proc parseField(my: var CsvParser, a: var string) = my.bufpos = pos proc processedRows*(my: var CsvParser): int = - ## returns number of the processed rows + ## Returns number of the processed rows. + ## + ## But even if `readRow <#readRow,CsvParser,int>`_ arrived at EOF then + ## processed rows counter is incremented. + runnableExamples: + import streams + + var strm = newStringStream("One,Two,Three\n1,2,3") + var parser: CsvParser + parser.open(strm, "tmp.csv") + doAssert parser.readRow() + doAssert parser.processedRows() == 1 + doAssert parser.readRow() + doAssert parser.processedRows() == 2 + ## Even if `readRow` arrived at EOF then `processedRows` is incremented. 
+ doAssert parser.readRow() == false + doAssert parser.processedRows() == 3 + doAssert parser.readRow() == false + doAssert parser.processedRows() == 4 + parser.close() + strm.close() + return my.currRow proc readRow*(my: var CsvParser, columns = 0): bool = - ## reads the next row; if `columns` > 0, it expects the row to have + ## Reads the next row; if `columns` > 0, it expects the row to have ## exactly this many columns. Returns false if the end of the file ## has been encountered else true. ## ## Blank lines are skipped. + runnableExamples: + import streams + var strm = newStringStream("One,Two,Three\n1,2,3\n\n10,20,30") + var parser: CsvParser + parser.open(strm, "tmp.csv") + doAssert parser.readRow() + doAssert parser.row == @["One", "Two", "Three"] + doAssert parser.readRow() + doAssert parser.row == @["1", "2", "3"] + ## Blank lines are skipped. + doAssert parser.readRow() + doAssert parser.row == @["10", "20", "30"] + + var emptySeq: seq[string] + doAssert parser.readRow() == false + doAssert parser.row == emptySeq + doAssert parser.readRow() == false + doAssert parser.row == emptySeq + + parser.close() + strm.close() + var col = 0 # current column let oldpos = my.bufpos + # skip initial empty lines #8365 + while true: + case my.buf[my.bufpos] + of '\c': my.bufpos = handleCR(my, my.bufpos) + of '\l': my.bufpos = handleLF(my, my.bufpos) + else: break while my.buf[my.bufpos] != '\0': let oldlen = my.row.len if oldlen < col+1: @@ -200,12 +291,31 @@ proc readRow*(my: var CsvParser, columns = 0): bool = inc(my.currRow) proc close*(my: var CsvParser) {.inline.} = - ## closes the parser `my` and its associated input stream. + ## Closes the parser `my` and its associated input stream. lexbase.close(my) proc readHeaderRow*(my: var CsvParser) = ## Reads the first row and creates a look-up table for column numbers - ## See also `rowEntry <#rowEntry.CsvParser.string>`_. 
+ ## See also: + ## * `rowEntry proc <#rowEntry,CsvParser,string>`_ + runnableExamples: + import streams + + var strm = newStringStream("One,Two,Three\n1,2,3") + var parser: CsvParser + parser.open(strm, "tmp.csv") + + parser.readHeaderRow() + doAssert parser.headers == @["One", "Two", "Three"] + doAssert parser.row == @["One", "Two", "Three"] + + doAssert parser.readRow() + doAssert parser.headers == @["One", "Two", "Three"] + doAssert parser.row == @["1", "2", "3"] + + parser.close() + strm.close() + let present = my.readRow() if present: my.headers = my.row @@ -213,8 +323,23 @@ proc readHeaderRow*(my: var CsvParser) = proc rowEntry*(my: var CsvParser, entry: string): var string = ## Accesses a specified `entry` from the current row. ## - ## Assumes that `readHeaderRow <#readHeaderRow.CsvParser>`_ has already been + ## Assumes that `readHeaderRow <#readHeaderRow,CsvParser>`_ has already been ## called. + runnableExamples: + import streams + var strm = newStringStream("One,Two,Three\n1,2,3\n\n10,20,30") + var parser: CsvParser + parser.open(strm, "tmp.csv") + ## `readHeaderRow` must be called first. + parser.readHeaderRow() + doAssert parser.readRow() + doAssert parser.rowEntry("One") == "1" + doAssert parser.rowEntry("Two") == "2" + doAssert parser.rowEntry("Three") == "3" + ## `parser.rowEntry("NotExistEntry")` causes SIGSEGV fault. + parser.close() + strm.close() + let index = my.headers.find(entry) if index >= 0: result = my.row[index] @@ -235,7 +360,7 @@ when isMainModule: import os import strutils block: # Tests for reading the header row - let content = "One,Two,Three,Four\n1,2,3,4\n10,20,30,40,\n100,200,300,400\n" + let content = "\nOne,Two,Three,Four\n1,2,3,4\n10,20,30,40,\n100,200,300,400\n" writeFile("temp.csv", content) var p: CsvParser @@ -262,4 +387,3 @@ when isMainModule: # Tidy up removeFile("temp.csv") - |