diff options
Diffstat (limited to 'nimlib/pure/parsecsv.nim')
-rwxr-xr-x | nimlib/pure/parsecsv.nim | 178 |
1 files changed, 0 insertions, 178 deletions
#
#
#            Nimrod's Runtime Library
#        (c) Copyright 2009 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements a simple high performance `CSV`:idx:
## (`comma separated value`:idx:) parser.
##
## Example: How to use the parser
## ==============================
##
## .. code-block:: nimrod
##   import os, parsecsv, streams
##   var s = newFileStream(ParamStr(1), fmRead)
##   if s == nil: quit("cannot open the file: " & ParamStr(1))
##   var x: TCsvParser
##   open(x, s, ParamStr(1))
##   while readRow(x):
##     Echo "new row: "
##     for val in items(x.row):
##       Echo "##", val, "##"
##   close(x)
##

import
  lexbase, streams

type
  TCsvRow* = seq[string] ## a row in a CSV file
  TCsvParser* = object of TBaseLexer ## the parser object.
    row*: TCsvRow                    ## the current row
    filename: string                 ## only used to build error messages
    sep, quote, esc: char            ## separator, quote and escape characters
    skipWhite: bool                  ## skip blanks/tabs at the start of a field
    currRow: int                     ## counter maintained by `readRow`

  EInvalidCsv* = object of EIO ## exception that is raised if
                               ## a parsing error occurs

proc raiseEInvalidCsv(filename: string, line, col: int,
                      msg: string) {.noreturn.} =
  ## raises `EInvalidCsv` with a message of the form
  ## ``filename(line, col) Error: msg``.
  var e: ref EInvalidCsv
  new(e)
  e.msg = filename & "(" & $line & ", " & $col & ") Error: " & msg
  raise e

proc error(my: TCsvParser, pos: int, msg: string) =
  ## raises `EInvalidCsv` for the buffer position `pos`, using the parser's
  ## file name and current line number for the location prefix.
  raiseEInvalidCsv(my.filename, my.LineNumber, getColNumber(my, pos), msg)

proc open*(my: var TCsvParser, input: PStream, filename: string,
           separator = ',', quote = '"', escape = '\0',
           skipInitialSpace = false) =
  ## initializes the parser with an input stream. `Filename` is only used
  ## for nice error messages. The parser's behaviour can be controlled by
  ## the diverse optional parameters:
  ## - `separator`: character used to separate fields
  ## - `quote`: Used to quote fields containing special characters like
  ##   `separator`, `quote` or new-line characters. '\0' disables the parsing
  ##   of quotes.
  ## - `escape`: removes any special meaning from the following character;
  ##   '\0' disables escaping; if escaping is disabled and `quote` is not '\0',
  ##   two `quote` characters are parsed as one literal `quote` character.
  ## - `skipInitialSpace`: If true, whitespace immediately following the
  ##   `separator` is ignored.
  lexbase.open(my, input)
  my.filename = filename
  my.sep = separator
  my.quote = quote
  my.esc = escape
  my.skipWhite = skipInitialSpace
  my.row = @[]
  my.currRow = 0

proc parseField(my: var TCsvParser, a: var string) =
  ## parses one field starting at `my.bufpos` into `a` (whose previous
  ## content is discarded) and leaves `my.bufpos` on the first character
  ## that does not belong to the field. Raises `EInvalidCsv` if the input
  ## ends inside a quoted field.
  var pos = my.bufpos
  var buf = my.buf
  if my.skipWhite:
    while buf[pos] in {' ', '\t'}: inc(pos)
  setLen(a, 0) # reuse memory
  if buf[pos] == my.quote and my.quote != '\0':
    # quoted field: read up to the closing quote
    inc(pos)
    while true:
      var c = buf[pos]
      if c == '\0':
        my.bufpos = pos # can continue after exception?
        error(my, pos, my.quote & " expected")
        break
      elif c == my.quote:
        if my.esc == '\0' and buf[pos+1] == my.quote:
          # escaping is disabled, so a doubled quote is one literal quote
          add(a, my.quote)
          inc(pos, 2)
        else:
          inc(pos)
          break
      elif c == my.esc:
        # this branch is never taken when escaping is disabled: `c` cannot
        # be '\0' here because that case is handled above
        if buf[pos+1] == '\0':
          # dangling escape right before the end marker: do not consume the
          # marker, so that the next iteration reports the missing quote
          # instead of scanning past the end of the input
          inc(pos)
        else:
          # NOTE(review): an escaped '\c' or '\l' is appended raw and is not
          # passed through handleCR/handleLF -- confirm this is intended
          add(a, buf[pos+1])
          inc(pos, 2)
      else:
        case c
        of '\c':
          pos = handleCR(my, pos)
          buf = my.buf # re-read the buffer reference after handleCR
          add(a, "\n")
        of '\l':
          pos = handleLF(my, pos)
          buf = my.buf # re-read the buffer reference after handleLF
          add(a, "\n")
        else:
          add(a, c)
          inc(pos)
  else:
    # unquoted field: everything up to the separator, a line end or the
    # end marker
    while true:
      var c = buf[pos]
      if c == my.sep: break
      if c in {'\c', '\l', '\0'}: break
      add(a, c)
      inc(pos)
  my.bufpos = pos

proc processedRows*(my: var TCsvParser): int =
  ## returns number of the processed rows. The counter is incremented by
  ## every call of `readRow` that returns normally, including the call
  ## that returns false at the end of the input.
  return my.currRow

proc readRow*(my: var TCsvParser, columns = 0): bool =
  ## reads the next row; if `columns` > 0, it expects the row to have
  ## exactly this many columns. Returns false if the end of the file
  ## has been encountered else true. Raises `EInvalidCsv` if a field is
  ## malformed or if the number of columns does not match `columns`.
  var col = 0 # current column
  var oldpos = my.bufpos
  while my.buf[my.bufpos] != '\0':
    var oldlen = my.row.len
    if oldlen < col+1:
      # grow the row; strings left over from earlier rows are reused
      setLen(my.row, col+1)
      my.row[col] = ""
    parseField(my, my.row[col])
    inc(col)
    if my.buf[my.bufpos] == my.sep:
      inc(my.bufpos)
    else:
      case my.buf[my.bufpos]
      of '\c', '\l':
        # skip empty lines:
        while true:
          case my.buf[my.bufpos]
          of '\c': my.bufpos = handleCR(my, my.bufpos)
          of '\l': my.bufpos = handleLF(my, my.bufpos)
          else: break
      of '\0': nil
      else: error(my, my.bufpos, my.sep & " expected")
      break

  setLen(my.row, col)
  result = col > 0
  if result and col != columns and columns > 0:
    error(my, oldpos+1, $columns & " columns expected, but found " &
          $col & " columns")
  inc(my.currRow)

proc close*(my: var TCsvParser) {.inline.} =
  ## closes the parser `my` and its associated input stream.
  lexbase.close(my)

when isMainModule:
  # small self test: prints every field of the CSV file that is given as
  # the first command line argument
  import os
  var s = newFileStream(ParamStr(1), fmRead)
  if s == nil: quit("cannot open the file: " & ParamStr(1))
  var x: TCsvParser
  open(x, s, ParamStr(1))
  while readRow(x):
    Echo "new row: "
    for val in items(x.row):
      Echo "##", val, "##"
  close(x)