(and-record trace [ label:string-address contents:string-address ]) (address trace-address (trace)) (array trace-address-array (trace-address)) (address trace-address-array-address (trace-address-array)) (address trace-address-array-address-address (trace-address-array-address)) (and-record instruction-trace [ call-stack:string-address-array-address pc:string-address ; should be integer? instruction:string-address children:trace-address-array-address ]) (address instruction-trace-address (instruction-trace)) (array instruction-trace-address-array (instruction-trace-address)) (address instruction-trace-address-array-address (instruction-trace-address-array)) (function parse-traces [ ; stream-address -> instruction-trace-address-array-address (default-space:space-address <- new space:literal 30:literal) ;? ($print (("parse-traces\n" literal))) ;? 2 (in:stream-address <- next-input) ; check input size ($print (("counting lines\n" literal))) (n:integer <- copy 0:literal) { begin (done?:boolean <- end-of-stream? in:stream-address) (break-if done?:boolean) ;? ($start-tracing) ;? 1 (c:character <- read-character in:stream-address) { begin (newline?:boolean <- equal c:character ((#\newline literal))) (break-unless newline?:boolean) (n:integer <- add n:integer 1:literal) { begin ;? (print?:boolean <- divides? n:integer 100:literal) ;? (break-unless print?:boolean) ($print ((" " literal))) ($print n:integer) ($print (("\n" literal))) } } ;? ($quit) ;? 1 (loop) } ($print n:integer) ($print ((" lines\n" literal))) (in:stream-address <- rewind-stream in:stream-address) ; prepare result (result:buffer-address <- init-buffer 30:literal) (curr-tail:instruction-trace-address <- copy nil:literal) (ch:buffer-address <- init-buffer 5:literal) ; accumulator for traces between instructions (run:string-address/const <- new "run") ($print (("parsing\n" literal))) (n:integer <- copy 0:literal) ; reading each line from 'in' { begin next-line (done?:boolean <- end-of-stream? 
in:stream-address) ;? ($print done?:boolean) ;? 1 ;? ($print (("\n" literal))) ;? 1 (break-if done?:boolean) ; parse next line as a generic trace (line:string-address <- read-line in:stream-address) { begin (n:integer <- add n:integer 1:literal) (print?:boolean <- divides? n:integer 100:literal) (break-unless print?:boolean) ($print ((" " literal))) ($print n:integer) ($print (("\n" literal))) } ;? (print-string nil:literal/terminal line:string-address) ;? 1 (f:trace-address <- parse-trace line:string-address) (l:string-address <- get f:trace-address/deref label:offset) { begin ; if it's an instruction trace with label 'run' (inst?:boolean <- string-equal l:string-address run:string-address/const) (break-unless inst?:boolean) ; add accumulated traces to curr-tail { begin (break-unless curr-tail:instruction-trace-address) (c:trace-address-array-address-address <- get-address curr-tail:instruction-trace-address/deref children:offset) (c:trace-address-array-address-address/deref <- to-array ch:buffer-address) ; clear 'ch' (ch:buffer-address <- init-buffer 5:literal) } ; append a new curr-tail to result (curr-tail:instruction-trace-address <- parse-instruction-trace f:trace-address) (result:buffer-address <- append result:buffer-address curr-tail:instruction-trace-address) (jump next-line:offset) ; loop } ; otherwise accumulate trace (loop-unless curr-tail:instruction-trace-address) (ch:buffer-address <- append ch:buffer-address f:trace-address) (loop) } ; add accumulated traces to final curr-tail ; todo: test { begin (break-unless curr-tail:instruction-trace-address) (c:trace-address-array-address-address <- get-address curr-tail:instruction-trace-address/deref children:offset) (c:trace-address-array-address-address/deref <- to-array ch:buffer-address) } (s:instruction-trace-address-array-address <- to-array result:buffer-address) (reply s:instruction-trace-address-array-address) ]) (function parse-instruction-trace [ ; trace-address -> instruction-trace-address 
(default-space:space-address <- new space:literal 30:literal) ;? ($print (("parse-instruction-trace\n" literal))) ;? 1 (in:trace-address <- next-input) (buf:string-address <- get in:trace-address/deref contents:offset) ;? (print-string nil:literal buf:string-address) ;? 1 ;? ($print (("\n" literal))) ;? 1 (result:instruction-trace-address <- new instruction-trace:literal) (f1:string-address rest:string-address <- split-first buf:string-address ((#\space literal))) ;? ($print (("call-stack: " literal))) ;? 1 ;? (print-string nil:literal f1:string-address) ;? 1 ;? ($print (("\n" literal))) ;? 1 (cs:string-address-array-address-address <- get-address result:instruction-trace-address/deref call-stack:offset) (cs:string-address-array-address-address/deref <- split f1:string-address ((#\/ literal))) (p:string-address-address <- get-address result:instruction-trace-address/deref pc:offset) (delim:string-address <- new ": ") (p:string-address-address/deref rest:string-address <- split-first-at-substring rest:string-address delim:string-address) (inst:string-address-address <- get-address result:instruction-trace-address/deref instruction:offset) (inst:string-address-address/deref <- copy rest:string-address) (reply result:instruction-trace-address) ]) (function parse-trace [ ; string-address -> trace-address (default-space:space-address <- new space:literal 30:literal) ;? ($print (("parse-trace\n" literal))) ;? 
1 (in:string-address <- next-input) (result:trace-address <- new trace:literal) (delim:string-address <- new ": ") (first:string-address rest:string-address <- split-first-at-substring in:string-address delim:string-address) (l:string-address-address <- get-address result:trace-address/deref label:offset) (l:string-address-address/deref <- copy first:string-address) (c:string-address-address <- get-address result:trace-address/deref contents:offset) (c:string-address-address/deref <- copy rest:string-address) (reply result:trace-address) ]) (function print-trace [ (default-space:space-address <- new space:literal 30:literal) (screen:terminal-address <- next-input) (x:trace-address <- next-input) (l:string-address <- get x:trace-address/deref label:offset) (clear-line screen:terminal-address) (print-string screen:terminal-address l:string-address) (print-character screen:terminal-address ((#\space literal))) (print-character screen:terminal-address ((#\: literal))) (print-character screen:terminal-address ((#\space literal))) (c:string-address <- get x:trace-address/deref contents:offset) (print-string screen:terminal-address c:string-address) ]) (function print-instruction-trace-parent [ (default-space:space-address <- new space:literal 30:literal) (screen:terminal-address <- next-input) (x:instruction-trace-address <- next-input) (0:space-address/names:browser-state <- next-input) (clear-line screen:terminal-address) (print-character screen:terminal-address ((#\- literal))) (print-character screen:terminal-address ((#\space literal))) ; print call stack (c:string-address-array-address <- get x:instruction-trace-address/deref call-stack:offset) (i:integer <- copy 0:literal) (len:integer <- length c:string-address-array-address/deref) { begin (done?:boolean <- greater-or-equal i:integer len:integer) (break-if done?:boolean) (s:string-address <- index c:string-address-array-address/deref i:integer) (print-string screen:terminal-address s:string-address) 
(print-character screen:terminal-address ((#\/ literal))) (i:integer <- add i:integer 1:literal) (loop) } ; print pc (print-character screen:terminal-address ((#\space literal))) (p:string-address <- get x:instruction-trace-address/deref pc:offset) (print-string screen:terminal-address p:string-address) ; print instruction (print-character screen:terminal-address ((#\space literal))) (print-character screen:terminal-address ((#\: literal))) (print-character screen:terminal-address ((#\space literal))) (i:string-address <- get x:instruction-trace-address/deref instruction:offset) (print-string screen:terminal-address i:string-address) (add-line 0:space-address/browser-state screen:terminal-address) ]) (function print-instruction-trace [ (default-space:space-address <- new space:literal 30:literal) (screen:terminal-address <- next-input) (x:instruction-trace-address <- next-input) (0:space-address/names:browser-state <- next-input) (print-instruction-trace-parent screen:terminal-address x:instruction-trace-address 0:s
#
#
# Nim's Runtime Library
# (c) Copyright 2009 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements a simple high performance `CSV`:idx:
## (`comma separated value`:idx:) parser.
##
## Example: How to use the parser
## ==============================
##
## .. code-block:: nim
##   import os, parsecsv, streams
##   var s = newFileStream(paramStr(1), fmRead)
##   if s == nil: quit("cannot open the file" & paramStr(1))
##   var x: CsvParser
##   open(x, s, paramStr(1))
##   while readRow(x):
##     echo "new row: "
##     for val in items(x.row):
##       echo "##", val, "##"
##   close(x)
##
import
lexbase, streams
type
  CsvRow* = seq[string]
    ## The fields of a single row of a CSV file.

  CsvParser* = object of BaseLexer
    ## State of the CSV parser, layered on top of `BaseLexer`.
    row*: CsvRow    ## fields of the row read most recently by `readRow`
    filename: string ## only used to prefix error messages
    sep: char       ## field separator
    quote: char     ## quote character; '\0' turns quote parsing off
    esc: char       ## escape character; '\0' turns escaping off
    skipWhite: bool ## skip spaces and tabs at the start of a field
    currRow: int    ## row counter reported by `processedRows`

  CsvError* = object of IOError
    ## Exception raised when a parsing error occurs.
# Deprecated spellings: each old `T`/`E`-prefixed identifier on the left is
# mapped to the current type name on the right.
{.deprecated: [TCsvRow: CsvRow, TCsvParser: CsvParser, EInvalidCsv: CsvError].}
proc raiseEInvalidCsv(filename: string, line, col: int,
                      msg: string) {.noreturn.} =
  ## Raises a `CsvError` whose message reads
  ## ``<filename>(<line>, <col>) Error: <msg>``. Never returns.
  let text = filename & "(" & $line & ", " & $col & ") Error: " & msg
  raise newException(CsvError, text)
proc error(my: CsvParser, pos: int, msg: string) =
  ## Reports `msg` as a parsing error located at buffer position `pos`.
  ## The location is given as the parser's file name, its current line
  ## number and the column that corresponds to `pos`; the error is raised
  ## through `raiseEInvalidCsv`, so this proc does not return normally.
  let column = getColNumber(my, pos)
  let line = my.lineNumber
  raiseEInvalidCsv(my.filename, line, column, msg)
proc open*(my: var CsvParser, input: Stream, filename: string,
           separator = ',', quote = '"', escape = '\0',
           skipInitialSpace = false) =
  ## Initializes the parser `my` so that it reads from the stream `input`.
  ## `filename` is stored only to produce readable error messages.
  ##
  ## The optional parameters select the CSV dialect:
  ## - `separator`: the character that separates fields.
  ## - `quote`: the character that encloses fields containing special
  ##   characters such as `separator`, `quote` or line breaks. Passing
  ##   '\0' disables the parsing of quotes.
  ## - `escape`: the character that removes any special meaning from the
  ##   character following it; '\0' disables escaping. With escaping
  ##   disabled and `quote` not '\0', two consecutive `quote` characters
  ##   inside a quoted field are parsed as one literal `quote` character.
  ## - `skipInitialSpace`: if true, spaces and tabs at the start of a field
  ##   (i.e. immediately following the `separator`) are ignored.
  lexbase.open(my, input)
  # Begin with an empty current row and a zeroed row counter.
  my.currRow = 0
  my.row = @[]
  # Remember the dialect options and the name used in error messages.
  my.skipWhite = skipInitialSpace
  my.esc = escape
  my.quote = quote
  my.sep = separator
  my.filename = filename
proc parseField(my: var CsvParser, a: var string) =
  ## Parses a single field starting at `my.bufpos` and stores its text in
  ## `a`, replacing whatever `a` held before. On return `my.bufpos` is the
  ## position of the first character that is not part of the field.
  ##
  ## A field that starts with the quote character (and quoting is enabled)
  ## is read up to its closing quote; reaching '\0' before the closing
  ## quote is reported through `error`. Any other field is read up to the
  ## next separator, CR, LF or '\0'.
  var pos = my.bufpos
  var buf = my.buf
  if my.skipWhite:
    # skip spaces and tabs in front of the field
    while buf[pos] in {' ', '\t'}: inc(pos)
  setLen(a, 0) # reuse memory
  if buf[pos] == my.quote and my.quote != '\0':
    # quoted field: step over the opening quote, then scan for the closing one
    inc(pos)
    while true:
      var c = buf[pos]
      if c == '\0':
        # '\0' before the closing quote: store the position, then report
        my.bufpos = pos # can continue after exception?
        error(my, pos, my.quote & " expected")
        # `error` raises via `raiseEInvalidCsv`, so this is not expected to run
        break
      elif c == my.quote:
        if my.esc == '\0' and buf[pos+1] == my.quote:
          # escaping is disabled: a doubled quote stands for one literal quote
          add(a, my.quote)
          inc(pos, 2)
        else:
          # closing quote: step over it and finish the field
          inc(pos)
          break
      elif c == my.esc:
        # escape character: take the following character verbatim
        add(a, buf[pos+1])
        inc(pos, 2)
      else:
        case c
        of '\c':
          # line break inside a quoted field is stored as "\n"
          pos = handleCR(my, pos)
          # re-read the buffer after handleCR — presumably because the lexer
          # may have replaced it; confirm against lexbase
          buf = my.buf
          add(a, "\n")
        of '\l':
          # line break inside a quoted field is stored as "\n"
          pos = handleLF(my, pos)
          # same re-read as in the CR branch
          buf = my.buf
          add(a, "\n")
        else:
          add(a, c)
          inc(pos)
  else:
    # unquoted field: copy characters until a separator, CR, LF or '\0'
    while true:
      var c = buf[pos]
      if c == my.sep: break
      if c in {'\c', '\l', '\0'}: break
      add(a, c)
      inc(pos)
  my.bufpos = pos
proc processedRows*(my: var CsvParser): int =
  ## Returns the number of processed rows, i.e. the current value of the
  ## parser's row counter as maintained by `readRow`.
  result = my.currRow
proc readRow*(my: var CsvParser, columns = 0): bool =
  ## Reads the next row into `my.row`. Returns true if at least one field
  ## was read and false once the end of the input has been reached.
  ##
  ## If `columns` > 0 the row must consist of exactly that many fields;
  ## otherwise a parsing error is raised. A parsing error is also raised
  ## when a field is followed by anything other than the separator, a line
  ## break or the end of the input.
  ##
  ## The row counter reported by `processedRows` is advanced by every call
  ## that returns normally, including the one that returns false.
  let startPos = my.bufpos
  var nFields = 0 # number of fields parsed for this row so far
  while my.buf[my.bufpos] != '\0':
    # Grow the row on demand; strings already present are reused.
    if my.row.len <= nFields:
      setLen(my.row, nFields + 1)
      my.row[nFields] = ""
    parseField(my, my.row[nFields])
    inc(nFields)
    if my.buf[my.bufpos] == my.sep:
      # Another field follows on the same row.
      inc(my.bufpos)
      continue
    case my.buf[my.bufpos]
    of '\c', '\l':
      # End of the row: consume this line break and any empty lines after it.
      while my.buf[my.bufpos] in {'\c', '\l'}:
        if my.buf[my.bufpos] == '\c':
          my.bufpos = handleCR(my, my.bufpos)
        else:
          my.bufpos = handleLF(my, my.bufpos)
    of '\0':
      discard # the end of the input also terminates the row
    else:
      error(my, my.bufpos, my.sep & " expected")
    break
  # Drop fields left over from a previous, longer row.
  setLen(my.row, nFields)
  result = nFields > 0
  if result and columns > 0 and nFields != columns:
    error(my, startPos + 1, $columns & " columns expected, but found " &
          $nFields & " columns")
  inc(my.currRow)
proc close*(my: var CsvParser) {.inline.} =
  ## closes the parser `my` and its associated input stream.
  # Delegates to the `close` defined in `lexbase`; the module qualifier
  # selects that proc explicitly.
  lexbase.close(my)
when not defined(testing) and isMainModule:
import os
var s = newFileStream(paramStr(1), fmRead)
if s == nil: quit("cannot open the file" & paramStr(1))
var x: CsvParser
open(x, s, paramStr(1))