# # # Nimrod's Runtime Library # (c) Copyright 2012 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # # A HIGH-PERFORMANCE configuration file parser; # the Nimrod version of this file is part of the # standard library. import llstream, nhashes, strutils, lexbase type TCfgEventKind* = enum cfgEof, # end of file reached cfgSectionStart, # a ``[section]`` has been parsed cfgKeyValuePair, # a ``key=value`` pair has been detected cfgOption, # a ``--key=value`` command line option cfgError # an error ocurred during parsing; msg contains the # error message TCfgEvent* = object of TObject case kind*: TCfgEventKind of cfgEof: nil of cfgSectionStart: section*: string of cfgKeyValuePair, cfgOption: key*, value*: string of cfgError: msg*: string TTokKind* = enum tkInvalid, tkEof, # order is important here! tkSymbol, tkEquals, tkColon, tkBracketLe, tkBracketRi, tkDashDash TToken*{.final.} = object # a token kind*: TTokKind # the type of the token literal*: string # the parsed (string) literal TParserState* = enum startState, commaState TCfgParser* = object of TBaseLexer tok*: TToken state*: TParserState filename*: string proc Open*(c: var TCfgParser, filename: string, inputStream: PLLStream) proc Close*(c: var TCfgParser) proc next*(c: var TCfgParser): TCfgEvent proc getColumn*(c: TCfgParser): int proc getLine*(c: TCfgParser): int proc getFilename*(c: TCfgParser): string proc errorStr*(c: TCfgParser, msg: string): string # implementation const SymChars: TCharSet = {'a'..'z', 'A'..'Z', '0'..'9', '_', '\x80'..'\xFF'} # # ---------------------------------------------------------------------------- proc rawGetTok(c: var TCfgParser, tok: var TToken) proc open(c: var TCfgParser, filename: string, inputStream: PLLStream) = openBaseLexer(c, inputStream) c.filename = filename c.state = startState c.tok.kind = tkInvalid c.tok.literal = "" rawGetTok(c, c.tok) proc close(c: var TCfgParser) = closeBaseLexer(c) proc getColumn(c: TCfgParser): int = result = getColNumber(c, c.bufPos) proc getLine(c: TCfgParser): int = result = c.linenumber proc getFilename(c: TCfgParser): string = result = c.filename proc handleHexChar(c: var TCfgParser, xi: var int) = case c.buf[c.bufpos] of '0'..'9': xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')) inc(c.bufpos) of 'a'..'f': xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10) inc(c.bufpos) of 'A'..'F': xi = (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10) inc(c.bufpos) else: nil proc handleDecChars(c: var TCfgParser, xi: var int) = while c.buf[c.bufpos] in {'0'..'9'}: xi = (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0')) inc(c.bufpos) proc getEscapedChar(c: var TCfgParser, tok: var TToken) = var xi: int inc(c.bufpos) # skip '\' case c.buf[c.bufpos] of 'n', 'N': tok.literal = tok.literal & "\n" Inc(c.bufpos) of 'r', 'R', 'c', 'C': add(tok.literal, CR) Inc(c.bufpos) of 'l', 'L': add(tok.literal, LF) Inc(c.bufpos) of 'f', 'F': add(tok.literal, FF) inc(c.bufpos) of 'e', 'E': add(tok.literal, ESC) Inc(c.bufpos) of 'a', 'A': add(tok.literal, BEL) Inc(c.bufpos) of 'b', 'B': add(tok.literal, BACKSPACE) Inc(c.bufpos) of 'v', 'V': add(tok.literal, VT) Inc(c.bufpos) of 't', 'T': add(tok.literal, Tabulator) Inc(c.bufpos) of '\'', '\"': add(tok.literal, c.buf[c.bufpos]) Inc(c.bufpos) of '\\': add(tok.literal, '\\') Inc(c.bufpos) of 'x', 'X': inc(c.bufpos) xi = 0 handleHexChar(c, xi) handleHexChar(c, xi) add(tok.literal, Chr(xi)) of '0'..'9': xi = 0 handleDecChars(c, xi) if (xi <= 255): add(tok.literal, Chr(xi)) else: tok.kind = tkInvalid else: tok.kind = tkInvalid proc HandleCRLF(c: var TCfgParser, pos: int): int = case c.buf[pos] of CR: result = lexbase.HandleCR(c, pos) of LF: result = lexbase.HandleLF(c, pos) else: result = pos proc getString(c: var TCfgParser, tok: var TToken, rawMode: bool) = var pos: int ch: Char buf: cstring pos = c.bufPos + 1 # skip " buf = c.buf # put `buf` in a register tok.kind = tkSymbol if (buf[pos] == '\"') and (buf[pos + 1] == '\"'): # long string literal: inc(pos, 2) # skip "" # skip leading newline: pos = HandleCRLF(c, pos) buf = c.buf while true: case buf[pos] of '\"': if (buf[pos + 1] == '\"') and (buf[pos + 2] == '\"'): break add(tok.literal, '\"') Inc(pos) of CR, LF: pos = HandleCRLF(c, pos) buf = c.buf tok.literal = tok.literal & "\n" of lexbase.EndOfFile: tok.kind = tkInvalid break else: add(tok.literal, buf[pos]) Inc(pos) c.bufpos = pos + 3 # skip the three """ else: # ordinary string literal while true: ch = buf[pos] if ch == '\"': inc(pos) # skip '"' break if ch in {CR, LF, lexbase.EndOfFile}: tok.kind = tkInvalid break if (ch == '\\') and not rawMode: c.bufPos = pos getEscapedChar(c, tok) pos = c.bufPos else: add(tok.literal, ch) Inc(pos) c.bufpos = pos proc getSymbol(c: var TCfgParser, tok: var TToken) = var pos: int buf: cstring pos = c.bufpos buf = c.buf while true: add(tok.literal, buf[pos]) Inc(pos) if not (buf[pos] in SymChars): break c.bufpos = pos tok.kind = tkSymbol proc skip(c: var TCfgParser) = var buf: cstring pos: int pos = c.bufpos buf = c.buf while true: case buf[pos] of ' ': Inc(pos) of Tabulator: inc(pos) of '#', ';': while not (buf[pos] in {CR, LF, lexbase.EndOfFile}): inc(pos) of CR, LF: pos = HandleCRLF(c, pos) buf = c.buf else: break # EndOfFile also leaves the loop c.bufpos = pos proc rawGetTok(c: var TCfgParser, tok: var TToken) = tok.kind = tkInvalid setlen(tok.literal, 0) skip(c) case c.buf[c.bufpos] of '=': tok.kind = tkEquals inc(c.bufpos) tok.literal = "=" of '-': inc(c.bufPos) if c.buf[c.bufPos] == '-': inc(c.bufPos) tok.kind = tkDashDash tok.literal = "--" of ':': tok.kind = tkColon inc(c.bufpos) tok.literal = ":" of 'r', 'R': if c.buf[c.bufPos + 1] == '\"': Inc(c.bufPos) getString(c, tok, true) else: getSymbol(c, tok) of '[': tok.kind = tkBracketLe inc(c.bufpos) tok.literal = "[" of ']': tok.kind = tkBracketRi Inc(c.bufpos) tok.literal = "]" of '\"': getString(c, tok, false) of lexbase.EndOfFile: tok.kind = tkEof else: getSymbol(c, tok) proc errorStr(c: TCfgParser, msg: string): string = result = `%`("$1($2, $3) Error: $4", [c.filename, $(getLine(c)), $(getColumn(c)), msg]) proc getKeyValPair(c: var TCfgParser, kind: TCfgEventKind): TCfgEvent = if c.tok.kind == tkSymbol: result.kind = kind result.key = c.tok.literal result.value = "" rawGetTok(c, c.tok) while c.tok.literal == ".": add(result.key, '.') rawGetTok(c, c.tok) if c.tok.kind == tkSymbol: add(result.key, c.tok.literal) rawGetTok(c, c.tok) else: result.kind = cfgError result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) break if c.tok.kind in {tkEquals, tkColon}: rawGetTok(c, c.tok) if c.tok.kind == tkSymbol: result.value = c.tok.literal else: result.kind = cfgError result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) rawGetTok(c, c.tok) else: result.kind = cfgError result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) rawGetTok(c, c.tok) proc next(c: var TCfgParser): TCfgEvent = case c.tok.kind of tkEof: result.kind = cfgEof of tkDashDash: rawGetTok(c, c.tok) result = getKeyValPair(c, cfgOption) of tkSymbol: result = getKeyValPair(c, cfgKeyValuePair) of tkBracketLe: rawGetTok(c, c.tok) if c.tok.kind == tkSymbol: result.kind = cfgSectionStart result.section = c.tok.literal else: result.kind = cfgError result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal) rawGetTok(c, c.tok) if c.tok.kind == tkBracketRi: rawGetTok(c, c.tok) else: result.kind = cfgError result.msg = errorStr(c, "\']\' expected, but found: " & c.tok.literal) of tkInvalid, tkBracketRi, tkEquals, tkColon: result.kind = cfgError result.msg = errorStr(c, "invalid token: " & c.tok.literal) rawGetTok(c, c.tok)