//
//
// Nimrod's Runtime Library
// (c) Copyright 2009 Andreas Rumpf
//
// See the file "copying.txt", included in this
// distribution, for details about the copyright.
//
unit parsecfg;
// A HIGH-PERFORMANCE configuration file parser;
// the Nimrod version of this file is part of the
// standard library.
interface
{$include 'config.inc'}
uses
nsystem, charsets, llstream, sysutils, nhashes, strutils, lexbase;
type
TCfgEventKind = (
cfgEof, // end of file reached
cfgSectionStart, // a ``[section]`` has been parsed
cfgKeyValuePair, // a ``key=value`` pair has been detected
cfgOption, // a ``--key=value`` command line option
cfgError // an error ocurred during parsing; msg contains the
// error message
);
TCfgEvent = {@ignore} record
kind: TCfgEventKind;
section: string;
key, value: string;
msg: string;
end;
{@emit object(NObject)
case kind: TCfgEventKind of
cfgEof: ();
cfgSectionStart: (section: string);
cfgKeyValuePair, cfgOption: (key, value: string);
cfgError: (msg: string);
end;}
TTokKind = (tkInvalid, tkEof, // order is important here!
tkSymbol, tkEquals, tkColon,
tkBracketLe, tkBracketRi, tkDashDash
);
TToken = record // a token
kind: TTokKind; // the type of the token
literal: string; // the parsed (string) literal
end;
TParserState = (startState, commaState);
TCfgParser = object(TBaseLexer)
tok: TToken;
state: TParserState;
filename: string;
end;
procedure Open(var c: TCfgParser; const filename: string;
inputStream: PLLStream);
procedure Close(var c: TCfgParser);
function next(var c: TCfgParser): TCfgEvent;
function getColumn(const c: TCfgParser): int;
function getLine(const c: TCfgParser): int;
function getFilename(const c: TCfgParser): string;
function errorStr(const c: TCfgParser; const msg: string): string;
implementation
const
SymChars: TCharSet = ['a'..'z', 'A'..'Z', '0'..'9', '_', #128..#255];
// ----------------------------------------------------------------------------
procedure rawGetTok(var c: TCfgParser; var tok: TToken); forward;
procedure open(var c: TCfgParser; const filename: string;
inputStream: PLLStream);
begin
{@ignore}
FillChar(c, sizeof(c), 0);
{@emit}
openBaseLexer(c, inputStream);
c.filename := filename;
c.state := startState;
c.tok.kind := tkInvalid;
c.tok.literal := '';
rawGetTok(c, c.tok);
end;
procedure close(var c: TCfgParser);
begin
closeBaseLexer(c);
end;
function getColumn(const c: TCfgParser): int;
begin
result := getColNumber(c, c.bufPos)
end;
function getLine(const c: TCfgParser): int;
begin
result := c.linenumber
end;
function getFilename(const c: TCfgParser): string;
begin
result := c.filename
end;
// ----------------------------------------------------------------------------
procedure handleHexChar(var c: TCfgParser; var xi: int);
begin
case c.buf[c.bufpos] of
'0'..'9': begin
xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0'));
inc(c.bufpos);
end;
'a'..'f': begin
xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10);
inc(c.bufpos);
end;
'A'..'F': begin
xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10);
inc(c.bufpos);
end;
else begin end // do nothing
end
end;
procedure handleDecChars(var c: TCfgParser; var xi: int);
begin
while c.buf[c.bufpos] in ['0'..'9'] do begin
xi := (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0'));
inc(c.bufpos);
end;
end;
procedure getEscapedChar(var c: TCfgParser; var tok: TToken);
var
xi: int;
begin
inc(c.bufpos); // skip '\'
case c.buf[c.bufpos] of
'n', 'N': begin
tok.literal := tok.literal +{&} nl;
Inc(c.bufpos);
end;
'r', 'R', 'c', 'C': begin addChar(tok.literal, CR); Inc(c.bufpos); end;
'l', 'L': begin addChar(tok.literal, LF); Inc(c.bufpos); end;
'f', 'F': begin addChar(tok.literal, FF); inc(c.bufpos); end;
'e', 'E': begin addChar(tok.literal, ESC); Inc(c.bufpos); end;
'a', 'A': begin addChar(tok.literal, BEL); Inc(c.bufpos); end;
'b', 'B': begin addChar(tok.literal, BACKSPACE); Inc(c.bufpos); end;
'v', 'V': begin addChar(tok.literal, VT); Inc(c.bufpos); end;
't', 'T': begin addChar(tok.literal, Tabulator); Inc(c.bufpos); end;
'''', '"': begin addChar(tok.literal, c.buf[c.bufpos]); Inc(c.bufpos); end;
'\': begin addChar(tok.literal, '\'); Inc(c.bufpos) end;
'x', 'X': begin
inc(c.bufpos);
xi := 0;
handleHexChar(c, xi);
handleHexChar(c, xi);
addChar(tok.literal, Chr(xi));
end;
'0'..'9': begin
xi := 0;
handleDecChars(c, xi);
if (xi <= 255) then
addChar(tok.literal, Chr(xi))
else
tok.kind := tkInvalid
end
else tok.kind := tkInvalid
end
end;
function HandleCRLF(var c: TCfgParser; pos: int): int;
begin
case c.buf[pos] of
CR: result := lexbase.HandleCR(c, pos);
LF: result := lexbase.HandleLF(c, pos);
else result := pos
end
end;
procedure getString(var c: TCfgParser; var tok: TToken; rawMode: Boolean);
var
pos: int;
ch: Char;
buf: PChar;
begin
pos := c.bufPos + 1; // skip "
buf := c.buf; // put `buf` in a register
tok.kind := tkSymbol;
if (buf[pos] = '"') and (buf[pos+1] = '"') then begin
// long string literal:
inc(pos, 2); // skip ""
// skip leading newline:
pos := HandleCRLF(c, pos);
buf := c.buf;
repeat
case buf[pos] of
'"': begin
if (buf[pos+1] = '"') and (buf[pos+2] = '"') then
break;
addChar(tok.literal, '"');
Inc(pos)
end;
CR, LF: begin
pos := HandleCRLF(c, pos);
buf := c.buf;
tok.literal := tok.literal +{&} nl;
end;
lexbase.EndOfFile: begin
tok.kind := tkInvalid;
break
end
else begin
addChar(tok.literal, buf[pos]);
Inc(pos)
end
end
until false;
c.bufpos := pos + 3 // skip the three """
end
else begin // ordinary string literal
repeat
ch := buf[pos];
if ch = '"' then begin
inc(pos); // skip '"'
break
end;
if ch in [CR, LF, lexbase.EndOfFile] then begin
tok.kind := tkInvalid;
break
end;
if (ch = '\') and not rawMode then begin
c.bufPos := pos;
getEscapedChar(c, tok);
pos := c.bufPos;
end
else begin
addChar(tok.literal, ch);
Inc(pos)
end
until false;
c.bufpos := pos;
end
end;
procedure getSymbol(var c: TCfgParser; var tok: TToken);
var
pos: int;
buf: pchar;
begin
pos := c.bufpos;
buf := c.buf;
while true do begin
addChar(tok.literal, buf[pos]);
Inc(pos);
if not (buf[pos] in SymChars) then break;
end;
c.bufpos := pos;
tok.kind := tkSymbol
end;
procedure skip(var c: TCfgParser);
var
buf: PChar;
pos: int;
begin
pos := c.bufpos;
buf := c.buf;
repeat
case buf[pos] of
' ': Inc(pos);
Tabulator: inc(pos);
'#', ';': while not (buf[pos] in [CR, LF, lexbase.EndOfFile]) do inc(pos);
CR, LF: begin
pos := HandleCRLF(c, pos);
buf := c.buf;
end
else break // EndOfFile also leaves the loop
end
until false;
c.bufpos := pos;
end;
procedure rawGetTok(var c: TCfgParser; var tok: TToken);
begin
tok.kind := tkInvalid;
setLength(tok.literal, 0);
skip(c);
case c.buf[c.bufpos] of
'=': begin
tok.kind := tkEquals;
inc(c.bufpos);
tok.literal := '='+'';
end;
'-': begin
inc(c.bufPos);
if c.buf[c.bufPos] = '-' then inc(c.bufPos);
tok.kind := tkDashDash;
tok.literal := '--';
end;
':': begin
tok.kind := tkColon;
inc(c.bufpos);
tok.literal := ':'+'';
end;
'r', 'R': begin
if c.buf[c.bufPos+1] = '"' then begin
Inc(c.bufPos);
getString(c, tok, true);
end
else
getSymbol(c, tok);
end;
'[': begin
tok.kind := tkBracketLe;
inc(c.bufpos);
tok.literal := '['+'';
end;
']': begin
tok.kind := tkBracketRi;
Inc(c.bufpos);
tok.literal := ']'+'';
end;
'"': getString(c, tok, false);
lexbase.EndOfFile: tok.kind := tkEof;
else getSymbol(c, tok);
end
end;
function errorStr(const c: TCfgParser; const msg: string): string;
begin
result := format('$1($2, $3) Error: $4', [
c.filename, toString(getLine(c)), toString(getColumn(c)),
msg
]);
end;
function getKeyValPair(var c: TCfgParser; kind: TCfgEventKind): TCfgEvent;
begin
if c.tok.kind = tkSymbol then begin
result.kind := kind;
result.key := c.tok.literal;
result.value := '';
rawGetTok(c, c.tok);
while c.tok.literal = '.'+'' do begin
addChar(result.key, '.');
rawGetTok(c, c.tok);
if c.tok.kind = tkSymbol then begin
add(result.key, c.tok.literal);
rawGetTok(c, c.tok);
end
else begin
result.kind := cfgError;
result.msg := errorStr(c, 'symbol expected, but found: ' +
c.tok.literal);
break
end
end;
if c.tok.kind in [tkEquals, tkColon] then begin
rawGetTok(c, c.tok);
if c.tok.kind = tkSymbol then begin
result.value := c.tok.literal;
end
else begin
result.kind := cfgError;
result.msg := errorStr(c, 'symbol expected, but found: '
+ c.tok.literal);
end;
rawGetTok(c, c.tok);
end
end
else begin
result.kind := cfgError;
result.msg := errorStr(c, 'symbol expected, but found: ' + c.tok.literal);
rawGetTok(c, c.tok);
end;
end;
function next(var c: TCfgParser): TCfgEvent;
begin
case c.tok.kind of
tkEof: result.kind := cfgEof;
tkDashDash: begin
rawGetTok(c, c.tok);
result := getKeyValPair(c, cfgOption);
end;
tkSymbol: begin
result := getKeyValPair(c, cfgKeyValuePair);
end;
tkBracketLe: begin
rawGetTok(c, c.tok);
if c.tok.kind = tkSymbol then begin
result.kind := cfgSectionStart;
result.section := c.tok.literal;
end
else begin
result.kind := cfgError;
result.msg := errorStr(c, 'symbol expected, but found: ' + c.tok.literal);
end;
rawGetTok(c, c.tok);
if c.tok.kind = tkBracketRi then rawGetTok(c, c.tok)
else begin
result.kind := cfgError;
result.msg := errorStr(c, ''']'' expected, but found: ' + c.tok.literal);
end
end;
tkInvalid, tkBracketRi, tkEquals, tkColon: begin
result.kind := cfgError;
result.msg := errorStr(c, 'invalid token: ' + c.tok.literal);
rawGetTok(c, c.tok);
end
end
end;
end.