From 07d5a8085bbcc21a1d9d06a2976ecc00e9c8d55b Mon Sep 17 00:00:00 2001 From: Andreas Rumpf Date: Sat, 23 Aug 2008 11:16:44 +0200 Subject: too many changes to list --- nim/parsecfg.pas | 414 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 414 insertions(+) create mode 100644 nim/parsecfg.pas (limited to 'nim/parsecfg.pas') diff --git a/nim/parsecfg.pas b/nim/parsecfg.pas new file mode 100644 index 000000000..1f049536d --- /dev/null +++ b/nim/parsecfg.pas @@ -0,0 +1,414 @@ +// +// +// Nimrod's Runtime Library +// (c) Copyright 2008 Andreas Rumpf +// +// See the file "copying.txt", included in this +// distribution, for details about the copyright. +// +unit parsecfg; + +// A HIGH-PERFORMANCE configuration file parser; +// the Nimrod version of this file will become part +// of the standard library. + +interface + +{$include 'config.inc'} + +uses + charsets, nsystem, sysutils, hashes, strutils, lexbase; + +type + TCfgEventKind = ( + cfgEof, // end of file reached + cfgSectionStart, // a ``[section]`` has been parsed + cfgKeyValuePair, // a ``key=value`` pair has been detected + cfgOption, // a ``--key=value`` command line option + cfgError // an error ocurred during parsing; msg contains the + // error message + ); + TCfgEvent = {@ignore} record + kind: TCfgEventKind; + section: string; + key, value: string; + msg: string; + end; + {@emit object(NObject) + case kind: TCfgEventKind of + cfgSection: (section: string); + cfgKeyValuePair, cfgOption: (key, value: string); + cfgError: (msg: string); + end;} + TTokKind = (tkInvalid, tkEof, // order is important here! + tkSymbol, tkEquals, tkColon, + tkBracketLe, tkBracketRi, tkDashDash + ); + TToken = record // a token + kind: TTokKind; // the type of the token + literal: string; // the parsed (string) literal + end; + TParserState = (startState, commaState); + TCfgParser = object(TBaseLexer) + tok: TToken; + state: TParserState; + filename: string; + end; + +function Open(var c: TCfgParser; const filename: string): bool; +procedure OpenFromBuffer(var c: TCfgParser; const buf: string); +procedure Close(var c: TCfgParser); + +function next(var c: TCfgParser): TCfgEvent; + +function getColumn(const c: TCfgParser): int; +function getLine(const c: TCfgParser): int; +function getFilename(const c: TCfgParser): string; + +implementation + +const + SymChars: TCharSet = ['a'..'z', 'A'..'Z', '0'..'9', '_', #128..#255]; + +// ---------------------------------------------------------------------------- +procedure rawGetTok(var c: TCfgParser; var tok: TToken); forward; + +function open(var c: TCfgParser; const filename: string): bool; +begin +{@ignore} + FillChar(c, sizeof(c), 0); // work around Delphi/fpc bug +{@emit} + result := initBaseLexer(c, filename); + c.filename := filename; + c.state := startState; + c.tok.kind := tkInvalid; + c.tok.literal := ''; + if result then rawGetTok(c, c.tok); +end; + +procedure openFromBuffer(var c: TCfgParser; const buf: string); +begin +{@ignore} + FillChar(c, sizeof(c), 0); // work around Delphi/fpc bug +{@emit} + initBaseLexerFromBuffer(c, buf); + c.filename := 'buffer'; + c.state := startState; + c.tok.kind := tkInvalid; + c.tok.literal := ''; + rawGetTok(c, c.tok); +end; + +procedure close(var c: TCfgParser); +begin + deinitBaseLexer(c); +end; + +function getColumn(const c: TCfgParser): int; +begin + result := getColNumber(c, c.bufPos) +end; + +function getLine(const c: TCfgParser): int; +begin + result := c.linenumber +end; + +function getFilename(const c: TCfgParser): string; +begin + result := c.filename +end; + +// ---------------------------------------------------------------------------- + +procedure handleHexChar(var c: TCfgParser; var xi: int); +begin + case c.buf[c.bufpos] of + '0'..'9': begin + xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')); + inc(c.bufpos); + end; + 'a'..'f': begin + xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10); + inc(c.bufpos); + end; + 'A'..'F': begin + xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10); + inc(c.bufpos); + end; + else begin end // do nothing + end +end; + +procedure handleDecChars(var c: TCfgParser; var xi: int); +begin + while c.buf[c.bufpos] in ['0'..'9'] do begin + xi := (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0')); + inc(c.bufpos); + end; +end; + +procedure getEscapedChar(var c: TCfgParser; var tok: TToken); +var + xi: int; +begin + inc(c.bufpos); // skip '\' + case c.buf[c.bufpos] of + 'n', 'N': begin + tok.literal := tok.literal +{&} nl; + Inc(c.bufpos); + end; + 'r', 'R', 'c', 'C': begin addChar(tok.literal, CR); Inc(c.bufpos); end; + 'l', 'L': begin addChar(tok.literal, LF); Inc(c.bufpos); end; + 'f', 'F': begin addChar(tok.literal, FF); inc(c.bufpos); end; + 'e', 'E': begin addChar(tok.literal, ESC); Inc(c.bufpos); end; + 'a', 'A': begin addChar(tok.literal, BEL); Inc(c.bufpos); end; + 'b', 'B': begin addChar(tok.literal, BACKSPACE); Inc(c.bufpos); end; + 'v', 'V': begin addChar(tok.literal, VT); Inc(c.bufpos); end; + 't', 'T': begin addChar(tok.literal, Tabulator); Inc(c.bufpos); end; + '''', '"': begin addChar(tok.literal, c.buf[c.bufpos]); Inc(c.bufpos); end; + '\': begin addChar(tok.literal, '\'); Inc(c.bufpos) end; + 'x', 'X': begin + inc(c.bufpos); + xi := 0; + handleHexChar(c, xi); + handleHexChar(c, xi); + addChar(tok.literal, Chr(xi)); + end; + '0'..'9': begin + xi := 0; + handleDecChars(c, xi); + if (xi <= 255) then + addChar(tok.literal, Chr(xi)) + else + tok.kind := tkInvalid + end + else tok.kind := tkInvalid + end +end; + +function HandleCRLF(var c: TCfgParser; pos: int): int; +begin + case c.buf[pos] of + CR: result := lexbase.HandleCR(c, pos); + LF: result := lexbase.HandleLF(c, pos); + else result := pos + end +end; + +procedure getString(var c: TCfgParser; var tok: TToken; rawMode: Boolean); +var + pos: int; + ch: Char; + buf: PChar; +begin + pos := c.bufPos + 1; // skip " + buf := c.buf; // put `buf` in a register + tok.kind := tkSymbol; + if (buf[pos] = '"') and (buf[pos+1] = '"') then begin + // long string literal: + inc(pos, 2); // skip "" + // skip leading newline: + pos := HandleCRLF(c, pos); + repeat + case buf[pos] of + '"': begin + if (buf[pos+1] = '"') and (buf[pos+2] = '"') then + break; + addChar(tok.literal, '"'); + Inc(pos) + end; + CR, LF: begin + pos := HandleCRLF(c, pos); + tok.literal := tok.literal +{&} nl; + end; + lexbase.EndOfFile: begin + tok.kind := tkInvalid; + break + end + else begin + addChar(tok.literal, buf[pos]); + Inc(pos) + end + end + until false; + c.bufpos := pos + 3 // skip the three """ + end + else begin // ordinary string literal + repeat + ch := buf[pos]; + if ch = '"' then begin + inc(pos); // skip '"' + break + end; + if ch in [CR, LF, lexbase.EndOfFile] then begin + tok.kind := tkInvalid; + break + end; + if (ch = '\') and not rawMode then begin + c.bufPos := pos; + getEscapedChar(c, tok); + pos := c.bufPos; + end + else begin + addChar(tok.literal, ch); + Inc(pos) + end + until false; + c.bufpos := pos; + end +end; + +procedure getSymbol(var c: TCfgParser; var tok: TToken); +var + pos: int; + buf: pchar; +begin + pos := c.bufpos; + buf := c.buf; + while true do begin + addChar(tok.literal, buf[pos]); + Inc(pos); + if not (buf[pos] in SymChars) then break; + end; + c.bufpos := pos; + tok.kind := tkSymbol +end; + +procedure skip(var c: TCfgParser); +var + buf: PChar; + pos: int; +begin + pos := c.bufpos; + buf := c.buf; + repeat + case buf[pos] of + ' ': Inc(pos); + Tabulator: inc(pos); + '#', ';': while not (buf[pos] in [CR, LF, lexbase.EndOfFile]) do inc(pos); + CR, LF: pos := HandleCRLF(c, pos); + else break // EndOfFile also leaves the loop + end + until false; + c.bufpos := pos; +end; + +procedure rawGetTok(var c: TCfgParser; var tok: TToken); +begin + tok.kind := tkInvalid; + setLength(tok.literal, 0); + skip(c); + case c.buf[c.bufpos] of + '=': begin + tok.kind := tkEquals; + inc(c.bufpos); + tok.literal := '='+''; + end; + '-': begin + inc(c.bufPos); + if c.buf[c.bufPos] = '-' then inc(c.bufPos); + tok.kind := tkDashDash; + tok.literal := '--'; + end; + ':': begin + tok.kind := tkColon; + inc(c.bufpos); + tok.literal := ':'+''; + end; + 'r', 'R': begin + if c.buf[c.bufPos+1] = '"' then begin + Inc(c.bufPos); + getString(c, tok, true); + end + else + getSymbol(c, tok); + end; + '[': begin + tok.kind := tkBracketLe; + inc(c.bufpos); + tok.literal := '['+''; + end; + ']': begin + tok.kind := tkBracketRi; + Inc(c.bufpos); + tok.literal := ']'+''; + end; + '"': getString(c, tok, false); + lexbase.EndOfFile: tok.kind := tkEof; + else getSymbol(c, tok); + end +end; + +function errorStr(const c: TCfgParser; const msg: string): string; +begin + result := format('$1($2, $3) Error: $4', [ + c.filename, toString(getLine(c)), toString(getColumn(c)), + msg + ]); +end; + +function getKeyValPair(var c: TCfgParser; kind: TCfgEventKind): TCfgEvent; +begin + if c.tok.kind = tkSymbol then begin + result.kind := kind; + result.key := c.tok.literal; + result.value := ''; + rawGetTok(c, c.tok); + if c.tok.kind in [tkEquals, tkColon] then begin + rawGetTok(c, c.tok); + if c.tok.kind = tkSymbol then begin + result.value := c.tok.literal; + end + else begin + result.kind := cfgError; + result.msg := errorStr(c, 'symbol expected, but found: ' + + c.tok.literal); + end; + rawGetTok(c, c.tok); + end + end + else begin + result.kind := cfgError; + result.msg := errorStr(c, 'symbol expected, but found: ' + c.tok.literal); + rawGetTok(c, c.tok); + end; +end; + +function next(var c: TCfgParser): TCfgEvent; +begin + case c.tok.kind of + tkEof: result.kind := cfgEof; + tkDashDash: begin + rawGetTok(c, c.tok); + result := getKeyValPair(c, cfgOption); + end; + tkSymbol: begin + result := getKeyValPair(c, cfgKeyValuePair); + end; + tkBracketLe: begin + rawGetTok(c, c.tok); + if c.tok.kind = tkSymbol then begin + result.kind := cfgSectionStart; + result.section := c.tok.literal; + end + else begin + result.kind := cfgError; + result.msg := errorStr(c, 'symbol expected, but found: ' + c.tok.literal); + end; + rawGetTok(c, c.tok); + if c.tok.kind = tkBracketRi then rawGetTok(c, c.tok) + else begin + result.kind := cfgError; + result.msg := errorStr(c, ''']'' expected, but found: ' + c.tok.literal); + end + end; + tkInvalid, tkEquals, tkColon: begin + result.kind := cfgError; + result.msg := errorStr(c, 'invalid token: ' + c.tok.literal); + rawGetTok(c, c.tok); + end + end +end; + +end. -- cgit 1.4.1-2-gfad0