diff options
Diffstat (limited to 'nim/parsecfg.pas')
-rwxr-xr-x | nim/parsecfg.pas | 424 |
1 files changed, 0 insertions, 424 deletions
diff --git a/nim/parsecfg.pas b/nim/parsecfg.pas deleted file mode 100755 index ba6a98679..000000000 --- a/nim/parsecfg.pas +++ /dev/null @@ -1,424 +0,0 @@ -// -// -// Nimrod's Runtime Library -// (c) Copyright 2009 Andreas Rumpf -// -// See the file "copying.txt", included in this -// distribution, for details about the copyright. -// -unit parsecfg; - -// A HIGH-PERFORMANCE configuration file parser; -// the Nimrod version of this file is part of the -// standard library. - -interface - -{$include 'config.inc'} - -uses - nsystem, charsets, llstream, sysutils, nhashes, strutils, lexbase; - -type - TCfgEventKind = ( - cfgEof, // end of file reached - cfgSectionStart, // a ``[section]`` has been parsed - cfgKeyValuePair, // a ``key=value`` pair has been detected - cfgOption, // a ``--key=value`` command line option - cfgError // an error ocurred during parsing; msg contains the - // error message - ); - TCfgEvent = {@ignore} record - kind: TCfgEventKind; - section: string; - key, value: string; - msg: string; - end; - {@emit object(NObject) - case kind: TCfgEventKind of - cfgEof: (); - cfgSectionStart: (section: string); - cfgKeyValuePair, cfgOption: (key, value: string); - cfgError: (msg: string); - end;} - TTokKind = (tkInvalid, tkEof, // order is important here! - tkSymbol, tkEquals, tkColon, - tkBracketLe, tkBracketRi, tkDashDash - ); - TToken = record // a token - kind: TTokKind; // the type of the token - literal: string; // the parsed (string) literal - end; - TParserState = (startState, commaState); - TCfgParser = object(TBaseLexer) - tok: TToken; - state: TParserState; - filename: string; - end; - -procedure Open(var c: TCfgParser; const filename: string; - inputStream: PLLStream); -procedure Close(var c: TCfgParser); - -function next(var c: TCfgParser): TCfgEvent; - -function getColumn(const c: TCfgParser): int; -function getLine(const c: TCfgParser): int; -function getFilename(const c: TCfgParser): string; - -function errorStr(const c: TCfgParser; const msg: string): string; - -implementation - -const - SymChars: TCharSet = ['a'..'z', 'A'..'Z', '0'..'9', '_', #128..#255]; - -// ---------------------------------------------------------------------------- -procedure rawGetTok(var c: TCfgParser; var tok: TToken); forward; - -procedure open(var c: TCfgParser; const filename: string; - inputStream: PLLStream); -begin -{@ignore} - FillChar(c, sizeof(c), 0); -{@emit} - openBaseLexer(c, inputStream); - c.filename := filename; - c.state := startState; - c.tok.kind := tkInvalid; - c.tok.literal := ''; - rawGetTok(c, c.tok); -end; - -procedure close(var c: TCfgParser); -begin - closeBaseLexer(c); -end; - -function getColumn(const c: TCfgParser): int; -begin - result := getColNumber(c, c.bufPos) -end; - -function getLine(const c: TCfgParser): int; -begin - result := c.linenumber -end; - -function getFilename(const c: TCfgParser): string; -begin - result := c.filename -end; - -// ---------------------------------------------------------------------------- - -procedure handleHexChar(var c: TCfgParser; var xi: int); -begin - case c.buf[c.bufpos] of - '0'..'9': begin - xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0')); - inc(c.bufpos); - end; - 'a'..'f': begin - xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10); - inc(c.bufpos); - end; - 'A'..'F': begin - xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10); - inc(c.bufpos); - end; - else begin end // do nothing - end -end; - -procedure handleDecChars(var c: TCfgParser; var xi: int); -begin - while c.buf[c.bufpos] in ['0'..'9'] do begin - xi := (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0')); - inc(c.bufpos); - end; -end; - -procedure getEscapedChar(var c: TCfgParser; var tok: TToken); -var - xi: int; -begin - inc(c.bufpos); // skip '\' - case c.buf[c.bufpos] of - 'n', 'N': begin - tok.literal := tok.literal +{&} nl; - Inc(c.bufpos); - end; - 'r', 'R', 'c', 'C': begin addChar(tok.literal, CR); Inc(c.bufpos); end; - 'l', 'L': begin addChar(tok.literal, LF); Inc(c.bufpos); end; - 'f', 'F': begin addChar(tok.literal, FF); inc(c.bufpos); end; - 'e', 'E': begin addChar(tok.literal, ESC); Inc(c.bufpos); end; - 'a', 'A': begin addChar(tok.literal, BEL); Inc(c.bufpos); end; - 'b', 'B': begin addChar(tok.literal, BACKSPACE); Inc(c.bufpos); end; - 'v', 'V': begin addChar(tok.literal, VT); Inc(c.bufpos); end; - 't', 'T': begin addChar(tok.literal, Tabulator); Inc(c.bufpos); end; - '''', '"': begin addChar(tok.literal, c.buf[c.bufpos]); Inc(c.bufpos); end; - '\': begin addChar(tok.literal, '\'); Inc(c.bufpos) end; - 'x', 'X': begin - inc(c.bufpos); - xi := 0; - handleHexChar(c, xi); - handleHexChar(c, xi); - addChar(tok.literal, Chr(xi)); - end; - '0'..'9': begin - xi := 0; - handleDecChars(c, xi); - if (xi <= 255) then - addChar(tok.literal, Chr(xi)) - else - tok.kind := tkInvalid - end - else tok.kind := tkInvalid - end -end; - -function HandleCRLF(var c: TCfgParser; pos: int): int; -begin - case c.buf[pos] of - CR: result := lexbase.HandleCR(c, pos); - LF: result := lexbase.HandleLF(c, pos); - else result := pos - end -end; - -procedure getString(var c: TCfgParser; var tok: TToken; rawMode: Boolean); -var - pos: int; - ch: Char; - buf: PChar; -begin - pos := c.bufPos + 1; // skip " - buf := c.buf; // put `buf` in a register - tok.kind := tkSymbol; - if (buf[pos] = '"') and (buf[pos+1] = '"') then begin - // long string literal: - inc(pos, 2); // skip "" - // skip leading newline: - pos := HandleCRLF(c, pos); - buf := c.buf; - repeat - case buf[pos] of - '"': begin - if (buf[pos+1] = '"') and (buf[pos+2] = '"') then - break; - addChar(tok.literal, '"'); - Inc(pos) - end; - CR, LF: begin - pos := HandleCRLF(c, pos); - buf := c.buf; - tok.literal := tok.literal +{&} nl; - end; - lexbase.EndOfFile: begin - tok.kind := tkInvalid; - break - end - else begin - addChar(tok.literal, buf[pos]); - Inc(pos) - end - end - until false; - c.bufpos := pos + 3 // skip the three """ - end - else begin // ordinary string literal - repeat - ch := buf[pos]; - if ch = '"' then begin - inc(pos); // skip '"' - break - end; - if ch in [CR, LF, lexbase.EndOfFile] then begin - tok.kind := tkInvalid; - break - end; - if (ch = '\') and not rawMode then begin - c.bufPos := pos; - getEscapedChar(c, tok); - pos := c.bufPos; - end - else begin - addChar(tok.literal, ch); - Inc(pos) - end - until false; - c.bufpos := pos; - end -end; - -procedure getSymbol(var c: TCfgParser; var tok: TToken); -var - pos: int; - buf: pchar; -begin - pos := c.bufpos; - buf := c.buf; - while true do begin - addChar(tok.literal, buf[pos]); - Inc(pos); - if not (buf[pos] in SymChars) then break; - end; - c.bufpos := pos; - tok.kind := tkSymbol -end; - -procedure skip(var c: TCfgParser); -var - buf: PChar; - pos: int; -begin - pos := c.bufpos; - buf := c.buf; - repeat - case buf[pos] of - ' ': Inc(pos); - Tabulator: inc(pos); - '#', ';': while not (buf[pos] in [CR, LF, lexbase.EndOfFile]) do inc(pos); - CR, LF: begin - pos := HandleCRLF(c, pos); - buf := c.buf; - end - else break // EndOfFile also leaves the loop - end - until false; - c.bufpos := pos; -end; - -procedure rawGetTok(var c: TCfgParser; var tok: TToken); -begin - tok.kind := tkInvalid; - setLength(tok.literal, 0); - skip(c); - case c.buf[c.bufpos] of - '=': begin - tok.kind := tkEquals; - inc(c.bufpos); - tok.literal := '='+''; - end; - '-': begin - inc(c.bufPos); - if c.buf[c.bufPos] = '-' then inc(c.bufPos); - tok.kind := tkDashDash; - tok.literal := '--'; - end; - ':': begin - tok.kind := tkColon; - inc(c.bufpos); - tok.literal := ':'+''; - end; - 'r', 'R': begin - if c.buf[c.bufPos+1] = '"' then begin - Inc(c.bufPos); - getString(c, tok, true); - end - else - getSymbol(c, tok); - end; - '[': begin - tok.kind := tkBracketLe; - inc(c.bufpos); - tok.literal := '['+''; - end; - ']': begin - tok.kind := tkBracketRi; - Inc(c.bufpos); - tok.literal := ']'+''; - end; - '"': getString(c, tok, false); - lexbase.EndOfFile: tok.kind := tkEof; - else getSymbol(c, tok); - end -end; - -function errorStr(const c: TCfgParser; const msg: string): string; -begin - result := format('$1($2, $3) Error: $4', [ - c.filename, toString(getLine(c)), toString(getColumn(c)), - msg - ]); -end; - -function getKeyValPair(var c: TCfgParser; kind: TCfgEventKind): TCfgEvent; -begin - if c.tok.kind = tkSymbol then begin - result.kind := kind; - result.key := c.tok.literal; - result.value := ''; - rawGetTok(c, c.tok); - while c.tok.literal = '.'+'' do begin - addChar(result.key, '.'); - rawGetTok(c, c.tok); - if c.tok.kind = tkSymbol then begin - add(result.key, c.tok.literal); - rawGetTok(c, c.tok); - end - else begin - result.kind := cfgError; - result.msg := errorStr(c, 'symbol expected, but found: ' + - c.tok.literal); - break - end - end; - if c.tok.kind in [tkEquals, tkColon] then begin - rawGetTok(c, c.tok); - if c.tok.kind = tkSymbol then begin - result.value := c.tok.literal; - end - else begin - result.kind := cfgError; - result.msg := errorStr(c, 'symbol expected, but found: ' - + c.tok.literal); - end; - rawGetTok(c, c.tok); - end - end - else begin - result.kind := cfgError; - result.msg := errorStr(c, 'symbol expected, but found: ' + c.tok.literal); - rawGetTok(c, c.tok); - end; -end; - -function next(var c: TCfgParser): TCfgEvent; -begin - case c.tok.kind of - tkEof: result.kind := cfgEof; - tkDashDash: begin - rawGetTok(c, c.tok); - result := getKeyValPair(c, cfgOption); - end; - tkSymbol: begin - result := getKeyValPair(c, cfgKeyValuePair); - end; - tkBracketLe: begin - rawGetTok(c, c.tok); - if c.tok.kind = tkSymbol then begin - result.kind := cfgSectionStart; - result.section := c.tok.literal; - end - else begin - result.kind := cfgError; - result.msg := errorStr(c, 'symbol expected, but found: ' + c.tok.literal); - end; - rawGetTok(c, c.tok); - if c.tok.kind = tkBracketRi then rawGetTok(c, c.tok) - else begin - result.kind := cfgError; - result.msg := errorStr(c, ''']'' expected, but found: ' + c.tok.literal); - end - end; - tkInvalid, tkBracketRi, tkEquals, tkColon: begin - result.kind := cfgError; - result.msg := errorStr(c, 'invalid token: ' + c.tok.literal); - rawGetTok(c, c.tok); - end - end -end; - -end. |