summary refs log blame commit diff stats
path: root/nim/parsecfg.pas
blob: ba6a9867940be84f7758e851ad3592a4afb1e9a1 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
11


                                      
                                          






                                                    

                                                 





                       
                                                                    






                                                              
                                                                   









                               

                                         






                                                           
                                  
                                                
                                                      







                                          

                                                         







                                                  

                                                                  







                                                                               

                                                         

         
                            
       
                                



                          




                                   
                    











































































































                                                                               
                 









                                                           
                       
































































                                                        
                          
                                                                                



                                  
































                                                        
          




















                                                                  
                                                             










                                                                          



                                         
                                       








                                                                  






                                                   
                                                                




































                                                                                  
                                                                                 

         
                                                    







                                                                   
//
//
//            Nimrod's Runtime Library
//        (c) Copyright 2009 Andreas Rumpf
//
//    See the file "copying.txt", included in this
//    distribution, for details about the copyright.
//
unit parsecfg;

// A HIGH-PERFORMANCE configuration file parser;
// the Nimrod version of this file is part of the
// standard library.

interface

{$include 'config.inc'}

uses
  nsystem, charsets, llstream, sysutils, nhashes, strutils, lexbase;

type
  TCfgEventKind = (
    cfgEof,     // end of file reached
    cfgSectionStart, // a ``[section]`` has been parsed
    cfgKeyValuePair, // a ``key=value`` pair has been detected
    cfgOption, // a ``--key=value`` command line option
    cfgError   // an error ocurred during parsing; msg contains the
               // error message
  );
  TCfgEvent = {@ignore} record
    kind: TCfgEventKind;
    section: string;
    key, value: string;
    msg: string;
  end;
  {@emit object(NObject)
    case kind: TCfgEventKind of
      cfgEof: ();
      cfgSectionStart: (section: string);
      cfgKeyValuePair, cfgOption: (key, value: string);
      cfgError: (msg: string);
  end;}
  TTokKind = (tkInvalid, tkEof, // order is important here!
    tkSymbol, tkEquals, tkColon,
    tkBracketLe, tkBracketRi, tkDashDash
  );
  TToken = record       // a token
    kind: TTokKind;     // the type of the token
    literal: string;    // the parsed (string) literal
  end;
  TParserState = (startState, commaState);
  TCfgParser = object(TBaseLexer)
    tok: TToken;
    state: TParserState;
    filename: string;
  end;

procedure Open(var c: TCfgParser; const filename: string;
               inputStream: PLLStream);
procedure Close(var c: TCfgParser);

function next(var c: TCfgParser): TCfgEvent;

function getColumn(const c: TCfgParser): int;
function getLine(const c: TCfgParser): int;
function getFilename(const c: TCfgParser): string;

function errorStr(const c: TCfgParser; const msg: string): string;

implementation

const
  SymChars: TCharSet = ['a'..'z', 'A'..'Z', '0'..'9', '_', #128..#255];

// ----------------------------------------------------------------------------
procedure rawGetTok(var c: TCfgParser; var tok: TToken); forward;

procedure open(var c: TCfgParser; const filename: string;
               inputStream: PLLStream);
begin
{@ignore}
  FillChar(c, sizeof(c), 0);
{@emit}
  openBaseLexer(c, inputStream);
  c.filename := filename;
  c.state := startState;
  c.tok.kind := tkInvalid;
  c.tok.literal := '';
  rawGetTok(c, c.tok);
end;

procedure close(var c: TCfgParser);
begin
  closeBaseLexer(c);
end;

function getColumn(const c: TCfgParser): int;
begin
  result := getColNumber(c, c.bufPos)
end;

function getLine(const c: TCfgParser): int;
begin
  result := c.linenumber
end;

function getFilename(const c: TCfgParser): string;
begin
  result := c.filename
end;

// ----------------------------------------------------------------------------

procedure handleHexChar(var c: TCfgParser; var xi: int);
begin
  case c.buf[c.bufpos] of
    '0'..'9': begin
      xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0'));
      inc(c.bufpos);
    end;
    'a'..'f': begin
      xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10);
      inc(c.bufpos);
    end;
    'A'..'F': begin
      xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10);
      inc(c.bufpos);
    end;
    else begin end // do nothing
  end
end;

procedure handleDecChars(var c: TCfgParser; var xi: int);
begin
  while c.buf[c.bufpos] in ['0'..'9'] do begin
    xi := (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0'));
    inc(c.bufpos);
  end;
end;

procedure getEscapedChar(var c: TCfgParser; var tok: TToken);
var
  xi: int;
begin
  inc(c.bufpos); // skip '\'
  case c.buf[c.bufpos] of
    'n', 'N': begin
      tok.literal := tok.literal +{&} nl;
      Inc(c.bufpos);
    end;
    'r', 'R', 'c', 'C': begin addChar(tok.literal, CR); Inc(c.bufpos); end;
    'l', 'L': begin addChar(tok.literal, LF); Inc(c.bufpos); end;
    'f', 'F': begin addChar(tok.literal, FF); inc(c.bufpos); end;
    'e', 'E': begin addChar(tok.literal, ESC); Inc(c.bufpos); end;
    'a', 'A': begin addChar(tok.literal, BEL); Inc(c.bufpos); end;
    'b', 'B': begin addChar(tok.literal, BACKSPACE); Inc(c.bufpos); end;
    'v', 'V': begin addChar(tok.literal, VT); Inc(c.bufpos); end;
    't', 'T': begin addChar(tok.literal, Tabulator); Inc(c.bufpos); end;
    '''', '"': begin addChar(tok.literal, c.buf[c.bufpos]); Inc(c.bufpos); end;
    '\': begin addChar(tok.literal, '\'); Inc(c.bufpos) end;
    'x', 'X': begin
      inc(c.bufpos);
      xi := 0;
      handleHexChar(c, xi);
      handleHexChar(c, xi);
      addChar(tok.literal, Chr(xi));
    end;
    '0'..'9': begin
      xi := 0;
      handleDecChars(c, xi);
      if (xi <= 255) then
        addChar(tok.literal, Chr(xi))
      else
        tok.kind := tkInvalid
    end
    else tok.kind := tkInvalid
  end
end;

function HandleCRLF(var c: TCfgParser; pos: int): int;
begin
  case c.buf[pos] of
    CR: result := lexbase.HandleCR(c, pos);
    LF: result := lexbase.HandleLF(c, pos);
    else result := pos
  end
end;

procedure getString(var c: TCfgParser; var tok: TToken; rawMode: Boolean);
var
  pos: int;
  ch: Char;
  buf: PChar;
begin
  pos := c.bufPos + 1; // skip "
  buf := c.buf; // put `buf` in a register
  tok.kind := tkSymbol;
  if (buf[pos] = '"') and (buf[pos+1] = '"') then begin
    // long string literal:
    inc(pos, 2); // skip ""
    // skip leading newline:
    pos := HandleCRLF(c, pos);
    buf := c.buf;
    repeat
      case buf[pos] of
        '"': begin
          if (buf[pos+1] = '"') and (buf[pos+2] = '"') then
            break;
          addChar(tok.literal, '"');
          Inc(pos)
        end;
        CR, LF: begin
          pos := HandleCRLF(c, pos);
          buf := c.buf;
          tok.literal := tok.literal +{&} nl;
        end;
        lexbase.EndOfFile: begin
          tok.kind := tkInvalid;
          break
        end
        else begin
          addChar(tok.literal, buf[pos]);
          Inc(pos)
        end
      end
    until false;
    c.bufpos := pos + 3 // skip the three """
  end
  else begin // ordinary string literal
    repeat
      ch := buf[pos];
      if ch = '"' then begin
        inc(pos); // skip '"'
        break
      end;
      if ch in [CR, LF, lexbase.EndOfFile] then begin
        tok.kind := tkInvalid;
        break
      end;
      if (ch = '\') and not rawMode then begin
        c.bufPos := pos;
        getEscapedChar(c, tok);
        pos := c.bufPos;
      end
      else begin
        addChar(tok.literal, ch);
        Inc(pos)
      end
    until false;
    c.bufpos := pos;
  end
end;

procedure getSymbol(var c: TCfgParser; var tok: TToken);
var
  pos: int;
  buf: pchar;
begin
  pos := c.bufpos;
  buf := c.buf;
  while true do begin
    addChar(tok.literal, buf[pos]);
    Inc(pos);
    if not (buf[pos] in SymChars) then break;
  end;
  c.bufpos := pos;
  tok.kind := tkSymbol
end;

procedure skip(var c: TCfgParser);
var
  buf: PChar;
  pos: int;
begin
  pos := c.bufpos;
  buf := c.buf;
  repeat
    case buf[pos] of
      ' ': Inc(pos);
      Tabulator: inc(pos);
      '#', ';': while not (buf[pos] in [CR, LF, lexbase.EndOfFile]) do inc(pos);
      CR, LF: begin
        pos := HandleCRLF(c, pos);
        buf := c.buf;
      end
      else break // EndOfFile also leaves the loop
    end
  until false;
  c.bufpos := pos;
end;

procedure rawGetTok(var c: TCfgParser; var tok: TToken);
begin
  tok.kind := tkInvalid;
  setLength(tok.literal, 0);
  skip(c);
  case c.buf[c.bufpos] of
    '=': begin
      tok.kind := tkEquals;
      inc(c.bufpos);
      tok.literal := '='+'';
    end;
    '-': begin
      inc(c.bufPos);
      if c.buf[c.bufPos] = '-' then inc(c.bufPos);
      tok.kind := tkDashDash;
      tok.literal := '--';
    end;
    ':': begin
      tok.kind := tkColon;
      inc(c.bufpos);
      tok.literal := ':'+'';
    end;
    'r', 'R': begin
      if c.buf[c.bufPos+1] = '"' then begin
        Inc(c.bufPos);
        getString(c, tok, true);
      end
      else
        getSymbol(c, tok);
    end;
    '[': begin
      tok.kind := tkBracketLe;
      inc(c.bufpos);
      tok.literal := '['+'';
    end;
    ']': begin
      tok.kind := tkBracketRi;
      Inc(c.bufpos);
      tok.literal := ']'+'';
    end;
    '"': getString(c, tok, false);
    lexbase.EndOfFile: tok.kind := tkEof;
    else getSymbol(c, tok);
  end
end;

function errorStr(const c: TCfgParser; const msg: string): string;
begin
  result := format('$1($2, $3) Error: $4', [
    c.filename, toString(getLine(c)), toString(getColumn(c)),
    msg
  ]);
end;

function getKeyValPair(var c: TCfgParser; kind: TCfgEventKind): TCfgEvent;
begin
  if c.tok.kind = tkSymbol then begin
    result.kind := kind;
    result.key := c.tok.literal;
    result.value := '';
    rawGetTok(c, c.tok);
    while c.tok.literal = '.'+'' do begin
      addChar(result.key, '.');
      rawGetTok(c, c.tok);
      if c.tok.kind = tkSymbol then begin
        add(result.key, c.tok.literal);
        rawGetTok(c, c.tok);
      end
      else begin
        result.kind := cfgError;
        result.msg := errorStr(c, 'symbol expected, but found: ' +
                               c.tok.literal);
        break
      end
    end;
    if c.tok.kind in [tkEquals, tkColon] then begin
      rawGetTok(c, c.tok);
      if c.tok.kind = tkSymbol then begin
        result.value := c.tok.literal;
      end
      else begin
        result.kind := cfgError;
        result.msg := errorStr(c, 'symbol expected, but found: '
                               + c.tok.literal);
      end;
      rawGetTok(c, c.tok);
    end
  end
  else begin
    result.kind := cfgError;
    result.msg := errorStr(c, 'symbol expected, but found: ' + c.tok.literal);
    rawGetTok(c, c.tok);
  end;
end;

function next(var c: TCfgParser): TCfgEvent;
begin
  case c.tok.kind of
    tkEof: result.kind := cfgEof;
    tkDashDash: begin
      rawGetTok(c, c.tok);
      result := getKeyValPair(c, cfgOption);
    end;
    tkSymbol: begin
      result := getKeyValPair(c, cfgKeyValuePair);
    end;
    tkBracketLe: begin
      rawGetTok(c, c.tok);
      if c.tok.kind = tkSymbol then begin
        result.kind := cfgSectionStart;
        result.section := c.tok.literal;
      end
      else begin
        result.kind := cfgError;
        result.msg := errorStr(c, 'symbol expected, but found: ' + c.tok.literal);
      end;
      rawGetTok(c, c.tok);
      if c.tok.kind = tkBracketRi then rawGetTok(c, c.tok)
      else begin
        result.kind := cfgError;
        result.msg := errorStr(c, ''']'' expected, but found: ' + c.tok.literal);
      end
    end;
    tkInvalid, tkBracketRi, tkEquals, tkColon: begin
      result.kind := cfgError;
      result.msg := errorStr(c, 'invalid token: ' + c.tok.literal);
      rawGetTok(c, c.tok);
    end
  end
end;

end.